miga-base 1.2.15.2 → 1.2.15.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/cli/action/download/gtdb.rb +4 -1
- data/lib/miga/cli/action/gtdb_get.rb +4 -0
- data/lib/miga/daemon.rb +4 -1
- data/lib/miga/lair.rb +6 -4
- data/lib/miga/remote_dataset/download.rb +3 -2
- data/lib/miga/remote_dataset.rb +25 -7
- data/lib/miga/taxonomy.rb +6 -0
- data/lib/miga/version.rb +2 -2
- metadata +6 -302
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +0 -41964
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +0 -32439
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +0 -62056
- data/utils/FastAAI/FastAAI +0 -3659
- data/utils/FastAAI/FastAAI-legacy/FastAAI +0 -1336
- data/utils/FastAAI/FastAAI-legacy/kAAI_v1.0_virus.py +0 -1296
- data/utils/FastAAI/README.md +0 -84
- data/utils/enveomics/Docs/recplot2.md +0 -244
- data/utils/enveomics/Examples/aai-matrix.bash +0 -66
- data/utils/enveomics/Examples/ani-matrix.bash +0 -66
- data/utils/enveomics/Examples/essential-phylogeny.bash +0 -105
- data/utils/enveomics/Examples/unus-genome-phylogeny.bash +0 -100
- data/utils/enveomics/LICENSE.txt +0 -73
- data/utils/enveomics/Makefile +0 -52
- data/utils/enveomics/Manifest/Tasks/aasubs.json +0 -103
- data/utils/enveomics/Manifest/Tasks/blasttab.json +0 -790
- data/utils/enveomics/Manifest/Tasks/distances.json +0 -161
- data/utils/enveomics/Manifest/Tasks/fasta.json +0 -802
- data/utils/enveomics/Manifest/Tasks/fastq.json +0 -291
- data/utils/enveomics/Manifest/Tasks/graphics.json +0 -126
- data/utils/enveomics/Manifest/Tasks/mapping.json +0 -137
- data/utils/enveomics/Manifest/Tasks/ogs.json +0 -382
- data/utils/enveomics/Manifest/Tasks/other.json +0 -906
- data/utils/enveomics/Manifest/Tasks/remote.json +0 -355
- data/utils/enveomics/Manifest/Tasks/sequence-identity.json +0 -650
- data/utils/enveomics/Manifest/Tasks/tables.json +0 -308
- data/utils/enveomics/Manifest/Tasks/trees.json +0 -68
- data/utils/enveomics/Manifest/Tasks/variants.json +0 -111
- data/utils/enveomics/Manifest/categories.json +0 -165
- data/utils/enveomics/Manifest/examples.json +0 -162
- data/utils/enveomics/Manifest/tasks.json +0 -4
- data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +0 -69
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +0 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +0 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +0 -1
- data/utils/enveomics/Pipelines/assembly.pbs/README.md +0 -189
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +0 -112
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +0 -23
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +0 -44
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +0 -50
- data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +0 -37
- data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +0 -68
- data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +0 -49
- data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +0 -80
- data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +0 -57
- data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +0 -63
- data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +0 -38
- data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +0 -73
- data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +0 -21
- data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +0 -72
- data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +0 -98
- data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +0 -1
- data/utils/enveomics/Pipelines/blast.pbs/README.md +0 -127
- data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +0 -109
- data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +0 -128
- data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +0 -16
- data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +0 -22
- data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +0 -26
- data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +0 -89
- data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +0 -29
- data/utils/enveomics/Pipelines/idba.pbs/README.md +0 -49
- data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +0 -95
- data/utils/enveomics/Pipelines/idba.pbs/run.pbs +0 -56
- data/utils/enveomics/Pipelines/trim.pbs/README.md +0 -54
- data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +0 -70
- data/utils/enveomics/Pipelines/trim.pbs/run.pbs +0 -130
- data/utils/enveomics/README.md +0 -42
- data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +0 -171
- data/utils/enveomics/Scripts/Aln.cat.rb +0 -221
- data/utils/enveomics/Scripts/Aln.convert.pl +0 -35
- data/utils/enveomics/Scripts/AlphaDiversity.pl +0 -152
- data/utils/enveomics/Scripts/BedGraph.tad.rb +0 -93
- data/utils/enveomics/Scripts/BedGraph.window.rb +0 -71
- data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +0 -102
- data/utils/enveomics/Scripts/BlastTab.addlen.rb +0 -63
- data/utils/enveomics/Scripts/BlastTab.advance.bash +0 -48
- data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +0 -55
- data/utils/enveomics/Scripts/BlastTab.catsbj.pl +0 -104
- data/utils/enveomics/Scripts/BlastTab.cogCat.rb +0 -76
- data/utils/enveomics/Scripts/BlastTab.filter.pl +0 -47
- data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +0 -194
- data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +0 -104
- data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +0 -157
- data/utils/enveomics/Scripts/BlastTab.recplot2.R +0 -48
- data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +0 -86
- data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +0 -119
- data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +0 -86
- data/utils/enveomics/Scripts/BlastTab.subsample.pl +0 -47
- data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +0 -114
- data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +0 -90
- data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +0 -123
- data/utils/enveomics/Scripts/Chao1.pl +0 -97
- data/utils/enveomics/Scripts/CharTable.classify.rb +0 -234
- data/utils/enveomics/Scripts/EBIseq2tax.rb +0 -83
- data/utils/enveomics/Scripts/FastA.N50.pl +0 -60
- data/utils/enveomics/Scripts/FastA.extract.rb +0 -152
- data/utils/enveomics/Scripts/FastA.filter.pl +0 -52
- data/utils/enveomics/Scripts/FastA.filterLen.pl +0 -28
- data/utils/enveomics/Scripts/FastA.filterN.pl +0 -60
- data/utils/enveomics/Scripts/FastA.fragment.rb +0 -100
- data/utils/enveomics/Scripts/FastA.gc.pl +0 -42
- data/utils/enveomics/Scripts/FastA.interpose.pl +0 -93
- data/utils/enveomics/Scripts/FastA.length.pl +0 -38
- data/utils/enveomics/Scripts/FastA.mask.rb +0 -89
- data/utils/enveomics/Scripts/FastA.per_file.pl +0 -36
- data/utils/enveomics/Scripts/FastA.qlen.pl +0 -57
- data/utils/enveomics/Scripts/FastA.rename.pl +0 -65
- data/utils/enveomics/Scripts/FastA.revcom.pl +0 -23
- data/utils/enveomics/Scripts/FastA.sample.rb +0 -98
- data/utils/enveomics/Scripts/FastA.slider.pl +0 -85
- data/utils/enveomics/Scripts/FastA.split.pl +0 -55
- data/utils/enveomics/Scripts/FastA.split.rb +0 -79
- data/utils/enveomics/Scripts/FastA.subsample.pl +0 -131
- data/utils/enveomics/Scripts/FastA.tag.rb +0 -65
- data/utils/enveomics/Scripts/FastA.toFastQ.rb +0 -69
- data/utils/enveomics/Scripts/FastA.wrap.rb +0 -48
- data/utils/enveomics/Scripts/FastQ.filter.pl +0 -54
- data/utils/enveomics/Scripts/FastQ.interpose.pl +0 -90
- data/utils/enveomics/Scripts/FastQ.maskQual.rb +0 -89
- data/utils/enveomics/Scripts/FastQ.offset.pl +0 -90
- data/utils/enveomics/Scripts/FastQ.split.pl +0 -53
- data/utils/enveomics/Scripts/FastQ.tag.rb +0 -70
- data/utils/enveomics/Scripts/FastQ.test-error.rb +0 -81
- data/utils/enveomics/Scripts/FastQ.toFastA.awk +0 -24
- data/utils/enveomics/Scripts/GFF.catsbj.pl +0 -127
- data/utils/enveomics/Scripts/GenBank.add_fields.rb +0 -84
- data/utils/enveomics/Scripts/HMM.essential.rb +0 -351
- data/utils/enveomics/Scripts/HMM.haai.rb +0 -168
- data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +0 -83
- data/utils/enveomics/Scripts/JPlace.distances.rb +0 -88
- data/utils/enveomics/Scripts/JPlace.to_iToL.rb +0 -320
- data/utils/enveomics/Scripts/M5nr.getSequences.rb +0 -81
- data/utils/enveomics/Scripts/MeTaxa.distribution.pl +0 -198
- data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +0 -35
- data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +0 -49
- data/utils/enveomics/Scripts/NCBIacc2tax.rb +0 -92
- data/utils/enveomics/Scripts/Newick.autoprune.R +0 -27
- data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +0 -228
- data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +0 -32
- data/utils/enveomics/Scripts/RefSeq.download.bash +0 -48
- data/utils/enveomics/Scripts/SRA.download.bash +0 -55
- data/utils/enveomics/Scripts/TRIBS.plot-test.R +0 -36
- data/utils/enveomics/Scripts/TRIBS.test.R +0 -39
- data/utils/enveomics/Scripts/Table.barplot.R +0 -31
- data/utils/enveomics/Scripts/Table.df2dist.R +0 -30
- data/utils/enveomics/Scripts/Table.filter.pl +0 -61
- data/utils/enveomics/Scripts/Table.merge.pl +0 -77
- data/utils/enveomics/Scripts/Table.prefScore.R +0 -60
- data/utils/enveomics/Scripts/Table.replace.rb +0 -69
- data/utils/enveomics/Scripts/Table.round.rb +0 -63
- data/utils/enveomics/Scripts/Table.split.pl +0 -57
- data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +0 -227
- data/utils/enveomics/Scripts/VCF.KaKs.rb +0 -147
- data/utils/enveomics/Scripts/VCF.SNPs.rb +0 -88
- data/utils/enveomics/Scripts/aai.rb +0 -421
- data/utils/enveomics/Scripts/ani.rb +0 -362
- data/utils/enveomics/Scripts/anir.rb +0 -137
- data/utils/enveomics/Scripts/clust.rand.rb +0 -102
- data/utils/enveomics/Scripts/gi2tax.rb +0 -103
- data/utils/enveomics/Scripts/in_silico_GA_GI.pl +0 -96
- data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
- data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
- data/utils/enveomics/Scripts/lib/enveomics.R +0 -1
- data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +0 -293
- data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +0 -175
- data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +0 -24
- data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +0 -17
- data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +0 -30
- data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +0 -253
- data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +0 -88
- data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +0 -182
- data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +0 -49
- data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +0 -74
- data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +0 -237
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +0 -31
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +0 -152
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +0 -3
- data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +0 -74
- data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +0 -135
- data/utils/enveomics/Scripts/ogs.annotate.rb +0 -88
- data/utils/enveomics/Scripts/ogs.core-pan.rb +0 -160
- data/utils/enveomics/Scripts/ogs.extract.rb +0 -125
- data/utils/enveomics/Scripts/ogs.mcl.rb +0 -186
- data/utils/enveomics/Scripts/ogs.rb +0 -104
- data/utils/enveomics/Scripts/ogs.stats.rb +0 -131
- data/utils/enveomics/Scripts/rbm-legacy.rb +0 -172
- data/utils/enveomics/Scripts/rbm.rb +0 -108
- data/utils/enveomics/Scripts/sam.filter.rb +0 -148
- data/utils/enveomics/Tests/Makefile +0 -10
- data/utils/enveomics/Tests/Mgen_M2288.faa +0 -3189
- data/utils/enveomics/Tests/Mgen_M2288.fna +0 -8282
- data/utils/enveomics/Tests/Mgen_M2321.fna +0 -8288
- data/utils/enveomics/Tests/Nequ_Kin4M.faa +0 -2970
- data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +0 -7
- data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +0 -17
- data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +0 -137
- data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +0 -123
- data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +0 -200
- data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +0 -55
- data/utils/enveomics/Tests/alkB.nwk +0 -1
- data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +0 -13
- data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +0 -17
- data/utils/enveomics/Tests/hiv1.faa +0 -59
- data/utils/enveomics/Tests/hiv1.fna +0 -134
- data/utils/enveomics/Tests/hiv2.faa +0 -70
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +0 -233
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +0 -1
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +0 -233
- data/utils/enveomics/Tests/phyla_counts.tsv +0 -10
- data/utils/enveomics/Tests/primate_lentivirus.ogs +0 -11
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +0 -9
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +0 -8
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +0 -6
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +0 -9
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +0 -6
- data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +0 -6
- data/utils/enveomics/build_enveomics_r.bash +0 -45
- data/utils/enveomics/enveomics.R/DESCRIPTION +0 -31
- data/utils/enveomics/enveomics.R/NAMESPACE +0 -39
- data/utils/enveomics/enveomics.R/R/autoprune.R +0 -155
- data/utils/enveomics/enveomics.R/R/barplot.R +0 -184
- data/utils/enveomics/enveomics.R/R/cliopts.R +0 -135
- data/utils/enveomics/enveomics.R/R/df2dist.R +0 -154
- data/utils/enveomics/enveomics.R/R/growthcurve.R +0 -331
- data/utils/enveomics/enveomics.R/R/prefscore.R +0 -79
- data/utils/enveomics/enveomics.R/R/recplot.R +0 -354
- data/utils/enveomics/enveomics.R/R/recplot2.R +0 -1631
- data/utils/enveomics/enveomics.R/R/tribs.R +0 -583
- data/utils/enveomics/enveomics.R/R/utils.R +0 -80
- data/utils/enveomics/enveomics.R/README.md +0 -81
- data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
- data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -16
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -16
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -16
- data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +0 -25
- data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +0 -46
- data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +0 -23
- data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +0 -47
- data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +0 -23
- data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +0 -23
- data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +0 -40
- data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +0 -103
- data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +0 -67
- data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +0 -24
- data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +0 -45
- data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +0 -44
- data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +0 -47
- data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +0 -75
- data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +0 -50
- data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +0 -44
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +0 -139
- data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +0 -45
- data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +0 -24
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +0 -77
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +0 -25
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +0 -21
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +0 -47
- data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +0 -29
- data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +0 -18
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +0 -45
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +0 -36
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +0 -27
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +0 -52
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +0 -17
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +0 -51
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +0 -43
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +0 -82
- data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +0 -59
- data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +0 -27
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +0 -36
- data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +0 -23
- data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +0 -68
- data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +0 -28
- data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +0 -27
- data/utils/enveomics/enveomics.R/man/growth.curves.Rd +0 -14
- data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +0 -13
- data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +0 -78
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +0 -46
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +0 -45
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +0 -125
- data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +0 -19
- data/utils/enveomics/globals.mk +0 -8
- data/utils/enveomics/manifest.json +0 -9
- data/utils/multitrim/Multitrim How-To.pdf +0 -0
- data/utils/multitrim/README.md +0 -67
- data/utils/multitrim/multitrim.py +0 -1555
- data/utils/multitrim/multitrim.yml +0 -13
|
@@ -1,157 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env ruby
|
|
2
|
-
|
|
3
|
-
#
|
|
4
|
-
# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
|
|
5
|
-
# @update: Jul-29-2015
|
|
6
|
-
# @license artistic license 2.0
|
|
7
|
-
#
|
|
8
|
-
|
|
9
|
-
require 'optparse'
|
|
10
|
-
|
|
11
|
-
opts = {:minscore=>0, :besthits=>0, :orient=>0, :sisprefix=>"_"}
|
|
12
|
-
ARGV << '-h' if ARGV.size==0
|
|
13
|
-
OptionParser.new do |opt|
|
|
14
|
-
opt.separator "Identifies the best hits of paired-reads."
|
|
15
|
-
opt.separator ""
|
|
16
|
-
opt.on("-i", "--blast FILE", "Input BLAST file."){ |v| opts[:blast]=v }
|
|
17
|
-
opt.on("-s", "--minscore FLOAT", "Minimum (summed) Bit-Score to consider a pair-match."){ |v| opts[:minscore] = v.to_f }
|
|
18
|
-
opt.on("-b", "--besthits INT", "Outputs top best-hits only (use 0 to output all the paired hits)."){ |v| opts[:besthits]=v.to_i }
|
|
19
|
-
opt.on("-o", "--orient INT", "Checks the orientation of the hit. Values are: 0, no checking; 1, same direction; 2,",
|
|
20
|
-
"inwards; 3, outwards; 4, different direction (i.e., 2 or 3)."){ |v| opts[:orient]=v.to_i }
|
|
21
|
-
opt.on("-p", "--sisprefix STR", "Sister read number prefix in the name of the reads. Escape characters as dots (\\.),",
|
|
22
|
-
"parenthesis (\\(, \\), \\[, \\]), or other characters with special meaning in regular expressions",
|
|
23
|
-
"(\\*, \\+, \\^, \\$, \\|). This prefix allows regular expressions (for example, use ':|\\.' to use any of",
|
|
24
|
-
"colon or dot). Notice that the prefix will not be included in the base name reported in the output."){ |v| opts[:sisprefix]=v }
|
|
25
|
-
opt.on("-h","--help","Display this screen") do
|
|
26
|
-
puts opt
|
|
27
|
-
exit
|
|
28
|
-
end
|
|
29
|
-
opt.separator ""
|
|
30
|
-
opt.separator "Output:"
|
|
31
|
-
opt.separator " Tab-delimited flat file, with the following columns:"
|
|
32
|
-
opt.separator " 1. Query ID (without the \"sister\" identifier)."
|
|
33
|
-
opt.separator " 2. Subject ID."
|
|
34
|
-
opt.separator " 3. Bit score (summed from both sister reads)."
|
|
35
|
-
opt.separator " 4/5. From/To (subject) coordinates for read 1."
|
|
36
|
-
opt.separator " 6/7. From/To (subject) coordinates for read 2."
|
|
37
|
-
opt.separator " 8. Reads orientation (1: same direction, 2: inwards, 3: outwards)."
|
|
38
|
-
opt.separator " 9. Estimated insert size."
|
|
39
|
-
opt.separator ""
|
|
40
|
-
opt.separator "Important note: This script assumes that paired hits are next to each other."
|
|
41
|
-
opt.separator " If this is not the case (e.g., because the blast was concatenated),"
|
|
42
|
-
opt.separator " you must sort the input before running this script."
|
|
43
|
-
opt.separator ""
|
|
44
|
-
end.parse!
|
|
45
|
-
abort "-i/--blast is mandatory." if opts[:blast].nil?
|
|
46
|
-
abort "-i/--blast must exist." unless File.exists? opts[:blast]
|
|
47
|
-
|
|
48
|
-
class SingleHit
|
|
49
|
-
attr_reader :sbj, :score, :orient, :sfrom, :sto, :qfrom, :qto
|
|
50
|
-
def initialize(blast_ln)
|
|
51
|
-
blast_ln.chomp!
|
|
52
|
-
ln = blast_ln.split("\t")
|
|
53
|
-
@sbj = ln[1]
|
|
54
|
-
@score = ln[11].to_f
|
|
55
|
-
@qfrom = ln[6].to_i
|
|
56
|
-
@qto = ln[7].to_i
|
|
57
|
-
@sfrom = ln[8].to_i
|
|
58
|
-
@sto = ln[9].to_i
|
|
59
|
-
@orient = @sfrom < @sto ? 1 : -1;
|
|
60
|
-
end
|
|
61
|
-
end
|
|
62
|
-
class DoubleHit
|
|
63
|
-
attr_reader :name, :sbj, :score, :orient, :hitA, :hitB
|
|
64
|
-
def initialize(name, hitA, hitB)
|
|
65
|
-
raise "Trying to set DoubleHit from hits with different subjects" unless hitA.sbj == hitB.sbj
|
|
66
|
-
@name = name
|
|
67
|
-
@hitA = hitA
|
|
68
|
-
@hitB = hitB
|
|
69
|
-
@sbj = hitA.sbj
|
|
70
|
-
@score = hitA.score + hitB.score
|
|
71
|
-
@orient = (hitA.orient == hitB.orient ? 1:
|
|
72
|
-
((hitA.orient>0 and hitB.orient<0) ? 2: 3))
|
|
73
|
-
end
|
|
74
|
-
def to_s
|
|
75
|
-
coords = [@hitA.sfrom, @hitB.sfrom, @hitA.sto, @hitB.sto]
|
|
76
|
-
@name + "\t" + @sbj + "\t" + @score.to_s + "\t" +
|
|
77
|
-
@hitA.sfrom.to_s + "\t" + @hitA.sto.to_s + "\t" +
|
|
78
|
-
@hitB.sfrom.to_s + "\t" + @hitB.sto.to_s + "\t" +
|
|
79
|
-
@orient.to_s + "\t" + (coords.max-coords.min).to_s + "\n"
|
|
80
|
-
end
|
|
81
|
-
end
|
|
82
|
-
class PairedHits
|
|
83
|
-
attr_reader :name, :hitsA, :hitsB
|
|
84
|
-
@@minscore = 0
|
|
85
|
-
@@orient = 0
|
|
86
|
-
@@besthits = 0
|
|
87
|
-
def initialize(name)
|
|
88
|
-
@name = name
|
|
89
|
-
@hitsA = []
|
|
90
|
-
@hitsB = []
|
|
91
|
-
@hits = []
|
|
92
|
-
end
|
|
93
|
-
def hits
|
|
94
|
-
@hits = []
|
|
95
|
-
# Search for paired hits
|
|
96
|
-
@hitsA.each do |hitA|
|
|
97
|
-
@hitsB.each do |hitB|
|
|
98
|
-
if hitA.sbj == hitB.sbj
|
|
99
|
-
hit = DoubleHit.new(@name, hitA, hitB)
|
|
100
|
-
next if hit.score <= @@minscore # Minimum bit-score check
|
|
101
|
-
next if ((1 .. 3).include?(@@orient) and @@orient != hit.orient) # "typical" orientation check
|
|
102
|
-
next if (@@orient == 4 and not((2 .. 3).include?(hit.orient))) # "different-orientation" check
|
|
103
|
-
@hits.push(hit)
|
|
104
|
-
end
|
|
105
|
-
end
|
|
106
|
-
end
|
|
107
|
-
# Sort the hits
|
|
108
|
-
@hits.sort! {|x,y| x.score <=> y.score }
|
|
109
|
-
if @@besthits==0
|
|
110
|
-
@hits
|
|
111
|
-
else
|
|
112
|
-
@hits.take(@@besthits)
|
|
113
|
-
end
|
|
114
|
-
end
|
|
115
|
-
def hitsX(x)
|
|
116
|
-
if x == 1
|
|
117
|
-
@hitsA
|
|
118
|
-
else
|
|
119
|
-
@hitsB
|
|
120
|
-
end
|
|
121
|
-
end
|
|
122
|
-
# Class methods
|
|
123
|
-
def PairedHits.minscore=(value)
|
|
124
|
-
@@minscore = value
|
|
125
|
-
end
|
|
126
|
-
def PairedHits.orient=(value)
|
|
127
|
-
@@orient = value
|
|
128
|
-
end
|
|
129
|
-
def PairedHits.besthits=(value)
|
|
130
|
-
@@besthits = value
|
|
131
|
-
end
|
|
132
|
-
end
|
|
133
|
-
|
|
134
|
-
PairedHits.minscore = opts[:minscore]
|
|
135
|
-
PairedHits.orient = opts[:orient]
|
|
136
|
-
PairedHits.besthits = opts[:besthits]
|
|
137
|
-
|
|
138
|
-
begin
|
|
139
|
-
f = File.open(opts[:blast], "r")
|
|
140
|
-
currPair = PairedHits.new(" ")
|
|
141
|
-
while(ln = f.gets)
|
|
142
|
-
m = /^([^\s]*)(?:#{opts[:sisprefix]})([12])/.match(ln)
|
|
143
|
-
raise "Impossible to parse read name in line #{$.} using sister prefix '#{opts[:sisprefix]}':\n#{ln}" unless m
|
|
144
|
-
if m[1] != currPair.name
|
|
145
|
-
currPair.hits.each { |hit| puts hit.to_s }
|
|
146
|
-
currPair = PairedHits.new(m[1])
|
|
147
|
-
end
|
|
148
|
-
currPair.hitsX(m[2].to_i).push(SingleHit.new(ln));
|
|
149
|
-
end
|
|
150
|
-
currPair.hits.each { |hit| puts hit.to_s }
|
|
151
|
-
f.close
|
|
152
|
-
rescue => err
|
|
153
|
-
$stderr.puts "Exception: #{err}\n\n"
|
|
154
|
-
err.backtrace.each { |l| $stderr.puts l + "\n" }
|
|
155
|
-
err
|
|
156
|
-
end
|
|
157
|
-
|
|
@@ -1,48 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env Rscript
|
|
2
|
-
|
|
3
|
-
# @author Luis M. Rodriguez-R
|
|
4
|
-
# @license Artistic-2.0
|
|
5
|
-
|
|
6
|
-
#= Load stuff
|
|
7
|
-
suppressPackageStartupMessages(library(enveomics.R))
|
|
8
|
-
args <- commandArgs(trailingOnly = FALSE)
|
|
9
|
-
enveomics_R <- file.path(dirname(
|
|
10
|
-
sub("^--file=", "", args[grep("^--file=", args)])),
|
|
11
|
-
"lib", "enveomics.R")
|
|
12
|
-
|
|
13
|
-
#= Generate interface
|
|
14
|
-
opt <- enve.cliopts(enve.recplot2,
|
|
15
|
-
file.path(enveomics_R, "man", "enve.recplot2.Rd"),
|
|
16
|
-
positional_arguments=c(1,4),
|
|
17
|
-
usage="usage: %prog [options] output.Rdata [output.pdf [width height]]",
|
|
18
|
-
mandatory=c("prefix"),
|
|
19
|
-
o_desc=list(pos.breaks="Breaks in the positions histogram.",
|
|
20
|
-
pos.breaks.tsv="File with (absolute) coordinates of breaks in the position histogram",
|
|
21
|
-
id.breaks="Breaks in the identity histogram.",
|
|
22
|
-
id.summary="Function summarizing the identity bins. By default: sum.",
|
|
23
|
-
peaks.col="Color of peaks, mandatory for peak-finding (e.g., darkred).",
|
|
24
|
-
peaks.method="Method to detect peaks; one of emauto, em, or mower."),
|
|
25
|
-
p_desc=paste("","Produce recruitment plot objects provided that",
|
|
26
|
-
"BlastTab.catsbj.pl has been previously executed.", sep="\n\t"),
|
|
27
|
-
ignore=c("plot"),
|
|
28
|
-
defaults=c(pos.breaks.tsv=NA, id.metric="identity", peaks.col=NA,
|
|
29
|
-
peaks.method="emauto"))
|
|
30
|
-
|
|
31
|
-
#= Run it!
|
|
32
|
-
if(length(opt$args)>1){
|
|
33
|
-
args = as.list(opt$args[-1])
|
|
34
|
-
for(i in 2:3) if(length(args)>=i) args[[i]] <- as.numeric(args[[i]])
|
|
35
|
-
do.call("pdf", args)
|
|
36
|
-
}else{
|
|
37
|
-
opt$options[["plot"]] <- FALSE
|
|
38
|
-
}
|
|
39
|
-
pc <- opt$options[["peaks.col"]]
|
|
40
|
-
if(!is.na(pc) && pc=="NA") opt$options[["peaks.col"]] <- NA
|
|
41
|
-
if(!is.null(opt$options[["peaks.method"]])){
|
|
42
|
-
opt$options[["peaks.opts"]] <- list(method=opt$options[["peaks.method"]])
|
|
43
|
-
opt$options[["peaks.method"]] <- NULL
|
|
44
|
-
}
|
|
45
|
-
rp <- do.call("enve.recplot2", opt$options)
|
|
46
|
-
save(rp, file=opt$args[1])
|
|
47
|
-
if(length(opt$args)>1) dev.off()
|
|
48
|
-
|
|
@@ -1,86 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env perl
|
|
2
|
-
#
|
|
3
|
-
# @author: Luis M Rodriguez-R <lmrodriguezr at gmail dot com>
|
|
4
|
-
# @license: artistic license 2.0
|
|
5
|
-
# @update: Mar-23-2015
|
|
6
|
-
#
|
|
7
|
-
|
|
8
|
-
use strict;
|
|
9
|
-
use warnings;
|
|
10
|
-
use List::Util qw/min max sum/;
|
|
11
|
-
|
|
12
|
-
my $fna = shift @ARGV;
|
|
13
|
-
$fna or die "
|
|
14
|
-
Usage:
|
|
15
|
-
cat blast1... | $0 genes_or_ctgs.fna > genes_or_ctgs.cov
|
|
16
|
-
|
|
17
|
-
blast1... One or more Tabular BLAST files of reads vs genes (or contigs).
|
|
18
|
-
genes_or_ctgs.fna A FastA file containing the genes or the contigs (db).
|
|
19
|
-
genes_or_ctgs.cov The output file.
|
|
20
|
-
|
|
21
|
-
Output:
|
|
22
|
-
A tab-delimited file with the following columns:
|
|
23
|
-
1. Subject ID
|
|
24
|
-
2. Average sequencing depth
|
|
25
|
-
3. Median sequencing depth
|
|
26
|
-
4. Number of mapped reads
|
|
27
|
-
5. Length of the subject sequence
|
|
28
|
-
|
|
29
|
-
";
|
|
30
|
-
|
|
31
|
-
my $size = {};
|
|
32
|
-
my $gene = {};
|
|
33
|
-
my $reads = {};
|
|
34
|
-
|
|
35
|
-
SIZE:{
|
|
36
|
-
local $/=">";
|
|
37
|
-
print STDERR "== Reading fasta\n";
|
|
38
|
-
open FNA, "<", $fna or die "Cannot read the file: $fna: $!\n";
|
|
39
|
-
my $i=0;
|
|
40
|
-
while(<FNA>){
|
|
41
|
-
chomp;
|
|
42
|
-
my @g = split /\n/, $_, 2;
|
|
43
|
-
next unless $g[1];
|
|
44
|
-
#$g[1] =~ s/[^A-Za-z]//g;
|
|
45
|
-
#$size->{$g[0]} = length $g[1];
|
|
46
|
-
$g[0] =~ s/\s.*//;
|
|
47
|
-
$size->{$g[0]} = ( $g[1] =~ tr/[A-Za-z]// );
|
|
48
|
-
print STDERR " Measuring sequence ".($i).": $g[0] \r" unless ++$i%500;
|
|
49
|
-
}
|
|
50
|
-
close FNA;
|
|
51
|
-
print STDERR " Found $i sequences".(" "x30)."\n";
|
|
52
|
-
}
|
|
53
|
-
|
|
54
|
-
MAP:{
|
|
55
|
-
print STDERR "== Reading mapping\n";
|
|
56
|
-
my $i=0;
|
|
57
|
-
while(<>){
|
|
58
|
-
my @ln = split /\t/;
|
|
59
|
-
$gene->{$ln[1]} ||= [];
|
|
60
|
-
for my $pos (min($ln[8], $ln[9]) .. max($ln[8], $ln[9])){ ($gene->{$ln[1]}->[$pos]||=0)++ }
|
|
61
|
-
($reads->{$ln[1]} ||= 0)++;
|
|
62
|
-
print STDERR " Saving hit ".($i).": $ln[1] \r" unless ++$i%5000;
|
|
63
|
-
}
|
|
64
|
-
print STDERR " Found $i hits".(" "x30)."\n";
|
|
65
|
-
}
|
|
66
|
-
|
|
67
|
-
OUT:{
|
|
68
|
-
print STDERR "== Creating output\n";
|
|
69
|
-
my $i=0;
|
|
70
|
-
for my $g (keys %$gene){
|
|
71
|
-
$gene->{$g}->[$_] ||= 0 for (0 .. $size->{$g});
|
|
72
|
-
my @sorted = sort {$a <=> $b} @{$gene->{$g}};
|
|
73
|
-
die "Cannot find gene in $fna: $g.\n" unless exists $size->{$g};
|
|
74
|
-
printf "%s\t%.6f\t%d\t%d\t%d\n", $g,
|
|
75
|
-
sum(@{$gene->{$g}})/$size->{$g},
|
|
76
|
-
$sorted[$#sorted/2],
|
|
77
|
-
$reads->{$g},
|
|
78
|
-
$size->{$g};
|
|
79
|
-
delete $gene->{$g};
|
|
80
|
-
print STDERR " Saving sequence $g:".($i)."\r" unless ++$i%500;
|
|
81
|
-
}
|
|
82
|
-
print STDERR " Saved $i sequences".(" "x30)."\n";
|
|
83
|
-
}
|
|
84
|
-
|
|
85
|
-
print STDERR " done.\n";
|
|
86
|
-
|
|
@@ -1,119 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env perl
|
|
2
|
-
#
|
|
3
|
-
# @author: Luis M Rodriguez-R <lmrodriguezr at gmail dot com>
|
|
4
|
-
# @license: artistic license 2.0
|
|
5
|
-
# @update: Mar-23-2015
|
|
6
|
-
#
|
|
7
|
-
|
|
8
|
-
use strict;
|
|
9
|
-
use warnings;
|
|
10
|
-
use List::Util qw/min max sum/;
|
|
11
|
-
|
|
12
|
-
my $fna = shift @ARGV;
|
|
13
|
-
$fna or die "
|
|
14
|
-
Description:
|
|
15
|
-
Estimates the average sequencing depth of subject sequences (genes or contigs)
|
|
16
|
-
assuming a Zero-Inflated Poisson distribution (ZIP) to correct for non-covered
|
|
17
|
-
positions. It uses the corrected method of moments estimators (CMMEs) as described
|
|
18
|
-
by Beckett et al [1]. Note that [1] has a mistake in eq. (2.4), that should be:
|
|
19
|
-
pi-hat-MM = 1 - (X-bar / lambda-hat-MM)
|
|
20
|
-
|
|
21
|
-
Also note that a more elaborated mixture distribution can arise from coverage
|
|
22
|
-
histograms (e.g., see [2] for an additional correction called 'tail distribution'
|
|
23
|
-
and mixtures involving negative binomial) so take these results cum grano salis.
|
|
24
|
-
|
|
25
|
-
Usage:
|
|
26
|
-
cat blast1... | $0 genes_or_ctgs.fna > genes_or_ctgs.cov
|
|
27
|
-
|
|
28
|
-
blast1... One or more Tabular BLAST files of reads vs genes (or contigs).
|
|
29
|
-
genes_or_ctgs.fna A FastA file containing the genes or the contigs (db).
|
|
30
|
-
genes_or_ctgs.cov The output file.
|
|
31
|
-
|
|
32
|
-
Output:
|
|
33
|
-
A tab-delimited file with the following columns (the one you want is #2):
|
|
34
|
-
1. Subject ID
|
|
35
|
-
2. Estimated average sequencing depth (CMME lambda)
|
|
36
|
-
3. Zero-inflation (CMME pi)
|
|
37
|
-
4. Observed average sequencing depth
|
|
38
|
-
5. Observed median sequencing depth
|
|
39
|
-
6. Observed median sequencing depth excluding zeroes
|
|
40
|
-
7. Number of mapped reads
|
|
41
|
-
8. Length of the subject sequence
|
|
42
|
-
|
|
43
|
-
References:
|
|
44
|
-
[1] http://anisette.ucs.louisiana.edu/Academic/Sciences/MATH/stage/stat2012.pdf
|
|
45
|
-
[2] Lindner et al, Bioinformatics, 2013.
|
|
46
|
-
|
|
47
|
-
";
|
|
48
|
-
|
|
49
|
-
my $size = {};
|
|
50
|
-
my $gene = {};
|
|
51
|
-
my $reads = {};
|
|
52
|
-
|
|
53
|
-
SIZE:{
  # Measure every sequence of the FastA file $fna and store its length in
  # $size, keyed by the sequence ID (defline up to the first whitespace).
  local $/=">"; # Read one FastA record (text between '>' marks) at a time
  print STDERR "== Reading fasta\n";
  open my $fna_fh, "<", $fna or die "Cannot read the file: $fna: $!\n";
  my $i=0;
  while(<$fna_fh>){
    chomp; # Removes the trailing '>' (the current record separator)
    my @g = split /\n/, $_, 2; # (defline, sequence lines)
    next unless $g[1];
    $g[0] =~ s/\s.*//;
    # tr/// takes a plain character list, not a regex class: square
    # brackets in the list would be counted as sequence characters too.
    $size->{$g[0]} = ( $g[1] =~ tr/A-Za-z// );
    print STDERR " Measuring sequence ".($i).": $g[0] \r" unless ++$i%500;
  }
  close $fna_fh;
  print STDERR " Found $i sequences".(" "x30)."\n";
}
|
|
71
|
-
|
|
72
|
-
MAP:{
  # Read the tabular BLAST (STDIN/ARGV). For each hit, raise by one the
  # depth of every subject position spanned by columns 9 and 10, and count
  # the hit for its subject (column 2).
  print STDERR "== Reading mapping\n";
  my $i = 0;
  while(my $row = <>){
    my @ln = split /\t/, $row;
    my $sbj = $ln[1];
    my $cov = ($gene->{$sbj} ||= []);
    # Coordinates are not guaranteed to be sorted
    my $lo = min($ln[8], $ln[9]);
    my $hi = max($ln[8], $ln[9]);
    for my $pos ($lo .. $hi){
      $cov->[$pos] = ($cov->[$pos] || 0) + 1;
    }
    $reads->{$sbj} = ($reads->{$sbj} || 0) + 1;
    $i++;
    print STDERR " Saving hit ${i}: $sbj \r" unless $i % 5000;
  }
  print STDERR " Found $i hits".(" "x30)."\n";
}
|
|
84
|
-
|
|
85
|
-
OUT:{
  # Print one tab-delimited row per subject with hits: ID, CMME lambda,
  # CMME pi, observed mean depth, median depth, median depth excluding
  # zeroes, number of hits, and sequence length.
  print STDERR "== Creating output\n";
  my $i=0;
  for my $g (keys %$gene){
    unless(exists $size->{$g}){
      warn "Warning: Cannot find gene in $fna: $g.\n";
      next;
    }
    my $len = $size->{$g};
    # Positions without hits are undefined so far; turn them into zeroes
    $gene->{$g}->[$_] ||= 0 for (0 .. $len);
    die "Hits out-of-boundaries in gene $g: $#{$gene->{$g}} != $size->{$g}.\n" if $#{$gene->{$g}} != $len;
    my $depth = $gene->{$g};
    # NOTE(review): the array holds indexes 0..length, so index 0 appears
    # to be a placeholder that also enters these statistics -- confirm.
    my @sorted = sort {$a <=> $b} @$depth;
    my @sorted_nz = grep { $_>0 } @sorted;
    my $xbar = sum(@$depth)/$len;
    # Sample variance. The sum of squared deviations over (n-1) already IS
    # the variance; subtracting the squared mean from it again (as in
    # E[X^2] - E[X]^2) would underestimate it and distort lambda and pi.
    my $var = $len > 1 ? sum(map { ($_ - $xbar)**2 } @$depth)/($len-1) : 0;
    # Method-of-moments estimators of the Zero-Inflated Poisson
    my $lambdaMM = $xbar > 0 ? $xbar + ($var/$xbar) - 1 : 0;
    my $piMM = $lambdaMM==0 ? 0 : 1 - $xbar/$lambdaMM;
    # Correction: without overdispersion (mean >= variance) there is no
    # zero-inflation to estimate, so report the plain mean and pi = 0
    printf "%s\t%.6f\t%.6f\t%.6f\t%d\t%d\t%d\t%d\n", $g,
      ($xbar >= $var ? $xbar : $lambdaMM),
      ($xbar >= $var ? 0 : $piMM),
      $xbar,
      $sorted[$#sorted/2],
      $sorted_nz[$#sorted_nz/2],
      $reads->{$g},
      $len;
    # Release the per-position array as soon as it has been reported
    delete $gene->{$g};
    print STDERR " Saving sequence $g:".($i)." \r" unless ++$i%500;
  }
  print STDERR " Saved $i sequences".(" "x30)." \n";
}
|
|
117
|
-
|
|
118
|
-
print STDERR " done.\n";
|
|
119
|
-
|
|
@@ -1,86 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env perl
|
|
2
|
-
#
|
|
3
|
-
# @author: Luis M Rodriguez-R <lmrodriguezr at gmail dot com>
|
|
4
|
-
# @license: artistic license 2.0
|
|
5
|
-
# @update: Mar-23-2015
|
|
6
|
-
#
|
|
7
|
-
|
|
8
|
-
use strict;
|
|
9
|
-
use warnings;
|
|
10
|
-
use List::Util qw/min max sum/;
|
|
11
|
-
|
|
12
|
-
my $fna = shift @ARGV;
|
|
13
|
-
$fna or die "
|
|
14
|
-
Usage:
|
|
15
|
-
cat blast1... | $0 genes_or_ctgs.fna > genes_or_ctgs.cov
|
|
16
|
-
|
|
17
|
-
blast1... One or more Tabular BLAST files of reads vs genes (or contigs).
|
|
18
|
-
genes_or_ctgs.fna A FastA file containing the genes or the contigs (db).
|
|
19
|
-
genes_or_ctgs.cov The output file.
|
|
20
|
-
|
|
21
|
-
Output:
|
|
22
|
-
A tab-delimited file with the following columns:
|
|
23
|
-
1. Subject ID
|
|
24
|
-
2. Average sequencing depth
|
|
25
|
-
3. Number of mapped reads
|
|
26
|
-
4. Length of the subject sequence
|
|
27
|
-
|
|
28
|
-
Note:
|
|
29
|
-
The values reported by this script may differ from those of BlastTab.seqdepth.pl,
|
|
30
|
-
because this script uses the aligned length of the read while BlastTab.seqdepth.pl
|
|
31
|
-
uses the aligned length of the subject sequence.
|
|
32
|
-
|
|
33
|
-
";
|
|
34
|
-
|
|
35
|
-
my $size = {};
|
|
36
|
-
my $gene = {};
|
|
37
|
-
my $reads = {};
|
|
38
|
-
|
|
39
|
-
SIZE:{
  # Store in $size the number of letters of each sequence in the FastA
  # file $fna, keyed by sequence ID (defline up to the first whitespace).
  local $/=">"; # Each read returns one FastA record (split on '>')
  print STDERR "== Reading fasta\n";
  open my $fna_fh, "<", $fna or die "Cannot read the file: $fna: $!\n";
  my $i=0;
  while(<$fna_fh>){
    chomp; # Removes the trailing '>' (the current record separator)
    my @g = split /\n/, $_, 2; # (defline, sequence lines)
    next unless $g[1];
    $g[0] =~ s/\s.*//;
    # tr/// works on a literal character list (not a regex class), so the
    # list must not include square brackets or they get counted as well.
    $size->{$g[0]} = ( $g[1] =~ tr/A-Za-z// );
    print STDERR " Measuring sequence ".($i).": $g[0] \r" unless ++$i%500;
  }
  close $fna_fh;
  print STDERR " Found $i sequences".(" "x30)."\n";
}
|
|
57
|
-
|
|
58
|
-
MAP:{
  # Accumulate, per subject (column 2), the aligned length computed from
  # columns 7 and 8 and the number of hits of the tabular BLAST read from
  # STDIN/ARGV.
  print STDERR "== Reading mapping\n";
  my $i = 0;
  while(my $hit = <>){
    my @ln = split /\t/, $hit;
    my $sbj = $ln[1];
    my $aln_len = abs($ln[6]-$ln[7])+1;
    $gene->{$sbj} = ($gene->{$sbj} || 0) + $aln_len;
    $reads->{$sbj} = ($reads->{$sbj} || 0) + 1;
    $i++;
    print STDERR " Saving hit ${i}: $sbj \r" unless $i % 5000;
  }
  print STDERR " Found $i hits".(" "x30)."\n";
}
|
|
70
|
-
|
|
71
|
-
OUT:{
  # Report one tab-delimited row per subject with hits: ID, summed aligned
  # length divided by the sequence length, number of hits, and length.
  print STDERR "== Creating output\n";
  my $i = 0;
  for my $g (keys %$gene){
    exists $size->{$g} or die "Cannot find gene in $fna: $g.\n";
    my $len = $size->{$g};
    my @row = ($g, $gene->{$g}/$len, $reads->{$g}, $len);
    printf "%s\t%.6f\t%d\t%d\n", @row;
    $i++;
    print STDERR " Saving sequence ${g}:${i}\r" unless $i % 500;
  }
  print STDERR " Saved $i sequences".(" "x30)."\n";
}
|
|
84
|
-
|
|
85
|
-
print STDERR " done.\n";
|
|
86
|
-
|
|
@@ -1,47 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env perl
|
|
2
|
-
#
|
|
3
|
-
# @author Luis M Rodriguez-R <lmrodriguezr at gmail dot com>
|
|
4
|
-
# @license artistic license 2.0
|
|
5
|
-
# @update Mar-23-2015
|
|
6
|
-
#
|
|
7
|
-
|
|
8
|
-
use strict;
|
|
9
|
-
use warnings;
|
|
10
|
-
|
|
11
|
-
my($blast, $fasta) = @ARGV;
|
|
12
|
-
($blast and $fasta) or die "
|
|
13
|
-
Description:
|
|
14
|
-
Filters a BLAST output including only the hits produced by
|
|
15
|
-
any of the given sequences as query.
|
|
16
|
-
|
|
17
|
-
Usage:
|
|
18
|
-
$0 blast.tab sample.fa > out.tab
|
|
19
|
-
|
|
20
|
-
blast.tab BLAST output to be filtered (tabular format).
|
|
21
|
-
sample.fa Sequences to use as query.
|
|
22
|
-
out.tab The filtered BLAST output (tabular format).
|
|
23
|
-
|
|
24
|
-
";
|
|
25
|
-
|
|
26
|
-
# Index the IDs (defline up to the first whitespace) of the query sequences
print STDERR "== Reading sequences\n";
my $seq = {};
open my $fasta_fh, "<", $fasta or die "Cannot read the file: $fasta: $!\n";
while(my $fa_ln = <$fasta_fh>){
  $seq->{$1} = 1 if $fa_ln =~ /^>(\S+)/;
}
close $fasta_fh;
print STDERR " ".(scalar keys %$seq)." sequences to be used as query.\n";

# Echo only the BLAST rows whose query (column 1) is among the indexed IDs
print STDERR "== Reading BLAST\n";
my ($N, $n) = (0, 0); # Rows evaluated, rows reported
open my $blast_fh, "<", $blast or die "Cannot read the file: $blast: $!\n";
while(my $ln = <$blast_fh>){
  next if substr($ln, 0, 1) eq '#'; # Comment lines are not counted
  $N++;
  my ($qry) = split /\t/, $ln;
  if(exists $seq->{$qry}){
    $n++;
    print $ln;
  }
}
close $blast_fh;
print STDERR " Reported $n entries out of $N.\n";
|
|
47
|
-
|
|
@@ -1,114 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env perl
|
|
2
|
-
#
|
|
3
|
-
# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
|
|
4
|
-
# @update Mar-23-2016
|
|
5
|
-
# @license artistic license 2.0
|
|
6
|
-
#
|
|
7
|
-
|
|
8
|
-
use warnings;
|
|
9
|
-
use strict;
|
|
10
|
-
use Getopt::Std;
|
|
11
|
-
|
|
12
|
-
sub HELP_MESSAGE {
  # Builds the help text (double-quoted, so $0 is interpolated) and dies
  # with it; the trailing newline keeps Perl from appending the location.
  my $help = "
.Description
Sums the weights of all the queries hitting each subject. Often (but not
necessarily) the BLAST files contain only best matches. The weights can be
any number, but a common use of this Script is to add up counts (weights are
integers). For example, in a BLAST of predicted genes vs some annotation
source, the weights could be the number of reads recruited by each gene.

.Usage:
$0 [options] blast... > out-file

blast... * One or more BLAST files.
out-file A two-columns tab-delimited file containing the summed weights
per hit.

-w <str> Weights file: A two-columns tab-delimited file containing the
name (column 1) and the weight (column 2) of each query.
-s <float> Minimum score. By default: 0.
-i <float> Minimum identity (in percentage). By default: 0.
-m <int> Maximum number of queries. Set to 0 for all. By default: 0.
-n Normalize weights by the number of hits per query.
-z Add zero when weight is not found (by default: doesn't list
them).
-q Run quietly.
-h Display this message and exit.

* Mandatory

.Note:
The weights (-w parameter) are optional, but its use is encouraged. When
weights are not passed, the script simply assumes all queries to be equally
weighted (unity), a result that can be faster to compute with, for example:
cat blast | cut -f 2 | sort | uniq -c | awk '{print \$2\"\\t\"\$1}' > out
It is equivalent to simply count the number of times that each subject
occurs.
";
  die $help;
}
|
|
50
|
-
|
|
51
|
-
# Command-line options; unset thresholds default to 0 (i.e., disabled)
my %o = ();
getopts('w:s:i:m:znqh', \%o);
HELP_MESSAGE() if $o{h};
$o{s}||=0;
$o{i}||=0;
$o{m}||=0;

# Weight of each query (column 1 => column 2 of the -w file), if provided
my %count;
if($o{w}){
  print STDERR "Reading counts.\n" unless $o{q};
  open my $count_fh, "<", $o{w} or die "Cannot open file: $o{w}: $!\n";
  while(my $w_ln = <$count_fh>){
    chomp $w_ln;
    # Take exactly one name/weight pair per line: flattening all the
    # fields of the file into a single list would misalign every later
    # pair after a blank line or a line with a different column count.
    my ($name, $weight) = split /\t/, $w_ln;
    next unless defined $name and defined $weight;
    $count{$name} = $weight;
  }
  close $count_fh;
}
|
|
65
|
-
|
|
66
|
-
print STDERR "Reading BLASTs.\n" unless $o{q};
my $qry = '';   # Query currently being buffered
my $hits = 0;   # Number of hits buffered for the current query
my @buf = ();   # Buffered [subject, weight] pairs of the current query
my $qries = 0;  # Number of queries started so far
my $noQry = 0;  # Rows whose query has no entry in %count
my $ln1 = 0;    # Last column index of the first row (expected in all rows)
my %out = ();   # Summed weight per subject

# Adds the buffered hits of the current query to %out (dividing by its
# number of hits with -n) and empties the buffer, so that no hit can ever
# be added twice.
my $flush_buf = sub {
  ($out{$_->[0]}||=0) += ($_->[1]/($o{n}?$hits:1)) for @buf;
  @buf = ();
  $hits = 0;
};

BFILE: for my $blast (@ARGV){
  print STDERR " o $blast\n" unless $o{q};
  open my $blast_fh, "<", $blast or die "Cannot open file: $blast: $!\n";
  BLINE: while(<$blast_fh>){
    chomp;
    my @ln = split /\t/;
    $ln1 ||= $#ln;
    die "Bad line $.: $_\n" unless $#ln==$ln1;
    # Score (column 12) and identity (column 3) filters
    next if ($o{s} and $ln[11]<$o{s}) or ($o{i} and $ln[2]<$o{i});
    unless(exists $count{$ln[0]}){
      $noQry++;
      if(not $o{w}){
        $count{$ln[0]}=1; # Without weights every query counts as one
      }elsif($o{z}){
        $count{$ln[0]}=0;
      }else{
        next BLINE;
      }
    }

    if($qry ne $ln[0]){
      # A new query starts: commit the previous one
      $flush_buf->();
      # Stop once -m queries have been completed. The test happens before
      # counting the new query, so exactly -m queries are summed.
      if($o{m} and $qries >= $o{m}){
        close $blast_fh;
        last BFILE;
      }
      $qries++;
      $qry = $ln[0];
    }

    push @buf, [$ln[1], $count{$ln[0]}];
    $hits++;
  }
  close $blast_fh;
}
# Commit the last query; a no-op if the loop stopped because of -m
$flush_buf->();
print STDERR "Warning: Couldn't find $noQry queries\n" if $noQry and $o{w};

for my $h (keys %out){
  print "$h\t".$out{$h}."\n";
}
|
|
114
|
-
|