miga-base 1.2.15.1 → 1.2.15.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/cli/action/download/gtdb.rb +4 -1
- data/lib/miga/cli/action/gtdb_get.rb +4 -0
- data/lib/miga/remote_dataset/download.rb +3 -2
- data/lib/miga/remote_dataset.rb +44 -8
- data/lib/miga/taxonomy.rb +6 -0
- data/lib/miga/version.rb +2 -2
- data/test/remote_dataset_test.rb +3 -1
- metadata +6 -302
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +0 -41964
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +0 -32439
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +0 -62056
- data/utils/FastAAI/FastAAI +0 -3659
- data/utils/FastAAI/FastAAI-legacy/FastAAI +0 -1336
- data/utils/FastAAI/FastAAI-legacy/kAAI_v1.0_virus.py +0 -1296
- data/utils/FastAAI/README.md +0 -84
- data/utils/enveomics/Docs/recplot2.md +0 -244
- data/utils/enveomics/Examples/aai-matrix.bash +0 -66
- data/utils/enveomics/Examples/ani-matrix.bash +0 -66
- data/utils/enveomics/Examples/essential-phylogeny.bash +0 -105
- data/utils/enveomics/Examples/unus-genome-phylogeny.bash +0 -100
- data/utils/enveomics/LICENSE.txt +0 -73
- data/utils/enveomics/Makefile +0 -52
- data/utils/enveomics/Manifest/Tasks/aasubs.json +0 -103
- data/utils/enveomics/Manifest/Tasks/blasttab.json +0 -790
- data/utils/enveomics/Manifest/Tasks/distances.json +0 -161
- data/utils/enveomics/Manifest/Tasks/fasta.json +0 -802
- data/utils/enveomics/Manifest/Tasks/fastq.json +0 -291
- data/utils/enveomics/Manifest/Tasks/graphics.json +0 -126
- data/utils/enveomics/Manifest/Tasks/mapping.json +0 -137
- data/utils/enveomics/Manifest/Tasks/ogs.json +0 -382
- data/utils/enveomics/Manifest/Tasks/other.json +0 -906
- data/utils/enveomics/Manifest/Tasks/remote.json +0 -355
- data/utils/enveomics/Manifest/Tasks/sequence-identity.json +0 -650
- data/utils/enveomics/Manifest/Tasks/tables.json +0 -308
- data/utils/enveomics/Manifest/Tasks/trees.json +0 -68
- data/utils/enveomics/Manifest/Tasks/variants.json +0 -111
- data/utils/enveomics/Manifest/categories.json +0 -165
- data/utils/enveomics/Manifest/examples.json +0 -162
- data/utils/enveomics/Manifest/tasks.json +0 -4
- data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +0 -69
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +0 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +0 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +0 -1
- data/utils/enveomics/Pipelines/assembly.pbs/README.md +0 -189
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +0 -112
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +0 -23
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +0 -44
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +0 -50
- data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +0 -37
- data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +0 -68
- data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +0 -49
- data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +0 -80
- data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +0 -57
- data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +0 -63
- data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +0 -38
- data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +0 -73
- data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +0 -21
- data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +0 -72
- data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +0 -98
- data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +0 -1
- data/utils/enveomics/Pipelines/blast.pbs/README.md +0 -127
- data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +0 -109
- data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +0 -128
- data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +0 -16
- data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +0 -22
- data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +0 -26
- data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +0 -89
- data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +0 -29
- data/utils/enveomics/Pipelines/idba.pbs/README.md +0 -49
- data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +0 -95
- data/utils/enveomics/Pipelines/idba.pbs/run.pbs +0 -56
- data/utils/enveomics/Pipelines/trim.pbs/README.md +0 -54
- data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +0 -70
- data/utils/enveomics/Pipelines/trim.pbs/run.pbs +0 -130
- data/utils/enveomics/README.md +0 -42
- data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +0 -171
- data/utils/enveomics/Scripts/Aln.cat.rb +0 -221
- data/utils/enveomics/Scripts/Aln.convert.pl +0 -35
- data/utils/enveomics/Scripts/AlphaDiversity.pl +0 -152
- data/utils/enveomics/Scripts/BedGraph.tad.rb +0 -93
- data/utils/enveomics/Scripts/BedGraph.window.rb +0 -71
- data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +0 -102
- data/utils/enveomics/Scripts/BlastTab.addlen.rb +0 -63
- data/utils/enveomics/Scripts/BlastTab.advance.bash +0 -48
- data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +0 -55
- data/utils/enveomics/Scripts/BlastTab.catsbj.pl +0 -104
- data/utils/enveomics/Scripts/BlastTab.cogCat.rb +0 -76
- data/utils/enveomics/Scripts/BlastTab.filter.pl +0 -47
- data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +0 -194
- data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +0 -104
- data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +0 -157
- data/utils/enveomics/Scripts/BlastTab.recplot2.R +0 -48
- data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +0 -86
- data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +0 -119
- data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +0 -86
- data/utils/enveomics/Scripts/BlastTab.subsample.pl +0 -47
- data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +0 -114
- data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +0 -90
- data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +0 -123
- data/utils/enveomics/Scripts/Chao1.pl +0 -97
- data/utils/enveomics/Scripts/CharTable.classify.rb +0 -234
- data/utils/enveomics/Scripts/EBIseq2tax.rb +0 -83
- data/utils/enveomics/Scripts/FastA.N50.pl +0 -60
- data/utils/enveomics/Scripts/FastA.extract.rb +0 -152
- data/utils/enveomics/Scripts/FastA.filter.pl +0 -52
- data/utils/enveomics/Scripts/FastA.filterLen.pl +0 -28
- data/utils/enveomics/Scripts/FastA.filterN.pl +0 -60
- data/utils/enveomics/Scripts/FastA.fragment.rb +0 -100
- data/utils/enveomics/Scripts/FastA.gc.pl +0 -42
- data/utils/enveomics/Scripts/FastA.interpose.pl +0 -93
- data/utils/enveomics/Scripts/FastA.length.pl +0 -38
- data/utils/enveomics/Scripts/FastA.mask.rb +0 -89
- data/utils/enveomics/Scripts/FastA.per_file.pl +0 -36
- data/utils/enveomics/Scripts/FastA.qlen.pl +0 -57
- data/utils/enveomics/Scripts/FastA.rename.pl +0 -65
- data/utils/enveomics/Scripts/FastA.revcom.pl +0 -23
- data/utils/enveomics/Scripts/FastA.sample.rb +0 -98
- data/utils/enveomics/Scripts/FastA.slider.pl +0 -85
- data/utils/enveomics/Scripts/FastA.split.pl +0 -55
- data/utils/enveomics/Scripts/FastA.split.rb +0 -79
- data/utils/enveomics/Scripts/FastA.subsample.pl +0 -131
- data/utils/enveomics/Scripts/FastA.tag.rb +0 -65
- data/utils/enveomics/Scripts/FastA.toFastQ.rb +0 -69
- data/utils/enveomics/Scripts/FastA.wrap.rb +0 -48
- data/utils/enveomics/Scripts/FastQ.filter.pl +0 -54
- data/utils/enveomics/Scripts/FastQ.interpose.pl +0 -90
- data/utils/enveomics/Scripts/FastQ.maskQual.rb +0 -89
- data/utils/enveomics/Scripts/FastQ.offset.pl +0 -90
- data/utils/enveomics/Scripts/FastQ.split.pl +0 -53
- data/utils/enveomics/Scripts/FastQ.tag.rb +0 -70
- data/utils/enveomics/Scripts/FastQ.test-error.rb +0 -81
- data/utils/enveomics/Scripts/FastQ.toFastA.awk +0 -24
- data/utils/enveomics/Scripts/GFF.catsbj.pl +0 -127
- data/utils/enveomics/Scripts/GenBank.add_fields.rb +0 -84
- data/utils/enveomics/Scripts/HMM.essential.rb +0 -351
- data/utils/enveomics/Scripts/HMM.haai.rb +0 -168
- data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +0 -83
- data/utils/enveomics/Scripts/JPlace.distances.rb +0 -88
- data/utils/enveomics/Scripts/JPlace.to_iToL.rb +0 -320
- data/utils/enveomics/Scripts/M5nr.getSequences.rb +0 -81
- data/utils/enveomics/Scripts/MeTaxa.distribution.pl +0 -198
- data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +0 -35
- data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +0 -49
- data/utils/enveomics/Scripts/NCBIacc2tax.rb +0 -92
- data/utils/enveomics/Scripts/Newick.autoprune.R +0 -27
- data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +0 -228
- data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +0 -32
- data/utils/enveomics/Scripts/RefSeq.download.bash +0 -48
- data/utils/enveomics/Scripts/SRA.download.bash +0 -55
- data/utils/enveomics/Scripts/TRIBS.plot-test.R +0 -36
- data/utils/enveomics/Scripts/TRIBS.test.R +0 -39
- data/utils/enveomics/Scripts/Table.barplot.R +0 -31
- data/utils/enveomics/Scripts/Table.df2dist.R +0 -30
- data/utils/enveomics/Scripts/Table.filter.pl +0 -61
- data/utils/enveomics/Scripts/Table.merge.pl +0 -77
- data/utils/enveomics/Scripts/Table.prefScore.R +0 -60
- data/utils/enveomics/Scripts/Table.replace.rb +0 -69
- data/utils/enveomics/Scripts/Table.round.rb +0 -63
- data/utils/enveomics/Scripts/Table.split.pl +0 -57
- data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +0 -227
- data/utils/enveomics/Scripts/VCF.KaKs.rb +0 -147
- data/utils/enveomics/Scripts/VCF.SNPs.rb +0 -88
- data/utils/enveomics/Scripts/aai.rb +0 -421
- data/utils/enveomics/Scripts/ani.rb +0 -362
- data/utils/enveomics/Scripts/anir.rb +0 -137
- data/utils/enveomics/Scripts/clust.rand.rb +0 -102
- data/utils/enveomics/Scripts/gi2tax.rb +0 -103
- data/utils/enveomics/Scripts/in_silico_GA_GI.pl +0 -96
- data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
- data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
- data/utils/enveomics/Scripts/lib/enveomics.R +0 -1
- data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +0 -293
- data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +0 -175
- data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +0 -24
- data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +0 -17
- data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +0 -30
- data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +0 -253
- data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +0 -88
- data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +0 -182
- data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +0 -49
- data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +0 -74
- data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +0 -237
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +0 -31
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +0 -152
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +0 -3
- data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +0 -74
- data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +0 -135
- data/utils/enveomics/Scripts/ogs.annotate.rb +0 -88
- data/utils/enveomics/Scripts/ogs.core-pan.rb +0 -160
- data/utils/enveomics/Scripts/ogs.extract.rb +0 -125
- data/utils/enveomics/Scripts/ogs.mcl.rb +0 -186
- data/utils/enveomics/Scripts/ogs.rb +0 -104
- data/utils/enveomics/Scripts/ogs.stats.rb +0 -131
- data/utils/enveomics/Scripts/rbm-legacy.rb +0 -172
- data/utils/enveomics/Scripts/rbm.rb +0 -108
- data/utils/enveomics/Scripts/sam.filter.rb +0 -148
- data/utils/enveomics/Tests/Makefile +0 -10
- data/utils/enveomics/Tests/Mgen_M2288.faa +0 -3189
- data/utils/enveomics/Tests/Mgen_M2288.fna +0 -8282
- data/utils/enveomics/Tests/Mgen_M2321.fna +0 -8288
- data/utils/enveomics/Tests/Nequ_Kin4M.faa +0 -2970
- data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +0 -7
- data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +0 -17
- data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +0 -137
- data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +0 -123
- data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +0 -200
- data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +0 -55
- data/utils/enveomics/Tests/alkB.nwk +0 -1
- data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +0 -13
- data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +0 -17
- data/utils/enveomics/Tests/hiv1.faa +0 -59
- data/utils/enveomics/Tests/hiv1.fna +0 -134
- data/utils/enveomics/Tests/hiv2.faa +0 -70
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +0 -233
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +0 -1
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +0 -233
- data/utils/enveomics/Tests/phyla_counts.tsv +0 -10
- data/utils/enveomics/Tests/primate_lentivirus.ogs +0 -11
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +0 -9
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +0 -8
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +0 -6
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +0 -9
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +0 -6
- data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +0 -6
- data/utils/enveomics/build_enveomics_r.bash +0 -45
- data/utils/enveomics/enveomics.R/DESCRIPTION +0 -31
- data/utils/enveomics/enveomics.R/NAMESPACE +0 -39
- data/utils/enveomics/enveomics.R/R/autoprune.R +0 -155
- data/utils/enveomics/enveomics.R/R/barplot.R +0 -184
- data/utils/enveomics/enveomics.R/R/cliopts.R +0 -135
- data/utils/enveomics/enveomics.R/R/df2dist.R +0 -154
- data/utils/enveomics/enveomics.R/R/growthcurve.R +0 -331
- data/utils/enveomics/enveomics.R/R/prefscore.R +0 -79
- data/utils/enveomics/enveomics.R/R/recplot.R +0 -354
- data/utils/enveomics/enveomics.R/R/recplot2.R +0 -1631
- data/utils/enveomics/enveomics.R/R/tribs.R +0 -583
- data/utils/enveomics/enveomics.R/R/utils.R +0 -80
- data/utils/enveomics/enveomics.R/README.md +0 -81
- data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
- data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -16
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -16
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -16
- data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +0 -25
- data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +0 -46
- data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +0 -23
- data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +0 -47
- data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +0 -23
- data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +0 -23
- data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +0 -40
- data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +0 -103
- data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +0 -67
- data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +0 -24
- data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +0 -45
- data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +0 -44
- data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +0 -47
- data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +0 -75
- data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +0 -50
- data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +0 -44
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +0 -139
- data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +0 -45
- data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +0 -24
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +0 -77
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +0 -25
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +0 -21
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +0 -47
- data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +0 -29
- data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +0 -18
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +0 -45
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +0 -36
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +0 -27
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +0 -52
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +0 -17
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +0 -51
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +0 -43
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +0 -82
- data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +0 -59
- data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +0 -27
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +0 -36
- data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +0 -23
- data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +0 -68
- data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +0 -28
- data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +0 -27
- data/utils/enveomics/enveomics.R/man/growth.curves.Rd +0 -14
- data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +0 -13
- data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +0 -78
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +0 -46
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +0 -45
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +0 -125
- data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +0 -19
- data/utils/enveomics/globals.mk +0 -8
- data/utils/enveomics/manifest.json +0 -9
- data/utils/multitrim/Multitrim How-To.pdf +0 -0
- data/utils/multitrim/README.md +0 -67
- data/utils/multitrim/multitrim.py +0 -1555
- data/utils/multitrim/multitrim.yml +0 -13
@@ -1,362 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
# @author Luis M. Rodriguez-R
|
4
|
-
# @license Artistic-2.0
|
5
|
-
|
6
|
-
require "optparse"
|
7
|
-
require "tmpdir"
|
8
|
-
has_rest_client = true
|
9
|
-
has_sqlite3 = true
|
10
|
-
begin
|
11
|
-
require "rubygems"
|
12
|
-
require "restclient"
|
13
|
-
rescue LoadError
|
14
|
-
has_rest_client = false
|
15
|
-
end
|
16
|
-
begin
|
17
|
-
require "sqlite3"
|
18
|
-
rescue LoadError
|
19
|
-
has_sqlite3 = false
|
20
|
-
end
|
21
|
-
|
22
|
-
o = {win:1000, step:200, id:70, len:700, correct:true, hits:50, q:false, bin:"",
|
23
|
-
program:"blast+", thr:1, dec:2, auto:false, lookupfirst:false,
|
24
|
-
dbregions:true, dbrbm: true, min_actg:0.95}
|
25
|
-
ARGV << "-h" if ARGV.size==0
|
26
|
-
OptionParser.new do |opts|
|
27
|
-
opts.banner = "
|
28
|
-
Calculates the Average Nucleotide Identity between two genomes.
|
29
|
-
|
30
|
-
Usage: #{$0} [options]"
|
31
|
-
opts.separator ""
|
32
|
-
opts.separator "Mandatory"
|
33
|
-
opts.on("-1", "--seq1 FILE",
|
34
|
-
"Path to the FastA file containing the genome 1."){ |v| o[:seq1] = v }
|
35
|
-
opts.on("-2", "--seq2 FILE",
|
36
|
-
"Path to the FastA file containing the genome 2."){ |v| o[:seq2] = v }
|
37
|
-
if has_rest_client
|
38
|
-
opts.separator " Alternatively, you can supply a NCBI-acc with the " +
|
39
|
-
"format ncbi:CP014272 instead of files."
|
40
|
-
else
|
41
|
-
opts.separator " Install rest-client to enable NCBI-acc support."
|
42
|
-
end
|
43
|
-
opts.separator ""
|
44
|
-
opts.separator "Search Options"
|
45
|
-
opts.on("-w", "--win INT",
|
46
|
-
"Window size in the ANI calculation (in bp). By default: " +
|
47
|
-
"#{o[:win].to_s}."){ |v| o[:win] = v.to_i }
|
48
|
-
opts.on("-s", "--step INT",
|
49
|
-
"Step size in the ANI calculation (in bp). By default: " +
|
50
|
-
"#{o[:step].to_s}."){ |v| o[:step] = v.to_i }
|
51
|
-
opts.on("-l", "--len INT",
|
52
|
-
"Minimum alignment length (in bp). By default: #{o[:len]}."
|
53
|
-
){ |v| o[:len] = v.to_i }
|
54
|
-
opts.on("-i", "--id NUM",
|
55
|
-
"Minimum alignment identity (in %). By default: #{o[:id]}."
|
56
|
-
){ |v| o[:id] = v.to_f }
|
57
|
-
opts.on("-n", "--hits INT",
|
58
|
-
"Minimum number of hits. By default: #{o[:hits]}."
|
59
|
-
){ |v| o[:hits] = v.to_i }
|
60
|
-
opts.on("-N", "--nocorrection",
|
61
|
-
"Report values without post-hoc correction."){ |v| o[:correct] = false }
|
62
|
-
opts.on("--min-actg FLOAT",
|
63
|
-
"Minimum fraction of ACTGN in the sequences before assuming proteins.",
|
64
|
-
"By default: #{o[:min_actg]}."
|
65
|
-
){ |v| o[:min_actg] = v.to_f }
|
66
|
-
opts.separator ""
|
67
|
-
opts.separator "Software Options"
|
68
|
-
opts.on("-b", "--bin DIR",
|
69
|
-
"Path to the directory containing the binaries of the search program."
|
70
|
-
){ |v| o[:bin] = v }
|
71
|
-
opts.on("-p", "--program STR",
|
72
|
-
"Search program to be used. One of: blast+ (default), blast, blat."
|
73
|
-
){ |v| o[:program] = v }
|
74
|
-
opts.on("-t", "--threads INT",
|
75
|
-
"Number of parallel threads to be used. By default: #{o[:thr]}."
|
76
|
-
){ |v| o[:thr] = v.to_i }
|
77
|
-
opts.separator ""
|
78
|
-
opts.separator "SQLite3 Options"
|
79
|
-
opts.on("-S", "--sqlite3 FILE",
|
80
|
-
"Path to the SQLite3 database to create (or update) with the results."
|
81
|
-
){ |v| o[:sqlite3] = v }
|
82
|
-
opts.separator " Install sqlite3 gem to enable database support." unless
|
83
|
-
has_sqlite3
|
84
|
-
opts.on("--name1 STR",
|
85
|
-
"Name of --seq1 to use in --sqlite3. By default determined by filename."
|
86
|
-
){ |v| o[:seq1name] = v }
|
87
|
-
opts.on("--name2 STR",
|
88
|
-
"Name of --seq2 to use in --sqlite3. By default determined by filename."
|
89
|
-
){ |v| o[:seq2name] = v }
|
90
|
-
opts.on("--[no-]save-regions",
|
91
|
-
"Save (or don't save) the fragments in the --sqlite3 database.",
|
92
|
-
"By default: #{o[:dbregions]}."){ |v| o[:dbregions] = !!v }
|
93
|
-
opts.on("--[no-]save-rbm",
|
94
|
-
"Save (or don't save) the reciprocal best matches in the --sqlite3 db.",
|
95
|
-
"By default: #{o[:dbrbm]}."){ |v| o[:dbrbm] = !!v }
|
96
|
-
opts.on("--lookup-first",
|
97
|
-
"Indicates if the ANI should be looked up first in the database.",
|
98
|
-
"Requires --sqlite3, --auto, --name1, and --name2.",
|
99
|
-
"Incompatible with --res, --tab, and --out."){ |v| o[:lookupfirst] = v }
|
100
|
-
opts.separator ""
|
101
|
-
opts.separator "Other Output Options"
|
102
|
-
opts.on("-d", "--dec INT",
|
103
|
-
"Decimal positions to report. By default: #{o[:dec]}"
|
104
|
-
){ |v| o[:dec] = v.to_i }
|
105
|
-
opts.on("-o", "--out FILE",
|
106
|
-
"Saves a file describing the alignments used for two-way ANI."
|
107
|
-
){ |v| o[:out] = v }
|
108
|
-
opts.on("-r", "--res FILE",
|
109
|
-
"Saves a file with the final results."){ |v| o[:res] = v }
|
110
|
-
opts.on("-T", "--tab FILE",
|
111
|
-
"Saves a file with the final two-way results in a tab-delimited form.",
|
112
|
-
"The columns are (in that order):",
|
113
|
-
"ANI, standard deviation, fragments used, fragments in the smallest genome."
|
114
|
-
){ |v| o[:tab]=v }
|
115
|
-
opts.on("-a", "--auto",
|
116
|
-
"ONLY outputs the ANI value in STDOUT (or nothing, if calculation fails)."
|
117
|
-
){ o[:auto] = true }
|
118
|
-
opts.on("-q", "--quiet", "Run quietly (no STDERR output)"){ o[:q] = true }
|
119
|
-
opts.on("-h", "--help", "Display this screen") do
|
120
|
-
puts opts
|
121
|
-
exit
|
122
|
-
end
|
123
|
-
opts.separator ""
|
124
|
-
end.parse!
|
125
|
-
abort "-1 is mandatory" if o[:seq1].nil?
|
126
|
-
abort "-2 is mandatory" if o[:seq2].nil?
|
127
|
-
abort "SQLite3 requested (-S) but sqlite3 not supported. First install gem " +
|
128
|
-
"sqlite3." unless o[:sqlite3].nil? or has_sqlite3
|
129
|
-
abort "Step size must be smaller than window size." if o[:step] > o[:win]
|
130
|
-
o[:bin] = o[:bin]+"/" if o[:bin].size > 0
|
131
|
-
if o[:lookupfirst]
|
132
|
-
abort "--lookup-first needs --sqlite3" if o[:sqlite3].nil?
|
133
|
-
abort "--lookup-first requires --auto" unless o[:auto]
|
134
|
-
abort "--lookup-first requires --name1" if o[:seq1name].nil?
|
135
|
-
abort "--lookup-first requires --name2" if o[:seq2name].nil?
|
136
|
-
abort "--lookup-first conflicts with --res" unless o[:res].nil?
|
137
|
-
abort "--lookup-first conflicts with --tab" unless o[:tab].nil?
|
138
|
-
abort "--lookup-first conflicts with --out" unless o[:out].nil?
|
139
|
-
end
|
140
|
-
|
141
|
-
# Create SQLite3 file
|
142
|
-
unless o[:sqlite3].nil?
|
143
|
-
$stderr.puts "Accessing SQLite3 file: #{o[:sqlite3]}." unless o[:q]
|
144
|
-
sqlite_db = SQLite3::Database.new o[:sqlite3]
|
145
|
-
sqlite_db.execute "create table if not exists regions( " +
|
146
|
-
"seq varchar(256), id int, source varchar(256), `start` int," +
|
147
|
-
" `end` int )"
|
148
|
-
sqlite_db.execute "create table if not exists rbm( seq1 varchar(256), " +
|
149
|
-
"seq2 varchar(256), id1 int, id2 int, id float, evalue float, " +
|
150
|
-
"bitscore float )"
|
151
|
-
sqlite_db.execute "create table if not exists ani( seq1 varchar(256), " +
|
152
|
-
"seq2 varchar(256), ani float, sd float, n int, omega int )"
|
153
|
-
end
|
154
|
-
|
155
|
-
# Look-up first
|
156
|
-
if o[:lookupfirst]
|
157
|
-
val = sqlite_db.execute "select ani from ani where seq1=? and seq2=?",
|
158
|
-
[o[:seq1name], o[:seq2name]]
|
159
|
-
val = sqlite_db.execute "select ani from ani where seq1=? and seq2=?",
|
160
|
-
[o[:seq2name], o[:seq1name]] if val.empty?
|
161
|
-
unless val.empty?
|
162
|
-
puts val.first.first
|
163
|
-
exit
|
164
|
-
end
|
165
|
-
end
|
166
|
-
|
167
|
-
Dir.mktmpdir do |dir|
|
168
|
-
$stderr.puts "Temporal directory: #{dir}." unless o[:q]
|
169
|
-
|
170
|
-
# Create databases.
|
171
|
-
$stderr.puts "Creating databases." unless o[:q]
|
172
|
-
minfrg = nil
|
173
|
-
seq_names = []
|
174
|
-
seq_len = {}
|
175
|
-
actg_cnt = {}
|
176
|
-
[:seq1, :seq2].each do |seq|
|
177
|
-
abort "GIs are no longer supported by NCBI. Please use NCBI-acc instead" if
|
178
|
-
/^gi:/.match(o[seq])
|
179
|
-
acc = /^ncbi:(\S+)/.match(o[seq])
|
180
|
-
if not acc.nil?
|
181
|
-
abort "NCBI-acc requested but rest-client not supported. First " +
|
182
|
-
"install gem rest-client." unless has_rest_client
|
183
|
-
response = RestClient.get(
|
184
|
-
"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi",
|
185
|
-
{params:{db:"nuccore",rettype:"fasta",id:acc[1],idtype:"acc"}})
|
186
|
-
abort "Unable to reach NCBI EUtils, error code " +
|
187
|
-
response.code.to_s + "." unless response.code == 200
|
188
|
-
o[seq] = "#{dir}/ncbi-#{seq.to_s}.fa"
|
189
|
-
fo = File.open(o[seq], "w")
|
190
|
-
fo.puts response.to_str
|
191
|
-
fo.close
|
192
|
-
seq_names << ( o[ "#{seq}name".to_sym ].nil? ?
|
193
|
-
"ncbi:#{acc[1]}" : o[ "#{seq}name".to_sym ] )
|
194
|
-
else
|
195
|
-
seq_names << ( o[ "#{seq}name".to_sym ].nil? ?
|
196
|
-
File.basename(o[seq], ".*") : o[ "#{seq}name".to_sym ] )
|
197
|
-
end
|
198
|
-
$stderr.puts " Reading FastA file: #{o[seq]}" unless o[:q]
|
199
|
-
sqlite_db.execute("delete from regions where seq=?",
|
200
|
-
[seq_names.last]) unless o[:sqlite3].nil?
|
201
|
-
buffer = ""
|
202
|
-
frgs = 0
|
203
|
-
seq_len[seq] = 0
|
204
|
-
actg_cnt[seq] = 0
|
205
|
-
seqs = 0
|
206
|
-
disc = 0
|
207
|
-
seqn = ""
|
208
|
-
from = 1
|
209
|
-
fi = File.open(o[seq], "r")
|
210
|
-
fo = File.open("#{dir}/#{seq.to_s}.fa", "w")
|
211
|
-
fi.each_line do |ln|
|
212
|
-
if ln =~ /^>(\S+)/
|
213
|
-
seqs += 1
|
214
|
-
disc += buffer.size
|
215
|
-
buffer = ""
|
216
|
-
seqn = $1
|
217
|
-
from = 1
|
218
|
-
else
|
219
|
-
ln.gsub!(/[^A-Za-z]/, '')
|
220
|
-
seq_len[seq] += ln.length
|
221
|
-
actg_cnt[seq] += ln.gsub(/[^ACTGNactgn]/,"").length
|
222
|
-
buffer = buffer + ln
|
223
|
-
while buffer.size > o[:win]
|
224
|
-
seq_i = buffer[0, o[:win]]
|
225
|
-
if seq_i =~ /^N+$/
|
226
|
-
disc += seq_i.size
|
227
|
-
else
|
228
|
-
frgs += 1
|
229
|
-
fo.puts ">#{frgs}"
|
230
|
-
fo.puts seq_i
|
231
|
-
sqlite_db.execute("insert into regions values(?,?,?,?,?)",
|
232
|
-
[seq_names.last, frgs, seqn, from, from+o[:win]]) if
|
233
|
-
not o[:sqlite3].nil? and o[:dbregions]
|
234
|
-
end
|
235
|
-
buffer = buffer[o[:step] .. -1]
|
236
|
-
from += o[:win]
|
237
|
-
end
|
238
|
-
end
|
239
|
-
end
|
240
|
-
fi.close
|
241
|
-
fo.close
|
242
|
-
actg_frx = actg_cnt[seq].to_f/seq_len[seq].to_f
|
243
|
-
abort "Input sequences appear to be proteins " +
|
244
|
-
"(ACTGN fraction: %.2f%%)." % (actg_frx*100) if actg_frx < o[:min_actg]
|
245
|
-
$stderr.puts " Created #{frgs} fragments from #{seqs} sequences, " +
|
246
|
-
"discarded #{disc} bp." unless o[:q]
|
247
|
-
minfrg ||= frgs
|
248
|
-
minfrg = frgs if minfrg > frgs
|
249
|
-
case o[:program].downcase
|
250
|
-
when "blast"
|
251
|
-
`"#{o[:bin]}formatdb" -i "#{dir}/#{seq.to_s}.fa" -p F`
|
252
|
-
when "blast+"
|
253
|
-
`"#{o[:bin]}makeblastdb" -in "#{dir}/#{seq.to_s}.fa" -dbtype nucl`
|
254
|
-
when "blat"
|
255
|
-
# Nothing to do
|
256
|
-
else
|
257
|
-
abort "Unsupported program: #{o[:program]}."
|
258
|
-
end
|
259
|
-
end # [:seq1, :seq2].each
|
260
|
-
|
261
|
-
# Best-hits.
|
262
|
-
$stderr.puts "Running one-way comparisons." unless o[:q]
|
263
|
-
rbh = []
|
264
|
-
id2 = 0
|
265
|
-
sq2 = 0
|
266
|
-
n2 = 0
|
267
|
-
unless o[:sqlite3].nil?
|
268
|
-
sqlite_db.execute "delete from rbm where seq1=? and seq2=?", seq_names
|
269
|
-
sqlite_db.execute "delete from ani where seq1=? and seq2=?", seq_names
|
270
|
-
end
|
271
|
-
unless o[:out].nil?
|
272
|
-
fo = File.open(o[:out], "w")
|
273
|
-
fo.puts %w(identity aln.len mismatch gap.open evalue bitscore).join("\t")
|
274
|
-
end
|
275
|
-
res = File.open(o[:res], "w") unless o[:res].nil?
|
276
|
-
[1,2].each do |i|
|
277
|
-
qry_seen = []
|
278
|
-
q = "#{dir}/seq#{i}.fa"
|
279
|
-
s = "#{dir}/seq#{i==1?2:1}.fa"
|
280
|
-
case o[:program].downcase
|
281
|
-
when "blast"
|
282
|
-
`"#{o[:bin]}blastall" -p blastn -d "#{s}" -i "#{q}" \
|
283
|
-
-F F -v 1 -b 1 -a #{o[:thr]} -m 8 -o "#{dir}/#{i}.tab"`
|
284
|
-
when "blast+"
|
285
|
-
`"#{o[:bin]}blastn" -db "#{s}" -query "#{q}" \
|
286
|
-
-dust no -max_target_seqs 1 \
|
287
|
-
-num_threads #{o[:thr]} -outfmt 6 -out "#{dir}/#{i}.tab"`
|
288
|
-
when "blat"
|
289
|
-
`#{o[:bin]}blat "#{s}" "#{q}" -out=blast8 "#{dir}/#{i}.tab"`
|
290
|
-
else
|
291
|
-
abort "Unsupported program: #{o[:program]}."
|
292
|
-
end
|
293
|
-
fh = File.open("#{dir}/#{i}.tab", "r")
|
294
|
-
id = 0
|
295
|
-
sq = 0
|
296
|
-
n = 0
|
297
|
-
fh.each_line do |ln|
|
298
|
-
ln.chomp!
|
299
|
-
row = ln.split(/\t/)
|
300
|
-
if qry_seen[ row[0].to_i ].nil? and row[3].to_i >= o[:len] and
|
301
|
-
row[2].to_f >= o[:id]
|
302
|
-
qry_seen[ row[0].to_i ] = 1
|
303
|
-
identity_corr = 100 - (100-row[2].to_f)/(o[:correct] ? 0.8621 : 1.0)
|
304
|
-
id += identity_corr
|
305
|
-
sq += identity_corr ** 2
|
306
|
-
n += 1
|
307
|
-
if i==1
|
308
|
-
rbh[ row[0].to_i ] = row[1].to_i
|
309
|
-
else
|
310
|
-
if !rbh[ row[1].to_i ].nil? and rbh[ row[1].to_i ]==row[0].to_i
|
311
|
-
id2 += identity_corr
|
312
|
-
sq2 += identity_corr ** 2
|
313
|
-
n2 += 1
|
314
|
-
fo.puts [identity_corr,row[3..5],
|
315
|
-
row[10..11]].join("\t") unless o[:out].nil?
|
316
|
-
sqlite_db.execute("insert into rbm values(?,?,?,?,?,?,?)",
|
317
|
-
seq_names + [row[1], row[0], row[2], row[10], row[11]]
|
318
|
-
) if not o[:sqlite3].nil? and o[:dbrbm]
|
319
|
-
end
|
320
|
-
end
|
321
|
-
end
|
322
|
-
end
|
323
|
-
fh.close
|
324
|
-
if n < o[:hits]
|
325
|
-
puts "Insuffient hits to estimate one-way ANI: #{n}." unless o[:auto]
|
326
|
-
res.puts "Insufficient hits to estimate one-way ANI: #{n}" unless
|
327
|
-
o[:res].nil?
|
328
|
-
else
|
329
|
-
printf "! One-way ANI %d: %.#{o[:dec]}f%% (SD: %.#{o[:dec]}f%%), " +
|
330
|
-
"from %i fragments.\n", i, id/n, (sq/n - (id/n)**2)**0.5, n unless
|
331
|
-
o[:auto]
|
332
|
-
res.puts sprintf "<b>One-way ANI %d:</b> %.#{o[:dec]}f%% " +
|
333
|
-
"(SD: %.#{o[:dec]}f%%), from %i fragments.<br/>", i, id/n,
|
334
|
-
(sq/n - (id/n)**2)**0.5, n unless o[:res].nil?
|
335
|
-
end
|
336
|
-
end # [1,2].each
|
337
|
-
if n2 < o[:hits]
|
338
|
-
puts "Insufficient hits to estimate two-way ANI: #{n2}" unless o[:auto]
|
339
|
-
res.puts "Insufficient hits to estimate two-way ANI: #{n2}" unless
|
340
|
-
o[:res].nil?
|
341
|
-
else
|
342
|
-
ani = id2/n2
|
343
|
-
ani_sd = (sq2/n2 - (id2/n2)**2)**0.5
|
344
|
-
printf "! Two-way ANI : %.#{o[:dec]}f%% (SD: %.#{o[:dec]}f%%), " +
|
345
|
-
"from %i fragments.\n", ani, ani_sd, n2 unless o[:auto]
|
346
|
-
res.puts sprintf "<b>Two-way ANI:</b> %.#{o[:dec]}f%% " +
|
347
|
-
"(SD: %.#{o[:dec]}f%%), from %i fragments.<br/>",
|
348
|
-
ani, ani_sd, n2 unless o[:res].nil?
|
349
|
-
unless o[:tab].nil?
|
350
|
-
tab = File.open(o[:tab], "w")
|
351
|
-
tab.printf "%.#{o[:dec]}f\t%.#{o[:dec]}f\t%i\t%i\n",
|
352
|
-
ani, ani_sd, n2, minfrg
|
353
|
-
tab.close
|
354
|
-
end
|
355
|
-
sqlite_db.execute("insert into ani values(?,?,?,?,?,?)",
|
356
|
-
seq_names + [ani, ani_sd, n2, minfrg]) unless o[:sqlite3].nil?
|
357
|
-
puts ani if o[:auto]
|
358
|
-
end
|
359
|
-
res.close unless o[:res].nil?
|
360
|
-
fo.close unless o[:out].nil?
|
361
|
-
end
|
362
|
-
|
@@ -1,137 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
# frozen_string_literal: true
|
4
|
-
|
5
|
-
$:.push File.expand_path('../lib', __FILE__)
|
6
|
-
require 'enveomics_rb/enveomics'
|
7
|
-
require 'enveomics_rb/anir'
|
8
|
-
$VERSION = 1.0
|
9
|
-
|
10
|
-
# Default options; each value can be overridden by the command-line switches
# declared below. The resulting hash is handed to the estimator afterwards.
o = {
  q: false, threads: 2,
  r_format: :fastq, g_format: :fasta, m_format: :sam, r_type: :single,
  identity: 95.0, algorithm: :auto, bimodality: 0.5, bin_size: 1.0,
  coefficient: :sarle
}

# Command-line interface. Every switch stores its (optionally coerced) value
# in +o+; format/type/algorithm names are lower-cased and turned into symbols.
opt_parser = OptionParser.new do |opt|
  cmd = File.basename($0)
  # NOTE: the last usage line used to end with a stray double quote that was
  # printed verbatim as part of the help text.
  opt.banner = <<~BANNER

    [Enveomics Collection: #{cmd} v#{$VERSION}]

    Estimates ANIr: the Average Nucleotide Identity of reads against a genome

    Usage
      # [ Input/output modes ]
      # Run mapping and (optionally) save it as SAM
      # Requires bowtie2
      #{cmd} -r reads.fastq -g genome.fasta -m out_map.sam [options]

      # Read mapping from BAM file
      # Requires samtools
      #{cmd} -m map.bam --m-format bam [options]

      # Read mapping from other formats: SAM or Tabular BLAST
      #{cmd} -m map.blast --m-format tab [options]

      # Read a list of identities as percentage (contig filtering off)
      #{cmd} -m identities.txt --m-format list [options]

      # [ Identity threshold modes ]
      #{cmd} -i 95 -a fix [options] # Set fixed identity threshold
      #{cmd} -a gmm [options] # Find valley by EM of GMM
      #{cmd} -a auto [options] # Pick method by bimodality (default)

  BANNER

  opt.separator 'Input/Output'
  opt.on('-r', '--reads PATH', 'Metagenomic reads') { |v| o[:r] = v }
  opt.on('-g', '--genome PATH', 'Genome assembly') { |v| o[:g] = v }
  opt.on('-m', '--mapping PATH', 'Mapping file') { |v| o[:m] = v }
  opt.on('-L', '--list PATH', 'Output file with identities') { |v| o[:L] = v }
  opt.on('-H', '--hist PATH', 'Output file with histogram') { |v| o[:H] = v }
  opt.on(
    '-T', '--tab PATH', 'Output file with results in tabular format'
  ) { |v| o[:T] = v }
  opt.separator ''

  opt.separator 'Formats'
  opt.on(
    '--r-format STRING',
    'Metagenomic reads format: fastq (default) or fasta',
    'Both options support compression with .gz file extension'
  ) { |v| o[:r_format] = v.downcase.to_sym }
  opt.on(
    '--r-type STRING', 'Type of metagenomic reads:',
    '~ single (default): Single reads',
    '~ coupled: Coupled reads in separate files (-m must be comma-delimited)',
    '~ interleaved: Coupled reads in a single interposed file'
  ) { |v| o[:r_type] = v.downcase.to_sym }
  opt.on(
    '--g-format STRING',
    'Genome assembly format: fasta (default) or list',
    'Both options support compression with .gz file extension',
    'If passed in mapping-read mode, filters only matches to these contigs'
  ) { |v| o[:g_format] = v.downcase.to_sym }
  opt.on(
    '--m-format STRING',
    'Mapping file format: sam (default), bam, tab, or list',
    'sam, tab, and list options support compression with .gz file extension'
  ) { |v| o[:m_format] = v.downcase.to_sym }
  opt.separator ''

  opt.separator 'Identity threshold'
  opt.on(
    '-i', '--identity FLOAT', Float,
    "Set a fixed threshold of percent identity (default: #{o[:identity]})"
  ) { |v| o[:identity] = v }
  opt.on(
    '-a', '--algorithm STRING',
    'Set an algorithm to automatically detect identity threshold:',
    '~ gmm: Valley detection by E-M of Gaussian Mixture Model',
    '~ fix: Fixed threshold, see -i',
    '~ auto (default): Pick gmm or fix depending on bimodality, see -b'
  ) { |v| o[:algorithm] = v.downcase.to_sym }
  # The help used to name one specific coefficient here, which disagreed with
  # the default (:sarle) and with the --coefficient switch declared below.
  opt.on(
    '-b', '--bimodality FLOAT', Float,
    'Threshold of bimodality below which the algorithm is set to fix',
    'The coefficient used is set by --coefficient',
    "By default: #{o[:bimodality]}"
  ) { |v| o[:bimodality] = v }
  opt.on(
    '--coefficient STRING',
    'Coefficient of bimodality for -a auto:',
    '~ sarle (default): Sarle\'s bimodality coefficient b',
    '~ dma: de Michele and Accatino (2014 PLoS ONE) B index, use with -b 0.1'
  ) { |v| o[:coefficient] = v.downcase.to_sym }
  opt.on(
    '--bin-size FLOAT', Float,
    "Width of histogram bins (in percent identity). By default: #{o[:bin_size]}"
  ) { |v| o[:bin_size] = v }
  opt.separator ''

  opt.separator 'General'
  opt.on(
    '-t', '--threads INT', Integer, 'Threads to use'
  ) { |v| o[:threads] = v }
  opt.on('-l', '--log PATH', 'Log file to save output') { |v| o[:log] = v }
  opt.on('-q', '--quiet', 'Run quietly') { |v| o[:q] = v }
  opt.on('-h', '--help', 'Display this screen') do
    puts opt
    exit
  end
  opt.separator ''
end

# Consume the recognized switches from ARGV (destructive parse).
opt_parser.parse!
|
126
|
-
|
127
|
-
# Build the estimator from the parsed options and run it.
anir = Enveomics::ANIr.new(o)
anir.go!

# When -T/--tab was given, write a two-line tab-delimited report: a header
# followed by the sample mean, sample SD, sample size, and the identity
# threshold stored in the estimator's options.
tab_path = o[:T]
if tab_path
  File.open(tab_path, 'w') do |fh|
    fh.puts "anir\tsd\treads\tid_threshold"
    values = [
      anir.sample.mean, anir.sample.sd, anir.sample.n, anir.opts[:identity]
    ]
    fh.puts values.join("\t")
  end
end
|
137
|
-
|
@@ -1,102 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
#
|
4
|
-
# @author: Luis M. Rodriguez-R
|
5
|
-
# @license: artistic license 2.0
|
6
|
-
#
|
7
|
-
|
8
|
-
require "optparse"
|
9
|
-
|
10
|
-
# Default options: verbose output and six decimals in the report.
o = { q: false, prec: 6 }

# Show the help screen when the script is called without arguments.
ARGV << "-h" if ARGV.empty?

# Command-line interface; each switch stores its value in +o+.
OptionParser.new do |opts|
  opts.banner = "
Calculates the Rand Index and the Adjusted Rand Index between two clusterings.

The clustering format is a raw text file with one cluster per line, each
defined as comma-delimited members, and a header line (ignored). Note that this
is equivalent to the OGs format for 1 genome.

Usage: #{$0} [options]"
  opts.separator ""
  opts.separator "Mandatory"
  opts.on("-1", "--clust1 FILE", "First input file.") { |v| o[:clust1] = v }
  opts.on("-2", "--clust2 FILE", "Second input file.") { |v| o[:clust2] = v }
  opts.separator ""
  opts.separator "Other options"
  # Let OptionParser coerce and validate the value: a non-numeric argument is
  # now rejected instead of being silently converted to 0 by String#to_i.
  opts.on("-p", "--prec INT", Integer,
    "Precision to report. By default: #{o[:prec]}") { |v| o[:prec] = v }
  opts.on("-q", "--quiet", "Run quietly (no STDERR output).") { o[:q] = true }
  opts.on("-h", "--help", "Display this screen.") do
    puts opts
    exit
  end
  opts.separator ""
end.parse!

# Both clustering files are required.
abort "-1 is mandatory" if o[:clust1].nil?
abort "-2 is mandatory" if o[:clust2].nil?
|
38
|
-
|
39
|
-
# Reads a clustering file and returns its clusters.
#
# The first line of the file is treated as a header and skipped. Every
# following line becomes one cluster: the line (without its line terminator)
# split on commas. Progress messages go to STDERR unless +q+ is truthy.
#
# @param file [String] path to the clustering file
# @param q [Boolean] when truthy, suppress the progress messages
# @return [Array<Array<String>>] one array of member names per cluster line
def load_clust(file, q)
  $stderr.puts "Reading clusters in '#{file}'." unless q
  # Drop the header by position rather than relying on the global `$.`
  # line counter, which is shared by every IO object in the process.
  out = File.foreach(file).drop(1).map { |ln| ln.chomp.split(",") }
  $stderr.puts " Loaded clusters: #{out.size}." unless q
  out
end
|
51
|
-
|
52
|
-
# Number of unordered pairs that can be drawn from +n+ items, i.e.
# n * (n - 1) / 2; any +n+ below 2 yields 0.
#
# @param n [Integer] number of items
# @return [Integer] number of pairs
def choose_2(n)
  n < 2 ? 0 : n * (n - 1) / 2
end
|
56
|
-
|
57
|
-
##### MAIN:
|
58
|
-
# Main routine: loads both clusterings, builds their contingency table and
# prints the Rand Index and the Adjusted Rand Index to STDOUT. Any
# StandardError is reported on STDERR and the script exits with status 1.
begin
  # Read the pre-computed OGs
  clust1 = load_clust(o[:clust1], o[:q])
  clust2 = load_clust(o[:clust2], o[:q])

  # Contingency table: cont[i][j] is the number of members shared by cluster i
  # of the first file and cluster j of the second one. Column totals (b_sums)
  # are accumulated while the table is filled; row totals (a_sums) afterwards.
  $stderr.puts "Estimating the contingency table." unless o[:q]
  cont = []
  b_sums = []
  clust1.each_with_index do |x_i, i|
    cont[i] = []
    clust2.each_with_index do |y_j, j|
      cont[i][j] = (x_i & y_j).size
      b_sums[j] ||= 0
      b_sums[j] += cont[i][j]
    end
  end
  a_sums = cont.map { |row| row.inject(:+) }

  # Calculate variables
  # - see http://i11www.iti.kit.edu/extra/publications/ww-cco-06.pdf
  $stderr.puts "Estimating indexes." unless o[:q]
  # n is the number of members listed in the first clustering
  n = clust1.map { |i| i.size }.inject(:+)
  pairs = choose_2(n)
  # n11: pairs drawn within each cell of the contingency table
  n11 = clust1.each_index.map do |i|
    clust2.each_index.map do |j|
      choose_2(cont[i][j])
    end.inject(:+)
  end.inject(:+).to_f
  # t1 / t2: pairs drawn within each row total / column total
  t1 = a_sums.map { |a_i| choose_2(a_i) }.inject(:+).to_f
  t2 = b_sums.map { |b_j| choose_2(b_j) }.inject(:+).to_f
  t3 = 2 * t1 * t2 / (n * (n - 1))
  n00 = pairs + n11 - t1 - t2
  r_index = (n11 + n00) / pairs
  r_adjusted = (n11 - t3) / ((t1 + t2) / 2 - t3)

  # Report
  puts "Rand Index = %.#{o[:prec]}f" % r_index
  puts "Adjusted Rand Index = %.#{o[:prec]}f" % r_adjusted
rescue => err
  $stderr.puts "Exception: #{err}\n\n"
  err.backtrace.each { |l| $stderr.puts l + "\n" }
  # Signal the failure to the caller; previously the error was printed but
  # the script still finished with a successful (0) exit status.
  exit 1
end
|
102
|
-
|