miga-base 0.7.26.0 → 1.0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/_data/aai-intax.blast.tsv.gz +0 -0
- data/lib/miga/_data/aai-intax.diamond.tsv.gz +0 -0
- data/lib/miga/_data/aai-novel.blast.tsv.gz +0 -0
- data/lib/miga/_data/aai-novel.diamond.tsv.gz +0 -0
- data/lib/miga/cli/action/classify_wf.rb +2 -2
- data/lib/miga/cli/action/derep_wf.rb +1 -1
- data/lib/miga/cli/action/doctor.rb +57 -14
- data/lib/miga/cli/action/doctor/base.rb +47 -23
- data/lib/miga/cli/action/init.rb +11 -7
- data/lib/miga/cli/action/init/files_helper.rb +1 -0
- data/lib/miga/cli/action/ncbi_get.rb +3 -3
- data/lib/miga/cli/action/tax_dist.rb +2 -2
- data/lib/miga/cli/action/wf.rb +5 -4
- data/lib/miga/common.rb +1 -0
- data/lib/miga/daemon.rb +11 -4
- data/lib/miga/dataset/result.rb +10 -6
- data/lib/miga/json.rb +5 -4
- data/lib/miga/metadata.rb +5 -1
- data/lib/miga/parallel.rb +36 -0
- data/lib/miga/project.rb +8 -8
- data/lib/miga/project/base.rb +4 -4
- data/lib/miga/project/result.rb +2 -2
- data/lib/miga/sqlite.rb +10 -2
- data/lib/miga/version.rb +23 -9
- data/scripts/aai_distances.bash +16 -18
- data/scripts/ani_distances.bash +16 -17
- data/scripts/assembly.bash +31 -16
- data/scripts/haai_distances.bash +3 -27
- data/scripts/miga.bash +6 -4
- data/scripts/p.bash +1 -1
- data/scripts/read_quality.bash +9 -18
- data/scripts/trimmed_fasta.bash +14 -30
- data/scripts/trimmed_reads.bash +36 -36
- data/test/parallel_test.rb +31 -0
- data/test/project_test.rb +2 -1
- data/test/remote_dataset_test.rb +1 -1
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +41964 -0
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +32439 -0
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +62056 -0
- data/utils/FastAAI/FastAAI/FastAAI +1336 -0
- data/utils/FastAAI/README.md +84 -0
- data/utils/FastAAI/kAAI_v1.0_virus.py +1296 -0
- data/utils/distance/commands.rb +1 -0
- data/utils/distance/database.rb +0 -1
- data/utils/distance/runner.rb +2 -4
- data/utils/enveomics/Docs/recplot2.md +244 -0
- data/utils/enveomics/Examples/aai-matrix.bash +66 -0
- data/utils/enveomics/Examples/ani-matrix.bash +66 -0
- data/utils/enveomics/Examples/essential-phylogeny.bash +105 -0
- data/utils/enveomics/Examples/unus-genome-phylogeny.bash +100 -0
- data/utils/enveomics/LICENSE.txt +73 -0
- data/utils/enveomics/Makefile +52 -0
- data/utils/enveomics/Manifest/Tasks/aasubs.json +103 -0
- data/utils/enveomics/Manifest/Tasks/blasttab.json +786 -0
- data/utils/enveomics/Manifest/Tasks/distances.json +161 -0
- data/utils/enveomics/Manifest/Tasks/fasta.json +802 -0
- data/utils/enveomics/Manifest/Tasks/fastq.json +291 -0
- data/utils/enveomics/Manifest/Tasks/graphics.json +126 -0
- data/utils/enveomics/Manifest/Tasks/mapping.json +137 -0
- data/utils/enveomics/Manifest/Tasks/ogs.json +382 -0
- data/utils/enveomics/Manifest/Tasks/other.json +906 -0
- data/utils/enveomics/Manifest/Tasks/remote.json +355 -0
- data/utils/enveomics/Manifest/Tasks/sequence-identity.json +638 -0
- data/utils/enveomics/Manifest/Tasks/tables.json +308 -0
- data/utils/enveomics/Manifest/Tasks/trees.json +68 -0
- data/utils/enveomics/Manifest/Tasks/variants.json +111 -0
- data/utils/enveomics/Manifest/categories.json +165 -0
- data/utils/enveomics/Manifest/examples.json +154 -0
- data/utils/enveomics/Manifest/tasks.json +4 -0
- data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +69 -0
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -0
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -0
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -0
- data/utils/enveomics/Pipelines/assembly.pbs/README.md +189 -0
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +112 -0
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +23 -0
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +44 -0
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +50 -0
- data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +37 -0
- data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +68 -0
- data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +49 -0
- data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +80 -0
- data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +57 -0
- data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +63 -0
- data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +38 -0
- data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +73 -0
- data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +21 -0
- data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +72 -0
- data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +98 -0
- data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -0
- data/utils/enveomics/Pipelines/blast.pbs/README.md +127 -0
- data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +109 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +128 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +16 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +22 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +26 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +89 -0
- data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +29 -0
- data/utils/enveomics/Pipelines/idba.pbs/README.md +49 -0
- data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +95 -0
- data/utils/enveomics/Pipelines/idba.pbs/run.pbs +56 -0
- data/utils/enveomics/Pipelines/trim.pbs/README.md +54 -0
- data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +70 -0
- data/utils/enveomics/Pipelines/trim.pbs/run.pbs +130 -0
- data/utils/enveomics/README.md +42 -0
- data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +171 -0
- data/utils/enveomics/Scripts/Aln.cat.rb +221 -0
- data/utils/enveomics/Scripts/Aln.convert.pl +35 -0
- data/utils/enveomics/Scripts/AlphaDiversity.pl +152 -0
- data/utils/enveomics/Scripts/BedGraph.tad.rb +93 -0
- data/utils/enveomics/Scripts/BedGraph.window.rb +71 -0
- data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +102 -0
- data/utils/enveomics/Scripts/BlastTab.addlen.rb +63 -0
- data/utils/enveomics/Scripts/BlastTab.advance.bash +48 -0
- data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +55 -0
- data/utils/enveomics/Scripts/BlastTab.catsbj.pl +104 -0
- data/utils/enveomics/Scripts/BlastTab.cogCat.rb +76 -0
- data/utils/enveomics/Scripts/BlastTab.filter.pl +47 -0
- data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +194 -0
- data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +104 -0
- data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +157 -0
- data/utils/enveomics/Scripts/BlastTab.recplot2.R +48 -0
- data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +86 -0
- data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +119 -0
- data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +86 -0
- data/utils/enveomics/Scripts/BlastTab.subsample.pl +47 -0
- data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +114 -0
- data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +90 -0
- data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +101 -0
- data/utils/enveomics/Scripts/Chao1.pl +97 -0
- data/utils/enveomics/Scripts/CharTable.classify.rb +234 -0
- data/utils/enveomics/Scripts/EBIseq2tax.rb +83 -0
- data/utils/enveomics/Scripts/FastA.N50.pl +60 -0
- data/utils/enveomics/Scripts/FastA.extract.rb +152 -0
- data/utils/enveomics/Scripts/FastA.filter.pl +52 -0
- data/utils/enveomics/Scripts/FastA.filterLen.pl +28 -0
- data/utils/enveomics/Scripts/FastA.filterN.pl +60 -0
- data/utils/enveomics/Scripts/FastA.fragment.rb +100 -0
- data/utils/enveomics/Scripts/FastA.gc.pl +42 -0
- data/utils/enveomics/Scripts/FastA.interpose.pl +93 -0
- data/utils/enveomics/Scripts/FastA.length.pl +38 -0
- data/utils/enveomics/Scripts/FastA.mask.rb +89 -0
- data/utils/enveomics/Scripts/FastA.per_file.pl +36 -0
- data/utils/enveomics/Scripts/FastA.qlen.pl +57 -0
- data/utils/enveomics/Scripts/FastA.rename.pl +65 -0
- data/utils/enveomics/Scripts/FastA.revcom.pl +23 -0
- data/utils/enveomics/Scripts/FastA.sample.rb +98 -0
- data/utils/enveomics/Scripts/FastA.slider.pl +85 -0
- data/utils/enveomics/Scripts/FastA.split.pl +55 -0
- data/utils/enveomics/Scripts/FastA.split.rb +79 -0
- data/utils/enveomics/Scripts/FastA.subsample.pl +131 -0
- data/utils/enveomics/Scripts/FastA.tag.rb +65 -0
- data/utils/enveomics/Scripts/FastA.toFastQ.rb +69 -0
- data/utils/enveomics/Scripts/FastA.wrap.rb +48 -0
- data/utils/enveomics/Scripts/FastQ.filter.pl +54 -0
- data/utils/enveomics/Scripts/FastQ.interpose.pl +90 -0
- data/utils/enveomics/Scripts/FastQ.maskQual.rb +89 -0
- data/utils/enveomics/Scripts/FastQ.offset.pl +90 -0
- data/utils/enveomics/Scripts/FastQ.split.pl +53 -0
- data/utils/enveomics/Scripts/FastQ.tag.rb +70 -0
- data/utils/enveomics/Scripts/FastQ.test-error.rb +81 -0
- data/utils/enveomics/Scripts/FastQ.toFastA.awk +24 -0
- data/utils/enveomics/Scripts/GFF.catsbj.pl +127 -0
- data/utils/enveomics/Scripts/GenBank.add_fields.rb +84 -0
- data/utils/enveomics/Scripts/HMM.essential.rb +351 -0
- data/utils/enveomics/Scripts/HMM.haai.rb +168 -0
- data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +83 -0
- data/utils/enveomics/Scripts/JPlace.distances.rb +88 -0
- data/utils/enveomics/Scripts/JPlace.to_iToL.rb +320 -0
- data/utils/enveomics/Scripts/M5nr.getSequences.rb +81 -0
- data/utils/enveomics/Scripts/MeTaxa.distribution.pl +198 -0
- data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +35 -0
- data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +49 -0
- data/utils/enveomics/Scripts/NCBIacc2tax.rb +92 -0
- data/utils/enveomics/Scripts/Newick.autoprune.R +27 -0
- data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +228 -0
- data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +32 -0
- data/utils/enveomics/Scripts/RefSeq.download.bash +48 -0
- data/utils/enveomics/Scripts/SRA.download.bash +55 -0
- data/utils/enveomics/Scripts/TRIBS.plot-test.R +36 -0
- data/utils/enveomics/Scripts/TRIBS.test.R +39 -0
- data/utils/enveomics/Scripts/Table.barplot.R +31 -0
- data/utils/enveomics/Scripts/Table.df2dist.R +30 -0
- data/utils/enveomics/Scripts/Table.filter.pl +61 -0
- data/utils/enveomics/Scripts/Table.merge.pl +77 -0
- data/utils/enveomics/Scripts/Table.prefScore.R +60 -0
- data/utils/enveomics/Scripts/Table.replace.rb +69 -0
- data/utils/enveomics/Scripts/Table.round.rb +63 -0
- data/utils/enveomics/Scripts/Table.split.pl +57 -0
- data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +227 -0
- data/utils/enveomics/Scripts/VCF.KaKs.rb +147 -0
- data/utils/enveomics/Scripts/VCF.SNPs.rb +88 -0
- data/utils/enveomics/Scripts/aai.rb +419 -0
- data/utils/enveomics/Scripts/ani.rb +362 -0
- data/utils/enveomics/Scripts/anir.rb +137 -0
- data/utils/enveomics/Scripts/clust.rand.rb +102 -0
- data/utils/enveomics/Scripts/gi2tax.rb +103 -0
- data/utils/enveomics/Scripts/in_silico_GA_GI.pl +96 -0
- data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
- data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
- data/utils/enveomics/Scripts/lib/enveomics.R +1 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +293 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +175 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +24 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +17 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +30 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +253 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +63 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +182 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +49 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +74 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +237 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +3 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +31 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +152 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +73 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +135 -0
- data/utils/enveomics/Scripts/ogs.annotate.rb +88 -0
- data/utils/enveomics/Scripts/ogs.core-pan.rb +160 -0
- data/utils/enveomics/Scripts/ogs.extract.rb +125 -0
- data/utils/enveomics/Scripts/ogs.mcl.rb +186 -0
- data/utils/enveomics/Scripts/ogs.rb +104 -0
- data/utils/enveomics/Scripts/ogs.stats.rb +131 -0
- data/utils/enveomics/Scripts/rbm-legacy.rb +172 -0
- data/utils/enveomics/Scripts/rbm.rb +100 -0
- data/utils/enveomics/Scripts/sam.filter.rb +148 -0
- data/utils/enveomics/Tests/Makefile +10 -0
- data/utils/enveomics/Tests/Mgen_M2288.faa +3189 -0
- data/utils/enveomics/Tests/Mgen_M2288.fna +8282 -0
- data/utils/enveomics/Tests/Mgen_M2321.fna +8288 -0
- data/utils/enveomics/Tests/Nequ_Kin4M.faa +2970 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +7 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +17 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +137 -0
- data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +123 -0
- data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +200 -0
- data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +55 -0
- data/utils/enveomics/Tests/alkB.nwk +1 -0
- data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +13 -0
- data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +17 -0
- data/utils/enveomics/Tests/hiv1.faa +59 -0
- data/utils/enveomics/Tests/hiv1.fna +134 -0
- data/utils/enveomics/Tests/hiv2.faa +70 -0
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +233 -0
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +1 -0
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +233 -0
- data/utils/enveomics/Tests/phyla_counts.tsv +10 -0
- data/utils/enveomics/Tests/primate_lentivirus.ogs +11 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +9 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +8 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +6 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +9 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +6 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +6 -0
- data/utils/enveomics/build_enveomics_r.bash +45 -0
- data/utils/enveomics/enveomics.R/DESCRIPTION +31 -0
- data/utils/enveomics/enveomics.R/NAMESPACE +39 -0
- data/utils/enveomics/enveomics.R/R/autoprune.R +155 -0
- data/utils/enveomics/enveomics.R/R/barplot.R +184 -0
- data/utils/enveomics/enveomics.R/R/cliopts.R +135 -0
- data/utils/enveomics/enveomics.R/R/df2dist.R +154 -0
- data/utils/enveomics/enveomics.R/R/growthcurve.R +331 -0
- data/utils/enveomics/enveomics.R/R/prefscore.R +79 -0
- data/utils/enveomics/enveomics.R/R/recplot.R +354 -0
- data/utils/enveomics/enveomics.R/R/recplot2.R +1631 -0
- data/utils/enveomics/enveomics.R/R/tribs.R +583 -0
- data/utils/enveomics/enveomics.R/R/utils.R +80 -0
- data/utils/enveomics/enveomics.R/README.md +81 -0
- data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
- data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +16 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +16 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +16 -0
- data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +25 -0
- data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +46 -0
- data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +47 -0
- data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +40 -0
- data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +103 -0
- data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +67 -0
- data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -0
- data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +45 -0
- data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +44 -0
- data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +47 -0
- data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +75 -0
- data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +50 -0
- data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +44 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +139 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +45 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +77 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +47 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +45 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +52 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +51 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +43 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +82 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +59 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +36 -0
- data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +68 -0
- data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -0
- data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -0
- data/utils/enveomics/enveomics.R/man/growth.curves.Rd +14 -0
- data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +13 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +78 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +46 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +45 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +125 -0
- data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -0
- data/utils/enveomics/globals.mk +8 -0
- data/utils/enveomics/manifest.json +9 -0
- data/utils/multitrim/Multitrim How-To.pdf +0 -0
- data/utils/multitrim/README.md +67 -0
- data/utils/multitrim/multitrim.py +1555 -0
- data/utils/multitrim/multitrim.yml +13 -0
- data/utils/requirements.txt +4 -3
- metadata +304 -3
@@ -0,0 +1,90 @@
|
|
1
|
+
#!/usr/bin/env perl
|
2
|
+
#
|
3
|
+
# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
|
4
|
+
# @license artistic 2.0
|
5
|
+
# @update Oct-13-2015
|
6
|
+
#
|
7
|
+
|
8
|
+
use warnings;
|
9
|
+
use strict;
|
10
|
+
|
11
|
+
# Command line: BLAST table, NCBI nodes.dmp, NCBI names.dmp, and optionally
# the target rank and whether only the first (best) hit per query is used.
my($blast, $nodes, $names, $rank, $bh) = @ARGV;
($blast and $nodes and $names) or die <<HELP

Takes a BLAST with NCBI Taxonomy IDs as subjects and replaces them by names at a
given taxonomic rank.

Usage:
   $0 tax_blast.txt nodes.dmp names.dmp[ rank[ best-hit]] > taxrank_blast.txt

   tax_blast.txt      BLAST output, where subject IDs are NCBI Taxonomy IDs.
   nodes.dmp          Nodes file from NCBI Taxonomy*.
   names.dmp          Names file from NCBI Taxonomy*.
   rank               The rank to be reported. All the reported nodes will
                      have the same rank. By default, genus. To see
                      supported values, run:
                      cut -f 5 nodes.dmp | sort -u
   best-hit           A word (yes or no) telling the program whether or not it
                      should take into account the best hit per query only.
                      By default: yes.
   taxrank_blast.txt  BLAST-like output, where subject IDs are Taxonomy names.

   * Download from ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz

HELP
;
$rank ||= "genus";
$bh ||= "yes";

# %nodes structure:
# taxid => [parent's taxid, rank, nil, name, name type]

print STDERR "Reading $nodes.\n";
my %nodes;
open my $nodes_fh, "<", $nodes or die "Cannot read file: $nodes: $!\n";
while(my $ln = <$nodes_fh>){
  # Only the first three fields are used; they are never the last field of
  # the row, so the row terminator does not need to be removed here.
  my @a = split /\t\|\t/, $ln;
  $nodes{$a[0]} = [$a[1], $a[2]];
}
close $nodes_fh;

print STDERR "Reading $names.\n";
open my $names_fh, "<", $names or die "Cannot read file: $names: $!\n";
while(my $ln = <$names_fh>){
  # The name type is the last field, so the row terminator ("\t|" plus the
  # newline) must be removed or it never equals "scientific name".
  $ln =~ s/\t\|\s*$//;
  my @a = split /\t\|\t/, $ln;
  next unless defined $a[1] and defined $a[3];
  my $node = ($nodes{$a[0]} ||= []);
  if(defined $node->[3]){
    # Keep an already stored scientific name; otherwise only a scientific
    # name may replace whatever name was stored first.
    next if $node->[4] eq "scientific name";
    next if $a[3] ne "scientific name";
  }
  $node->[3] = $a[1];
  $node->[4] = $a[3];
}
close $names_fh;

my $i = 0;
my $nomap = 0;
my $qry = "";
open my $blast_fh, "<", $blast or die "Cannot read file: $blast: $!\n";
HIT:while(my $ln = <$blast_fh>){
  # Comment lines are passed through untouched
  if($ln =~ /^#/){
    print $ln;
    next;
  }
  chomp $ln;
  next if $ln =~ /^\s*$/;
  my @row = split /\t/, $ln;
  # In best-hit mode only the first row of each run of identical queries is
  # used, so the query of the last processed row has to be remembered.
  next if $bh eq "yes" and $row[0] eq $qry;
  $qry = $row[0];
  $i++;
  print STDERR " Mapping hit $i\r" unless $i%10;
  (defined $row[1] and exists $nodes{$row[1]})
    or die "Cannot find Taxonomy node: ".(defined $row[1] ? $row[1] : "").".\n";
  my $id = $row[1];
  my $n = $nodes{$id};
  # Climb the lineage until a node with the requested rank is found
  while(not defined $n->[1] or $n->[1] ne $rank){
    my $parent = $n->[0];
    # Give up when the parent is missing or is its own parent (the root)
    if(not defined $parent or not exists $nodes{$parent}
          or not defined $nodes{$parent}->[0]
          or $parent eq $nodes{$parent}->[0]){
      $nomap++;
      next HIT;
    }
    $id = $parent;
    $n = $nodes{$id};
  }
  # Fall back to the Taxonomy ID when the node has no name in names.dmp
  $row[1] = defined $n->[3] ? $n->[3] : $id;
  print "".join("\t", @row)."\n";
}
close $blast_fh;
print STDERR " Mapped $i hits\n";
print STDERR
  " WARNING: $nomap hits above rank or in a lineage without rank.\n" if $nomap;
|
90
|
+
|
@@ -0,0 +1,101 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
#
|
4
|
+
# @author: Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
|
5
|
+
# @update: Dec-11-2015
|
6
|
+
# @license: artistic license 2.0
|
7
|
+
#
|
8
|
+
|
9
|
+
require "optparse"
|
10
|
+
|
11
|
+
# Run-time options (defaults below) and the 0-based column of each tabular
# BLAST field that can be used to rank hits. Both are globals because the
# Hit and HitSet classes of this script read them.
$opts = { n: 5, sortby: "bitscore", q: false }
$cols = { "bitscore" => 11, "evalue" => 10, "identity" => 2, "length" => 3 }
ARGV << "-h" if ARGV.empty?
OptionParser.new do |opts|
  opts.banner = "Reports the top-N best hits of a BLAST, pre-sorted by query."
  opts.separator ""
  opts.separator "Mandatory"
  opts.on("-i", "--blast FILE",
    "Path to the BLAST file.") { |v| $opts[:blast] = v }
  opts.separator ""
  opts.separator "Optional"
  # Integer coercion makes OptionParser reject non-numeric values instead of
  # silently turning them into 0 (which would report no hits at all).
  opts.on("-n", "--top INTEGER", Integer,
    "Maximum number of hits to report for each query.",
    "By default: #{$opts[:n]}") { |v| $opts[:n] = v }
  opts.on("-s", "--sort-by STRING",
    "Parameter used to detect the 'best' hits.",
    "Any of: bitscore (default), evalue, identity, length."
  ) { |v| $opts[:sortby] = v }
  opts.on("-q", "--quiet", "Run quietly.") { $opts[:q] = true }
  opts.on("-h", "--help", "Display this screen") do
    puts opts
    exit
  end
  opts.separator ""
end.parse!

abort "-i/--blast is mandatory." if $opts[:blast].nil?
abort "Unrecognized value for -s/--sort-by: #{$opts[:sortby]}." unless
  $cols.key?($opts[:sortby])
|
40
|
+
|
41
|
+
# One row of a tabular BLAST file.
#
# Hits are ordered "best first" according to the field selected in
# $opts[:sortby]: ascending for "evalue", descending for every other field.
# The column holding that field is looked up in $cols.
class Hit
  include Comparable

  # Array with the tab-separated fields of the row.
  attr_reader :blast_line

  # blast_line:: String with one line of the BLAST file (newline allowed).
  def initialize(blast_line)
    @blast_line = blast_line.chomp.split(/\t/)
  end

  # Returns the field at the 0-based column +i+ as a String, or nil if the
  # row has fewer columns.
  def col(i)
    @blast_line[i]
  end

  # Compares two hits by the configured field, read as Float. Returns a
  # negative value when this hit is better than +other+.
  def <=>(other)
    column = $cols[$opts[:sortby]]
    order = col(column).to_f <=> other.col(column).to_f
    # Smaller e-values are better; for any other field larger is better.
    $opts[:sortby] == "evalue" ? order : -order
  end

  # Returns the row as a tab-delimited String, without newline.
  def to_s
    @blast_line.join("\t")
  end
end
|
58
|
+
|
59
|
+
# Collection of the hits of a single query.
class HitSet
  # query:: ID of the query shared by all the hits (nil while empty).
  # hits::  Array of hits, in insertion order until #filter! is called.
  attr_reader :query, :hits

  def initialize
    @hits = []
    @query = nil
  end

  # Adds +hit+ to the set. The first hit defines the query of the set; a
  # RuntimeError is raised if a later hit belongs to a different query.
  def <<(hit)
    @query = hit.col(0) if @query.nil?
    raise "Inconsistent query, expecting #{query}" unless query == hit.col(0)

    @hits << hit
  end

  # Is the set still without hits?
  def empty?
    hits.empty?
  end

  # Sorts the hits from best to worst (as defined by their <=>) and keeps at
  # most $opts[:n] of them. Ties keep their order of appearance in the input,
  # which Array#sort! alone does not guarantee. A negative limit keeps nothing.
  def filter!
    limit = [$opts[:n].to_i, 0].max
    ranked = @hits.each_with_index.sort do |(hit_a, idx_a), (hit_b, idx_b)|
      order = hit_a <=> hit_b
      order.zero? ? idx_a <=> idx_b : order
    end
    @hits.replace(ranked.first(limit).map(&:first))
    self
  end

  # Returns the hits, one per line (no trailing newline).
  def to_s
    @hits.join("\n")
  end
end
|
82
|
+
|
83
|
+
# Streams the BLAST file, which must be pre-sorted by query: hits are grouped
# in a HitSet until the query changes, then the set is filtered and printed.
$stderr.puts "Parsing BLAST." unless $opts[:q]
hs = HitSet.new
parsed = 0
# The block form closes the file even if parsing raises.
File.open($opts[:blast], "r") do |fh|
  fh.each_line do |ln|
    hit = Hit.new(ln)
    if hs.query != hit.col(0)
      # A new query starts: report the previous one and start over
      hs.filter!
      puts hs unless hs.empty?
      hs = HitSet.new
      $stderr.print "Parsing line #{fh.lineno}... \r" unless $opts[:q]
    end
    hs << hit
  end
  parsed = fh.lineno
end
$stderr.print "Parsed #{parsed} lines. \n" unless $opts[:q]

# Report the last query of the file
hs.filter!
puts hs unless hs.empty?
|
101
|
+
|
@@ -0,0 +1,97 @@
|
|
1
|
+
#!/usr/bin/env perl
|
2
|
+
#
|
3
|
+
# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
|
4
|
+
# @license artistic license 2.0
|
5
|
+
# @update Dec-29-2015
|
6
|
+
#
|
7
|
+
|
8
|
+
use warnings;
|
9
|
+
use strict;
|
10
|
+
use Getopt::Std;
|
11
|
+
use List::Util qw/min max/;
|
12
|
+
|
13
|
+
# Hooks called by Getopt::Std for --version and --help
sub VERSION_MESSAGE(){print "Home-made Chao1 (enveomics)\n"}
sub HELP_MESSAGE(){die "
Description:
   Takes a table of OTU abundance in one or more samples and calculates the
   chao1 index (with 95% Confidence Interval) for each sample. To use it with
   Qiime OTU Tables, run it as:
   $0 -i OTU_Table.txt -c 1 -h

Usage:
   $0 [opts]

   -i <str>  * Input table (columns:samples, rows:OTUs).
   -r <int>  Number of rows to ignore. By default: 0.
   -c <int>  Number of columns to ignore. By default: 0.
   -C <int>  Number of columns to ignore at the end. By default: 0.
   -d <str>  Delimiter. Supported escaped characters are: \"\\t\"
             (tabulation), and \"\\0\" (null bit). By default: \"\\t\".
   -h        If set, the first row is assumed to have the names of the
             samples.
   --help    This help message.

   * Mandatory.

To improve:
   o Account for n1==0 and n2==0 cases. See http://www.mothur.org/wiki/Chao.

"}

my %o;
getopts('i:c:C:r:d:h', \%o);

&HELP_MESSAGE() unless $o{i};
$o{c} ||= 0;
$o{C} ||= 0;
$o{r} ||= 0;
$o{d} ||= "\\t";

$o{d}="\t" if $o{d} eq "\\t";
$o{d}="\0" if $o{d} eq "\\0";
# The delimiter is a literal string, not a pattern: quote it so characters
# such as "|" or "." split as expected.
my $sep = quotemeta $o{d};

my @names = ();
my @values = ();
open my $table, "<", $o{i} or die "Cannot open file: ".$o{i}.": $!\n";
scalar <$table> for (1 .. $o{r});
if($o{h}){
  my $h = <$table>;
  defined $h or die "Empty table!\n";
  chomp $h;
  # Limit -1 keeps trailing empty fields, so -C removes the right columns
  @names = split /$sep/, $h, -1;
  shift @names for (1 .. $o{c});
  pop @names for (1 .. $o{C});
}
# @values is indexed by sample (column) and holds one abundance per OTU (row)
while(my $ln = <$table>){
  chomp $ln;
  my @ln = split /$sep/, $ln, -1;
  shift @ln for (1 .. $o{c});
  pop @ln for (1 .. $o{C});
  push @{$values[$_] ||= []}, $ln[$_] for (0 .. $#ln);
}
close $table;

# The header uses the same delimiter as the rows
print "".join($o{d}, "Sample", "Obs", "Chao1", "Chao1_LB", "Chao1_UL")."\n";
for my $i (0 .. $#values){
  print "".(exists $names[$i] ? $names[$i] : $i).$o{d};
  my $n1=0; # Singletons
  my $n2=0; # Doubletons
  my $ob=0; # Observed OTUs
  for my $v (@{$values[$i]}){
    # Empty or non-numeric cells count as absent
    next unless defined $v and
      $v =~ /^\s*[+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?\s*$/;
    $n1++ if $v==1;
    $n2++ if $v==2;
    $ob++ if $v>=1;
  }
  if($ob and $n1 and $n2){
    my $m = $n1/$n2;
    # Chao1 estimate, its variance, and the log-transformed 95% CI
    my $ch = $ob + (($n1**2)/(2*$n2));
    my $var = ($n2*((($m**4)/4) + ($m**3) + (($m**2)/2)));
    my $c = exp(1.96*sqrt(log(1+ $var/(($ch-$ob)**2))));
    my $lc = max($ob + ($ch-$ob)/$c, $ob);
    my $uc = $ob + $c*($ch-$ob);
    print "".join($o{d}, $ob, $ch, $lc, $uc)."\n"
  }else{
    # Without singletons or doubletons the estimate falls back to the
    # observed richness and no interval is reported
    print "".join($o{d}, $ob, $ob, 0, 0)."\n"
  }
}
|
97
|
+
|
@@ -0,0 +1,234 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
#
|
4
|
+
# @author Luis M. Rodriguez-R
|
5
|
+
# @update Jan-13-2016
|
6
|
+
# @license artistic license 2.0
|
7
|
+
#
|
8
|
+
|
9
|
+
require "optparse"
|
10
|
+
|
11
|
+
# Command-line options of the script; -t and -k are mandatory.
o = { :q => false }
ARGV << "-h" if ARGV.empty?
OptionParser.new do |opts|
  opts.banner = "
Uses a dichotomous key to classify objects parsing a character table.

Usage: #{$0} [options]"
  opts.separator ""
  opts.separator "Input Options (mandatory)"
  opts.on("-t", "--table FILE",
    "Input table containing the states (columns) per object (row).",
    "It must be tab-delimited and with row and column names.",
    "See Tests/anthrax-cansnp-data.tsv for an example.") { |v| o[:table] = v }
  opts.on("-k", "--key FILE",
    "Input table containing the dichotomous key in linked style, defined in",
    "four columns (can contain #-lead comment lines):",
    " 1. ID of the step, typically a sequential integer.",
    " 2. Name of character to evaluate. Must coincide with the -t headers.",
    " 3. First character decision (see below).",
    " 4. Second character decision (see below).",
    "A character decision must be formatted as: state (must coincide with the",
    "values in -t), colon (:), step to follow. If the state is * (star) any",
    "state triggers the decision (this should be the norm in column 4). The",
    "step to follow should be a step ID in square brackets, or the name of",
    "the classification. See Tests/anthrax-cansnp-key.tsv for an example."
  ) { |v| o[:key] = v }
  opts.separator ""
  opts.separator "Output Options"
  opts.on("-c", "--classification FILE",
    "Two-column table with the classification of the input objects."
  ) { |v| o[:class] = v }
  opts.on("-n", "--newick FILE",
    "Tree containing all the classified objects. This only makes sense for",
    "synoptic keys.") { |v| o[:nwk] = v }
  opts.separator ""
  opts.separator "Additional Options"
  opts.on("-q", "--quiet", "Run quietly (no STDERR output).") { o[:q] = true }
  opts.on("-h", "--help", "Display this screen.") do
    puts opts
    exit
  end
  opts.separator ""
end.parse!
abort "-t is mandatory" if o[:table].nil?
abort "-k is mandatory" if o[:key].nil?
|
56
|
+
|
57
|
+
##### Extensions:
|
58
|
+
class String
  # Returns a copy of the string that is safe to use as an unquoted Newick
  # label: parentheses, square brackets (Newick comment delimiters), colons,
  # semicolons, commas, quotes and whitespace are each replaced by "_".
  def nwk_sanitize
    gsub(/[()\[\]:;,"'\s]/, '_')
  end
end
|
61
|
+
|
62
|
+
##### Classes:
|
63
|
+
module Dychotomous
  # One of the two branches of a key step, parsed from a "state:target"
  # string. The target is either a step ID in square brackets (the decision
  # is then non-terminal and +conclusion+ is that ID) or the name of a
  # classification (terminal; +conclusion+ is the name).
  class Decision
    attr_reader :state, :terminal, :conclusion

    # string:: Decision in the form "state:[step]" or "state:classification".
    #
    # Raises RuntimeError if the string has no target after the colon.
    def initialize(string)
      # Split on the first colon only, so that classification names may
      # themselves contain colons.
      state, target = string.to_s.split(':', 2)
      if target.nil? || target.empty?
        raise "Malformed decision #{string.inspect}: expected " \
              "'state:[step]' or 'state:classification'."
      end
      @state = state
      step = target[/^\[(.*)\]$/, 1]
      @terminal = step.nil?
      @conclusion = @terminal ? target : step
    end

    # Tests whether this decision is triggered by +state+ (compared through
    # its +to_s+). A decision whose state is "*" is triggered by any state.
    def ==(state)
      @state == '*' || @state == state.to_s
    end
  end

  # A step of the key: the name of the character to evaluate and the two
  # alternative decisions (+a+ is tested first, then +b+).
  class Character
    attr_reader :name, :a, :b

    def initialize(name, a, b)
      @name = name
      @a = a
      @b = b
    end

    # Returns the first decision (+a+, then +b+) triggered by the state that
    # +object+ reports for this character (+object+ must respond to
    # +state(name)+ and +name+).
    #
    # Raises RuntimeError if neither decision is triggered.
    def eval(object)
      state = object.state(name)
      [a, b].each { |decision| return decision if decision == state }
      raise "Impossible to make a decision for #{object.name} based on " +
        "character #{self.name}. Offending state: #{state.to_s}."
    end
  end

  # The complete key, read from a tab-delimited file with four columns:
  # step ID, character name, first decision, second decision. Lines starting
  # with '#' and blank lines are ignored. The first step in the file is the
  # entry point (+first+).
  class Key
    attr_reader :first

    # file:: Path to the key file.
    #
    # Raises RuntimeError if a row has fewer than four columns or contains a
    # malformed decision.
    def initialize(file)
      @characters = {}
      # File.foreach closes the file even if a row fails to parse.
      File.foreach(file) do |ln|
        next if ln =~ /^#/ || ln =~ /^\s*$/

        id, name, first_decision, second_decision = ln.chomp.split("\t")
        if second_decision.nil?
          raise "Malformed key row for step #{id.inspect}: expected 4 " \
                "tab-delimited columns."
        end
        @characters[id] = Character.new(
          name, Decision.new(first_decision), Decision.new(second_decision)
        )
        @first ||= @characters[id]
      end
    end

    # Returns the step (Character) registered under the step ID +name+.
    #
    # Raises RuntimeError if there is no such step.
    def [](name)
      @characters.fetch(name) do
        raise "Cannot find character #{name} in key."
      end
    end
  end
end
|
112
|
+
module CharData
  # A classifiable object (one row of the table): a name plus the State
  # observed for each character, indexed by character name.
  # Note: inside this module, +Object+ refers to this class, not ::Object.
  class Object
    attr_reader :name, :states

    def initialize(name)
      @name = name
      @states = {}
    end

    # Registers +state+ under its character name, replacing any previous one.
    def <<(state)
      @states[state.character] = state
    end

    # Returns the State registered for the character +name+, or nil.
    def state(name)
      @states[name]
    end
  end

  # The value observed for one character in one object.
  class State
    attr_reader :character, :state

    def initialize(character, state)
      @character = character
      @state = state
    end

    def to_s
      state.to_s
    end
  end

  # The full character table, read from a tab-delimited file whose first
  # line holds the column (character) names and whose first column holds the
  # row (object) names. Lines starting with '#' and blank lines are ignored.
  class Table
    attr_reader :objects

    # file:: Path to the table file.
    #
    # Raises RuntimeError if the file has no header line.
    def initialize(file)
      @objects = []
      # Block form guarantees the handle is closed even if parsing fails.
      File.open(file, 'r') do |fh|
        header_line = fh.gets
        raise "Empty character table: #{file}." if header_line.nil?

        header = header_line.chomp.split("\t")
        fh.each_line do |ln|
          next if ln =~ /^#/ || ln =~ /^\s*$/

          r = ln.chomp.split("\t")
          o = Object.new(r[0])
          # Exclusive range: cells are r[1] to r[r.size - 1]. Going up to
          # r.size would register one extra, empty state per row.
          (1...r.size).each do |i|
            next if header[i].nil? # cell beyond the header: no character

            o << State.new(header[i], r[i])
          end
          self << o
        end
      end
    end

    # Appends +object+ to the table.
    def <<(object)
      @objects << object
    end
  end
end
|
148
|
+
module ClassData
  # The outcome of running one object through the key. +result+ is the
  # terminal decision reached, so +result.conclusion+ is the classification.
  class Classification
    attr_reader :key, :object, :result

    # Stores +key+ and +object+ and classifies the object immediately.
    def initialize(key, object)
      @key = key
      @object = object
      classify!
    end

    # Walks the key from its first step, following the decision triggered by
    # the object at each step, until a terminal decision is reached.
    #
    # Raises RuntimeError if the walk comes back to a step it already
    # followed: the object's states do not change, so that would never end.
    def classify!
      followed = {}
      @result = key.first.eval(object)
      until result.terminal
        step = result.conclusion
        if followed[step]
          raise "Dychotomous key revisits step #{step} while classifying " \
                "#{object.name}: the key contains a loop."
        end
        followed[step] = true
        @result = key[step].eval(object)
      end
    end
  end

  # The classification of every object of a table with a given key.
  class Collection
    attr_reader :key, :table, :classifications

    # Stores +key+ and +table+ and classifies all objects immediately.
    def initialize(key, table)
      @key = key
      @table = table
      @classifications = []
      classify!
    end

    # (Re)builds +classifications+, one entry per object in table order.
    # Calling it again replaces the list instead of appending duplicates.
    def classify!
      @classifications = table.objects.map do |object|
        Classification.new(key, object)
      end
    end

    # Returns the objects whose classification equals +conclusion+.
    def classified_as(conclusion)
      classifications
        .select { |c| c.result.conclusion == conclusion }
        .map(&:object)
    end

    # Returns the key as a Newick tree with the classified objects as leaves.
    def to_nwk
      to_nwk_node(key.first) + ";"
    end

    # Returns the Newick text for +node+, or an empty string if no object
    # was classified under it.
    #
    # node:: A Dychotomous::Character (internal node labelled with the
    #        sanitized character name) or a Dychotomous::Decision.
    #
    # Raises RuntimeError for any other kind of node.
    def to_nwk_node(node)
      case node
      when Dychotomous::Character
        # Render each branch exactly once; empty branches are dropped.
        branches = [node.a, node.b].map { |d| to_nwk_node(d) }
        branches = branches.reject(&:empty?)
        return "" if branches.empty?

        "(#{branches.join(',')})#{node.name.nwk_sanitize}"
      when Dychotomous::Decision
        # A non-terminal decision is rendered as the step it points to.
        return to_nwk_node(key[node.conclusion]) unless node.terminal

        labels = classified_as(node.conclusion).map { |o| o.name.nwk_sanitize }
        labels.size > 1 ? "(#{labels.join(',')})" : labels.first.to_s
      else
        raise "Unsupported class: to_nwk_node: #{node}."
      end
    end
  end
end
|
204
|
+
|
205
|
+
##### MAIN:
|
206
|
+
# Reads the key and the character table, classifies every object, and writes
# the requested outputs. Progress messages go to STDERR unless -q was given.
# Any error is reported on STDERR with its backtrace and the script exits
# with status 1 so that callers can detect the failure.
begin
  $stderr.puts "Reading dychotomous key." unless o[:q]
  key = Dychotomous::Key.new(o[:key])
  $stderr.puts "Reading character table." unless o[:q]
  table = CharData::Table.new(o[:table])
  $stderr.puts "Classifying objects." unless o[:q]
  classif = ClassData::Collection.new(key, table)

  unless o[:class].nil?
    $stderr.puts "Generating classification table." unless o[:q]
    # Block form closes the file even if writing fails.
    File.open(o[:class], 'w') do |fh|
      classif.classifications.each do |c|
        fh.puts "#{c.object.name}\t#{c.result.conclusion}"
      end
    end
  end

  unless o[:nwk].nil?
    $stderr.puts "Generating classification tree." unless o[:q]
    File.open(o[:nwk], 'w') { |fh| fh.puts classif.to_nwk }
  end
rescue => err
  $stderr.puts "Exception: #{err}\n\n"
  err.backtrace.each { |l| $stderr.puts l + "\n" }
  exit 1
end
|