miga-base 0.7.26.0 → 0.7.26.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/version.rb +1 -1
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +41964 -0
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +32439 -0
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +62056 -0
- data/utils/FastAAI/FastAAI/FastAAI +1336 -0
- data/utils/FastAAI/README.md +84 -0
- data/utils/FastAAI/kAAI_v1.0_virus.py +1296 -0
- data/utils/enveomics/Docs/recplot2.md +244 -0
- data/utils/enveomics/Examples/aai-matrix.bash +66 -0
- data/utils/enveomics/Examples/ani-matrix.bash +66 -0
- data/utils/enveomics/Examples/essential-phylogeny.bash +105 -0
- data/utils/enveomics/Examples/unus-genome-phylogeny.bash +100 -0
- data/utils/enveomics/LICENSE.txt +73 -0
- data/utils/enveomics/Makefile +52 -0
- data/utils/enveomics/Manifest/Tasks/aasubs.json +103 -0
- data/utils/enveomics/Manifest/Tasks/blasttab.json +786 -0
- data/utils/enveomics/Manifest/Tasks/distances.json +161 -0
- data/utils/enveomics/Manifest/Tasks/fasta.json +766 -0
- data/utils/enveomics/Manifest/Tasks/fastq.json +243 -0
- data/utils/enveomics/Manifest/Tasks/graphics.json +126 -0
- data/utils/enveomics/Manifest/Tasks/mapping.json +67 -0
- data/utils/enveomics/Manifest/Tasks/ogs.json +382 -0
- data/utils/enveomics/Manifest/Tasks/other.json +829 -0
- data/utils/enveomics/Manifest/Tasks/remote.json +355 -0
- data/utils/enveomics/Manifest/Tasks/sequence-identity.json +501 -0
- data/utils/enveomics/Manifest/Tasks/tables.json +308 -0
- data/utils/enveomics/Manifest/Tasks/trees.json +68 -0
- data/utils/enveomics/Manifest/Tasks/variants.json +111 -0
- data/utils/enveomics/Manifest/categories.json +156 -0
- data/utils/enveomics/Manifest/examples.json +154 -0
- data/utils/enveomics/Manifest/tasks.json +4 -0
- data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +69 -0
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -0
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -0
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -0
- data/utils/enveomics/Pipelines/assembly.pbs/README.md +189 -0
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +112 -0
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +23 -0
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +44 -0
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +50 -0
- data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +37 -0
- data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +68 -0
- data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +49 -0
- data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +80 -0
- data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +57 -0
- data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +63 -0
- data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +38 -0
- data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +73 -0
- data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +21 -0
- data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +72 -0
- data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +98 -0
- data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -0
- data/utils/enveomics/Pipelines/blast.pbs/README.md +127 -0
- data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +109 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +128 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +16 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +22 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +26 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +89 -0
- data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +29 -0
- data/utils/enveomics/Pipelines/idba.pbs/README.md +49 -0
- data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +95 -0
- data/utils/enveomics/Pipelines/idba.pbs/run.pbs +56 -0
- data/utils/enveomics/Pipelines/trim.pbs/README.md +54 -0
- data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +70 -0
- data/utils/enveomics/Pipelines/trim.pbs/run.pbs +130 -0
- data/utils/enveomics/README.md +42 -0
- data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +171 -0
- data/utils/enveomics/Scripts/Aln.cat.rb +163 -0
- data/utils/enveomics/Scripts/Aln.convert.pl +35 -0
- data/utils/enveomics/Scripts/AlphaDiversity.pl +152 -0
- data/utils/enveomics/Scripts/BedGraph.tad.rb +93 -0
- data/utils/enveomics/Scripts/BedGraph.window.rb +71 -0
- data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +102 -0
- data/utils/enveomics/Scripts/BlastTab.addlen.rb +63 -0
- data/utils/enveomics/Scripts/BlastTab.advance.bash +48 -0
- data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +55 -0
- data/utils/enveomics/Scripts/BlastTab.catsbj.pl +104 -0
- data/utils/enveomics/Scripts/BlastTab.cogCat.rb +76 -0
- data/utils/enveomics/Scripts/BlastTab.filter.pl +47 -0
- data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +194 -0
- data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +104 -0
- data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +157 -0
- data/utils/enveomics/Scripts/BlastTab.recplot2.R +48 -0
- data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +86 -0
- data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +119 -0
- data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +86 -0
- data/utils/enveomics/Scripts/BlastTab.subsample.pl +47 -0
- data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +114 -0
- data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +90 -0
- data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +101 -0
- data/utils/enveomics/Scripts/Chao1.pl +97 -0
- data/utils/enveomics/Scripts/CharTable.classify.rb +234 -0
- data/utils/enveomics/Scripts/EBIseq2tax.rb +83 -0
- data/utils/enveomics/Scripts/FastA.N50.pl +56 -0
- data/utils/enveomics/Scripts/FastA.extract.rb +152 -0
- data/utils/enveomics/Scripts/FastA.filter.pl +52 -0
- data/utils/enveomics/Scripts/FastA.filterLen.pl +28 -0
- data/utils/enveomics/Scripts/FastA.filterN.pl +60 -0
- data/utils/enveomics/Scripts/FastA.fragment.rb +92 -0
- data/utils/enveomics/Scripts/FastA.gc.pl +42 -0
- data/utils/enveomics/Scripts/FastA.interpose.pl +93 -0
- data/utils/enveomics/Scripts/FastA.length.pl +38 -0
- data/utils/enveomics/Scripts/FastA.mask.rb +89 -0
- data/utils/enveomics/Scripts/FastA.per_file.pl +36 -0
- data/utils/enveomics/Scripts/FastA.qlen.pl +57 -0
- data/utils/enveomics/Scripts/FastA.rename.pl +65 -0
- data/utils/enveomics/Scripts/FastA.revcom.pl +23 -0
- data/utils/enveomics/Scripts/FastA.sample.rb +83 -0
- data/utils/enveomics/Scripts/FastA.slider.pl +85 -0
- data/utils/enveomics/Scripts/FastA.split.pl +55 -0
- data/utils/enveomics/Scripts/FastA.split.rb +79 -0
- data/utils/enveomics/Scripts/FastA.subsample.pl +131 -0
- data/utils/enveomics/Scripts/FastA.tag.rb +65 -0
- data/utils/enveomics/Scripts/FastA.wrap.rb +48 -0
- data/utils/enveomics/Scripts/FastQ.filter.pl +54 -0
- data/utils/enveomics/Scripts/FastQ.interpose.pl +90 -0
- data/utils/enveomics/Scripts/FastQ.offset.pl +90 -0
- data/utils/enveomics/Scripts/FastQ.split.pl +53 -0
- data/utils/enveomics/Scripts/FastQ.tag.rb +63 -0
- data/utils/enveomics/Scripts/FastQ.test-error.rb +81 -0
- data/utils/enveomics/Scripts/FastQ.toFastA.awk +24 -0
- data/utils/enveomics/Scripts/GFF.catsbj.pl +127 -0
- data/utils/enveomics/Scripts/GenBank.add_fields.rb +84 -0
- data/utils/enveomics/Scripts/HMM.essential.rb +351 -0
- data/utils/enveomics/Scripts/HMM.haai.rb +168 -0
- data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +83 -0
- data/utils/enveomics/Scripts/JPlace.distances.rb +88 -0
- data/utils/enveomics/Scripts/JPlace.to_iToL.rb +320 -0
- data/utils/enveomics/Scripts/M5nr.getSequences.rb +81 -0
- data/utils/enveomics/Scripts/MeTaxa.distribution.pl +198 -0
- data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +35 -0
- data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +49 -0
- data/utils/enveomics/Scripts/NCBIacc2tax.rb +92 -0
- data/utils/enveomics/Scripts/Newick.autoprune.R +27 -0
- data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +228 -0
- data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +32 -0
- data/utils/enveomics/Scripts/RefSeq.download.bash +48 -0
- data/utils/enveomics/Scripts/SRA.download.bash +57 -0
- data/utils/enveomics/Scripts/TRIBS.plot-test.R +36 -0
- data/utils/enveomics/Scripts/TRIBS.test.R +39 -0
- data/utils/enveomics/Scripts/Table.barplot.R +31 -0
- data/utils/enveomics/Scripts/Table.df2dist.R +30 -0
- data/utils/enveomics/Scripts/Table.filter.pl +61 -0
- data/utils/enveomics/Scripts/Table.merge.pl +77 -0
- data/utils/enveomics/Scripts/Table.replace.rb +69 -0
- data/utils/enveomics/Scripts/Table.round.rb +63 -0
- data/utils/enveomics/Scripts/Table.split.pl +57 -0
- data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +227 -0
- data/utils/enveomics/Scripts/VCF.KaKs.rb +147 -0
- data/utils/enveomics/Scripts/VCF.SNPs.rb +88 -0
- data/utils/enveomics/Scripts/aai.rb +418 -0
- data/utils/enveomics/Scripts/ani.rb +362 -0
- data/utils/enveomics/Scripts/clust.rand.rb +102 -0
- data/utils/enveomics/Scripts/gi2tax.rb +103 -0
- data/utils/enveomics/Scripts/in_silico_GA_GI.pl +96 -0
- data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
- data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
- data/utils/enveomics/Scripts/lib/enveomics.R +1 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +24 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +253 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +182 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +74 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +237 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stat.rb +30 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +135 -0
- data/utils/enveomics/Scripts/ogs.annotate.rb +88 -0
- data/utils/enveomics/Scripts/ogs.core-pan.rb +160 -0
- data/utils/enveomics/Scripts/ogs.extract.rb +125 -0
- data/utils/enveomics/Scripts/ogs.mcl.rb +186 -0
- data/utils/enveomics/Scripts/ogs.rb +104 -0
- data/utils/enveomics/Scripts/ogs.stats.rb +131 -0
- data/utils/enveomics/Scripts/rbm.rb +146 -0
- data/utils/enveomics/Tests/Makefile +10 -0
- data/utils/enveomics/Tests/Mgen_M2288.faa +3189 -0
- data/utils/enveomics/Tests/Mgen_M2288.fna +8282 -0
- data/utils/enveomics/Tests/Mgen_M2321.fna +8288 -0
- data/utils/enveomics/Tests/Nequ_Kin4M.faa +2970 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +7 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +17 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +137 -0
- data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +123 -0
- data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +200 -0
- data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +55 -0
- data/utils/enveomics/Tests/alkB.nwk +1 -0
- data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +13 -0
- data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +17 -0
- data/utils/enveomics/Tests/hiv1.faa +59 -0
- data/utils/enveomics/Tests/hiv1.fna +134 -0
- data/utils/enveomics/Tests/hiv2.faa +70 -0
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +233 -0
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +1 -0
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +233 -0
- data/utils/enveomics/Tests/phyla_counts.tsv +10 -0
- data/utils/enveomics/Tests/primate_lentivirus.ogs +11 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +9 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +8 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +6 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +9 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +6 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +6 -0
- data/utils/enveomics/build_enveomics_r.bash +45 -0
- data/utils/enveomics/enveomics.R/DESCRIPTION +31 -0
- data/utils/enveomics/enveomics.R/NAMESPACE +39 -0
- data/utils/enveomics/enveomics.R/R/autoprune.R +155 -0
- data/utils/enveomics/enveomics.R/R/barplot.R +184 -0
- data/utils/enveomics/enveomics.R/R/cliopts.R +135 -0
- data/utils/enveomics/enveomics.R/R/df2dist.R +154 -0
- data/utils/enveomics/enveomics.R/R/growthcurve.R +331 -0
- data/utils/enveomics/enveomics.R/R/recplot.R +354 -0
- data/utils/enveomics/enveomics.R/R/recplot2.R +1631 -0
- data/utils/enveomics/enveomics.R/R/tribs.R +583 -0
- data/utils/enveomics/enveomics.R/R/utils.R +50 -0
- data/utils/enveomics/enveomics.R/README.md +80 -0
- data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
- data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +25 -0
- data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +46 -0
- data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +47 -0
- data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +32 -0
- data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +91 -0
- data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +57 -0
- data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -0
- data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +39 -0
- data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +38 -0
- data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +40 -0
- data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +67 -0
- data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +37 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +122 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +45 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +68 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +41 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +40 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +41 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +43 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +37 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +74 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +59 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +32 -0
- data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +59 -0
- data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -0
- data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -0
- data/utils/enveomics/enveomics.R/man/growth.curves.Rd +14 -0
- data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +13 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +63 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +38 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +38 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +111 -0
- data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -0
- data/utils/enveomics/globals.mk +8 -0
- data/utils/enveomics/manifest.json +9 -0
- metadata +277 -4
@@ -0,0 +1,31 @@
|
|
1
|
+
#!/usr/bin/env Rscript
|
2
|
+
#
|
3
|
+
# @author Luis M. Rodriguez-R
|
4
|
+
# @update Dec-29-2015
|
5
|
+
# @license artistic license 2.0
|
6
|
+
#
|
7
|
+
|
8
|
+
#= Load stuff
# The script path is taken from the "--file=" entry of the full command line;
# the enveomics.R sources are looked up under "lib/enveomics.R" next to it.
cmd_args    <- commandArgs(trailingOnly = FALSE)
script_path <- sub("^--file=", "", cmd_args[grep("^--file=", cmd_args)])
enveomics_R <- file.path(dirname(script_path), "lib", "enveomics.R")
source(file.path(enveomics_R, "R", "cliopts.R"))
source(file.path(enveomics_R, "R", "utils.R"))
source(file.path(enveomics_R, "R", "barplot.R"))

#= Generate interface
# Build the command-line interface from the signature and Rd page of
# enve.barplot. One to three positional arguments: output PDF, width, height.
opt <- enve.cliopts(
  enve.barplot,
  file.path(enveomics_R, "man", "enve.barplot.Rd"),
  positional_arguments = c(1, 3),
  usage = "usage: %prog [options] output.pdf [width height]",
  mandatory = c("x"),
  vectorize = c("sizes", "order", "col"),
  number = c("sizes", "order"),
  o_desc = list(x = "A tab-delimited file containing header (first row) and row names (first column).")
)

#= Run it!
# Positional arguments 2 and 3 (when given) are converted to numbers before
# everything is handed to pdf().
pdf_args <- as.list(opt$args)
numeric_idx <- intersect(2:3, seq_along(pdf_args))
pdf_args[numeric_idx] <- lapply(pdf_args[numeric_idx], as.numeric)
do.call("pdf", pdf_args)
do.call("enve.barplot", opt$options)
dev.off()
|
@@ -0,0 +1,30 @@
|
|
1
|
+
#!/usr/bin/env Rscript
|
2
|
+
#
|
3
|
+
# @author Luis M. Rodriguez-R
|
4
|
+
# @update Jan-04-2016
|
5
|
+
# @license artistic license 2.0
|
6
|
+
#
|
7
|
+
|
8
|
+
#= Load stuff
# The script path is taken from the "--file=" entry of the full command line;
# the enveomics.R sources are looked up under "lib/enveomics.R" next to it.
cmd_args    <- commandArgs(trailingOnly = FALSE)
script_path <- sub("^--file=", "", cmd_args[grep("^--file=", cmd_args)])
enveomics_R <- file.path(dirname(script_path), "lib", "enveomics.R")
source(file.path(enveomics_R, "R", "cliopts.R"))
source(file.path(enveomics_R, "R", "df2dist.R"))

#= Generate interface
# Build the command-line interface from the signature and Rd page of
# enve.df2dist. Exactly one positional argument: the output matrix file.
opt <- enve.cliopts(
  enve.df2dist,
  file.path(enveomics_R, "man", "enve.df2dist.Rd"),
  positional_arguments = 1,
  usage = "usage: %prog [options] output.mat",
  mandatory = c("x"),
  number = c("default.d", "max.sim"),
  o_desc = list(x = "A tab-delimited table with the distances."),
  p_desc = "Transform a tab-delimited list of distances into a squared matrix."
)

#= Run it!
# The "x" option arrives as a file path; replace it with the parsed table
# before forwarding all options to enve.df2dist.
input_table <- read.table(opt$options[['x']], header=TRUE, sep="\t", as.is=TRUE)
opt$options[['x']] <- input_table
dist_obj <- do.call("enve.df2dist", opt$options)
write.table(as.matrix(dist_obj), opt$args[1], quote=FALSE, sep="\t", col.names=NA)
|
@@ -0,0 +1,61 @@
|
|
1
|
+
#!/usr/bin/env perl
|
2
|
+
#
|
3
|
+
# @author: Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
|
4
|
+
# @update: Mar-23-2015
|
5
|
+
# @license: artistic license 2.0
|
6
|
+
#
|
7
|
+
|
8
|
+
use warnings;
|
9
|
+
use strict;
|
10
|
+
use Getopt::Std;
|
11
|
+
|
12
|
+
# Parse options and the two positional arguments: the list of IDs and the
# table to be filtered.
my %o;
getopts('k:s:ihn', \%o);
my($list, $table) = @ARGV;

($list and $table) or die "
.Description:
Extracts (and re-orders) a subset of rows from a raw table.

.Usage: $0 [options] list.txt table.txt > subset.txt

Options:
-k <int> Column of the table to use as key to filter. By default, 1.
-s <str> String to use as separation between rows. By default, tabulation.
-i If set, reports the inverse of the list (i.e., reports only rows
absent in the list). Implies -n.
-h Keep first row of the table (header) untouched.
-n No re-order. The output has the same order of the table. By
default, it prints in the order of the list.

list.txt List of IDs to extract.
table.txt Table file containing the superset.
subset.txt Table file to be created.

";

# Defaults: key on the first column, tab-separated; -i forces -n.
$o{k} ||= 1;
$o{s} ||= "\t";
$o{n}=1 if $o{i};
my $HEADER = "";

# First pass: index the "lookup" file by key. Without -n the table is indexed
# (by column -k) so that rows can be emitted in list order; with -n the list is
# indexed (by its first column) and the table is streamed in its own order.
my $tbl2 = $o{n} ? $list : $table;
my $idx_col = $o{n} ? 0 : $o{k}-1;
open(my $idx_fh, "<", $tbl2) or die "Cannot read file: $tbl2: $!\n";
if($o{h} and not $o{n}){
  $HEADER = <$idx_fh>;
  # An empty file yields no header line; keep the empty default instead
  $HEADER = "" unless defined $HEADER;
}
my %tbl2;
while(my $l = <$idx_fh>){
  chomp $l;
  my @r = split $o{s}, $l;
  $tbl2{ $r[$idx_col] } = $l;
}
close $idx_fh;

# Second pass: stream the other file and print the selected rows.
my $tbl1 = $o{n} ? $table : $list;
my $key_col = $o{n} ? $o{k}-1 : 0;
open(my $in_fh, "<", $tbl1) or die "Cannot read file: $tbl1: $!\n";
if($o{h} and $o{n}){
  $HEADER = <$in_fh>;
  $HEADER = "" unless defined $HEADER;
}
print $HEADER;
while(my $ln = <$in_fh>){
  chomp $ln;
  # Skip empty lines only: a plain truth test would also drop the line "0"
  next unless length $ln;
  my @ln = split $o{s}, $ln;
  my $good = exists $tbl2{ $ln[$key_col] };
  $good = not $good if $o{i};
  print "".($o{n} ? $ln : $tbl2{$ln[0]})."\n" if $good;
}
close $in_fh;
|
61
|
+
|
@@ -0,0 +1,77 @@
|
|
1
|
+
#!/usr/bin/env perl
|
2
|
+
#
|
3
|
+
# @author: Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
|
4
|
+
# @update: Sep-20-2015
|
5
|
+
# @license: artistic license 2.0
|
6
|
+
#
|
7
|
+
|
8
|
+
use warnings;
|
9
|
+
use strict;
|
10
|
+
use Getopt::Std;
|
11
|
+
|
12
|
+
# Parse options; every remaining argument is an input (two-column) list.
my %o;
getopts('si:o:ne:h:H:r:', \%o);
my @files = @ARGV;

# At least two input files are required
$#files>0 or die "
.Description:
Merges multiple (two-column) lists into one table.

.Usage:
$0 [options] files... > output.txt

Options:
-s Values are read as Strings. By default, values are read as numbers.
-i <str> Input field-delimiter. By default: tabulation (\"\\t\").
-o <str> Output field-delimiter. By default: tabulation (\"\\t\").
-n No-header. By default, the header is determined by the file names.
-e <str> Default string when no value is found. By default, the \"empty\" value
is 0 if values are numeric (i.e., unless -s is set) or an empty string
otherwise.
-h <str> Header of the first column, containing the IDs. By default: \"Tag\".
-H <str> Format of filenames capturing the column header in the first capturing
parenthesis. Non-capturing parentheses can be defined as (?:...). By
default: \"(?:.*/)?([^\\.]+)\", which captures the part of the basename
of the file before the first dot (if any).
-r <int> Number of leading rows to ignore in the input files. Zero by default.

";
$o{i} ||= "\t";
$o{o} ||= "\t";
$o{e} ||= ($o{s} ? "" : 0);
$o{h} ||= "Tag";
$o{H} ||= "(?:.*/)?([^\\.]+)";
$o{r} += 0;

# $notes->{ID}->[file index] holds the value of ID in each input file
my $notes = {};

print $o{h} unless $o{n};
my $i = 0;
for my $file (@files){
  unless($o{n}){
    # The column header is the first capture of -H applied to the file name
    $file =~ m/$o{H}/ or die "Filename '$file' doesn't match format '$o{H}'.";
    my $tag=$1;
    print $o{o}.$tag;
  }
  open(my $in, "<", $file) or die "Cannot read file: $file: $!\n";
  while(<$in>){
    next if $. <= $o{r};
    chomp;
    my @l = split $o{i};
    $l[1]+=0 unless $o{s};
    $notes->{$l[0]} ||= [];
    $notes->{$l[0]}->[$i] = $l[1];
  }
  close $in;
  $i++;
}
print "\n" unless $o{n};

# Rows are emitted in sorted ID order so the output is reproducible between
# runs (plain hash-key order is not stable).
for my $id (sort keys %$notes){
  print $id;
  for my $col (0 .. $#files){
    print $o{o}.(( defined $notes->{$id}->[$col] ? $notes->{$id}->[$col] : $o{e} ));
  }
  print "\n";
}
|
77
|
+
|
@@ -0,0 +1,69 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
#
|
4
|
+
# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
|
5
|
+
# @update Feb 01 2016
|
6
|
+
# @license artistic license 2.0
|
7
|
+
#
|
8
|
+
|
9
|
+
require "optparse"
|
10
|
+
|
11
|
+
# Command-line options. Defaults: tab-delimited columns, replace the first
# column, and use an empty string for keys that are missing from the map.
o = {delimiter: "\t", key: 1, default: ""}
ARGV << "-h" if ARGV.size==0
OptionParser.new do |opts|
  opts.banner = "\nReplaces a field in a table using a mapping file."
  opts.on("-m", "--map FILE",
    "Mapping file with two columns (key and replacement)."){ |v| o[:map] = v }
  opts.on("-i", "--in FILE", "Input table."){ |v| o[:in] = v }
  opts.on("-o", "--out FILE", "Output table."){ |v| o[:out] = v }
  # Integer coercion makes OptionParser reject non-numeric values instead of
  # silently turning them into 0
  opts.on("-k", "--key INT", Integer,
    "Column to replace in --in. By default: 1."){ |v| o[:key] = v }
  opts.on("-u", "--unknown STR",
    "String to use whenever the key is not found in --map."
    ){ |v| o[:default] = v }
  opts.on("-d", "--delimiter STR",
    "String delimiting columns. By default, tabulation."
    ){ |v| o[:delimiter] = v }
  opts.on("-h", "--help", "Display this screen") do
    puts opts
    exit
  end
  opts.separator ""
end.parse!
abort "-m is mandatory" if o[:map].nil?
abort "-i is mandatory" if o[:in].nil?
abort "-o is mandatory" if o[:out].nil?
|
36
|
+
|
37
|
+
class String
  # Tells whether the whole string can be parsed as a number by Kernel#Float.
  #
  # @return [Boolean] true if Float(self) succeeds, false otherwise
  def is_number?
    Float(self)
    true
  rescue ArgumentError, TypeError
    # Only conversion failures mean "not a number"; anything else propagates
    false
  end
end
|
42
|
+
|
43
|
+
begin
  # Read mapping file: first column is the key, second one its replacement.
  # Keys without a second column are stored as nil and therefore fall back to
  # the default string below.
  map = {}
  File.open(o[:map], "r") do |ifh|
    ifh.each_line do |ln|
      row = ln.chomp.split(o[:delimiter])
      map[ row[0] ] = row[1]
    end
  end
  # Process table. The block form of File.open closes both handles even if an
  # exception interrupts the loop.
  key_idx = o[:key] - 1
  File.open(o[:in], "r") do |ifh|
    File.open(o[:out], "w") do |ofh|
      ifh.each_line do |ln|
        # Limit -1 keeps trailing empty cells, so rows keep all their columns
        row = ln.chomp.split(o[:delimiter], -1)
        v = map[ row[key_idx] ]
        v = o[:default] if v.nil?
        row[key_idx] = v
        ofh.puts(row.join(o[:delimiter]))
      end
    end
  end
rescue => err
  # Report the failure and its backtrace on STDERR
  $stderr.puts "Exception: #{err}\n\n"
  err.backtrace.each { |l| $stderr.puts " - " + l + "\n" }
  err
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
#
|
4
|
+
# @author: Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
|
5
|
+
# @update: Feb 04 2015
|
6
|
+
# @license: artistic license 2.0
|
7
|
+
#
|
8
|
+
|
9
|
+
require 'optparse'
|
10
|
+
|
11
|
+
# Command-line options. Defaults: round to zero decimal digits, tab-delimited.
o = {:ndigits=>0, :action=>:round, :delimiter=>"\t"}
ARGV << '-h' if ARGV.size==0
OptionParser.new do |opts|
  opts.banner = "\nRounds numbers in a table."
  opts.on("-i", "--in FILE", "Input table."){ |v| o[:in] = v }
  opts.on("-o", "--out FILE", "Output table."){ |v| o[:out] = v }
  # Integer coercion makes OptionParser reject non-numeric values instead of
  # silently turning them into 0
  opts.on("-n", "--ndigits INT", Integer,
    "Number of decimal digits. By default: #{o[:ndigits]}"){ |v| o[:ndigits] = v }
  opts.on("-f", "--floor",
    "Floors the values instead of rounding them. Ignores -n."){ o[:action] = :floor }
  opts.on("-c", "--ceil",
    "Ceils the values instead of rounding them. Ignores -n."){ o[:action] = :ceil }
  opts.on("-d", "--delimiter STR",
    "String delimiting columns. By default, tabulation."){ |v| o[:delimiter] = v }
  opts.on("-h", "--help", "Display this screen") do
    puts opts
    exit
  end
  opts.separator ""
end.parse!
abort "-i is mandatory" if o[:in].nil?
abort "-o is mandatory" if o[:out].nil?
|
29
|
+
|
30
|
+
class String
  # Tells whether the whole string can be parsed as a number by Kernel#Float.
  #
  # @return [Boolean] true if Float(self) succeeds, false otherwise
  def is_number?
    Float(self)
    true
  rescue ArgumentError, TypeError
    # Only conversion failures mean "not a number"; anything else propagates
    false
  end
end
|
35
|
+
|
36
|
+
begin
  # The block form of File.open closes both handles even if an exception
  # interrupts the loop.
  File.open(o[:in], "r") do |ifh|
    File.open(o[:out], "w") do |ofh|
      ifh.each_line do |ln|
        # Limit -1 keeps trailing empty cells, so rows keep all their columns
        row = ln.chomp.split(o[:delimiter], -1).map do |value|
          # Non-numeric cells are copied through untouched
          next value unless value.is_number?
          case o[:action]
          when :round then value.to_f.round(o[:ndigits]).to_s
          when :floor then value.to_f.floor.to_s
          when :ceil  then value.to_f.ceil.to_s
          else value
          end
        end
        ofh.puts(row.join(o[:delimiter]))
      end
    end
  end
rescue => err
  # Report the failure and its backtrace on STDERR
  $stderr.puts "Exception: #{err}\n\n"
  err.backtrace.each { |l| $stderr.puts " - " + l + "\n" }
  err
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
#!/usr/bin/env perl
|
2
|
+
#
|
3
|
+
# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
|
4
|
+
# @update Feb-01-2016
|
5
|
+
# @license artistic license 2.0
|
6
|
+
#
|
7
|
+
|
8
|
+
use warnings;
|
9
|
+
use strict;
|
10
|
+
use Getopt::Std;
|
11
|
+
use Symbol;
|
12
|
+
|
13
|
+
# Parse options; the only positional argument is the input table.
my %o;
getopts('i:o:d:e:h', \%o);
my $file = shift @ARGV;

die "
.Description:
Split a file with multiple columns into multiple two-columns lists.

.Usage:
$0 [options] file

Options:
-i <str> Input field-delimiter. By default: tabulation (\"\\t\").
-o <str> Prefix of the output files. By default: no prefix (\"\").
-d <str> Output directory. By default: current directory (\"\").

" unless ($file and not $o{h});
$o{i} = "\t" unless $o{i};
$o{o} = "" unless $o{o};
$o{o} = $o{d}."/".$o{o} if $o{d};

# One output handle per data column, created when the first row is read
my @fhs=();
my $header_seen=0;
open IN, "<", $file or die "Cannot read file: $file: $!\n";
while(my $line = <IN>){
  chomp $line;
  my($id, @values) = split $o{i}, $line;
  unless($header_seen){
    # First row: each column name (with '.', '/' and ':' replaced by '_')
    # becomes the name of one output file
    $header_seen++;
    for my $name (@values){
      $name =~ s/[\.\/:]/_/g;
      my $gs = gensym;
      open($gs, '>', $o{o}.$name.".txt") or die "Cannot create file: $o{o}$name.txt: $!\n";
      push @fhs, $gs;
    }
    next;
  }
  # Remaining rows: write "ID<delimiter>value" to the file of each column,
  # skipping cells that evaluate to false
  for my $col (0 .. $#values){
    next unless $values[$col];
    print { qualify_to_ref $fhs[$col] } $id.$o{i}.$values[$col]."\n";
  }
}
close IN;
close $_ for @fhs;
|
57
|
+
|
@@ -0,0 +1,227 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
#
|
4
|
+
# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
|
5
|
+
# @update: Feb-06-2015
|
6
|
+
# @license artistic license 2.0
|
7
|
+
#
|
8
|
+
|
9
|
+
require 'optparse'
|
10
|
+
|
11
|
+
# Command-line options, kept in a global because the classes defined later in
# this script read $opts[:warns].
$opts = {:warns=>false}
ARGV << '-h' if ARGV.size==0
OptionParser.new do |opt|
  opt.separator "Re-formats Silva taxonomy into NCBI-like taxonomy dump files."
  opt.separator ""
  opt.separator "Mandatory arguments"
  opt.on("-k", "--silvaranks FILE",
    "Input Silva ranks file (e.g., tax_ranks_ssu_115.txt)."){ |v| $opts[:silvaranks]=v }
  opt.on("-f", "--silvaref FILE",
    "Input Silva ref alignment file (e.g., SSURef_NR99_115_tax_silva_full_align_trunc.fasta)."){ |v| $opts[:silvaref]=v }
  opt.separator ""
  opt.separator "Additional options"
  opt.on("-p", "--patch FILE",
    "If passed, it replaces the paths specified in the patch."){ |v| $opts[:patch]=v }
  opt.on("-s", "--seqinfo FILE",
    "If passed, it creates a CSV seq-info file compatible with taxtastic."){ |v| $opts[:seqinfo]=v }
  opt.on("-t", "--taxfile FILE",
    "If passed, it creates a simple TSV taxonomy file."){ |v| $opts[:taxfile]=v }
  opt.on("-n", "--ncbi FILE",
    "If passed, output folder for the NCBI dump files (e.g., taxdmp)."){ |v| $opts[:ncbi]=v }
  opt.on("-w", "--warns", "Verbosely display warnings."){ $opts[:warns]=true }
  opt.on("-h", "--help","Display this screen") do
    puts opt
    exit
  end
  opt.separator ""
end.parse!
# File.exist? is used because File.exists? is no longer available in Ruby 3.2+
abort "-k/--silvaranks is mandatory." if $opts[:silvaranks].nil?
abort "-k/--silvaranks must exist." unless File.exist? $opts[:silvaranks]
abort "-f/--silvaref is mandatory." if $opts[:silvaref].nil?
abort "-f/--silvaref must exist." unless File.exist? $opts[:silvaref]
|
36
|
+
|
37
|
+
# A single node of the taxonomy tree. A node knows its parent and its
# children; nodes flagged with +leaf+ are treated as terminal entries by
# #each_desc.
class Node
  attr_accessor :id, :tax, :leaf, :name_type
  attr_reader :name, :rank, :parent, :children

  # name:: Name of the node.
  # rank:: Rank of the node; "no rank" is stored when nil is passed.
  def initialize(name, rank=nil)
    @name = name
    @rank = rank.nil? ? "no rank" : rank
    @children = []
    @leaf = false
    @name_type = "scientific name"
  end

  # Sets the parent of this node and adds the node to the children of the
  # new parent. If the node already had a parent, it is first removed from
  # that parent's children, so re-parenting (or re-assigning the same parent)
  # never leaves stale or duplicated entries behind. Passing nil detaches the
  # node.
  def parent=(node)
    @parent.children.delete_if { |child| child.equal?(self) } unless @parent.nil?
    @parent = node
    node.add_child(self) unless node.nil?
  end

  # Appends +node+ to the children of this node. It does not update the
  # parent of +node+; use Node#parent= for that.
  def add_child(node)
    @children << node
  end

  # Rank translated to the names used in the NCBI-like dump: "domain" becomes
  # "superkingdom", while "superkingdom" and "major_clade" become "no rank".
  # Any other rank is returned unchanged.
  def ncbirank
    case rank
    when "superkingdom", "major_clade" then "no rank"
    when "domain" then "superkingdom"
    else rank
    end
  end

  # Semicolon-separated names from the topmost ancestor down to this node.
  def path
    parent.nil? ? name : "#{parent.path};#{name}"
  end

  # Calls +blk+ with this node and then, recursively, with every descendant
  # (depth-first, parents before children).
  # internals:: If true, non-leaf nodes are passed to the block.
  # leaves:: If true, leaf nodes are passed to the block.
  def each_desc(internals, leaves, &blk)
    blk[self] if (leaves && leaf) || (internals && !leaf)
    children.each { |child| child.each_desc(internals, leaves, &blk) }
  end

  # Name followed by the rank in parentheses.
  def to_s
    "#{name} (#{rank})"
  end
end
|
76
|
+
|
77
|
+
# Tree of Node objects hanging from a single 'root' node, together with the
# counter used to hand out numeric ids. The root always has id 1.
class Taxonomy
  attr_reader :root, :next_id

  def initialize
    @root = Node.new('root')
    @root.id = 1
    @next_id = 2
  end

  # Assigns the next available id to +node+ and attaches it to the root if it
  # has no parent yet. The id is assigned unconditionally, so registering a
  # node that already has an id gives it a fresh one.
  def register(node)
    node.id = next_id
    node.parent = root if node.parent.nil?
    @next_id += 1
  end

  # Walks down from the root following +path+ (an Array of node names) and
  # returns the node at the end of it. Levels that do not exist yet are
  # created with the default rank and registered; a warning is printed for
  # each of them when $opts[:warns] is set.
  #
  # The lookup is done explicitly among the children of the current node, so
  # a missing level is also created when it has the same name as its parent.
  def node(path)
    current = root
    path.each do |level|
      match = current.children.find { |child| child.name == level }
      if match.nil?
        $stderr.puts "Warning: Impossible to find #{level} at #{current.to_s}, making it up." if $opts[:warns]
        match = Node.new(level)
        match.parent = current
        register(match)
      end
      current = match
    end
    current
  end

  # Yields every node in the tree, root included.
  def each_node(&blk)
    root.each_desc(true, true, &blk)
  end

  # Yields only the nodes flagged as leaves.
  def each_leaf(&blk)
    root.each_desc(false, true, &blk)
  end

  # Yields only the nodes not flagged as leaves, root included.
  def each_internal(&blk)
    root.each_desc(true, false, &blk)
  end
end
|
118
|
+
|
119
|
+
# Main script: builds the taxonomy from the Silva ranks file and the Silva
# reference alignment, then writes whichever outputs were requested.
# All files are opened with the block form of File.open so that they are
# closed even if an exception interrupts the processing.
begin
  taxo = Taxonomy.new

  ## Read patch
  # Tab-separated file; the first column is the path to be replaced and the
  # second column is its replacement.
  patch = {}
  unless $opts[:patch].nil?
    $stderr.puts "Reading patch: #{$opts[:patch]}"
    File.open($opts[:patch], "r") do |f|
      f.each_line do |ln|
        m = ln.chomp.split(/\t/)
        patch[ m[0] ] = m[1]
      end
    end
  end

  ## Read the Silva ranks
  # Columns used: 0 = path, 1 = node name, 2 = rank, 3 = a one-letter flag
  # ("w" entries are skipped, "a" entries are stored as common names).
  $stderr.puts "Reading Silva ranks: #{$opts[:silvaranks]}"
  File.open($opts[:silvaranks], "r") do |f|
    f.gets # header
    f.each_line do |ln|
      m = ln.chomp.split(/\t/)
      m[0] = patch[ m[0] ] unless patch[ m[0] ].nil?
      levels = m[0].split(/;/)
      # The last level of the path must be the node itself; what remains in
      # +levels+ afterwards is the path of its parent.
      raise "Inconsistent path and node name at line #{f.lineno}: #{ln}." unless m[1] == levels.pop
      next if m[3] == "w"
      node = Node.new(m[1], m[2])
      node.name_type = "common name" if m[3] == "a"
      node.parent = taxo.node(levels)
      taxo.register(node)
    end
  end

  $stderr.puts " Top taxa:"
  taxo.root.children.each do |top|
    $stderr.puts " o #{top.to_s} has #{top.children.length} children."
  end

  ## Read the Silva ref alignment
  # Only the header lines (">ID path;to;taxon") are used; every other line is
  # ignored.
  $stderr.puts "Reading Silva ref alignment: #{$opts[:silvaref]}"
  i = 0
  File.open($opts[:silvaref], "r") do |f|
    f.each_line do |ln|
      m = />([^\s]+)\s(.*)/.match(ln)
      next unless m
      # Patch: only the part of the path before the last level is looked up
      # in the patch.
      pm = /(.+);([^;]+)/.match(m[2])
      raise "Cannot parse taxonomy path at line #{f.lineno}: #{ln}." if pm.nil?
      path = "#{patch[ pm[1] ] || pm[1]};#{pm[2]}".split(/;/)
      # Register
      node = taxo.node(path)
      # NOTE: Taxonomy#register assigns a fresh id even to nodes that already
      # have one. This call is kept as it was so the ids in the output files
      # do not change.
      taxo.register(node)
      refseq = Node.new(m[1], 'refseq')
      refseq.parent = node
      refseq.leaf = true
      taxo.register(refseq)
      i += 1
    end
  end
  $stderr.puts " Saved #{i} leaves."

  ### NCBI
  unless $opts[:ncbi].nil?
    ## Create taxonomy .dmp files
    $stderr.puts "Creating NCBI-like files: #{$opts[:ncbi]}"
    # Dir.exist? is used because Dir.exists? was removed in Ruby 3.2.
    Dir.mkdir($opts[:ncbi]) unless Dir.exist?($opts[:ncbi])
    # merged.dmp (created empty)
    $stderr.puts " o Creating merged.dmp"
    File.open(File.join($opts[:ncbi], 'merged.dmp'), 'w'){}
    # names.dmp
    $stderr.puts " o Creating names.dmp"
    File.open(File.join($opts[:ncbi], 'names.dmp'), 'w') do |f|
      taxo.each_internal do |n|
        f.puts [n.id, n.name, "", n.name_type].join("\t|\t")+"\t|"
      end
    end
    # nodes.dmp
    $stderr.puts " o Creating nodes.dmp"
    File.open(File.join($opts[:ncbi], 'nodes.dmp'), 'w') do |f|
      taxo.each_internal do |n|
        # A node without a parent (the root) is written as its own parent.
        parent_id = n.parent.nil? ? n.id : n.parent.id
        fields = [n.id, parent_id, n.ncbirank, "", *Array.new(8,0), ""]
        f.puts fields.join("\t|\t")+"\t|"
      end
    end
  end

  ## Taxtastic
  unless $opts[:seqinfo].nil?
    $stderr.puts "Creating seq-info file: #{$opts[:seqinfo]}"
    File.open($opts[:seqinfo], 'w') do |f|
      f.puts "\"seqname\",\"tax_id\",\"group_name\""
      taxo.each_leaf { |n| f.puts "\"#{n.name}\",\"#{n.parent.id}\",\"#{n.parent.name}\"" }
    end
  end

  ## Misc
  unless $opts[:taxfile].nil?
    $stderr.puts "Creating taxonomy file: #{$opts[:taxfile]}"
    File.open($opts[:taxfile], 'w') do |f|
      f.puts "tax_id\tparent_id\trank\ttax_name"
      taxo.each_internal do |n|
        f.puts [n.id, n.parent.nil? ? n.id : n.parent.id, n.rank, n.name].join("\t")
      end
    end
  end
rescue => err
  $stderr.puts "Exception: #{err}\n\n"
  err.backtrace.each { |l| $stderr.puts l + "\n" }
  # Report the failure to the caller instead of finishing with status 0.
  exit 1
end
|
226
|
+
|
227
|
+
|