RubyGems - miga-base - Versions diffs - 1.2.17.0 → 1.2.17.1 - Mend

miga-base 1.2.17.0 → 1.2.17.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (299) hide show

checksums.yaml +4 -4
data/lib/miga/version.rb +1 -1
data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +41964 -0
data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +32439 -0
data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +62056 -0
data/utils/FastAAI/FastAAI +3659 -0
data/utils/FastAAI/FastAAI-legacy/FastAAI +1336 -0
data/utils/FastAAI/FastAAI-legacy/kAAI_v1.0_virus.py +1296 -0
data/utils/FastAAI/README.md +84 -0
data/utils/enveomics/Docs/recplot2.md +244 -0
data/utils/enveomics/Examples/aai-matrix.bash +66 -0
data/utils/enveomics/Examples/ani-matrix.bash +66 -0
data/utils/enveomics/Examples/essential-phylogeny.bash +105 -0
data/utils/enveomics/Examples/unus-genome-phylogeny.bash +100 -0
data/utils/enveomics/LICENSE.txt +73 -0
data/utils/enveomics/Makefile +52 -0
data/utils/enveomics/Manifest/Tasks/aasubs.json +103 -0
data/utils/enveomics/Manifest/Tasks/blasttab.json +790 -0
data/utils/enveomics/Manifest/Tasks/distances.json +161 -0
data/utils/enveomics/Manifest/Tasks/fasta.json +802 -0
data/utils/enveomics/Manifest/Tasks/fastq.json +291 -0
data/utils/enveomics/Manifest/Tasks/graphics.json +126 -0
data/utils/enveomics/Manifest/Tasks/mapping.json +137 -0
data/utils/enveomics/Manifest/Tasks/ogs.json +382 -0
data/utils/enveomics/Manifest/Tasks/other.json +906 -0
data/utils/enveomics/Manifest/Tasks/remote.json +355 -0
data/utils/enveomics/Manifest/Tasks/sequence-identity.json +650 -0
data/utils/enveomics/Manifest/Tasks/tables.json +308 -0
data/utils/enveomics/Manifest/Tasks/trees.json +68 -0
data/utils/enveomics/Manifest/Tasks/variants.json +111 -0
data/utils/enveomics/Manifest/categories.json +165 -0
data/utils/enveomics/Manifest/examples.json +162 -0
data/utils/enveomics/Manifest/tasks.json +4 -0
data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +69 -0
data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -0
data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -0
data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -0
data/utils/enveomics/Pipelines/assembly.pbs/README.md +189 -0
data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +112 -0
data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +23 -0
data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +44 -0
data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +50 -0
data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +37 -0
data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +68 -0
data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +49 -0
data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +80 -0
data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +57 -0
data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +63 -0
data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +38 -0
data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +73 -0
data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +21 -0
data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +72 -0
data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +98 -0
data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -0
data/utils/enveomics/Pipelines/blast.pbs/README.md +127 -0
data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +109 -0
data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +128 -0
data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +16 -0
data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +22 -0
data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +26 -0
data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +89 -0
data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +29 -0
data/utils/enveomics/Pipelines/idba.pbs/README.md +49 -0
data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +95 -0
data/utils/enveomics/Pipelines/idba.pbs/run.pbs +56 -0
data/utils/enveomics/Pipelines/trim.pbs/README.md +54 -0
data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +70 -0
data/utils/enveomics/Pipelines/trim.pbs/run.pbs +130 -0
data/utils/enveomics/README.md +42 -0
data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +171 -0
data/utils/enveomics/Scripts/Aln.cat.rb +221 -0
data/utils/enveomics/Scripts/Aln.convert.pl +35 -0
data/utils/enveomics/Scripts/AlphaDiversity.pl +152 -0
data/utils/enveomics/Scripts/BedGraph.tad.rb +93 -0
data/utils/enveomics/Scripts/BedGraph.window.rb +71 -0
data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +102 -0
data/utils/enveomics/Scripts/BlastTab.addlen.rb +63 -0
data/utils/enveomics/Scripts/BlastTab.advance.bash +48 -0
data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +55 -0
data/utils/enveomics/Scripts/BlastTab.catsbj.pl +104 -0
data/utils/enveomics/Scripts/BlastTab.cogCat.rb +76 -0
data/utils/enveomics/Scripts/BlastTab.filter.pl +47 -0
data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +194 -0
data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +104 -0
data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +157 -0
data/utils/enveomics/Scripts/BlastTab.recplot2.R +48 -0
data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +86 -0
data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +119 -0
data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +86 -0
data/utils/enveomics/Scripts/BlastTab.subsample.pl +47 -0
data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +114 -0
data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +90 -0
data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +123 -0
data/utils/enveomics/Scripts/Chao1.pl +97 -0
data/utils/enveomics/Scripts/CharTable.classify.rb +234 -0
data/utils/enveomics/Scripts/EBIseq2tax.rb +83 -0
data/utils/enveomics/Scripts/FastA.N50.pl +60 -0
data/utils/enveomics/Scripts/FastA.extract.rb +152 -0
data/utils/enveomics/Scripts/FastA.filter.pl +52 -0
data/utils/enveomics/Scripts/FastA.filterLen.pl +28 -0
data/utils/enveomics/Scripts/FastA.filterN.pl +60 -0
data/utils/enveomics/Scripts/FastA.fragment.rb +100 -0
data/utils/enveomics/Scripts/FastA.gc.pl +42 -0
data/utils/enveomics/Scripts/FastA.interpose.pl +93 -0
data/utils/enveomics/Scripts/FastA.length.pl +38 -0
data/utils/enveomics/Scripts/FastA.mask.rb +89 -0
data/utils/enveomics/Scripts/FastA.per_file.pl +36 -0
data/utils/enveomics/Scripts/FastA.qlen.pl +57 -0
data/utils/enveomics/Scripts/FastA.rename.pl +65 -0
data/utils/enveomics/Scripts/FastA.revcom.pl +23 -0
data/utils/enveomics/Scripts/FastA.sample.rb +98 -0
data/utils/enveomics/Scripts/FastA.slider.pl +85 -0
data/utils/enveomics/Scripts/FastA.split.pl +55 -0
data/utils/enveomics/Scripts/FastA.split.rb +79 -0
data/utils/enveomics/Scripts/FastA.subsample.pl +131 -0
data/utils/enveomics/Scripts/FastA.tag.rb +65 -0
data/utils/enveomics/Scripts/FastA.toFastQ.rb +69 -0
data/utils/enveomics/Scripts/FastA.wrap.rb +48 -0
data/utils/enveomics/Scripts/FastQ.filter.pl +54 -0
data/utils/enveomics/Scripts/FastQ.interpose.pl +90 -0
data/utils/enveomics/Scripts/FastQ.maskQual.rb +89 -0
data/utils/enveomics/Scripts/FastQ.offset.pl +90 -0
data/utils/enveomics/Scripts/FastQ.split.pl +53 -0
data/utils/enveomics/Scripts/FastQ.tag.rb +70 -0
data/utils/enveomics/Scripts/FastQ.test-error.rb +81 -0
data/utils/enveomics/Scripts/FastQ.toFastA.awk +24 -0
data/utils/enveomics/Scripts/GFF.catsbj.pl +127 -0
data/utils/enveomics/Scripts/GenBank.add_fields.rb +84 -0
data/utils/enveomics/Scripts/HMM.essential.rb +351 -0
data/utils/enveomics/Scripts/HMM.haai.rb +168 -0
data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +83 -0
data/utils/enveomics/Scripts/JPlace.distances.rb +88 -0
data/utils/enveomics/Scripts/JPlace.to_iToL.rb +320 -0
data/utils/enveomics/Scripts/M5nr.getSequences.rb +81 -0
data/utils/enveomics/Scripts/MeTaxa.distribution.pl +198 -0
data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +35 -0
data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +49 -0
data/utils/enveomics/Scripts/NCBIacc2tax.rb +92 -0
data/utils/enveomics/Scripts/Newick.autoprune.R +27 -0
data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +228 -0
data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +32 -0
data/utils/enveomics/Scripts/RefSeq.download.bash +48 -0
data/utils/enveomics/Scripts/SRA.download.bash +55 -0
data/utils/enveomics/Scripts/TRIBS.plot-test.R +36 -0
data/utils/enveomics/Scripts/TRIBS.test.R +39 -0
data/utils/enveomics/Scripts/Table.barplot.R +31 -0
data/utils/enveomics/Scripts/Table.df2dist.R +30 -0
data/utils/enveomics/Scripts/Table.filter.pl +61 -0
data/utils/enveomics/Scripts/Table.merge.pl +77 -0
data/utils/enveomics/Scripts/Table.prefScore.R +60 -0
data/utils/enveomics/Scripts/Table.replace.rb +69 -0
data/utils/enveomics/Scripts/Table.round.rb +63 -0
data/utils/enveomics/Scripts/Table.split.pl +57 -0
data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +227 -0
data/utils/enveomics/Scripts/VCF.KaKs.rb +147 -0
data/utils/enveomics/Scripts/VCF.SNPs.rb +88 -0
data/utils/enveomics/Scripts/aai.rb +421 -0
data/utils/enveomics/Scripts/ani.rb +362 -0
data/utils/enveomics/Scripts/anir.rb +137 -0
data/utils/enveomics/Scripts/clust.rand.rb +102 -0
data/utils/enveomics/Scripts/gi2tax.rb +103 -0
data/utils/enveomics/Scripts/in_silico_GA_GI.pl +96 -0
data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
data/utils/enveomics/Scripts/lib/enveomics.R +1 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +293 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +175 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +24 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +17 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +30 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +253 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +88 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +182 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +49 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +74 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +237 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +31 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +152 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +3 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +74 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +135 -0
data/utils/enveomics/Scripts/ogs.annotate.rb +88 -0
data/utils/enveomics/Scripts/ogs.core-pan.rb +160 -0
data/utils/enveomics/Scripts/ogs.extract.rb +125 -0
data/utils/enveomics/Scripts/ogs.mcl.rb +186 -0
data/utils/enveomics/Scripts/ogs.rb +104 -0
data/utils/enveomics/Scripts/ogs.stats.rb +131 -0
data/utils/enveomics/Scripts/rbm-legacy.rb +172 -0
data/utils/enveomics/Scripts/rbm.rb +108 -0
data/utils/enveomics/Scripts/sam.filter.rb +148 -0
data/utils/enveomics/Tests/Makefile +10 -0
data/utils/enveomics/Tests/Mgen_M2288.faa +3189 -0
data/utils/enveomics/Tests/Mgen_M2288.fna +8282 -0
data/utils/enveomics/Tests/Mgen_M2321.fna +8288 -0
data/utils/enveomics/Tests/Nequ_Kin4M.faa +2970 -0
data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +7 -0
data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +17 -0
data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +137 -0
data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +123 -0
data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +200 -0
data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +55 -0
data/utils/enveomics/Tests/alkB.nwk +1 -0
data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +13 -0
data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +17 -0
data/utils/enveomics/Tests/hiv1.faa +59 -0
data/utils/enveomics/Tests/hiv1.fna +134 -0
data/utils/enveomics/Tests/hiv2.faa +70 -0
data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +233 -0
data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +1 -0
data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +233 -0
data/utils/enveomics/Tests/phyla_counts.tsv +10 -0
data/utils/enveomics/Tests/primate_lentivirus.ogs +11 -0
data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +9 -0
data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +8 -0
data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +6 -0
data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +9 -0
data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +6 -0
data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +6 -0
data/utils/enveomics/build_enveomics_r.bash +45 -0
data/utils/enveomics/enveomics.R/DESCRIPTION +31 -0
data/utils/enveomics/enveomics.R/NAMESPACE +39 -0
data/utils/enveomics/enveomics.R/R/autoprune.R +155 -0
data/utils/enveomics/enveomics.R/R/barplot.R +184 -0
data/utils/enveomics/enveomics.R/R/cliopts.R +135 -0
data/utils/enveomics/enveomics.R/R/df2dist.R +154 -0
data/utils/enveomics/enveomics.R/R/growthcurve.R +331 -0
data/utils/enveomics/enveomics.R/R/prefscore.R +79 -0
data/utils/enveomics/enveomics.R/R/recplot.R +354 -0
data/utils/enveomics/enveomics.R/R/recplot2.R +1631 -0
data/utils/enveomics/enveomics.R/R/tribs.R +583 -0
data/utils/enveomics/enveomics.R/R/utils.R +80 -0
data/utils/enveomics/enveomics.R/README.md +81 -0
data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +16 -0
data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +16 -0
data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +16 -0
data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +25 -0
data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +46 -0
data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -0
data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +47 -0
data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -0
data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -0
data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +40 -0
data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +103 -0
data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +67 -0
data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -0
data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -0
data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +45 -0
data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +44 -0
data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +47 -0
data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +75 -0
data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +50 -0
data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +44 -0
data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +139 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +45 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +77 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +47 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +45 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +52 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +51 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +43 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +82 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +59 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +36 -0
data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +23 -0
data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +68 -0
data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -0
data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -0
data/utils/enveomics/enveomics.R/man/growth.curves.Rd +14 -0
data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +13 -0
data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +78 -0
data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +46 -0
data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +45 -0
data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +125 -0
data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -0
data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -0
data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -0
data/utils/enveomics/globals.mk +8 -0
data/utils/enveomics/manifest.json +9 -0
data/utils/multitrim/Multitrim How-To.pdf +0 -0
data/utils/multitrim/README.md +67 -0
data/utils/multitrim/multitrim.py +1555 -0
data/utils/multitrim/multitrim.yml +13 -0
metadata +301 -5

data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl ADDED Viewed

@@ -0,0 +1,228 @@
+#!/usr/bin/env perl
+# @author Luis M Rodriguez-R <lmrodriguezr at gmail dot com>
+# @update Mar-23-2015
+# @license Artistic License 2.0
+use warnings;
+use strict;
+use List::Util qw/sum max/;
+use Getopt::Std;
+use Math::Round qw/round/;
+our $VERSION = 1.1;
+# Deprecation notice, written to STDERR on every run (regardless of -q).
+my $deprecation_notice = <<"WARN";
+┌──[ IMPORTANT ]─────────────────────────────────────────────────┐
+│ This script has been deprecated in favor of JPlace.to_iToL.rb. │
+│ Please use the new version, together with the RAxML EPA's file │
+│ RAxML_portableTree.*.jplace instead.                           │
+└────────────────────────────────────────────────────────────────┘
+WARN
+warn $deprecation_notice;
+sub HELP_MESSAGE { # Usage screen: dies with the text below ($0 is interpolated), so the script stops here.
+die "
+Description:
+   Reformats the node names (labels) of a RAxML_originalLabelledTree.<NAME> file
+   (produced by RAxML's EPA, -f v), so it can be opened in most tree viewers (like
+   iToL and FigTree).  Also, it creates iToL-compatible files to draw pie-charts
+   (based on the classification of short reads) in the nodes of the reference tree.
+Usage:
+   $0 -n <NAME> [other options...]
+   -n <str> *	Name of the run used in RAxML.
+   -t <str>	Use this file as original labelled tree, instead of generating one
+   		based on the job name.  By default, RAxML_originalLabelledTree.<NAME>
+		in the -d directory. See [NOTE1].
+   -d <str>	Directory containing RAxML files.  By default: current directory.
+   -o <str>	Output tree.  By default, it takes the path to the input tree and
+   		appends .nwk to it.
+   -l <str>	File containing a list of internal nodes.  The nodes in the list
+   		will be renamed, and the reads of all children nodes will be
+		transferred to it.  This can be useful if you want to display
+		these nodes collapsed.  The format of the file is raw text, with
+		two columns separated by tabs or spaces, where the first column is
+		the original name of the internal node (without the brackets) and
+		the second is the name to be used.  See [NOTE2].
+   -a		Append original label to the renamed nodes (only if -l is passed).
+   -s <str>	The names of the reads will be assumed to contain the sample name,
+   		separated by this string.  For example, if the value is '_', and
+		a read has the name 'hco_ABCDEF/1#ACTG', it will be assumed to be
+		a read from the sample 'hco'.  If not provided, all the reads are
+		assumed to come from the same sample (called 'unknown').
+   -m <str>	Comma-delimited list of samples.  If not provided, all found samples
+   		will be used (unsorted).
+   -c <str>	Comma-delimited list of colors (in RGB hexadecimal) to represent
+   		the different samples.  If not provided (or if insufficient values
+		are provided) random colors are generated.
+   -N <str>	Comma-delimited list of normalizing factors per dataset.  Typically,
+   		the size of the datasets divided by a fixed value (e.g. size x 1,000,
+		to express sizes as reads per thousand).
+   -T		Use the total number of assigned reads per sample (times a constant)
+   		as the normalizing factor. The constant used corresponds to the 100
+		times the size of the largest factor. If passed, -N is ignored.
+   -q		Run quietly.
+   -h/--help	Displays this message and exits.
+   * Mandatory
+   [NOTE1] The tree provided by -t MUST be based on a tree produced by this script
+   without the -l option.
+   [NOTE2] The tree produced by RAxML-EPA is usually not correctly rooted, which
+   makes the -l option useless.  However, you can manually root the tree and provide
+   the rooted tree in Newick format using the -t option.  If you do this, make
+   sure the program doesn't change/delete the names of the internal nodes.  I know
+   that iToL can do it correctly (if you export preserving the original IDs), while
+   FigTree deletes the labels.  I didn't try any other tool.
+";
+}
+# Parse the command line into %o.  Letters followed by ':' take a value;
+# -a, -T, -q and -h are plain switches.  -a has to be declared here: it is
+# documented in the usage text and read further down ($o{a}), but without
+# its letter in the spec Getopt::Std treats it as unknown and never sets it.
+my %o;
+getopts('n:t:d:o:l:as:m:c:N:Tqh', \%o);
+$o{d} ||= '.';
+# A missing run name and an explicit -h both end in the usage screen.
+HELP_MESSAGE() if $o{h} or not $o{n};
+# -c and -N are comma-delimited lists; keep them as array references so the
+# values can be consumed one per sample later on.
+$o{c} = [split /,/, (defined $o{c} ? $o{c} : "")];
+$o{N} = [split /,/, (defined $o{N} ? $o{N} : "")];
+# Resolve input/output paths: explicit options win, otherwise the names are
+# derived from the RAxML directory (-d) and run name (-n).
+my $inTree   = $o{t} ? $o{t} : "$o{d}/RAxML_originalLabelledTree.$o{n}";
+my $outTree  = $o{o} ? $o{o} : "$inTree.nwk";
+my $inClass  = "$o{d}/RAxML_classification.$o{n}";
+my $outClass = "$inClass.iToL";
+my $outColl  = "$outTree.collapse.iToL";
+# Load the tree (only the first line of the file is read)
+$o{q} or print STDERR "o Reformatting tree.\n";
+open my $in_tree_fh, "<", $inTree or die "Cannot read file: $inTree: $!\n";
+my $tree = <$in_tree_fh>;
+close $in_tree_fh;
+# Move each bracketed label in front of its branch length
+# (":0.1[I1]" becomes "[I1]:0.1"); trees passed with -t are left as they are.
+unless($o{t}){
+   $tree =~ s/:([\d\.]+)(\[.+?\])/$2:$1/g;
+}
+# Index the leaves: bracketed tag => full "name[tag]" string
+$o{q} or print STDERR "o Reading nodes.\n";
+my %tags = ();
+my $t = $tree;
+while($t =~ m/([A-Za-z0-9_\|\.-]+\[([A-Za-z0-9_\|\.-]+)\])/g){
+   $tags{$2} = $1;
+}
+# Label/collapse internal nodes
+# Every line of the -l file maps an internal node label to a new name.  The
+# node is renamed in the tree, every node found inside its clade is re-tagged
+# with the new name (so reads placed there are counted on the renamed node),
+# and the new name is written to the collapse list.
+if($o{l}){
+   print STDERR "o Labeling/collapsing internal nodes.\n" unless $o{q};
+   open my $list_fh, "<", $o{l} or die "Cannot read file: $o{l}: $!\n";
+   open my $coll_fh, ">", $outColl or die "Cannot create file: $outColl: $!\n";
+   while(my $line = <$list_fh>){
+      chomp $line;
+      next if $line =~ /^#/ or $line =~ /^\s*$/;
+      # Label internal node
+      my @l = split /\s+/, $line;
+      # The usage text documents the first column without brackets, while this
+      # parser used to require them: accept both forms.
+      $l[0] =~ m/^\[?(.+?)\]?$/ or die "Unable to parse internal node name: $l[0].\n";
+      my $ori = $1;
+      defined $l[1] or die "Missing new name for internal node: $l[0].\n";
+      my $new = $l[1];
+      if(exists $tags{$ori}){
+         warn "Warning: Trying to label/collapse $ori as $new, already defined as $tags{$ori}.\n";
+         next;
+      }
+      $new =~ s/[^A-Za-z0-9_\|\.\-]/_/g;
+      $new.= "[$ori]" if $o{a};
+      # The label is data, not a pattern: quote it before matching.
+      my $renamed = ($tree =~ s/\[\Q$ori\E\]/$new/);
+      # Offset of the replaced label.  Searching for $new afterwards could hit
+      # an earlier node whose name merely contains the new name.
+      my $pos = $renamed ? $-[0] : -1;
+      if($pos < 0){
+         warn "Warning: Internal node [$ori] not found in the tree, skipping.\n";
+         next;
+      }
+      $tags{$ori} = $new;
+      # Isolate node: walk backwards from the ')' that closes the clade until
+      # its matching '(' is found ($c counts the parentheses still open).
+      $t = substr $tree, 0, $pos;
+      my $i=length($t)-2;
+      for(my $c=1 ; $i and $c; $i--){
+         my $char = substr $t, $i, 1;
+         $c++ if $char eq ')';
+         $c-- if $char eq '(';
+      }
+      $t = substr $t, $i;
+      # Get children: turn branch lengths and parentheses into separators, so
+      # only node names/labels remain between commas.
+      $t =~ s/:[\d\.]+|[\(\)]/,/g;
+      $t =~ s/,+/,/g;
+      my $chn=0;
+      for my $child (split /,/, $t){
+         next unless $child;
+         $child =~ s/.*\[(.+?)\]/$1/;
+         $tags{$child} = $new;
+         $chn++;
+      }
+      print STDERR "  Collapsing $new: $chn children.\n" unless $o{q};
+      print {$coll_fh} "$new\n";
+   }
+   close $list_fh;
+   close $coll_fh or die "Cannot write file: $outColl: $!\n";
+}
+# Save tree
+open my $out_tree_fh, ">", $outTree or die "Cannot create file: $outTree: $!\n";
+print {$out_tree_fh} $tree;
+# Buffered write errors only surface when the handle is closed.
+close $out_tree_fh or die "Cannot write file: $outTree: $!\n";
+# Count reads
+# %samples: reads per sample.  %nodes: node name => { sample => reads }.
+my %samples = ();
+my %nodes   = ();
+print STDERR "o Counting reads.\n" unless $o{q};
+my $s = defined $o{s} ? $o{s} : "";
+open my $in_class_fh, "<", $inClass or die "Cannot read file: $inClass: $!\n";
+while(my $line = <$in_class_fh>){
+   my @ln = split /\s+/, $line;
+   # Blank or single-column lines carry no read/node pair.
+   next unless defined $ln[1];
+   # Sample name: everything before the separator.  -s is documented as a
+   # plain string, so it is quoted (an unquoted '|' or '.' would be read as a
+   # regex operator).  With an empty separator the whole name is dropped and
+   # the read ends up in the unnamed sample.
+   $ln[0] =~ s/\Q$s\E.+$//;
+   $samples{$ln[0]}++;
+   # Node name: nodes not seen in the tree keep their bracketed label.
+   $tags{$ln[1]} ||= "[".$ln[1]."]";
+   $nodes{$tags{$ln[1]}}{$ln[0]}++;
+}
+close $in_class_fh;
+# Build the two header lines of the iToL dataset (LABELS and COLORS) and the
+# list of normalizing factors, one entry per sample and in the same order.
+my @samples = $o{m} ? (split /,/, $o{m}) : (keys %samples);
+my @normfac = ();
+my @legend_names  = ();
+my @legend_colors = ();
+for my $sample (@samples){
+   # Take the next user-supplied color; anything that is not six characters
+   # long is replaced by a random one made of three doubled hex digits.
+   my $col = shift @{$o{c}};
+   if(not defined $col or length($col) != 6){
+      $col = join '', map { my $hex = sprintf('%X', int rand 16); "$hex$hex" } 1 .. 3;
+   }
+   # Missing or non-positive factors fall back to 1
+   my $nf = shift @{$o{N}};
+   $nf = 1 unless defined $nf and $nf>0;
+   push @legend_names,  ($sample || 'unknown');
+   push @legend_colors, '#'.$col;
+   push @normfac, $nf+0;
+}
+my $labs = join ',', 'LABELS', @legend_names;
+my $cols = join ',', 'COLORS', @legend_colors;
+# Write the iToL dataset: the header, then one line per node holding the
+# rounded total (prefixed with R) and the rounded, normalized count per sample.
+open my $out_class_fh, ">", $outClass or die "Cannot create file: $outClass: $!\n";
+print {$out_class_fh} "$labs\n$cols\n";
+my $tiny=0;
+for my $node (keys %nodes){
+   my $counts = $nodes{$node};
+   my $i=0;
+   for my $s (@samples){
+      my $raw = $counts->{$s} || 0;
+      # -T: normalize by the sample's share of reads relative to 100 times the
+      # largest sample; otherwise use the next per-sample factor.
+      my $factor = $o{T} ? ($samples{$s}||1)/(max(values %samples)*100) : ($normfac[$i++]||1);
+      $counts->{$s} = $raw/$factor;
+   }
+   my $r = round(sum(values %{$counts}));
+   print {$out_class_fh} join(',', $node, "R$r", map { round($counts->{$_} || 0) } @samples), "\n";
+   # Nodes whose total rounds to zero are reported at the end
+   $tiny++ unless $r;
+}
+close $out_class_fh;
+if(not $o{q}){
+   print "Total counts per dataset:\n";
+   for my $sample (@samples){
+      print "  $sample\t".($samples{$sample}||0)."\n";
+   }
+}
+warn "$tiny node assignments are too small to represent. Decrease the values of -N or use an alternative like -T." if $tiny;

data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R ADDED Viewed

@@ -0,0 +1,32 @@
+#!/usr/bin/env Rscript
+#
+# @author  Luis M. Rodriguez-R
+# @update  Jan-04-2016
+# @license artistic license 2.0
+#
+#= Locate the enveomics.R sources next to this script and load what is needed
+args <- commandArgs(trailingOnly = FALSE)
+script_file <- sub("^--file=", "", grep("^--file=", args, value = TRUE))
+enveomics_R <- file.path(dirname(script_file), "lib", "enveomics.R")
+library(methods)
+for (r_file in c("cliopts.R", "recplot2.R")) {
+   source(file.path(enveomics_R, "R", r_file))
+}
+
+#= Generate interface: two positional arguments, one Rdata file per plot
+opt <- enve.cliopts(
+   enve.recplot2.compareIdentities,
+   file.path(enveomics_R, "man", "enve.recplot2.compareIdentities.Rd"),
+   positional_arguments = 2,
+   usage = "usage: %prog [options] recplot-A.Rdata recplot-B.Rdata",
+   number = c("pseudocounts", "max.deviation"),
+   ignore = c("x", "y"),
+   p_desc = "Calculates the difference between identity distributions of two recruitment plots.")
+
+#= Run it!
+# Each Rdata file is expected to define an object called `rp`; the first one
+# becomes argument x and the second one argument y.
+for (k in 1:2) {
+   load(opt$args[k])
+   opt$options[[c("x", "y")[k]]] <- rp
+}
+cat(do.call("enve.recplot2.compareIdentities", opt$options), '\n')

data/utils/enveomics/Scripts/RefSeq.download.bash ADDED Viewed

@@ -0,0 +1,48 @@
+#!/bin/bash
+#
+# @author  Luis M. Rodriguez-R
+# @update  Oct-20-2015
+# @license artistic license 2.0
+#
+# Positional arguments: <organism> [<extension> [<level> [<dir>]]]
+FTP="ftp://ftp.ncbi.nlm.nih.gov/genomes/refseq/bacteria"
+ORG=$1
+EXT=${2:-.*.gz}
+STT=${3:-Any}
+DIR=${4:-$ORG}
+# Without an organism there is nothing to download: show the usage and leave
+if [[ "$ORG" == "" ]] ; then
+echo "
+Downloads a collection of sequences and/or annotations from NCBI's RefSeq.
+Usage:
+$0 <organism> [<extension>[ <level>[ <dir>]]]
+<organism>	The organism to download (e.g., Streptococcus_pneumoniae).
+<extension>	Extension to download.  Common extensions include '.fna.gz'
+		(genomic sequences), '.faa.gz' (protein sequences), and
+		'.gff.gz' (annotations).  By default: '.*.gz' (all data).
+<level>		Use only genomes with this assembly level. Common levels are
+		'Complete Genome' and 'Contig'.  By default, any assembly
+		level is allowed ('Any').
+<dir>		Directory where the files are to be downloaded. By default,
+		same as <organism>.
+" >&2
+exit
+fi
+# -p: <dir> may be a nested path
+[[ -d "$DIR" ]] || mkdir -p "$DIR"
+# Everything below depends on the assembly index, so stop if it cannot be
+# retrieved instead of iterating over a missing or partial file.
+if ! curl -s "$FTP/$ORG/assembly_summary.txt" -o "$DIR/assembly_summary.txt" ; then
+   echo "Cannot download the assembly summary of $ORG" >&2
+   exit 1
+fi
+# Column 12 is compared against <level> and column 20 is used as the remote
+# path of each assembly.  Lines starting with '#' are skipped: with the
+# default level 'Any' the header row would otherwise contribute its own
+# column title as a path.  Entries without a path ('na') are skipped too.
+# The level goes in through -v rather than being spliced into the program.
+for path in $(awk -F"\t" -v stt="$STT" \
+      '!/^#/ && $20!="" && $20!="na" && ($12==stt || stt=="Any") {print $20}' \
+      "$DIR/assembly_summary.txt") ; do
+   dir="$DIR/$(basename "$path")"
+   [[ -d "$dir" ]] || mkdir -p "$dir"
+   # Field 9 of the remote directory listing is taken as the file name; only
+   # names ending in <extension> (a glob pattern) are fetched.
+   for file in $(curl -s "$path/" | awk '{print $9}') ; do
+      if [[ "$file" == *$EXT ]] ; then
+         curl -s "$path/$file" -o "$dir/$file"
+      fi
+   done
+done

data/utils/enveomics/Scripts/SRA.download.bash ADDED Viewed

@@ -0,0 +1,55 @@
+#!/bin/bash
+# Positional arguments: <SRA-ID> [<dir>]
+DATA_LINK="https://www.ebi.ac.uk/ena/portal/api/filereport"
+DATA_OPS="result=read_run&fields=run_accession,fastq_ftp,fastq_md5"
+SRX=$1
+DIR=${2:-$SRX}
+VERSION=1.0
+# Without an ID there is nothing to download: show the usage and leave
+if [[ "$SRX" == "" ]] ; then
+echo "
+[Enveomics Collection: $(basename "$0" .bash) $VERSION]
+Downloads the set of runs from a project, sample, or experiment in SRA.
+Usage:
+$(basename "$0") <SRA-ID>[ <dir>]
+<SRA-ID>	ID of the SRA Project, Sample, or Experiment.
+<dir>		Directory where the files are to be downloaded. By default,
+		same as <SRA-ID>.
+" >&2
+exit
+fi
+# -p: <dir> may be a nested path
+[[ -d "$DIR" ]] || mkdir -p "$DIR"
+# md5value <file>
+# Prints the MD5 checksum of <file>, trying md5, md5sum-lite and md5sum in
+# that order.  Prints an empty line when none of them produces a value.
+function md5value {
+  local file=$1
+  local o   # kept local so the caller's variables are not overwritten
+  o=$(md5 "$file" | perl -pe 's/.* //')
+  [[ -n $o ]] || o=$(md5sum-lite "$file" | awk '{print $1}')
+  [[ -n $o ]] || o=$(md5sum "$file" | awk '{print $1}')
+  echo "$o"
+}
+# The report has a header line followed by one tab-delimited line per run:
+# accession, ';'-separated download locations, ';'-separated MD5 values.
+if ! curl -Lsf "$DATA_LINK?$DATA_OPS&accession=$SRX" -o "$DIR/srr_list.txt" ; then
+  echo "Cannot retrieve the list of runs of $SRX" >&2
+  exit 1
+fi
+# NOTE: the loop runs in a pipeline subshell, so 'exit 1' below ends the loop;
+# being the last command, its status becomes the status of the script.
+tail -n +2 "$DIR/srr_list.txt" | while IFS= read -r ln ; do
+  srr=$(echo "$ln"|cut -f 1)
+  ftp=$(echo "$ln"|cut -f 2)
+  md5=$(echo "$ln"|cut -f 3)
+  dir="$DIR/$srr"
+  [[ -d "$dir" ]] || mkdir -p "$dir"
+  echo "o $srr" >&2
+  for uri in $(echo "$ftp" | tr ";" " ") ; do
+    file="$dir/$(basename "$uri")"
+    if ! curl "$uri" -o "$file" ; then
+      echo "Cannot download: $uri" >&2
+      exit 1
+    fi
+    md5obs=$(md5value "$file" 2> /dev/null)
+    if [[ -z "$md5obs" ]] ; then
+      # An empty checksum would match any expected value in the prefix test
+      # below; say that the file was not checked instead of passing silently.
+      echo "Warning: cannot compute MD5, file not verified: $file" >&2
+    elif [[ "$md5" != "$md5obs"* ]] ; then
+      echo "Corrupt file: $file" >&2
+      echo "  MD5 mismatch: $md5obs not in $md5" >&2
+      exit 1;
+    fi
+    # Drop the checksum of this file so the next one is first in the list
+    md5=$(echo "$md5" | perl -pe 's/^[^;]+;//')
+  done
+done

data/utils/enveomics/Scripts/TRIBS.plot-test.R ADDED Viewed

@@ -0,0 +1,36 @@
+#!/usr/bin/env Rscript
+#
+# @author  Luis M. Rodriguez-R
+# @update  Jan-05-2016
+# @license artistic license 2.0
+#
+#= Load the package and locate the enveomics.R folder next to this script
+suppressPackageStartupMessages(library(enveomics.R))
+args <- commandArgs(trailingOnly = FALSE)
+script_file <- sub("^--file=", "", grep("^--file=", args, value = TRUE))
+enveomics_R <- file.path(dirname(script_file), "lib", "enveomics.R")
+
+#= Generate interface: output.pdf is mandatory, width and height are optional
+opt <- enve.cliopts(
+   plot.enve.TRIBStest,
+   file.path(enveomics_R, "man", "plot.enve.TRIBStest.Rd"),
+   positional_arguments = c(1, 3),
+   usage = "usage: %prog [options] output.pdf [width height]",
+   mandatory = c("x"),
+   vectorize = c("xlim", "ylim"),
+   number = c("xlim", "ylim"),
+   defaults = c(type = "overlap", xlim = NA, ylim = NA))
+
+#= Run it!
+# Option x is the path to an Rdata file: load it into a private environment
+# and replace the path with the object stored in it.
+rdata <- new.env()
+load(opt$options[['x']], rdata)
+opt$options[['x']] <- get(ls(envir = rdata), envir = rdata)
+summary(opt$options[['x']])
+# Limits left at their NA default are removed from the call
+for (lim in c("xlim", "ylim")) {
+   if (is.na(opt$options[[lim]][1])) opt$options[[lim]] <- NULL
+}
+# Positional arguments go to pdf(); the 2nd and 3rd ones are numeric
+pdf_args <- as.list(opt$args)
+for (i in 2:3) {
+   if (length(pdf_args) >= i) pdf_args[[i]] <- as.numeric(pdf_args[[i]])
+}
+do.call("pdf", pdf_args)
+do.call("plot.enve.TRIBStest", opt$options)
+dev.off()

data/utils/enveomics/Scripts/TRIBS.test.R ADDED Viewed

@@ -0,0 +1,39 @@
+#!/usr/bin/env Rscript
+# Command-line wrapper: reads a distance matrix and a selection list, calls enve.tribs.test, prints a summary and optionally saves the result.
+# @author  Luis M. Rodriguez-R
+# @update  Jan-05-2016
+# @license artistic license 2.0
+#
+#= Load stuff
+suppressPackageStartupMessages(library(enveomics.R))
+args <- commandArgs(trailingOnly = F) # full command line, so the --file= argument is included
+enveomics_R <- file.path(dirname(
+   sub("^--file=", "", args[grep("^--file=", args)])), # directory of the path given in --file=
+   "lib", "enveomics.R")
+#= Generate interface (built from enve.tribs and its .Rd file; warnings raised while building it are suppressed)
+opt <- suppressWarnings(enve.cliopts(enve.tribs,
+   file.path(enveomics_R, "man", "enve.tribs.Rd"),
+   positional_arguments=c(0,2),
+   usage="usage: %prog [options] [output.Rdata [bins=50]]",
+   mandatory=c("dist", "selection"),
+   defaults=c(dimensions=0, selection=NULL), # c() drops NULL elements, so only dimensions=0 is actually set here
+   ignore=c("metaMDS.opts","points","pre.tribs","subsamples"),
+   o_desc=list(dist="A tab-delimited matrix of distances.",
+      selection="A list of names with the selection to evaluate."),
+   p_desc=paste("",
+      "Estimates the empirical difference between all the distances",
+      "in a set of objects and a subset, together with its statistical",
+      "significance.",sep="\n\t")))
+#= Run it! (the file paths received as options are replaced by the data read from them)
+opt$options[['dist']] <- as.dist(read.table(opt$options[['dist']],
+   header=TRUE, sep="\t", row.names=1))
+opt$options[['selection']] <- read.table(opt$options[['selection']],
+   header=FALSE, sep="\t", as.is=TRUE)[,1] # only the first column is used
+if(opt$options[['dimensions']]==0) opt$options[['dimensions']] <- NULL # 0 is the "unset" placeholder; assigning NULL removes the entry
+if(length(opt$args)>1) opt$options[['bins']] <- as.numeric(opt$args[2]) # optional second positional argument
+t <- do.call("enve.tribs.test", opt$options) # NOTE(review): interface comes from enve.tribs but enve.tribs.test is called -- presumably it accepts the same options; confirm
+summary(t)
+if(length(opt$args)>0) save(t, file=opt$args[1]) # the object is stored in the file under the name "t"

data/utils/enveomics/Scripts/Table.barplot.R ADDED Viewed

@@ -0,0 +1,31 @@
+#!/usr/bin/env Rscript
+# Command-line wrapper: draws enve.barplot into a PDF, with the options taken from the command line.
+# @author  Luis M. Rodriguez-R
+# @update  Dec-29-2015
+# @license artistic license 2.0
+#
+#= Load stuff (the R sources are read from lib/enveomics.R/R next to this script)
+args <- commandArgs(trailingOnly = F) # full command line, so the --file= argument is included
+enveomics_R <- file.path(dirname(
+   sub("^--file=", "", args[grep("^--file=", args)])), # directory of the path given in --file=
+   "lib", "enveomics.R")
+source(file.path(enveomics_R, "R", "cliopts.R"))
+source(file.path(enveomics_R, "R", "utils.R"))
+source(file.path(enveomics_R, "R", "barplot.R"))
+#= Generate interface (enve.cliopts receives the function and the path to its .Rd file)
+opt <- enve.cliopts(enve.barplot,
+   file.path(enveomics_R, "man", "enve.barplot.Rd"),
+   positional_arguments=c(1,3),
+   usage="usage: %prog [options] output.pdf [width height]",
+   mandatory=c("x"), vectorize=c("sizes","order","col"),
+   number=c("sizes","order"),
+   o_desc=list(x="A tab-delimited file containing header (first row) and row names (first column)."))
+#= Run it!
+args = as.list(opt$args) # positional arguments: output.pdf [width height]
+for(i in 2:3) if(length(args)>=i) args[[i]] <- as.numeric(args[[i]]) # width and height, when given, become numbers
+do.call("pdf", args)
+do.call("enve.barplot", opt$options)
+dev.off()

data/utils/enveomics/Scripts/Table.df2dist.R ADDED Viewed

@@ -0,0 +1,30 @@
+#!/usr/bin/env Rscript
+# Command-line wrapper: reads a tab-delimited table, passes it to enve.df2dist and writes the result as a tab-delimited matrix.
+# @author  Luis M. Rodriguez-R
+# @update  Jan-04-2016
+# @license artistic license 2.0
+#
+#= Load stuff (the R sources are read from lib/enveomics.R/R next to this script)
+args <- commandArgs(trailingOnly = F) # full command line, so the --file= argument is included
+enveomics_R <- file.path(dirname(
+   sub("^--file=", "", args[grep("^--file=", args)])), # directory of the path given in --file=
+   "lib", "enveomics.R")
+source(file.path(enveomics_R, "R", "cliopts.R"))
+source(file.path(enveomics_R, "R", "df2dist.R"))
+#= Generate interface (enve.cliopts receives the function and the path to its .Rd file)
+opt <- enve.cliopts(enve.df2dist,
+   file.path(enveomics_R, "man", "enve.df2dist.Rd"),
+   positional_arguments=1,
+   usage="usage: %prog [options] output.mat",
+   mandatory=c("x"),
+   number=c("default.d", "max.sim"),
+   o_desc=list(x="A tab-delimited table with the distances."),
+   p_desc="Transform a tab-delimited list of distances into a squared matrix.")
+#= Run it! (the file path received as option x is replaced by the table read from it)
+opt$options[['x']] <- read.table(opt$options[['x']],
+   header=TRUE, sep="\t", as.is=TRUE)
+dist <- do.call("enve.df2dist", opt$options)
+write.table(as.matrix(dist), opt$args[1], quote=FALSE, sep="\t", col.names=NA) # output goes to the single positional argument

data/utils/enveomics/Scripts/Table.filter.pl ADDED Viewed

@@ -0,0 +1,61 @@
+#!/usr/bin/env perl
+#
+# @author: Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
+# @update: Mar-23-2015
+# @license: artistic license 2.0
+#
+# Prints the rows of table.txt whose key column is (or, with -i, is not)
+# among the IDs in list.txt.  One of the two files is indexed in memory and
+# the other one is streamed; the streamed file dictates the output order.
+#
+use warnings;
+use strict;
+use Getopt::Std;
+my %o;
+getopts('k:s:ihn', \%o);
+my($list, $table) = @ARGV;
+($list and $table) or die "
+.Description:
+   Extracts (and re-orders) a subset of rows from a raw table.
+.Usage: $0 [options] list.txt table.txt > subset.txt
+   Options:
+      -k <int>	Column of the table to use as key to filter.  By default, 1.
+      -s <str>	String to use as separation between columns.  By default, tabulation.
+      -i	If set, reports the inverse of the list (i.e., reports only rows
+      		absent in the list).  Implies -n.
+      -h	Keep first row of the table (header) untouched.
+      -n	No re-order.  The output has the same order of the table.  By
+      		default, it prints in the order of the list.
+   list.txt	List of IDs to extract.
+   table.txt	Table file containing the superset.
+   subset.txt	Table file to be created.
+";
+$o{k} ||= 1;
+$o{s} ||= "\t";
+$o{n}=1 if $o{i};
+# With -n the list is indexed (by its first field) and the table is streamed;
+# otherwise the table is indexed (by column -k) and the list is streamed.
+my $index_file  = $o{n} ? $list   : $table;
+my $index_col   = $o{n} ? 0       : $o{k}-1;
+my $stream_file = $o{n} ? $table  : $list;
+my $stream_col  = $o{n} ? $o{k}-1 : 0;
+# The header, if requested, is always the first row of the table.
+my $HEADER = "";
+open my $index_fh, "<", $index_file
+   or die "Cannot read file: $index_file: $!\n";
+$HEADER = <$index_fh> if $o{h} and not $o{n};
+my %row_of;
+while(my $row = <$index_fh>){
+   chomp $row;
+   my @field = split $o{s}, $row;
+   # Rows lacking the key column (e.g., blank lines) cannot be looked up.
+   next unless defined $field[$index_col];
+   # A repeated key keeps the last row seen.
+   $row_of{ $field[$index_col] } = $row;
+}
+close $index_fh;
+open my $stream_fh, "<", $stream_file
+   or die "Cannot read file: $stream_file: $!\n";
+$HEADER = <$stream_fh> if $o{h} and $o{n};
+# An empty table has no first row to print.
+print $HEADER if defined $HEADER;
+while(my $ln = <$stream_fh>){
+   chomp $ln;
+   # Test the length, not the truth, so that a row consisting of "0" is kept.
+   next unless length $ln;
+   my @ln = split $o{s}, $ln;
+   my $key = $ln[$stream_col];
+   # A row without the key column is never "in the list".
+   my $good = (defined $key and exists $row_of{$key});
+   $good = not $good if $o{i};
+   # -n (implied by -i) echoes the streamed table row; otherwise the streamed
+   # file is the list and the matching table row is printed.
+   print "".($o{n} ? $ln : $row_of{$key})."\n" if $good;
+}
+close $stream_fh;

data/utils/enveomics/Scripts/Table.merge.pl ADDED Viewed

@@ -0,0 +1,77 @@
+#!/usr/bin/env perl
+#
+# @author: Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
+# @update: Sep-20-2015
+# @license: artistic license 2.0
+#
+# Merges several two-column lists (ID, value) into one table with one row per
+# ID and one column per input file.
+#
+use warnings;
+use strict;
+use Getopt::Std;
+my %o;
+getopts('si:o:ne:h:H:r:', \%o);
+my @files = @ARGV;
+$#files>0 or die "
+.Description:
+   Merges multiple (two-column) lists into one table.
+.Usage:
+   $0 [options] files... > output.txt
+   Options:
+      -s	Values are read as Strings.  By default, values are read as numbers.
+      -i <str>	Input field-delimiter.  By default: tabulation (\"\\t\").
+      -o <str>	Output field-delimiter.  By default: tabulation (\"\\t\").
+      -n	No-header.  By default, the header is determined by the file names.
+      -e <str>	Default string when no value is found.  By default, the \"empty\" value
+      		is 0 if values are numeric (i.e., unless -s is set) or an empty string
+		otherwise.
+      -h <str>	Header of the first column, containing the IDs.  By default: \"Tag\".
+      -H <str>	Format of filenames capturing the column header in the first capturing
+		parenthesis.  Non-capturing paretheses can be defined as (?:...).  By
+		default: \"(?:.*/)?([^\\.]+)\", which captures the part of the basename
+		of the file before the first dot (if any).
+      -r <int>	Number of leading rows to ignore in the input files. Zero by default.
+";
+$o{i} ||= "\t";
+$o{o} ||= "\t";
+# Test definedness, not truth: an explicit -e 0 (or -e '') must be honoured
+# instead of being replaced by the default.
+$o{e} = ($o{s} ? "" : 0) unless defined $o{e};
+$o{h} ||= "Tag";
+$o{H} ||= "(?:.*/)?([^\\.]+)";
+$o{r} += 0;
+# $notes->{ID}->[file index] holds the value of that ID in that file.
+my $notes = {};
+print $o{h} unless $o{n};
+for my $i (0 .. $#files){
+   my $file = $files[$i];
+   unless($o{n}){
+      # The column header is the first capture of -H on the file name.
+      $file =~ m/$o{H}/ or die "Filename '$file' doesn't match format '$o{H}'.";
+      my $tag=$1;
+      print $o{o}.$tag;
+   }
+   open my $in, "<", $file or die "Cannot read file: $file: $!\n";
+   while(my $ln = <$in>){
+      # $. is the line number within the current file.
+      next if $. <= $o{r};
+      chomp $ln;
+      my @l = split $o{i}, $ln;
+      # A blank line has no ID; do not let it create a row of its own.
+      next unless @l;
+      # Adding zero forces the value to a number (a missing value becomes 0).
+      $l[1]+=0 unless $o{s};
+      # A repeated ID within one file keeps the last value seen.
+      $notes->{$l[0]}->[$i] = $l[1];
+   }
+   close $in;
+}
+print "\n" unless $o{n};
+# Hash order is arbitrary and changes between runs; sort the IDs so the same
+# input always yields the same output.
+for my $id (sort keys %$notes){
+   my $row = $notes->{$id};
+   print join($o{o}, $id,
+      map { defined $row->[$_] ? $row->[$_] : $o{e} } 0 .. $#files), "\n";
+}