RubyGems - miga-base - Versions diffs - 0.7.26.0 → 0.7.26.1 - Mend

miga-base 0.7.26.0 → 0.7.26.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (276) hide show

checksums.yaml +4 -4
data/lib/miga/version.rb +1 -1
data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +41964 -0
data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +32439 -0
data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +62056 -0
data/utils/FastAAI/FastAAI/FastAAI +1336 -0
data/utils/FastAAI/README.md +84 -0
data/utils/FastAAI/kAAI_v1.0_virus.py +1296 -0
data/utils/enveomics/Docs/recplot2.md +244 -0
data/utils/enveomics/Examples/aai-matrix.bash +66 -0
data/utils/enveomics/Examples/ani-matrix.bash +66 -0
data/utils/enveomics/Examples/essential-phylogeny.bash +105 -0
data/utils/enveomics/Examples/unus-genome-phylogeny.bash +100 -0
data/utils/enveomics/LICENSE.txt +73 -0
data/utils/enveomics/Makefile +52 -0
data/utils/enveomics/Manifest/Tasks/aasubs.json +103 -0
data/utils/enveomics/Manifest/Tasks/blasttab.json +786 -0
data/utils/enveomics/Manifest/Tasks/distances.json +161 -0
data/utils/enveomics/Manifest/Tasks/fasta.json +766 -0
data/utils/enveomics/Manifest/Tasks/fastq.json +243 -0
data/utils/enveomics/Manifest/Tasks/graphics.json +126 -0
data/utils/enveomics/Manifest/Tasks/mapping.json +67 -0
data/utils/enveomics/Manifest/Tasks/ogs.json +382 -0
data/utils/enveomics/Manifest/Tasks/other.json +829 -0
data/utils/enveomics/Manifest/Tasks/remote.json +355 -0
data/utils/enveomics/Manifest/Tasks/sequence-identity.json +501 -0
data/utils/enveomics/Manifest/Tasks/tables.json +308 -0
data/utils/enveomics/Manifest/Tasks/trees.json +68 -0
data/utils/enveomics/Manifest/Tasks/variants.json +111 -0
data/utils/enveomics/Manifest/categories.json +156 -0
data/utils/enveomics/Manifest/examples.json +154 -0
data/utils/enveomics/Manifest/tasks.json +4 -0
data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +69 -0
data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -0
data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -0
data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -0
data/utils/enveomics/Pipelines/assembly.pbs/README.md +189 -0
data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +112 -0
data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +23 -0
data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +44 -0
data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +50 -0
data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +37 -0
data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +68 -0
data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +49 -0
data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +80 -0
data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +57 -0
data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +63 -0
data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +38 -0
data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +73 -0
data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +21 -0
data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +72 -0
data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +98 -0
data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -0
data/utils/enveomics/Pipelines/blast.pbs/README.md +127 -0
data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +109 -0
data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +128 -0
data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +16 -0
data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +22 -0
data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +26 -0
data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +89 -0
data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +29 -0
data/utils/enveomics/Pipelines/idba.pbs/README.md +49 -0
data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +95 -0
data/utils/enveomics/Pipelines/idba.pbs/run.pbs +56 -0
data/utils/enveomics/Pipelines/trim.pbs/README.md +54 -0
data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +70 -0
data/utils/enveomics/Pipelines/trim.pbs/run.pbs +130 -0
data/utils/enveomics/README.md +42 -0
data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +171 -0
data/utils/enveomics/Scripts/Aln.cat.rb +163 -0
data/utils/enveomics/Scripts/Aln.convert.pl +35 -0
data/utils/enveomics/Scripts/AlphaDiversity.pl +152 -0
data/utils/enveomics/Scripts/BedGraph.tad.rb +93 -0
data/utils/enveomics/Scripts/BedGraph.window.rb +71 -0
data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +102 -0
data/utils/enveomics/Scripts/BlastTab.addlen.rb +63 -0
data/utils/enveomics/Scripts/BlastTab.advance.bash +48 -0
data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +55 -0
data/utils/enveomics/Scripts/BlastTab.catsbj.pl +104 -0
data/utils/enveomics/Scripts/BlastTab.cogCat.rb +76 -0
data/utils/enveomics/Scripts/BlastTab.filter.pl +47 -0
data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +194 -0
data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +104 -0
data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +157 -0
data/utils/enveomics/Scripts/BlastTab.recplot2.R +48 -0
data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +86 -0
data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +119 -0
data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +86 -0
data/utils/enveomics/Scripts/BlastTab.subsample.pl +47 -0
data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +114 -0
data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +90 -0
data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +101 -0
data/utils/enveomics/Scripts/Chao1.pl +97 -0
data/utils/enveomics/Scripts/CharTable.classify.rb +234 -0
data/utils/enveomics/Scripts/EBIseq2tax.rb +83 -0
data/utils/enveomics/Scripts/FastA.N50.pl +56 -0
data/utils/enveomics/Scripts/FastA.extract.rb +152 -0
data/utils/enveomics/Scripts/FastA.filter.pl +52 -0
data/utils/enveomics/Scripts/FastA.filterLen.pl +28 -0
data/utils/enveomics/Scripts/FastA.filterN.pl +60 -0
data/utils/enveomics/Scripts/FastA.fragment.rb +92 -0
data/utils/enveomics/Scripts/FastA.gc.pl +42 -0
data/utils/enveomics/Scripts/FastA.interpose.pl +93 -0
data/utils/enveomics/Scripts/FastA.length.pl +38 -0
data/utils/enveomics/Scripts/FastA.mask.rb +89 -0
data/utils/enveomics/Scripts/FastA.per_file.pl +36 -0
data/utils/enveomics/Scripts/FastA.qlen.pl +57 -0
data/utils/enveomics/Scripts/FastA.rename.pl +65 -0
data/utils/enveomics/Scripts/FastA.revcom.pl +23 -0
data/utils/enveomics/Scripts/FastA.sample.rb +83 -0
data/utils/enveomics/Scripts/FastA.slider.pl +85 -0
data/utils/enveomics/Scripts/FastA.split.pl +55 -0
data/utils/enveomics/Scripts/FastA.split.rb +79 -0
data/utils/enveomics/Scripts/FastA.subsample.pl +131 -0
data/utils/enveomics/Scripts/FastA.tag.rb +65 -0
data/utils/enveomics/Scripts/FastA.wrap.rb +48 -0
data/utils/enveomics/Scripts/FastQ.filter.pl +54 -0
data/utils/enveomics/Scripts/FastQ.interpose.pl +90 -0
data/utils/enveomics/Scripts/FastQ.offset.pl +90 -0
data/utils/enveomics/Scripts/FastQ.split.pl +53 -0
data/utils/enveomics/Scripts/FastQ.tag.rb +63 -0
data/utils/enveomics/Scripts/FastQ.test-error.rb +81 -0
data/utils/enveomics/Scripts/FastQ.toFastA.awk +24 -0
data/utils/enveomics/Scripts/GFF.catsbj.pl +127 -0
data/utils/enveomics/Scripts/GenBank.add_fields.rb +84 -0
data/utils/enveomics/Scripts/HMM.essential.rb +351 -0
data/utils/enveomics/Scripts/HMM.haai.rb +168 -0
data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +83 -0
data/utils/enveomics/Scripts/JPlace.distances.rb +88 -0
data/utils/enveomics/Scripts/JPlace.to_iToL.rb +320 -0
data/utils/enveomics/Scripts/M5nr.getSequences.rb +81 -0
data/utils/enveomics/Scripts/MeTaxa.distribution.pl +198 -0
data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +35 -0
data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +49 -0
data/utils/enveomics/Scripts/NCBIacc2tax.rb +92 -0
data/utils/enveomics/Scripts/Newick.autoprune.R +27 -0
data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +228 -0
data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +32 -0
data/utils/enveomics/Scripts/RefSeq.download.bash +48 -0
data/utils/enveomics/Scripts/SRA.download.bash +57 -0
data/utils/enveomics/Scripts/TRIBS.plot-test.R +36 -0
data/utils/enveomics/Scripts/TRIBS.test.R +39 -0
data/utils/enveomics/Scripts/Table.barplot.R +31 -0
data/utils/enveomics/Scripts/Table.df2dist.R +30 -0
data/utils/enveomics/Scripts/Table.filter.pl +61 -0
data/utils/enveomics/Scripts/Table.merge.pl +77 -0
data/utils/enveomics/Scripts/Table.replace.rb +69 -0
data/utils/enveomics/Scripts/Table.round.rb +63 -0
data/utils/enveomics/Scripts/Table.split.pl +57 -0
data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +227 -0
data/utils/enveomics/Scripts/VCF.KaKs.rb +147 -0
data/utils/enveomics/Scripts/VCF.SNPs.rb +88 -0
data/utils/enveomics/Scripts/aai.rb +418 -0
data/utils/enveomics/Scripts/ani.rb +362 -0
data/utils/enveomics/Scripts/clust.rand.rb +102 -0
data/utils/enveomics/Scripts/gi2tax.rb +103 -0
data/utils/enveomics/Scripts/in_silico_GA_GI.pl +96 -0
data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
data/utils/enveomics/Scripts/lib/enveomics.R +1 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +24 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +253 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +182 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +74 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +237 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/stat.rb +30 -0
data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +135 -0
data/utils/enveomics/Scripts/ogs.annotate.rb +88 -0
data/utils/enveomics/Scripts/ogs.core-pan.rb +160 -0
data/utils/enveomics/Scripts/ogs.extract.rb +125 -0
data/utils/enveomics/Scripts/ogs.mcl.rb +186 -0
data/utils/enveomics/Scripts/ogs.rb +104 -0
data/utils/enveomics/Scripts/ogs.stats.rb +131 -0
data/utils/enveomics/Scripts/rbm.rb +146 -0
data/utils/enveomics/Tests/Makefile +10 -0
data/utils/enveomics/Tests/Mgen_M2288.faa +3189 -0
data/utils/enveomics/Tests/Mgen_M2288.fna +8282 -0
data/utils/enveomics/Tests/Mgen_M2321.fna +8288 -0
data/utils/enveomics/Tests/Nequ_Kin4M.faa +2970 -0
data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +7 -0
data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +17 -0
data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +137 -0
data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +123 -0
data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +200 -0
data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +55 -0
data/utils/enveomics/Tests/alkB.nwk +1 -0
data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +13 -0
data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +17 -0
data/utils/enveomics/Tests/hiv1.faa +59 -0
data/utils/enveomics/Tests/hiv1.fna +134 -0
data/utils/enveomics/Tests/hiv2.faa +70 -0
data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +233 -0
data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +1 -0
data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +233 -0
data/utils/enveomics/Tests/phyla_counts.tsv +10 -0
data/utils/enveomics/Tests/primate_lentivirus.ogs +11 -0
data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +9 -0
data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +8 -0
data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +6 -0
data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +9 -0
data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +6 -0
data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +6 -0
data/utils/enveomics/build_enveomics_r.bash +45 -0
data/utils/enveomics/enveomics.R/DESCRIPTION +31 -0
data/utils/enveomics/enveomics.R/NAMESPACE +39 -0
data/utils/enveomics/enveomics.R/R/autoprune.R +155 -0
data/utils/enveomics/enveomics.R/R/barplot.R +184 -0
data/utils/enveomics/enveomics.R/R/cliopts.R +135 -0
data/utils/enveomics/enveomics.R/R/df2dist.R +154 -0
data/utils/enveomics/enveomics.R/R/growthcurve.R +331 -0
data/utils/enveomics/enveomics.R/R/recplot.R +354 -0
data/utils/enveomics/enveomics.R/R/recplot2.R +1631 -0
data/utils/enveomics/enveomics.R/R/tribs.R +583 -0
data/utils/enveomics/enveomics.R/R/utils.R +50 -0
data/utils/enveomics/enveomics.R/README.md +80 -0
data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +17 -0
data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +17 -0
data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +17 -0
data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +25 -0
data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +46 -0
data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -0
data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +47 -0
data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -0
data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -0
data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +32 -0
data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +91 -0
data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +57 -0
data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -0
data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -0
data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +39 -0
data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +38 -0
data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +40 -0
data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +67 -0
data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +37 -0
data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +122 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +45 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +68 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +41 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +40 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +41 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +43 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +37 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +74 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +59 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -0
data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +32 -0
data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +59 -0
data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -0
data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -0
data/utils/enveomics/enveomics.R/man/growth.curves.Rd +14 -0
data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +13 -0
data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +63 -0
data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +38 -0
data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +38 -0
data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +111 -0
data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -0
data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -0
data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -0
data/utils/enveomics/globals.mk +8 -0
data/utils/enveomics/manifest.json +9 -0
metadata +277 -4

data/utils/enveomics/Manifest/Tasks/fastq.json ADDED Viewed

@@ -0,0 +1,243 @@
+{
+  "tasks": [
+    {
+      "task": "FastQ.filter.pl",
+      "description": "Extracts a subset of sequences from a FastQ file.",
+      "see_also": ["FastA.filter.pl"],
+      "help_arg": "-h",
+      "options": [
+        {
+          "name": "Reverse list",
+          "opt": "-r",
+          "description": "Extracts sequences NOT present in the list."
+        },
+        {
+          "name": "Quiet",
+          "opt": "-q",
+          "description": "Runs quietly."
+        },
+        {
+          "name": "List",
+          "arg": "in_file",
+          "mandatory": true,
+          "description": "List of sequences to extract."
+        },
+        {
+          "name": "Seqs.fq",
+          "arg": "in_file",
+          "mandatory": true,
+          "description": "FastQ file containing the superset of sequences."
+        },
+        ">",
+        {
+          "name": "Subset.fq",
+          "arg": "out_file",
+          "mandatory": true,
+          "description": "FastQ file to be created."
+        }
+      ]
+    },
+    {
+      "task": "FastQ.interpose.pl",
+      "description": ["Interposes sequences in FastQ format from two files",
+        "into one output file. If more than two files are provided, the script",
+        "will interpose all the input files."],
+      "warn": ["Note that this script will check for the consistency of the",
+        "names (assuming a pair of related reads contains the same name",
+        "varying only in a trailing slash (/) followed by a digit). If you want",
+        "to turn this feature off just set the checking period to zero. If you",
+        "want to decrease the sampling period (to speed the script up) or",
+        "increase it (to make it more sensitive to errors) just change the",
+        "checking period accordingly."],
+      "see_also": ["FastQ.split.pl","FastA.interpose.pl"],
+      "help_arg": "",
+      "options": [
+        {
+          "name": "Checking period",
+          "opt": "-T",
+          "arg": "integer",
+          "default": 1000,
+          "description": "Sampling period for names evaluation."
+        },
+        {
+          "arg": "out_file",
+          "mandatory": true,
+          "description": "Output FastQ file."
+        },
+        {
+          "arg": "in_file",
+          "mandatory": true,
+          "description": "First input FastQ file."
+        },
+        {
+          "arg": "in_file",
+          "mandatory": true,
+          "description": "Second input FastQ file."
+        },
+        {
+          "arg": "in_file",
+          "multiple_sep": " ",
+          "description": "Any additional input FastQ files."
+        }
+      ]
+    },
+    {
+      "task": "FastQ.offset.pl",
+      "description": ["There are several FastQ formats. This script takes a",
+        "FastQ in any of them, identifies the type of FastQ (that is, the",
+        "offset), and generates a FastQ with the given offset."],
+      "warn": ["Note that Solexa+64 FastQ can cause problematic values when",
+        "using the offset 33, since there is no equivalent in Phred+33 for",
+        "negative values (the range of Solexa+64 is -5 to 40)."],
+      "help_arg": "",
+      "options": [
+        {
+          "arg": "in_file",
+          "mandatory": true,
+          "description": ["Input file in FastQ format (range is automatically",
+            "detected)."]
+        },
+        {
+          "name": "Offset",
+          "arg": "integer",
+          "default": 33,
+          "mandatory": true,
+          "description": ["Offset to use for the output. Use 0 (zero) to",
+            "detect the input format and exit."]
+        },
+        {
+          "opt": "force",
+          "description": ["If set, turns errors into warnings and continues.",
+            "Out-of-range values are set to the closest range limit."]
+        },
+        ">",
+        {
+          "arg": "out_file",
+          "mandatory": true,
+          "description": ["Output file in FastQ format with the specified",
+            "offset."]
+        }
+      ]
+    },
+    {
+      "task": "FastQ.split.pl",
+      "description": ["Splits a FastQ file into several FastQ files. This",
+        "script can be used to separate interposed sister reads using any even",
+        "number of output files."],
+      "help_arg": "",
+      "see_also": ["FastQ.interpose.pl","FastA.split.pl"],
+      "options": [
+        {
+          "name": "in_file.fq",
+          "arg": "in_file",
+          "mandatory": true,
+          "description": "Input file in FastQ format."
+        },
+        {
+          "name": "out_base",
+          "arg": "out_file",
+          "mandatory": true,
+          "description": ["Prefix for the name of the output files. It will be",
+            "appended with .<i>.fastq, where <i> is a consecutive number",
+            "starting at 1."]
+        },
+        {
+          "name": "no_files",
+          "arg": "integer",
+          "default": 2,
+          "description": "Number of files to generate."
+        }
+      ]
+    },
+    {
+      "task": "FastQ.tag.rb",
+      "description": "Generates easy-to-parse tagged reads from FastQ files.",
+      "see_also": ["FastA.tag.rb"],
+      "help_arg": "--help",
+      "options": [
+        {
+          "name": "Input file",
+          "opt": "--in",
+          "arg": "in_file",
+          "mandatory": true,
+          "description": "FastQ file containing the sequences."
+        },
+        {
+          "name": "Output file",
+          "opt": "--out",
+          "arg": "out_file",
+          "mandatory": true,
+          "description": "FastQ to create."
+        },
+        {
+          "opt": "--prefix",
+          "arg": "string",
+          "description": "Prefix to use in all IDs."
+        },
+        {
+          "opt": "--suffix",
+          "arg": "string",
+          "description": "Suffix to use in all IDs."
+        },
+        {
+          "opt": "--quiet",
+          "description": "Run quietly (no STDERR output)."
+        }
+      ]
+    },
+    {
+      "task": "FastQ.toFastA.awk",
+      "description": "Translates FastQ files into FastA.",
+      "help_arg": "'' --help",
+      "options": [
+        "<",
+        {
+          "arg": "in_file",
+          "mandatory": true,
+          "description": "Input FastQ file."
+        },
+        ">",
+        {
+          "arg": "out_file",
+          "mandatory": true,
+          "description": "Output FastA file."
+        }
+      ]
+    },
+    {
+      "task": "FastQ.test-error.rb",
+      "description": ["Compares the estimated error of sequencing reads",
+        "(Q-score) with observed mismatches (identity against a known",
+        "reference sequence)."],
+      "help_arg": "--help",
+      "options": [
+        {
+          "name": "FastQ",
+          "opt": "--fastq",
+          "arg": "in_file",
+          "mandatory": true,
+          "description": "FastQ file containing the sequences."
+        },
+        {
+          "name": "Tabular BLAST",
+          "opt": "--blast",
+          "arg": "in_file",
+          "mandatory": true,
+          "description": ["Tabular BLAST file mapping reads to reference",
+            "sequences."]
+        },
+        {
+          "name": "Output",
+          "opt": "--out",
+          "arg": "out_file",
+          "mandatory": true,
+          "description": "Output tab-delimited file to create."
+        },
+        {
+          "opt": "--quiet",
+          "description": "Run quietly (no STDERR output)."
+        }
+      ]
+    }
+  ]
+}

data/utils/enveomics/Manifest/Tasks/graphics.json ADDED Viewed

@@ -0,0 +1,126 @@
+{
+  "tasks": [
+    {
+      "task": "Table.barplot.R",
+      "description": "Creates nice barplots from tab-delimited tables.",
+      "requires": [ { "r_package": "optparse" } ],
+      "help_arg": "--help",
+      "options": [
+        {
+          "name": "Input file",
+          "opt": "--x",
+          "arg": "in_file",
+          "mandatory": true,
+          "description": ["A tab-delimited file containing header (first row)",
+            "and row names (first column)."]
+        },
+        {
+          "opt": "--sizes",
+          "arg": "string",
+          "description": ["A numeric vector containing the real size of the",
+            "samples (columns) in the same order of the input table. If set,",
+            "the values are assumed to be 100%, otherwise the sum of the",
+            "columns is used. Separate values by commas."]
+        },
+        {
+          "opt": "--top",
+          "arg": "integer",
+          "default": 25,
+          "description": ["Maximum number of categories to display. Any",
+            "additional categories will be listed as 'Others'."]
+        },
+        {
+          "opt": "--colors-per-group",
+          "arg": "integer",
+          "default": 9,
+          "description": ["Number of categories in the first two saturation",
+            "groups of colors. The third group contains the remaining",
+            "categories if needed."]
+        },
+        {
+          "opt": "--bars-width",
+          "arg": "integer",
+          "default": 4,
+          "description": "Width of the barplot with respect to the legend."
+        },
+        {
+          "opt": "--legend-ncol",
+          "arg": "integer",
+          "default": 1,
+          "description": "Number of columns in the legend."
+        },
+        {
+          "opt": "--other-col",
+          "arg": "string",
+          "default": "#000000",
+          "description": "Color of the 'Others' category."
+        },
+        {
+          "opt": "--add-trend",
+          "description": ["Controls if semi-transparent areas are to be",
+            "plotted between the bars to connect the regions (trend regions)."]
+        },
+        {
+          "opt": "--organic-trend",
+          "description": ["Controls if the trend regions are to be smoothed",
+            "(curves). By default, trend regions have straight edges. If TRUE,",
+            "forces add.trend=TRUE."]
+        },
+        {
+          "opt": "--sort-by",
+          "arg": "string",
+          "default": "median",
+          "description": ["Any function that takes a numeric vector and",
+            "returns a numeric scalar. This function is applied to each row,",
+            "and the resulting values are used to sort the rows",
+            "(decreasingly). Good options include: sd, min, max, mean, median."]
+        },
+        {
+          "opt": "--min-report",
+          "arg": "integer",
+          "default": 101,
+          "description": ["Minimum percentage to report the value in the plot.",
+            "Any value above 100 indicates that no values are to be reported."]
+        },
+        {
+          "opt": "--order",
+          "arg": "string",
+          "description": ["Controls how the rows should be ordered. If empty",
+            "(default), sort.by is applied per row and the results are sorted",
+            "decreasingly. If NA, no sorting is performed, i.e., the original",
+            "order is respected. If a vector is provided, it is assumed to be",
+            "the custom order to be used (either by numeric index or by row",
+            "names). Separate values by commas."]
+        },
+        {
+          "opt": "--col",
+          "arg": "string",
+          "description": ["Colors to use. If provided, overrides the variables",
+            "`top` and `colors.per.group`, but `other.col` is still used if",
+            "the vector is insufficient for all the rows. Separate values by",
+            "commas. An additional palette is available when using value",
+            "'coto' (palette contributed by Luis (Coto) Orellana)."]
+        },
+        {
+          "arg": "out_file",
+          "mandatory": true,
+          "description": "Output file in PDF format."
+        },
+        {
+          "name": "width",
+          "arg": "float",
+          "mandatory": true,
+          "default": 7,
+          "description": "Width of the plot (in inches)."
+        },
+        {
+          "name": "height",
+          "arg": "float",
+          "mandatory": true,
+          "default": 7,
+          "description": "Height of the plot (in inches)."
+        }
+      ]
+    }
+  ]
+}

data/utils/enveomics/Manifest/Tasks/mapping.json ADDED Viewed

@@ -0,0 +1,67 @@
+{
+  "tasks": [
+    {
+      "task": "BedGraph.tad.rb",
+      "description": ["Estimates the truncated average sequencing depth (TAD)",
+        "from a BedGraph file."],
+      "warn": ["This script doesn't consider zero-coverage positions if",
+        "missing from the file. If you produce your BedGraph file with",
+        "bedtools genomecov and want to consider zero-coverage positions, be",
+        "sure to use -bga (not -bg)."],
+      "see_also": ["BedGraph.window.rb",
+        "BlastTab.seqdepth.pl", "BlastTab.seqdepth_ZIP.pl"],
+      "help_arg": "--help",
+      "options": [
+        {
+          "opt": "--input",
+          "arg": "in_file",
+          "mandatory": true,
+          "description": "Input BedGraph file."
+        },
+        {
+          "opt": "--range",
+          "arg": "float",
+          "default": 0.5,
+          "description": ["Central range to consider, between 0 and 1. By",
+            "default: inter-quartile range (0.5)."]
+        },
+        {
+          "opt": "--per-seq",
+          "description": ["Calculate averages per reference sequence, not",
+            "total. Assumes a sorted BedGraph file."]
+        },
+        {
+          "opt": "--length",
+          "description": "Add sequence length to the output."
+        }
+      ]
+    },
+    {
+      "task": "BedGraph.window.rb",
+      "description": ["Estimates the sequencing depth per window from a",
+        "BedGraph file."],
+      "warn": ["This script doesn't consider zero-coverage positions if",
+        "missing from the file. If you produce your BedGraph file with",
+        "bedtools genomecov and want to consider zero-coverage positions, be",
+        "sure to use -bga (not -bg)."],
+      "see_also": ["BedGraph.tad.rb",
+        "BlastTab.seqdepth.pl", "BlastTab.seqdepth_ZIP.pl"],
+      "help_arg": "--help",
+      "options": [
+        {
+          "opt": "--input",
+          "arg": "in_file",
+          "mandatory": true,
+          "description": "Input BedGraph file."
+        },
+        {
+          "name": "Window size",
+          "opt": "--win",
+          "arg": "float",
+          "default": 1000,
+          "description": "Window size, in base pairs."
+        }
+      ]
+    }
+  ]
+}

data/utils/enveomics/Manifest/Tasks/ogs.json ADDED Viewed

@@ -0,0 +1,382 @@
+{
+  "tasks": [
+    {
+      "task": "ogs.annotate.rb",
+      "description": ["Annotates Orthology Groups (OGs) using one or more",
+        "reference genomes."],
+      "see_also": ["ogs.mcl.rb"],
+      "help_arg": "--help",
+      "options": [
+        {
+          "name": "Input file",
+          "opt": "--in",
+          "arg": "in_file",
+          "mandatory": true,
+          "description": ["Input file containing the OGs (as generated by",
+            "ogs.mcl.rb)."]
+        },
+        {
+          "name": "Output file",
+          "opt": "--out",
+          "arg": "out_file",
+          "mandatory": true,
+          "description": "Output file containing the annotated OGs."
+        },
+        {
+          "name": "Annotations",
+          "opt": "-a",
+          "arg": "in_file",
+          "mandatory": true,
+          "multiple_sep": ",",
+          "description": ["Input file(s) containing the annotations. One or",
+            "more tab-delimited files with the gene names in the first column",
+            "and the annotation in the second."]
+        },
+        {
+          "opt": "--format",
+          "arg": "string",
+          "default": "(\\S+)\\.txt",
+          "description": ["Format of the filenames for the annotation files,",
+            "using regex syntax."]
+        },
+        {
+          "opt": "--quiet",
+          "description": "Run quietly (no STDERR output)."
+        }
+      ]
+    },
+    {
+      "task": "ogs.core-pan.rb",
+      "description": ["Subsamples the genomes in a set of Orthology Groups",
+        "(OGs) and estimates the trend of core genome and pangenome sizes."],
+      "help_arg": "--help",
+      "requires": [
+        {
+          "ruby_gem": "json"
+        }
+      ],
+      "see_also": ["ogs.mcl.rb"],
+      "options": [
+        {
+          "opt": "--ogs",
+          "arg": "in_file",
+          "mandatory": true,
+          "description": "Input file containing the precomputed OGs."
+        },
+        {
+          "opt": "--summary",
+          "arg": "out_file",
+          "description": ["Output file in tabular format with summary",
+            "statistics."]
+        },
+        {
+          "opt": "--tab",
+          "arg": "out_file",
+          "description": "Output file in tabular format."
+        },
+        {
+          "opt": "--json",
+          "arg": "out_file",
+          "description": "Output file in JSON format."
+        },
+        {
+          "opt": "--replicates",
+          "arg": "integer",
+          "description": "Number of replicates to estimate.",
+          "default": 100
+        },
+        {
+          "opt": "--threads",
+          "arg": "integer",
+          "description": "Children threads to spawn."
+        },
+        {
+          "opt": "--quiet",
+          "description": "Run quietly (no STDERR output)."
+        }
+      ]
+    },
+    {
+      "task": "ogs.extract.rb",
+      "description": ["Extracts sequences of Orthology Groups (OGs) from",
+        "genomes (proteomes)."],
+      "help_arg": "--help",
+      "see_also": ["ogs.mcl.rb"],
+      "options": [
+        {
+          "name": "Input file",
+          "opt": "--in",
+          "arg": "in_file",
+          "mandatory": true,
+          "description": ["Input file containing the OGs (as generated by",
+            "ogs.mcl.rb)."]
+        },
+        {
+          "name": "Output file",
+          "opt": "--out",
+          "arg": "out_file",
+          "mandatory": true,
+          "description": "Output directory where to place extracted sequences."
+        },
+        {
+          "name": "Sequences",
+          "opt": "--seqs",
+          "arg": "in_file",
+          "mandatory": true,
+          "description": ["Path to the proteomes in FastA format, using '%s'",
+            "to denote the genome. For example: /path/to/seqs/%s.faa."]
+        },
+        {
+          "opt": "--core",
+          "arg": "float",
+          "description": ["Use only OGs present in at least this fraction of",
+            "the genomes. To use only the strict core genome*, use --core 1."],
+          "note": ["* To use only the unus genome (OGs with exactly one gene",
+            "per genome), use: --core 1 --duplicates 1."]
+        },
+        {
+          "opt": "--duplicates",
+          "arg": "integer",
+          "description": ["Use only OGs with less than this number of",
+            "in-paralogs in a genome. To use only genes without in-paralogs*,",
+            "use --duplicates 1."],
+          "note": ["* To use only the unus genome (OGs with exactly one gene",
+            "per genome), use: --core 1 --duplicates 1."]
+        },
+        {
+          "opt": "--per-genome",
+          "description": ["If set, the output is generated per genome. By",
+            "default, the output is per OG."]
+        },
+        {
+          "opt": "--prefix",
+          "description": ["If set, each sequence is prefixed with the genome",
+            "name (or OG number, if --per-genome) and a dash."]
+        },
+        {
+          "opt": "--rand",
+          "description": ["Get only one gene per genome per OG (random)",
+            "regardless of in-paralogs. By default all genes are extracted."]
+        },
+        {
+          "opt": "--first",
+          "description": ["Get only one gene per genome per OG (first)",
+            "regardless of in-paralogs. By default all genes are extracted.",
+            "Takes precedence over --rand."]
+        },
+        {
+          "opt": "--quiet",
+          "description": "Run quietly (no STDERR output)."
+        }
+      ]
+    },
+    {
+      "task": "ogs.mcl.rb",
+      "description": ["Identifies Orthology Groups (OGs) in Reciprocal Best",
+        "Matches (RBM) between all pairs in a collection of genomes, using the",
+        "Markov Cluster Algorithm."],
+      "see_also": ["ogs.annotate.rb", "ogs.core-pan.rb", "ogs.extract.rb",
+        "ogs.stats.rb"],
+      "cite": [["Enright et al, 2002, NAR",
+        "http://dx.doi.org/10.1093/nar/30.7.1575"]],
+      "help_arg": "--help",
+      "options": [
+        {
+          "opt": "--out",
+          "arg": "out_file",
+          "mandatory": true,
+          "description": "Output file containing the detected OGs."
+        },
+        {
+          "opt": "--dir",
+          "arg": "in_dir",
+          "description": "Directory containing the RBM files.",
+          "note": "Mandatory, unless --abc is set to a non-empty file."
+        },
+        {
+          "opt": "--format",
+          "arg": "string",
+          "description": ["Format of the filenames for the RBM files (within",
+            "--dir), using regex syntax."],
+          "default": "(\\S+)-(\\S+)\\.rbm"
+        },
+        {
+          "opt": "--inflation",
+          "arg": "float",
+          "description": "Inflation parameter for MCL clustering.",
+          "default": 1.5
+        },
+        {
+          "opt": "--blind",
+          "description": ["If set, computes clusters without taking bitscore",
+            "into account."]
+        },
+        {
+          "opt": "--evalue",
+          "description": ["If set, uses the e-value to weight edges, instead",
+            "of the default Bit-Score."]
+        },
+        {
+          "opt": "--identity",
+          "description": ["If set, uses the identity to weight edges, instead",
+            "of the default Bit-Score."]
+        },
+        {
+          "opt": "--best-match",
+          "description": ["If set, it assumes best-matches instead of",
+            "reciprocal best matches."]
+        },
+        {
+          "opt": "--mcl-bin",
+          "arg": "in_dir",
+          "description": ["Path to the directory containing the mcl binaries.",
+            "By default, assumed to be in the PATH."]
+        },
+        {
+          "name": "abc",
+          "arg": "out_file",
+          "opt": "--abc",
+          "description": "Use this abc file instead of a temporary file."
+        },
+        {
+          "opt": "--threads",
+          "arg": "integer",
+          "default": 2,
+          "description": "Number of threads to use."
+        },
+        {
+          "opt": "--quiet",
+          "description": "Run quietly (no STDERR output)."
+        }
+      ]
+    },
+    {
+      "task": "ogs.rb",
+      "description": ["Identifies Orthology Groups (OGs) in Reciprocal Best",
+        "Matches (RBM) between all pairs in a collection of genomes."],
+      "warn": ["This script suffers from the chaining effect and is very",
+        "sensitive to spurious connections, because it applies a greedy",
+        "clustering algorithm. For most practical purposes, the use of this",
+        "script is discouraged and `ogs.mcl.rb` should be preferred."],
+      "help_arg": "--help",
+      "see_also": ["ogs.mcl.rb"],
+      "options": [
+        {
+          "opt": "--out",
+          "mandatory": true,
+          "arg": "out_file",
+          "description": "Output file containing the detected OGs."
+        },
+        {
+          "opt": "--dir",
+          "arg": "in_dir",
+          "description": "Directory containing the RBM files.",
+          "note": "Required unless --pre-ogs is passed."
+        },
+        {
+          "opt": "--pre-ogs",
+          "arg": "in_file",
+          "multiple_sep": ",",
+          "description": "Pre-computed OGs file(s), separated by commas."
+        },
+        {
+          "opt": "--unchecked",
+          "description": "Do not check internal redundancy in OGs."
+        },
+        {
+          "opt": "--format",
+          "arg": "string",
+          "default": "(\\S+)-(\\S+)\\.rbm",
+          "description": ["Format of the filenames for the RBM files (within",
+            "--dir), using regex syntax."]
+        },
+        {
+          "opt": "--quiet",
+          "description": "Run quietly (no STDERR output)."
+        }
+      ]
+    },
+    {
+      "task": "ogs.stats.rb",
+      "description": ["Estimates some descriptive statistics on a set of",
+        "Orthology Groups (OGs)."],
+      "see_also": ["ogs.mcl.rb"],
+      "help_arg": "--help",
+      "requires": [ { "ruby_gem": "json" } ],
+      "options": [
+        {
+          "opt": "--ogs",
+          "arg": "in_file",
+          "mandatory": true,
+          "description": "Input file containing the precomputed OGs."
+        },
+        {
+          "opt": "--json",
+          "arg": "out_file",
+          "description": "Output file in JSON format."
+        },
+        {
+          "opt": "--tab",
+          "arg": "out_file",
+          "description": "Output file in tabular format."
+        },
+        {
+          "opt": "--transposed-tab",
+          "arg": "out_file",
+          "description": "Output file in transposed tabular format."
+        },
+        {
+          "opt": "--auto",
+          "description": "Run completely quietly (no STDERR or STDOUT)."
+        },
+        {
+          "opt": "--quiet",
+          "description": "Run quietly (no STDERR output)."
+        }
+      ]
+    },
+    {
+      "task": "clust.rand.rb",
+      "description": ["Calculates the Rand Index and the Adjusted Rand Index",
+        "between two clusterings. The clustering format is a raw text file",
+        "with one cluster per line, each defined as comma-delimited members,",
+        "and a header line (ignored). Note that this is equivalent to the OGs",
+        "format for 1 genome."],
+      "see_also": ["ogs.mcl.rb"],
+      "help_arg": "--help",
+      "cite": [
+        ["Rand, 1971, J Am Stat Assoc",
+          "https://doi.org/10.2307%2F2284239"],
+        ["Hubert & Arabie, 1985, J Classif",
+          "https://doi.org/10.1007%2FBF01908075"]
+      ],
+      "options": [
+        {
+          "name": "Input file 1",
+          "opt": "--clust1",
+          "arg": "in_file",
+          "mandatory": true,
+          "description": "First input file."
+        },
+        {
+          "name": "Input file 2",
+          "opt": "--clust2",
+          "arg": "in_file",
+          "mandatory": true,
+          "description": "Second input file."
+        },
+        {
+          "name": "Precision",
+          "opt": "--prec",
+          "arg": "integer",
+          "description": "Precision to report.",
+          "default": 6
+        },
+        {
+          "opt": "--quiet",
+          "description": "Run quietly (no STDERR output)."
+        }
+      ]
+    }
+  ]
+}