miga-base 0.3.0.0 → 0.3.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (260) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +21 -4
  3. data/actions/init.rb +258 -0
  4. data/actions/run_local.rb +1 -2
  5. data/actions/test_taxonomy.rb +4 -1
  6. data/bin/miga +8 -1
  7. data/lib/miga/dataset.rb +4 -4
  8. data/lib/miga/dataset_result.rb +7 -4
  9. data/lib/miga/version.rb +2 -2
  10. data/scripts/_distances_noref_nomulti.bash +3 -1
  11. data/scripts/clade_finding.bash +1 -1
  12. data/scripts/init.bash +1 -1
  13. data/scripts/miga.bash +1 -1
  14. data/scripts/mytaxa.bash +78 -72
  15. data/scripts/mytaxa_scan.bash +67 -62
  16. data/scripts/ogs.bash +1 -1
  17. data/scripts/trimmed_fasta.bash +4 -3
  18. data/utils/enveomics/Examples/aai-matrix.bash +66 -0
  19. data/utils/enveomics/Examples/ani-matrix.bash +66 -0
  20. data/utils/enveomics/Examples/essential-phylogeny.bash +105 -0
  21. data/utils/enveomics/Examples/unus-genome-phylogeny.bash +100 -0
  22. data/utils/enveomics/LICENSE.txt +73 -0
  23. data/utils/enveomics/Makefile +52 -0
  24. data/utils/enveomics/Manifest/Tasks/aasubs.json +103 -0
  25. data/utils/enveomics/Manifest/Tasks/blasttab.json +703 -0
  26. data/utils/enveomics/Manifest/Tasks/distances.json +161 -0
  27. data/utils/enveomics/Manifest/Tasks/fasta.json +571 -0
  28. data/utils/enveomics/Manifest/Tasks/fastq.json +208 -0
  29. data/utils/enveomics/Manifest/Tasks/graphics.json +126 -0
  30. data/utils/enveomics/Manifest/Tasks/ogs.json +339 -0
  31. data/utils/enveomics/Manifest/Tasks/other.json +746 -0
  32. data/utils/enveomics/Manifest/Tasks/remote.json +355 -0
  33. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +454 -0
  34. data/utils/enveomics/Manifest/Tasks/tables.json +308 -0
  35. data/utils/enveomics/Manifest/Tasks/trees.json +68 -0
  36. data/utils/enveomics/Manifest/Tasks/variants.json +111 -0
  37. data/utils/enveomics/Manifest/categories.json +132 -0
  38. data/utils/enveomics/Manifest/examples.json +154 -0
  39. data/utils/enveomics/Manifest/tasks.json +4 -0
  40. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +69 -0
  41. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +56 -0
  42. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +60 -0
  43. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +38 -0
  44. data/utils/enveomics/Pipelines/assembly.pbs/README.md +189 -0
  45. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +112 -0
  46. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +23 -0
  47. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +44 -0
  48. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +50 -0
  49. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +37 -0
  50. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +68 -0
  51. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +49 -0
  52. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +80 -0
  53. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +57 -0
  54. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +63 -0
  55. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +38 -0
  56. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +73 -0
  57. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +21 -0
  58. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +72 -0
  59. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +98 -0
  60. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +55 -0
  61. data/utils/enveomics/Pipelines/blast.pbs/README.md +127 -0
  62. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +109 -0
  63. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +128 -0
  64. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +16 -0
  65. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +22 -0
  66. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +26 -0
  67. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +89 -0
  68. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +29 -0
  69. data/utils/enveomics/Pipelines/idba.pbs/README.md +49 -0
  70. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +95 -0
  71. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +56 -0
  72. data/utils/enveomics/Pipelines/trim.pbs/README.md +54 -0
  73. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +70 -0
  74. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +130 -0
  75. data/utils/enveomics/README.md +40 -0
  76. data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +171 -0
  77. data/utils/enveomics/Scripts/Aln.cat.rb +162 -0
  78. data/utils/enveomics/Scripts/Aln.convert.pl +35 -0
  79. data/utils/enveomics/Scripts/AlphaDiversity.pl +152 -0
  80. data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +102 -0
  81. data/utils/enveomics/Scripts/BlastTab.addlen.rb +61 -0
  82. data/utils/enveomics/Scripts/BlastTab.advance.bash +48 -0
  83. data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +55 -0
  84. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +106 -0
  85. data/utils/enveomics/Scripts/BlastTab.cogCat.rb +76 -0
  86. data/utils/enveomics/Scripts/BlastTab.filter.pl +47 -0
  87. data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +194 -0
  88. data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +104 -0
  89. data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +157 -0
  90. data/utils/enveomics/Scripts/BlastTab.recplot2.R +40 -0
  91. data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +86 -0
  92. data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +119 -0
  93. data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +86 -0
  94. data/utils/enveomics/Scripts/BlastTab.subsample.pl +47 -0
  95. data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +114 -0
  96. data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +90 -0
  97. data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +101 -0
  98. data/utils/enveomics/Scripts/Chao1.pl +97 -0
  99. data/utils/enveomics/Scripts/CharTable.classify.rb +234 -0
  100. data/utils/enveomics/Scripts/EBIseq2tax.rb +83 -0
  101. data/utils/enveomics/Scripts/FastA.N50.pl +56 -0
  102. data/utils/enveomics/Scripts/FastA.filter.pl +52 -0
  103. data/utils/enveomics/Scripts/FastA.filterLen.pl +28 -0
  104. data/utils/enveomics/Scripts/FastA.filterN.pl +60 -0
  105. data/utils/enveomics/Scripts/FastA.fragment.rb +92 -0
  106. data/utils/enveomics/Scripts/FastA.gc.pl +42 -0
  107. data/utils/enveomics/Scripts/FastA.interpose.pl +87 -0
  108. data/utils/enveomics/Scripts/FastA.length.pl +38 -0
  109. data/utils/enveomics/Scripts/FastA.per_file.pl +36 -0
  110. data/utils/enveomics/Scripts/FastA.qlen.pl +57 -0
  111. data/utils/enveomics/Scripts/FastA.rename.pl +65 -0
  112. data/utils/enveomics/Scripts/FastA.revcom.pl +23 -0
  113. data/utils/enveomics/Scripts/FastA.slider.pl +85 -0
  114. data/utils/enveomics/Scripts/FastA.split.pl +55 -0
  115. data/utils/enveomics/Scripts/FastA.subsample.pl +131 -0
  116. data/utils/enveomics/Scripts/FastA.tag.rb +64 -0
  117. data/utils/enveomics/Scripts/FastA.wrap.rb +48 -0
  118. data/utils/enveomics/Scripts/FastQ.filter.pl +54 -0
  119. data/utils/enveomics/Scripts/FastQ.interpose.pl +90 -0
  120. data/utils/enveomics/Scripts/FastQ.offset.pl +90 -0
  121. data/utils/enveomics/Scripts/FastQ.split.pl +53 -0
  122. data/utils/enveomics/Scripts/FastQ.tag.rb +63 -0
  123. data/utils/enveomics/Scripts/FastQ.toFastA.awk +24 -0
  124. data/utils/enveomics/Scripts/GenBank.add_fields.rb +84 -0
  125. data/utils/enveomics/Scripts/HMM.essential.rb +254 -0
  126. data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +83 -0
  127. data/utils/enveomics/Scripts/JPlace.distances.rb +88 -0
  128. data/utils/enveomics/Scripts/JPlace.to_iToL.rb +306 -0
  129. data/utils/enveomics/Scripts/M5nr.getSequences.rb +81 -0
  130. data/utils/enveomics/Scripts/MeTaxa.distribution.pl +198 -0
  131. data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +35 -0
  132. data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +49 -0
  133. data/utils/enveomics/Scripts/NCBIacc2tax.rb +92 -0
  134. data/utils/enveomics/Scripts/Newick.autoprune.R +27 -0
  135. data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +228 -0
  136. data/utils/enveomics/Scripts/RefSeq.download.bash +48 -0
  137. data/utils/enveomics/Scripts/SRA.download.bash +50 -0
  138. data/utils/enveomics/Scripts/TRIBS.plot-test.R +36 -0
  139. data/utils/enveomics/Scripts/TRIBS.test.R +39 -0
  140. data/utils/enveomics/Scripts/Table.barplot.R +30 -0
  141. data/utils/enveomics/Scripts/Table.df2dist.R +30 -0
  142. data/utils/enveomics/Scripts/Table.filter.pl +61 -0
  143. data/utils/enveomics/Scripts/Table.merge.pl +77 -0
  144. data/utils/enveomics/Scripts/Table.replace.rb +69 -0
  145. data/utils/enveomics/Scripts/Table.round.rb +63 -0
  146. data/utils/enveomics/Scripts/Table.split.pl +57 -0
  147. data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +227 -0
  148. data/utils/enveomics/Scripts/VCF.KaKs.rb +147 -0
  149. data/utils/enveomics/Scripts/VCF.SNPs.rb +88 -0
  150. data/utils/enveomics/Scripts/aai.rb +373 -0
  151. data/utils/enveomics/Scripts/ani.rb +362 -0
  152. data/utils/enveomics/Scripts/gi2tax.rb +103 -0
  153. data/utils/enveomics/Scripts/in_silico_GA_GI.pl +96 -0
  154. data/utils/enveomics/Scripts/lib/data/essential.hmm.gz +0 -0
  155. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +26 -0
  156. data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +253 -0
  157. data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +182 -0
  158. data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +74 -0
  159. data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +237 -0
  160. data/utils/enveomics/Scripts/lib/enveomics_rb/stat.rb +30 -0
  161. data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +135 -0
  162. data/utils/enveomics/Scripts/ogs.annotate.rb +88 -0
  163. data/utils/enveomics/Scripts/ogs.core-pan.rb +160 -0
  164. data/utils/enveomics/Scripts/ogs.extract.rb +125 -0
  165. data/utils/enveomics/Scripts/ogs.mcl.rb +186 -0
  166. data/utils/enveomics/Scripts/ogs.rb +104 -0
  167. data/utils/enveomics/Scripts/ogs.stats.rb +131 -0
  168. data/utils/enveomics/Scripts/rbm.rb +137 -0
  169. data/utils/enveomics/Tests/Makefile +10 -0
  170. data/utils/enveomics/Tests/Mgen_M2288.faa +3189 -0
  171. data/utils/enveomics/Tests/Mgen_M2288.fna +8282 -0
  172. data/utils/enveomics/Tests/Mgen_M2321.fna +8288 -0
  173. data/utils/enveomics/Tests/Nequ_Kin4M.faa +2970 -0
  174. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
  175. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +7 -0
  176. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +17 -0
  177. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +137 -0
  178. data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +123 -0
  179. data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +200 -0
  180. data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +55 -0
  181. data/utils/enveomics/Tests/alkB.nwk +1 -0
  182. data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +13 -0
  183. data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +17 -0
  184. data/utils/enveomics/Tests/hiv1.faa +59 -0
  185. data/utils/enveomics/Tests/hiv1.fna +134 -0
  186. data/utils/enveomics/Tests/hiv2.faa +70 -0
  187. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +233 -0
  188. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +1 -0
  189. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +233 -0
  190. data/utils/enveomics/Tests/phyla_counts.tsv +10 -0
  191. data/utils/enveomics/Tests/primate_lentivirus.ogs +11 -0
  192. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +9 -0
  193. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +8 -0
  194. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +6 -0
  195. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +9 -0
  196. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +6 -0
  197. data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +6 -0
  198. data/utils/enveomics/build_enveomics_r.bash +44 -0
  199. data/utils/enveomics/enveomics.R/DESCRIPTION +31 -0
  200. data/utils/enveomics/enveomics.R/NAMESPACE +35 -0
  201. data/utils/enveomics/enveomics.R/R/autoprune.R +121 -0
  202. data/utils/enveomics/enveomics.R/R/barplot.R +165 -0
  203. data/utils/enveomics/enveomics.R/R/cliopts.R +119 -0
  204. data/utils/enveomics/enveomics.R/R/df2dist.R +117 -0
  205. data/utils/enveomics/enveomics.R/R/growthcurve.R +263 -0
  206. data/utils/enveomics/enveomics.R/R/recplot.R +320 -0
  207. data/utils/enveomics/enveomics.R/R/recplot2.R +745 -0
  208. data/utils/enveomics/enveomics.R/R/tribs.R +423 -0
  209. data/utils/enveomics/enveomics.R/R/utils.R +16 -0
  210. data/utils/enveomics/enveomics.R/README.md +52 -0
  211. data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
  212. data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
  213. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +30 -0
  214. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +43 -0
  215. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +19 -0
  216. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +37 -0
  217. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +24 -0
  218. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +24 -0
  219. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +33 -0
  220. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +64 -0
  221. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +37 -0
  222. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +19 -0
  223. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +18 -0
  224. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +26 -0
  225. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +25 -0
  226. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +26 -0
  227. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +49 -0
  228. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +28 -0
  229. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +97 -0
  230. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +40 -0
  231. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +40 -0
  232. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +24 -0
  233. data/utils/enveomics/enveomics.R/man/enve.recplot2.__findPeak.Rd +40 -0
  234. data/utils/enveomics/enveomics.R/man/enve.recplot2.__findPeaks.Rd +18 -0
  235. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +22 -0
  236. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +20 -0
  237. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +18 -0
  238. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -0
  239. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +27 -0
  240. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +53 -0
  241. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +44 -0
  242. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +44 -0
  243. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +21 -0
  244. data/utils/enveomics/enveomics.R/man/enveomics.R-package.Rd +15 -0
  245. data/utils/enveomics/enveomics.R/man/growth.curves.Rd +14 -0
  246. data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +13 -0
  247. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +43 -0
  248. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +29 -0
  249. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +30 -0
  250. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +71 -0
  251. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +18 -0
  252. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +18 -0
  253. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +18 -0
  254. data/utils/enveomics/enveomics.R/man/z$-methods.Rd +27 -0
  255. data/utils/enveomics/globals.mk +8 -0
  256. data/utils/enveomics/manifest.json +9 -0
  257. data/utils/index_metadata.rb +0 -0
  258. data/utils/plot-taxdist.R +0 -0
  259. data/utils/requirements.txt +19 -19
  260. metadata +242 -2
@@ -0,0 +1,746 @@
1
+ {
2
+ "tasks": [
3
+ {
4
+ "task": "Aln.cat.rb",
5
+ "description": ["Concatenates several multiple alignments in FastA",
6
+ "format into a single multiple alignment. The IDs of the sequences",
7
+ "(or the ID prefixes, if using --ignore-after) must coincide across",
8
+ "files."],
9
+ "help_arg": "--help",
10
+ "options": [
11
+ {
12
+ "opt": "--coords",
13
+ "arg": "out_file",
14
+ "description": "Output file of coordinates in RAxML-compliant format."
15
+ },
16
+ {
17
+ "opt": "--ignore-after",
18
+ "arg": "string",
19
+ "description": ["Remove everything in the IDs after the specified",
20
+ "string."]
21
+ },
22
+ {
23
+ "opt": "--remove-invariable",
24
+ "description": "Remove invariable sites.",
25
+ "note": ["Invariable sites are defined as columns with only one",
26
+ "state and undefined characters. Additional ambiguous characters",
27
+ "may exist and should be declared using --undefined."]
28
+ },
29
+ {
30
+ "opt": "--missing-char",
31
+ "description": "Character denoting missing data.",
32
+ "arg": "character",
33
+ "default": "-"
34
+ },
35
+ {
36
+ "opt": "--model",
37
+ "description": "Name of the model to use if --coords is used.",
38
+ "note": "See RAxML's documentation for additional information.",
39
+ "arg": "string",
40
+ "default": "AUTO"
41
+ },
42
+ {
43
+ "opt": "--undefined",
44
+ "arg": "string",
45
+ "description": ["All characters to be regarded as 'undefined'. It",
46
+ "should include all ambiguous and missing data chars. Ignored",
47
+ "unless --remove-invariable."],
48
+ "default": "-.Xx?"
49
+ },
50
+ {
51
+ "opt": "--quiet",
52
+ "description": "Run quietly (no STDERR output)."
53
+ },
54
+ {
55
+ "name": "Input alignments",
56
+ "arg": "in_file",
57
+ "multiple_sep": " ",
58
+ "mandatory": true,
59
+ "description": "Input alignments in FastA format."
60
+ },
61
+ ">",
62
+ {
63
+ "name": "Output alignment",
64
+ "arg": "out_file",
65
+ "mandatory": true,
66
+ "description": "Output concatenated alignment."
67
+ }
68
+ ]
69
+ },
70
+ {
71
+ "task": "Aln.convert.pl",
72
+ "description": "Translates between different alignment formats.",
73
+ "help_arg": "",
74
+ "requires": [
75
+ {
76
+ "perl_lib": "Bio::AlignIO"
77
+ }
78
+ ],
79
+ "cite": [["Stajich et al, 2002, GRes",
80
+ "http://dx.doi.org/10.1101/gr.361602"]],
81
+ "options": [
82
+ {
83
+ "name": "In-format",
84
+ "description": "Input file's format.",
85
+ "mandatory": true,
86
+ "arg": "select",
87
+ "values": ["bl2seq", "clustalw", "emboss", "fasta", "maf", "mase",
88
+ "mega", "meme", "metafasta", "msf", "nexus", "pfam", "phylip", "po",
89
+ "prodom", "psi", "selex", "stockholm", "XMFA", "arp"]
90
+ },
91
+ {
92
+ "name": "Out-format",
93
+ "description": "Output file's format.",
94
+ "mandatory": true,
95
+ "arg": "select",
96
+ "values": ["bl2seq", "clustalw", "emboss", "fasta", "maf", "mase",
97
+ "mega", "meme", "metafasta", "msf", "nexus", "pfam", "phylip", "po",
98
+ "prodom", "psi", "selex", "stockholm", "XMFA", "arp"]
99
+ },
100
+ "<",
101
+ {
102
+ "arg": "in_file",
103
+ "mandatory": true
104
+ },
105
+ ">",
106
+ {
107
+ "arg": "out_file",
108
+ "mandatory": true
109
+ }
110
+ ]
111
+ },
112
+ {
113
+ "task": "AlphaDiversity.pl",
114
+ "description": ["Takes a table of OTU abundance in one or more samples",
115
+ "and calculates the Rao (Q_alpha), Rao-Jost (Q_alpha_eqv), Shannon",
116
+ "(Hprime), and inverse Simpson (1_lambda) indices of alpha diversity",
117
+ "for each sample."],
118
+ "help_arg": "--help",
119
+ "see_also": ["Chao1.pl"],
120
+ "options": [
121
+ {
122
+ "name": "Input table",
123
+ "opt": "-i",
124
+ "arg": "in_file",
125
+ "description": ["Columns: samples, rows: OTUs, first column: OTU",
126
+ "names."],
127
+ "mandatory": true
128
+ },
129
+ {
130
+ "name": "Discard rows",
131
+ "opt": "-r",
132
+ "arg": "integer",
133
+ "description": "Number of rows to ignore.",
134
+ "default": 0
135
+ },
136
+ {
137
+ "name": "Discard left columns",
138
+ "opt": "-c",
139
+ "arg": "integer",
140
+ "description": ["Number of columns to ignore after the first column",
141
+ "(i.e., between the first column, containing the name of the",
142
+ "categories, and the first column of abundance values)."],
143
+ "default": 0
144
+ },
145
+ {
146
+ "name": "Discard right columns",
147
+ "opt": "-C",
148
+ "arg": "integer",
149
+ "description": "Number of columns to ignore at the end of each line.",
150
+ "default": 0
151
+ },
152
+ {
153
+ "name": "Delimiter",
154
+ "opt": "-d",
155
+ "arg": "string",
156
+ "description": "Character (or string) separating columns.",
157
+ "note": ["Supported escaped characters are: '\\t' (tabulation), and",
158
+ "'\\0' (null bit)."],
159
+ "default": "\\t"
160
+ },
161
+ {
162
+ "name": "Headers",
163
+ "opt": "-h",
164
+ "description": ["The first row is assumed to have the names of the",
165
+ "samples."]
166
+ },
167
+ {
168
+ "name": "Distances file",
169
+ "opt": "-D",
170
+ "arg": "in_file",
171
+ "description": ["A squared matrix (or bottom-left half matrix) with",
172
+ "the distances between categories (OTUs or functions). The first",
173
+ "column must contain the names of the categories, and it shouldn't",
174
+ "have headers. If not set, all distances are assumed to be one."],
175
+ "note": "Only used for Rao."
176
+ },
177
+ {
178
+ "name": "Skip Rao",
179
+ "opt": "-R",
180
+ "description": "Do not calculate Rao indices.",
181
+ "note": ["This significantly decreases the total running time. Note",
182
+ "that Rao indices are highly susceptible to precision errors, and",
183
+ "shouldn't be trusted for very big numbers."]
184
+ },
185
+ {
186
+ "name": "q for qD",
187
+ "opt": "-q",
188
+ "arg": "integer",
189
+ "description": "Estimate the qD index (true diversity order q).",
190
+ "default": 0
191
+ },
192
+ ">",
193
+ {
194
+ "arg": "out_file",
195
+ "mandatory": true
196
+ }
197
+ ]
198
+ },
199
+ {
200
+ "task": "Chao1.pl",
201
+ "description": ["Takes a table of OTU abundance in one or more samples",
202
+ "and calculates the chao1 index (with 95% Confidence Interval) for",
203
+ "each sample. To use it with Qiime OTU Tables, run it ignoring 1",
204
+ "left column and with header."],
205
+ "see_also": ["AlphaDiversity.pl"],
206
+ "help_arg": "--help",
207
+ "options": [
208
+ {
209
+ "name": "Input file",
210
+ "opt": "-i",
211
+ "arg": "in_file",
212
+ "mandatory": true,
213
+ "description": "Input table (columns:samples, rows:OTUs)."
214
+ },
215
+ {
216
+ "name": "Rows",
217
+ "opt": "-r",
218
+ "arg": "integer",
219
+ "default": 0,
220
+ "description": "Number of rows to ignore."
221
+ },
222
+ {
223
+ "name": "Left columns to ignore",
224
+ "opt": "-c",
225
+ "arg": "integer",
226
+ "default": 0,
227
+ "description": "Number of columns to ignore."
228
+ },
229
+ {
230
+ "name": "Right columns to ignore",
231
+ "opt": "-C",
232
+ "arg": "integer",
233
+ "default": 0,
234
+ "description": "Number of columns to ignore at the end."
235
+ },
236
+ {
237
+ "name": "Delimiter",
238
+ "opt": "-d",
239
+ "arg": "string",
240
+ "default": "\\t",
241
+ "description": "Delimiter between columns.",
242
+ "note": ["Supported escaped characters are: \\t (tabulation),",
243
+ "and \\0 (null bit)."]
244
+ },
245
+ {
246
+ "name": "Header",
247
+ "opt": "-h",
248
+ "description": ["If set, the first row is assumed to have the names",
249
+ "of the samples."]
250
+ },
251
+ ">",
252
+ {
253
+ "arg": "out_file",
254
+ "mandatory": true,
255
+ "description": ["Chao1 index and 95% CI bounds for each sample, in",
256
+ "tab-delimited format."]
257
+ }
258
+ ]
259
+ },
260
+ {
261
+ "task": "CharTable.classify.rb",
262
+ "description": ["Uses a dichotomous key to classify objects parsing a",
263
+ "character table."],
264
+ "help_arg": "--help",
265
+ "options": [
266
+ {
267
+ "opt": "--table",
268
+ "arg": "in_file",
269
+ "mandatory": true,
270
+ "description": ["Input table containing the states (columns) per",
271
+ "object (row). It must be tab-delimited and with row and column",
272
+ "names."]
273
+ },
274
+ {
275
+ "opt": "--key",
276
+ "arg": "in_file",
277
+ "mandatory": true,
278
+ "description": ["Input table containing the dichotomous key in
279
+ "linked style, defined in four columns (can contain #-lead comment",
280
+ "lines): (1) ID of the step, typically a sequential integer. (2)",
281
+ "Name of character to evaluate. Must coincide with the -t headers.",
282
+ "(3) First character decision (see below). (4) Second character",
283
+ "decision (see below).\nA character decision must be formatted as:",
284
+ "state (must coincide with the values in -t), colon (:), step to",
285
+ "follow. If the state is * (star) any state triggers the decision",
286
+ "(this should be the norm in column 4). The step to follow should",
287
+ "be a step ID in square brackets, or the name of the",
288
+ "classification."]
289
+ },
290
+ {
291
+ "opt": "--classification",
292
+ "arg": "out_file",
293
+ "description": ["Two-column table with the classification of the",
294
+ "input objects."]
295
+ },
296
+ {
297
+ "opt": "--newick",
298
+ "arg": "out_file",
299
+ "description": ["Tree containing all the classified objects. This",
300
+ "only makes sense for synoptic keys."]
301
+ },
302
+ {
303
+ "opt": "--quiet",
304
+ "description": "Run quietly (no STDERR output)."
305
+ }
306
+ ]
307
+ },
308
+ {
309
+ "task": "GenBank.add_fields.rb",
310
+ "description": "Adds annotations to GenBank files.",
311
+ "help_arg": "--help",
312
+ "options": [
313
+ {
314
+ "opt": "--genbank",
315
+ "arg": "in_file",
316
+ "mandatory": true,
317
+ "description": "Input GenBank file."
318
+ },
319
+ {
320
+ "opt": "--table",
321
+ "arg": "in_file",
322
+ "mandatory": true,
323
+ "description": ["Input file containing the annotations. It must be a",
324
+ "tab-delimited raw table including a header row with the names of",
325
+ "the fields."]
326
+ },
327
+ {
328
+ "opt": "--out",
329
+ "arg": "out_file",
330
+ "mandatory": true,
331
+ "description": "Output file containing the annotated GenBank."
332
+ },
333
+ {
334
+ "opt": "--key",
335
+ "arg": "integer",
336
+ "default": 1,
337
+ "description": "Key of the column to use as identifier."
338
+ },
339
+ {
340
+ "opt": "--split",
341
+ "arg": "string",
342
+ "default": "#",
343
+ "description": ["String that separates multiple entries in the",
344
+ "annotation features."]
345
+ },
346
+ {
347
+ "opt": "--quiet",
348
+ "description": "Run quietly (no STDERR output)."
349
+ }
350
+ ]
351
+ },
352
+ {
353
+ "task": "HMM.essential.rb",
354
+ "description": ["Finds and extracts a collection of essential proteins",
355
+ "suitable for genome completeness evaluation and phylogenetic",
356
+ "analyses in Archaea and Bacteria."],
357
+ "warn": ["Most complete bacterial genomes contain only 106/111 genes in",
358
+ "this collection, therefore producing a completeness of 95.5%, and",
359
+ "most archaeal genomes only contain 26/111 genes, producing a",
360
+ "completeness of 23.4%. Use the options --bacteria and/or --archaea to",
361
+ "ignore models often missing in one or both domains. Note that even",
362
+ "with these options, some complete archaeal genomes result in very low",
363
+ "values of completeness (e.g., Nanoarchaeum equitans returns 88.5%)."],
364
+ "help_arg": "--help",
365
+ "requires": [
366
+ {
367
+ "description": "HMMer 3.0+",
368
+ "test": "hmmsearch -h",
369
+ "source_url": "http://hmmer.janelia.org/software"
370
+ }
371
+ ],
372
+ "cite": [["Eddy, 2011, PLoS CB",
373
+ "http://dx.doi.org/10.1371/journal.pcbi.1002195"]],
374
+ "options": [
375
+ {
376
+ "name": "Input file",
377
+ "opt": "--in",
378
+ "arg": "in_file",
379
+ "mandatory": true,
380
+ "description": "FastA file containing all the proteins in the genome."
381
+ },
382
+ {
383
+ "name": "Output file",
384
+ "opt": "--out",
385
+ "arg": "out_file",
386
+ "description": ["FastA file with the translated essential genes. By",
387
+ "default the file is not produced."]
388
+ },
389
+ {
390
+ "opt": "--per-model",
391
+ "arg": "out_file",
392
+ "description": ["Prefix of translated genes in independent files",
393
+ "with the name of the model appended. By default files are not",
394
+ "produced."]
395
+ },
396
+ {
397
+ "opt": "--report",
398
+ "arg": "out_file",
399
+ "description": ["Path to the report file. By default, the report is",
400
+ "sent to the STDOUT."]
401
+ },
402
+ {
403
+ "opt": "--bacteria",
404
+ "description": "If set, ignores models typically missing in Bacteria."
405
+ },
406
+ {
407
+ "opt": "--archaea",
408
+ "description": "If set, ignores models typically missing in Archaea."
409
+ },
410
+ {
411
+ "opt": "--genome-eq",
412
+ "description": ["If set, ignores models not suitable for",
413
+ "genome-equivalents estimations. See Rodriguez-R et al, 2015, ISME",
414
+ "J 9(9):1928-1940."],
415
+ "source_url": "http://www.nature.com/ismej/journal/vaop/ncurrent/full/ismej20155a.html"
416
+ },
417
+ {
418
+ "opt": "--rename",
419
+ "arg": "string",
420
+ "description": ["If set, renames the sequences with the string",
421
+ "provided and appends it with pipe (|) and the gene name (except",
422
+ "in --per-model files)."]
423
+ },
424
+ {
425
+ "opt": "--no-stats",
426
+ "description": ["If set, no statistics are reported on genome",
427
+ "evaluation."]
428
+ },
429
+ {
430
+ "opt": "--no-genes",
431
+ "description": ["If set, statistics won't include the lists of",
432
+ "missing/multi-copy genes."]
433
+ },
434
+ {
435
+ "opt": "--metagenome",
436
+ "description": ["If set, it allows for multiple copies of each gene",
437
+ "and turns on metagenomic report mode."]
438
+ },
439
+ {
440
+ "opt": "--list-models",
441
+ "description": ["If set, it only lists the models and exits.",
442
+ "Compatible with 'Archaea', 'Bacteria', 'Genome eq', and 'Quiet';",
443
+ "ignores all other parameters."]
444
+ },
445
+ {
446
+ "opt": "--bin",
447
+ "arg": "in_dir",
448
+ "description": "Directory containing the binaries of HMMer 3.0+."
449
+ },
450
+ {
451
+ "opt": "--model-file",
452
+ "arg": "in_file",
453
+ "description": "External file containing models to search."
454
+ },
455
+ {
456
+ "opt": "--threads",
457
+ "arg": "integer",
458
+ "default": 2,
459
+ "description": "Number of parallel threads to be used."
460
+ },
461
+ {
462
+ "opt": "--quiet",
463
+ "description": "Run quietly (no STDERR output)."
464
+ }
465
+ ]
466
+ },
467
+ {
468
+ "task": "HMMsearch.extractIds.rb",
469
+ "description": ["Extracts the sequence IDs and query model from a",
470
+ "(multiple) HMMsearch report (for HMMer 3.0)."],
471
+ "help_arg": "--help",
472
+ "options": [
473
+ {
474
+ "opt": "--all-evalue",
475
+ "arg": "float",
476
+ "description": "Maximum e-value of sequence to report result."
477
+ },
478
+ {
479
+ "opt": "--all-score",
480
+ "arg": "float",
481
+ "description": "Minimum score of sequence to report result."
482
+ },
483
+ {
484
+ "opt": "--best-evalue",
485
+ "arg": "float",
486
+ "description": "Maximum e-value of best domain to report result."
487
+ },
488
+ {
489
+ "opt": "--best-score",
490
+ "arg": "float",
491
+ "description": "Minimum score of best domain to report result."
492
+ },
493
+ {
494
+ "opt": "--no-model",
495
+ "description": "Do not include the query model in the output list."
496
+ },
497
+ {
498
+ "opt": "--quiet",
499
+ "description": "Run quietly."
500
+ },
501
+ "<",
502
+ {
503
+ "arg": "in_file",
504
+ "mandatory": true,
505
+ "description": "Input HMMsearch text file."
506
+ },
507
+ ">",
508
+ {
509
+ "arg": "out_file",
510
+ "mandatory": true,
511
+ "description": "Output list of IDs."
512
+ }
513
+ ]
514
+ },
515
+ {
516
+ "task": "JPlace.distances.rb",
517
+ "description": ["Extracts the distance (estimated branch length) of each",
518
+ "placed read to a given node in a JPlace file."],
519
+ "requires": [ { "ruby_gem": "json" } ],
520
+ "help_arg": "--help",
521
+ "options": [
522
+ {
523
+ "name": "Input file",
524
+ "opt": "--in",
525
+ "arg": "in_file",
526
+ "mandatory": true,
527
+ "description": ".jplace input file containing the read placement."
528
+ },
529
+ {
530
+ "name": "Node",
531
+ "opt": "--node",
532
+ "arg": "string",
533
+ "mandatory": true,
534
+ "description": ["Index (number in curly brackets) of the node to",
535
+ "which distances should be measured."]
536
+ },
537
+ {
538
+ "name": "Output file",
539
+ "opt": "--out",
540
+ "arg": "out_file",
541
+ "mandatory": true,
542
+ "description": "Output file in tabular format."
543
+ },
544
+ {
545
+ "opt": "--in-node",
546
+ "arg": "string",
547
+ "description": ["Report only reads placed at this node or its",
548
+ "children."]
549
+ },
550
+ {
551
+ "opt": "--quiet",
552
+ "description": "Run quietly (no STDERR output)."
553
+ }
554
+ ]
555
+ },
556
+ {
557
+ "task": "JPlace.to_iToL.rb",
558
+ "description": ["Generates iToL-compatible files from a .jplace file",
559
+ "(produced by RAxML's EPA or pplacer), that can be used to draw",
560
+ "pie-charts in the nodes of the reference tree."],
561
+ "requires": [ { "ruby_gem": "json" } ],
562
+ "help_arg": "--help",
563
+ "options": [
564
+ {
565
+ "name": "Input file",
566
+ "opt": "--in",
567
+ "arg": "in_file",
568
+ "mandatory": true,
569
+ "description": ".jplace input file containing the read placement."
570
+ },
571
+ {
572
+ "name": "Out base",
573
+ "opt": "--out",
574
+ "arg": "out_file",
575
+ "mandatory": true,
576
+ "description": "Base of the output files."
577
+ },
578
+ {
579
+ "opt": "--unique",
580
+ "arg": "string",
581
+ "description": ["Name of the dataset (if only one is used).",
582
+ "Conflicts with 'Regex' and 'Separator'."]
583
+ },
584
+ {
585
+ "opt": "--regex",
586
+ "arg": "string",
587
+ "description": ["Regular expression capturing the sample ID (named",
588
+ "dataset) in read names. By default: '^(?<dataset>.+?):.*'.",
589
+ "Conflicts with 'Separator'."]
590
+ },
591
+ {
592
+ "opt": "--separator",
593
+ "arg": "string",
594
+ "description": ["String separating the dataset name and the rest of",
595
+ "the read name. It assumes that the read name starts with the",
596
+ "dataset name. Conflicts with 'Regex'."]
597
+ },
598
+ {
599
+ "opt": "--metadata",
600
+ "arg": "in_file",
601
+ "description": ["Datasets metadata in tab-delimited format with a",
602
+ "header row. Valid headers: name (required), color (in Hex),",
603
+ "size (# reads), norm (any float)."]
604
+ },
605
+ {
606
+ "opt": "--norm",
607
+ "arg": "string",
608
+ "description": ["Normalization strategy. Must be one of:\n",
609
+ "none: Direct read counts are reported without normalization.\n",
610
+ "count (default): The counts are normalized (divided) by the total",
611
+ "counts per dataset.\n",
612
+ "size: The counts are normalized (divided) by the size column in",
613
+ "metadata (must be integer).\n",
614
+ "norm: The counts are normalized (divided) by the norm column in",
615
+ "metadata (can be any float)."]
616
+ },
617
+ {
618
+ "opt": "--old-itol",
619
+ "description": ["Generate output file using the old iToL format",
620
+ "(pre v3.0)."]
621
+ },
622
+ {
623
+ "opt": "--collapse",
624
+ "arg": "in_file",
625
+ "description": "Internal nodes to collapse (requires rooted tree)."
626
+ },
627
+ {
628
+ "opt": "--area",
629
+ "description": ["If set, the area of the pies is proportional to the",
630
+ "placements. Otherwise, the radius is."]
631
+ },
632
+ {
633
+ "opt": "--quiet",
634
+ "description": "Run quietly (no STDERR output)."
635
+ }
636
+ ]
637
+ },
638
+ {
639
+ "task": "MyTaxa.fragsByTax.pl",
640
+ "description": "Identifies fragments annotated as a taxon in MyTaxa.",
641
+ "help_arg": "",
642
+ "options": [
643
+ {
644
+ "name": "MyTaxa output",
645
+ "arg": "in_file",
646
+ "mandatory": true,
647
+ "description": "Classification file generated by MyTaxa."
648
+ },
649
+ {
650
+ "name": "Taxon",
651
+ "arg": "string",
652
+ "mandatory": true,
653
+ "description": "Taxon to look for."
654
+ },
655
+ {
656
+ "name": "Rank",
657
+ "arg": "string",
658
+ "description": "Rank of taxon (optional). By default: any rank."
659
+ },
660
+ ">",
661
+ {
662
+ "arg": "out_file",
663
+ "mandatory": true,
664
+ "description": "List of fragment IDs."
665
+ }
666
+ ]
667
+ },
668
+ {
669
+ "task": "MyTaxa.seq-taxrank.rb",
670
+ "description": ["Generates a simple tabular file with the classification",
671
+ "of each sequence at a given taxonomic rank from a MyTaxa output."],
672
+ "help_arg": "--help",
673
+ "options": [
674
+ {
675
+ "name": "MyTaxa",
676
+ "opt": "--mytaxa",
677
+ "arg": "in_file",
678
+ "mandatory": true,
679
+ "description": "Input MyTaxa file."
680
+ },
681
+ {
682
+ "opt": "--rank",
683
+ "arg": "string",
684
+ "default": "genus",
685
+ "description": "Taxonomic rank."
686
+ },
687
+ {
688
+ "opt": "--quiet",
689
+ "description": "Run quietly."
690
+ }
691
+ ]
692
+ },
693
+ {
694
+ "task": "Taxonomy.silva2ncbi.rb",
695
+ "description": ["Re-formats Silva taxonomy into NCBI-like taxonomy dump",
696
+ "files."],
697
+ "help_arg": "--help",
698
+ "options": [
699
+ {
700
+ "opt": "--silvaranks",
701
+ "arg": "in_file",
702
+ "mandatory": true,
703
+ "description": ["Input Silva ranks file (e.g.,",
704
+ "tax_ranks_ssu_115.txt)."],
705
+ "source_url": "https://www.arb-silva.de/no_cache/download/archive/release_115/Exports/"
706
+ },
707
+ {
708
+ "opt": "--silvaref",
709
+ "arg": "in_file",
710
+ "mandatory": true,
711
+ "description": ["Input Silva ref alignment file (e.g.,",
712
+ "SSURef_NR99_115_tax_silva_full_align_trunc.fasta)."],
713
+ "source_url": "https://www.arb-silva.de/no_cache/download/archive/release_115/Exports/"
714
+ },
715
+ {
716
+ "opt": "--patch",
717
+ "arg": "in_file",
718
+ "description": ["If passed, it replaces the paths specified in the",
719
+ "patch."],
720
+ "source_url": "http://enve-omics.ce.gatech.edu/data/public_misc/patch_Silva_SSU_r115.txt"
721
+ },
722
+ {
723
+ "opt": "--seqinfo",
724
+ "arg": "out_file",
725
+ "description": ["If passed, it creates a CSV seq-info file",
726
+ "compatible with taxtastic."]
727
+ },
728
+ {
729
+ "opt": "--taxfile",
730
+ "arg": "out_file",
731
+ "description": "If passed, it creates a simple TSV taxonomy file."
732
+ },
733
+ {
734
+ "opt": "--ncbi",
735
+ "arg": "out_dir",
736
+ "description": ["If passed, output folder for the NCBI dump files",
737
+ "(e.g., taxdmp)."]
738
+ },
739
+ {
740
+ "opt": "--warns",
741
+ "description": "Verbosely display warnings."
742
+ }
743
+ ]
744
+ }
745
+ ]
746
+ }