miga-base 0.7.26.0 → 1.0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (337) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/_data/aai-intax.blast.tsv.gz +0 -0
  3. data/lib/miga/_data/aai-intax.diamond.tsv.gz +0 -0
  4. data/lib/miga/_data/aai-novel.blast.tsv.gz +0 -0
  5. data/lib/miga/_data/aai-novel.diamond.tsv.gz +0 -0
  6. data/lib/miga/cli/action/classify_wf.rb +2 -2
  7. data/lib/miga/cli/action/derep_wf.rb +1 -1
  8. data/lib/miga/cli/action/doctor.rb +57 -14
  9. data/lib/miga/cli/action/doctor/base.rb +47 -23
  10. data/lib/miga/cli/action/init.rb +11 -7
  11. data/lib/miga/cli/action/init/files_helper.rb +1 -0
  12. data/lib/miga/cli/action/ncbi_get.rb +3 -3
  13. data/lib/miga/cli/action/tax_dist.rb +2 -2
  14. data/lib/miga/cli/action/wf.rb +5 -4
  15. data/lib/miga/common.rb +1 -0
  16. data/lib/miga/daemon.rb +11 -4
  17. data/lib/miga/dataset/result.rb +10 -6
  18. data/lib/miga/json.rb +5 -4
  19. data/lib/miga/metadata.rb +5 -1
  20. data/lib/miga/parallel.rb +36 -0
  21. data/lib/miga/project.rb +8 -8
  22. data/lib/miga/project/base.rb +4 -4
  23. data/lib/miga/project/result.rb +2 -2
  24. data/lib/miga/sqlite.rb +10 -2
  25. data/lib/miga/version.rb +23 -9
  26. data/scripts/aai_distances.bash +16 -18
  27. data/scripts/ani_distances.bash +16 -17
  28. data/scripts/assembly.bash +31 -16
  29. data/scripts/haai_distances.bash +3 -27
  30. data/scripts/miga.bash +6 -4
  31. data/scripts/p.bash +1 -1
  32. data/scripts/read_quality.bash +9 -18
  33. data/scripts/trimmed_fasta.bash +14 -30
  34. data/scripts/trimmed_reads.bash +36 -36
  35. data/test/parallel_test.rb +31 -0
  36. data/test/project_test.rb +2 -1
  37. data/test/remote_dataset_test.rb +1 -1
  38. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +41964 -0
  39. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +32439 -0
  40. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +62056 -0
  41. data/utils/FastAAI/FastAAI/FastAAI +1336 -0
  42. data/utils/FastAAI/README.md +84 -0
  43. data/utils/FastAAI/kAAI_v1.0_virus.py +1296 -0
  44. data/utils/distance/commands.rb +1 -0
  45. data/utils/distance/database.rb +0 -1
  46. data/utils/distance/runner.rb +2 -4
  47. data/utils/enveomics/Docs/recplot2.md +244 -0
  48. data/utils/enveomics/Examples/aai-matrix.bash +66 -0
  49. data/utils/enveomics/Examples/ani-matrix.bash +66 -0
  50. data/utils/enveomics/Examples/essential-phylogeny.bash +105 -0
  51. data/utils/enveomics/Examples/unus-genome-phylogeny.bash +100 -0
  52. data/utils/enveomics/LICENSE.txt +73 -0
  53. data/utils/enveomics/Makefile +52 -0
  54. data/utils/enveomics/Manifest/Tasks/aasubs.json +103 -0
  55. data/utils/enveomics/Manifest/Tasks/blasttab.json +786 -0
  56. data/utils/enveomics/Manifest/Tasks/distances.json +161 -0
  57. data/utils/enveomics/Manifest/Tasks/fasta.json +802 -0
  58. data/utils/enveomics/Manifest/Tasks/fastq.json +291 -0
  59. data/utils/enveomics/Manifest/Tasks/graphics.json +126 -0
  60. data/utils/enveomics/Manifest/Tasks/mapping.json +137 -0
  61. data/utils/enveomics/Manifest/Tasks/ogs.json +382 -0
  62. data/utils/enveomics/Manifest/Tasks/other.json +906 -0
  63. data/utils/enveomics/Manifest/Tasks/remote.json +355 -0
  64. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +638 -0
  65. data/utils/enveomics/Manifest/Tasks/tables.json +308 -0
  66. data/utils/enveomics/Manifest/Tasks/trees.json +68 -0
  67. data/utils/enveomics/Manifest/Tasks/variants.json +111 -0
  68. data/utils/enveomics/Manifest/categories.json +165 -0
  69. data/utils/enveomics/Manifest/examples.json +154 -0
  70. data/utils/enveomics/Manifest/tasks.json +4 -0
  71. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +69 -0
  72. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -0
  73. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -0
  74. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -0
  75. data/utils/enveomics/Pipelines/assembly.pbs/README.md +189 -0
  76. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +112 -0
  77. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +23 -0
  78. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +44 -0
  79. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +50 -0
  80. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +37 -0
  81. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +68 -0
  82. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +49 -0
  83. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +80 -0
  84. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +57 -0
  85. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +63 -0
  86. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +38 -0
  87. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +73 -0
  88. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +21 -0
  89. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +72 -0
  90. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +98 -0
  91. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -0
  92. data/utils/enveomics/Pipelines/blast.pbs/README.md +127 -0
  93. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +109 -0
  94. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +128 -0
  95. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +16 -0
  96. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +22 -0
  97. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +26 -0
  98. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +89 -0
  99. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +29 -0
  100. data/utils/enveomics/Pipelines/idba.pbs/README.md +49 -0
  101. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +95 -0
  102. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +56 -0
  103. data/utils/enveomics/Pipelines/trim.pbs/README.md +54 -0
  104. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +70 -0
  105. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +130 -0
  106. data/utils/enveomics/README.md +42 -0
  107. data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +171 -0
  108. data/utils/enveomics/Scripts/Aln.cat.rb +221 -0
  109. data/utils/enveomics/Scripts/Aln.convert.pl +35 -0
  110. data/utils/enveomics/Scripts/AlphaDiversity.pl +152 -0
  111. data/utils/enveomics/Scripts/BedGraph.tad.rb +93 -0
  112. data/utils/enveomics/Scripts/BedGraph.window.rb +71 -0
  113. data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +102 -0
  114. data/utils/enveomics/Scripts/BlastTab.addlen.rb +63 -0
  115. data/utils/enveomics/Scripts/BlastTab.advance.bash +48 -0
  116. data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +55 -0
  117. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +104 -0
  118. data/utils/enveomics/Scripts/BlastTab.cogCat.rb +76 -0
  119. data/utils/enveomics/Scripts/BlastTab.filter.pl +47 -0
  120. data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +194 -0
  121. data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +104 -0
  122. data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +157 -0
  123. data/utils/enveomics/Scripts/BlastTab.recplot2.R +48 -0
  124. data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +86 -0
  125. data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +119 -0
  126. data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +86 -0
  127. data/utils/enveomics/Scripts/BlastTab.subsample.pl +47 -0
  128. data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +114 -0
  129. data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +90 -0
  130. data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +101 -0
  131. data/utils/enveomics/Scripts/Chao1.pl +97 -0
  132. data/utils/enveomics/Scripts/CharTable.classify.rb +234 -0
  133. data/utils/enveomics/Scripts/EBIseq2tax.rb +83 -0
  134. data/utils/enveomics/Scripts/FastA.N50.pl +60 -0
  135. data/utils/enveomics/Scripts/FastA.extract.rb +152 -0
  136. data/utils/enveomics/Scripts/FastA.filter.pl +52 -0
  137. data/utils/enveomics/Scripts/FastA.filterLen.pl +28 -0
  138. data/utils/enveomics/Scripts/FastA.filterN.pl +60 -0
  139. data/utils/enveomics/Scripts/FastA.fragment.rb +100 -0
  140. data/utils/enveomics/Scripts/FastA.gc.pl +42 -0
  141. data/utils/enveomics/Scripts/FastA.interpose.pl +93 -0
  142. data/utils/enveomics/Scripts/FastA.length.pl +38 -0
  143. data/utils/enveomics/Scripts/FastA.mask.rb +89 -0
  144. data/utils/enveomics/Scripts/FastA.per_file.pl +36 -0
  145. data/utils/enveomics/Scripts/FastA.qlen.pl +57 -0
  146. data/utils/enveomics/Scripts/FastA.rename.pl +65 -0
  147. data/utils/enveomics/Scripts/FastA.revcom.pl +23 -0
  148. data/utils/enveomics/Scripts/FastA.sample.rb +98 -0
  149. data/utils/enveomics/Scripts/FastA.slider.pl +85 -0
  150. data/utils/enveomics/Scripts/FastA.split.pl +55 -0
  151. data/utils/enveomics/Scripts/FastA.split.rb +79 -0
  152. data/utils/enveomics/Scripts/FastA.subsample.pl +131 -0
  153. data/utils/enveomics/Scripts/FastA.tag.rb +65 -0
  154. data/utils/enveomics/Scripts/FastA.toFastQ.rb +69 -0
  155. data/utils/enveomics/Scripts/FastA.wrap.rb +48 -0
  156. data/utils/enveomics/Scripts/FastQ.filter.pl +54 -0
  157. data/utils/enveomics/Scripts/FastQ.interpose.pl +90 -0
  158. data/utils/enveomics/Scripts/FastQ.maskQual.rb +89 -0
  159. data/utils/enveomics/Scripts/FastQ.offset.pl +90 -0
  160. data/utils/enveomics/Scripts/FastQ.split.pl +53 -0
  161. data/utils/enveomics/Scripts/FastQ.tag.rb +70 -0
  162. data/utils/enveomics/Scripts/FastQ.test-error.rb +81 -0
  163. data/utils/enveomics/Scripts/FastQ.toFastA.awk +24 -0
  164. data/utils/enveomics/Scripts/GFF.catsbj.pl +127 -0
  165. data/utils/enveomics/Scripts/GenBank.add_fields.rb +84 -0
  166. data/utils/enveomics/Scripts/HMM.essential.rb +351 -0
  167. data/utils/enveomics/Scripts/HMM.haai.rb +168 -0
  168. data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +83 -0
  169. data/utils/enveomics/Scripts/JPlace.distances.rb +88 -0
  170. data/utils/enveomics/Scripts/JPlace.to_iToL.rb +320 -0
  171. data/utils/enveomics/Scripts/M5nr.getSequences.rb +81 -0
  172. data/utils/enveomics/Scripts/MeTaxa.distribution.pl +198 -0
  173. data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +35 -0
  174. data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +49 -0
  175. data/utils/enveomics/Scripts/NCBIacc2tax.rb +92 -0
  176. data/utils/enveomics/Scripts/Newick.autoprune.R +27 -0
  177. data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +228 -0
  178. data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +32 -0
  179. data/utils/enveomics/Scripts/RefSeq.download.bash +48 -0
  180. data/utils/enveomics/Scripts/SRA.download.bash +55 -0
  181. data/utils/enveomics/Scripts/TRIBS.plot-test.R +36 -0
  182. data/utils/enveomics/Scripts/TRIBS.test.R +39 -0
  183. data/utils/enveomics/Scripts/Table.barplot.R +31 -0
  184. data/utils/enveomics/Scripts/Table.df2dist.R +30 -0
  185. data/utils/enveomics/Scripts/Table.filter.pl +61 -0
  186. data/utils/enveomics/Scripts/Table.merge.pl +77 -0
  187. data/utils/enveomics/Scripts/Table.prefScore.R +60 -0
  188. data/utils/enveomics/Scripts/Table.replace.rb +69 -0
  189. data/utils/enveomics/Scripts/Table.round.rb +63 -0
  190. data/utils/enveomics/Scripts/Table.split.pl +57 -0
  191. data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +227 -0
  192. data/utils/enveomics/Scripts/VCF.KaKs.rb +147 -0
  193. data/utils/enveomics/Scripts/VCF.SNPs.rb +88 -0
  194. data/utils/enveomics/Scripts/aai.rb +419 -0
  195. data/utils/enveomics/Scripts/ani.rb +362 -0
  196. data/utils/enveomics/Scripts/anir.rb +137 -0
  197. data/utils/enveomics/Scripts/clust.rand.rb +102 -0
  198. data/utils/enveomics/Scripts/gi2tax.rb +103 -0
  199. data/utils/enveomics/Scripts/in_silico_GA_GI.pl +96 -0
  200. data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
  201. data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
  202. data/utils/enveomics/Scripts/lib/enveomics.R +1 -0
  203. data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +293 -0
  204. data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +175 -0
  205. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +24 -0
  206. data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +17 -0
  207. data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +30 -0
  208. data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +253 -0
  209. data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +63 -0
  210. data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +182 -0
  211. data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +49 -0
  212. data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +74 -0
  213. data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +237 -0
  214. data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +3 -0
  215. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +31 -0
  216. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +152 -0
  217. data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +73 -0
  218. data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +135 -0
  219. data/utils/enveomics/Scripts/ogs.annotate.rb +88 -0
  220. data/utils/enveomics/Scripts/ogs.core-pan.rb +160 -0
  221. data/utils/enveomics/Scripts/ogs.extract.rb +125 -0
  222. data/utils/enveomics/Scripts/ogs.mcl.rb +186 -0
  223. data/utils/enveomics/Scripts/ogs.rb +104 -0
  224. data/utils/enveomics/Scripts/ogs.stats.rb +131 -0
  225. data/utils/enveomics/Scripts/rbm-legacy.rb +172 -0
  226. data/utils/enveomics/Scripts/rbm.rb +100 -0
  227. data/utils/enveomics/Scripts/sam.filter.rb +148 -0
  228. data/utils/enveomics/Tests/Makefile +10 -0
  229. data/utils/enveomics/Tests/Mgen_M2288.faa +3189 -0
  230. data/utils/enveomics/Tests/Mgen_M2288.fna +8282 -0
  231. data/utils/enveomics/Tests/Mgen_M2321.fna +8288 -0
  232. data/utils/enveomics/Tests/Nequ_Kin4M.faa +2970 -0
  233. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
  234. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +7 -0
  235. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +17 -0
  236. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +137 -0
  237. data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +123 -0
  238. data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +200 -0
  239. data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +55 -0
  240. data/utils/enveomics/Tests/alkB.nwk +1 -0
  241. data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +13 -0
  242. data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +17 -0
  243. data/utils/enveomics/Tests/hiv1.faa +59 -0
  244. data/utils/enveomics/Tests/hiv1.fna +134 -0
  245. data/utils/enveomics/Tests/hiv2.faa +70 -0
  246. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +233 -0
  247. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +1 -0
  248. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +233 -0
  249. data/utils/enveomics/Tests/phyla_counts.tsv +10 -0
  250. data/utils/enveomics/Tests/primate_lentivirus.ogs +11 -0
  251. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +9 -0
  252. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +8 -0
  253. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +6 -0
  254. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +9 -0
  255. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +6 -0
  256. data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +6 -0
  257. data/utils/enveomics/build_enveomics_r.bash +45 -0
  258. data/utils/enveomics/enveomics.R/DESCRIPTION +31 -0
  259. data/utils/enveomics/enveomics.R/NAMESPACE +39 -0
  260. data/utils/enveomics/enveomics.R/R/autoprune.R +155 -0
  261. data/utils/enveomics/enveomics.R/R/barplot.R +184 -0
  262. data/utils/enveomics/enveomics.R/R/cliopts.R +135 -0
  263. data/utils/enveomics/enveomics.R/R/df2dist.R +154 -0
  264. data/utils/enveomics/enveomics.R/R/growthcurve.R +331 -0
  265. data/utils/enveomics/enveomics.R/R/prefscore.R +79 -0
  266. data/utils/enveomics/enveomics.R/R/recplot.R +354 -0
  267. data/utils/enveomics/enveomics.R/R/recplot2.R +1631 -0
  268. data/utils/enveomics/enveomics.R/R/tribs.R +583 -0
  269. data/utils/enveomics/enveomics.R/R/utils.R +80 -0
  270. data/utils/enveomics/enveomics.R/README.md +81 -0
  271. data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
  272. data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
  273. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +16 -0
  274. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +16 -0
  275. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +16 -0
  276. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +25 -0
  277. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +46 -0
  278. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -0
  279. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +47 -0
  280. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -0
  281. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -0
  282. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +40 -0
  283. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +103 -0
  284. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +67 -0
  285. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -0
  286. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -0
  287. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +45 -0
  288. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +44 -0
  289. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +47 -0
  290. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +75 -0
  291. data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +50 -0
  292. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +44 -0
  293. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +139 -0
  294. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +45 -0
  295. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -0
  296. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +77 -0
  297. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -0
  298. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -0
  299. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -0
  300. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -0
  301. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +47 -0
  302. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -0
  303. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -0
  304. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +45 -0
  305. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -0
  306. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -0
  307. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -0
  308. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -0
  309. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +52 -0
  310. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -0
  311. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +51 -0
  312. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +43 -0
  313. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +82 -0
  314. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +59 -0
  315. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -0
  316. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +36 -0
  317. data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +23 -0
  318. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +68 -0
  319. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -0
  320. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -0
  321. data/utils/enveomics/enveomics.R/man/growth.curves.Rd +14 -0
  322. data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +13 -0
  323. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +78 -0
  324. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +46 -0
  325. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +45 -0
  326. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +125 -0
  327. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -0
  328. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -0
  329. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -0
  330. data/utils/enveomics/globals.mk +8 -0
  331. data/utils/enveomics/manifest.json +9 -0
  332. data/utils/multitrim/Multitrim How-To.pdf +0 -0
  333. data/utils/multitrim/README.md +67 -0
  334. data/utils/multitrim/multitrim.py +1555 -0
  335. data/utils/multitrim/multitrim.yml +13 -0
  336. data/utils/requirements.txt +4 -3
  337. metadata +304 -3
@@ -0,0 +1,355 @@
1
+ {
2
+ "tasks": [
3
+ {
4
+ "task": "BlastTab.kegg_pep2path_rest.pl",
5
+ "description": ["Takes a BLAST against KEGG_PEP (or KO) and retrieves",
6
+ "the pathways in which the subject peptides are involved."],
7
+ "requires": [
8
+ { "perl_lib": "JSON" },
9
+ { "perl_lib": "LWP::Simple" }
10
+ ],
11
+ "help_arg": "",
12
+ "options": [
13
+ {
14
+ "name": "blast.txt",
15
+ "arg": "in_file",
16
+ "mandatory": true,
17
+ "description": "Input tabular BLAST file."
18
+ },
19
+ {
20
+ "name": "Cache file",
21
+ "arg": "out_file",
22
+ "description": ["(optional) File containing the saved cache. If",
23
+ "unset, the cache won't be recoverable across instances of this",
24
+ "script. It is strongly recommended to set a file. Multiple",
25
+ "parallel instances of this script may use the same cache file."]
26
+ },
27
+ ">",
28
+ {
29
+ "name": "output.txt",
30
+ "arg": "out_file",
31
+ "mandatory": true,
32
+ "description": ["Tab-delimited output file, with the columns: (1)",
33
+ "Query ID, (2) Subject ID, (3) Pathway ID, (4) Pathway (reference)",
34
+ "description, (5) Organism."]
35
+ }
36
+ ]
37
+ },
38
+ {
39
+ "task": "M5nr.getSequences.rb",
40
+ "description": ["Downloads a set of sequences from M5nr with a given",
41
+ "functional annotation."],
42
+ "help_arg": "--help",
43
+ "requires": [
44
+ { "ruby_gem": "restclient" },
45
+ { "ruby_gem": "open-uri" },
46
+ { "ruby_gem": "JSON" }
47
+ ],
48
+ "options": [
49
+ {
50
+ "opt": "--function",
51
+ "arg": "string",
52
+ "mandatory": true,
53
+ "description": "Functional annotation."
54
+ },
55
+ {
56
+ "name": "Output file",
57
+ "opt": "--out",
58
+ "arg": "out_file",
59
+ "mandatory": true,
60
+ "description": "File containing the sequences."
61
+ },
62
+ {
63
+ "opt": "--max",
64
+ "arg": "integer",
65
+ "default": 0,
66
+ "description": ["Maximum number of sequences to download. Set to",
67
+ "0 to download all."]
68
+ },
69
+ {
70
+ "opt": "--recover",
71
+ "description": "If set, tries to recover a previous FastA."
72
+ },
73
+ {
74
+ "name": "URL",
75
+ "opt": "--url",
76
+ "arg": "string",
77
+ "default": "http://api.metagenomics.anl.gov/m5nr",
78
+ "description": "URL for M5nr API."
79
+ },
80
+ {
81
+ "opt": "--quiet",
82
+ "description": "Run quietly (no STDERR output)."
83
+ }
84
+ ]
85
+ },
86
+ {
87
+ "task": "EBIseq2tax.rb",
88
+ "description": ["Maps a list of EBI-supported IDs to their corresponding",
89
+ "NCBI taxonomy using EBI RESTful API."],
90
+ "warn": ["Avoid using this script on millions of entries at a time,",
91
+ "since each entry elicits requests to EBI and NCBI servers."],
92
+ "requires": [
93
+ { "ruby_gem": "nokogiri" }
94
+ ],
95
+ "see_also": ["NCBIacc2tax.rb"],
96
+ "help_arg": "--help",
97
+ "options": [
98
+ {
99
+ "name": "IDs",
100
+ "opt": "--ids",
101
+ "arg": "string",
102
+ "description": "Comma-separated list of EBI IDs.",
103
+ "note": "Required unless 'Input file' is passed."
104
+ },
105
+ {
106
+ "name": "Input file",
107
+ "opt": "--infile",
108
+ "arg": "in_file",
109
+ "description": ["Raw text file containing the list of EBI IDs,",
110
+ "one per line."],
111
+ "note": "Required unless 'IDs' is passed."
112
+ },
113
+ {
114
+ "opt": "--database",
115
+ "arg": "string",
116
+ "default": "uniprotkb",
117
+ "description": "EBI database defining the EBI IDs."
118
+ },
119
+ {
120
+ "opt": "--ranks",
121
+ "arg": "string",
122
+ "description": "Comma-delimited list of taxonomic ranks to report.",
123
+ "default": "superkingdom,phylum,class,order,family,genus,species"
124
+ },
125
+ {
126
+ "opt": "--noheader",
127
+ "description": "Do not include a header in the output."
128
+ },
129
+ {
130
+ "opt": "--taxids",
131
+ "description": "Return Taxonomy IDs instead of scientific names."
132
+ },
133
+ {
134
+ "opt": "--quiet",
135
+ "description": "Run quietly."
136
+ }
137
+ ]
138
+ },
139
+ {
140
+ "task": "RefSeq.download.bash",
141
+ "description": ["Downloads a collection of sequences and/or annotations",
142
+ "from NCBI's RefSeq."],
143
+ "help_arg": "",
144
+ "requires": [
145
+ {
146
+ "description": "curl",
147
+ "test": "curl --version"
148
+ }
149
+ ],
150
+ "options": [
151
+ {
152
+ "name": "Organism",
153
+ "arg": "string",
154
+ "mandatory": true,
155
+ "description": ["The organism to download (e.g.,",
156
+ "Streptococcus_pneumoniae)."]
157
+ },
158
+ {
159
+ "name": "Extension",
160
+ "arg": "string",
161
+ "mandatory": true,
162
+ "default": ".*.gz",
163
+ "description": ["Extension to download. Common extensions include",
164
+ ".fna.gz (genomic sequences), .faa.gz (protein sequences),",
165
+ ".gff.gz (annotations), and .*.gz (all data)."]
166
+ },
167
+ {
168
+ "name": "Level",
169
+ "arg": "string",
170
+ "mandatory": true,
171
+ "default": "Any",
172
+ "description": ["Use only genomes with this assembly level. Common",
173
+ "levels are 'Complete Genome' and 'Contig'. By default, any",
174
+ "assembly level is allowed ('Any')."]
175
+ },
176
+ {
177
+ "name": "Directory",
178
+ "arg": "out_dir",
179
+ "description": ["Directory where the files are to be downloaded.",
180
+ "By default, same as 'Organism'."]
181
+ }
182
+ ]
183
+ },
184
+ {
185
+ "task": "SRA.download.bash",
186
+ "description": ["Downloads the set of runs from a project, sample, or",
187
+ "experiment in SRA."],
188
+ "help_arg": "",
189
+ "requires": [
190
+ {
191
+ "description": "curl",
192
+ "test": "curl --version"
193
+ }
194
+ ],
195
+ "options": [
196
+ {
197
+ "name": "SRA ID",
198
+ "arg": "string",
199
+ "mandatory": true,
200
+ "description": "ID of the SRA Project, Sample, or Experiment."
201
+ },
202
+ {
203
+ "name": "dir",
204
+ "arg": "out_dir",
205
+ "description": ["Directory where the files are to be downloaded. By",
206
+ "default, same as 'SRA ID'."]
207
+ }
208
+ ]
209
+ },
210
+ {
211
+ "task": "gi2tax.rb",
212
+ "description": ["Maps a list of NCBI GIs to their corresponding taxonomy",
213
+ "using the NCBI EUtilities."],
214
+ "warn": ["Avoid using this script on millions of entries at a time,",
215
+ "since each entry elicits two requests to NCBI's servers.",
216
+ "*IMPORTANT NOTE*: NCBI is phasing out support for GIs. Please use",
217
+ "acc.ver instead with NCBIacc2tax.rb."],
218
+ "requires": [
219
+ { "ruby_gem": "nokogiri" },
220
+ { "ruby_gem": "json" }
221
+ ],
222
+ "see_also": ["NCBIacc2tax.rb"],
223
+ "help_arg": "--help",
224
+ "options": [
225
+ {
226
+ "name": "GIs",
227
+ "opt": "--gis",
228
+ "arg": "string",
229
+ "description": "Comma-separated list of GIs.",
230
+ "note": "Required unless 'Input file' is passed."
231
+ },
232
+ {
233
+ "name": "Input file",
234
+ "opt": "--infile",
235
+ "arg": "in_file",
236
+ "description": ["Raw text file containing the list of GIs, one per",
237
+ "line."],
238
+ "note": "Required unless 'GIs' is passed."
239
+ },
240
+ {
241
+ "opt": "--protein",
242
+ "description": ["Use if the GIs are proteins. Otherwise, GIs are",
243
+ "assumed to be from the Nuccore Database."]
244
+ },
245
+ {
246
+ "opt": "--ranks",
247
+ "arg": "string",
248
+ "description": "Taxonomic ranks to report, separated by commas.",
249
+ "default": "superkingdom,phylum,class,order,family,genus,species"
250
+ },
251
+ {
252
+ "name": "No header",
253
+ "opt": "--noheader",
254
+ "description": "Do not include a header in the output."
255
+ },
256
+ {
257
+ "name": "Tax IDs",
258
+ "opt": "--taxids",
259
+ "description": "Return Taxonomy IDs instead of scientific names."
260
+ },
261
+ {
262
+ "name": "Exact GI",
263
+ "opt": "--exact-gi",
264
+ "description": ["Returns only taxonomy associated with the exact GI",
265
+ "passed. By default, it attempts to update accession versions if",
266
+ "possible."]
267
+ },
268
+ {
269
+ "opt": "--ignore-missing",
270
+ "description": ["Does not report missing GIs in the output file. By",
271
+ "default, it reports GI and empty values for all other columns."]
272
+ },
273
+ {
274
+ "opt": "--quiet",
275
+ "description": "Run quietly."
276
+ },
277
+ ">",
278
+ {
279
+ "arg": "out_file",
280
+ "mandatory": true,
281
+ "description": ["Tab-delimited table with GIs and taxonomic",
282
+ "classification."]
283
+ }
284
+ ]
285
+ },
286
+ {
287
+ "task": "NCBIacc2tax.rb",
288
+ "description": ["Maps a list of NCBI accessions to their corresponding",
289
+ "taxonomy using the NCBI EUtilities."],
290
+ "warn": ["Avoid using this script on millions of entries at a time,",
291
+ "since each entry elicits two requests to NCBI's servers."],
292
+ "requires": [
293
+ { "ruby_gem": "nokogiri" },
294
+ { "ruby_gem": "json" }
295
+ ],
296
+ "see_also": ["EBIseq2tax.rb"],
297
+ "help_arg": "--help",
298
+ "options": [
299
+ {
300
+ "name": "Accessions",
301
+ "opt": "--acc",
302
+ "arg": "string",
303
+ "description": "Comma-separated list of accessions.",
304
+ "note": "Required unless 'Input file' is passed."
305
+ },
306
+ {
307
+ "name": "Input file",
308
+ "opt": "--infile",
309
+ "arg": "in_file",
310
+ "description": ["Raw text file containing the list of accessions,",
311
+ "one per line."],
312
+ "note": "Required unless 'Accessions' is passed."
313
+ },
314
+ {
315
+ "opt": "--protein",
316
+ "description": ["Use if the accessions are proteins. Otherwise,",
317
+ "accessions are assumed to be from the Nuccore Database."]
318
+ },
319
+ {
320
+ "opt": "--ranks",
321
+ "arg": "string",
322
+ "description": "Taxonomic ranks to report, separated by commas.",
323
+ "default": "superkingdom,phylum,class,order,family,genus,species"
324
+ },
325
+ {
326
+ "name": "No header",
327
+ "opt": "--noheader",
328
+ "description": "Do not include a header in the output."
329
+ },
330
+ {
331
+ "name": "Tax IDs",
332
+ "opt": "--taxids",
333
+ "description": "Return Taxonomy IDs instead of scientific names."
334
+ },
335
+ {
336
+ "opt": "--ignore-missing",
337
+ "description": ["Does not report missing accessions in the output",
338
+ "file. By default, it reports the accession and empty values for all other",
339
+ "columns."]
340
+ },
341
+ {
342
+ "opt": "--quiet",
343
+ "description": "Run quietly."
344
+ },
345
+ ">",
346
+ {
347
+ "arg": "out_file",
348
+ "mandatory": true,
349
+ "description": ["Tab-delimited table with accessions and taxonomic",
350
+ "classification."]
351
+ }
352
+ ]
353
+ }
354
+ ]
355
+ }
@@ -0,0 +1,638 @@
1
+ {
2
+ "tasks": [
3
+ {
4
+ "task": "aai.rb",
5
+ "description": ["Calculates the Average Amino acid Identity between two",
6
+ "genomes."],
7
+ "help_arg": "--help",
8
+ "see_also": ["ani.rb", "rbm.rb"],
9
+ "cite": [
10
+ ["Konstantinidis & Tiedje, 2005, JBac",
11
+ "https://dx.doi.org/10.1128%2FJB.187.18.6258-6264.2005"],
12
+ ["Altschul et al, 1990, JMB (BLAST)",
13
+ "https://dx.doi.org/10.1016/S0022-2836(05)80360-2"],
14
+ ["Kent WJ, 2002, Genome Res (BLAT)",
15
+ "https://dx.doi.org/10.1101/gr.229202"],
16
+ ["Buchfink B, Xie C, Huson D, 2015, Nat Meth (Diamond)",
17
+ "https://dx.doi.org/10.1038/nmeth.3176"]
18
+ ],
19
+ "options": [
20
+ {
21
+ "name": "Sequence 1",
22
+ "opt": "--seq1",
23
+ "arg": "in_file",
24
+ "mandatory": true,
25
+ "description": "FastA file containing the genome 1 (proteins).",
26
+ "note": ["Alternatively, you can supply the NCBI-acc of a genome",
27
+ " (nucleotides) with the format ncbi:CP014272 instead of files."]
28
+ },
29
+ {
30
+ "name": "Sequence 2",
31
+ "opt": "--seq2",
32
+ "arg": "in_file",
33
+ "mandatory": true,
34
+ "description": "FastA file containing the genome 2.",
35
+ "note": ["Alternatively, you can supply the NCBI-acc of a genome",
36
+ " (nucleotides) with the format ncbi:NC_004337 instead of files."]
37
+ },
38
+ {
39
+ "name": "Length",
40
+ "opt": "--len",
41
+ "arg": "integer",
42
+ "description": "Minimum alignment length (in aa).",
43
+ "default": 0
44
+ },
45
+ {
46
+ "name": "Length fraction",
47
+ "opt": "--len-fraction",
48
+ "arg": "float",
49
+ "description": ["Minimum alignment length as a fraction of the",
50
+ "shorter sequence (range 0-1)."],
51
+ "default": 0.0
52
+ },
53
+ {
54
+ "name": "Identity",
55
+ "opt": "--id",
56
+ "arg": "float",
57
+ "description": "Minimum alignment identity (in %).",
58
+ "default": 20.0
59
+ },
60
+ {
61
+ "name": "Bit-score",
62
+ "opt": "--bitscore",
63
+ "arg": "float",
64
+ "description": "Minimum bit score (in bits).",
65
+ "default": 0.0
66
+ },
67
+ {
68
+ "opt": "--hits",
69
+ "arg": "float",
70
+ "description": "Minimum number of hits.",
71
+ "default": 50.0
72
+ },
73
+ {
74
+ "name": "Nucleotides",
75
+ "opt": "--nucl",
76
+ "description": ["The input sequences are nucleotides (genes), not",
77
+ "proteins."]
78
+ },
79
+ {
80
+ "name": "Max ACTG",
81
+ "opt": "--max-actg",
82
+ "arg": "float",
83
+ "default": 0.95,
84
+ "description": ["Maximum fraction of ACTGN in the sequences before",
85
+ "assuming nucleotides."]
86
+ },
87
+ {
88
+ "name": "Executables",
89
+ "opt": "--bin",
90
+ "arg": "in_dir",
91
+ "description": ["Path to the directory containing the binaries of",
92
+ "the search program."]
93
+ },
94
+ {
95
+ "opt": "--program",
96
+ "arg": "select",
97
+ "values": ["blast+","blast","blat","diamond"],
98
+ "default": "blast+",
99
+ "description": "Search program to be used.",
100
+ "note": ["Make sure that you have installed the search program you",
101
+ "want to use. If you have downloaded the program, but it's not",
102
+ "installed, please use the Executables option above."]
103
+ },
104
+ {
105
+ "opt": "--threads",
106
+ "arg": "integer",
107
+ "description": "Number of parallel threads to be used.",
108
+ "default": 1
109
+ },
110
+ {
111
+ "name": "SQLite3 DB",
112
+ "opt": "--sqlite3",
113
+ "arg": "out_file",
114
+ "description": ["Path to the SQLite3 database to create (or update)",
115
+ "with the results."]
116
+ },
117
+ {
118
+ "name": "Name 1",
119
+ "opt": "--name1",
120
+ "arg": "string",
121
+ "description": ["Name of Sequence 1 to use in SQLite3 DB. By default",
122
+ "determined by filename."]
123
+ },
124
+ {
125
+ "name": "Name 2",
126
+ "opt": "--name2",
127
+ "arg": "string",
128
+ "description": ["Name of Sequence 2 to use in SQLite3 DB. By default",
129
+ "determined by filename."]
130
+ },
131
+ {
132
+ "name": "Don't save RBM",
133
+ "opt": "--no-save-rbm",
134
+ "description": ["Don't save the reciprocal best matches in the",
135
+ "--sqlite3 database."]
136
+ },
137
+ {
138
+ "opt": "--lookup-first",
139
+ "description": ["Indicates if the AAI should be looked up first in",
140
+ "the database. Requires SQLite3 DB, Auto, Name 1, and Name 2.",
141
+ "Incompatible with Result, Tab, Out, and RBM."]
142
+ },
143
+ {
144
+ "name": "Precision",
145
+ "opt": "--dec",
146
+ "arg": "integer",
147
+ "default": 2,
148
+ "description": "Decimal positions to report."
149
+ },
150
+ {
151
+ "name": "RBM",
152
+ "opt": "--rbm",
153
+ "arg": "out_file",
154
+ "description": "Saves a file with the reciprocal best matches."
155
+ },
156
+ {
157
+ "opt": "--out",
158
+ "arg": "out_file",
159
+ "description": ["Saves a file describing the alignments used for",
160
+ "two-way AAI."]
161
+ },
162
+ {
163
+ "name": "Result",
164
+ "opt": "--res",
165
+ "arg": "out_file",
166
+ "description": "Saves a file with the final results."
167
+ },
168
+ {
169
+ "opt": "--tab",
170
+ "arg": "out_file",
171
+ "description": ["Saves a file with the final two-way results in a",
172
+ "tab-delimited form. The columns are (in that order): AAI,",
173
+ "standard deviation, proteins used, proteins in the smallest",
174
+ "genome."]
175
+ },
176
+ {
177
+ "opt": "--auto",
178
+ "description": ["ONLY outputs the AAI value in STDOUT (or nothing,",
179
+ "if calculation fails)."]
180
+ },
181
+ {
182
+ "opt": "--quiet",
183
+ "description": "Run quietly (no STDERR output)."
184
+ }
185
+ ]
186
+ },
187
+ {
188
+ "task": "ani.rb",
189
+ "description": ["Calculates the Average Nucleotide Identity between two",
190
+ "genomes."],
191
+ "help_arg": "--help",
192
+ "see_also": ["aai.rb","rbm.rb","HMM.essential.rb"],
193
+ "cite": [
194
+ ["Konstantinidis & Tiedje, 2005, PNAS",
195
+ "http://dx.doi.org/10.1073%2Fpnas.0409727102"],
196
+ ["Altschul et al, 1990, JMB (BLAST)",
197
+ "http://dx.doi.org/10.1016/S0022-2836(05)80360-2"],
198
+ ["Kent WJ, 2002, Genome Res (BLAT)",
199
+ "https://dx.doi.org/10.1101/gr.229202"]
200
+ ],
201
+ "options": [
202
+ {
203
+ "name": "Sequence 1",
204
+ "opt": "--seq1",
205
+ "arg": "in_file",
206
+ "mandatory": true,
207
+ "description": "FastA file containing the genome 1.",
208
+ "note": ["Alternatively, you can supply an NCBI-acc with the format",
209
+ "ncbi:CP014272 instead of files."]
210
+ },
211
+ {
212
+ "name": "Sequence 2",
213
+ "opt": "--seq2",
214
+ "arg": "in_file",
215
+ "mandatory": true,
216
+ "description": "FastA file containing the genome 2.",
217
+ "note": ["Alternatively, you can supply an NCBI-acc with the format",
218
+ "ncbi:AE005174 instead of files."]
219
+ },
220
+ {
221
+ "name": "Window",
222
+ "opt": "--win",
223
+ "arg": "integer",
224
+ "description": "Window size in the ANI calculation (in bp).",
225
+ "default": 1000
226
+ },
227
+ {
228
+ "opt": "--step",
229
+ "arg": "integer",
230
+ "description": "Step size in the ANI calculation (in bp).",
231
+ "default": 200
232
+ },
233
+ {
234
+ "name": "Length",
235
+ "opt": "--len",
236
+ "arg": "integer",
237
+ "description": "Minimum alignment length (in bp).",
238
+ "default": 700
239
+ },
240
+ {
241
+ "name": "Identity",
242
+ "opt": "--id",
243
+ "arg": "float",
244
+ "description": "Minimum alignment identity (in %).",
245
+ "default": 70.0
246
+ },
247
+ {
248
+ "opt": "--hits",
249
+ "arg": "integer",
250
+ "description": "Minimum number of hits.",
251
+ "default": 50
252
+ },
253
+ {
254
+ "name": "No correction",
255
+ "opt": "--nocorrection",
256
+ "description": "Report values without post-hoc correction."
257
+ },
258
+ {
259
+ "name": "Min ACTG",
260
+ "opt": "--min-actg",
261
+ "arg": "float",
262
+ "default": 0.95,
263
+ "description": ["Minimum fraction of ACTGN in the sequences before",
264
+ "assuming proteins."]
265
+ },
266
+ {
267
+ "name": "Executables",
268
+ "opt": "--bin",
269
+ "arg": "in_dir",
270
+ "description": ["Directory containing the binaries of the search",
271
+ "program."]
272
+ },
273
+ {
274
+ "opt": "--program",
275
+ "arg": "select",
276
+ "values": ["blast+", "blast", "blat"],
277
+ "default": "blast+",
278
+ "description": "Search program to be used.",
279
+ "note": ["Make sure that you have installed the search program you",
280
+ "want to use. If you have downloaded the program, but it's not",
281
+ "installed, please use the Executables option above."]
282
+ },
283
+ {
284
+ "opt": "--threads",
285
+ "arg": "integer",
286
+ "description": "Number of parallel threads to be used.",
287
+ "default": 1
288
+ },
289
+ {
290
+ "name": "SQLite3 DB",
291
+ "opt": "--sqlite3",
292
+ "arg": "out_file",
293
+ "description": ["Path to the SQLite3 database to create (or update)",
294
+ "with the results."]
295
+ },
296
+ {
297
+ "name": "Name 1",
298
+ "opt": "--name1",
299
+ "arg": "string",
300
+ "description": ["Name of Sequence 1 to use in SQLite3 DB. By default",
301
+ "it's determined by the filename."]
302
+ },
303
+ {
304
+ "name": "Name 2",
305
+ "opt": "--name2",
306
+ "arg": "string",
307
+ "description": ["Name of Sequence 2 to use in SQLite3 DB. By default",
308
+ "it's determined by the filename."]
309
+ },
310
+ {
311
+ "name": "Don't save regions",
312
+ "opt": "--no-save-regions",
313
+ "description": "Don't save the fragments in the SQLite3 database."
314
+ },
315
+ {
316
+ "name": "Don't save RBM",
317
+ "opt": "--no-save-rbm",
318
+ "description": ["Don't save the reciprocal best matches in the",
319
+ "--sqlite3 database."]
320
+ },
321
+ {
322
+ "opt": "--lookup-first",
323
+ "description": ["Indicates if the ANI should be looked up first in",
324
+ "the database. Requires SQLite3 DB, Auto, Name 1, and Name 2.",
325
+ "Incompatible with Result, Tab, and Out."]
326
+ },
327
+ {
328
+ "name": "Precision",
329
+ "opt": "--dec",
330
+ "arg": "integer",
331
+ "description": "Decimal positions to report.",
332
+ "default": 2
333
+ },
334
+ {
335
+ "opt": "--out",
336
+ "arg": "out_file",
337
+ "description": ["Saves a file describing the alignments used for",
338
+ "two-way ANI."]
339
+ },
340
+ {
341
+ "name": "Result",
342
+ "opt": "--res",
343
+ "arg": "out_file",
344
+ "description": "Saves a file with the final results."
345
+ },
346
+ {
347
+ "opt": "--tab",
348
+ "arg": "out_file",
349
+ "description": ["Saves a file with the final two-way results in a",
350
+ "tab-delimited form. The columns are (in that order): ANI,",
351
+ "standard deviation, fragments used, fragments in the smallest",
352
+ "genome."]
353
+ },
354
+ {
355
+ "opt": "--auto",
356
+ "description": ["ONLY outputs the ANI value in STDOUT (or nothing,",
357
+ "if calculation fails)."]
358
+ },
359
+ {
360
+ "opt": "--quiet",
361
+ "description": "Run quietly (no STDERR output)."
362
+ }
363
+ ]
364
+ },
365
+ {
366
+ "task": "anir.rb",
367
+ "description": ["Estimates ANIr: the Average Nucleotide Identity of",
368
+ "reads against a genome."],
369
+ "help_arg": "--help",
370
+ "see_also": ["ani.rb", "sam.filter.rb"],
371
+ "options": [
372
+ {
373
+ "opt": "--reads",
374
+ "arg": "in_file",
375
+ "description": "Metagenomic reads."
376
+ },
377
+ {
378
+ "opt": "--genome",
379
+ "arg": "in_file",
380
+ "description": "Genome assembly."
381
+ },
382
+ {
383
+ "opt": "--mapping",
384
+ "arg": "in_file",
385
+ "description": "Mapping file."
386
+ },
387
+ {
388
+ "opt": "--list",
389
+ "arg": "out_file",
390
+ "description": "Output file with identities."
391
+ },
392
+ {
393
+ "opt": "--hist",
394
+ "arg": "out_file",
395
+ "description": "Output file with histogram."
396
+ },
397
+ {
398
+ "opt": "--tab",
399
+ "arg": "out_file",
400
+ "description": "Output file with results in tabular format."
401
+ },
402
+ {
403
+ "name": "Reads format",
404
+ "opt": "--r-format",
405
+ "arg": "select",
406
+ "description": ["Metagenomic reads format: fastq or fasta.",
407
+ "Both options support compression with .gz file extension."],
408
+ "values": ["fastq", "fasta"],
409
+ "default": "fastq"
410
+ },
411
+ {
412
+ "name": "Reads type",
413
+ "opt": "--r-type",
414
+ "arg": "select",
415
+ "description": ["Type of metagenomic reads: Single reads (single),",
416
+ "coupled reads in separate files (-m must be comma-delimited;",
417
+ "coupled), or coupled reads in a single interposed file",
418
+ "(interleaved)."],
419
+ "values": ["single", "coupled", "interleaved"],
420
+ "default": "single"
421
+ },
422
+ {
423
+ "name": "Genome format",
424
+ "opt": "--g-format",
425
+ "arg": "select",
426
+ "description": ["Genome assembly format: fasta or list.",
427
+ "Both options support compression with .gz file extension.",
428
+ "If passed in mapping-read mode, filters only matches to these",
429
+ "contigs."],
430
+ "values": ["fasta", "list"],
431
+ "default": "fasta"
432
+ },
433
+ {
434
+ "name": "Mapping format",
435
+ "opt": "--m-format",
436
+ "arg": "select",
437
+ "description": ["Mapping file format: sam, bam, tab, or list.",
438
+ "All except bam support compression with .gz file extension."],
439
+ "values": ["sam", "bam", "tab", "list"],
440
+ "default": "sam"
441
+ },
442
+ {
443
+ "opt": "--identity",
444
+ "arg": "float",
445
+ "description": "Set a fixed threshold of percent identity.",
446
+ "default": 95.0
447
+ },
448
+ {
449
+ "opt": "--algorithm",
450
+ "arg": "select",
451
+ "description": ["Set an algorithm to automatically detect identity",
452
+ "threshold: Valley detection by E-M of Gaussian Mixture Model",
453
+ "(gmm), fixed threshold (see Identity; fix),",
454
+ "or pick gmm or fix depending on bimodality (see Bimodality; auto)."],
455
+ "values": ["gmm", "fix", "auto"],
456
+ "default": "auto"
457
+ },
458
+ {
459
+ "opt": "--bimodality",
460
+ "arg": "float",
461
+ "description": ["Threshold of bimodality below which the algorithm",
462
+ "is set to fix. The coefficient used is the de Michele & Accatino",
463
+ "(2014) B index."],
464
+ "default": 0.5
465
+ },
466
+ {
467
+ "opt": "--coefficient",
468
+ "arg": "select",
469
+ "description": ["Coefficient of bimodality for Algorithm auto: ",
470
+ "Sarle's bimodality coefficient b (sarle), or",
471
+ "de Michele and Accatino (2014 PLoS ONE) B index",
472
+ "(use with Bimodality 0.1, dma)."],
473
+ "values": ["sarle", "dma"],
474
+ "default": "sarle"
475
+ },
476
+ {
477
+ "opt": "--bin-size",
478
+ "arg": "float",
479
+ "description": "Width of histogram bins (in percent identity).",
480
+ "default": 1.0
481
+ },
482
+ {
483
+ "opt": "--threads",
484
+ "arg": "integer",
485
+ "description": "Threads to use."
486
+ },
487
+ {
488
+ "opt": "--log",
489
+ "arg": "out_file",
490
+ "description": "Log file to save output."
491
+ },
492
+ {
493
+ "opt": "--quiet",
494
+ "description": "Run quietly."
495
+ }
496
+ ]
497
+ },
498
+ {
499
+ "task": "HMM.haai.rb",
500
+ "description": ["Estimates Average Amino Acid Identity (AAI) from the",
501
+ "essential genes extracted and aligned by HMM.essential.rb (see",
502
+ "Alignments)."],
503
+ "help_arg": "--help",
504
+ "see_also": ["HMM.essential.rb","aai.rb"],
505
+ "options": [
506
+ {
507
+ "name": "Alignments 1",
508
+ "opt": "-1",
509
+ "arg": "in_file",
510
+ "description": "Input alignments file for genome 1."
511
+ },
512
+ {
513
+ "name": "Alignments 2",
514
+ "opt": "-2",
515
+ "arg": "in_file",
516
+ "description": "Input alignments file for genome 2."
517
+ },
518
+ {
519
+ "name": "Alignment output",
520
+ "opt": "--aln-out",
521
+ "arg": "out_file",
522
+ "description": "Output file containing the aligned proteins."
523
+ },
524
+ {
525
+ "opt": "--components",
526
+ "arg": "out_file",
527
+ "description": ["Output file containing the components of the",
528
+ "estimation. Tab-delimited file with model name, matches, and",
529
+ "columns."]
530
+ },
531
+ {
532
+ "opt": "--quiet",
533
+ "description": "Run quietly (no STDERR output)."
534
+ }
535
+ ]
536
+ },
537
+ {
538
+ "task": "rbm.rb",
539
+ "description": ["Finds the reciprocal best matches between two sets of",
540
+ "sequences."],
541
+ "help_arg": "--help",
542
+ "cite":[
543
+ ["Camacho et al, 2009, BMC Bioinf (BLAST+)",
544
+ "https://doi.org/10.1186/1471-2105-10-421"],
545
+ ["Altschul et al, 1990, JMB (BLAST)",
546
+ "http://dx.doi.org/10.1016/S0022-2836(05)80360-2"],
547
+ ["Buchfink B, Xie C, Huson D, 2015, Nat Meth (Diamond)",
548
+ "https://dx.doi.org/10.1038/nmeth.3176"],
549
+ ["Kent, 2002, Genome Res (BLAT)",
550
+ "https://doi.org/10.1101/gr.229202"]
551
+ ],
552
+ "options": [
553
+ {
554
+ "name": "Sequence 1",
555
+ "opt": "--seq1",
556
+ "arg": "in_file",
557
+ "mandatory": true,
558
+ "description": "FastA file containing the genome 1."
559
+ },
560
+ {
561
+ "name": "Sequence 2",
562
+ "opt": "--seq2",
563
+ "arg": "in_file",
564
+ "mandatory": true,
565
+ "description": "FastA file containing the genome 2."
566
+ },
567
+ {
568
+ "name": "Nucleotides",
569
+ "opt": "--nucl",
570
+ "description": ["Sequences are assumed to be nucleotides (proteins",
571
+ "by default)."]
572
+ },
573
+ {
574
+ "name": "Length",
575
+ "opt": "--len",
576
+ "arg": "integer",
577
+ "description": "Minimum alignment length (in residues).",
578
+ "default": 0
579
+ },
580
+ {
581
+ "name": "Fraction",
582
+ "opt": "--fract",
583
+ "arg": "float",
584
+ "default": 0.0,
585
+ "description": ["Minimum alignment length (as a fraction of the",
586
+ "query). If set, requires BLAST+ or Diamond (see Program)."]
587
+ },
588
+ {
589
+ "name": "Identity",
590
+ "opt": "--id",
591
+ "arg": "float",
592
+ "description": "Minimum alignment identity (in %).",
593
+ "default": 0.0
594
+ },
595
+ {
596
+ "name": "Bit score",
597
+ "opt": "--score",
598
+ "arg": "float",
599
+ "default": 0.0,
600
+ "description": "Minimum alignment score (in bits)."
601
+ },
602
+ {
603
+ "name": "Executables",
604
+ "opt": "--bin",
605
+ "arg": "in_dir",
606
+ "description": ["Directory containing the binaries of the search",
607
+ "program."]
608
+ },
609
+ {
610
+ "opt": "--program",
611
+ "arg": "select",
612
+ "values": ["blast+", "blast", "blat", "diamond"],
613
+ "default": "blast+",
614
+ "description": "Search program to be used.",
615
+ "note": ["Make sure that you have installed the search program you",
616
+ "want to use. If you have downloaded the program, but it's not",
617
+ "installed, please use the Executables option above."]
618
+ },
619
+ {
620
+ "opt": "--threads",
621
+ "arg": "integer",
622
+ "description": "Number of parallel threads to be used.",
623
+ "default": 1
624
+ },
625
+ {
626
+ "opt": "--quiet",
627
+ "description": "Run quietly (no STDERR output)."
628
+ },
629
+ ">",
630
+ {
631
+ "arg": "out_file",
632
+ "mandatory": true,
633
+ "description": "Reciprocal Best Matches in BLAST tabular format."
634
+ }
635
+ ]
636
+ }
637
+ ]
638
+ }