miga-base 0.7.26.0 → 1.0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (337) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/_data/aai-intax.blast.tsv.gz +0 -0
  3. data/lib/miga/_data/aai-intax.diamond.tsv.gz +0 -0
  4. data/lib/miga/_data/aai-novel.blast.tsv.gz +0 -0
  5. data/lib/miga/_data/aai-novel.diamond.tsv.gz +0 -0
  6. data/lib/miga/cli/action/classify_wf.rb +2 -2
  7. data/lib/miga/cli/action/derep_wf.rb +1 -1
  8. data/lib/miga/cli/action/doctor.rb +57 -14
  9. data/lib/miga/cli/action/doctor/base.rb +47 -23
  10. data/lib/miga/cli/action/init.rb +11 -7
  11. data/lib/miga/cli/action/init/files_helper.rb +1 -0
  12. data/lib/miga/cli/action/ncbi_get.rb +3 -3
  13. data/lib/miga/cli/action/tax_dist.rb +2 -2
  14. data/lib/miga/cli/action/wf.rb +5 -4
  15. data/lib/miga/common.rb +1 -0
  16. data/lib/miga/daemon.rb +11 -4
  17. data/lib/miga/dataset/result.rb +10 -6
  18. data/lib/miga/json.rb +5 -4
  19. data/lib/miga/metadata.rb +5 -1
  20. data/lib/miga/parallel.rb +36 -0
  21. data/lib/miga/project.rb +8 -8
  22. data/lib/miga/project/base.rb +4 -4
  23. data/lib/miga/project/result.rb +2 -2
  24. data/lib/miga/sqlite.rb +10 -2
  25. data/lib/miga/version.rb +23 -9
  26. data/scripts/aai_distances.bash +16 -18
  27. data/scripts/ani_distances.bash +16 -17
  28. data/scripts/assembly.bash +31 -16
  29. data/scripts/haai_distances.bash +3 -27
  30. data/scripts/miga.bash +6 -4
  31. data/scripts/p.bash +1 -1
  32. data/scripts/read_quality.bash +9 -18
  33. data/scripts/trimmed_fasta.bash +14 -30
  34. data/scripts/trimmed_reads.bash +36 -36
  35. data/test/parallel_test.rb +31 -0
  36. data/test/project_test.rb +2 -1
  37. data/test/remote_dataset_test.rb +1 -1
  38. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +41964 -0
  39. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +32439 -0
  40. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +62056 -0
  41. data/utils/FastAAI/FastAAI/FastAAI +1336 -0
  42. data/utils/FastAAI/README.md +84 -0
  43. data/utils/FastAAI/kAAI_v1.0_virus.py +1296 -0
  44. data/utils/distance/commands.rb +1 -0
  45. data/utils/distance/database.rb +0 -1
  46. data/utils/distance/runner.rb +2 -4
  47. data/utils/enveomics/Docs/recplot2.md +244 -0
  48. data/utils/enveomics/Examples/aai-matrix.bash +66 -0
  49. data/utils/enveomics/Examples/ani-matrix.bash +66 -0
  50. data/utils/enveomics/Examples/essential-phylogeny.bash +105 -0
  51. data/utils/enveomics/Examples/unus-genome-phylogeny.bash +100 -0
  52. data/utils/enveomics/LICENSE.txt +73 -0
  53. data/utils/enveomics/Makefile +52 -0
  54. data/utils/enveomics/Manifest/Tasks/aasubs.json +103 -0
  55. data/utils/enveomics/Manifest/Tasks/blasttab.json +786 -0
  56. data/utils/enveomics/Manifest/Tasks/distances.json +161 -0
  57. data/utils/enveomics/Manifest/Tasks/fasta.json +802 -0
  58. data/utils/enveomics/Manifest/Tasks/fastq.json +291 -0
  59. data/utils/enveomics/Manifest/Tasks/graphics.json +126 -0
  60. data/utils/enveomics/Manifest/Tasks/mapping.json +137 -0
  61. data/utils/enveomics/Manifest/Tasks/ogs.json +382 -0
  62. data/utils/enveomics/Manifest/Tasks/other.json +906 -0
  63. data/utils/enveomics/Manifest/Tasks/remote.json +355 -0
  64. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +638 -0
  65. data/utils/enveomics/Manifest/Tasks/tables.json +308 -0
  66. data/utils/enveomics/Manifest/Tasks/trees.json +68 -0
  67. data/utils/enveomics/Manifest/Tasks/variants.json +111 -0
  68. data/utils/enveomics/Manifest/categories.json +165 -0
  69. data/utils/enveomics/Manifest/examples.json +154 -0
  70. data/utils/enveomics/Manifest/tasks.json +4 -0
  71. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +69 -0
  72. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -0
  73. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -0
  74. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -0
  75. data/utils/enveomics/Pipelines/assembly.pbs/README.md +189 -0
  76. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +112 -0
  77. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +23 -0
  78. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +44 -0
  79. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +50 -0
  80. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +37 -0
  81. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +68 -0
  82. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +49 -0
  83. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +80 -0
  84. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +57 -0
  85. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +63 -0
  86. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +38 -0
  87. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +73 -0
  88. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +21 -0
  89. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +72 -0
  90. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +98 -0
  91. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -0
  92. data/utils/enveomics/Pipelines/blast.pbs/README.md +127 -0
  93. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +109 -0
  94. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +128 -0
  95. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +16 -0
  96. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +22 -0
  97. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +26 -0
  98. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +89 -0
  99. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +29 -0
  100. data/utils/enveomics/Pipelines/idba.pbs/README.md +49 -0
  101. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +95 -0
  102. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +56 -0
  103. data/utils/enveomics/Pipelines/trim.pbs/README.md +54 -0
  104. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +70 -0
  105. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +130 -0
  106. data/utils/enveomics/README.md +42 -0
  107. data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +171 -0
  108. data/utils/enveomics/Scripts/Aln.cat.rb +221 -0
  109. data/utils/enveomics/Scripts/Aln.convert.pl +35 -0
  110. data/utils/enveomics/Scripts/AlphaDiversity.pl +152 -0
  111. data/utils/enveomics/Scripts/BedGraph.tad.rb +93 -0
  112. data/utils/enveomics/Scripts/BedGraph.window.rb +71 -0
  113. data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +102 -0
  114. data/utils/enveomics/Scripts/BlastTab.addlen.rb +63 -0
  115. data/utils/enveomics/Scripts/BlastTab.advance.bash +48 -0
  116. data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +55 -0
  117. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +104 -0
  118. data/utils/enveomics/Scripts/BlastTab.cogCat.rb +76 -0
  119. data/utils/enveomics/Scripts/BlastTab.filter.pl +47 -0
  120. data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +194 -0
  121. data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +104 -0
  122. data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +157 -0
  123. data/utils/enveomics/Scripts/BlastTab.recplot2.R +48 -0
  124. data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +86 -0
  125. data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +119 -0
  126. data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +86 -0
  127. data/utils/enveomics/Scripts/BlastTab.subsample.pl +47 -0
  128. data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +114 -0
  129. data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +90 -0
  130. data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +101 -0
  131. data/utils/enveomics/Scripts/Chao1.pl +97 -0
  132. data/utils/enveomics/Scripts/CharTable.classify.rb +234 -0
  133. data/utils/enveomics/Scripts/EBIseq2tax.rb +83 -0
  134. data/utils/enveomics/Scripts/FastA.N50.pl +60 -0
  135. data/utils/enveomics/Scripts/FastA.extract.rb +152 -0
  136. data/utils/enveomics/Scripts/FastA.filter.pl +52 -0
  137. data/utils/enveomics/Scripts/FastA.filterLen.pl +28 -0
  138. data/utils/enveomics/Scripts/FastA.filterN.pl +60 -0
  139. data/utils/enveomics/Scripts/FastA.fragment.rb +100 -0
  140. data/utils/enveomics/Scripts/FastA.gc.pl +42 -0
  141. data/utils/enveomics/Scripts/FastA.interpose.pl +93 -0
  142. data/utils/enveomics/Scripts/FastA.length.pl +38 -0
  143. data/utils/enveomics/Scripts/FastA.mask.rb +89 -0
  144. data/utils/enveomics/Scripts/FastA.per_file.pl +36 -0
  145. data/utils/enveomics/Scripts/FastA.qlen.pl +57 -0
  146. data/utils/enveomics/Scripts/FastA.rename.pl +65 -0
  147. data/utils/enveomics/Scripts/FastA.revcom.pl +23 -0
  148. data/utils/enveomics/Scripts/FastA.sample.rb +98 -0
  149. data/utils/enveomics/Scripts/FastA.slider.pl +85 -0
  150. data/utils/enveomics/Scripts/FastA.split.pl +55 -0
  151. data/utils/enveomics/Scripts/FastA.split.rb +79 -0
  152. data/utils/enveomics/Scripts/FastA.subsample.pl +131 -0
  153. data/utils/enveomics/Scripts/FastA.tag.rb +65 -0
  154. data/utils/enveomics/Scripts/FastA.toFastQ.rb +69 -0
  155. data/utils/enveomics/Scripts/FastA.wrap.rb +48 -0
  156. data/utils/enveomics/Scripts/FastQ.filter.pl +54 -0
  157. data/utils/enveomics/Scripts/FastQ.interpose.pl +90 -0
  158. data/utils/enveomics/Scripts/FastQ.maskQual.rb +89 -0
  159. data/utils/enveomics/Scripts/FastQ.offset.pl +90 -0
  160. data/utils/enveomics/Scripts/FastQ.split.pl +53 -0
  161. data/utils/enveomics/Scripts/FastQ.tag.rb +70 -0
  162. data/utils/enveomics/Scripts/FastQ.test-error.rb +81 -0
  163. data/utils/enveomics/Scripts/FastQ.toFastA.awk +24 -0
  164. data/utils/enveomics/Scripts/GFF.catsbj.pl +127 -0
  165. data/utils/enveomics/Scripts/GenBank.add_fields.rb +84 -0
  166. data/utils/enveomics/Scripts/HMM.essential.rb +351 -0
  167. data/utils/enveomics/Scripts/HMM.haai.rb +168 -0
  168. data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +83 -0
  169. data/utils/enveomics/Scripts/JPlace.distances.rb +88 -0
  170. data/utils/enveomics/Scripts/JPlace.to_iToL.rb +320 -0
  171. data/utils/enveomics/Scripts/M5nr.getSequences.rb +81 -0
  172. data/utils/enveomics/Scripts/MeTaxa.distribution.pl +198 -0
  173. data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +35 -0
  174. data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +49 -0
  175. data/utils/enveomics/Scripts/NCBIacc2tax.rb +92 -0
  176. data/utils/enveomics/Scripts/Newick.autoprune.R +27 -0
  177. data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +228 -0
  178. data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +32 -0
  179. data/utils/enveomics/Scripts/RefSeq.download.bash +48 -0
  180. data/utils/enveomics/Scripts/SRA.download.bash +55 -0
  181. data/utils/enveomics/Scripts/TRIBS.plot-test.R +36 -0
  182. data/utils/enveomics/Scripts/TRIBS.test.R +39 -0
  183. data/utils/enveomics/Scripts/Table.barplot.R +31 -0
  184. data/utils/enveomics/Scripts/Table.df2dist.R +30 -0
  185. data/utils/enveomics/Scripts/Table.filter.pl +61 -0
  186. data/utils/enveomics/Scripts/Table.merge.pl +77 -0
  187. data/utils/enveomics/Scripts/Table.prefScore.R +60 -0
  188. data/utils/enveomics/Scripts/Table.replace.rb +69 -0
  189. data/utils/enveomics/Scripts/Table.round.rb +63 -0
  190. data/utils/enveomics/Scripts/Table.split.pl +57 -0
  191. data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +227 -0
  192. data/utils/enveomics/Scripts/VCF.KaKs.rb +147 -0
  193. data/utils/enveomics/Scripts/VCF.SNPs.rb +88 -0
  194. data/utils/enveomics/Scripts/aai.rb +419 -0
  195. data/utils/enveomics/Scripts/ani.rb +362 -0
  196. data/utils/enveomics/Scripts/anir.rb +137 -0
  197. data/utils/enveomics/Scripts/clust.rand.rb +102 -0
  198. data/utils/enveomics/Scripts/gi2tax.rb +103 -0
  199. data/utils/enveomics/Scripts/in_silico_GA_GI.pl +96 -0
  200. data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
  201. data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
  202. data/utils/enveomics/Scripts/lib/enveomics.R +1 -0
  203. data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +293 -0
  204. data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +175 -0
  205. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +24 -0
  206. data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +17 -0
  207. data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +30 -0
  208. data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +253 -0
  209. data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +63 -0
  210. data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +182 -0
  211. data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +49 -0
  212. data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +74 -0
  213. data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +237 -0
  214. data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +3 -0
  215. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +31 -0
  216. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +152 -0
  217. data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +73 -0
  218. data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +135 -0
  219. data/utils/enveomics/Scripts/ogs.annotate.rb +88 -0
  220. data/utils/enveomics/Scripts/ogs.core-pan.rb +160 -0
  221. data/utils/enveomics/Scripts/ogs.extract.rb +125 -0
  222. data/utils/enveomics/Scripts/ogs.mcl.rb +186 -0
  223. data/utils/enveomics/Scripts/ogs.rb +104 -0
  224. data/utils/enveomics/Scripts/ogs.stats.rb +131 -0
  225. data/utils/enveomics/Scripts/rbm-legacy.rb +172 -0
  226. data/utils/enveomics/Scripts/rbm.rb +100 -0
  227. data/utils/enveomics/Scripts/sam.filter.rb +148 -0
  228. data/utils/enveomics/Tests/Makefile +10 -0
  229. data/utils/enveomics/Tests/Mgen_M2288.faa +3189 -0
  230. data/utils/enveomics/Tests/Mgen_M2288.fna +8282 -0
  231. data/utils/enveomics/Tests/Mgen_M2321.fna +8288 -0
  232. data/utils/enveomics/Tests/Nequ_Kin4M.faa +2970 -0
  233. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
  234. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +7 -0
  235. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +17 -0
  236. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +137 -0
  237. data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +123 -0
  238. data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +200 -0
  239. data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +55 -0
  240. data/utils/enveomics/Tests/alkB.nwk +1 -0
  241. data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +13 -0
  242. data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +17 -0
  243. data/utils/enveomics/Tests/hiv1.faa +59 -0
  244. data/utils/enveomics/Tests/hiv1.fna +134 -0
  245. data/utils/enveomics/Tests/hiv2.faa +70 -0
  246. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +233 -0
  247. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +1 -0
  248. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +233 -0
  249. data/utils/enveomics/Tests/phyla_counts.tsv +10 -0
  250. data/utils/enveomics/Tests/primate_lentivirus.ogs +11 -0
  251. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +9 -0
  252. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +8 -0
  253. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +6 -0
  254. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +9 -0
  255. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +6 -0
  256. data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +6 -0
  257. data/utils/enveomics/build_enveomics_r.bash +45 -0
  258. data/utils/enveomics/enveomics.R/DESCRIPTION +31 -0
  259. data/utils/enveomics/enveomics.R/NAMESPACE +39 -0
  260. data/utils/enveomics/enveomics.R/R/autoprune.R +155 -0
  261. data/utils/enveomics/enveomics.R/R/barplot.R +184 -0
  262. data/utils/enveomics/enveomics.R/R/cliopts.R +135 -0
  263. data/utils/enveomics/enveomics.R/R/df2dist.R +154 -0
  264. data/utils/enveomics/enveomics.R/R/growthcurve.R +331 -0
  265. data/utils/enveomics/enveomics.R/R/prefscore.R +79 -0
  266. data/utils/enveomics/enveomics.R/R/recplot.R +354 -0
  267. data/utils/enveomics/enveomics.R/R/recplot2.R +1631 -0
  268. data/utils/enveomics/enveomics.R/R/tribs.R +583 -0
  269. data/utils/enveomics/enveomics.R/R/utils.R +80 -0
  270. data/utils/enveomics/enveomics.R/README.md +81 -0
  271. data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
  272. data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
  273. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +16 -0
  274. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +16 -0
  275. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +16 -0
  276. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +25 -0
  277. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +46 -0
  278. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -0
  279. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +47 -0
  280. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -0
  281. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -0
  282. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +40 -0
  283. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +103 -0
  284. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +67 -0
  285. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -0
  286. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -0
  287. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +45 -0
  288. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +44 -0
  289. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +47 -0
  290. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +75 -0
  291. data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +50 -0
  292. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +44 -0
  293. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +139 -0
  294. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +45 -0
  295. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -0
  296. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +77 -0
  297. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -0
  298. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -0
  299. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -0
  300. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -0
  301. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +47 -0
  302. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -0
  303. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -0
  304. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +45 -0
  305. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -0
  306. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -0
  307. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -0
  308. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -0
  309. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +52 -0
  310. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -0
  311. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +51 -0
  312. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +43 -0
  313. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +82 -0
  314. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +59 -0
  315. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -0
  316. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +36 -0
  317. data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +23 -0
  318. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +68 -0
  319. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -0
  320. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -0
  321. data/utils/enveomics/enveomics.R/man/growth.curves.Rd +14 -0
  322. data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +13 -0
  323. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +78 -0
  324. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +46 -0
  325. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +45 -0
  326. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +125 -0
  327. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -0
  328. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -0
  329. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -0
  330. data/utils/enveomics/globals.mk +8 -0
  331. data/utils/enveomics/manifest.json +9 -0
  332. data/utils/multitrim/Multitrim How-To.pdf +0 -0
  333. data/utils/multitrim/README.md +67 -0
  334. data/utils/multitrim/multitrim.py +1555 -0
  335. data/utils/multitrim/multitrim.yml +13 -0
  336. data/utils/requirements.txt +4 -3
  337. metadata +304 -3
@@ -0,0 +1,308 @@
1
+ {
2
+ "tasks": [
3
+ {
4
+ "task": "Table.df2dist.R",
5
+ "description": ["Transform a tab-delimited list of distances into a",
6
+ "square matrix."],
7
+ "help_arg": "--help",
8
+ "requires": [ { "r_package": "optparse" } ],
9
+ "options": [
10
+ {
11
+ "name": "Input list",
12
+ "opt": "--x",
13
+ "arg": "in_file",
14
+ "mandatory": true,
15
+ "description": "A tab-delimited table with the distances."
16
+ },
17
+ {
18
+ "opt": "--obj1-index",
19
+ "arg": "integer",
20
+ "default": 1,
21
+ "description": ["Index of the column containing the ID of the object",
22
+ "1."]
23
+ },
24
+ {
25
+ "opt": "--obj2-index",
26
+ "arg": "integer",
27
+ "default": 2,
28
+ "description": ["Index of the column containing the ID of the object",
29
+ "2."]
30
+ },
31
+ {
32
+ "opt": "--dist-index",
33
+ "arg": "integer",
34
+ "default": 3,
35
+ "description": "Index of the column containing the distance."
36
+ },
37
+ {
38
+ "opt": "--default-d",
39
+ "arg": "integer",
40
+ "description": "Default value for missing values. NA if empty."
41
+ },
42
+ {
43
+ "opt": "--max-sim",
44
+ "arg": "float",
45
+ "description": ["If not-empty, assumes that the values are",
46
+ "similarity (not distance) and this is the maximum similarity",
47
+ "(corresponding to distance 0). Applies transformation:",
48
+ "distance = (max.sim - values)/max.sim."]
49
+ },
50
+ {
51
+ "arg": "out_file",
52
+ "mandatory": true,
53
+ "description": "Output square matrix in tab-separated values format."
54
+ }
55
+ ]
56
+ },
57
+ {
58
+ "task": "Table.filter.pl",
59
+ "description": ["Extracts (and re-orders) a subset of rows from a raw",
60
+ "table."],
61
+ "help_arg": "",
62
+ "options": [
63
+ {
64
+ "name": "Key",
65
+ "opt": "-k",
66
+ "arg": "integer",
67
+ "default": 1,
68
+ "description": "Column of the table to use as key to filter."
69
+ },
70
+ {
71
+ "name": "Sep",
72
+ "opt": "-s",
73
+ "arg": "string",
74
+ "description": ["String to use as separation between rows. By",
75
+ "default, tabulation."]
76
+ },
77
+ {
78
+ "name": "Inverse",
79
+ "opt": "-i",
80
+ "description": ["If set, reports the inverse of the list (i.e.,",
81
+ "reports only rows absent in the list). Implies 'No re-order'."]
82
+ },
83
+ {
84
+ "name": "Header",
85
+ "opt": "-h",
86
+ "description": "Keep first row of the table (header) untouched."
87
+ },
88
+ {
89
+ "name": "No re-order",
90
+ "opt": "-n",
91
+ "description": ["The output has the same order of the table. By",
92
+ "default, it prints in the order of the list."]
93
+ },
94
+ {
95
+ "name": "List",
96
+ "arg": "in_file",
97
+ "mandatory": true,
98
+ "description": "List of IDs to extract."
99
+ },
100
+ {
101
+ "name": "Table",
102
+ "arg": "in_file",
103
+ "mandatory": true,
104
+ "description": "Table file containing the superset."
105
+ },
106
+ ">",
107
+ {
108
+ "name": "Subset",
109
+ "arg": "out_file",
110
+ "mandatory": true,
111
+ "description": "Table file to be created."
112
+ }
113
+ ]
114
+ },
115
+ {
116
+ "task": "Table.merge.pl",
117
+ "description": "Merges multiple (two-column) lists into one table.",
118
+ "see_also": ["Table.split.pl"],
119
+ "help_arg": "",
120
+ "options": [
121
+ {
122
+ "name": "Strings",
123
+ "opt": "-s",
124
+ "description": ["Values are read as Strings. By default, values are",
125
+ "read as numbers."]
126
+ },
127
+ {
128
+ "name": "Input delimiter",
129
+ "opt": "-i",
130
+ "arg": "string",
131
+ "description": "Input field-delimiter. By default, tabulation."
132
+ },
133
+ {
134
+ "name": "Output delimiter",
135
+ "opt": "-o",
136
+ "arg": "string",
137
+ "description": "Output field-delimiter. By default: tabulation."
138
+ },
139
+ {
140
+ "name": "No header",
141
+ "opt": "-n",
142
+ "description": ["By default, the header is determined by the file",
143
+ "names."]
144
+ },
145
+ {
146
+ "name": "Empty",
147
+ "opt": "-e",
148
+ "description": ["Default string when no value is found. By default,",
149
+ "the 'empty' value is 0 if values are numeric (i.e., unless -s is",
150
+ "set) or an empty string otherwise."]
151
+ },
152
+ {
153
+ "name": "Header",
154
+ "opt": "-h",
155
+ "arg": "string",
156
+ "default": "Tag",
157
+ "description": "Header of the first column, containing the IDs."
158
+ },
159
+ {
160
+ "name": "Header format",
161
+ "opt": "-H",
162
+ "arg": "string",
163
+ "default": "(?:.*/)?([^\\.]+)",
164
+ "description": ["Format of filenames capturing the column header in",
165
+ "the first capturing parenthesis. Non-capturing parentheses can be",
166
+ "defined as (?:...). By default: '(?:.*/)?([^\\.]+)', which",
167
+ "captures the part of the basename of the file before the first",
168
+ "dot (if any)."]
169
+ },
170
+ {
171
+ "name": "Rows to ignore",
172
+ "opt": "-r",
173
+ "arg": "integer",
174
+ "default": 0,
175
+ "description": "Number of leading rows to ignore in the input files."
176
+ },
177
+ {
178
+ "arg": "in_file",
179
+ "mandatory": true,
180
+ "multiple_sep": " ",
181
+ "description": "Input two-column lists."
182
+ },
183
+ ">",
184
+ {
185
+ "arg": "out_file",
186
+ "mandatory": true,
187
+ "description": "Merged table."
188
+ }
189
+ ]
190
+ },
191
+ {
192
+ "task": "Table.round.rb",
193
+ "description": "Rounds numbers in a table.",
194
+ "help_arg": "--help",
195
+ "options": [
196
+ {
197
+ "name": "Input file",
198
+ "opt": "--in",
199
+ "arg": "in_file",
200
+ "mandatory": true,
201
+ "description": "Input table."
202
+ },
203
+ {
204
+ "name": "Output file",
205
+ "opt": "--out",
206
+ "arg": "out_file",
207
+ "mandatory": true,
208
+ "description": "Output table."
209
+ },
210
+ {
211
+ "name": "Decimals",
212
+ "opt": "--ndigits",
213
+ "arg": "integer",
214
+ "default": 0,
215
+ "description": "Number of decimal digits."
216
+ },
217
+ {
218
+ "opt": "--floor",
219
+ "description": ["Floors the values instead of rounding them. Ignores",
220
+ "'Decimals'."]
221
+ },
222
+ {
223
+ "opt": "--ceil",
224
+ "description": ["Ceils the values instead of rounding them. Ignores",
225
+ "'Decimals'."]
226
+ },
227
+ {
228
+ "opt": "--delimiter",
229
+ "arg": "string",
230
+ "description": "String delimiting columns. By default, tabulation."
231
+ }
232
+ ]
233
+ },
234
+ {
235
+ "task": "Table.split.pl",
236
+ "description": ["Split a file with multiple columns into multiple",
237
+ "two-column lists."],
238
+ "see_also": ["Table.merge.pl"],
239
+ "help_arg": "",
240
+ "options": [
241
+ {
242
+ "name": "Input delimiter",
243
+ "opt": "-i",
244
+ "arg": "string",
245
+ "description": "Input field-delimiter. By default, tabulation."
246
+ },
247
+ {
248
+ "name": "Out prefix",
249
+ "opt": "-o",
250
+ "arg": "string",
251
+ "description": "Prefix of the output files."
252
+ },
253
+ {
254
+ "name": "Output directory",
255
+ "opt": "-d",
256
+ "arg": "out_dir",
257
+ "description": "Output directory."
258
+ },
259
+ {
260
+ "arg": "in_file",
261
+ "mandatory": true,
262
+ "description": "Input table."
263
+ }
264
+ ]
265
+ },
266
+ {
267
+ "task": "Table.replace.rb",
268
+ "description": "Replace a field in a table using a mapping file.",
269
+ "help_arg": "--help",
270
+ "options": [
271
+ {
272
+ "opt": "--map",
273
+ "arg": "in_file",
274
+ "mandatory": true,
275
+ "description": "Mapping file with two columns (key and replacement)."
276
+ },
277
+ {
278
+ "opt": "--in",
279
+ "arg": "in_file",
280
+ "mandatory": true,
281
+ "description": "Input table."
282
+ },
283
+ {
284
+ "opt": "--out",
285
+ "arg": "out_file",
286
+ "mandatory": true,
287
+ "description": "Output table."
288
+ },
289
+ {
290
+ "opt": "--key",
291
+ "arg": "integer",
292
+ "default": 1,
293
+ "description": "Column to replace in Input table."
294
+ },
295
+ {
296
+ "opt": "--unknown",
297
+ "arg": "string",
298
+ "description": "String to use whenever the key is not found in Map."
299
+ },
300
+ {
301
+ "opt": "--delimiter",
302
+ "arg": "string",
303
+ "description": "String delimiting columns. By default, tabulation."
304
+ }
305
+ ]
306
+ }
307
+ ]
308
+ }
@@ -0,0 +1,68 @@
1
+ {
2
+ "tasks": [
3
+ {
4
+ "task": "Newick.autoprune.R",
5
+ "description": ["Automatically prunes a tree, to keep representatives of",
6
+ "each clade."],
7
+ "requires": [ { "r_package": "optparse" }, { "r_package": "ape" } ],
8
+ "cite": [["Paradis et al, 2004, Bioinf",
9
+ "http://dx.doi.org/10.1093/bioinformatics/btg412"]],
10
+ "help_arg": "--help",
11
+ "options": [
12
+ {
13
+ "name": "Input tree",
14
+ "opt": "--t",
15
+ "arg": "in_file",
16
+ "mandatory": true,
17
+ "description": "A tree to prune in Newick format."
18
+ },
19
+ {
20
+ "opt": "--dist-quantile",
21
+ "arg": "float",
22
+ "default": 0.25,
23
+ "description": "The quantile of edge lengths."
24
+ },
25
+ {
26
+ "opt": "--min_dist",
27
+ "arg": "float",
28
+ "description": ["The minimum distance to allow between two tips. If",
29
+ "not set, dist.quantile is used instead to calculate it."]
30
+ },
31
+ {
32
+ "opt": "--quiet",
33
+ "description": ["Boolean indicating if the function must run without",
34
+ "output."]
35
+ },
36
+ {
37
+ "opt": "--max_iters",
38
+ "arg": "integer",
39
+ "default": 1000,
40
+ "description": "Maximum number of iterations."
41
+ },
42
+ {
43
+ "opt": "--min_nodes_random",
44
+ "arg": "integer",
45
+ "default": 40000,
46
+ "description": ["Minimum number of nodes to trigger 'tip-pairs'",
47
+ "nodes sampling. This sampling is less reproducible and more",
48
+ "computationally expensive, but it's the only solution if the",
49
+ "cophenetic matrix exceeds 2^31-1 entries; above that, it cannot",
50
+ "be represented in R."]
51
+ },
52
+ {
53
+ "opt": "--random_nodes_frx",
54
+ "arg": "float",
55
+ "default": 1.0,
56
+ "description": ["Fraction of the nodes to be sampled if more than",
57
+ "'Min nodes random'."]
58
+ },
59
+ {
60
+ "arg": "out_file",
61
+ "mandatory": true,
62
+ "description": ["Output file in Newick format containing the pruned",
63
+ "tree."]
64
+ }
65
+ ]
66
+ }
67
+ ]
68
+ }
@@ -0,0 +1,111 @@
1
+ {
2
+ "tasks": [
3
+ {
4
+ "task": "VCF.SNPs.rb",
5
+ "description": ["Counts the number of Single-Nucleotide Polymorphisms",
6
+ "(SNPs) in a VCF file."],
7
+ "help_arg": "--help",
8
+ "options": [
9
+ {
10
+ "opt": "--input",
11
+ "arg": "in_file",
12
+ "mandatory": true,
13
+ "description": "Input file in Variant Call Format (VCF)."
14
+ },
15
+ {
16
+ "name": "Filtered VCF",
17
+ "opt": "--out",
18
+ "arg": "out_file",
19
+ "description": "Output (filtered) file in Variant Call Format (VCF)."
20
+ },
21
+ {
22
+ "name": "Min DP",
23
+ "opt": "--min-dp",
24
+ "arg": "integer",
25
+ "description": "Minimum number of reads covering the position.",
26
+ "default": 4
27
+ },
28
+ {
29
+ "name": "Max DP",
30
+ "opt": "--max-dp",
31
+ "arg": "integer",
32
+ "description": ["Maximum number of reads covering the position. By",
33
+ "default no limit."]
34
+ },
35
+ {
36
+ "name": "Min Ref-DP",
37
+ "opt": "--min-ref-dp",
38
+ "arg": "integer",
39
+ "description": "Minimum number of reads supporting the REF allele.",
40
+ "default": 2
41
+ },
42
+ {
43
+ "name": "Min Alt-DP",
44
+ "opt": "--min-alt-dp",
45
+ "arg": "integer",
46
+ "description": "Minimum number of reads supporting the ALT allele.",
47
+ "default": 2
48
+ },
49
+ {
50
+ "opt": "--min-quality",
51
+ "arg": "float",
52
+ "description": "Minimum quality of the position mapping.",
53
+ "default": 0.0
54
+ },
55
+ {
56
+ "opt": "--min-shannon",
57
+ "arg": "integer",
58
+ "description": "Minimum information content (in bits, from 0 to 1).",
59
+ "default": 0.0
60
+ },
61
+ {
62
+ "opt": "--indels",
63
+ "description": "Process indels."
64
+ }
65
+ ]
66
+ },
67
+ {
68
+ "task": "VCF.KaKs.rb",
69
+ "description": ["Estimates the Ka/Ks ratio from the SNPs in a VCF file.",
70
+ "Ka and Ks are corrected using pseudo-counts, but no corrections for",
71
+ "multiple substitutions are applied."],
72
+ "help_arg": "--help",
73
+ "see_also": ["VCF.SNPs.rb"],
74
+ "options": [
75
+ {
76
+ "opt": "--input",
77
+ "arg": "in_file",
78
+ "mandatory": true,
79
+ "description": "Input file in Variant Call Format (VCF)."
80
+ },
81
+ {
82
+ "opt": "--seqs",
83
+ "arg": "in_file",
84
+ "mandatory": true,
85
+ "description": "Input gene sequences (nucleotides) in FastA format."
86
+ },
87
+ {
88
+ "name": "Synonymous fraction",
89
+ "opt": "--syn-frx",
90
+ "arg": "float",
91
+ "description": ["Fraction of synonymous substitutions. If passed,",
92
+ "the number of sites is estimated (not counted per gene),",
93
+ "speeding up the computation ~10X."]
94
+ },
95
+ {
96
+ "name": "Bacterial code synonymous fraction",
97
+ "opt": "--syn-bacterial-code",
98
+ "description": ["Sets the synonymous fraction to 0.760417,",
99
+ "approximately the proportion of synonymous substitutions in the",
100
+ "bacterial code."]
101
+ },
102
+ {
103
+ "opt": "--codon-file",
104
+ "arg": "out_file",
105
+ "description": ["Output file including the codons of substitution",
106
+ "variants."]
107
+ }
108
+ ]
109
+ }
110
+ ]
111
+ }