miga-base 0.7.26.0 → 1.0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (337) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/_data/aai-intax.blast.tsv.gz +0 -0
  3. data/lib/miga/_data/aai-intax.diamond.tsv.gz +0 -0
  4. data/lib/miga/_data/aai-novel.blast.tsv.gz +0 -0
  5. data/lib/miga/_data/aai-novel.diamond.tsv.gz +0 -0
  6. data/lib/miga/cli/action/classify_wf.rb +2 -2
  7. data/lib/miga/cli/action/derep_wf.rb +1 -1
  8. data/lib/miga/cli/action/doctor.rb +57 -14
  9. data/lib/miga/cli/action/doctor/base.rb +47 -23
  10. data/lib/miga/cli/action/init.rb +11 -7
  11. data/lib/miga/cli/action/init/files_helper.rb +1 -0
  12. data/lib/miga/cli/action/ncbi_get.rb +3 -3
  13. data/lib/miga/cli/action/tax_dist.rb +2 -2
  14. data/lib/miga/cli/action/wf.rb +5 -4
  15. data/lib/miga/common.rb +1 -0
  16. data/lib/miga/daemon.rb +11 -4
  17. data/lib/miga/dataset/result.rb +10 -6
  18. data/lib/miga/json.rb +5 -4
  19. data/lib/miga/metadata.rb +5 -1
  20. data/lib/miga/parallel.rb +36 -0
  21. data/lib/miga/project.rb +8 -8
  22. data/lib/miga/project/base.rb +4 -4
  23. data/lib/miga/project/result.rb +2 -2
  24. data/lib/miga/sqlite.rb +10 -2
  25. data/lib/miga/version.rb +23 -9
  26. data/scripts/aai_distances.bash +16 -18
  27. data/scripts/ani_distances.bash +16 -17
  28. data/scripts/assembly.bash +31 -16
  29. data/scripts/haai_distances.bash +3 -27
  30. data/scripts/miga.bash +6 -4
  31. data/scripts/p.bash +1 -1
  32. data/scripts/read_quality.bash +9 -18
  33. data/scripts/trimmed_fasta.bash +14 -30
  34. data/scripts/trimmed_reads.bash +36 -36
  35. data/test/parallel_test.rb +31 -0
  36. data/test/project_test.rb +2 -1
  37. data/test/remote_dataset_test.rb +1 -1
  38. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +41964 -0
  39. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +32439 -0
  40. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +62056 -0
  41. data/utils/FastAAI/FastAAI/FastAAI +1336 -0
  42. data/utils/FastAAI/README.md +84 -0
  43. data/utils/FastAAI/kAAI_v1.0_virus.py +1296 -0
  44. data/utils/distance/commands.rb +1 -0
  45. data/utils/distance/database.rb +0 -1
  46. data/utils/distance/runner.rb +2 -4
  47. data/utils/enveomics/Docs/recplot2.md +244 -0
  48. data/utils/enveomics/Examples/aai-matrix.bash +66 -0
  49. data/utils/enveomics/Examples/ani-matrix.bash +66 -0
  50. data/utils/enveomics/Examples/essential-phylogeny.bash +105 -0
  51. data/utils/enveomics/Examples/unus-genome-phylogeny.bash +100 -0
  52. data/utils/enveomics/LICENSE.txt +73 -0
  53. data/utils/enveomics/Makefile +52 -0
  54. data/utils/enveomics/Manifest/Tasks/aasubs.json +103 -0
  55. data/utils/enveomics/Manifest/Tasks/blasttab.json +786 -0
  56. data/utils/enveomics/Manifest/Tasks/distances.json +161 -0
  57. data/utils/enveomics/Manifest/Tasks/fasta.json +802 -0
  58. data/utils/enveomics/Manifest/Tasks/fastq.json +291 -0
  59. data/utils/enveomics/Manifest/Tasks/graphics.json +126 -0
  60. data/utils/enveomics/Manifest/Tasks/mapping.json +137 -0
  61. data/utils/enveomics/Manifest/Tasks/ogs.json +382 -0
  62. data/utils/enveomics/Manifest/Tasks/other.json +906 -0
  63. data/utils/enveomics/Manifest/Tasks/remote.json +355 -0
  64. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +638 -0
  65. data/utils/enveomics/Manifest/Tasks/tables.json +308 -0
  66. data/utils/enveomics/Manifest/Tasks/trees.json +68 -0
  67. data/utils/enveomics/Manifest/Tasks/variants.json +111 -0
  68. data/utils/enveomics/Manifest/categories.json +165 -0
  69. data/utils/enveomics/Manifest/examples.json +154 -0
  70. data/utils/enveomics/Manifest/tasks.json +4 -0
  71. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +69 -0
  72. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -0
  73. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -0
  74. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -0
  75. data/utils/enveomics/Pipelines/assembly.pbs/README.md +189 -0
  76. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +112 -0
  77. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +23 -0
  78. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +44 -0
  79. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +50 -0
  80. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +37 -0
  81. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +68 -0
  82. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +49 -0
  83. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +80 -0
  84. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +57 -0
  85. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +63 -0
  86. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +38 -0
  87. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +73 -0
  88. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +21 -0
  89. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +72 -0
  90. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +98 -0
  91. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -0
  92. data/utils/enveomics/Pipelines/blast.pbs/README.md +127 -0
  93. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +109 -0
  94. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +128 -0
  95. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +16 -0
  96. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +22 -0
  97. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +26 -0
  98. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +89 -0
  99. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +29 -0
  100. data/utils/enveomics/Pipelines/idba.pbs/README.md +49 -0
  101. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +95 -0
  102. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +56 -0
  103. data/utils/enveomics/Pipelines/trim.pbs/README.md +54 -0
  104. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +70 -0
  105. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +130 -0
  106. data/utils/enveomics/README.md +42 -0
  107. data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +171 -0
  108. data/utils/enveomics/Scripts/Aln.cat.rb +221 -0
  109. data/utils/enveomics/Scripts/Aln.convert.pl +35 -0
  110. data/utils/enveomics/Scripts/AlphaDiversity.pl +152 -0
  111. data/utils/enveomics/Scripts/BedGraph.tad.rb +93 -0
  112. data/utils/enveomics/Scripts/BedGraph.window.rb +71 -0
  113. data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +102 -0
  114. data/utils/enveomics/Scripts/BlastTab.addlen.rb +63 -0
  115. data/utils/enveomics/Scripts/BlastTab.advance.bash +48 -0
  116. data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +55 -0
  117. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +104 -0
  118. data/utils/enveomics/Scripts/BlastTab.cogCat.rb +76 -0
  119. data/utils/enveomics/Scripts/BlastTab.filter.pl +47 -0
  120. data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +194 -0
  121. data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +104 -0
  122. data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +157 -0
  123. data/utils/enveomics/Scripts/BlastTab.recplot2.R +48 -0
  124. data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +86 -0
  125. data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +119 -0
  126. data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +86 -0
  127. data/utils/enveomics/Scripts/BlastTab.subsample.pl +47 -0
  128. data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +114 -0
  129. data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +90 -0
  130. data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +101 -0
  131. data/utils/enveomics/Scripts/Chao1.pl +97 -0
  132. data/utils/enveomics/Scripts/CharTable.classify.rb +234 -0
  133. data/utils/enveomics/Scripts/EBIseq2tax.rb +83 -0
  134. data/utils/enveomics/Scripts/FastA.N50.pl +60 -0
  135. data/utils/enveomics/Scripts/FastA.extract.rb +152 -0
  136. data/utils/enveomics/Scripts/FastA.filter.pl +52 -0
  137. data/utils/enveomics/Scripts/FastA.filterLen.pl +28 -0
  138. data/utils/enveomics/Scripts/FastA.filterN.pl +60 -0
  139. data/utils/enveomics/Scripts/FastA.fragment.rb +100 -0
  140. data/utils/enveomics/Scripts/FastA.gc.pl +42 -0
  141. data/utils/enveomics/Scripts/FastA.interpose.pl +93 -0
  142. data/utils/enveomics/Scripts/FastA.length.pl +38 -0
  143. data/utils/enveomics/Scripts/FastA.mask.rb +89 -0
  144. data/utils/enveomics/Scripts/FastA.per_file.pl +36 -0
  145. data/utils/enveomics/Scripts/FastA.qlen.pl +57 -0
  146. data/utils/enveomics/Scripts/FastA.rename.pl +65 -0
  147. data/utils/enveomics/Scripts/FastA.revcom.pl +23 -0
  148. data/utils/enveomics/Scripts/FastA.sample.rb +98 -0
  149. data/utils/enveomics/Scripts/FastA.slider.pl +85 -0
  150. data/utils/enveomics/Scripts/FastA.split.pl +55 -0
  151. data/utils/enveomics/Scripts/FastA.split.rb +79 -0
  152. data/utils/enveomics/Scripts/FastA.subsample.pl +131 -0
  153. data/utils/enveomics/Scripts/FastA.tag.rb +65 -0
  154. data/utils/enveomics/Scripts/FastA.toFastQ.rb +69 -0
  155. data/utils/enveomics/Scripts/FastA.wrap.rb +48 -0
  156. data/utils/enveomics/Scripts/FastQ.filter.pl +54 -0
  157. data/utils/enveomics/Scripts/FastQ.interpose.pl +90 -0
  158. data/utils/enveomics/Scripts/FastQ.maskQual.rb +89 -0
  159. data/utils/enveomics/Scripts/FastQ.offset.pl +90 -0
  160. data/utils/enveomics/Scripts/FastQ.split.pl +53 -0
  161. data/utils/enveomics/Scripts/FastQ.tag.rb +70 -0
  162. data/utils/enveomics/Scripts/FastQ.test-error.rb +81 -0
  163. data/utils/enveomics/Scripts/FastQ.toFastA.awk +24 -0
  164. data/utils/enveomics/Scripts/GFF.catsbj.pl +127 -0
  165. data/utils/enveomics/Scripts/GenBank.add_fields.rb +84 -0
  166. data/utils/enveomics/Scripts/HMM.essential.rb +351 -0
  167. data/utils/enveomics/Scripts/HMM.haai.rb +168 -0
  168. data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +83 -0
  169. data/utils/enveomics/Scripts/JPlace.distances.rb +88 -0
  170. data/utils/enveomics/Scripts/JPlace.to_iToL.rb +320 -0
  171. data/utils/enveomics/Scripts/M5nr.getSequences.rb +81 -0
  172. data/utils/enveomics/Scripts/MeTaxa.distribution.pl +198 -0
  173. data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +35 -0
  174. data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +49 -0
  175. data/utils/enveomics/Scripts/NCBIacc2tax.rb +92 -0
  176. data/utils/enveomics/Scripts/Newick.autoprune.R +27 -0
  177. data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +228 -0
  178. data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +32 -0
  179. data/utils/enveomics/Scripts/RefSeq.download.bash +48 -0
  180. data/utils/enveomics/Scripts/SRA.download.bash +55 -0
  181. data/utils/enveomics/Scripts/TRIBS.plot-test.R +36 -0
  182. data/utils/enveomics/Scripts/TRIBS.test.R +39 -0
  183. data/utils/enveomics/Scripts/Table.barplot.R +31 -0
  184. data/utils/enveomics/Scripts/Table.df2dist.R +30 -0
  185. data/utils/enveomics/Scripts/Table.filter.pl +61 -0
  186. data/utils/enveomics/Scripts/Table.merge.pl +77 -0
  187. data/utils/enveomics/Scripts/Table.prefScore.R +60 -0
  188. data/utils/enveomics/Scripts/Table.replace.rb +69 -0
  189. data/utils/enveomics/Scripts/Table.round.rb +63 -0
  190. data/utils/enveomics/Scripts/Table.split.pl +57 -0
  191. data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +227 -0
  192. data/utils/enveomics/Scripts/VCF.KaKs.rb +147 -0
  193. data/utils/enveomics/Scripts/VCF.SNPs.rb +88 -0
  194. data/utils/enveomics/Scripts/aai.rb +419 -0
  195. data/utils/enveomics/Scripts/ani.rb +362 -0
  196. data/utils/enveomics/Scripts/anir.rb +137 -0
  197. data/utils/enveomics/Scripts/clust.rand.rb +102 -0
  198. data/utils/enveomics/Scripts/gi2tax.rb +103 -0
  199. data/utils/enveomics/Scripts/in_silico_GA_GI.pl +96 -0
  200. data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
  201. data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
  202. data/utils/enveomics/Scripts/lib/enveomics.R +1 -0
  203. data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +293 -0
  204. data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +175 -0
  205. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +24 -0
  206. data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +17 -0
  207. data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +30 -0
  208. data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +253 -0
  209. data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +63 -0
  210. data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +182 -0
  211. data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +49 -0
  212. data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +74 -0
  213. data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +237 -0
  214. data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +3 -0
  215. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +31 -0
  216. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +152 -0
  217. data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +73 -0
  218. data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +135 -0
  219. data/utils/enveomics/Scripts/ogs.annotate.rb +88 -0
  220. data/utils/enveomics/Scripts/ogs.core-pan.rb +160 -0
  221. data/utils/enveomics/Scripts/ogs.extract.rb +125 -0
  222. data/utils/enveomics/Scripts/ogs.mcl.rb +186 -0
  223. data/utils/enveomics/Scripts/ogs.rb +104 -0
  224. data/utils/enveomics/Scripts/ogs.stats.rb +131 -0
  225. data/utils/enveomics/Scripts/rbm-legacy.rb +172 -0
  226. data/utils/enveomics/Scripts/rbm.rb +100 -0
  227. data/utils/enveomics/Scripts/sam.filter.rb +148 -0
  228. data/utils/enveomics/Tests/Makefile +10 -0
  229. data/utils/enveomics/Tests/Mgen_M2288.faa +3189 -0
  230. data/utils/enveomics/Tests/Mgen_M2288.fna +8282 -0
  231. data/utils/enveomics/Tests/Mgen_M2321.fna +8288 -0
  232. data/utils/enveomics/Tests/Nequ_Kin4M.faa +2970 -0
  233. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
  234. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +7 -0
  235. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +17 -0
  236. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +137 -0
  237. data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +123 -0
  238. data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +200 -0
  239. data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +55 -0
  240. data/utils/enveomics/Tests/alkB.nwk +1 -0
  241. data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +13 -0
  242. data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +17 -0
  243. data/utils/enveomics/Tests/hiv1.faa +59 -0
  244. data/utils/enveomics/Tests/hiv1.fna +134 -0
  245. data/utils/enveomics/Tests/hiv2.faa +70 -0
  246. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +233 -0
  247. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +1 -0
  248. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +233 -0
  249. data/utils/enveomics/Tests/phyla_counts.tsv +10 -0
  250. data/utils/enveomics/Tests/primate_lentivirus.ogs +11 -0
  251. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +9 -0
  252. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +8 -0
  253. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +6 -0
  254. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +9 -0
  255. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +6 -0
  256. data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +6 -0
  257. data/utils/enveomics/build_enveomics_r.bash +45 -0
  258. data/utils/enveomics/enveomics.R/DESCRIPTION +31 -0
  259. data/utils/enveomics/enveomics.R/NAMESPACE +39 -0
  260. data/utils/enveomics/enveomics.R/R/autoprune.R +155 -0
  261. data/utils/enveomics/enveomics.R/R/barplot.R +184 -0
  262. data/utils/enveomics/enveomics.R/R/cliopts.R +135 -0
  263. data/utils/enveomics/enveomics.R/R/df2dist.R +154 -0
  264. data/utils/enveomics/enveomics.R/R/growthcurve.R +331 -0
  265. data/utils/enveomics/enveomics.R/R/prefscore.R +79 -0
  266. data/utils/enveomics/enveomics.R/R/recplot.R +354 -0
  267. data/utils/enveomics/enveomics.R/R/recplot2.R +1631 -0
  268. data/utils/enveomics/enveomics.R/R/tribs.R +583 -0
  269. data/utils/enveomics/enveomics.R/R/utils.R +80 -0
  270. data/utils/enveomics/enveomics.R/README.md +81 -0
  271. data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
  272. data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
  273. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +16 -0
  274. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +16 -0
  275. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +16 -0
  276. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +25 -0
  277. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +46 -0
  278. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -0
  279. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +47 -0
  280. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -0
  281. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -0
  282. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +40 -0
  283. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +103 -0
  284. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +67 -0
  285. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -0
  286. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -0
  287. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +45 -0
  288. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +44 -0
  289. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +47 -0
  290. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +75 -0
  291. data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +50 -0
  292. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +44 -0
  293. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +139 -0
  294. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +45 -0
  295. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -0
  296. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +77 -0
  297. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -0
  298. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -0
  299. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -0
  300. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -0
  301. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +47 -0
  302. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -0
  303. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -0
  304. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +45 -0
  305. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -0
  306. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -0
  307. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -0
  308. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -0
  309. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +52 -0
  310. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -0
  311. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +51 -0
  312. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +43 -0
  313. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +82 -0
  314. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +59 -0
  315. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -0
  316. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +36 -0
  317. data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +23 -0
  318. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +68 -0
  319. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -0
  320. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -0
  321. data/utils/enveomics/enveomics.R/man/growth.curves.Rd +14 -0
  322. data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +13 -0
  323. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +78 -0
  324. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +46 -0
  325. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +45 -0
  326. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +125 -0
  327. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -0
  328. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -0
  329. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -0
  330. data/utils/enveomics/globals.mk +8 -0
  331. data/utils/enveomics/manifest.json +9 -0
  332. data/utils/multitrim/Multitrim How-To.pdf +0 -0
  333. data/utils/multitrim/README.md +67 -0
  334. data/utils/multitrim/multitrim.py +1555 -0
  335. data/utils/multitrim/multitrim.yml +13 -0
  336. data/utils/requirements.txt +4 -3
  337. metadata +304 -3
@@ -0,0 +1,73 @@
1
+
2
require 'enveomics_rb/errors'
require 'shellwords'
require 'zlib'
4
+
5
##
# Requires each gem in +gems+ (a String or an Array of Strings), in order,
# stopping at the first one that cannot be loaded.
#
# Returns true if every gem was loaded. If a gem fails to load and
# +mandatory+ is true (default), aborts the program listing the gem that
# failed together with those not yet attempted; if +mandatory+ is false,
# returns false instead. The Array passed by the caller is never modified.
def use(gems, mandatory = true)
  pending = gems.is_a?(Array) ? gems.dup : [gems]
  require 'rubygems'
  until pending.empty?
    # Only drop the gem from the list once it has loaded, so that the gem
    # causing the LoadError is still reported below
    require pending.first
    pending.shift
  end
  true
rescue LoadError
  return false unless mandatory

  abort "\nUnmet requirements, please install required gems:" +
        pending.map { |gem| "\n gem install #{gem}" }.join + "\n\n"
end
19
+
20
##
# Prints all the pieces of +msg+ concatenated, prefixed by the current time
# in square brackets, to STDERR. Does nothing if the global $QUIET is truthy
# (the global is initialized to false when unset).
def say(*msg)
  $QUIET ||= false
  return if $QUIET

  $stderr.puts(format('[%s] %s', Time.now, msg.join('')))
end
26
+
27
##
# Returns an open reading file handler for +file+:
# - $stdin if +file+ is '-'
# - a Zlib::GzipReader if the name ends in '.gz'
# - a plain File opened in read mode otherwise
#
# The caller is responsible for closing the returned handler.
def reader(file)
  return $stdin if file == '-'
  # Match the extension at the very end of the name (to_s also allows
  # path-like objects that are not Strings)
  return Zlib::GzipReader.open(file) if file.to_s.end_with?('.gz')

  File.open(file, 'r')
end
35
+
36
##
# Returns an open writing file handler for +file+:
# - $stdout if +file+ is '-'
# - a Zlib::GzipWriter if the name ends in '.gz'
# - a plain File opened in write mode otherwise
#
# The caller is responsible for closing the returned handler.
def writer(file)
  return $stdout if file == '-'
  # Match the extension at the very end of the name (to_s also allows
  # path-like objects that are not Strings)
  return Zlib::GzipWriter.open(file) if file.to_s.end_with?('.gz')

  File.open(file, 'w')
end
44
+
45
##
# Run a command +cmd+ that can be a ready-to-go String or an Array to escape
#
# Supported symbol key options in Hash +opts+:
# - wait: Boolean, should I wait for the command to complete? Default: true
# - stdout: Path to redirect the standard output
# - stderr: Path to redirect the standard error
# - mergeout: Send stderr to stdout
#
# Return the process ID. If wait is true (default), check for the exit
# status and throw an Enveomics::CommandError if non-zero. The Hash +opts+
# is only read, never modified.
def run_cmd(cmd, opts = {})
  wait = opts[:wait].nil? ? true : opts[:wait]

  # Build the full shell command, escaping everything not given as a String
  cmd = Shellwords.join(cmd) if cmd.is_a? Array
  cmd += " > #{Shellwords.escape(opts[:stdout])}" if opts[:stdout]
  cmd += " 2> #{Shellwords.escape(opts[:stderr])}" if opts[:stderr]
  cmd += ' 2>&1' if opts[:mergeout]

  pid = spawn(cmd)
  return pid unless wait

  # wait2 returns the status of this very process instead of relying on $?
  _, status = Process.wait2(pid)
  unless status.success?
    raise Enveomics::CommandError.new(
      "Command failed with status #{status.exitstatus}:\n#{cmd}"
    )
  end
  pid
end
73
+
@@ -0,0 +1,135 @@
1
+ # @author Luis M. Rodriguez-R
2
+ # @license Artistic-2.0
3
+
4
##
# Enveomics representation of a Variant Call Format (VCF) file.
class VCF
  ##
  # File-handler, a File object.
  attr_reader :fh

  ##
  # Initialize from +file+: a String is taken as a path and opened for
  # reading, anything else is used directly as the file-handler.
  def initialize(file)
    @fh = file.is_a?(String) ? File.open(file, 'r') : file
  end

  ##
  # Iterate through each variant (i.e., each non-comment line), passing a
  # VCF::Variant object to +blk+. Blank lines are skipped. The file-handler
  # is rewound first. Returns an Enumerator if no block is given.
  def each_variant(&blk)
    return enum_for(:each_variant) if blk.nil?

    fh.rewind
    fh.each_line do |ln|
      next if ln.start_with?('#') || ln.strip.empty?

      blk.call VCF::Variant.new(ln)
    end
  end

  ##
  # Iterate through each header (i.e., each comment line), passing a String to
  # +blk+. The file-handler is rewound first. Returns an Enumerator if no
  # block is given.
  def each_header(&blk)
    return enum_for(:each_header) if blk.nil?

    fh.rewind
    fh.each_line do |ln|
      next unless ln.start_with?('#')

      blk.call ln
    end
  end
end
37
+
38
class VCF
  ##
  # A single variant, parsed from a non-comment line of a VCF file.
  class Variant
    ##
    # Column definitions in VCF.
    @@COLUMNS = %i[chrom pos id ref alt qual filter info format bam]

    ##
    # An Array containing each of the VCF entry's columns. All are Strings
    # except for QUAL (index 5, a Float) and INFO (index 7, a Hash with Symbol
    # keys).
    attr_reader :data

    ##
    # Initialize VCF::Variant from String +line+, a non-comment line in the VCF.
    def initialize(line)
      @data = line.chomp.split("\t")
      # Qual as float
      @data[5] = data[5].to_f
      # Split info: KEY=VALUE pairs are saved as Strings, bare flags as true
      @data[7] = data[7].to_s.split(';').each_with_object({}) do |entry, hash|
        key, value = entry.include?('=') ? entry.split('=', 2) : [entry, true]
        hash[key.to_sym] = value
      end
      # Read formatted data: FORMAT keys paired with the values of the sample
      # column are saved together with the INFO entries
      unless data[9].nil? || data[9].empty?
        keys = format.to_s.split(':')
        values = bam.split(':')
        keys.each_with_index { |key, i| @data[7][key.to_sym] = values[i] }
      end
      # Flag as indel if REF and the first ALT allele differ in length
      first_alt = alt.to_s.split(',').first.to_s
      @data[7][:INDEL] = true if ref.to_s.size != first_alt.size
    end

    ##
    # Named functions for each column. All return the column as saved in
    # +data+, except +pos+, which is returned as Integer.
    @@COLUMNS.each_with_index do |column, i|
      define_method(column) { column == :pos ? data[i].to_i : data[i] }
    end

    ##
    # Sequencing depth.
    def dp
      return nil if info[:DP].nil?

      info[:DP].to_i
    end

    ##
    # Sequencing depth of FWD-REF, REV-REF, FWD-ALT, and REV-ALT.
    def dp4
      return nil if info[:DP4].nil?

      @dp4 ||= info[:DP4].split(',').map(&:to_i)
    end

    ##
    # Sequencing depth of REF and ALT.
    def ad
      return nil if info[:AD].nil?

      @ad ||= info[:AD].split(',').map(&:to_i)
    end

    ##
    # Sequencing depth of the REF allele.
    def ref_dp
      return dp4[0] + dp4[1] unless dp4.nil?
      return ad[0] unless ad.nil?

      nil
    end

    ##
    # Sequencing depth of the ALT allele.
    def alt_dp
      return dp4[2] + dp4[3] unless dp4.nil?
      return ad[1] unless ad.nil?

      nil
    end

    ##
    # Information content of the variant in bits (from 0 to 1).
    #
    # Returns nil if the allele depths are not available, and NaN if the
    # total depth is zero. An allele with zero depth contributes 0 bits.
    def shannon
      return @shannon unless @shannon.nil?

      a = ref_dp
      b = alt_dp
      return nil if a.nil? || b.nil?

      total = a + b
      return Float::NAN if total.zero?

      # p * log2(p), using the limit 0 for p = 0 (0 * -Infinity is NaN)
      plogp = lambda do |depth|
        p = depth.to_f / total
        p.zero? ? 0.0 : p * Math.log(p, 2)
      end
      @shannon = 0.0 - (plogp.call(a) + plogp.call(b))
    end

    ##
    # Is it an indel?
    def indel?
      info[:INDEL] || false
    end

    ##
    # Return as String.
    def to_s
      (data[0..6] + [info_to_s] + data[8..-1].to_a).join("\t") + "\n"
    end

    ##
    # Returns the INFO entry as String. Flags (entries saved as true) are
    # written as the bare key, any other entry as KEY=VALUE.
    # NOTE(review): FORMAT keys merged into INFO at initialization are also
    # written here -- confirm this is intended.
    def info_to_s
      data[7].map { |key, value| value == true ? key.to_s : "#{key}=#{value}" }
             .join(';')
    end
  end
end
@@ -0,0 +1,88 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ #
4
+ # @author: Luis M. Rodriguez-R
5
+ # @update: Mar-23-2015
6
+ # @license: artistic license 2.0
7
+ #
8
+
9
+ $:.push File.expand_path(File.dirname(__FILE__) + '/lib')
10
+ require 'enveomics_rb/og'
11
+ require 'optparse'
12
+
13
# Default options. The literals true/false are used because the TRUE/FALSE
# constants are not defined in current versions of Ruby.
o = { q: false, f: "(\\S+)\\.txt", consolidate: true, pre: [] }
ARGV << '-h' if ARGV.empty?
OptionParser.new do |opts|
  opts.banner = "
Annotates Orthology Groups (OGs) using one or more reference genomes.

Usage: #{$0} [options]"
  opts.separator ""
  opts.separator "Mandatory"
  opts.on("-i", "--in FILE",
          "Input file containing the OGs (as generated by ogs.rb).") { |v| o[:in] = v }
  opts.on("-o", "--out FILE",
          "Output file containing the annotated OGs.") { |v| o[:out] = v }
  opts.on("-a FILE1,FILE2,...", Array,
          "Input file(s) containing the annotations. One or more tab-delimited files",
          "with the gene names in the first column and the annotation in the second.") { |v| o[:annotations] = v }
  opts.separator ""
  opts.separator "Other Options"
  opts.on("-f", "--format STRING",
          "Format of the filenames for the annotation files, using regex syntax.",
          "By default: '#{o[:f]}'.") { |v| o[:f] = v }
  opts.on("-q", "--quiet", "Run quietly (no STDERR output).") { o[:q] = true }
  opts.on("-h", "--help", "Display this screen.") do
    puts opts
    exit
  end
  opts.separator ""
end.parse!
abort "-i is mandatory" if o[:in].nil?
abort "-o is mandatory" if o[:out].nil?
abort "-a is mandatory" if o[:annotations].nil?

##### MAIN:
begin
  # Read the pre-computed OGs: the first line is the header, each following
  # line is one OG
  collection = OGCollection.new
  $stderr.puts "Reading pre-computed OGs in '#{o[:in]}'." unless o[:q]
  File.open(o[:in], 'r') do |f|
    header = f.gets
    raise "Empty OGs file: #{o[:in]}" if header.nil?

    h = header.chomp.split(/\t/)
    f.each_line { |ln| collection << OG.new(h, ln.chomp.split(/\t/)) }
  end
  $stderr.puts " Loaded OGs: #{collection.ogs.length}." unless o[:q]

  # Read annotations
  o[:annotations].each do |annot|
    # The first capture group of the filename format is passed to Gene.new
    # together with each gene name
    m = /#{o[:f]}/.match(annot)
    if m.nil? || m[1].nil?
      warn "Cannot parse filename: #{annot} (doesn't match /#{o[:f]}/)."
      next
    end
    no_og = 0
    collection.add_note_src m[1] + ' annotation'
    File.open(annot, 'r') do |f|
      f.each_line do |ln|
        r = ln.chomp.split(/\t/)
        g = Gene.new m[1], r[0]
        og = collection.get_og g
        if og.nil?
          no_og += 1
        else
          # Interpolation tolerates lines without a second column
          og.add_note "#{g.id}: #{r[1]}", collection.note_srcs.length - 1
        end
      end
    end
    warn "Warning: Cannot find #{no_og} genes from #{m[1]} in OG collection." if no_og > 0
  end

  # Save the output matrix
  $stderr.puts "Saving annotated OGs into '#{o[:out]}'." unless o[:q]
  File.open(o[:out], "w") { |f| f.puts collection.to_s }
  $stderr.puts "Done.\n" unless o[:q]
rescue => err
  $stderr.puts "Exception: #{err}\n\n"
  err.backtrace.each { |l| $stderr.puts l + "\n" }
  # Report the failure to the calling process
  exit 1
end
87
+
88
+
@@ -0,0 +1,160 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ #
4
+ # @author: Luis M. Rodriguez-R
5
+ # @license: artistic-2.0
6
+ #
7
+
8
+ $:.push File.expand_path("../lib", __FILE__)
9
+ require "optparse"
10
+ require "json"
11
+ require "tmpdir"
12
+
13
# Default options
o = { q: false, n: 100, thr: 2 }
ARGV << "-h" if ARGV.empty?
OptionParser.new do |opts|
  opts.banner = "
Subsamples the genomes in a set of Orthology Groups (OGs) and estimates the
trend of core genome and pangenome sizes.

Usage: #{$0} [options]"
  opts.separator ""
  opts.separator "Mandatory"
  opts.on("-o", "--ogs FILE",
          "Input file containing the precomputed OGs.") { |v| o[:ogs] = v }
  opts.separator ""
  opts.separator "Output Options"
  opts.on("-s", "--summary FILE",
          "Output file in tabular format with summary statistics.") { |v| o[:summ] = v }
  opts.on("-t", "--tab FILE", "Output file in tabular format.") { |v| o[:tab] = v }
  opts.on("-j", "--json FILE", "Output file in JSON format.") { |v| o[:json] = v }
  opts.separator ""
  opts.separator "Other Options"
  opts.on("-n", "--replicates INT",
          "Number of replicates to estimate. By default: #{o[:n]}.") { |v| o[:n] = v.to_i }
  opts.on("--threads INT",
          "Children threads to spawn. By default: #{o[:thr]}.") { |v| o[:thr] = v.to_i }
  opts.on("-q", "--quiet", "Run quietly (no STDERR output).") { o[:q] = true }
  opts.on("-h", "--help", "Display this screen.") do
    puts opts
    exit
  end
  opts.separator ""
end.parse!
abort "-o is mandatory" if o[:ogs].nil?
# Without replicates the summary statistics below are undefined
abort "-n must be a positive integer" unless o[:n] > 0

##### MAIN:
begin
  # Read the pre-computed OGs: one column per genome (taken from the header
  # line), one row per OG, where "-" indicates absence from the genome
  $stderr.puts "Reading pre-computed OGs in '#{o[:ogs]}'." unless o[:q]
  bool_a = []
  genomes_n = nil
  File.open(o[:ogs], "r") do |f|
    header = f.gets
    raise "Empty OGs file: #{o[:ogs]}" if header.nil?

    genomes_n = header.chomp.split("\t").size
    f.each_line do |ln|
      bool_a << ln.chomp.split("\t").map { |g| g != "-" }
    end
  end
  $stderr.puts " Loaded OGs: #{bool_a.size}." unless o[:q]
  # Each OG as an Integer bitmask of presence/absence across genomes
  bool_a_b = bool_a.map { |og| og.map { |g| g ? "1" : "0" }.join("").to_i(2) }

  # Generate subsamples
  size = { core: [], pan: [] }
  Dir.mktmpdir do |dir|
    children = 0
    failed = 0
    (0..o[:n] - 1).each do |i|
      fork do
        # Generate trajectory: start with all the genomes and remove one at
        # a time in random order, so that index 0 of each Array ends up
        # being the size for a single genome
        genomes = (0..genomes_n - 1).to_a.shuffle
        genomes_b = (2**genomes_n) - 1
        core = []
        pan = []
        ogs = bool_a_b
        until genomes.empty?
          # Only OGs present in some remaining genome are kept (pangenome),
          # those present in all the remaining genomes are the core genome
          ogs = ogs.select { |og| (og & genomes_b) > 0 }
          pan.unshift ogs.size
          core.unshift(ogs.count { |og| (og & genomes_b) == genomes_b })
          genomes_b ^= 2**genomes.pop
        end
        abort "UNEXPECTED ERROR: Final genomes_b=#{genomes_b}." if genomes_b > 0
        # Store trajectory
        File.open("#{dir}/#{i}", "w") do |tfh|
          tfh.puts JSON.generate({ core: core, pan: pan })
        end
      end # fork
      children += 1
      next if children < o[:thr]

      # Too many children running: wait for any of them before continuing
      Process.wait
      failed += 1 unless $?.success?
      children -= 1
    end
    failed += Process.waitall.count { |_pid, status| !status.success? }
    # A failed child did not save its trajectory, report that directly
    raise "#{failed} replicate(s) failed." if failed > 0

    # Recover trajectories
    (0..o[:n] - 1).each do |i|
      s = JSON.parse(File.read("#{dir}/#{i}"), { symbolize_names: true })
      size[:core][i] = s[:core]
      size[:pan][i] = s[:pan]
    end
  end # Dir.mktmpdir

  # Show result
  $stderr.puts "Generating reports." unless o[:q]

  # Save results in JSON
  unless o[:json].nil?
    File.open(o[:json], "w") { |ofh| ofh.puts JSON.pretty_generate(size) }
  end

  # Save results in tab
  unless o[:tab].nil?
    File.open(o[:tab], "w") do |ofh|
      ofh.puts((%w{replicate metric} + (1..genomes_n).to_a).join("\t"))
      (0..o[:n] - 1).each do |i|
        ofh.puts(([i + 1, "core"] + size[:core][i]).join("\t"))
        ofh.puts(([i + 1, "pan"] + size[:pan][i]).join("\t"))
      end
    end
  end

  # Save summary results in tab
  unless o[:summ].nil?
    File.open(o[:summ], "w") do |ofh|
      ofh.puts %w{genomes core_avg core_sd core_q1 core_q2 core_q3
                  pan_avg pan_sd pan_q1 pan_q2 pan_q3}.join("\t")
      (0..genomes_n - 1).each do |i|
        res = [i + 1]
        [:core, :pan].each do |met|
          a = size[met].map { |r| r[i] }.sort
          avg = a.inject(0, :+).to_f / a.size
          var = a.map { |v| v**2 }.inject(0, :+).to_f / a.size - avg**2
          # Rounding can make the variance slightly negative, which would
          # make Math.sqrt raise Math::DomainError
          sd = Math.sqrt([var, 0.0].max)
          q1 = a[a.size * 1 / 4]
          q2 = a[a.size * 2 / 4]
          q3 = a[a.size * 3 / 4]
          res += [avg, sd, q1, q2, q3]
        end
        ofh.puts res.join("\t")
      end
    end
  end

  $stderr.puts "Done.\n" unless o[:q]
rescue => err
  $stderr.puts "Exception: #{err}\n\n"
  err.backtrace.each { |l| $stderr.puts l + "\n" }
  # Report the failure to the calling process
  exit 1
end
160
+