miga-base 1.2.15.2 → 1.2.15.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/cli/action/download/gtdb.rb +4 -1
  3. data/lib/miga/cli/action/gtdb_get.rb +4 -0
  4. data/lib/miga/daemon.rb +4 -1
  5. data/lib/miga/lair.rb +6 -4
  6. data/lib/miga/remote_dataset/download.rb +3 -2
  7. data/lib/miga/remote_dataset.rb +25 -7
  8. data/lib/miga/taxonomy.rb +6 -0
  9. data/lib/miga/version.rb +2 -2
  10. metadata +6 -302
  11. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +0 -41964
  12. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +0 -32439
  13. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +0 -62056
  14. data/utils/FastAAI/FastAAI +0 -3659
  15. data/utils/FastAAI/FastAAI-legacy/FastAAI +0 -1336
  16. data/utils/FastAAI/FastAAI-legacy/kAAI_v1.0_virus.py +0 -1296
  17. data/utils/FastAAI/README.md +0 -84
  18. data/utils/enveomics/Docs/recplot2.md +0 -244
  19. data/utils/enveomics/Examples/aai-matrix.bash +0 -66
  20. data/utils/enveomics/Examples/ani-matrix.bash +0 -66
  21. data/utils/enveomics/Examples/essential-phylogeny.bash +0 -105
  22. data/utils/enveomics/Examples/unus-genome-phylogeny.bash +0 -100
  23. data/utils/enveomics/LICENSE.txt +0 -73
  24. data/utils/enveomics/Makefile +0 -52
  25. data/utils/enveomics/Manifest/Tasks/aasubs.json +0 -103
  26. data/utils/enveomics/Manifest/Tasks/blasttab.json +0 -790
  27. data/utils/enveomics/Manifest/Tasks/distances.json +0 -161
  28. data/utils/enveomics/Manifest/Tasks/fasta.json +0 -802
  29. data/utils/enveomics/Manifest/Tasks/fastq.json +0 -291
  30. data/utils/enveomics/Manifest/Tasks/graphics.json +0 -126
  31. data/utils/enveomics/Manifest/Tasks/mapping.json +0 -137
  32. data/utils/enveomics/Manifest/Tasks/ogs.json +0 -382
  33. data/utils/enveomics/Manifest/Tasks/other.json +0 -906
  34. data/utils/enveomics/Manifest/Tasks/remote.json +0 -355
  35. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +0 -650
  36. data/utils/enveomics/Manifest/Tasks/tables.json +0 -308
  37. data/utils/enveomics/Manifest/Tasks/trees.json +0 -68
  38. data/utils/enveomics/Manifest/Tasks/variants.json +0 -111
  39. data/utils/enveomics/Manifest/categories.json +0 -165
  40. data/utils/enveomics/Manifest/examples.json +0 -162
  41. data/utils/enveomics/Manifest/tasks.json +0 -4
  42. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +0 -69
  43. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +0 -1
  44. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +0 -1
  45. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +0 -1
  46. data/utils/enveomics/Pipelines/assembly.pbs/README.md +0 -189
  47. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +0 -112
  48. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +0 -23
  49. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +0 -44
  50. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +0 -50
  51. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +0 -37
  52. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +0 -68
  53. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +0 -49
  54. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +0 -80
  55. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +0 -57
  56. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +0 -63
  57. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +0 -38
  58. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +0 -73
  59. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +0 -21
  60. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +0 -72
  61. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +0 -98
  62. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +0 -1
  63. data/utils/enveomics/Pipelines/blast.pbs/README.md +0 -127
  64. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +0 -109
  65. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +0 -128
  66. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +0 -16
  67. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +0 -22
  68. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +0 -26
  69. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +0 -89
  70. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +0 -29
  71. data/utils/enveomics/Pipelines/idba.pbs/README.md +0 -49
  72. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +0 -95
  73. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +0 -56
  74. data/utils/enveomics/Pipelines/trim.pbs/README.md +0 -54
  75. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +0 -70
  76. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +0 -130
  77. data/utils/enveomics/README.md +0 -42
  78. data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +0 -171
  79. data/utils/enveomics/Scripts/Aln.cat.rb +0 -221
  80. data/utils/enveomics/Scripts/Aln.convert.pl +0 -35
  81. data/utils/enveomics/Scripts/AlphaDiversity.pl +0 -152
  82. data/utils/enveomics/Scripts/BedGraph.tad.rb +0 -93
  83. data/utils/enveomics/Scripts/BedGraph.window.rb +0 -71
  84. data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +0 -102
  85. data/utils/enveomics/Scripts/BlastTab.addlen.rb +0 -63
  86. data/utils/enveomics/Scripts/BlastTab.advance.bash +0 -48
  87. data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +0 -55
  88. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +0 -104
  89. data/utils/enveomics/Scripts/BlastTab.cogCat.rb +0 -76
  90. data/utils/enveomics/Scripts/BlastTab.filter.pl +0 -47
  91. data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +0 -194
  92. data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +0 -104
  93. data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +0 -157
  94. data/utils/enveomics/Scripts/BlastTab.recplot2.R +0 -48
  95. data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +0 -86
  96. data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +0 -119
  97. data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +0 -86
  98. data/utils/enveomics/Scripts/BlastTab.subsample.pl +0 -47
  99. data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +0 -114
  100. data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +0 -90
  101. data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +0 -123
  102. data/utils/enveomics/Scripts/Chao1.pl +0 -97
  103. data/utils/enveomics/Scripts/CharTable.classify.rb +0 -234
  104. data/utils/enveomics/Scripts/EBIseq2tax.rb +0 -83
  105. data/utils/enveomics/Scripts/FastA.N50.pl +0 -60
  106. data/utils/enveomics/Scripts/FastA.extract.rb +0 -152
  107. data/utils/enveomics/Scripts/FastA.filter.pl +0 -52
  108. data/utils/enveomics/Scripts/FastA.filterLen.pl +0 -28
  109. data/utils/enveomics/Scripts/FastA.filterN.pl +0 -60
  110. data/utils/enveomics/Scripts/FastA.fragment.rb +0 -100
  111. data/utils/enveomics/Scripts/FastA.gc.pl +0 -42
  112. data/utils/enveomics/Scripts/FastA.interpose.pl +0 -93
  113. data/utils/enveomics/Scripts/FastA.length.pl +0 -38
  114. data/utils/enveomics/Scripts/FastA.mask.rb +0 -89
  115. data/utils/enveomics/Scripts/FastA.per_file.pl +0 -36
  116. data/utils/enveomics/Scripts/FastA.qlen.pl +0 -57
  117. data/utils/enveomics/Scripts/FastA.rename.pl +0 -65
  118. data/utils/enveomics/Scripts/FastA.revcom.pl +0 -23
  119. data/utils/enveomics/Scripts/FastA.sample.rb +0 -98
  120. data/utils/enveomics/Scripts/FastA.slider.pl +0 -85
  121. data/utils/enveomics/Scripts/FastA.split.pl +0 -55
  122. data/utils/enveomics/Scripts/FastA.split.rb +0 -79
  123. data/utils/enveomics/Scripts/FastA.subsample.pl +0 -131
  124. data/utils/enveomics/Scripts/FastA.tag.rb +0 -65
  125. data/utils/enveomics/Scripts/FastA.toFastQ.rb +0 -69
  126. data/utils/enveomics/Scripts/FastA.wrap.rb +0 -48
  127. data/utils/enveomics/Scripts/FastQ.filter.pl +0 -54
  128. data/utils/enveomics/Scripts/FastQ.interpose.pl +0 -90
  129. data/utils/enveomics/Scripts/FastQ.maskQual.rb +0 -89
  130. data/utils/enveomics/Scripts/FastQ.offset.pl +0 -90
  131. data/utils/enveomics/Scripts/FastQ.split.pl +0 -53
  132. data/utils/enveomics/Scripts/FastQ.tag.rb +0 -70
  133. data/utils/enveomics/Scripts/FastQ.test-error.rb +0 -81
  134. data/utils/enveomics/Scripts/FastQ.toFastA.awk +0 -24
  135. data/utils/enveomics/Scripts/GFF.catsbj.pl +0 -127
  136. data/utils/enveomics/Scripts/GenBank.add_fields.rb +0 -84
  137. data/utils/enveomics/Scripts/HMM.essential.rb +0 -351
  138. data/utils/enveomics/Scripts/HMM.haai.rb +0 -168
  139. data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +0 -83
  140. data/utils/enveomics/Scripts/JPlace.distances.rb +0 -88
  141. data/utils/enveomics/Scripts/JPlace.to_iToL.rb +0 -320
  142. data/utils/enveomics/Scripts/M5nr.getSequences.rb +0 -81
  143. data/utils/enveomics/Scripts/MeTaxa.distribution.pl +0 -198
  144. data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +0 -35
  145. data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +0 -49
  146. data/utils/enveomics/Scripts/NCBIacc2tax.rb +0 -92
  147. data/utils/enveomics/Scripts/Newick.autoprune.R +0 -27
  148. data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +0 -228
  149. data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +0 -32
  150. data/utils/enveomics/Scripts/RefSeq.download.bash +0 -48
  151. data/utils/enveomics/Scripts/SRA.download.bash +0 -55
  152. data/utils/enveomics/Scripts/TRIBS.plot-test.R +0 -36
  153. data/utils/enveomics/Scripts/TRIBS.test.R +0 -39
  154. data/utils/enveomics/Scripts/Table.barplot.R +0 -31
  155. data/utils/enveomics/Scripts/Table.df2dist.R +0 -30
  156. data/utils/enveomics/Scripts/Table.filter.pl +0 -61
  157. data/utils/enveomics/Scripts/Table.merge.pl +0 -77
  158. data/utils/enveomics/Scripts/Table.prefScore.R +0 -60
  159. data/utils/enveomics/Scripts/Table.replace.rb +0 -69
  160. data/utils/enveomics/Scripts/Table.round.rb +0 -63
  161. data/utils/enveomics/Scripts/Table.split.pl +0 -57
  162. data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +0 -227
  163. data/utils/enveomics/Scripts/VCF.KaKs.rb +0 -147
  164. data/utils/enveomics/Scripts/VCF.SNPs.rb +0 -88
  165. data/utils/enveomics/Scripts/aai.rb +0 -421
  166. data/utils/enveomics/Scripts/ani.rb +0 -362
  167. data/utils/enveomics/Scripts/anir.rb +0 -137
  168. data/utils/enveomics/Scripts/clust.rand.rb +0 -102
  169. data/utils/enveomics/Scripts/gi2tax.rb +0 -103
  170. data/utils/enveomics/Scripts/in_silico_GA_GI.pl +0 -96
  171. data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
  172. data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
  173. data/utils/enveomics/Scripts/lib/enveomics.R +0 -1
  174. data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +0 -293
  175. data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +0 -175
  176. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +0 -24
  177. data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +0 -17
  178. data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +0 -30
  179. data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +0 -253
  180. data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +0 -88
  181. data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +0 -182
  182. data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +0 -49
  183. data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +0 -74
  184. data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +0 -237
  185. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +0 -31
  186. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +0 -152
  187. data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +0 -3
  188. data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +0 -74
  189. data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +0 -135
  190. data/utils/enveomics/Scripts/ogs.annotate.rb +0 -88
  191. data/utils/enveomics/Scripts/ogs.core-pan.rb +0 -160
  192. data/utils/enveomics/Scripts/ogs.extract.rb +0 -125
  193. data/utils/enveomics/Scripts/ogs.mcl.rb +0 -186
  194. data/utils/enveomics/Scripts/ogs.rb +0 -104
  195. data/utils/enveomics/Scripts/ogs.stats.rb +0 -131
  196. data/utils/enveomics/Scripts/rbm-legacy.rb +0 -172
  197. data/utils/enveomics/Scripts/rbm.rb +0 -108
  198. data/utils/enveomics/Scripts/sam.filter.rb +0 -148
  199. data/utils/enveomics/Tests/Makefile +0 -10
  200. data/utils/enveomics/Tests/Mgen_M2288.faa +0 -3189
  201. data/utils/enveomics/Tests/Mgen_M2288.fna +0 -8282
  202. data/utils/enveomics/Tests/Mgen_M2321.fna +0 -8288
  203. data/utils/enveomics/Tests/Nequ_Kin4M.faa +0 -2970
  204. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
  205. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +0 -7
  206. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +0 -17
  207. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +0 -137
  208. data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +0 -123
  209. data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +0 -200
  210. data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +0 -55
  211. data/utils/enveomics/Tests/alkB.nwk +0 -1
  212. data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +0 -13
  213. data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +0 -17
  214. data/utils/enveomics/Tests/hiv1.faa +0 -59
  215. data/utils/enveomics/Tests/hiv1.fna +0 -134
  216. data/utils/enveomics/Tests/hiv2.faa +0 -70
  217. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +0 -233
  218. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +0 -1
  219. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +0 -233
  220. data/utils/enveomics/Tests/phyla_counts.tsv +0 -10
  221. data/utils/enveomics/Tests/primate_lentivirus.ogs +0 -11
  222. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +0 -9
  223. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +0 -8
  224. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +0 -6
  225. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +0 -9
  226. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +0 -6
  227. data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +0 -6
  228. data/utils/enveomics/build_enveomics_r.bash +0 -45
  229. data/utils/enveomics/enveomics.R/DESCRIPTION +0 -31
  230. data/utils/enveomics/enveomics.R/NAMESPACE +0 -39
  231. data/utils/enveomics/enveomics.R/R/autoprune.R +0 -155
  232. data/utils/enveomics/enveomics.R/R/barplot.R +0 -184
  233. data/utils/enveomics/enveomics.R/R/cliopts.R +0 -135
  234. data/utils/enveomics/enveomics.R/R/df2dist.R +0 -154
  235. data/utils/enveomics/enveomics.R/R/growthcurve.R +0 -331
  236. data/utils/enveomics/enveomics.R/R/prefscore.R +0 -79
  237. data/utils/enveomics/enveomics.R/R/recplot.R +0 -354
  238. data/utils/enveomics/enveomics.R/R/recplot2.R +0 -1631
  239. data/utils/enveomics/enveomics.R/R/tribs.R +0 -583
  240. data/utils/enveomics/enveomics.R/R/utils.R +0 -80
  241. data/utils/enveomics/enveomics.R/README.md +0 -81
  242. data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
  243. data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
  244. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -16
  245. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -16
  246. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -16
  247. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +0 -25
  248. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +0 -46
  249. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +0 -23
  250. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +0 -47
  251. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +0 -23
  252. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +0 -23
  253. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +0 -40
  254. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +0 -103
  255. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +0 -67
  256. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +0 -24
  257. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +0 -19
  258. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +0 -45
  259. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +0 -44
  260. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +0 -47
  261. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +0 -75
  262. data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +0 -50
  263. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +0 -44
  264. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +0 -139
  265. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +0 -45
  266. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +0 -24
  267. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +0 -77
  268. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +0 -25
  269. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +0 -21
  270. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +0 -19
  271. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +0 -19
  272. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +0 -47
  273. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +0 -29
  274. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +0 -18
  275. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +0 -45
  276. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +0 -36
  277. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +0 -19
  278. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +0 -19
  279. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +0 -27
  280. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +0 -52
  281. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +0 -17
  282. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +0 -51
  283. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +0 -43
  284. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +0 -82
  285. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +0 -59
  286. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +0 -27
  287. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +0 -36
  288. data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +0 -23
  289. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +0 -68
  290. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +0 -28
  291. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +0 -27
  292. data/utils/enveomics/enveomics.R/man/growth.curves.Rd +0 -14
  293. data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +0 -13
  294. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +0 -78
  295. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +0 -46
  296. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +0 -45
  297. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +0 -125
  298. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +0 -19
  299. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +0 -19
  300. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +0 -19
  301. data/utils/enveomics/globals.mk +0 -8
  302. data/utils/enveomics/manifest.json +0 -9
  303. data/utils/multitrim/Multitrim How-To.pdf +0 -0
  304. data/utils/multitrim/README.md +0 -67
  305. data/utils/multitrim/multitrim.py +0 -1555
  306. data/utils/multitrim/multitrim.yml +0 -13
@@ -1,103 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- #
4
- # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
5
- # @license artistic license 2.0
6
- #
7
-
8
- $:.push File.expand_path(File.dirname(__FILE__) + "/lib")
9
- require "enveomics_rb/remote_data"
10
- use "nokogiri"
11
-
12
- #================================[ Options parsing ]
13
# Default settings. Every key can be overridden from the command line; +o+ is
# read by the rest of the script.
o = {
  q: false, gis: [], dbfrom: "nuccore", header: true,
  exact_gi: false, no_nil: false, ret: "ScientificName",
  ranks: %w[superkingdom phylum class order family genus species]
}

# Command-line definition. Each handler only records its value in +o+.
parser = OptionParser.new do |cli|
  # The banner starts with an empty line and has no trailing newline.
  cli.banner = "\n" + <<~BANNER.chomp
    Maps a list of NCBI GIs to their corresponding taxonomy using the NCBI
    EUtilities. Avoid using this script on millions of entries at a time, since
    each entry elicits two requests to NCBI's servers.

    *IMPORTANT NOTE*: NCBI is phasing out support for GIs. Please use acc.ver
    instead with NCBIacc2tax.rb.

    Usage: #{$0} [options]
  BANNER
  cli.separator ""

  # Input: GIs given inline and/or listed in a file
  cli.on(
    "-g", "--gis GI1,GI2,...", Array,
    "Comma-separated list of GIs. Required unless -i is passed."
  ) { |list| o[:gis] = list }
  cli.on(
    "-i", "--infile FILE",
    "Raw text file containing the list of GIs, one per line.",
    "Required unless -g is passed."
  ) { |path| o[:infile] = path }
  cli.on(
    "-p", "--protein",
    "Use if the GIs are proteins. Otherwise, GIs are assumed to be from " \
    "the Nuccore Database."
  ) { o[:dbfrom] = "protein" }

  # Output shape
  cli.on(
    "-r", "--ranks RANK1,RANK2,...", Array,
    "Taxonomic ranks to report. By default: #{o[:ranks].join(",")}."
  ) { |list| o[:ranks] = list }
  cli.on(
    "-n", "--noheader",
    "Do not include a header in the output."
  ) { o[:header] = false }
  cli.on(
    "-t", "--taxids",
    "Return Taxonomy IDs instead of scientific names."
  ) { o[:ret] = "TaxId" }

  # Handling of outdated or missing GIs
  cli.on(
    "--exact-gi",
    "Returns only taxonomy associated with the exact GI passed.",
    "By default, it attempts to update accession versions if possible."
  ) { |flag| o[:exact_gi] = flag }
  cli.on(
    "--ignore-missing",
    "Does not report missing GIs in the output file.",
    "By default, it reports GI and empty values for all other columns."
  ) { |flag| o[:no_nil] = flag }

  cli.on("-q", "--quiet", "Run quietly.") { o[:q] = true }
  cli.on("-h", "--help", "Display this screen") do
    puts cli
    exit
  end
  cli.separator ""
end
parser.parse!
59
-
60
- #================================[ Functions ]
61
# Asks +RemoteData.elink+ for the taxonomy entry linked to +gi+ in the
# source database +db+, parses the reply as XML, and returns whatever
# +at_xpath+ finds at the first link Id (the caller treats +nil+ as
# "no link found" and reads the identifier through +content+).
def gi2taxid(db, gi)
  link_query = { :dbfrom => db, :db => "taxonomy", :id => gi }
  link_reply = RemoteData.elink(link_query)
  Nokogiri::XML(link_reply).at_xpath("/eLinkResult/LinkSet/LinkSetDb/Link/Id")
end
66
- #================================[ Main ]
67
# Main routine: prints one tab-delimited row per GI with the GI, its linked
# taxonomy Id, and the value selected by +o[:ret]+ for each requested rank.
#
# GIs without a taxonomy link are reported on STDERR and, unless
# +o[:no_nil]+ is set, printed as a row with empty columns. Any other
# failure is reported on STDERR and the script terminates with a non-zero
# exit status so that callers can detect it.
begin
  unless o[:infile].nil?
    # Blank lines are dropped so they never reach NCBI as empty queries
    o[:gis] += File.readlines(o[:infile]).map(&:strip).reject(&:empty?)
  end
  o[:ranks].map!(&:downcase)
  puts((%w[GI TaxId] + o[:ranks].map(&:capitalize)).join("\t")) if o[:header]

  o[:gis].each do |gi|
    taxid = gi2taxid(o[:dbfrom], gi)
    status = ""
    if taxid.nil? && !o[:exact_gi]
      # Retry with the updated GI, if the record was superseded
      new_gi, status = RemoteData.update_gi(o[:dbfrom], gi)
      taxid = gi2taxid(o[:dbfrom], new_gi) unless new_gi.nil?
    end
    if taxid.nil?
      warn "Cannot find link to taxonomy: #{gi} #{status}"
      puts(([gi, ""] + o[:ranks].map { "" }).join("\t")) unless o[:no_nil]
      next
    end

    # taxid is guaranteed to be present from here on
    doc = Nokogiri::XML(
      RemoteData.efetch({ :db => "taxonomy", :id => taxid.content })
    )
    taxonomy = {}
    # The taxon itself ...
    taxonomy[doc.at_xpath("/TaxaSet/Taxon/Rank").content] =
      doc.at_xpath("/TaxaSet/Taxon/#{o[:ret]}").content
    # ... followed by every ancestor listed in its lineage
    doc.xpath("/TaxaSet/Taxon/LineageEx/Taxon").each do |taxon|
      taxonomy[taxon.at_xpath("./Rank").content] =
        taxon.at_xpath("./#{o[:ret]}").content
    end

    row = [gi, taxid.content] + o[:ranks].map { |rank| taxonomy.fetch(rank, "") }
    puts row.join("\t")
  end
rescue => err
  $stderr.puts "Exception: #{err}\n\n"
  err.backtrace.each { |l| $stderr.puts l + "\n" }
  # Signal the failure to the calling shell instead of exiting with status 0
  exit 1
end
@@ -1,96 +0,0 @@
1
# usage perl in_silico_GA.pl [options]
#
# Simulates paired reads from the sequence(s) of a FastA file. All sequence
# lines are concatenated into a single template, which is treated as circular
# when a sampled segment runs past its end. Reads are written in FastA format
# as pairs with IDs ending in '#0/1' and '#0/2'.
#
# Options (all required):
#   --in         input fasta chr file
#   --out        output file name
#   --coverage   desired output coverage
#   --seq_error  sequencing error (per-base probability)
#   --read_len   simulated read length
#   --ins_len    insertion length
#   --ins_var    standard deviation passed to random_normal for segment length

use strict;
use warnings;
use Getopt::Long;
use Math::Random qw(:all);

my ($infile, $outfile, $cov, $seq_error, $read_len, $ins_len, $ins_var);
GetOptions(
  'in=s'        => \$infile,     # input fasta chr file
  'out=s'       => \$outfile,    # output file name
  'coverage=s'  => \$cov,        # desired output
  'seq_error=s' => \$seq_error,  # sequencing error
  'read_len=s'  => \$read_len,   # simulated read length
  'ins_len=s'   => \$ins_len,    # insertion length
  'ins_var=s'   => \$ins_var,
) or die "Invalid options, see the header of $0 for usage\n";

# Every option feeds the simulation, so refuse to run with any of them missing
my %given = (
  in => $infile, out => $outfile, coverage => $cov, seq_error => $seq_error,
  read_len => $read_len, ins_len => $ins_len, ins_var => $ins_var,
);
for my $name (sort keys %given) {
  die "Missing required option --$name\n" unless defined $given{$name};
}
die "--read_len must be a positive number\n" unless $read_len > 0;

# Replacement base for each random draw used when introducing errors
my %code = (0 => 'C', 1 => 'A', 2 => 'T', 3 => 'G');

# Read the template; the tag of the last header seen is used in read IDs
my $chr = '';
my $gi  = '';
open(my $in, '<', $infile) or die "Cannot read $infile: $!\n";
while (my $line = <$in>) {
  chomp $line;
  if ($line !~ /^\>/) {
    $chr .= $line;
  }
  elsif ($line =~ /^\>gi\|(\S+)\|\S+\|\S+/) {
    # NCBI-style header: keep only the GI
    $gi = $1;
  }
  elsif ($line =~ /^\>(\S+)/) {
    # Any other header: fall back to the sequence ID
    $gi = $1;
  }
}
close($in);

my $chr_size = length $chr;
die "No sequence found in $infile\n" unless $chr_size > 0;
print "chromosome size: $chr_size\n";
my $seg_size     = 2 * $read_len + $ins_len;
my $reads_number = int($cov * $chr_size / ($read_len * 2));
print "generated reads $reads_number x 2\n";

open(my $out, '>', $outfile) or die "Cannot write $outfile: $!\n";
for my $index (1 .. $reads_number) {
  # Zero-padded to 8 digits; longer indexes are printed in full
  my $id = sprintf('read%08d_%s', $index, $gi);

  # make start site
  my $start_site = int(rand($chr_size));
  # make short seg length
  my $seg_length = int(random_normal(1, $seg_size, $ins_var));

  # extract the segment, wrapping around to the start of the template when
  # the segment runs past its end
  my $seg = substr($chr, $start_site, $seg_length);
  my $gap = $seg_length - length($seg);
  $seg .= substr($chr, 0, $gap) if $gap > 0;

  $id .= '.start' . $start_site . '.seg_len' . $seg_length;

  # get the reads: first and last $read_len bases of the segment
  my @seq1 = split(//, substr($seg, 0, $read_len));
  # $seg=~tr/ATCG/TAGC/ this line can change the orientation of the second read;
  my @seq2 = split(//, substr($seg, -$read_len));

  # sequencing error introducing: one 0/1 draw per position and read
  my @mut1 = random_binomial($read_len, 1, $seq_error);
  my @mut2 = random_binomial($read_len, 1, $seq_error);
  for my $i (0 .. $#mut1) {
    $seq1[$i] = $code{ int(rand(4)) } if $mut1[$i] == 1;
    $seq2[$i] = $code{ int(rand(4)) } if $mut2[$i] == 1;
  }
  my $seq1 = join('', @seq1);
  my $seq2 = join('', @seq2);

  my $id1 = $id . '#0/1';
  my $id2 = $id . '#0/2';

  print {$out} ">$id1\n$seq1\n>$id2\n$seq2\n";
}
close($out) or die "Cannot finish writing $outfile: $!\n";
95
-
96
-
@@ -1 +0,0 @@
1
- ../../enveomics.R
@@ -1,293 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'enveomics_rb/stats'
4
- require 'fileutils'
5
- require 'shellwords'
6
- require 'tmpdir'
7
- require 'zlib'
8
-
9
module Enveomics
  # Wrapper class for ANIr estimation
  #
  # Use as: +ANIr.new(opts).go!+
  #
  # Option keys read by this class: +:m+ and +:m_format+ (mapping file and its
  # format), +:g+ and +:g_format+ (genome assembly), +:r+, +:r_format+ and
  # +:r_type+ (reads), +:threads+, +:identity+, +:algorithm+, +:bimodality+,
  # +:coefficient+, +:bin_size+, +:L+ and +:H+ (output files), +:log+ and +:q+.
  class ANIr
    # Options hash
    attr_reader :opts

    # Identities list (unsorted)
    attr_reader :identities

    def initialize(opts)
      @opts = opts
      @identities = []
    end

    # --------------------------------------------------[ High-level pipelines ]

    # Perform all the analyses
    def go!
      read_input
      detect_identity
      estimate_ani_r
    end

    # Identify input/output mode and read mapping
    #
    # Unless the input is a plain list of identities, the mapping is generated
    # when the mapping file does not exist yet. The temporary directory created
    # here is always removed on exit, even when an error is raised.
    #
    # Raises Enveomics::OptionError if +opts[:m_format]+ has no matching
    # +read_mapping_from_*+ method.
    def read_input
      if opts[:m_format] != :list
        @tmpdir = Dir.mktmpdir
        @filter_contigs = !opts[:g].nil?
        opts[:m] = File.join(@tmpdir, 'map.sam') if opts[:m].nil?
        run_mapping unless File.exist? opts[:m]
        load_contigs_to_filter if @filter_contigs
      end
      read_mapping = :"read_mapping_from_#{opts[:m_format]}"
      unless respond_to? read_mapping
        raise Enveomics::OptionError,
              "Unsupported mapping format: #{opts[:m_format]}"
      end
      @identities = []
      send(read_mapping)
      say "- Unfiltered average identity: #{sample.mean}"
      say "- Reads mapped: #{sample.n}"
      save_identities
      save_histogram
    ensure
      @tmpdir ||= nil
      FileUtils.rm_rf @tmpdir if @tmpdir
    end

    # Identify the identity threshold
    #
    # With +opts[:algorithm]+ set to +:auto+, the algorithm is picked by
    # comparing #bimodality against +opts[:bimodality]+.
    def detect_identity
      say 'Detecting identity threshold'
      if opts[:algorithm] == :auto
        say "- Bimodality: #{bimodality}"
        opts[:algorithm] = bimodality >= opts[:bimodality] ? :gmm : :fix
      end
      say "- Algorithm: #{opts[:algorithm]}"
      detect_identity_by_gmm if opts[:algorithm] == :gmm
    end

    # Estimate ANIr: discards identities below +opts[:identity]+ and reports
    # the mean of the remaining ones
    def estimate_ani_r
      say 'Estimating ANIr'
      @sample = nil # Empty cached sample
      @identities.delete_if { |i| i < opts[:identity] }
      say "- ANIr: #{sample.mean}"
    end

    # -----------------------------------------------------------------[ Utils ]

    # Show progress unless +opts[:q]+, and append it to +opts[:log]+ if set
    def say(*msg)
      o = format('[%s] %s', Time.now, msg.join(''))
      $stderr.puts(o) unless opts[:q]
      File.open(opts[:log], 'a') { |fh| fh.puts o } if opts[:log]
    end

    # Execute command in the shell
    #
    # +cmd+ is an Array with the program and its arguments. Standard output and
    # error are appended to +opts[:log]+ (or discarded if unset).
    #
    # Raises Enveomics::CommandError if the command exits unsuccessfully.
    def run(cmd)
      say " - Running: #{cmd.join(' ')}"
      log = (opts[:log] || '/dev/null').to_s.shellescape
      # Redirect instead of piping, so that $? is the status of the command
      # itself and not that of the last process in a pipeline
      system("#{cmd.shelljoin} >> #{log} 2>&1")
      return if $?.success?

      raise Enveomics::CommandError, "#{cmd.first} failed: #{$?}"
    end

    # Returns an open file handler for the file, supporting .gz
    def reader(file)
      file =~ /\.gz$/ ? Zlib::GzipReader.open(file) : File.open(file, 'r')
    end

    # Is the mapping in SAM format?
    def sam?
      opts[:m_format] == :sam
    end

    # ------------------------------------------------------------[ Map it out ]

    # Execute Bowtie2 and generate SAM file
    #
    # Raises Enveomics::OptionError unless the mapping format is SAM, the
    # genome format is FastA, and +opts[:r_type]+ is one of +:single+,
    # +:coupled+, or +:interleaved+.
    def run_mapping
      say 'Running mapping using Bowtie2'
      unless sam?
        raise Enveomics::OptionError, 'Only SAM output is supported for mapping'
      end

      # The mapping is built against opts[:g] only, nothing left to filter
      @filter_contigs = false
      say '- Indexing input sequences'
      unless opts[:g_format] == :fasta
        raise Enveomics::OptionError,
              'Only FastA genome input is supported for mapping'
      end

      idx = File.join(@tmpdir, 'genome.idx')
      run(['bowtie2-build', opts[:g], idx])

      say '- Mapping metagenomic reads to genome assembly'
      cmd = [
        'bowtie2', '-x', idx, '-p', opts[:threads], '-S', opts[:m], '--no-mixed'
      ]
      cmd << '-f' if opts[:r_format] == :fasta
      cmd +=
        case opts[:r_type]
        when :single
          ['-U', opts[:r]]
        when :coupled
          # Both files are passed as a single comma-delimited value
          pairs = opts[:r].split(',', 2)
          ['-1', pairs[0], '-2', pairs[1], '--no-discordant']
        when :interleaved
          ['--interleaved', opts[:r], '--no-discordant']
        else
          raise Enveomics::OptionError,
                "Unsupported reads type: #{opts[:r_type]}"
        end
      run(cmd)
    end

    # If +@filter_contigs+ is true, reads the genome assembly and saves contig
    # names to filter the mapping
    #
    # Raises Enveomics::OptionError if +opts[:g_format]+ is neither +:fasta+
    # nor +:list+.
    def load_contigs_to_filter
      return unless @filter_contigs

      say 'Loading contigs to filter'
      @contigs_to_filter =
        with_reader(opts[:g]) do |fh|
          case opts[:g_format]
          when :fasta
            # Sequence ID: first word of each defline
            fh.each.map { |ln| $1 if ln =~ /^>(\S+)/ }.compact
          when :list
            fh.each.map(&:chomp)
          else
            raise Enveomics::OptionError,
                  "Unsupported genome assembly format: #{opts[:g_format]}"
          end
        end
      say "- Got #{@contigs_to_filter.size} contigs"
    end

    # Reads the mapping file assuming SAM format
    def read_mapping_from_sam
      say 'Reading mapping from SAM file'
      with_reader(opts[:m]) do |fh|
        fh.each { |ln| parse_sam_line(ln) }
      end
    end

    # Reads the mapping file assuming BAM format
    def read_mapping_from_bam
      say 'Reading mapping from BAM file'
      IO.popen(['samtools', 'view', opts[:m]].shelljoin) do |fh|
        fh.each { |ln| parse_sam_line(ln) }
      end
    end

    # Reads the mapping file assuming a Tabular BLAST format
    def read_mapping_from_tab
      say 'Reading mapping from Tabular BLAST file'
      with_reader(opts[:m]) do |fh|
        fh.each do |ln|
          next if ln =~ /^\s*(#.*)?$/ # Comment or empty line

          row = ln.chomp.split("\t")
          # Second column is the subject, third column is the identity
          next if @filter_contigs && !@contigs_to_filter.include?(row[1])

          @identities << row[2].to_f
        end
      end
    end

    # Reads the identities from a raw-text list
    def read_mapping_from_list
      say 'Reading identities from raw text list'
      @identities = with_reader(opts[:m]) { |fh| fh.each.map(&:to_f) }
    end

    # Parses one line in SAM format
    #
    # Header lines, empty lines, unmapped reads, reads mapped to contigs
    # outside the filter, and reads with a YT flag other than CP or UU are
    # ignored. For any other line, the identity is appended to +@identities+.
    #
    # Raises Enveomics::ParseError if the line has no MD flag.
    def parse_sam_line(ln)
      return if ln =~ /^@/ || ln =~ /^\s*$/

      row = ln.chomp.split("\t")
      return if row[2] == '*'
      return if @filter_contigs && !@contigs_to_filter.include?(row[2])

      length = row[9].size
      row.shift(11) # Discard non-flag columns
      # 'TAG:TYPE:VALUE' => ['TAG', 'VALUE']
      flags = Hash[row.map { |i| i.sub(/:.:/, ':').split(':', 2) }]
      return if flags['YT'] && !%w[CP UU].include?(flags['YT'])

      unless flags['MD']
        raise Enveomics::ParseError,
              "SAM line missing MD flag:\n#{ln}\nFlags: #{flags}"
      end
      # Every non-digit character in MD is counted as one mismatch
      mismatches = flags['MD'].scan(/[^\d]/).count
      @identities << 100.0 * (length - mismatches) / length
    end

    # Save identites as raw text
    def save_identities
      return unless opts[:L]

      say '- Saving identities'
      File.open(opts[:L], 'w') do |fh|
        identities.each { |i| fh.puts i }
      end
    end

    # Save identity histogram as raw text
    def save_histogram
      return unless opts[:H]

      say '- Saving histogram'
      File.open(opts[:H], 'w') do |fh|
        fh.puts "from\tto\tcount"
        sample.histo_ranges.each_with_index do |r, k|
          fh.puts((r + [sample.histo_counts[k]]).join("\t"))
        end
      end
    end

    # -----------------------------------------------------------[ Peak finder ]

    # Detect identity threshold by gaussian mixture model EM
    def detect_identity_by_gmm
      model_identities_by_gmm_em
      detect_valley_by_gmm
    end

    # Model identities as a 2-gaussian mix by EM
    def model_identities_by_gmm_em
      say 'Modeling identities by gaussian mixture model using EM'
      # TODO: Implement
      raise Enveomics::UnimplementedError, 'Unimplemented operation'
    end

    # Detect valley by gaussian mix
    def detect_valley_by_gmm
      say 'Detecting valley by gaussian mixture model'
      # TODO: Implement
      raise Enveomics::UnimplementedError, 'Unimplemented operation'
    end

    # -----------------------------------------------------------[ Do the math ]

    # Identities as a Enveomics::Stats::Sample object (cached)
    def sample
      @sample ||= Enveomics::Stats::Sample.new(
        identities,
        effective_range: [nil, 100.0],
        histo_bin_size: opts[:bin_size]
      )
    end

    # Returns the bimodality coefficient indicated by +opts[:coefficient]+
    #
    # Raises Enveomics::OptionError for coefficients other than +:sarle+ or
    # +:dma+.
    def bimodality
      @bimodality ||=
        case opts[:coefficient]
        when :sarle
          sample.sarle_bimodality
        when :dma
          sample.dma_bimodality
        else
          raise Enveomics::OptionError,
                "Unsupported coefficient of bimodality: #{opts[:coefficient]}"
        end
    end

    private

    # Opens +file+ with #reader, yields the handler, and returns the value of
    # the block. The handler is closed even if the block raises.
    def with_reader(file)
      fh = reader(file)
      yield fh
    ensure
      fh&.close
    end
  end
end
@@ -1,175 +0,0 @@
1
-
2
- require 'enveomics_rb/enveomics'
3
- require 'enveomics_rb/match'
4
- use 'tmpdir'
5
- use 'shellwords'
6
-
7
- module Enveomics
8
- class BMset
9
- attr :qry, :sbj, :set, :opt
10
-
11
- ##
12
- # Initialize Enveomics::BMset object with sequence paths +qry+ and +sbj+,
13
- # and options Hash +opts+ (see #opt for supported options) with Symbol keys
14
- def initialize(qry, sbj, opts = {})
15
- @qry = qry
16
- @sbj = sbj
17
- @set = nil
18
- @opt = opts
19
- end
20
-
21
- ##
22
- # Returns option with key +k+ as defined by #initialize or by default
23
- # Supported options include [defaults in brackets]:
24
- # - len [0]: Minimum alignment length in residues
25
- # - id [0.0]: Minimum alignment identity in percent
26
- # - fract [0.0]: Minimum alignment length as fraction of the query
27
- # - score [0.0]: Minimum alignment score in bits
28
- # - nucl [false]: The sequences are in nucleotides
29
- # - thr [1]: Number of threads to use
30
- # - bin ['']: Path to the directory containing binaries
31
- # - program [:blast+]: Search engine to use
32
- def opt(k)
33
- @defaults ||= {
34
- len: 0, id: 0.0, fract: 0.0, score: 0.0,
35
- nucl: false, thr: 1, bin: '', program: :'blast+'
36
- }
37
- k = k.to_sym
38
- @opt[k] = @defaults[k] if @opt[k].nil?
39
- @opt[k]
40
- end
41
-
42
- ##
43
- # Array of Enveomics::Match objects
44
- def set
45
- match_and_filter! if @set.nil?
46
- @set
47
- end
48
-
49
- ##
50
- # Returns the best match of query +qry+ as Enveomics::Match or nil if
51
- # no qualifying match was found
52
- def [](qry)
53
- set[qry]
54
- end
55
-
56
- ##
57
- # Number of matches found
58
- def count
59
- set.count
60
- end
61
-
62
- ##
63
- # Execute search and filter matches
64
- def match_and_filter!
65
- @set = {}
66
- match!.each do |match|
67
- # Already a better match?
68
- next if self[match.qry] && self[match.qry].score >= match.score
69
-
70
- # Is this a good enough match?
71
- next unless %i[len id score fract].all? do |metric|
72
- match.send(metric) >= opt(metric)
73
- end
74
-
75
- # Save match
76
- @set[match.qry] = match
77
- end
78
- end
79
-
80
- ##
81
- # Find all matches and return as an array of Enveomics::Match objects
82
- def match!
83
- y = []
84
- Dir.mktmpdir do |dir|
85
- # Determine commands
86
- say('Temporal directory: ', dir)
87
- db_path = File.join(dir, 'sbj.db')
88
- out_path = File.join(dir, 'out.tsv')
89
- cmds = []
90
- case opt(:program)
91
- when :blast
92
- cmds << [
93
- 'formatdb', '-i', sbj, '-n', db_path, '-l', File.join(dir, 'log'),
94
- '-p', opt(:nucl) ? 'F' : 'T'
95
- ]
96
- cmd << [
97
- 'blastall', '-p', opt(:nucl) ? 'blastn' : 'blastp', '-d', db_path,
98
- '-i', qry, '-v', '1', '-b', '1', '-a', opt(:thr).to_s, '-m', '8',
99
- '-o', out_path
100
- ]
101
- when :'blast+'
102
- cmds << [
103
- 'makeblastdb', '-in', sbj, '-out', db_path,
104
- '-dbtype', opt(:nucl) ? 'nucl' : 'prot'
105
- ]
106
- cmds << [
107
- opt(:nucl) ? 'blastn' : 'blastp', '-db', db_path, '-query', qry,
108
- '-num_threads', opt(:thr).to_s, '-out', out_path, '-outfmt',
109
- '6 qseqid sseqid pident length mismatch gapopen qstart qend ' \
110
- 'sstart send evalue bitscore qlen slen'
111
- ]
112
- when :diamond
113
- raise Enveomics::OptionError.new(
114
- 'Unsupported search engine diamond for nucleotides'
115
- ) if opt(:nucl)
116
- cmds << [
117
- 'diamond', 'makedb', '--in', sbj, '--db', db_path,
118
- '--threads', opt(:thr).to_s
119
- ]
120
- cmds << [
121
- 'diamond', 'blastp', '--threads', opt(:thr).to_s,
122
- '--db', db_path, '--query', qry, '--daa', "#{out_path}.daa",
123
- '--quiet', '--sensitive'
124
- ]
125
- cmds << [
126
- 'diamond', 'view', '--daa', "#{out_path}.daa", '--out', out_path,
127
- '--quiet', '--outfmt'
128
- ] + %w[6 qseqid sseqid pident length mismatch gapopen qstart] +
129
- %w[qend sstart send evalue bitscore qlen slen]
130
- when :blat
131
- cmds << ['blat', sbj, qry, '-out=blast8', out_path]
132
- cmds[0] << '-prot' unless opt(:nucl)
133
- else
134
- raise Enveomics::OptionError.new(
135
- "Unsupported search engine: #{opt(:program)}"
136
- )
137
- end
138
-
139
- # Run commands
140
- say('Running comparison')
141
- say('Query: ', qry)
142
- say('Subject: ', sbj)
143
- cmd_err = File.join(dir, 'err')
144
- begin
145
- cmds.each do |cmd|
146
- cmd[0] = File.join(opt(:bin), cmd[0]) unless opt(:bin) == ''
147
- run_cmd(cmd, stderr: cmd_err)
148
- end
149
- rescue Enveomics::CommandError => e
150
- $stderr.puts e
151
- $stderr.puts ''
152
- $stderr.puts '[ Error log ]'
153
- $stderr.puts File.read(cmd_err)
154
- exit
155
- end
156
-
157
- # Parse output
158
- File.open(out_path, 'r') do |fh|
159
- fh.each { |ln| y << Enveomics::Match.new(ln) }
160
- end
161
- end
162
- y
163
- end
164
-
165
- ##
166
- # Enumerate RBMs and yield +blk+
167
- def each(&blk)
168
- if block_given?
169
- set.each { |_, bm| blk.call(bm) }
170
- else
171
- to_enum(:each)
172
- end
173
- end
174
- end
175
- end
@@ -1,24 +0,0 @@
1
-
2
- require 'enveomics_rb/utils'
3
- use 'optparse'
4
- ARGV << '-h' if ARGV.empty?
5
-
6
- module Enveomics
7
- class << self
8
- def opt_banner(opt, banner, usage = nil)
9
- opt.version ||= $VERSION
10
- usage ||= "#{opt.program_name}.rb [options]"
11
- opt.banner = <<~BANNER
12
-
13
- [Enveomics Collection: #{opt.program_name} #{opt.version}]
14
-
15
- #{banner}
16
-
17
- Usage
18
- #{usage}
19
-
20
- BANNER
21
- end
22
- end
23
- end
24
-