miga-base 1.2.15.2 → 1.2.15.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/cli/action/download/gtdb.rb +4 -1
  3. data/lib/miga/cli/action/gtdb_get.rb +4 -0
  4. data/lib/miga/daemon.rb +4 -1
  5. data/lib/miga/lair.rb +6 -4
  6. data/lib/miga/remote_dataset/download.rb +3 -2
  7. data/lib/miga/remote_dataset.rb +25 -7
  8. data/lib/miga/taxonomy.rb +6 -0
  9. data/lib/miga/version.rb +2 -2
  10. metadata +6 -302
  11. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +0 -41964
  12. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +0 -32439
  13. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +0 -62056
  14. data/utils/FastAAI/FastAAI +0 -3659
  15. data/utils/FastAAI/FastAAI-legacy/FastAAI +0 -1336
  16. data/utils/FastAAI/FastAAI-legacy/kAAI_v1.0_virus.py +0 -1296
  17. data/utils/FastAAI/README.md +0 -84
  18. data/utils/enveomics/Docs/recplot2.md +0 -244
  19. data/utils/enveomics/Examples/aai-matrix.bash +0 -66
  20. data/utils/enveomics/Examples/ani-matrix.bash +0 -66
  21. data/utils/enveomics/Examples/essential-phylogeny.bash +0 -105
  22. data/utils/enveomics/Examples/unus-genome-phylogeny.bash +0 -100
  23. data/utils/enveomics/LICENSE.txt +0 -73
  24. data/utils/enveomics/Makefile +0 -52
  25. data/utils/enveomics/Manifest/Tasks/aasubs.json +0 -103
  26. data/utils/enveomics/Manifest/Tasks/blasttab.json +0 -790
  27. data/utils/enveomics/Manifest/Tasks/distances.json +0 -161
  28. data/utils/enveomics/Manifest/Tasks/fasta.json +0 -802
  29. data/utils/enveomics/Manifest/Tasks/fastq.json +0 -291
  30. data/utils/enveomics/Manifest/Tasks/graphics.json +0 -126
  31. data/utils/enveomics/Manifest/Tasks/mapping.json +0 -137
  32. data/utils/enveomics/Manifest/Tasks/ogs.json +0 -382
  33. data/utils/enveomics/Manifest/Tasks/other.json +0 -906
  34. data/utils/enveomics/Manifest/Tasks/remote.json +0 -355
  35. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +0 -650
  36. data/utils/enveomics/Manifest/Tasks/tables.json +0 -308
  37. data/utils/enveomics/Manifest/Tasks/trees.json +0 -68
  38. data/utils/enveomics/Manifest/Tasks/variants.json +0 -111
  39. data/utils/enveomics/Manifest/categories.json +0 -165
  40. data/utils/enveomics/Manifest/examples.json +0 -162
  41. data/utils/enveomics/Manifest/tasks.json +0 -4
  42. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +0 -69
  43. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +0 -1
  44. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +0 -1
  45. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +0 -1
  46. data/utils/enveomics/Pipelines/assembly.pbs/README.md +0 -189
  47. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +0 -112
  48. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +0 -23
  49. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +0 -44
  50. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +0 -50
  51. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +0 -37
  52. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +0 -68
  53. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +0 -49
  54. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +0 -80
  55. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +0 -57
  56. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +0 -63
  57. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +0 -38
  58. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +0 -73
  59. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +0 -21
  60. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +0 -72
  61. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +0 -98
  62. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +0 -1
  63. data/utils/enveomics/Pipelines/blast.pbs/README.md +0 -127
  64. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +0 -109
  65. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +0 -128
  66. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +0 -16
  67. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +0 -22
  68. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +0 -26
  69. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +0 -89
  70. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +0 -29
  71. data/utils/enveomics/Pipelines/idba.pbs/README.md +0 -49
  72. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +0 -95
  73. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +0 -56
  74. data/utils/enveomics/Pipelines/trim.pbs/README.md +0 -54
  75. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +0 -70
  76. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +0 -130
  77. data/utils/enveomics/README.md +0 -42
  78. data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +0 -171
  79. data/utils/enveomics/Scripts/Aln.cat.rb +0 -221
  80. data/utils/enveomics/Scripts/Aln.convert.pl +0 -35
  81. data/utils/enveomics/Scripts/AlphaDiversity.pl +0 -152
  82. data/utils/enveomics/Scripts/BedGraph.tad.rb +0 -93
  83. data/utils/enveomics/Scripts/BedGraph.window.rb +0 -71
  84. data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +0 -102
  85. data/utils/enveomics/Scripts/BlastTab.addlen.rb +0 -63
  86. data/utils/enveomics/Scripts/BlastTab.advance.bash +0 -48
  87. data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +0 -55
  88. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +0 -104
  89. data/utils/enveomics/Scripts/BlastTab.cogCat.rb +0 -76
  90. data/utils/enveomics/Scripts/BlastTab.filter.pl +0 -47
  91. data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +0 -194
  92. data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +0 -104
  93. data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +0 -157
  94. data/utils/enveomics/Scripts/BlastTab.recplot2.R +0 -48
  95. data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +0 -86
  96. data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +0 -119
  97. data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +0 -86
  98. data/utils/enveomics/Scripts/BlastTab.subsample.pl +0 -47
  99. data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +0 -114
  100. data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +0 -90
  101. data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +0 -123
  102. data/utils/enveomics/Scripts/Chao1.pl +0 -97
  103. data/utils/enveomics/Scripts/CharTable.classify.rb +0 -234
  104. data/utils/enveomics/Scripts/EBIseq2tax.rb +0 -83
  105. data/utils/enveomics/Scripts/FastA.N50.pl +0 -60
  106. data/utils/enveomics/Scripts/FastA.extract.rb +0 -152
  107. data/utils/enveomics/Scripts/FastA.filter.pl +0 -52
  108. data/utils/enveomics/Scripts/FastA.filterLen.pl +0 -28
  109. data/utils/enveomics/Scripts/FastA.filterN.pl +0 -60
  110. data/utils/enveomics/Scripts/FastA.fragment.rb +0 -100
  111. data/utils/enveomics/Scripts/FastA.gc.pl +0 -42
  112. data/utils/enveomics/Scripts/FastA.interpose.pl +0 -93
  113. data/utils/enveomics/Scripts/FastA.length.pl +0 -38
  114. data/utils/enveomics/Scripts/FastA.mask.rb +0 -89
  115. data/utils/enveomics/Scripts/FastA.per_file.pl +0 -36
  116. data/utils/enveomics/Scripts/FastA.qlen.pl +0 -57
  117. data/utils/enveomics/Scripts/FastA.rename.pl +0 -65
  118. data/utils/enveomics/Scripts/FastA.revcom.pl +0 -23
  119. data/utils/enveomics/Scripts/FastA.sample.rb +0 -98
  120. data/utils/enveomics/Scripts/FastA.slider.pl +0 -85
  121. data/utils/enveomics/Scripts/FastA.split.pl +0 -55
  122. data/utils/enveomics/Scripts/FastA.split.rb +0 -79
  123. data/utils/enveomics/Scripts/FastA.subsample.pl +0 -131
  124. data/utils/enveomics/Scripts/FastA.tag.rb +0 -65
  125. data/utils/enveomics/Scripts/FastA.toFastQ.rb +0 -69
  126. data/utils/enveomics/Scripts/FastA.wrap.rb +0 -48
  127. data/utils/enveomics/Scripts/FastQ.filter.pl +0 -54
  128. data/utils/enveomics/Scripts/FastQ.interpose.pl +0 -90
  129. data/utils/enveomics/Scripts/FastQ.maskQual.rb +0 -89
  130. data/utils/enveomics/Scripts/FastQ.offset.pl +0 -90
  131. data/utils/enveomics/Scripts/FastQ.split.pl +0 -53
  132. data/utils/enveomics/Scripts/FastQ.tag.rb +0 -70
  133. data/utils/enveomics/Scripts/FastQ.test-error.rb +0 -81
  134. data/utils/enveomics/Scripts/FastQ.toFastA.awk +0 -24
  135. data/utils/enveomics/Scripts/GFF.catsbj.pl +0 -127
  136. data/utils/enveomics/Scripts/GenBank.add_fields.rb +0 -84
  137. data/utils/enveomics/Scripts/HMM.essential.rb +0 -351
  138. data/utils/enveomics/Scripts/HMM.haai.rb +0 -168
  139. data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +0 -83
  140. data/utils/enveomics/Scripts/JPlace.distances.rb +0 -88
  141. data/utils/enveomics/Scripts/JPlace.to_iToL.rb +0 -320
  142. data/utils/enveomics/Scripts/M5nr.getSequences.rb +0 -81
  143. data/utils/enveomics/Scripts/MeTaxa.distribution.pl +0 -198
  144. data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +0 -35
  145. data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +0 -49
  146. data/utils/enveomics/Scripts/NCBIacc2tax.rb +0 -92
  147. data/utils/enveomics/Scripts/Newick.autoprune.R +0 -27
  148. data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +0 -228
  149. data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +0 -32
  150. data/utils/enveomics/Scripts/RefSeq.download.bash +0 -48
  151. data/utils/enveomics/Scripts/SRA.download.bash +0 -55
  152. data/utils/enveomics/Scripts/TRIBS.plot-test.R +0 -36
  153. data/utils/enveomics/Scripts/TRIBS.test.R +0 -39
  154. data/utils/enveomics/Scripts/Table.barplot.R +0 -31
  155. data/utils/enveomics/Scripts/Table.df2dist.R +0 -30
  156. data/utils/enveomics/Scripts/Table.filter.pl +0 -61
  157. data/utils/enveomics/Scripts/Table.merge.pl +0 -77
  158. data/utils/enveomics/Scripts/Table.prefScore.R +0 -60
  159. data/utils/enveomics/Scripts/Table.replace.rb +0 -69
  160. data/utils/enveomics/Scripts/Table.round.rb +0 -63
  161. data/utils/enveomics/Scripts/Table.split.pl +0 -57
  162. data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +0 -227
  163. data/utils/enveomics/Scripts/VCF.KaKs.rb +0 -147
  164. data/utils/enveomics/Scripts/VCF.SNPs.rb +0 -88
  165. data/utils/enveomics/Scripts/aai.rb +0 -421
  166. data/utils/enveomics/Scripts/ani.rb +0 -362
  167. data/utils/enveomics/Scripts/anir.rb +0 -137
  168. data/utils/enveomics/Scripts/clust.rand.rb +0 -102
  169. data/utils/enveomics/Scripts/gi2tax.rb +0 -103
  170. data/utils/enveomics/Scripts/in_silico_GA_GI.pl +0 -96
  171. data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
  172. data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
  173. data/utils/enveomics/Scripts/lib/enveomics.R +0 -1
  174. data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +0 -293
  175. data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +0 -175
  176. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +0 -24
  177. data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +0 -17
  178. data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +0 -30
  179. data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +0 -253
  180. data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +0 -88
  181. data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +0 -182
  182. data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +0 -49
  183. data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +0 -74
  184. data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +0 -237
  185. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +0 -31
  186. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +0 -152
  187. data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +0 -3
  188. data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +0 -74
  189. data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +0 -135
  190. data/utils/enveomics/Scripts/ogs.annotate.rb +0 -88
  191. data/utils/enveomics/Scripts/ogs.core-pan.rb +0 -160
  192. data/utils/enveomics/Scripts/ogs.extract.rb +0 -125
  193. data/utils/enveomics/Scripts/ogs.mcl.rb +0 -186
  194. data/utils/enveomics/Scripts/ogs.rb +0 -104
  195. data/utils/enveomics/Scripts/ogs.stats.rb +0 -131
  196. data/utils/enveomics/Scripts/rbm-legacy.rb +0 -172
  197. data/utils/enveomics/Scripts/rbm.rb +0 -108
  198. data/utils/enveomics/Scripts/sam.filter.rb +0 -148
  199. data/utils/enveomics/Tests/Makefile +0 -10
  200. data/utils/enveomics/Tests/Mgen_M2288.faa +0 -3189
  201. data/utils/enveomics/Tests/Mgen_M2288.fna +0 -8282
  202. data/utils/enveomics/Tests/Mgen_M2321.fna +0 -8288
  203. data/utils/enveomics/Tests/Nequ_Kin4M.faa +0 -2970
  204. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
  205. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +0 -7
  206. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +0 -17
  207. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +0 -137
  208. data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +0 -123
  209. data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +0 -200
  210. data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +0 -55
  211. data/utils/enveomics/Tests/alkB.nwk +0 -1
  212. data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +0 -13
  213. data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +0 -17
  214. data/utils/enveomics/Tests/hiv1.faa +0 -59
  215. data/utils/enveomics/Tests/hiv1.fna +0 -134
  216. data/utils/enveomics/Tests/hiv2.faa +0 -70
  217. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +0 -233
  218. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +0 -1
  219. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +0 -233
  220. data/utils/enveomics/Tests/phyla_counts.tsv +0 -10
  221. data/utils/enveomics/Tests/primate_lentivirus.ogs +0 -11
  222. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +0 -9
  223. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +0 -8
  224. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +0 -6
  225. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +0 -9
  226. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +0 -6
  227. data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +0 -6
  228. data/utils/enveomics/build_enveomics_r.bash +0 -45
  229. data/utils/enveomics/enveomics.R/DESCRIPTION +0 -31
  230. data/utils/enveomics/enveomics.R/NAMESPACE +0 -39
  231. data/utils/enveomics/enveomics.R/R/autoprune.R +0 -155
  232. data/utils/enveomics/enveomics.R/R/barplot.R +0 -184
  233. data/utils/enveomics/enveomics.R/R/cliopts.R +0 -135
  234. data/utils/enveomics/enveomics.R/R/df2dist.R +0 -154
  235. data/utils/enveomics/enveomics.R/R/growthcurve.R +0 -331
  236. data/utils/enveomics/enveomics.R/R/prefscore.R +0 -79
  237. data/utils/enveomics/enveomics.R/R/recplot.R +0 -354
  238. data/utils/enveomics/enveomics.R/R/recplot2.R +0 -1631
  239. data/utils/enveomics/enveomics.R/R/tribs.R +0 -583
  240. data/utils/enveomics/enveomics.R/R/utils.R +0 -80
  241. data/utils/enveomics/enveomics.R/README.md +0 -81
  242. data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
  243. data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
  244. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -16
  245. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -16
  246. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -16
  247. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +0 -25
  248. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +0 -46
  249. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +0 -23
  250. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +0 -47
  251. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +0 -23
  252. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +0 -23
  253. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +0 -40
  254. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +0 -103
  255. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +0 -67
  256. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +0 -24
  257. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +0 -19
  258. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +0 -45
  259. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +0 -44
  260. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +0 -47
  261. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +0 -75
  262. data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +0 -50
  263. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +0 -44
  264. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +0 -139
  265. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +0 -45
  266. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +0 -24
  267. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +0 -77
  268. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +0 -25
  269. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +0 -21
  270. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +0 -19
  271. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +0 -19
  272. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +0 -47
  273. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +0 -29
  274. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +0 -18
  275. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +0 -45
  276. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +0 -36
  277. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +0 -19
  278. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +0 -19
  279. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +0 -27
  280. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +0 -52
  281. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +0 -17
  282. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +0 -51
  283. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +0 -43
  284. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +0 -82
  285. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +0 -59
  286. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +0 -27
  287. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +0 -36
  288. data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +0 -23
  289. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +0 -68
  290. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +0 -28
  291. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +0 -27
  292. data/utils/enveomics/enveomics.R/man/growth.curves.Rd +0 -14
  293. data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +0 -13
  294. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +0 -78
  295. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +0 -46
  296. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +0 -45
  297. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +0 -125
  298. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +0 -19
  299. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +0 -19
  300. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +0 -19
  301. data/utils/enveomics/globals.mk +0 -8
  302. data/utils/enveomics/manifest.json +0 -9
  303. data/utils/multitrim/Multitrim How-To.pdf +0 -0
  304. data/utils/multitrim/README.md +0 -67
  305. data/utils/multitrim/multitrim.py +0 -1555
  306. data/utils/multitrim/multitrim.yml +0 -13
@@ -1,131 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- #
4
- # @author: Luis M. Rodriguez-R
5
- # @license: Artistic-2.0
6
- #
7
-
8
- $:.push File.expand_path(File.dirname(__FILE__) + '/lib')
9
- require 'enveomics_rb/og'
10
- require 'optparse'
11
- require 'json'
12
-
13
- o = {q:false, a:false}
14
- ARGV << '-h' if ARGV.size==0
15
- OptionParser.new do |opts|
16
- opts.banner = "
17
- Estimates some descriptive statistics on a set of Orthology Groups (OGs).
18
-
19
- Usage: #{$0} [options]"
20
- opts.separator ""
21
- opts.separator "Mandatory"
22
- opts.on("-o", "--ogs FILE",
23
- "Input file containing the precomputed OGs."){ |v| o[:ogs]=v }
24
- opts.separator ""
25
- opts.separator "Other Options"
26
- opts.on("-j", "--json FILE", "Output file in JSON format."){ |v| o[:json]=v }
27
- opts.on("-t", "--tab FILE","Output file in tabular format."){ |v| o[:tab]=v }
28
- opts.on("-T", "--transposed-tab FILE",
29
- "Output file in transposed tabular format."){ |v| o[:ttab]=v }
30
- opts.on("-a", "--auto", "Run completely quietly (no STDERR or STDOUT)") do
31
- o[:q] = true
32
- o[:a] = true
33
- end
34
- opts.on("-q", "--quiet", "Run quietly (no STDERR output)."){ o[:q] = true }
35
- opts.on("-h", "--help", "Display this screen.") do
36
- puts opts
37
- exit
38
- end
39
- opts.separator ""
40
- end.parse!
41
- abort "-o is mandatory" if o[:ogs].nil?
42
-
43
- ##### MAIN:
44
- begin
45
- # Initialize the collection of OGs.
46
- collection = OGCollection.new
47
-
48
- # Read the pre-computed OGs
49
- $stderr.puts "Reading pre-computed OGs in '#{o[:ogs]}'." unless o[:q]
50
- f = File.open(o[:ogs], "r")
51
- h = f.gets.chomp.split /\t/
52
- while ln = f.gets
53
- collection << OG.new(h, ln.chomp.split(/\t/))
54
- end
55
- f.close
56
- $stderr.puts " Loaded OGs: #{collection.ogs.length}." unless o[:q]
57
-
58
- # Estimate descriptive stats
59
- stat_name = {
60
- genomes: "Number of genomes",
61
- pan: "Pangenome (OGs)",
62
- core: "Core genome (OGs)",
63
- core90pc: "OGs in 90% of the genomes",
64
- core80pc: "OGs in 80% of the genomes",
65
- unus: "Unus genome, core genome discarding paralogs (OGs)",
66
- avg: "Average number of OGs in a genome",
67
- avg_pan: "Average genome (OGs) / Pangenome (OGs)",
68
- core_avg: "Core genome (OGs) / Average genome (OGs)",
69
- core_pan: "Core genome (OGs) / Pangenome (OGs)",
70
- ogs_shannon: "Entropy of the OG frequencies (bits)"
71
- }
72
- stats = {}
73
- stats[:genomes] = Gene.genomes.length
74
- stats[:pan] = collection.ogs.length
75
- stats[:core] = collection.ogs.map do |og|
76
- (og.genomes.length == Gene.genomes.length) ? 1 : 0
77
- end.inject(0,:+)
78
- stats[:core90pc] = collection.ogs.map do |og|
79
- (og.genomes.length >= 0.9*Gene.genomes.length) ? 1 : 0
80
- end.inject(0,:+)
81
- stats[:core80pc] = collection.ogs.map do |og|
82
- (og.genomes.length >= 0.8*Gene.genomes.length) ? 1 : 0
83
- end.inject(0,:+)
84
- stats[:unus] = collection.ogs.map do |og|
85
- (og.genomes.length != Gene.genomes.length) ? 0 :
86
- (og.genes.all?{ |i| i.size==1 }) ? 1 : 0
87
- end.inject(0,:+)
88
- og_genomes = collection.ogs.map{ |og| og.genomes.length }.inject(0,:+)
89
- stats[:avg] = og_genomes.to_f/Gene.genomes.length
90
- stats[:avg_pan] = stats[:avg]/stats[:pan]
91
- stats[:core_avg] = stats[:core].to_f/stats[:avg]
92
- stats[:core_pan] = stats[:core].to_f/stats[:pan]
93
- stats[:ogs_shannon] = -1 * collection.ogs.map do |og|
94
- pi = og.genomes.length.to_f/Gene.genomes.length
95
- pi * Math.log(pi)
96
- end.inject(0.0,:+)
97
-
98
- # Show result
99
- $stderr.puts "Generating reports." unless o[:q]
100
- stats.each_pair{ |k,v| puts " #{stat_name[k]}: #{v}" } unless o[:a]
101
-
102
- # Save results in JSON
103
- unless o[:json].nil?
104
- ohf = File.open(o[:json], "w")
105
- ohf.puts JSON.pretty_generate(stats)
106
- ohf.close
107
- end
108
-
109
- # Save results in tab
110
- unless o[:tab].nil?
111
- ohf = File.open(o[:tab], "w")
112
- stats.each_pair{ |k,v| ohf.puts "#{k}\t#{v}" }
113
- ohf.close
114
- end
115
-
116
- # Save results in T(tab)
117
- unless o[:ttab].nil?
118
- ohf = File.open(o[:ttab], "w")
119
- ohf.puts stats.keys.join("\t")
120
- ohf.puts stats.values.join("\t")
121
- ohf.close
122
- end
123
-
124
- $stderr.puts "Done.\n" unless o[:q]
125
- rescue => err
126
- $stderr.puts "Exception: #{err}\n\n"
127
- err.backtrace.each { |l| $stderr.puts l + "\n" }
128
- err
129
- end
130
-
131
-
@@ -1,172 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # frozen_string_literal: true
4
-
5
- $VERSION = 0.1
6
- $:.push File.expand_path('../lib', __FILE__)
7
- require 'enveomics_rb/enveomics'
8
- require 'tmpdir'
9
-
10
- o = {
11
- q: false, thr: 1,
12
- len: 0, id: 0.0, fract: 0.0, score: 0.0,
13
- bin: '', program: :'blast+', nucl: false
14
- }
15
-
16
- OptionParser.new do |opts|
17
- cmd = File.basename($0)
18
- opts.banner = <<~BANNER
19
-
20
- [Enveomics Collection: #{cmd} v#{$VERSION}]
21
-
22
- [DEPRECATED: Please use rbm.rb instead]
23
-
24
- Finds the reciprocal best matches between two sets of sequences
25
-
26
- Usage: #{cmd} [options]
27
-
28
- BANNER
29
-
30
- opts.separator 'Mandatory'
31
- opts.on(
32
- '-1', '--seq1 FILE',
33
- 'Path to the FastA file containing the set 1'
34
- ) { |v| o[:seq1] = v }
35
- opts.on(
36
- '-2', '--seq2 FILE',
37
- 'Path to the FastA file containing the set 2'
38
- ) { |v| o[:seq2] = v }
39
- opts.separator ''
40
- opts.separator 'Search Options'
41
- opts.on(
42
- '-n', '--nucl',
43
- 'Sequences are assumed to be nucleotides (proteins by default)',
44
- 'Incompatible with -p diamond'
45
- ) { |v| o[:nucl] = true }
46
- opts.on(
47
- '-l', '--len INT', Integer,
48
- 'Minimum alignment length (in residues)',
49
- "By default: #{o[:len]}"
50
- ) { |v| o[:len] = v }
51
- opts.on(
52
- '-f', '--fract FLOAT', Float,
53
- 'Minimum alignment length (as a fraction of the query)',
54
- 'If set, requires BLAST+ or Diamond (see -p)',
55
- "By default: #{o[:fract]}"
56
- ) { |v| o[:fract] = v }
57
- opts.on(
58
- '-i', '--id NUM', Float,
59
- 'Minimum alignment identity (in %)',
60
- "By default: #{o[:id]}"
61
- ){ |v| o[:id] = v }
62
- opts.on(
63
- '-s', '--score NUM', Float,
64
- 'Minimum alignment score (in bits)',
65
- "By default: #{o[:score]}"
66
- ) { |v| o[:score] = v }
67
- opts.separator ''
68
- opts.separator 'Software Options'
69
- opts.on(
70
- '-b', '--bin DIR',
71
- 'Path to the directory containing the binaries of the search program'
72
- ) { |v| o[:bin] = v }
73
- opts.on(
74
- '-p', '--program STR',
75
- 'Search program to be used. One of: blast+ (default), blast, diamond'
76
- ) { |v| o[:program] = v.downcase.to_sym }
77
- opts.on(
78
- '-t', '--threads INT', Integer,
79
- 'Number of parallel threads to be used',
80
- "By default: #{o[:thr]}"
81
- ) { |v| o[:thr] = v }
82
- opts.separator ''
83
- opts.separator 'Other Options'
84
- opts.on('-q', '--quiet', 'Run quietly (no STDERR output)') { o[:q] = true }
85
- opts.on('-h', '--help', 'Display this screen') { puts opts ; exit }
86
- opts.separator ''
87
- end.parse!
88
-
89
- abort '-1 is mandatory' if o[:seq1].nil?
90
- abort '-2 is mandatory' if o[:seq2].nil?
91
- if o[:program] == :diamond && o[:nucl]
92
- abort '-p diamond is incompatible with -n'
93
- end
94
- if o[:fract] > 0.0 && o[:program] == :blast
95
- abort 'Argument -f/--fract requires -p blast+ or -p diamond'
96
- end
97
- o[:bin] = o[:bin] + '/' if o[:bin].size > 0
98
- $quiet = o[:q]
99
-
100
- Dir.mktmpdir do |dir|
101
- say('Temporal directory: ', dir)
102
-
103
- # Create databases
104
- say 'Creating databases'
105
- [:seq1, :seq2].each do |seq|
106
- case o[:program]
107
- when :blast
108
- `"#{o[:bin]}formatdb" -i "#{o[seq]}" -n "#{dir}/#{seq}" \
109
- -p #{o[:nucl] ? 'F' : 'T'}`
110
- when :'blast+'
111
- `"#{o[:bin]}makeblastdb" -in "#{o[seq]}" -out "#{dir}/#{seq}" \
112
- -dbtype #{o[:nucl] ? 'nucl' : 'prot'}`
113
- when :diamond
114
- `"#{o[:bin]}diamond" makedb --in "#{o[seq]}" \
115
- --db "#{dir}/#{seq}.dmnd" --threads "#{o[:thr]}"`
116
- else
117
- abort "Unsupported program: #{o[:program]}"
118
- end
119
- end
120
-
121
- # Best-hits
122
- rbh = {}
123
- n2 = 0
124
- say ' Running comparisons'
125
- [2, 1].each do |i|
126
- qry_seen = {}
127
- q = o[:"seq#{i}"]
128
- s = "#{dir}/seq#{i == 1 ? 2 : 1}"
129
- say(' Query: ', q)
130
- case o[:program]
131
- when :blast
132
- `"#{o[:bin]}blastall" -p #{o[:nucl] ? 'blastn' : 'blastp'} -d "#{s}" \
133
- -i "#{q}" -v 1 -b 1 -a #{o[:thr]} -m 8 -o "#{dir}/#{i}.tab"`
134
- when :'blast+'
135
- `"#{o[:bin]}#{o[:nucl] ? 'blastn' : 'blastp'}" -db "#{s}" -query "#{q}" \
136
- -max_target_seqs 1 -num_threads #{o[:thr]} -out "#{dir}/#{i}.tab" \
137
- -outfmt "6 qseqid sseqid pident length mismatch gapopen qstart qend \
138
- sstart send evalue bitscore qlen slen"`
139
- when :diamond
140
- `"#{o[:bin]}diamond" blastp --threads "#{o[:thr]}" --db "#{s}.dmnd" \
141
- --query "#{q}" --sensitive --daa "#{dir}/#{i}.daa" --quiet \
142
- && "#{o[:bin]}diamond" view --daa "#{dir}/#{i}.daa" --outfmt \
143
- 6 qseqid sseqid pident length mismatch gapopen qstart qend sstart \
144
- send evalue bitscore qlen slen --out "#{dir}/#{i}.tab" --quiet`
145
- else
146
- abort "Unsupported program: #{o[:program]}"
147
- end
148
-
149
- n = 0
150
- File.open("#{dir}/#{i}.tab", 'r') do |fh|
151
- fh.each do |ln|
152
- ln.chomp!
153
- row = ln.split(/\t/)
154
- row[12] = '1' unless [:'blast+', :diamond].include? o[:program]
155
- next unless qry_seen[row[0]].nil? &&
156
- row[3].to_i >= o[:len] && row[2].to_f >= o[:id] &&
157
- row[11].to_f >= o[:score] && row[3].to_f / row[12].to_i >= o[:fract]
158
-
159
- qry_seen[row[0]] = 1
160
- n += 1
161
- if i == 2
162
- rbh[row[0]] = row[1]
163
- elsif !rbh[row[1]].nil? && rbh[row[1]] == row[0]
164
- puts ln
165
- n2 += 1
166
- end
167
- end
168
- end
169
- say " #{n} sequences with hit"
170
- end
171
- say " #{n2} RBMs"
172
- end
@@ -1,108 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # frozen_string_literal: true
4
-
5
- $VERSION = 1.01
6
- $:.push File.expand_path('../lib', __FILE__)
7
- require 'enveomics_rb/rbm'
8
- require 'tmpdir'
9
-
10
- bms_dummy = Enveomics::RBM.new('1', '2').bms1
11
- o = { q: false, out: '-' }
12
- %i[thr len id fract score bin program nucl].each do |k|
13
- o[k] = bms_dummy.opt(k)
14
- end
15
-
16
- OptionParser.new do |opts|
17
- opts.version = $VERSION
18
- cmd = File.basename($0)
19
- opts.banner = <<~BANNER
20
-
21
- [Enveomics Collection: #{cmd} v#{$VERSION}]
22
-
23
- Finds the reciprocal best matches between two sets of sequences
24
-
25
- Usage: #{cmd} [options]
26
-
27
- BANNER
28
-
29
- opts.separator 'Mandatory'
30
- opts.on(
31
- '-1', '--seq1 FILE',
32
- 'Path to the FastA file containing the set 1'
33
- ) { |v| o[:seq1] = v }
34
- opts.on(
35
- '-2', '--seq2 FILE',
36
- 'Path to the FastA file containing the set 2'
37
- ) { |v| o[:seq2] = v }
38
- opts.on(
39
- '-o', '--out FILE',
40
- 'Reciprocal Best Matches in BLAST tabular format.',
41
- 'Supports compression with .gz extension, use - for STDOUT (default)'
42
- ) { |v| o[:out] = v }
43
- opts.separator ''
44
- opts.separator 'Search Options'
45
- opts.on(
46
- '-n', '--nucl',
47
- 'Sequences are assumed to be nucleotides (proteins by default)',
48
- 'Incompatible with -p diamond'
49
- ) { |v| o[:nucl] = true }
50
- opts.on(
51
- '-l', '--len INT', Integer,
52
- 'Minimum alignment length (in residues)',
53
- "By default: #{o[:len]}"
54
- ) { |v| o[:len] = v }
55
- opts.on(
56
- '-f', '--fract FLOAT', Float,
57
- 'Minimum alignment length (as a fraction of the query)',
58
- 'If set, requires BLAST+ or Diamond (see -p)',
59
- "By default: #{o[:fract]}"
60
- ) { |v| o[:fract] = v }
61
- opts.on(
62
- '-i', '--id NUM', Float,
63
- 'Minimum alignment identity (in %)',
64
- "By default: #{o[:id]}"
65
- ){ |v| o[:id] = v }
66
- opts.on(
67
- '-s', '--score NUM', Float,
68
- 'Minimum alignment score (in bits)',
69
- "By default: #{o[:score]}"
70
- ) { |v| o[:score] = v }
71
- opts.separator ''
72
- opts.separator 'Software Options'
73
- opts.on(
74
- '-b', '--bin DIR',
75
- 'Path to the directory containing the binaries of the search program'
76
- ) { |v| o[:bin] = v }
77
- opts.on(
78
- '-p', '--program STR',
79
- 'Search program to be used',
80
- 'One of: blast+ (default), blast, diamond, blat'
81
- ) { |v| o[:program] = v.downcase.to_sym }
82
- opts.on(
83
- '-t', '--threads INT', Integer,
84
- 'Number of parallel threads to be used',
85
- "By default: #{o[:thr]}"
86
- ) { |v| o[:thr] = v }
87
- opts.separator ''
88
- opts.separator 'Other Options'
89
- opts.on('-q', '--quiet', 'Run quietly (no STDERR output)') { $QUIET = true }
90
- opts.on('-h', '--help', 'Display this screen') { puts opts ; exit }
91
- opts.separator ''
92
- end.parse!
93
-
94
- raise Enveomics::OptionError.new('-1 is mandatory') if o[:seq1].nil?
95
- raise Enveomics::OptionError.new('-2 is mandatory') if o[:seq2].nil?
96
- raise Enveomics::OptionError.new(
97
- 'Argument -f/--fract requires -p blast+ or -p diamond'
98
- ) if o[:fract] > 0.0 && !%i[blast+ diamond].include?(o[:program])
99
-
100
- rbm = Enveomics::RBM.new(o[:seq1], o[:seq2], o)
101
- ofh = writer(o[:out])
102
- rbm.each { |bm| ofh.puts bm.to_s }
103
- ofh.close
104
-
105
- say('Forward Best Matches: ', rbm.bms1.count)
106
- say('Reverse Best Matches: ', rbm.bms2.count)
107
- say('Reciprocal Best Matches: ', rbm.count)
108
-
@@ -1,148 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # frozen_string_literal: true
4
-
5
- $VERSION = 1.0
6
- $:.push File.expand_path('../lib', __FILE__)
7
- require 'enveomics_rb/enveomics'
8
- use 'shellwords'
9
-
10
- o = {
11
- q: false, threads: 2, m_format: :sam, g_format: :fasta, identity: 95.0,
12
- o: '-', header: true
13
- }
14
-
15
- OptionParser.new do |opt|
16
- Enveomics.opt_banner(
17
- opt, 'Filters a SAM or BAM file by target sequences and/or identity',
18
- "#{File.basename($0)} -m map.sam -o filtered_map.sam [options]"
19
- )
20
-
21
- opt.separator 'Input/Output'
22
- opt.on(
23
- '-g', '--genome PATH',
24
- 'Genome assembly',
25
- 'Supports compression with .gz extension, use - for STDIN'
26
- ) { |v| o[:g] = v }
27
- opt.on(
28
- '-m', '--mapping PATH',
29
- 'Mapping file',
30
- 'Supports compression with .gz extension, use - for STDIN'
31
- ) { |v| o[:m] = v }
32
- opt.on(
33
- '-o', '--out-sam PATH',
34
- 'Output filtered file in SAM format',
35
- 'Supports compression with .gz extension, use - for STDOUT (default)'
36
- ) { |v| o[:o] = v }
37
- opt.separator ''
38
-
39
- opt.separator 'Formats'
40
- opt.on(
41
- '--g-format STRING',
42
- 'Genome assembly format: fasta (default) or list'
43
- ) { |v| o[:g_format] = v.downcase.to_sym }
44
- opt.on(
45
- '--m-format STRING',
46
- 'Mapping file format: sam (default) or bam',
47
- 'sam supports compression with .gz file extension'
48
- ) { |v| o[:m_format] = v.downcase.to_sym }
49
- opt.separator ''
50
-
51
- opt.separator 'General'
52
- opt.on(
53
- '-i', '--identity FLOAT', Float,
54
- "Set a fixed threshold of percent identity (default: #{o[:identity]})"
55
- ) { |v| o[:identity] = v }
56
- opt.on('--no-header', 'Do not include the headers') { |v| o[:header] = v }
57
- opt.separator ''
58
- opt.on(
59
- '-t', '--threads INT', Integer, "Threads to use (default: #{o[:threads]})"
60
- ) { |v| o[:threads] = v }
61
- opt.on('-l', '--log PATH', 'Log file to save output') { |v| o[:log] = v }
62
- opt.on('-q', '--quiet', 'Run quietly') { |v| o[:q] = v }
63
- opt.on('-h', '--help', 'Display this screen') do
64
- puts opt
65
- exit
66
- end
67
- opt.separator ''
68
- end.parse!
69
-
70
- $QUIET = o[:q]
71
-
72
- # Functions
73
-
74
- ##
75
- # Parses one line +ln+ in SAM format and outputs filtered lines to +ofh+
76
- # Filters by minimum +identity+ and +target+ sequences, and prints
77
- # the headers if +header+
78
- def parse_sam_line(ln, identity, target, header, ofh)
79
- if ln =~ /^@/ || ln =~ /^\s*$/
80
- ofh.puts ln if header
81
- return
82
- end
83
-
84
- # No match
85
- row = ln.chomp.split("\t")
86
- return if row[2] == '*'
87
-
88
- # Filter by target
89
- return if !target.nil? && !target.include?(row[2])
90
-
91
- # Exclude unless concordant or unaligned
92
- length = row[9].size
93
- row.shift(11) # Discard non-flag columns
94
- flags = Hash[row.map { |i| i.sub(/:.:/, ':').split(':', 2) }]
95
- return if flags['YT'] && !%w[CP UU].include?(flags['YT'])
96
-
97
- # Filter by identity
98
- unless flags['MD']
99
- raise Enveomics::ParseError.new(
100
- "SAM line missing MD flag:\n#{ln}\nFlags: #{flags}"
101
- )
102
- end
103
- mismatches = flags['MD'].scan(/[^\d]/).count
104
- id = 100.0 * (length - mismatches) / length
105
- ofh.puts ln if id >= identity
106
- end
107
-
108
- # Reading targets
109
- if o[:g]
110
- say 'Loading target sequences to filter'
111
- reader = reader(o[:g])
112
- target =
113
- case o[:g_format]
114
- when :fasta
115
- reader.each.map { |ln| $1 if ln =~ /^>(\S+)/ }.compact
116
- when :list
117
- reader.each.map(&:chomp)
118
- else
119
- raise Enveomics::OptionError.new(
120
- "Unsupported target sequences format: #{o[:g_format]}"
121
- )
122
- end
123
- reader.close
124
- else
125
- target = nil
126
- end
127
-
128
- # Reading and filtering mapping
129
- say 'Reading mapping file'
130
- ofh = writer(o[:o])
131
- case o[:m_format]
132
- when :sam
133
- reader = reader(o[:m])
134
- reader.each { |ln| parse_sam_line(ln, o[:identity], target, o[:header], ofh) }
135
- reader.close
136
- when :bam
137
- cmd = ['samtools', 'view', o[:m], '-@', o[:threads]]
138
- cmd << '-h' if o[:header]
139
- IO.popen(cmd.shelljoin) do |fh|
140
- fh.each { |ln| parse_sam_line(ln, o[:identity], target, o[:header], ofh) }
141
- end
142
- else
143
- raise Enveomics::OptionError.new(
144
- "Unsupported mapping format: #{o[:m_format]}"
145
- )
146
- end
147
- ofh.close
148
-
@@ -1,10 +0,0 @@
1
-
2
- # Makefile for the Enve-omics collection
3
- # @update Oct 13 2013
4
- # @author Luis M. Rodriguez-R <lmrodriguez at gmail dot com>
5
-
6
- include ../globals.mk
7
-
8
- all:
9
- @echo NOTHING TO DO YET
10
-