miga-base 1.2.15.2 → 1.2.15.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/cli/action/download/gtdb.rb +4 -1
  3. data/lib/miga/cli/action/gtdb_get.rb +4 -0
  4. data/lib/miga/daemon.rb +4 -1
  5. data/lib/miga/lair.rb +6 -4
  6. data/lib/miga/remote_dataset/download.rb +3 -2
  7. data/lib/miga/remote_dataset.rb +25 -7
  8. data/lib/miga/taxonomy.rb +6 -0
  9. data/lib/miga/version.rb +2 -2
  10. metadata +6 -302
  11. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +0 -41964
  12. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +0 -32439
  13. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +0 -62056
  14. data/utils/FastAAI/FastAAI +0 -3659
  15. data/utils/FastAAI/FastAAI-legacy/FastAAI +0 -1336
  16. data/utils/FastAAI/FastAAI-legacy/kAAI_v1.0_virus.py +0 -1296
  17. data/utils/FastAAI/README.md +0 -84
  18. data/utils/enveomics/Docs/recplot2.md +0 -244
  19. data/utils/enveomics/Examples/aai-matrix.bash +0 -66
  20. data/utils/enveomics/Examples/ani-matrix.bash +0 -66
  21. data/utils/enveomics/Examples/essential-phylogeny.bash +0 -105
  22. data/utils/enveomics/Examples/unus-genome-phylogeny.bash +0 -100
  23. data/utils/enveomics/LICENSE.txt +0 -73
  24. data/utils/enveomics/Makefile +0 -52
  25. data/utils/enveomics/Manifest/Tasks/aasubs.json +0 -103
  26. data/utils/enveomics/Manifest/Tasks/blasttab.json +0 -790
  27. data/utils/enveomics/Manifest/Tasks/distances.json +0 -161
  28. data/utils/enveomics/Manifest/Tasks/fasta.json +0 -802
  29. data/utils/enveomics/Manifest/Tasks/fastq.json +0 -291
  30. data/utils/enveomics/Manifest/Tasks/graphics.json +0 -126
  31. data/utils/enveomics/Manifest/Tasks/mapping.json +0 -137
  32. data/utils/enveomics/Manifest/Tasks/ogs.json +0 -382
  33. data/utils/enveomics/Manifest/Tasks/other.json +0 -906
  34. data/utils/enveomics/Manifest/Tasks/remote.json +0 -355
  35. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +0 -650
  36. data/utils/enveomics/Manifest/Tasks/tables.json +0 -308
  37. data/utils/enveomics/Manifest/Tasks/trees.json +0 -68
  38. data/utils/enveomics/Manifest/Tasks/variants.json +0 -111
  39. data/utils/enveomics/Manifest/categories.json +0 -165
  40. data/utils/enveomics/Manifest/examples.json +0 -162
  41. data/utils/enveomics/Manifest/tasks.json +0 -4
  42. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +0 -69
  43. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +0 -1
  44. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +0 -1
  45. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +0 -1
  46. data/utils/enveomics/Pipelines/assembly.pbs/README.md +0 -189
  47. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +0 -112
  48. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +0 -23
  49. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +0 -44
  50. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +0 -50
  51. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +0 -37
  52. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +0 -68
  53. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +0 -49
  54. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +0 -80
  55. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +0 -57
  56. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +0 -63
  57. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +0 -38
  58. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +0 -73
  59. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +0 -21
  60. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +0 -72
  61. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +0 -98
  62. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +0 -1
  63. data/utils/enveomics/Pipelines/blast.pbs/README.md +0 -127
  64. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +0 -109
  65. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +0 -128
  66. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +0 -16
  67. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +0 -22
  68. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +0 -26
  69. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +0 -89
  70. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +0 -29
  71. data/utils/enveomics/Pipelines/idba.pbs/README.md +0 -49
  72. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +0 -95
  73. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +0 -56
  74. data/utils/enveomics/Pipelines/trim.pbs/README.md +0 -54
  75. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +0 -70
  76. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +0 -130
  77. data/utils/enveomics/README.md +0 -42
  78. data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +0 -171
  79. data/utils/enveomics/Scripts/Aln.cat.rb +0 -221
  80. data/utils/enveomics/Scripts/Aln.convert.pl +0 -35
  81. data/utils/enveomics/Scripts/AlphaDiversity.pl +0 -152
  82. data/utils/enveomics/Scripts/BedGraph.tad.rb +0 -93
  83. data/utils/enveomics/Scripts/BedGraph.window.rb +0 -71
  84. data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +0 -102
  85. data/utils/enveomics/Scripts/BlastTab.addlen.rb +0 -63
  86. data/utils/enveomics/Scripts/BlastTab.advance.bash +0 -48
  87. data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +0 -55
  88. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +0 -104
  89. data/utils/enveomics/Scripts/BlastTab.cogCat.rb +0 -76
  90. data/utils/enveomics/Scripts/BlastTab.filter.pl +0 -47
  91. data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +0 -194
  92. data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +0 -104
  93. data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +0 -157
  94. data/utils/enveomics/Scripts/BlastTab.recplot2.R +0 -48
  95. data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +0 -86
  96. data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +0 -119
  97. data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +0 -86
  98. data/utils/enveomics/Scripts/BlastTab.subsample.pl +0 -47
  99. data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +0 -114
  100. data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +0 -90
  101. data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +0 -123
  102. data/utils/enveomics/Scripts/Chao1.pl +0 -97
  103. data/utils/enveomics/Scripts/CharTable.classify.rb +0 -234
  104. data/utils/enveomics/Scripts/EBIseq2tax.rb +0 -83
  105. data/utils/enveomics/Scripts/FastA.N50.pl +0 -60
  106. data/utils/enveomics/Scripts/FastA.extract.rb +0 -152
  107. data/utils/enveomics/Scripts/FastA.filter.pl +0 -52
  108. data/utils/enveomics/Scripts/FastA.filterLen.pl +0 -28
  109. data/utils/enveomics/Scripts/FastA.filterN.pl +0 -60
  110. data/utils/enveomics/Scripts/FastA.fragment.rb +0 -100
  111. data/utils/enveomics/Scripts/FastA.gc.pl +0 -42
  112. data/utils/enveomics/Scripts/FastA.interpose.pl +0 -93
  113. data/utils/enveomics/Scripts/FastA.length.pl +0 -38
  114. data/utils/enveomics/Scripts/FastA.mask.rb +0 -89
  115. data/utils/enveomics/Scripts/FastA.per_file.pl +0 -36
  116. data/utils/enveomics/Scripts/FastA.qlen.pl +0 -57
  117. data/utils/enveomics/Scripts/FastA.rename.pl +0 -65
  118. data/utils/enveomics/Scripts/FastA.revcom.pl +0 -23
  119. data/utils/enveomics/Scripts/FastA.sample.rb +0 -98
  120. data/utils/enveomics/Scripts/FastA.slider.pl +0 -85
  121. data/utils/enveomics/Scripts/FastA.split.pl +0 -55
  122. data/utils/enveomics/Scripts/FastA.split.rb +0 -79
  123. data/utils/enveomics/Scripts/FastA.subsample.pl +0 -131
  124. data/utils/enveomics/Scripts/FastA.tag.rb +0 -65
  125. data/utils/enveomics/Scripts/FastA.toFastQ.rb +0 -69
  126. data/utils/enveomics/Scripts/FastA.wrap.rb +0 -48
  127. data/utils/enveomics/Scripts/FastQ.filter.pl +0 -54
  128. data/utils/enveomics/Scripts/FastQ.interpose.pl +0 -90
  129. data/utils/enveomics/Scripts/FastQ.maskQual.rb +0 -89
  130. data/utils/enveomics/Scripts/FastQ.offset.pl +0 -90
  131. data/utils/enveomics/Scripts/FastQ.split.pl +0 -53
  132. data/utils/enveomics/Scripts/FastQ.tag.rb +0 -70
  133. data/utils/enveomics/Scripts/FastQ.test-error.rb +0 -81
  134. data/utils/enveomics/Scripts/FastQ.toFastA.awk +0 -24
  135. data/utils/enveomics/Scripts/GFF.catsbj.pl +0 -127
  136. data/utils/enveomics/Scripts/GenBank.add_fields.rb +0 -84
  137. data/utils/enveomics/Scripts/HMM.essential.rb +0 -351
  138. data/utils/enveomics/Scripts/HMM.haai.rb +0 -168
  139. data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +0 -83
  140. data/utils/enveomics/Scripts/JPlace.distances.rb +0 -88
  141. data/utils/enveomics/Scripts/JPlace.to_iToL.rb +0 -320
  142. data/utils/enveomics/Scripts/M5nr.getSequences.rb +0 -81
  143. data/utils/enveomics/Scripts/MeTaxa.distribution.pl +0 -198
  144. data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +0 -35
  145. data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +0 -49
  146. data/utils/enveomics/Scripts/NCBIacc2tax.rb +0 -92
  147. data/utils/enveomics/Scripts/Newick.autoprune.R +0 -27
  148. data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +0 -228
  149. data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +0 -32
  150. data/utils/enveomics/Scripts/RefSeq.download.bash +0 -48
  151. data/utils/enveomics/Scripts/SRA.download.bash +0 -55
  152. data/utils/enveomics/Scripts/TRIBS.plot-test.R +0 -36
  153. data/utils/enveomics/Scripts/TRIBS.test.R +0 -39
  154. data/utils/enveomics/Scripts/Table.barplot.R +0 -31
  155. data/utils/enveomics/Scripts/Table.df2dist.R +0 -30
  156. data/utils/enveomics/Scripts/Table.filter.pl +0 -61
  157. data/utils/enveomics/Scripts/Table.merge.pl +0 -77
  158. data/utils/enveomics/Scripts/Table.prefScore.R +0 -60
  159. data/utils/enveomics/Scripts/Table.replace.rb +0 -69
  160. data/utils/enveomics/Scripts/Table.round.rb +0 -63
  161. data/utils/enveomics/Scripts/Table.split.pl +0 -57
  162. data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +0 -227
  163. data/utils/enveomics/Scripts/VCF.KaKs.rb +0 -147
  164. data/utils/enveomics/Scripts/VCF.SNPs.rb +0 -88
  165. data/utils/enveomics/Scripts/aai.rb +0 -421
  166. data/utils/enveomics/Scripts/ani.rb +0 -362
  167. data/utils/enveomics/Scripts/anir.rb +0 -137
  168. data/utils/enveomics/Scripts/clust.rand.rb +0 -102
  169. data/utils/enveomics/Scripts/gi2tax.rb +0 -103
  170. data/utils/enveomics/Scripts/in_silico_GA_GI.pl +0 -96
  171. data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
  172. data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
  173. data/utils/enveomics/Scripts/lib/enveomics.R +0 -1
  174. data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +0 -293
  175. data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +0 -175
  176. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +0 -24
  177. data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +0 -17
  178. data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +0 -30
  179. data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +0 -253
  180. data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +0 -88
  181. data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +0 -182
  182. data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +0 -49
  183. data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +0 -74
  184. data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +0 -237
  185. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +0 -31
  186. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +0 -152
  187. data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +0 -3
  188. data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +0 -74
  189. data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +0 -135
  190. data/utils/enveomics/Scripts/ogs.annotate.rb +0 -88
  191. data/utils/enveomics/Scripts/ogs.core-pan.rb +0 -160
  192. data/utils/enveomics/Scripts/ogs.extract.rb +0 -125
  193. data/utils/enveomics/Scripts/ogs.mcl.rb +0 -186
  194. data/utils/enveomics/Scripts/ogs.rb +0 -104
  195. data/utils/enveomics/Scripts/ogs.stats.rb +0 -131
  196. data/utils/enveomics/Scripts/rbm-legacy.rb +0 -172
  197. data/utils/enveomics/Scripts/rbm.rb +0 -108
  198. data/utils/enveomics/Scripts/sam.filter.rb +0 -148
  199. data/utils/enveomics/Tests/Makefile +0 -10
  200. data/utils/enveomics/Tests/Mgen_M2288.faa +0 -3189
  201. data/utils/enveomics/Tests/Mgen_M2288.fna +0 -8282
  202. data/utils/enveomics/Tests/Mgen_M2321.fna +0 -8288
  203. data/utils/enveomics/Tests/Nequ_Kin4M.faa +0 -2970
  204. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
  205. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +0 -7
  206. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +0 -17
  207. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +0 -137
  208. data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +0 -123
  209. data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +0 -200
  210. data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +0 -55
  211. data/utils/enveomics/Tests/alkB.nwk +0 -1
  212. data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +0 -13
  213. data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +0 -17
  214. data/utils/enveomics/Tests/hiv1.faa +0 -59
  215. data/utils/enveomics/Tests/hiv1.fna +0 -134
  216. data/utils/enveomics/Tests/hiv2.faa +0 -70
  217. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +0 -233
  218. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +0 -1
  219. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +0 -233
  220. data/utils/enveomics/Tests/phyla_counts.tsv +0 -10
  221. data/utils/enveomics/Tests/primate_lentivirus.ogs +0 -11
  222. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +0 -9
  223. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +0 -8
  224. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +0 -6
  225. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +0 -9
  226. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +0 -6
  227. data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +0 -6
  228. data/utils/enveomics/build_enveomics_r.bash +0 -45
  229. data/utils/enveomics/enveomics.R/DESCRIPTION +0 -31
  230. data/utils/enveomics/enveomics.R/NAMESPACE +0 -39
  231. data/utils/enveomics/enveomics.R/R/autoprune.R +0 -155
  232. data/utils/enveomics/enveomics.R/R/barplot.R +0 -184
  233. data/utils/enveomics/enveomics.R/R/cliopts.R +0 -135
  234. data/utils/enveomics/enveomics.R/R/df2dist.R +0 -154
  235. data/utils/enveomics/enveomics.R/R/growthcurve.R +0 -331
  236. data/utils/enveomics/enveomics.R/R/prefscore.R +0 -79
  237. data/utils/enveomics/enveomics.R/R/recplot.R +0 -354
  238. data/utils/enveomics/enveomics.R/R/recplot2.R +0 -1631
  239. data/utils/enveomics/enveomics.R/R/tribs.R +0 -583
  240. data/utils/enveomics/enveomics.R/R/utils.R +0 -80
  241. data/utils/enveomics/enveomics.R/README.md +0 -81
  242. data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
  243. data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
  244. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -16
  245. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -16
  246. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -16
  247. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +0 -25
  248. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +0 -46
  249. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +0 -23
  250. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +0 -47
  251. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +0 -23
  252. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +0 -23
  253. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +0 -40
  254. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +0 -103
  255. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +0 -67
  256. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +0 -24
  257. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +0 -19
  258. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +0 -45
  259. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +0 -44
  260. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +0 -47
  261. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +0 -75
  262. data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +0 -50
  263. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +0 -44
  264. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +0 -139
  265. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +0 -45
  266. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +0 -24
  267. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +0 -77
  268. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +0 -25
  269. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +0 -21
  270. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +0 -19
  271. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +0 -19
  272. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +0 -47
  273. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +0 -29
  274. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +0 -18
  275. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +0 -45
  276. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +0 -36
  277. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +0 -19
  278. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +0 -19
  279. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +0 -27
  280. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +0 -52
  281. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +0 -17
  282. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +0 -51
  283. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +0 -43
  284. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +0 -82
  285. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +0 -59
  286. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +0 -27
  287. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +0 -36
  288. data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +0 -23
  289. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +0 -68
  290. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +0 -28
  291. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +0 -27
  292. data/utils/enveomics/enveomics.R/man/growth.curves.Rd +0 -14
  293. data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +0 -13
  294. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +0 -78
  295. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +0 -46
  296. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +0 -45
  297. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +0 -125
  298. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +0 -19
  299. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +0 -19
  300. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +0 -19
  301. data/utils/enveomics/globals.mk +0 -8
  302. data/utils/enveomics/manifest.json +0 -9
  303. data/utils/multitrim/Multitrim How-To.pdf +0 -0
  304. data/utils/multitrim/README.md +0 -67
  305. data/utils/multitrim/multitrim.py +0 -1555
  306. data/utils/multitrim/multitrim.yml +0 -13
@@ -1,88 +0,0 @@
#!/usr/bin/env ruby

#
# @author: Luis M. Rodriguez-R
# @update: Mar-23-2015
# @license: artistic license 2.0
#

# Annotates pre-computed Orthology Groups (OGs). Reads an OG table (-i),
# attaches one note per annotated gene from each annotation file (-a), and
# writes the annotated collection (-o). The genome name of each annotation
# file is taken from the first capture group of the -f regex.

$:.push File.expand_path(File.dirname(__FILE__) + '/lib')
require 'enveomics_rb/og'
require 'optparse'

# Lower-case booleans: the upper-case TRUE/FALSE constants are deprecated and
# are not defined in current Ruby versions.
o = {:q=>false, :f=>"(\\S+)\\.txt", :consolidate=>true, :pre=>[]}
ARGV << '-h' if ARGV.size==0
OptionParser.new do |opts|
  opts.banner = "
Annotates Orthology Groups (OGs) using one or more reference genomes.

Usage: #{$0} [options]"
  opts.separator ""
  opts.separator "Mandatory"
  opts.on("-i", "--in FILE", "Input file containing the OGs (as generated by ogs.rb)."){ |v| o[:in]=v }
  opts.on("-o", "--out FILE", "Output file containing the annotated OGs."){ |v| o[:out]=v }
  opts.on("-a FILE1,FILE2,...", Array, "Input file(s) containing the annotations.One or more tab-delimited files",
    "with the gene names in the first column and the annotation in the second."){ |v| o[:annotations]=v }
  opts.separator ""
  opts.separator "Other Options"
  opts.on("-f","--format STRING", "Format of the filenames for the annotation files, using regex syntax.",
    "By default: '#{o[:f]}'."){ |v| o[:f]=v }
  opts.on("-q", "--quiet", "Run quietly (no STDERR output)."){ o[:q] = true }
  opts.on("-h", "--help", "Display this screen.") do
    puts opts
    exit
  end
  opts.separator ""
end.parse!
abort "-i is mandatory" if o[:in].nil?
abort "-o is mandatory" if o[:out].nil?
abort "-a is mandatory" if o[:annotations].nil?

##### MAIN:
begin
  # Read the pre-computed OGs: the first line is the header, every following
  # line is one OG. The block form closes the file even if parsing fails.
  collection = OGCollection.new
  $stderr.puts "Reading pre-computed OGs in '#{o[:in]}'." unless o[:q]
  File.open(o[:in], 'r') do |f|
    h = f.gets.chomp.split(/\t/)
    while ln = f.gets
      collection << OG.new(h, ln.chomp.split(/\t/))
    end
  end
  $stderr.puts " Loaded OGs: #{collection.ogs.length}." unless o[:q]

  # Read annotations
  o[:annotations].each do |annot|
    m = /#{o[:f]}/.match(annot)
    if m.nil? or m[1].nil?
      # Report the offending annotation file and skip it (the previous code
      # interpolated an undefined variable here and raised NameError).
      warn "Cannot parse filename: #{annot} (doesn't match /#{o[:f]}/)."
      next
    end
    no_og = 0
    # Block form so each annotation file is closed once it has been read.
    File.open(annot, 'r') do |f|
      collection.add_note_src m[1]+' annotation'
      while ln=f.gets
        r = ln.chomp.split(/\t/)
        g = Gene.new m[1], r[0]
        og = collection.get_og g
        if og.nil?
          no_og += 1
        else
          # The note is filed under the source registered just above, i.e.
          # the last entry of the collection's note sources.
          og.add_note g.id + ': ' + r[1], collection.note_srcs.length-1
        end
      end
    end
    warn "Warning: Cannot find #{no_og} genes from #{m[1]} in OG collection." if no_og>0
  end

  # Save the output matrix
  $stderr.puts "Saving annotated OGs into '#{o[:out]}'." unless o[:q]
  File.open(o[:out], "w") { |f| f.puts collection.to_s }
  $stderr.puts "Done.\n" unless o[:q]
rescue => err
  # NOTE(review): the script still exits with status 0 after reporting an
  # exception; confirm whether callers would prefer a non-zero exit here.
  $stderr.puts "Exception: #{err}\n\n"
  err.backtrace.each { |l| $stderr.puts l + "\n" }
end
@@ -1,160 +0,0 @@
#!/usr/bin/env ruby

#
# @author: Luis M. Rodriguez-R
# @license: artistic-2.0
#

# Estimates core-genome and pangenome size trends from a table of Orthology
# Groups (OGs). Each replicate removes genomes one at a time in a random order
# and records, for every number of genomes, how many OGs are present in all of
# them (core) and in at least one of them (pan).

$:.push File.expand_path("../lib", __FILE__)
require "optparse"
require "json"
require "tmpdir"

cfg = {q:false, n:100, thr:2}
ARGV << "-h" if ARGV.size==0
OptionParser.new do |parser|
  parser.banner = "
Subsamples the genomes in a set of Orthology Groups (OGs) and estimates the
trend of core genome and pangenome sizes.

Usage: #{$0} [options]"
  parser.separator ""
  parser.separator "Mandatory"
  parser.on("-o", "--ogs FILE",
    "Input file containing the precomputed OGs."){ |v| cfg[:ogs]=v }
  parser.separator ""
  parser.separator "Output Options"
  parser.on("-s", "--summary FILE",
    "Output file in tabular format with summary statistics."){ |v| cfg[:summ]=v }
  parser.on("-t", "--tab FILE","Output file in tabular format."){ |v| cfg[:tab]=v }
  parser.on("-j", "--json FILE", "Output file in JSON format."){ |v| cfg[:json]=v }
  parser.separator ""
  parser.separator "Other Options"
  parser.on("-n", "--replicates INT",
    "Number of replicates to estimate. By default: #{cfg[:n]}."
    ){ |v| cfg[:n]=v.to_i }
  parser.on("--threads INT",
    "Children threads to spawn. By default: #{cfg[:thr]}."){ |v| cfg[:thr]=v.to_i}
  parser.on("-q", "--quiet", "Run quietly (no STDERR output)."){ cfg[:q] = true }
  parser.on("-h", "--help", "Display this screen.") do
    puts parser
    exit
  end
  parser.separator ""
end.parse!
abort "-o is mandatory" if cfg[:ogs].nil?

##### MAIN:
begin
  # Read the pre-computed OGs. Every OG row is packed into an integer bitmask
  # with one bit per column ("-" means absent, anything else present).
  $stderr.puts "Reading pre-computed OGs in '#{cfg[:ogs]}'." unless cfg[:q]
  og_masks = []
  genomes_n = nil
  File.open(cfg[:ogs], "r") do |fh|
    genomes_n = fh.gets.chomp.split("\t").size
    fh.each_line do |row|
      bits = row.chomp.split("\t").map { |cell| cell == "-" ? "0" : "1" }
      og_masks << bits.join("").to_i(2)
    end
  end
  $stderr.puts " Loaded OGs: #{og_masks.size}." unless cfg[:q]

  # Generate subsamples: one forked child per replicate, each writing its
  # trajectory as JSON into a shared temporary directory.
  size = {core:[], pan:[]}
  Dir.mktmpdir do |dir|
    running = 0
    cfg[:n].times do |rep|
      fork do
        # Generate trajectory
        remaining = (0 ... genomes_n).to_a.shuffle
        mask = (1 << genomes_n) - 1
        core_sizes = []
        pan_sizes = []
        until remaining.empty?
          core_n = 0
          pan_n = 0
          # Count OGs for the current genome set and drop those that no
          # longer have any member among the remaining genomes.
          og_masks.select! do |og|
            shared = og & mask
            next false unless shared > 0
            core_n += 1 if shared == mask
            pan_n += 1
            true
          end
          # Prepend, so that index k ends up holding the value for k+1 genomes.
          core_sizes.unshift core_n
          pan_sizes.unshift pan_n
          mask ^= 1 << remaining.pop
        end
        abort "UNEXPECTED ERROR: Final genomes_b=#{mask}." if mask > 0
        # Store trajectory
        File.open("#{dir}/#{rep}", "w") do |tfh|
          tfh.puts JSON.generate({core:core_sizes, pan:pan_sizes})
        end
      end # fork
      # Throttle: once the limit is reached, wait for one child to finish
      # before spawning the next one.
      running += 1
      next if running < cfg[:thr]
      Process.wait
      running -= 1
    end
    Process.waitall
    # Recover trajectories
    cfg[:n].times do |rep|
      traj = JSON.parse(File.read("#{dir}/#{rep}"), {:symbolize_names=>true})
      size[:core][rep] = traj[:core]
      size[:pan][rep] = traj[:pan]
    end
  end # Dir.mktmpdir

  # Show result
  $stderr.puts "Generating reports." unless cfg[:q]

  # Save results in JSON
  unless cfg[:json].nil?
    File.open(cfg[:json], "w") { |ofh| ofh.puts JSON.pretty_generate(size) }
  end

  # Save results in tab: two rows (core, pan) per replicate
  unless cfg[:tab].nil?
    File.open(cfg[:tab], "w") do |ofh|
      ofh.puts((%w{replicate metric} + (1 .. genomes_n).to_a).join("\t"))
      cfg[:n].times do |rep|
        ofh.puts(([rep + 1, "core"] + size[:core][rep]).join("\t"))
        ofh.puts(([rep + 1, "pan"] + size[:pan][rep]).join("\t"))
      end
    end
  end

  # Save summary results in tab: one row per number of genomes
  unless cfg[:summ].nil?
    File.open(cfg[:summ], "w") do |ofh|
      ofh.puts %w{genomes core_avg core_sd core_q1 core_q2 core_q3
        pan_avg pan_sd pan_q1 pan_q2 pan_q3}.join("\t")
      genomes_n.times do |idx|
        row = [idx + 1]
        [:core, :pan].each do |metric|
          vals = size[metric].map { |r| r[idx] }.sort
          mean = vals.inject(0, :+).to_f / vals.size
          mean_sq = vals.map { |v| v**2 }.inject(0, :+).to_f / vals.size
          # Standard deviation computed as sqrt(E[x^2] - E[x]^2).
          sd = Math.sqrt(mean_sq - mean**2)
          q1 = vals[vals.size * 1 / 4]
          q2 = vals[vals.size * 2 / 4]
          q3 = vals[vals.size * 3 / 4]
          row += [mean, sd, q1, q2, q3]
        end
        ofh.puts row.join("\t")
      end
    end
  end

  $stderr.puts "Done.\n" unless cfg[:q]
rescue => err
  $stderr.puts "Exception: #{err}\n\n"
  err.backtrace.each { |l| $stderr.puts l + "\n" }
  err
end
@@ -1,125 +0,0 @@
#!/usr/bin/env ruby
#
# @author Luis M. Rodriguez-R
# @license artistic license 2.0
#

# Extracts the sequences of pre-computed Orthology Groups (OGs) from
# per-genome FastA files. Writes one FastA file per OG by default, or one per
# genome with --per-genome, into the output directory (-o).

$:.push File.expand_path("lib", File.dirname(__FILE__))
# Required explicitly: OptionParser is used directly below.
require "optparse"
require "enveomics_rb/enveomics"
require "enveomics_rb/og"

o = {q:false, pergenome:false, prefix:false, first:false, rand:false,
  core:0.0, dups:0}
OptionParser.new do |opts|
  opts.banner = "
Extracts sequences of Orthology Groups (OGs) from genomes (proteomes).

Usage: #{$0} [options]"
  opts.separator ""
  opts.separator "Mandatory"
  opts.on("-i", "--in FILE",
    "Input file containing the OGs (as generated by ogs.rb)."){ |v| o[:in]=v }
  opts.on("-o", "--out FILE",
    "Output directory where to place extracted sequences."){ |v| o[:out]=v }
  opts.on("-s", "--seqs STRING",
    "Path to the proteomes in FastA format, using '%s' to denote the genome.",
    "For example: /path/to/seqs/%s.faa."){ |v| o[:seqs]=v }
  opts.separator ""
  opts.separator "Other Options"
  opts.on("-c", "--core FLOAT",
    "Use only OGs present in at least this fraction of the genomes.",
    "To use only the strict core genome*, use -c 1."){ |v| o[:core]=v.to_f }
  opts.on("-d", "--duplicates INT",
    "Use only OGs with less than this number of in-paralogs in a genome.",
    "To use only genes without in-paralogs*, use -d 1."
    ){ |v| o[:dups]=v.to_i }
  opts.on("-g", "--per-genome",
    "If set, the output is generated per genome.",
    "By default, the output is per OG."){ |v| o[:pergenome]=v }
  opts.on("-p", "--prefix",
    "If set, each sequence is prefixed with the genome name",
    "(or OG number, if --per-genome) and a dash."){ |v| o[:prefix]=v }
  opts.on("-r", "--rand",
    "Get only one gene per genome per OG (random) regardless of in-paralogs.",
    "By default all genes are extracted."){ |v| o[:rand]=v }
  opts.on("-f", "--first",
    "Get only one gene per genome per OG (first) regardless of in-paralogs.",
    "By default all genes are extracted. Takes precendece over --rand."
    ){ |v| o[:first]=v }
  # Lower-case `true`: the upper-case TRUE constant is deprecated and is not
  # defined in current Ruby versions.
  opts.on("-q", "--quiet", "Run quietly (no STDERR output)."){ o[:q] = true }
  opts.on("-h", "--help", "Display this screen.") do
    puts opts
    exit
  end
  opts.separator ""
  opts.separator " * To use only the unus genome (OGs with exactly one " +
    "gene per genome), use: -c 1 -d 1."
  opts.separator ""
end.parse!
abort "-i is mandatory" if o[:in].nil?
abort "-o is mandatory" if o[:out].nil?
abort "-s is mandatory" if o[:seqs].nil?

##### MAIN:
begin
  # Read the pre-computed OGs: header line first, then one OG per line.
  collection = OGCollection.new
  $stderr.puts "Reading pre-computed OGs in '#{o[:in]}'." unless o[:q]
  File.open(o[:in], "r") do |f|
    h = f.gets.chomp.split(/\t/)
    while ln = f.gets
      collection << OG.new(h, ln.chomp.split(/\t/))
    end
  end
  $stderr.puts " Loaded OGs: #{collection.ogs.size}." unless o[:q]
  $stderr.puts " Reported Genomes: #{Gene.genomes.size}." unless o[:q]

  # Filter core/in-paralog genes
  collection.filter_core! o[:core] unless o[:core]==0.0
  collection.remove_inparalogs! o[:dups] unless o[:dups]==0
  $stderr.puts " Filtered OGs: #{collection.ogs.size}." unless
    o[:q] or o[:core]==0.0

  # Open outputs: one handle per genome (--per-genome) or one per OG.
  $stderr.puts "Initializing output files." unless o[:q]
  Dir.mkdir(o[:out]) unless Dir.exist? o[:out]
  ofhs =
    if o[:pergenome]
      Gene.genomes.map { |g| File.open("#{o[:out]}/#{g}.fa", "w") }
    else
      (1 .. collection.ogs.size).map do |og|
        File.open("#{o[:out]}/OG#{og}.fa", "w")
      end
    end
  $stderr.puts " Created files: #{ofhs.size}." unless o[:q]

  # Read genomes
  $stderr.puts "Filtering genes." unless o[:q]
  genome_i = -1
  Gene.genomes.each do |genome|
    genome_i = Gene.genomes.index(genome)
    $stderr.print " Genome #{genome_i+1}. \r" unless o[:q]
    # One entry per OG: the gene(s) of this genome to extract from that OG.
    genes = collection.get_genome_genes(genome).map do |og|
      o[:first] ? [og.first] : (o[:rand] ? [og.sample] : og)
    end
    # `hand` is the index of the output handle for the current FastA entry
    # (nil while inside an entry that is not being extracted).
    hand = nil
    # File.foreach closes the input once it has been read; the previous
    # File.open(...).each form never closed the handle.
    File.foreach(sprintf(o[:seqs], genome)) do |ln|
      if ln =~ /^>(\S+)/
        gene_id = $1
        og = genes.index { |g| g.include? gene_id }
        hand = og.nil? ? nil : ( o[:pergenome] ? genome_i : og )
        # NOTE(review): `og` is a 0-based index here, while the per-OG output
        # files are named from OG1; confirm the intended prefix numbering.
        if o[:prefix] and not hand.nil?
          ln.sub!(/^>/, ">#{o[:pergenome] ? "OG#{og}" : genome}-")
        end
      end
      ofhs[hand].puts(ln) unless hand.nil?
    end
  end
  $stderr.puts " #{genome_i+1} genomes processed." unless o[:q]

  # Close outputs
  $stderr.puts "Closing output files." unless o[:q]
  ofhs.each { |h| h.close }
  $stderr.puts "Done.\n" unless o[:q]
rescue => err
  # NOTE(review): the script still exits with status 0 after reporting an
  # exception; confirm whether callers would prefer a non-zero exit here.
  $stderr.puts "Exception: #{err}\n\n"
  err.backtrace.each { |l| $stderr.puts l + "\n" }
end
@@ -1,186 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- #
4
- # @author: Luis M. Rodriguez-R
5
- # @update: Sep-11-2015
6
- # @license: artistic license 2.0
7
- #
8
-
9
- $:.push File.expand_path(File.dirname(__FILE__) + "/lib")
10
- require 'enveomics_rb/og'
11
- require 'optparse'
12
- require 'tmpdir'
13
-
14
- o = {q:false, f:"(\\S+)-(\\S+)\\.rbm", mcl:"", inflation:1.5, blind:false,
15
- evalue:false, thr:2, identity:false, bestmatch:false}
16
- ARGV << "-h" if ARGV.size==0
17
- OptionParser.new do |opts|
18
- opts.banner = "
19
- Identifies Orthology Groups (OGs) in Reciprocal Best Matches (RBM)
20
- between all pairs in a collection of genomes, using the Markov Cluster
21
- Algorithm.
22
-
23
- Requires MCL (see http://www.micans.org/mcl).
24
-
25
- Usage: #{$0} [options]"
26
- opts.separator ""
27
- opts.separator "Mandatory"
28
- opts.on("-o", "--out FILE",
29
- "Output file containing the detected OGs."){ |v| o[:out]=v }
30
- opts.on("-d", "--dir DIR",
31
- "Directory containing the RBM files.",
32
- "Becomes optional iff --abc is set to a non-empty file."){ |v| o[:dir]=v }
33
- opts.separator ""
34
- opts.separator "Other Options"
35
- opts.on("-f", "--format STRING",
36
- "Format of the filenames for the RBM files (within -d), using regex " +
37
- "syntax.", "By default: '#{o[:f]}'."){ |v| o[:f]=v }
38
- opts.on("-I", "--inflation FLOAT",
39
- "Inflation parameter for MCL clustering. By default: #{o[:inflation]}."
40
- ){ |v| o[:inflation]=v.to_f }
41
- opts.on("-b", "--blind",
42
- "If set, computes clusters without taking bitscore into account."
43
- ){ |v| o[:blind]=v }
44
- opts.on("-e", "--evalue",
45
- "If set, uses the e-value to weight edges, instead of the default " +
46
- "Bit-Score."){ |v| o[:evalue]=v }
47
- opts.on("-i", "--identity",
48
- "If set, uses the identity to weight edges, instead of the default " +
49
- "Bit-Score."){ |v| o[:identity]=v }
50
- opts.on("-B", "--best-match",
51
- "If set, it assumes best-matches instead reciprocal best matches."
52
- ){ |v| o[:bestmatch]=v }
53
- opts.on("-m", "--mcl-bin DIR",
54
- "Path to the directory containing the mcl binaries.",
55
- "By default, assumed to be in the PATH."){ |v| o[:mcl]=v+"/" }
56
- opts.on("--abc FILE",
57
- "Use this abc file instead of a temporal file."){ |v| o[:abc] = v }
58
- opts.on("-t", "--threads INT",
59
- "Number of threads to use. By default: #{o[:thr]}."){ |v| o[:thr]=v.to_i }
60
- opts.on("-q", "--quiet", "Run quietly (no STDERR output)."){ o[:q] = true }
61
- opts.on("-h", "--help", "Display this screen.") do
62
- puts opts
63
- exit
64
- end
65
- opts.separator ""
66
- end.parse!
67
- abort "-o is mandatory" if o[:out].nil?
68
- o[:evalue] = false if o[:identity]
69
- o[:evalue] = false if o[:blind]
70
- o[:identity] = false if o[:blind]
71
-
72
- ##### MAIN:
73
- begin
74
- Dir.mktmpdir do |dir|
75
- o[:abc] = "#{dir}/rbms.abc" if o[:abc].nil?
76
- abort "-d must exist and be a directory" unless
77
- File.size?(o[:abc]) or
78
- (!o[:dir].nil? and File.exists?(o[:dir]) and File.directory?(o[:dir]))
79
- # Traverse the whole directory
80
- if File.size? o[:abc]
81
- $stderr.puts "Reusing existing abc file '#{o[:abc]}'." unless o[:q]
82
- else
83
- file_i = 0
84
- ln_i = 0
85
- $stderr.puts "Reading RBM files within '#{o[:dir]}'." unless o[:q]
86
- abc = File.open(o[:abc] + ".tmp", "w")
87
- Dir.entries(o[:dir]).each do |rbm_file|
88
- next unless File.file?(o[:dir]+"/"+rbm_file)
89
- # Parse the filename to identify the genomes
90
- m = /#{o[:f]}/.match(rbm_file)
91
- if m.nil? or m[2].nil?
92
- warn "Ignoring #{rbm_file}: doesn't match /#{o[:f]}/."
93
- next
94
- end
95
- file_i += 1
96
- # Read the RBMs list
97
- f = File.open(o[:dir]+"/"+rbm_file, "r")
98
- while ln = f.gets
99
- # Add the RBM to the abc file
100
- row = ln.split(/\t/)
101
- abc.puts [m[1]+">"+row[0], m[2]+">"+row[1],
102
- (o[:blind] ? "1" :
103
- (o[:evalue] ? row[10] :
104
- (o[:identity] ? row[2] : row[11])))].join("\t")
105
- ln_i += 1
106
- end
107
- f.close
108
- $stderr.print " Scanned files: #{file_i}. " +
109
- "Found RBMs: #{ln_i}. \r" unless o[:q]
110
- end
111
- abc.close
112
- File.rename(o[:abc] + ".tmp", o[:abc])
113
- $stderr.print "\n" unless o[:q]
114
- end # if File.size? o[:abc] ... else
115
-
116
- # Build .mci file (mcxload) and compute .mccl file (mcl)
117
- $stderr.puts "Markov-Clustering" unless o[:q]
118
- `'#{o[:mcl]}mcxload' #{"--stream-mirror" unless o[:bestmatch]} \
119
- -abc '#{o[:abc]}' -o '#{dir}/rbms.mci' --write-binary \
120
- -write-tab '#{dir}/genes.tab' #{"--stream-neg-log10" if o[:evalue]} \
121
- &>/dev/null`
122
- `'#{o[:mcl]}mcl' '#{dir}/rbms.mci' -V all -I #{o[:inflation].to_s} \
123
- -o '#{dir}/ogs.mcl' -te #{o[:thr].to_s}`
124
-
125
- # Load .tab as Gene objects
126
- $stderr.puts "Loading gene table from '#{dir}/genes.tab'." unless o[:q]
127
- genes = []
128
- tab = File.open("#{dir}/genes.tab", "r")
129
- while ln = tab.gets
130
- ln.chomp!
131
- r = ln.split /\t|>/
132
- genes[ r[0].to_i ] = Gene.new(r[1], r[2])
133
- end
134
- tab.close
135
- $stderr.puts " Got " + genes.size.to_s + " genes in " +
136
- Gene.genomes.size.to_s + " genomes." unless o[:q]
137
-
138
- # Load .mcl file as OGCollection
139
- $stderr.puts "Loading clusters from '#{dir}/ogs.mcl'." unless o[:q]
140
- collection = OGCollection.new
141
- mcl = File.open("#{dir}/ogs.mcl", "r")
142
- in_matrix = false
143
- my_genes = nil
144
- while ln = mcl.gets
145
- ln.chomp!
146
- if ln =~ /^\(mclmatrix$/
147
- in_matrix = true
148
- next
149
- end
150
- next if ln =~ /^begin$/
151
- if in_matrix
152
- break if ln =~ /^\)$/
153
- if ln =~ /^\d+\s+/
154
- ln.sub!(/^\d+\s+/, "")
155
- my_genes = []
156
- end
157
- ln.sub!(/^\s+/, "")
158
- raise "Incomplete mcl matrix, offending line: #{$.}: #{ln}" if
159
- my_genes.nil?
160
- my_genes += ln.split(/\s/)
161
- if my_genes.last == "$"
162
- my_genes.pop
163
- og = OG.new
164
- my_genes.each{|i| og << genes[ i.to_i ]}
165
- collection << og
166
- my_genes = nil
167
- end
168
- end
169
- end
170
- mcl.close
171
- $stderr.puts " Got #{collection.ogs.size} clusters." unless o[:q]
172
-
173
- # Save the output matrix
174
- $stderr.puts "Saving matrix into '#{o[:out]}'." unless o[:q]
175
- f = File.open(o[:out], "w")
176
- f.puts collection.to_s
177
- f.close
178
- $stderr.puts "Done.\n" unless o[:q]
179
- end
180
- rescue => err
181
- $stderr.puts "Exception: #{err}\n\n"
182
- err.backtrace.each { |l| $stderr.puts l + "\n" }
183
- err
184
- end
185
-
186
-
@@ -1,104 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- #
4
- # @author: Luis M. Rodriguez-R
5
- # @update: Apr-29-2015
6
- # @license: artistic license 2.0
7
- #
8
-
9
- $:.push File.expand_path(File.dirname(__FILE__) + '/lib')
10
- require 'enveomics_rb/og'
11
- require 'optparse'
12
-
13
- o = {:q=>FALSE, :f=>"(\\S+)-(\\S+)\\.rbm", :consolidate=>TRUE, :pre=>[]}
14
- ARGV << '-h' if ARGV.size==0
15
- OptionParser.new do |opts|
16
- opts.banner = "
17
- ***IMPORTANT NOTE***
18
- This script suffers from chaining effect and is very sensitive to spurious connections,
19
- because it applies a greedy clustering algorithm. For most practical purposes, the use
20
- of this script is discouraged and `ogs.mcl.rb` should be preferred. [ Apr-29-2015 ]
21
-
22
- Identifies Orthology Groups (OGs) in Reciprocal Best Matches (RBM)
23
- between all pairs in a collection of genomes.
24
-
25
- Usage: #{$0} [options]"
26
- opts.separator ""
27
- opts.separator "Mandatory"
28
- opts.on("-o", "--out FILE", "Output file containing the detected OGs."){ |v| o[:out]=v }
29
- opts.separator ""
30
- opts.separator "Other Options"
31
- opts.on("-d", "--dir DIR", "Directory containing the RBM files."){ |v| o[:dir]=v }
32
- opts.on("-p", "--pre-ogs FILE1,FILE2,...", Array, "Pre-computed OGs file(s), separated by commas."){ |v| o[:pre]=v }
33
- opts.on("-n", "--unchecked", "Do not check internal redundancy in OGs."){ o[:consolidate]=FALSE }
34
- opts.on("-f","--format STRING", "Format of the filenames for the RBM files (within -d), using regex syntax. By default: '#{o[:f]}'."){ |v| o[:f]=v }
35
- opts.on("-q", "--quiet", "Run quietly (no STDERR output)."){ o[:q] = TRUE }
36
- opts.on("-h", "--help", "Display this screen.") do
37
- puts opts
38
- exit
39
- end
40
- opts.separator ""
41
- end.parse!
42
- abort "-o is mandatory" if o[:out].nil?
43
-
44
- ##### MAIN:
45
- begin
46
- # Initialize the collection of OGs.
47
- collection = OGCollection.new
48
- # Read the pre-computed OGs (if -p is passed).
49
- o[:pre].each do |pre|
50
- $stderr.puts "Reading pre-computed OGs in '#{pre}'." unless o[:q]
51
- f = File.open(pre, 'r')
52
- h = f.gets.chomp.split /\t/
53
- while ln = f.gets
54
- collection << OG.new(h, ln.chomp.split(/\t/))
55
- end
56
- f.close
57
- $stderr.puts " Loaded OGs: #{collection.ogs.length}." unless o[:q]
58
- end
59
- # Read the RBM files in the directory (if -d is passed).
60
- unless o[:dir].nil?
61
- abort "-d must exist and be a directory" unless File.exists?(o[:dir]) and File.directory?(o[:dir])
62
- # Traverse the whole directory.
63
- file_i = 0
64
- $stderr.puts "Reading RBM files within '#{o[:dir]}'." unless o[:q]
65
- Dir.entries(o[:dir]).each do |rbm_file|
66
- next unless File.file?(o[:dir]+"/"+rbm_file)
67
- # Parse the filename to identify the genomes.
68
- m = /#{o[:f]}/.match(rbm_file)
69
- if m.nil? or m[2].nil?
70
- warn "Cannot parse filename: #{rbm_file} (doesn't match /#{o[:f]}/)."
71
- next
72
- end
73
- file_i += 1
74
- # Read the RBMs list
75
- f = File.open(o[:dir]+"/"+rbm_file, "r")
76
- while ln = f.gets
77
- # Add the RBM to the collection of OGs. Only the first two columns are used.
78
- row = ln.split(/\t/)
79
- collection.add_rbm( Gene.new(m[1],row[0]), Gene.new(m[2],row[1]) )
80
- end
81
- f.close
82
- $stderr.print " Scanned files: #{file_i}. Found OGs: #{collection.ogs.length}. \r" unless o[:q]
83
- end
84
- $stderr.print "\n" unless o[:q]
85
- end
86
- # Evaluate internal consistency merging linked OGs (unless -n is passed).
87
- if o[:consolidate]
88
- $stderr.puts "Evaluating internal consistency." unless o[:q]
89
- collection.consolidate!
90
- $stderr.puts " Final OGs: #{collection.ogs.length}." unless o[:q]
91
- end
92
- # Save the output matrix
93
- $stderr.puts "Saving matrix into '#{o[:out]}'." unless o[:q]
94
- f = File.open(o[:out], "w")
95
- f.puts collection.to_s
96
- f.close
97
- $stderr.puts "Done.\n" unless o[:q]
98
- rescue => err
99
- $stderr.puts "Exception: #{err}\n\n"
100
- err.backtrace.each { |l| $stderr.puts l + "\n" }
101
- err
102
- end
103
-
104
-