miga-base 1.2.15.2 → 1.2.15.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306)
  1. checksums.yaml +4 -4
  2. data/lib/miga/cli/action/download/gtdb.rb +4 -1
  3. data/lib/miga/cli/action/gtdb_get.rb +4 -0
  4. data/lib/miga/daemon.rb +4 -1
  5. data/lib/miga/lair.rb +6 -4
  6. data/lib/miga/remote_dataset/download.rb +3 -2
  7. data/lib/miga/remote_dataset.rb +25 -7
  8. data/lib/miga/taxonomy.rb +6 -0
  9. data/lib/miga/version.rb +2 -2
  10. metadata +6 -302
  11. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +0 -41964
  12. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +0 -32439
  13. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +0 -62056
  14. data/utils/FastAAI/FastAAI +0 -3659
  15. data/utils/FastAAI/FastAAI-legacy/FastAAI +0 -1336
  16. data/utils/FastAAI/FastAAI-legacy/kAAI_v1.0_virus.py +0 -1296
  17. data/utils/FastAAI/README.md +0 -84
  18. data/utils/enveomics/Docs/recplot2.md +0 -244
  19. data/utils/enveomics/Examples/aai-matrix.bash +0 -66
  20. data/utils/enveomics/Examples/ani-matrix.bash +0 -66
  21. data/utils/enveomics/Examples/essential-phylogeny.bash +0 -105
  22. data/utils/enveomics/Examples/unus-genome-phylogeny.bash +0 -100
  23. data/utils/enveomics/LICENSE.txt +0 -73
  24. data/utils/enveomics/Makefile +0 -52
  25. data/utils/enveomics/Manifest/Tasks/aasubs.json +0 -103
  26. data/utils/enveomics/Manifest/Tasks/blasttab.json +0 -790
  27. data/utils/enveomics/Manifest/Tasks/distances.json +0 -161
  28. data/utils/enveomics/Manifest/Tasks/fasta.json +0 -802
  29. data/utils/enveomics/Manifest/Tasks/fastq.json +0 -291
  30. data/utils/enveomics/Manifest/Tasks/graphics.json +0 -126
  31. data/utils/enveomics/Manifest/Tasks/mapping.json +0 -137
  32. data/utils/enveomics/Manifest/Tasks/ogs.json +0 -382
  33. data/utils/enveomics/Manifest/Tasks/other.json +0 -906
  34. data/utils/enveomics/Manifest/Tasks/remote.json +0 -355
  35. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +0 -650
  36. data/utils/enveomics/Manifest/Tasks/tables.json +0 -308
  37. data/utils/enveomics/Manifest/Tasks/trees.json +0 -68
  38. data/utils/enveomics/Manifest/Tasks/variants.json +0 -111
  39. data/utils/enveomics/Manifest/categories.json +0 -165
  40. data/utils/enveomics/Manifest/examples.json +0 -162
  41. data/utils/enveomics/Manifest/tasks.json +0 -4
  42. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +0 -69
  43. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +0 -1
  44. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +0 -1
  45. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +0 -1
  46. data/utils/enveomics/Pipelines/assembly.pbs/README.md +0 -189
  47. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +0 -112
  48. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +0 -23
  49. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +0 -44
  50. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +0 -50
  51. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +0 -37
  52. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +0 -68
  53. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +0 -49
  54. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +0 -80
  55. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +0 -57
  56. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +0 -63
  57. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +0 -38
  58. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +0 -73
  59. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +0 -21
  60. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +0 -72
  61. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +0 -98
  62. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +0 -1
  63. data/utils/enveomics/Pipelines/blast.pbs/README.md +0 -127
  64. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +0 -109
  65. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +0 -128
  66. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +0 -16
  67. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +0 -22
  68. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +0 -26
  69. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +0 -89
  70. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +0 -29
  71. data/utils/enveomics/Pipelines/idba.pbs/README.md +0 -49
  72. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +0 -95
  73. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +0 -56
  74. data/utils/enveomics/Pipelines/trim.pbs/README.md +0 -54
  75. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +0 -70
  76. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +0 -130
  77. data/utils/enveomics/README.md +0 -42
  78. data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +0 -171
  79. data/utils/enveomics/Scripts/Aln.cat.rb +0 -221
  80. data/utils/enveomics/Scripts/Aln.convert.pl +0 -35
  81. data/utils/enveomics/Scripts/AlphaDiversity.pl +0 -152
  82. data/utils/enveomics/Scripts/BedGraph.tad.rb +0 -93
  83. data/utils/enveomics/Scripts/BedGraph.window.rb +0 -71
  84. data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +0 -102
  85. data/utils/enveomics/Scripts/BlastTab.addlen.rb +0 -63
  86. data/utils/enveomics/Scripts/BlastTab.advance.bash +0 -48
  87. data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +0 -55
  88. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +0 -104
  89. data/utils/enveomics/Scripts/BlastTab.cogCat.rb +0 -76
  90. data/utils/enveomics/Scripts/BlastTab.filter.pl +0 -47
  91. data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +0 -194
  92. data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +0 -104
  93. data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +0 -157
  94. data/utils/enveomics/Scripts/BlastTab.recplot2.R +0 -48
  95. data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +0 -86
  96. data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +0 -119
  97. data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +0 -86
  98. data/utils/enveomics/Scripts/BlastTab.subsample.pl +0 -47
  99. data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +0 -114
  100. data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +0 -90
  101. data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +0 -123
  102. data/utils/enveomics/Scripts/Chao1.pl +0 -97
  103. data/utils/enveomics/Scripts/CharTable.classify.rb +0 -234
  104. data/utils/enveomics/Scripts/EBIseq2tax.rb +0 -83
  105. data/utils/enveomics/Scripts/FastA.N50.pl +0 -60
  106. data/utils/enveomics/Scripts/FastA.extract.rb +0 -152
  107. data/utils/enveomics/Scripts/FastA.filter.pl +0 -52
  108. data/utils/enveomics/Scripts/FastA.filterLen.pl +0 -28
  109. data/utils/enveomics/Scripts/FastA.filterN.pl +0 -60
  110. data/utils/enveomics/Scripts/FastA.fragment.rb +0 -100
  111. data/utils/enveomics/Scripts/FastA.gc.pl +0 -42
  112. data/utils/enveomics/Scripts/FastA.interpose.pl +0 -93
  113. data/utils/enveomics/Scripts/FastA.length.pl +0 -38
  114. data/utils/enveomics/Scripts/FastA.mask.rb +0 -89
  115. data/utils/enveomics/Scripts/FastA.per_file.pl +0 -36
  116. data/utils/enveomics/Scripts/FastA.qlen.pl +0 -57
  117. data/utils/enveomics/Scripts/FastA.rename.pl +0 -65
  118. data/utils/enveomics/Scripts/FastA.revcom.pl +0 -23
  119. data/utils/enveomics/Scripts/FastA.sample.rb +0 -98
  120. data/utils/enveomics/Scripts/FastA.slider.pl +0 -85
  121. data/utils/enveomics/Scripts/FastA.split.pl +0 -55
  122. data/utils/enveomics/Scripts/FastA.split.rb +0 -79
  123. data/utils/enveomics/Scripts/FastA.subsample.pl +0 -131
  124. data/utils/enveomics/Scripts/FastA.tag.rb +0 -65
  125. data/utils/enveomics/Scripts/FastA.toFastQ.rb +0 -69
  126. data/utils/enveomics/Scripts/FastA.wrap.rb +0 -48
  127. data/utils/enveomics/Scripts/FastQ.filter.pl +0 -54
  128. data/utils/enveomics/Scripts/FastQ.interpose.pl +0 -90
  129. data/utils/enveomics/Scripts/FastQ.maskQual.rb +0 -89
  130. data/utils/enveomics/Scripts/FastQ.offset.pl +0 -90
  131. data/utils/enveomics/Scripts/FastQ.split.pl +0 -53
  132. data/utils/enveomics/Scripts/FastQ.tag.rb +0 -70
  133. data/utils/enveomics/Scripts/FastQ.test-error.rb +0 -81
  134. data/utils/enveomics/Scripts/FastQ.toFastA.awk +0 -24
  135. data/utils/enveomics/Scripts/GFF.catsbj.pl +0 -127
  136. data/utils/enveomics/Scripts/GenBank.add_fields.rb +0 -84
  137. data/utils/enveomics/Scripts/HMM.essential.rb +0 -351
  138. data/utils/enveomics/Scripts/HMM.haai.rb +0 -168
  139. data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +0 -83
  140. data/utils/enveomics/Scripts/JPlace.distances.rb +0 -88
  141. data/utils/enveomics/Scripts/JPlace.to_iToL.rb +0 -320
  142. data/utils/enveomics/Scripts/M5nr.getSequences.rb +0 -81
  143. data/utils/enveomics/Scripts/MeTaxa.distribution.pl +0 -198
  144. data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +0 -35
  145. data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +0 -49
  146. data/utils/enveomics/Scripts/NCBIacc2tax.rb +0 -92
  147. data/utils/enveomics/Scripts/Newick.autoprune.R +0 -27
  148. data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +0 -228
  149. data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +0 -32
  150. data/utils/enveomics/Scripts/RefSeq.download.bash +0 -48
  151. data/utils/enveomics/Scripts/SRA.download.bash +0 -55
  152. data/utils/enveomics/Scripts/TRIBS.plot-test.R +0 -36
  153. data/utils/enveomics/Scripts/TRIBS.test.R +0 -39
  154. data/utils/enveomics/Scripts/Table.barplot.R +0 -31
  155. data/utils/enveomics/Scripts/Table.df2dist.R +0 -30
  156. data/utils/enveomics/Scripts/Table.filter.pl +0 -61
  157. data/utils/enveomics/Scripts/Table.merge.pl +0 -77
  158. data/utils/enveomics/Scripts/Table.prefScore.R +0 -60
  159. data/utils/enveomics/Scripts/Table.replace.rb +0 -69
  160. data/utils/enveomics/Scripts/Table.round.rb +0 -63
  161. data/utils/enveomics/Scripts/Table.split.pl +0 -57
  162. data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +0 -227
  163. data/utils/enveomics/Scripts/VCF.KaKs.rb +0 -147
  164. data/utils/enveomics/Scripts/VCF.SNPs.rb +0 -88
  165. data/utils/enveomics/Scripts/aai.rb +0 -421
  166. data/utils/enveomics/Scripts/ani.rb +0 -362
  167. data/utils/enveomics/Scripts/anir.rb +0 -137
  168. data/utils/enveomics/Scripts/clust.rand.rb +0 -102
  169. data/utils/enveomics/Scripts/gi2tax.rb +0 -103
  170. data/utils/enveomics/Scripts/in_silico_GA_GI.pl +0 -96
  171. data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
  172. data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
  173. data/utils/enveomics/Scripts/lib/enveomics.R +0 -1
  174. data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +0 -293
  175. data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +0 -175
  176. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +0 -24
  177. data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +0 -17
  178. data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +0 -30
  179. data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +0 -253
  180. data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +0 -88
  181. data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +0 -182
  182. data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +0 -49
  183. data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +0 -74
  184. data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +0 -237
  185. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +0 -31
  186. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +0 -152
  187. data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +0 -3
  188. data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +0 -74
  189. data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +0 -135
  190. data/utils/enveomics/Scripts/ogs.annotate.rb +0 -88
  191. data/utils/enveomics/Scripts/ogs.core-pan.rb +0 -160
  192. data/utils/enveomics/Scripts/ogs.extract.rb +0 -125
  193. data/utils/enveomics/Scripts/ogs.mcl.rb +0 -186
  194. data/utils/enveomics/Scripts/ogs.rb +0 -104
  195. data/utils/enveomics/Scripts/ogs.stats.rb +0 -131
  196. data/utils/enveomics/Scripts/rbm-legacy.rb +0 -172
  197. data/utils/enveomics/Scripts/rbm.rb +0 -108
  198. data/utils/enveomics/Scripts/sam.filter.rb +0 -148
  199. data/utils/enveomics/Tests/Makefile +0 -10
  200. data/utils/enveomics/Tests/Mgen_M2288.faa +0 -3189
  201. data/utils/enveomics/Tests/Mgen_M2288.fna +0 -8282
  202. data/utils/enveomics/Tests/Mgen_M2321.fna +0 -8288
  203. data/utils/enveomics/Tests/Nequ_Kin4M.faa +0 -2970
  204. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
  205. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +0 -7
  206. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +0 -17
  207. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +0 -137
  208. data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +0 -123
  209. data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +0 -200
  210. data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +0 -55
  211. data/utils/enveomics/Tests/alkB.nwk +0 -1
  212. data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +0 -13
  213. data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +0 -17
  214. data/utils/enveomics/Tests/hiv1.faa +0 -59
  215. data/utils/enveomics/Tests/hiv1.fna +0 -134
  216. data/utils/enveomics/Tests/hiv2.faa +0 -70
  217. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +0 -233
  218. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +0 -1
  219. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +0 -233
  220. data/utils/enveomics/Tests/phyla_counts.tsv +0 -10
  221. data/utils/enveomics/Tests/primate_lentivirus.ogs +0 -11
  222. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +0 -9
  223. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +0 -8
  224. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +0 -6
  225. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +0 -9
  226. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +0 -6
  227. data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +0 -6
  228. data/utils/enveomics/build_enveomics_r.bash +0 -45
  229. data/utils/enveomics/enveomics.R/DESCRIPTION +0 -31
  230. data/utils/enveomics/enveomics.R/NAMESPACE +0 -39
  231. data/utils/enveomics/enveomics.R/R/autoprune.R +0 -155
  232. data/utils/enveomics/enveomics.R/R/barplot.R +0 -184
  233. data/utils/enveomics/enveomics.R/R/cliopts.R +0 -135
  234. data/utils/enveomics/enveomics.R/R/df2dist.R +0 -154
  235. data/utils/enveomics/enveomics.R/R/growthcurve.R +0 -331
  236. data/utils/enveomics/enveomics.R/R/prefscore.R +0 -79
  237. data/utils/enveomics/enveomics.R/R/recplot.R +0 -354
  238. data/utils/enveomics/enveomics.R/R/recplot2.R +0 -1631
  239. data/utils/enveomics/enveomics.R/R/tribs.R +0 -583
  240. data/utils/enveomics/enveomics.R/R/utils.R +0 -80
  241. data/utils/enveomics/enveomics.R/README.md +0 -81
  242. data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
  243. data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
  244. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -16
  245. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -16
  246. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -16
  247. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +0 -25
  248. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +0 -46
  249. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +0 -23
  250. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +0 -47
  251. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +0 -23
  252. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +0 -23
  253. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +0 -40
  254. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +0 -103
  255. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +0 -67
  256. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +0 -24
  257. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +0 -19
  258. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +0 -45
  259. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +0 -44
  260. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +0 -47
  261. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +0 -75
  262. data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +0 -50
  263. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +0 -44
  264. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +0 -139
  265. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +0 -45
  266. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +0 -24
  267. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +0 -77
  268. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +0 -25
  269. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +0 -21
  270. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +0 -19
  271. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +0 -19
  272. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +0 -47
  273. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +0 -29
  274. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +0 -18
  275. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +0 -45
  276. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +0 -36
  277. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +0 -19
  278. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +0 -19
  279. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +0 -27
  280. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +0 -52
  281. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +0 -17
  282. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +0 -51
  283. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +0 -43
  284. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +0 -82
  285. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +0 -59
  286. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +0 -27
  287. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +0 -36
  288. data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +0 -23
  289. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +0 -68
  290. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +0 -28
  291. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +0 -27
  292. data/utils/enveomics/enveomics.R/man/growth.curves.Rd +0 -14
  293. data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +0 -13
  294. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +0 -78
  295. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +0 -46
  296. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +0 -45
  297. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +0 -125
  298. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +0 -19
  299. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +0 -19
  300. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +0 -19
  301. data/utils/enveomics/globals.mk +0 -8
  302. data/utils/enveomics/manifest.json +0 -9
  303. data/utils/multitrim/Multitrim How-To.pdf +0 -0
  304. data/utils/multitrim/README.md +0 -67
  305. data/utils/multitrim/multitrim.py +0 -1555
  306. data/utils/multitrim/multitrim.yml +0 -13
@@ -1,362 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @author Luis M. Rodriguez-R
4
- # @license Artistic-2.0
5
-
6
- require "optparse"
7
- require "tmpdir"
8
- has_rest_client = true
9
- has_sqlite3 = true
10
- begin
11
- require "rubygems"
12
- require "restclient"
13
- rescue LoadError
14
- has_rest_client = false
15
- end
16
- begin
17
- require "sqlite3"
18
- rescue LoadError
19
- has_sqlite3 = false
20
- end
21
-
22
- o = {win:1000, step:200, id:70, len:700, correct:true, hits:50, q:false, bin:"",
23
- program:"blast+", thr:1, dec:2, auto:false, lookupfirst:false,
24
- dbregions:true, dbrbm: true, min_actg:0.95}
25
- ARGV << "-h" if ARGV.size==0
26
- OptionParser.new do |opts|
27
- opts.banner = "
28
- Calculates the Average Nucleotide Identity between two genomes.
29
-
30
- Usage: #{$0} [options]"
31
- opts.separator ""
32
- opts.separator "Mandatory"
33
- opts.on("-1", "--seq1 FILE",
34
- "Path to the FastA file containing the genome 1."){ |v| o[:seq1] = v }
35
- opts.on("-2", "--seq2 FILE",
36
- "Path to the FastA file containing the genome 2."){ |v| o[:seq2] = v }
37
- if has_rest_client
38
- opts.separator " Alternatively, you can supply a NCBI-acc with the " +
39
- "format ncbi:CP014272 instead of files."
40
- else
41
- opts.separator " Install rest-client to enable NCBI-acc support."
42
- end
43
- opts.separator ""
44
- opts.separator "Search Options"
45
- opts.on("-w", "--win INT",
46
- "Window size in the ANI calculation (in bp). By default: " +
47
- "#{o[:win].to_s}."){ |v| o[:win] = v.to_i }
48
- opts.on("-s", "--step INT",
49
- "Step size in the ANI calculation (in bp). By default: " +
50
- "#{o[:step].to_s}."){ |v| o[:step] = v.to_i }
51
- opts.on("-l", "--len INT",
52
- "Minimum alignment length (in bp). By default: #{o[:len]}."
53
- ){ |v| o[:len] = v.to_i }
54
- opts.on("-i", "--id NUM",
55
- "Minimum alignment identity (in %). By default: #{o[:id]}."
56
- ){ |v| o[:id] = v.to_f }
57
- opts.on("-n", "--hits INT",
58
- "Minimum number of hits. By default: #{o[:hits]}."
59
- ){ |v| o[:hits] = v.to_i }
60
- opts.on("-N", "--nocorrection",
61
- "Report values without post-hoc correction."){ |v| o[:correct] = false }
62
- opts.on("--min-actg FLOAT",
63
- "Minimum fraction of ACTGN in the sequences before assuming proteins.",
64
- "By default: #{o[:min_actg]}."
65
- ){ |v| o[:min_actg] = v.to_f }
66
- opts.separator ""
67
- opts.separator "Software Options"
68
- opts.on("-b", "--bin DIR",
69
- "Path to the directory containing the binaries of the search program."
70
- ){ |v| o[:bin] = v }
71
- opts.on("-p", "--program STR",
72
- "Search program to be used. One of: blast+ (default), blast, blat."
73
- ){ |v| o[:program] = v }
74
- opts.on("-t", "--threads INT",
75
- "Number of parallel threads to be used. By default: #{o[:thr]}."
76
- ){ |v| o[:thr] = v.to_i }
77
- opts.separator ""
78
- opts.separator "SQLite3 Options"
79
- opts.on("-S", "--sqlite3 FILE",
80
- "Path to the SQLite3 database to create (or update) with the results."
81
- ){ |v| o[:sqlite3] = v }
82
- opts.separator " Install sqlite3 gem to enable database support." unless
83
- has_sqlite3
84
- opts.on("--name1 STR",
85
- "Name of --seq1 to use in --sqlite3. By default determined by filename."
86
- ){ |v| o[:seq1name] = v }
87
- opts.on("--name2 STR",
88
- "Name of --seq2 to use in --sqlite3. By default determined by filename."
89
- ){ |v| o[:seq2name] = v }
90
- opts.on("--[no-]save-regions",
91
- "Save (or don't save) the fragments in the --sqlite3 database.",
92
- "By default: #{o[:dbregions]}."){ |v| o[:dbregions] = !!v }
93
- opts.on("--[no-]save-rbm",
94
- "Save (or don't save) the reciprocal best matches in the --sqlite3 db.",
95
- "By default: #{o[:dbrbm]}."){ |v| o[:dbrbm] = !!v }
96
- opts.on("--lookup-first",
97
- "Indicates if the ANI should be looked up first in the database.",
98
- "Requires --sqlite3, --auto, --name1, and --name2.",
99
- "Incompatible with --res, --tab, and --out."){ |v| o[:lookupfirst] = v }
100
- opts.separator ""
101
- opts.separator "Other Output Options"
102
- opts.on("-d", "--dec INT",
103
- "Decimal positions to report. By default: #{o[:dec]}"
104
- ){ |v| o[:dec] = v.to_i }
105
- opts.on("-o", "--out FILE",
106
- "Saves a file describing the alignments used for two-way ANI."
107
- ){ |v| o[:out] = v }
108
- opts.on("-r", "--res FILE",
109
- "Saves a file with the final results."){ |v| o[:res] = v }
110
- opts.on("-T", "--tab FILE",
111
- "Saves a file with the final two-way results in a tab-delimited form.",
112
- "The columns are (in that order):",
113
- "ANI, standard deviation, fragments used, fragments in the smallest genome."
114
- ){ |v| o[:tab]=v }
115
- opts.on("-a", "--auto",
116
- "ONLY outputs the ANI value in STDOUT (or nothing, if calculation fails)."
117
- ){ o[:auto] = true }
118
- opts.on("-q", "--quiet", "Run quietly (no STDERR output)"){ o[:q] = true }
119
- opts.on("-h", "--help", "Display this screen") do
120
- puts opts
121
- exit
122
- end
123
- opts.separator ""
124
- end.parse!
125
- abort "-1 is mandatory" if o[:seq1].nil?
126
- abort "-2 is mandatory" if o[:seq2].nil?
127
- abort "SQLite3 requested (-S) but sqlite3 not supported. First install gem " +
128
- "sqlite3." unless o[:sqlite3].nil? or has_sqlite3
129
- abort "Step size must be smaller than window size." if o[:step] > o[:win]
130
- o[:bin] = o[:bin]+"/" if o[:bin].size > 0
131
- if o[:lookupfirst]
132
- abort "--lookup-first needs --sqlite3" if o[:sqlite3].nil?
133
- abort "--lookup-first requires --auto" unless o[:auto]
134
- abort "--lookup-first requires --name1" if o[:seq1name].nil?
135
- abort "--lookup-first requires --name2" if o[:seq2name].nil?
136
- abort "--lookup-first conflicts with --res" unless o[:res].nil?
137
- abort "--lookup-first conflicts with --tab" unless o[:tab].nil?
138
- abort "--lookup-first conflicts with --out" unless o[:out].nil?
139
- end
140
-
141
- # Create SQLite3 file
142
- unless o[:sqlite3].nil?
143
- $stderr.puts "Accessing SQLite3 file: #{o[:sqlite3]}." unless o[:q]
144
- sqlite_db = SQLite3::Database.new o[:sqlite3]
145
- sqlite_db.execute "create table if not exists regions( " +
146
- "seq varchar(256), id int, source varchar(256), `start` int," +
147
- " `end` int )"
148
- sqlite_db.execute "create table if not exists rbm( seq1 varchar(256), " +
149
- "seq2 varchar(256), id1 int, id2 int, id float, evalue float, " +
150
- "bitscore float )"
151
- sqlite_db.execute "create table if not exists ani( seq1 varchar(256), " +
152
- "seq2 varchar(256), ani float, sd float, n int, omega int )"
153
- end
154
-
155
- # Look-up first
156
- if o[:lookupfirst]
157
- val = sqlite_db.execute "select ani from ani where seq1=? and seq2=?",
158
- [o[:seq1name], o[:seq2name]]
159
- val = sqlite_db.execute "select ani from ani where seq1=? and seq2=?",
160
- [o[:seq2name], o[:seq1name]] if val.empty?
161
- unless val.empty?
162
- puts val.first.first
163
- exit
164
- end
165
- end
166
-
167
- Dir.mktmpdir do |dir|
168
- $stderr.puts "Temporal directory: #{dir}." unless o[:q]
169
-
170
- # Create databases.
171
- $stderr.puts "Creating databases." unless o[:q]
172
- minfrg = nil
173
- seq_names = []
174
- seq_len = {}
175
- actg_cnt = {}
176
- [:seq1, :seq2].each do |seq|
177
- abort "GIs are no longer supported by NCBI. Please use NCBI-acc instead" if
178
- /^gi:/.match(o[seq])
179
- acc = /^ncbi:(\S+)/.match(o[seq])
180
- if not acc.nil?
181
- abort "NCBI-acc requested but rest-client not supported. First " +
182
- "install gem rest-client." unless has_rest_client
183
- response = RestClient.get(
184
- "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi",
185
- {params:{db:"nuccore",rettype:"fasta",id:acc[1],idtype:"acc"}})
186
- abort "Unable to reach NCBI EUtils, error code " +
187
- response.code.to_s + "." unless response.code == 200
188
- o[seq] = "#{dir}/ncbi-#{seq.to_s}.fa"
189
- fo = File.open(o[seq], "w")
190
- fo.puts response.to_str
191
- fo.close
192
- seq_names << ( o[ "#{seq}name".to_sym ].nil? ?
193
- "ncbi:#{acc[1]}" : o[ "#{seq}name".to_sym ] )
194
- else
195
- seq_names << ( o[ "#{seq}name".to_sym ].nil? ?
196
- File.basename(o[seq], ".*") : o[ "#{seq}name".to_sym ] )
197
- end
198
- $stderr.puts " Reading FastA file: #{o[seq]}" unless o[:q]
199
- sqlite_db.execute("delete from regions where seq=?",
200
- [seq_names.last]) unless o[:sqlite3].nil?
201
- buffer = ""
202
- frgs = 0
203
- seq_len[seq] = 0
204
- actg_cnt[seq] = 0
205
- seqs = 0
206
- disc = 0
207
- seqn = ""
208
- from = 1
209
- fi = File.open(o[seq], "r")
210
- fo = File.open("#{dir}/#{seq.to_s}.fa", "w")
211
- fi.each_line do |ln|
212
- if ln =~ /^>(\S+)/
213
- seqs += 1
214
- disc += buffer.size
215
- buffer = ""
216
- seqn = $1
217
- from = 1
218
- else
219
- ln.gsub!(/[^A-Za-z]/, '')
220
- seq_len[seq] += ln.length
221
- actg_cnt[seq] += ln.gsub(/[^ACTGNactgn]/,"").length
222
- buffer = buffer + ln
223
- while buffer.size > o[:win]
224
- seq_i = buffer[0, o[:win]]
225
- if seq_i =~ /^N+$/
226
- disc += seq_i.size
227
- else
228
- frgs += 1
229
- fo.puts ">#{frgs}"
230
- fo.puts seq_i
231
- sqlite_db.execute("insert into regions values(?,?,?,?,?)",
232
- [seq_names.last, frgs, seqn, from, from+o[:win]]) if
233
- not o[:sqlite3].nil? and o[:dbregions]
234
- end
235
- buffer = buffer[o[:step] .. -1]
236
- from += o[:win]
237
- end
238
- end
239
- end
240
- fi.close
241
- fo.close
242
- actg_frx = actg_cnt[seq].to_f/seq_len[seq].to_f
243
- abort "Input sequences appear to be proteins " +
244
- "(ACTGN fraction: %.2f%%)." % (actg_frx*100) if actg_frx < o[:min_actg]
245
- $stderr.puts " Created #{frgs} fragments from #{seqs} sequences, " +
246
- "discarded #{disc} bp." unless o[:q]
247
- minfrg ||= frgs
248
- minfrg = frgs if minfrg > frgs
249
- case o[:program].downcase
250
- when "blast"
251
- `"#{o[:bin]}formatdb" -i "#{dir}/#{seq.to_s}.fa" -p F`
252
- when "blast+"
253
- `"#{o[:bin]}makeblastdb" -in "#{dir}/#{seq.to_s}.fa" -dbtype nucl`
254
- when "blat"
255
- # Nothing to do
256
- else
257
- abort "Unsupported program: #{o[:program]}."
258
- end
259
- end # [:seq1, :seq2].each
260
-
261
- # Best-hits.
262
- $stderr.puts "Running one-way comparisons." unless o[:q]
263
- rbh = []
264
- id2 = 0
265
- sq2 = 0
266
- n2 = 0
267
- unless o[:sqlite3].nil?
268
- sqlite_db.execute "delete from rbm where seq1=? and seq2=?", seq_names
269
- sqlite_db.execute "delete from ani where seq1=? and seq2=?", seq_names
270
- end
271
- unless o[:out].nil?
272
- fo = File.open(o[:out], "w")
273
- fo.puts %w(identity aln.len mismatch gap.open evalue bitscore).join("\t")
274
- end
275
- res = File.open(o[:res], "w") unless o[:res].nil?
276
- [1,2].each do |i|
277
- qry_seen = []
278
- q = "#{dir}/seq#{i}.fa"
279
- s = "#{dir}/seq#{i==1?2:1}.fa"
280
- case o[:program].downcase
281
- when "blast"
282
- `"#{o[:bin]}blastall" -p blastn -d "#{s}" -i "#{q}" \
283
- -F F -v 1 -b 1 -a #{o[:thr]} -m 8 -o "#{dir}/#{i}.tab"`
284
- when "blast+"
285
- `"#{o[:bin]}blastn" -db "#{s}" -query "#{q}" \
286
- -dust no -max_target_seqs 1 \
287
- -num_threads #{o[:thr]} -outfmt 6 -out "#{dir}/#{i}.tab"`
288
- when "blat"
289
- `#{o[:bin]}blat "#{s}" "#{q}" -out=blast8 "#{dir}/#{i}.tab"`
290
- else
291
- abort "Unsupported program: #{o[:program]}."
292
- end
293
- fh = File.open("#{dir}/#{i}.tab", "r")
294
- id = 0
295
- sq = 0
296
- n = 0
297
- fh.each_line do |ln|
298
- ln.chomp!
299
- row = ln.split(/\t/)
300
- if qry_seen[ row[0].to_i ].nil? and row[3].to_i >= o[:len] and
301
- row[2].to_f >= o[:id]
302
- qry_seen[ row[0].to_i ] = 1
303
- identity_corr = 100 - (100-row[2].to_f)/(o[:correct] ? 0.8621 : 1.0)
304
- id += identity_corr
305
- sq += identity_corr ** 2
306
- n += 1
307
- if i==1
308
- rbh[ row[0].to_i ] = row[1].to_i
309
- else
310
- if !rbh[ row[1].to_i ].nil? and rbh[ row[1].to_i ]==row[0].to_i
311
- id2 += identity_corr
312
- sq2 += identity_corr ** 2
313
- n2 += 1
314
- fo.puts [identity_corr,row[3..5],
315
- row[10..11]].join("\t") unless o[:out].nil?
316
- sqlite_db.execute("insert into rbm values(?,?,?,?,?,?,?)",
317
- seq_names + [row[1], row[0], row[2], row[10], row[11]]
318
- ) if not o[:sqlite3].nil? and o[:dbrbm]
319
- end
320
- end
321
- end
322
- end
323
- fh.close
324
- if n < o[:hits]
325
- puts "Insuffient hits to estimate one-way ANI: #{n}." unless o[:auto]
326
- res.puts "Insufficient hits to estimate one-way ANI: #{n}" unless
327
- o[:res].nil?
328
- else
329
- printf "! One-way ANI %d: %.#{o[:dec]}f%% (SD: %.#{o[:dec]}f%%), " +
330
- "from %i fragments.\n", i, id/n, (sq/n - (id/n)**2)**0.5, n unless
331
- o[:auto]
332
- res.puts sprintf "<b>One-way ANI %d:</b> %.#{o[:dec]}f%% " +
333
- "(SD: %.#{o[:dec]}f%%), from %i fragments.<br/>", i, id/n,
334
- (sq/n - (id/n)**2)**0.5, n unless o[:res].nil?
335
- end
336
- end # [1,2].each
337
- if n2 < o[:hits]
338
- puts "Insufficient hits to estimate two-way ANI: #{n2}" unless o[:auto]
339
- res.puts "Insufficient hits to estimate two-way ANI: #{n2}" unless
340
- o[:res].nil?
341
- else
342
- ani = id2/n2
343
- ani_sd = (sq2/n2 - (id2/n2)**2)**0.5
344
- printf "! Two-way ANI : %.#{o[:dec]}f%% (SD: %.#{o[:dec]}f%%), " +
345
- "from %i fragments.\n", ani, ani_sd, n2 unless o[:auto]
346
- res.puts sprintf "<b>Two-way ANI:</b> %.#{o[:dec]}f%% " +
347
- "(SD: %.#{o[:dec]}f%%), from %i fragments.<br/>",
348
- ani, ani_sd, n2 unless o[:res].nil?
349
- unless o[:tab].nil?
350
- tab = File.open(o[:tab], "w")
351
- tab.printf "%.#{o[:dec]}f\t%.#{o[:dec]}f\t%i\t%i\n",
352
- ani, ani_sd, n2, minfrg
353
- tab.close
354
- end
355
- sqlite_db.execute("insert into ani values(?,?,?,?,?,?)",
356
- seq_names + [ani, ani_sd, n2, minfrg]) unless o[:sqlite3].nil?
357
- puts ani if o[:auto]
358
- end
359
- res.close unless o[:res].nil?
360
- fo.close unless o[:out].nil?
361
- end
362
-
@@ -1,137 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # frozen_string_literal: true
4
-
5
- $:.push File.expand_path('../lib', __FILE__)
6
- require 'enveomics_rb/enveomics'
7
- require 'enveomics_rb/anir'
8
- $VERSION = 1.0
9
-
10
- o = {
11
- q: false, threads: 2,
12
- r_format: :fastq, g_format: :fasta, m_format: :sam, r_type: :single,
13
- identity: 95.0, algorithm: :auto, bimodality: 0.5, bin_size: 1.0,
14
- coefficient: :sarle
15
- }
16
-
17
- OptionParser.new do |opt|
18
- cmd = File.basename($0)
19
- opt.banner = <<~BANNER
20
-
21
- [Enveomics Collection: #{cmd} v#{$VERSION}]
22
-
23
- Estimates ANIr: the Average Nucleotide Identity of reads against a genome
24
-
25
- Usage
26
- # [ Input/output modes ]
27
- # Run mapping and (optionally) save it as SAM
28
- # Requires bowtie2
29
- #{cmd} -r reads.fastq -g genome.fasta -m out_map.sam [options]
30
-
31
- # Read mapping from BAM file
32
- # Requires samtools
33
- #{cmd} -m map.bam --m-format bam [options]
34
-
35
- # Read mapping from other formats: SAM or Tabular BLAST
36
- #{cmd} -m map.blast --m-format tab [options]
37
-
38
- # Read a list of identities as percentage (contig filtering off)
39
- #{cmd} -m identities.txt --m-format list [options]
40
-
41
- # [ Identity threshold modes ]
42
- #{cmd} -i 95 -a fix [options] # Set fixed identity threshold
43
- #{cmd} -a gmm [options] # Find valley by EM of GMM
44
- #{cmd} -a auto [options] # Pick method by bimodality (default)"
45
-
46
- BANNER
47
-
48
- opt.separator 'Input/Output'
49
- opt.on('-r', '--reads PATH', 'Metagenomic reads') { |v| o[:r] = v }
50
- opt.on('-g', '--genome PATH', 'Genome assembly') { |v| o[:g] = v }
51
- opt.on('-m', '--mapping PATH', 'Mapping file') { |v| o[:m] = v }
52
- opt.on('-L', '--list PATH', 'Output file with identities') { |v| o[:L] = v }
53
- opt.on('-H', '--hist PATH', 'Output file with histogram') { |v| o[:H] = v }
54
- opt.on(
55
- '-T', '--tab PATH', 'Output file with results in tabular format'
56
- ) { |v| o[:T] = v }
57
- opt.separator ''
58
-
59
- opt.separator 'Formats'
60
- opt.on(
61
- '--r-format STRING',
62
- 'Metagenomic reads format: fastq (default) or fasta',
63
- 'Both options support compression with .gz file extension'
64
- ) { |v| o[:r_format] = v.downcase.to_sym }
65
- opt.on(
66
- '--r-type STRING', 'Type of metagenomic reads:',
67
- '~ single (default): Single reads',
68
- '~ coupled: Coupled reads in separate files (-m must be comma-delimited)',
69
- '~ interleaved: Coupled reads in a single interposed file'
70
- ) { |v| o[:r_type] = v.downcase.to_sym }
71
- opt.on(
72
- '--g-format STRING',
73
- 'Genome assembly format: fasta (default) or list',
74
- 'Both options support compression with .gz file extension',
75
- 'If passed in mapping-read mode, filters only matches to these contigs'
76
- ) { |v| o[:g_format] = v.downcase.to_sym }
77
- opt.on(
78
- '--m-format STRING',
79
- 'Mapping file format: sam (default), bam, tab, or list',
80
- 'sam, tab, and list options support compression with .gz file extension'
81
- ) { |v| o[:m_format] = v.downcase.to_sym }
82
- opt.separator ''
83
-
84
- opt.separator 'Identity threshold'
85
- opt.on(
86
- '-i', '--identity FLOAT', Float,
87
- "Set a fixed threshold of percent identity (default: #{o[:identity]})"
88
- ) { |v| o[:identity] = v }
89
- opt.on(
90
- '-a', '--algorithm STRING',
91
- 'Set an algorithm to automatically detect identity threshold:',
92
- '~ gmm: Valley detection by E-M of Gaussian Mixture Model',
93
- '~ fix: Fixed threshold, see -i',
94
- '~ auto (default): Pick gmm or fix depending on bimodality, see -b'
95
- ) { |v| o[:algorithm] = v.downcase.to_sym }
96
- opt.on(
97
- '-b', '--bimodality FLOAT', Float,
98
- 'Threshold of bimodality below which the algorithm is set to fix',
99
- 'The coefficient used is the de Michele & Accantino (2014) B index',
100
- "By default: #{o[:bimodality]}"
101
- ) { |v| o[:bimodality] = v }
102
- opt.on(
103
- '--coefficient STRING',
104
- 'Coefficient of bimodality for -a auto:',
105
- '~ sarle (default): Sarle\'s bimodality coefficient b',
106
- '~ dma: de Michele and Accatino (2014 PLoS ONE) B index, use with -b 0.1'
107
- ) { |v| o[:coefficient] = v.downcase.to_sym }
108
- opt.on(
109
- '--bin-size FLOAT', Float,
110
- "Width of histogram bins (in percent identity). By default: #{o[:bin_size]}"
111
- ) { |v| o[:bin_size] = v }
112
- opt.separator ''
113
-
114
- opt.separator 'General'
115
- opt.on(
116
- '-t', '--threads INT', Integer, 'Threads to use'
117
- ) { |v| o[:threads] = v }
118
- opt.on('-l', '--log PATH', 'Log file to save output') { |v| o[:log] = v }
119
- opt.on('-q', '--quiet', 'Run quietly') { |v| o[:q] = v }
120
- opt.on('-h', '--help', 'Display this screen') do
121
- puts opt
122
- exit
123
- end
124
- opt.separator ''
125
- end.parse!
126
-
127
- anir = Enveomics::ANIr.new(o)
128
- anir.go!
129
- if o[:T]
130
- File.open(o[:T], 'w') do |fh|
131
- fh.puts "anir\tsd\treads\tid_threshold"
132
- fh.puts [
133
- anir.sample.mean, anir.sample.sd, anir.sample.n, anir.opts[:identity]
134
- ].join("\t")
135
- end
136
- end
137
-
@@ -1,102 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- #
4
- # @author: Luis M. Rodriguez-R
5
- # @license: artistic license 2.0
6
- #
7
-
8
- require "optparse"
9
-
10
- o = { q:false, prec:6 }
11
- ARGV << "-h" if ARGV.empty?
12
- OptionParser.new do |opts|
13
- opts.banner = "
14
- Calculates the Rand Index and the Adjusted Rand Index between two clusterings.
15
-
16
- The clustering format is a raw text file with one cluster per line, each
17
- defined as comma-delimited members, and a header line (ignored). Note that this
18
- is equivalent to the OGs format for 1 genome.
19
-
20
- Usage: #{$0} [options]"
21
- opts.separator ""
22
- opts.separator "Mandatory"
23
- opts.on("-1", "--clust1 FILE", "First input file."){ |v| o[:clust1]=v }
24
- opts.on("-2", "--clust2 FILE", "Second input file."){ |v| o[:clust2]=v }
25
- opts.separator ""
26
- opts.separator "Other options"
27
- opts.on("-p", "--prec INT",
28
- "Precision to report. By default: #{o[:prec]}"){ |v| o[:prec]=v.to_i }
29
- opts.on("-q", "--quiet", "Run quietly (no STDERR output)."){ o[:q] = true }
30
- opts.on("-h", "--help", "Display this screen.") do
31
- puts opts
32
- exit
33
- end
34
- opts.separator ""
35
- end.parse!
36
- abort "-1 is mandatory" if o[:clust1].nil?
37
- abort "-2 is mandatory" if o[:clust2].nil?
38
-
39
- def load_clust(file, q)
40
- $stderr.puts "Reading clusters in '#{file}'." unless q
41
- out = []
42
- File.open(file, "r") do |fh|
43
- fh.each_line do |ln|
44
- next if $.==1
45
- out[$.-2] = ln.chomp.split(",")
46
- end
47
- end
48
- $stderr.puts " Loaded clusters: #{out.size}." unless q
49
- out
50
- end
51
-
52
- def choose_2(n)
53
- return 0 if n<2
54
- n*(n-1)/2
55
- end
56
-
57
- ##### MAIN:
58
- begin
59
- # Read the pre-computed OGs
60
- clust1 = load_clust(o[:clust1], o[:q])
61
- clust2 = load_clust(o[:clust2], o[:q])
62
-
63
- # Contingency table
64
- $stderr.puts "Estimating the contingency table." unless o[:q]
65
- cont = []
66
- b_sums = []
67
- clust1.each_with_index do |x_i, i|
68
- cont[i] = []
69
- clust2.each_with_index do |y_j, j|
70
- cont[i][j] = (x_i & y_j).size
71
- b_sums[j]||= 0
72
- b_sums[j] += cont[i][j]
73
- end
74
- end
75
- a_sums = cont.map{ |i| i.inject(:+) }
76
-
77
- # Calculate variables
78
- # - see http://i11www.iti.kit.edu/extra/publications/ww-cco-06.pdf
79
- $stderr.puts "Estimating indexes." unless o[:q]
80
- n = clust1.map{ |i| i.size }.inject(:+)
81
- pairs = choose_2(n)
82
- n11 = clust1.each_index.map do |i|
83
- clust2.each_index.map do |j|
84
- choose_2(cont[i][j])
85
- end.inject(:+)
86
- end.inject(:+).to_f
87
- t1 = a_sums.map{ |a_i| choose_2(a_i) }.inject(:+).to_f
88
- t2 = b_sums.map{ |b_j| choose_2(b_j) }.inject(:+).to_f
89
- t3 = 2*t1*t2/(n*(n-1))
90
- n00 = pairs + n11 - t1 - t2
91
- r_index = (n11 + n00)/pairs
92
- r_adjusted = (n11 - t3)/((t1+t2)/2 - t3)
93
-
94
- # Report
95
- puts "Rand Index = %.#{o[:prec]}f" % r_index
96
- puts "Adjusted Rand Index = %.#{o[:prec]}f" % r_adjusted
97
- rescue => err
98
- $stderr.puts "Exception: #{err}\n\n"
99
- err.backtrace.each { |l| $stderr.puts l + "\n" }
100
- err
101
- end
102
-