miga-base 1.2.15.2 → 1.2.15.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306)
  1. checksums.yaml +4 -4
  2. data/lib/miga/cli/action/download/gtdb.rb +4 -1
  3. data/lib/miga/cli/action/gtdb_get.rb +4 -0
  4. data/lib/miga/daemon.rb +4 -1
  5. data/lib/miga/lair.rb +6 -4
  6. data/lib/miga/remote_dataset/download.rb +3 -2
  7. data/lib/miga/remote_dataset.rb +25 -7
  8. data/lib/miga/taxonomy.rb +6 -0
  9. data/lib/miga/version.rb +2 -2
  10. metadata +6 -302
  11. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +0 -41964
  12. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +0 -32439
  13. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +0 -62056
  14. data/utils/FastAAI/FastAAI +0 -3659
  15. data/utils/FastAAI/FastAAI-legacy/FastAAI +0 -1336
  16. data/utils/FastAAI/FastAAI-legacy/kAAI_v1.0_virus.py +0 -1296
  17. data/utils/FastAAI/README.md +0 -84
  18. data/utils/enveomics/Docs/recplot2.md +0 -244
  19. data/utils/enveomics/Examples/aai-matrix.bash +0 -66
  20. data/utils/enveomics/Examples/ani-matrix.bash +0 -66
  21. data/utils/enveomics/Examples/essential-phylogeny.bash +0 -105
  22. data/utils/enveomics/Examples/unus-genome-phylogeny.bash +0 -100
  23. data/utils/enveomics/LICENSE.txt +0 -73
  24. data/utils/enveomics/Makefile +0 -52
  25. data/utils/enveomics/Manifest/Tasks/aasubs.json +0 -103
  26. data/utils/enveomics/Manifest/Tasks/blasttab.json +0 -790
  27. data/utils/enveomics/Manifest/Tasks/distances.json +0 -161
  28. data/utils/enveomics/Manifest/Tasks/fasta.json +0 -802
  29. data/utils/enveomics/Manifest/Tasks/fastq.json +0 -291
  30. data/utils/enveomics/Manifest/Tasks/graphics.json +0 -126
  31. data/utils/enveomics/Manifest/Tasks/mapping.json +0 -137
  32. data/utils/enveomics/Manifest/Tasks/ogs.json +0 -382
  33. data/utils/enveomics/Manifest/Tasks/other.json +0 -906
  34. data/utils/enveomics/Manifest/Tasks/remote.json +0 -355
  35. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +0 -650
  36. data/utils/enveomics/Manifest/Tasks/tables.json +0 -308
  37. data/utils/enveomics/Manifest/Tasks/trees.json +0 -68
  38. data/utils/enveomics/Manifest/Tasks/variants.json +0 -111
  39. data/utils/enveomics/Manifest/categories.json +0 -165
  40. data/utils/enveomics/Manifest/examples.json +0 -162
  41. data/utils/enveomics/Manifest/tasks.json +0 -4
  42. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +0 -69
  43. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +0 -1
  44. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +0 -1
  45. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +0 -1
  46. data/utils/enveomics/Pipelines/assembly.pbs/README.md +0 -189
  47. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +0 -112
  48. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +0 -23
  49. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +0 -44
  50. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +0 -50
  51. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +0 -37
  52. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +0 -68
  53. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +0 -49
  54. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +0 -80
  55. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +0 -57
  56. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +0 -63
  57. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +0 -38
  58. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +0 -73
  59. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +0 -21
  60. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +0 -72
  61. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +0 -98
  62. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +0 -1
  63. data/utils/enveomics/Pipelines/blast.pbs/README.md +0 -127
  64. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +0 -109
  65. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +0 -128
  66. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +0 -16
  67. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +0 -22
  68. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +0 -26
  69. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +0 -89
  70. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +0 -29
  71. data/utils/enveomics/Pipelines/idba.pbs/README.md +0 -49
  72. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +0 -95
  73. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +0 -56
  74. data/utils/enveomics/Pipelines/trim.pbs/README.md +0 -54
  75. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +0 -70
  76. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +0 -130
  77. data/utils/enveomics/README.md +0 -42
  78. data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +0 -171
  79. data/utils/enveomics/Scripts/Aln.cat.rb +0 -221
  80. data/utils/enveomics/Scripts/Aln.convert.pl +0 -35
  81. data/utils/enveomics/Scripts/AlphaDiversity.pl +0 -152
  82. data/utils/enveomics/Scripts/BedGraph.tad.rb +0 -93
  83. data/utils/enveomics/Scripts/BedGraph.window.rb +0 -71
  84. data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +0 -102
  85. data/utils/enveomics/Scripts/BlastTab.addlen.rb +0 -63
  86. data/utils/enveomics/Scripts/BlastTab.advance.bash +0 -48
  87. data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +0 -55
  88. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +0 -104
  89. data/utils/enveomics/Scripts/BlastTab.cogCat.rb +0 -76
  90. data/utils/enveomics/Scripts/BlastTab.filter.pl +0 -47
  91. data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +0 -194
  92. data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +0 -104
  93. data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +0 -157
  94. data/utils/enveomics/Scripts/BlastTab.recplot2.R +0 -48
  95. data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +0 -86
  96. data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +0 -119
  97. data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +0 -86
  98. data/utils/enveomics/Scripts/BlastTab.subsample.pl +0 -47
  99. data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +0 -114
  100. data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +0 -90
  101. data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +0 -123
  102. data/utils/enveomics/Scripts/Chao1.pl +0 -97
  103. data/utils/enveomics/Scripts/CharTable.classify.rb +0 -234
  104. data/utils/enveomics/Scripts/EBIseq2tax.rb +0 -83
  105. data/utils/enveomics/Scripts/FastA.N50.pl +0 -60
  106. data/utils/enveomics/Scripts/FastA.extract.rb +0 -152
  107. data/utils/enveomics/Scripts/FastA.filter.pl +0 -52
  108. data/utils/enveomics/Scripts/FastA.filterLen.pl +0 -28
  109. data/utils/enveomics/Scripts/FastA.filterN.pl +0 -60
  110. data/utils/enveomics/Scripts/FastA.fragment.rb +0 -100
  111. data/utils/enveomics/Scripts/FastA.gc.pl +0 -42
  112. data/utils/enveomics/Scripts/FastA.interpose.pl +0 -93
  113. data/utils/enveomics/Scripts/FastA.length.pl +0 -38
  114. data/utils/enveomics/Scripts/FastA.mask.rb +0 -89
  115. data/utils/enveomics/Scripts/FastA.per_file.pl +0 -36
  116. data/utils/enveomics/Scripts/FastA.qlen.pl +0 -57
  117. data/utils/enveomics/Scripts/FastA.rename.pl +0 -65
  118. data/utils/enveomics/Scripts/FastA.revcom.pl +0 -23
  119. data/utils/enveomics/Scripts/FastA.sample.rb +0 -98
  120. data/utils/enveomics/Scripts/FastA.slider.pl +0 -85
  121. data/utils/enveomics/Scripts/FastA.split.pl +0 -55
  122. data/utils/enveomics/Scripts/FastA.split.rb +0 -79
  123. data/utils/enveomics/Scripts/FastA.subsample.pl +0 -131
  124. data/utils/enveomics/Scripts/FastA.tag.rb +0 -65
  125. data/utils/enveomics/Scripts/FastA.toFastQ.rb +0 -69
  126. data/utils/enveomics/Scripts/FastA.wrap.rb +0 -48
  127. data/utils/enveomics/Scripts/FastQ.filter.pl +0 -54
  128. data/utils/enveomics/Scripts/FastQ.interpose.pl +0 -90
  129. data/utils/enveomics/Scripts/FastQ.maskQual.rb +0 -89
  130. data/utils/enveomics/Scripts/FastQ.offset.pl +0 -90
  131. data/utils/enveomics/Scripts/FastQ.split.pl +0 -53
  132. data/utils/enveomics/Scripts/FastQ.tag.rb +0 -70
  133. data/utils/enveomics/Scripts/FastQ.test-error.rb +0 -81
  134. data/utils/enveomics/Scripts/FastQ.toFastA.awk +0 -24
  135. data/utils/enveomics/Scripts/GFF.catsbj.pl +0 -127
  136. data/utils/enveomics/Scripts/GenBank.add_fields.rb +0 -84
  137. data/utils/enveomics/Scripts/HMM.essential.rb +0 -351
  138. data/utils/enveomics/Scripts/HMM.haai.rb +0 -168
  139. data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +0 -83
  140. data/utils/enveomics/Scripts/JPlace.distances.rb +0 -88
  141. data/utils/enveomics/Scripts/JPlace.to_iToL.rb +0 -320
  142. data/utils/enveomics/Scripts/M5nr.getSequences.rb +0 -81
  143. data/utils/enveomics/Scripts/MeTaxa.distribution.pl +0 -198
  144. data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +0 -35
  145. data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +0 -49
  146. data/utils/enveomics/Scripts/NCBIacc2tax.rb +0 -92
  147. data/utils/enveomics/Scripts/Newick.autoprune.R +0 -27
  148. data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +0 -228
  149. data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +0 -32
  150. data/utils/enveomics/Scripts/RefSeq.download.bash +0 -48
  151. data/utils/enveomics/Scripts/SRA.download.bash +0 -55
  152. data/utils/enveomics/Scripts/TRIBS.plot-test.R +0 -36
  153. data/utils/enveomics/Scripts/TRIBS.test.R +0 -39
  154. data/utils/enveomics/Scripts/Table.barplot.R +0 -31
  155. data/utils/enveomics/Scripts/Table.df2dist.R +0 -30
  156. data/utils/enveomics/Scripts/Table.filter.pl +0 -61
  157. data/utils/enveomics/Scripts/Table.merge.pl +0 -77
  158. data/utils/enveomics/Scripts/Table.prefScore.R +0 -60
  159. data/utils/enveomics/Scripts/Table.replace.rb +0 -69
  160. data/utils/enveomics/Scripts/Table.round.rb +0 -63
  161. data/utils/enveomics/Scripts/Table.split.pl +0 -57
  162. data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +0 -227
  163. data/utils/enveomics/Scripts/VCF.KaKs.rb +0 -147
  164. data/utils/enveomics/Scripts/VCF.SNPs.rb +0 -88
  165. data/utils/enveomics/Scripts/aai.rb +0 -421
  166. data/utils/enveomics/Scripts/ani.rb +0 -362
  167. data/utils/enveomics/Scripts/anir.rb +0 -137
  168. data/utils/enveomics/Scripts/clust.rand.rb +0 -102
  169. data/utils/enveomics/Scripts/gi2tax.rb +0 -103
  170. data/utils/enveomics/Scripts/in_silico_GA_GI.pl +0 -96
  171. data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
  172. data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
  173. data/utils/enveomics/Scripts/lib/enveomics.R +0 -1
  174. data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +0 -293
  175. data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +0 -175
  176. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +0 -24
  177. data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +0 -17
  178. data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +0 -30
  179. data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +0 -253
  180. data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +0 -88
  181. data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +0 -182
  182. data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +0 -49
  183. data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +0 -74
  184. data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +0 -237
  185. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +0 -31
  186. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +0 -152
  187. data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +0 -3
  188. data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +0 -74
  189. data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +0 -135
  190. data/utils/enveomics/Scripts/ogs.annotate.rb +0 -88
  191. data/utils/enveomics/Scripts/ogs.core-pan.rb +0 -160
  192. data/utils/enveomics/Scripts/ogs.extract.rb +0 -125
  193. data/utils/enveomics/Scripts/ogs.mcl.rb +0 -186
  194. data/utils/enveomics/Scripts/ogs.rb +0 -104
  195. data/utils/enveomics/Scripts/ogs.stats.rb +0 -131
  196. data/utils/enveomics/Scripts/rbm-legacy.rb +0 -172
  197. data/utils/enveomics/Scripts/rbm.rb +0 -108
  198. data/utils/enveomics/Scripts/sam.filter.rb +0 -148
  199. data/utils/enveomics/Tests/Makefile +0 -10
  200. data/utils/enveomics/Tests/Mgen_M2288.faa +0 -3189
  201. data/utils/enveomics/Tests/Mgen_M2288.fna +0 -8282
  202. data/utils/enveomics/Tests/Mgen_M2321.fna +0 -8288
  203. data/utils/enveomics/Tests/Nequ_Kin4M.faa +0 -2970
  204. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
  205. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +0 -7
  206. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +0 -17
  207. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +0 -137
  208. data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +0 -123
  209. data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +0 -200
  210. data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +0 -55
  211. data/utils/enveomics/Tests/alkB.nwk +0 -1
  212. data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +0 -13
  213. data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +0 -17
  214. data/utils/enveomics/Tests/hiv1.faa +0 -59
  215. data/utils/enveomics/Tests/hiv1.fna +0 -134
  216. data/utils/enveomics/Tests/hiv2.faa +0 -70
  217. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +0 -233
  218. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +0 -1
  219. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +0 -233
  220. data/utils/enveomics/Tests/phyla_counts.tsv +0 -10
  221. data/utils/enveomics/Tests/primate_lentivirus.ogs +0 -11
  222. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +0 -9
  223. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +0 -8
  224. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +0 -6
  225. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +0 -9
  226. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +0 -6
  227. data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +0 -6
  228. data/utils/enveomics/build_enveomics_r.bash +0 -45
  229. data/utils/enveomics/enveomics.R/DESCRIPTION +0 -31
  230. data/utils/enveomics/enveomics.R/NAMESPACE +0 -39
  231. data/utils/enveomics/enveomics.R/R/autoprune.R +0 -155
  232. data/utils/enveomics/enveomics.R/R/barplot.R +0 -184
  233. data/utils/enveomics/enveomics.R/R/cliopts.R +0 -135
  234. data/utils/enveomics/enveomics.R/R/df2dist.R +0 -154
  235. data/utils/enveomics/enveomics.R/R/growthcurve.R +0 -331
  236. data/utils/enveomics/enveomics.R/R/prefscore.R +0 -79
  237. data/utils/enveomics/enveomics.R/R/recplot.R +0 -354
  238. data/utils/enveomics/enveomics.R/R/recplot2.R +0 -1631
  239. data/utils/enveomics/enveomics.R/R/tribs.R +0 -583
  240. data/utils/enveomics/enveomics.R/R/utils.R +0 -80
  241. data/utils/enveomics/enveomics.R/README.md +0 -81
  242. data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
  243. data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
  244. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -16
  245. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -16
  246. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -16
  247. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +0 -25
  248. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +0 -46
  249. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +0 -23
  250. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +0 -47
  251. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +0 -23
  252. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +0 -23
  253. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +0 -40
  254. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +0 -103
  255. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +0 -67
  256. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +0 -24
  257. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +0 -19
  258. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +0 -45
  259. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +0 -44
  260. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +0 -47
  261. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +0 -75
  262. data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +0 -50
  263. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +0 -44
  264. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +0 -139
  265. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +0 -45
  266. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +0 -24
  267. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +0 -77
  268. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +0 -25
  269. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +0 -21
  270. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +0 -19
  271. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +0 -19
  272. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +0 -47
  273. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +0 -29
  274. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +0 -18
  275. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +0 -45
  276. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +0 -36
  277. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +0 -19
  278. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +0 -19
  279. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +0 -27
  280. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +0 -52
  281. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +0 -17
  282. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +0 -51
  283. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +0 -43
  284. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +0 -82
  285. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +0 -59
  286. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +0 -27
  287. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +0 -36
  288. data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +0 -23
  289. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +0 -68
  290. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +0 -28
  291. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +0 -27
  292. data/utils/enveomics/enveomics.R/man/growth.curves.Rd +0 -14
  293. data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +0 -13
  294. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +0 -78
  295. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +0 -46
  296. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +0 -45
  297. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +0 -125
  298. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +0 -19
  299. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +0 -19
  300. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +0 -19
  301. data/utils/enveomics/globals.mk +0 -8
  302. data/utils/enveomics/manifest.json +0 -9
  303. data/utils/multitrim/Multitrim How-To.pdf +0 -0
  304. data/utils/multitrim/README.md +0 -67
  305. data/utils/multitrim/multitrim.py +0 -1555
  306. data/utils/multitrim/multitrim.yml +0 -13
@@ -1,60 +0,0 @@
1
- #!/usr/bin/env Rscript
2
-
3
- #= Load stuff
4
- args <- commandArgs(trailingOnly = FALSE)
5
- enveomics_R <- file.path(
6
- dirname(sub('^--file=', '', args[grep('^--file=', args)])),
7
- 'lib',
8
- 'enveomics.R'
9
- )
10
- for(file in c('cliopts.R','utils.R','prefscore.R'))
11
- source(file.path(enveomics_R, 'R', file))
12
-
13
- #= Generate interface
14
- opt <- enve.cliopts(
15
- enve.prefscore,
16
- file.path(enveomics_R, 'man', 'enve.prefscore.Rd'),
17
- positional_arguments = c(1, 4),
18
- usage = 'usage: %prog [options] output.tsv [output.pdf [width height]]',
19
- mandatory = c('x', 'set'),
20
- number = c('signif.thr'),
21
- ignore = c('plot'),
22
- o_desc = list(
23
- x = 'A tab-delimited table of presence/absence (1/0) with species as rows and samples as columns.',
24
- set = 'A list of sample names that constitute the test set, one per line',
25
- ignore = 'A list of species to exclude from the analysis, one per line'
26
- )
27
- )
28
-
29
- #= Set output files
30
- opt$options[['x']] <- read.table(
31
- opt$options[['x']],
32
- header = TRUE,
33
- row.names = 1,
34
- sep = '\t'
35
- )
36
- opt$options[['set']] <- read.table(
37
- opt$options[['set']],
38
- header = FALSE,
39
- sep = '\t',
40
- as.is = TRUE
41
- )[,1]
42
- if(!is.null(opt$options[['ignore']]))
43
- opt$options[['ignore']] <- read.table(
44
- opt$options[['ignore']],
45
- header = FALSE,
46
- sep = '\t',
47
- as.is = TRUE
48
- )[,1]
49
- if(length(opt$args) > 1) {
50
- args <- as.list(opt$args[-1])
51
- for(i in 2:3) if(length(args) >= i) args[[i]] <- as.numeric(args[[i]])
52
- do.call('pdf', args)
53
- } else {
54
- opt$options[['plot']] <- FALSE
55
- }
56
-
57
- #= Run it!
58
- y <- do.call('enve.prefscore', opt$options)
59
- write.table(y, opt$args[1], quote = FALSE, sep = '\t', col.names = FALSE)
60
- if(length(opt$args)>1) ttt <- dev.off()
@@ -1,69 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- #
4
- # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
5
- # @update Feb 01 2016
6
- # @license artistic license 2.0
7
- #
8
-
9
- require "optparse"
10
-
11
- o = {delimiter: "\t", key: 1, default: ""}
12
- ARGV << "-h" if ARGV.size==0
13
- OptionParser.new do |opts|
14
- opts.banner = "\nReplaces a field in a table using a mapping file."
15
- opts.on("-m", "--map FILE",
16
- "Mapping file with two columns (key and replacement)."){ |v| o[:map] = v }
17
- opts.on("-i", "--in FILE", "Input table."){ |v| o[:in] = v }
18
- opts.on("-o", "--out FILE", "Output table."){ |v| o[:out] = v }
19
- opts.on("-k", "--key INT",
20
- "Column to replace in --in. By deafult: 1."){ |v| o[:key] = v.to_i }
21
- opts.on("-u", "--unknown STR",
22
- "String to use whenever the key is not found in --map."
23
- ){ |v| o[:default] = v }
24
- opts.on("-d", "--delimiter STR",
25
- "String delimiting columns. By default, tabulation."
26
- ){ |v| o[:delimiter] = v }
27
- opts.on("-h", "--help", "Display this screen") do
28
- puts opts
29
- exit
30
- end
31
- opts.separator ""
32
- end.parse!
33
- abort "-m is mandatory" if o[:map].nil?
34
- abort "-i is mandatory" if o[:in].nil?
35
- abort "-o is mandatory" if o[:out].nil?
36
-
37
- class String
38
- def is_number?
39
- true if Float(self) rescue false
40
- end
41
- end
42
-
43
- begin
44
- # Read mapping file
45
- ifh = File.open(o[:map], "r")
46
- map = {}
47
- while(ln = ifh.gets)
48
- row = ln.chomp.split(o[:delimiter])
49
- map[ row[0] ] = row[1]
50
- end
51
- ifh.close
52
- # Process table
53
- ifh = File.open(o[:in], "r")
54
- ofh = File.open(o[:out], "w")
55
- while(ln = ifh.gets)
56
- row = ln.chomp.split(o[:delimiter])
57
- k = row[ o[:key]-1 ]
58
- v = map[ k ]
59
- v = o[:default] if v.nil?
60
- row[ o[:key]-1 ] = v
61
- ofh.puts(row.join(o[:delimiter]))
62
- end
63
- ifh.close
64
- ofh.close
65
- rescue => err
66
- $stderr.puts "Exception: #{err}\n\n"
67
- err.backtrace.each { |l| $stderr.puts " - " + l + "\n" }
68
- err
69
- end
@@ -1,63 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- #
4
- # @author: Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
5
- # @update: Feb 04 2015
6
- # @license: artistic license 2.0
7
- #
8
-
9
- require 'optparse'
10
-
11
- o = {:ndigits=>0, :action=>:round, :delimiter=>"\t"}
12
- ARGV << '-h' if ARGV.size==0
13
- OptionParser.new do |opts|
14
- opts.banner = "\nRounds numbers in a table."
15
- opts.on("-i", "--in FILE", "Input table."){ |v| o[:in] = v}
16
- opts.on("-o", "--out FILE", "Output table."){ |v| o[:out] = v }
17
- opts.on("-n", "--ndigits INT", "Number of decimal digits. By default: #{o[:ndigits]}"){ |v| o[:ndigits] = v.to_i }
18
- opts.on("-f", "--floor", "Floors the values instead of rounding them. Ignores -n."){ o[:action] = :floor }
19
- opts.on("-c", "--ceil", "Ceils the values instead of rounding them. Ignores -n."){ o[:action] = :ceil }
20
- opts.on("-d", "--delimiter STR", "String delimiting columns. By default, tabulation."){ |v| o[:delimiter] = v }
21
- opts.on("-h", "--help", "Display this screen") do
22
- puts opts
23
- exit
24
- end
25
- opts.separator ""
26
- end.parse!
27
- abort "-i is mandatory" if o[:in].nil?
28
- abort "-o is mandatory" if o[:out].nil?
29
-
30
- class String
31
- def is_number?
32
- true if Float(self) rescue false
33
- end
34
- end
35
-
36
- begin
37
- ifh = File.open(o[:in], "r")
38
- ofh = File.open(o[:out], "w")
39
- while(ln = ifh.gets)
40
- ln.chomp!
41
- row = []
42
- ln.split(o[:delimiter]).each do |value|
43
- if value.is_number?
44
- case o[:action]
45
- when :round
46
- value = value.to_f.round(o[:ndigits])
47
- when :floor
48
- value = value.to_f.floor
49
- when :ceil
50
- value = value.to_f.ceil
51
- end
52
- end
53
- row.push value.to_s
54
- end
55
- ofh.puts(row.join(o[:delimiter]))
56
- end
57
- ifh.close
58
- ofh.close
59
- rescue => err
60
- $stderr.puts "Exception: #{err}\n\n"
61
- err.backtrace.each { |l| $stderr.puts " - " + l + "\n" }
62
- err
63
- end
@@ -1,57 +0,0 @@
1
- #!/usr/bin/env perl
2
- #
3
- # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
4
- # @update Feb-01-2016
5
- # @license artistic license 2.0
6
- #
7
-
8
- use warnings;
9
- use strict;
10
- use Getopt::Std;
11
- use Symbol;
12
-
13
- my %o;
14
- getopts('i:o:d:e:h', \%o);
15
- my $file = shift @ARGV;
16
-
17
- ($file and not $o{h}) or die "
18
- .Description:
19
- Split a file with multiple columns into multiple two-columns lists.
20
-
21
- .Usage:
22
- $0 [options] file
23
-
24
- Options:
25
- -i <str> Input field-delimiter. By default: tabulation (\"\\t\").
26
- -o <str> Prefix of the output files. By default: no prefix (\"\").
27
- -d <str> Output directory. By default: current directory (\"\").
28
-
29
- ";
30
- $o{i} ||= "\t";
31
- $o{o} ||= "";
32
- $o{o} = $o{d}."/".$o{o} if $o{d};
33
-
34
- my $open=0;
35
- my @fhs=();
36
- open IN, "<", $file or die "Cannot read file: $file: $!\n";
37
- while(<IN>){
38
- chomp;
39
- my @row = split $o{i};
40
- my $h = shift @row;
41
- if($open){
42
- for my $i (0 .. $#row){
43
- print { qualify_to_ref $fhs[$i] } $h.$o{i}.$row[$i]."\n" if $row[$i];
44
- }
45
- }else{
46
- $open++;
47
- for my $l (@row){
48
- $l =~ s/[\.\/:]/_/g;
49
- my $gs = gensym;
50
- open($gs, '>', $o{o}.$l.".txt") or die "Cannot create file: $o{o}$l.txt: $!\n";
51
- push @fhs, $gs;
52
- }
53
- }
54
- }
55
- close IN;
56
- close $_ for @fhs;
57
-
@@ -1,227 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- #
4
- # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
5
- # @update: Feb-06-2015
6
- # @license artistic license 2.0
7
- #
8
-
9
- require 'optparse'
10
-
11
- $opts = {:warns=>false}
12
- ARGV << '-h' if ARGV.size==0
13
- OptionParser.new do |opt|
14
- opt.separator "Re-formats Silva taxonomy into NCBI-like taxonomy dump files."
15
- opt.separator ""
16
- opt.separator "Mandatory arguments"
17
- opt.on("-k", "--silvaranks FILE", "Input Silva ranks file (e.g., tax_ranks_ssu_115.txt)."){ |v| $opts[:silvaranks]=v }
18
- opt.on("-f", "--silvaref FILE", "Input Silva ref alignment file (e.g., SSURef_NR99_115_tax_silva_full_align_trunc.fasta)."){ |v| $opts[:silvaref]=v }
19
- opt.separator ""
20
- opt.separator "Additional options"
21
- opt.on("-p", "--patch FILE", "If passed, it replaces the paths specified in the patch."){ |v| $opts[:patch]=v }
22
- opt.on("-s", "--seqinfo FILE", "If passed, it creates a CSV seq-info file compatible with taxtastic."){ |v| $opts[:seqinfo]=v }
23
- opt.on("-t", "--taxfile FILE", "If passed, it creates a simple TSV taxonomy file."){ |v| $opts[:taxfile]=v }
24
- opt.on("-n", "--ncbi FILE", "If passed, output folder for the NCBI dump files (e.g., taxdmp)."){ |v| $opts[:ncbi]=v }
25
- opt.on("-w", "--warns", "Verbously display warnings."){ $opts[:warns]=true }
26
- opt.on("-h", "--help","Display this screen") do
27
- puts opt
28
- exit
29
- end
30
- opt.separator ""
31
- end.parse!
32
- abort "-k/--silvaranks is mandatory." if $opts[:silvaranks].nil?
33
- abort "-k/--silvaranks must exist." unless File.exists? $opts[:silvaranks]
34
- abort "-f/--silvaref is mandatory." if $opts[:silvaref].nil?
35
- abort "-f/--silvaref must exist." unless File.exists? $opts[:silvaref]
36
-
37
- class Node
38
- attr_accessor :id, :tax, :leaf, :name_type
39
- attr_reader :name, :rank, :parent, :children
40
- def initialize(name, rank=nil)
41
- @name = name
42
- @rank = rank.nil? ? "no rank" : rank
43
- @children = []
44
- @leaf = false
45
- @name_type = "scientific name";
46
- end
47
- def parent=(node)
48
- @parent=node
49
- node.add_child(self)
50
- end
51
- def add_child(node)
52
- @children << node
53
- end
54
- def ncbirank
55
- ncbirank =
56
- self.rank == "superkingdom" ? "no rank" :
57
- self.rank == "domain" ? "superkingdom" :
58
- self.rank == "major_clade" ? "no rank" : self.rank
59
- return ncbirank
60
- end
61
- def path
62
- if self.parent.nil?
63
- self.name
64
- else
65
- "#{self.parent.path};#{self.name}"
66
- end
67
- end
68
- def each_desc internals, leaves, &blk
69
- blk[self] if (leaves and self.leaf) or (internals and not self.leaf)
70
- self.children.each {|child| child.each_desc internals, leaves, &blk}
71
- end
72
- def to_s
73
- "#{self.name} (#{self.rank})"
74
- end
75
- end
76
-
77
- class Taxonomy
78
- attr_reader :root, :next_id
79
- def initialize
80
- @root = Node.new('root')
81
- @root.id = 1
82
- @next_id = 2
83
- end
84
- def register(node)
85
- node.id = self.next_id
86
- node.parent = self.root if node.parent.nil?
87
- @next_id += 1
88
- end
89
- def node(path)
90
- node = self.root
91
- path.each do |level|
92
- node.children.each do |child|
93
- if child.name == level
94
- node = child
95
- break
96
- end
97
- end
98
- unless node.name == level
99
- $stderr.puts "Warning: Impossible to find #{level} at #{node.to_s}, making it up." if $opts[:warns]
100
- child = Node.new(level)
101
- child.parent = node
102
- self.register(child)
103
- node = child
104
- end
105
- end
106
- node
107
- end
108
- def each_node &blk
109
- self.root.each_desc true, true, &blk
110
- end
111
- def each_leaf &blk
112
- self.root.each_desc false, true, &blk
113
- end
114
- def each_internal &blk
115
- self.root.each_desc true, false, &blk
116
- end
117
- end
118
-
119
- begin
120
- taxo = Taxonomy.new()
121
-
122
- ## Read patch
123
- patch = {}
124
- unless $opts[:patch].nil?
125
- $stderr.puts "Reading patch: #{$opts[:patch]}"
126
- f = File.open($opts[:patch], "r")
127
- while(ln = f.gets)
128
- m = ln.chomp.split(/\t/)
129
- patch[ m[0] ] = m[1]
130
- end
131
- end
132
-
133
- ## Read the Silva ranks
134
- $stderr.puts "Reading Silva ranks: #{$opts[:silvaranks]}"
135
- f = File.open($opts[:silvaranks], "r")
136
- f.gets # header
137
- while(ln = f.gets)
138
- m = ln.chomp.split(/\t/)
139
- m[0] = patch[ m[0] ] unless patch[ m[0] ].nil?
140
- p = m[0].split(/;/)
141
- raise "Inconsistent path and node name at line #{$.}: #{ln}." unless m[1] == p.pop
142
- if m[3] != "w"
143
- node = Node.new(m[1], m[2])
144
- node.name_type = "common name" if m[3] == "a"
145
- node.parent = taxo.node(p)
146
- taxo.register(node)
147
- end
148
- end
149
- f.close
150
-
151
- $stderr.puts " Top taxa:"
152
- taxo.root.children.each do |top|
153
- $stderr.puts " o #{top.to_s} has #{top.children.length} children."
154
- end
155
-
156
- ## Read the Silva ref alignment
157
- $stderr.puts "Reading Silva ref alignment: #{$opts[:silvaref]}"
158
- i = 0
159
- f = File.open($opts[:silvaref], "r")
160
- while(ln = f.gets)
161
- m = />([^\s]+)\s(.*)/.match(ln)
162
- next unless m
163
- # Patch
164
- pm = /(.+);([^;]+)/.match(m[2])
165
- path = "#{patch[ pm[1] ].nil? ? pm[1] : patch[ pm[1] ]};#{pm[2]}".split(/;/)
166
- # Register
167
- node = taxo.node(path)
168
- taxo.register(node)
169
- refseq = Node.new(m[1], 'refseq')
170
- refseq.parent = node
171
- refseq.leaf = true
172
- taxo.register(refseq)
173
- i += 1
174
- end
175
- f.close
176
- $stderr.puts " Saved #{i} leaves."
177
-
178
- ### NCBI
179
- unless $opts[:ncbi].nil?
180
- ## Create taxonomy .dmp files
181
- $stderr.puts "Creating NCBI-like files: #{$opts[:ncbi]}"
182
- Dir.mkdir($opts[:ncbi]) unless Dir.exists?($opts[:ncbi]);
183
- # merged.dmp
184
- $stderr.puts " o Creating merged.dmp"
185
- File.open(File.join($opts[:ncbi], 'merged.dmp'), 'w'){}
186
- # names.dmp
187
- $stderr.puts " o Creating names.dmp"
188
- f = File.open(File.join($opts[:ncbi], 'names.dmp'), 'w')
189
- taxo.each_internal do |n|
190
- f.puts [n.id, n.name, "", n.name_type].join("\t|\t")+"\t|"
191
- end
192
- f.close
193
- # nodes.dmp
194
- $stderr.puts " o Creating nodes.dmp"
195
- f = File.open(File.join($opts[:ncbi], 'nodes.dmp'), 'w')
196
- taxo.each_internal do |n|
197
- f.puts ([n.id, n.parent.nil? ? n.id : n.parent.id, n.ncbirank, ""] << Array.new(8,0) << "").join("\t|\t")+"\t|"
198
- end
199
- f.close
200
- end
201
-
202
- ## Taxtastic
203
- unless $opts[:seqinfo].nil?
204
- $stderr.puts "Creating seq-info file: #{$opts[:seqinfo]}"
205
- f = File.open($opts[:seqinfo], 'w')
206
- f.puts "\"seqname\",\"tax_id\",\"group_name\""
207
- taxo.each_leaf { |n| f.puts "\"#{n.name}\",\"#{n.parent.id}\",\"#{n.parent.name}\"" }
208
- f.close
209
- end
210
-
211
- ## Misc
212
- unless $opts[:taxfile].nil?
213
- $stderr.puts "Creating taxonomy file: #{$opts[:taxfile]}"
214
- f = File.open($opts[:taxfile], 'w')
215
- f.puts "tax_id\tparent_id\trank\ttax_name"
216
- taxo.each_internal do |n|
217
- f.puts [n.id, n.parent.nil? ? n.id : n.parent.id, n.rank, n.name].join("\t")
218
- end
219
- f.close
220
- end
221
- rescue => err
222
- $stderr.puts "Exception: #{err}\n\n"
223
- err.backtrace.each { |l| $stderr.puts l + "\n" }
224
- err
225
- end
226
-
227
-
@@ -1,147 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @author Luis M. Rodriguez-R
4
- # @license Artistic-2.0
5
-
6
- $:.push File.expand_path("../lib", __FILE__)
7
- require "enveomics_rb/enveomics"
8
- require "enveomics_rb/vcf"
9
-
10
- o = {}
11
- OptionParser.new do |opt|
12
- opt.banner = "
13
- Estimates the Ka/Ks ratio from the SNPs in a VCF file. Ka and Ks are corrected
14
- using pseudo-counts, but no corrections for multiple substitutions are
15
- applied.
16
-
17
- Usage: #{$0} [options]".gsub(/^ +/,"")
18
- opt.separator ""
19
- opt.separator "Mandatory"
20
- opt.on("-i", "--input FILE",
21
- "Input file in Variant Call Format (VCF)."){ |v| o[:file] = v}
22
- opt.on("-s", "--seqs FILE",
23
- "Input gene sequences (nucleotides) in FastA format."){ |v| o[:seqs] = v}
24
- opt.separator ""
25
- opt.separator "Parameters"
26
- opt.on("-f", "--syn-frx FLOAT",
27
- "Fraction of synonymous substitutions. If passed, the number of sites are",
28
- "estimated (not counted per gene), speeding up the computation ~10X."
29
- ){ |v| o[:syn_frx] = v.to_f }
30
- opt.on("-b", "--syn-bacterial-code",
31
- "Sets --syn-frx to 0.760417, approximately the proportion of synonymous",
32
- "substitutions in the bacterial code."){ o[:syn_frx] = 0.760417 }
33
- opt.separator ""
34
- opt.separator "Miscellaneous"
35
- opt.on("-c", "--codon-file FILE",
36
- "Output file including the codons of substitution variants."
37
- ){ |v| o[:codon_file] = v }
38
- opt.on("-h", "--help", "Display this screen.") do
39
- puts opt
40
- exit
41
- end
42
- opt.separator ""
43
- end.parse!
44
-
45
- abort "--input is mandatory" if o[:file].nil?
46
- abort "--seqs is mandatory" if o[:seqs].nil?
47
-
48
- # Codon table (11. The Bacterial, Archaeal and Plant Plastid Code)
49
- # https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi#SG11
50
- t = {
51
- AAs: "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
52
- Starts: "---M------**--*----M------------MMMM---------------M------------",
53
- Base1: "TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG",
54
- Base2: "TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG",
55
- Base3: "TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG"
56
- }
57
- $codon_aa = {}
58
- $codon_st = {}
59
- (0 .. (t[:Base1].size-1)).each do |i|
60
- cod = [:Base1, :Base2, :Base3].map{ |k| t[k][i] }.join
61
- $codon_aa[cod] = t[:AAs][i]
62
- $codon_st[cod] = t[:Starts][i]
63
- end
64
-
65
- ##
66
- # Is the change +cod+ to +cod_alt+ synonymous? +start_codon+ indicates if the
67
- # codon the first in the gene.
68
- def syn?(cod, cod_alt, start_codon=false)
69
- start_codon ?
70
- ( $codon_st[cod] == $codon_st[cod_alt] ) :
71
- ( $codon_aa[cod] == $codon_aa[cod_alt] )
72
- end
73
-
74
- ##
75
- # Estimates the fraction of times that the substitutions in the sequence +seq+
76
- # result in synonymous mutations from those in position +pos+ by any of the
77
- # nucleotides in +alts+.
78
- def syn_fraction(seq, pos, alts)
79
- cod_let = (pos-1)%3
80
- cod_pos = (pos-1) - cod_let
81
- cod = seq[cod_pos .. (cod_pos+2)]
82
- syn = 0
83
- cod_alts = alts.map do |alt|
84
- cod_alt = "#{cod}"
85
- cod_alt[cod_let] = alt
86
- cod_alt
87
- end
88
- syn = cod_alts.map{ |i| syn?(cod, i, pos<=3) ? 1 : 0 }.inject(0,:+)
89
- $codon_fh.puts [syn, cod, cod_alts.join(",")].join("\t") unless $codon_fh.nil?
90
- syn.to_f/alts.size
91
- end
92
-
93
- # Read sequences
94
- seqs = {}
95
- File.open(o[:seqs], "r") do |fh|
96
- id = ""
97
- fh.each_line do |ln|
98
- if ln =~ /^>(\S+)/
99
- id = $1
100
- seqs[id] = ""
101
- else
102
- seqs[id] += ln.chomp.gsub(/[^A-Za-z]/, "")
103
- end
104
- end
105
- end
106
-
107
- # Process variants
108
- $codon_fh = nil
109
- unless o[:codon_file].nil?
110
- $codon_fh = File.open(o[:codon_file], "w")
111
- $codon_fh.puts "#" + %w[Syn Ref Alt].join("\t")
112
- end
113
- vcf = VCF.new(o[:file])
114
- gen = {}
115
- vcf.each_variant do |v|
116
- next if v.indel?
117
- raise "REF doesn't match VCF:\n#{v}" unless seqs[v.chrom][v.pos-1] == v.ref
118
- gen[v.chrom] ||= [0.0, 0.0]
119
- alts = v.alt.split(",")
120
- syn = syn_fraction(seqs[v.chrom], v.pos, alts)
121
- gen[v.chrom][0] += 1.0-syn
122
- gen[v.chrom][1] += syn
123
- end
124
- $codon_fh.close unless $codon_fh.nil?
125
- $codon_fh = nil
126
-
127
- # Ka/Ks
128
- puts "#" +
129
- "SeqID KaKs Ka Ks NonSynSubs SynSubs NonSynSites SynSites".tr(" ","\t")
130
- gen.each do |k,v|
131
- if o[:syn_frx].nil?
132
- v[2,3] = [0.0,0.0]
133
- (1 .. seqs[k].size).each do |pos|
134
- alts = %w(A C T G) - [seqs[k][pos-1]]
135
- syn = syn_fraction(seqs[k], pos, alts)
136
- v[2] += 1.0-syn
137
- v[3] += syn
138
- end
139
- else
140
- v[2] = seqs[k].size.to_f*o[:syn_frx]
141
- v[3] = seqs[k].size.to_f*(1.0-o[:syn_frx])
142
- end
143
- ka = (v[0] + 1) / (v[2] + 2)
144
- ks = (v[1] + 1) / (v[3] + 2)
145
- puts ([k, ka/ks, ka, ks] + v).join("\t")
146
- end
147
-