miga-base 1.2.15.2 → 1.2.15.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/cli/action/download/gtdb.rb +4 -1
  3. data/lib/miga/cli/action/gtdb_get.rb +4 -0
  4. data/lib/miga/daemon.rb +4 -1
  5. data/lib/miga/lair.rb +6 -4
  6. data/lib/miga/remote_dataset/download.rb +3 -2
  7. data/lib/miga/remote_dataset.rb +25 -7
  8. data/lib/miga/taxonomy.rb +6 -0
  9. data/lib/miga/version.rb +2 -2
  10. metadata +6 -302
  11. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +0 -41964
  12. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +0 -32439
  13. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +0 -62056
  14. data/utils/FastAAI/FastAAI +0 -3659
  15. data/utils/FastAAI/FastAAI-legacy/FastAAI +0 -1336
  16. data/utils/FastAAI/FastAAI-legacy/kAAI_v1.0_virus.py +0 -1296
  17. data/utils/FastAAI/README.md +0 -84
  18. data/utils/enveomics/Docs/recplot2.md +0 -244
  19. data/utils/enveomics/Examples/aai-matrix.bash +0 -66
  20. data/utils/enveomics/Examples/ani-matrix.bash +0 -66
  21. data/utils/enveomics/Examples/essential-phylogeny.bash +0 -105
  22. data/utils/enveomics/Examples/unus-genome-phylogeny.bash +0 -100
  23. data/utils/enveomics/LICENSE.txt +0 -73
  24. data/utils/enveomics/Makefile +0 -52
  25. data/utils/enveomics/Manifest/Tasks/aasubs.json +0 -103
  26. data/utils/enveomics/Manifest/Tasks/blasttab.json +0 -790
  27. data/utils/enveomics/Manifest/Tasks/distances.json +0 -161
  28. data/utils/enveomics/Manifest/Tasks/fasta.json +0 -802
  29. data/utils/enveomics/Manifest/Tasks/fastq.json +0 -291
  30. data/utils/enveomics/Manifest/Tasks/graphics.json +0 -126
  31. data/utils/enveomics/Manifest/Tasks/mapping.json +0 -137
  32. data/utils/enveomics/Manifest/Tasks/ogs.json +0 -382
  33. data/utils/enveomics/Manifest/Tasks/other.json +0 -906
  34. data/utils/enveomics/Manifest/Tasks/remote.json +0 -355
  35. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +0 -650
  36. data/utils/enveomics/Manifest/Tasks/tables.json +0 -308
  37. data/utils/enveomics/Manifest/Tasks/trees.json +0 -68
  38. data/utils/enveomics/Manifest/Tasks/variants.json +0 -111
  39. data/utils/enveomics/Manifest/categories.json +0 -165
  40. data/utils/enveomics/Manifest/examples.json +0 -162
  41. data/utils/enveomics/Manifest/tasks.json +0 -4
  42. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +0 -69
  43. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +0 -1
  44. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +0 -1
  45. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +0 -1
  46. data/utils/enveomics/Pipelines/assembly.pbs/README.md +0 -189
  47. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +0 -112
  48. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +0 -23
  49. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +0 -44
  50. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +0 -50
  51. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +0 -37
  52. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +0 -68
  53. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +0 -49
  54. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +0 -80
  55. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +0 -57
  56. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +0 -63
  57. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +0 -38
  58. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +0 -73
  59. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +0 -21
  60. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +0 -72
  61. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +0 -98
  62. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +0 -1
  63. data/utils/enveomics/Pipelines/blast.pbs/README.md +0 -127
  64. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +0 -109
  65. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +0 -128
  66. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +0 -16
  67. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +0 -22
  68. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +0 -26
  69. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +0 -89
  70. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +0 -29
  71. data/utils/enveomics/Pipelines/idba.pbs/README.md +0 -49
  72. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +0 -95
  73. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +0 -56
  74. data/utils/enveomics/Pipelines/trim.pbs/README.md +0 -54
  75. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +0 -70
  76. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +0 -130
  77. data/utils/enveomics/README.md +0 -42
  78. data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +0 -171
  79. data/utils/enveomics/Scripts/Aln.cat.rb +0 -221
  80. data/utils/enveomics/Scripts/Aln.convert.pl +0 -35
  81. data/utils/enveomics/Scripts/AlphaDiversity.pl +0 -152
  82. data/utils/enveomics/Scripts/BedGraph.tad.rb +0 -93
  83. data/utils/enveomics/Scripts/BedGraph.window.rb +0 -71
  84. data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +0 -102
  85. data/utils/enveomics/Scripts/BlastTab.addlen.rb +0 -63
  86. data/utils/enveomics/Scripts/BlastTab.advance.bash +0 -48
  87. data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +0 -55
  88. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +0 -104
  89. data/utils/enveomics/Scripts/BlastTab.cogCat.rb +0 -76
  90. data/utils/enveomics/Scripts/BlastTab.filter.pl +0 -47
  91. data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +0 -194
  92. data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +0 -104
  93. data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +0 -157
  94. data/utils/enveomics/Scripts/BlastTab.recplot2.R +0 -48
  95. data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +0 -86
  96. data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +0 -119
  97. data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +0 -86
  98. data/utils/enveomics/Scripts/BlastTab.subsample.pl +0 -47
  99. data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +0 -114
  100. data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +0 -90
  101. data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +0 -123
  102. data/utils/enveomics/Scripts/Chao1.pl +0 -97
  103. data/utils/enveomics/Scripts/CharTable.classify.rb +0 -234
  104. data/utils/enveomics/Scripts/EBIseq2tax.rb +0 -83
  105. data/utils/enveomics/Scripts/FastA.N50.pl +0 -60
  106. data/utils/enveomics/Scripts/FastA.extract.rb +0 -152
  107. data/utils/enveomics/Scripts/FastA.filter.pl +0 -52
  108. data/utils/enveomics/Scripts/FastA.filterLen.pl +0 -28
  109. data/utils/enveomics/Scripts/FastA.filterN.pl +0 -60
  110. data/utils/enveomics/Scripts/FastA.fragment.rb +0 -100
  111. data/utils/enveomics/Scripts/FastA.gc.pl +0 -42
  112. data/utils/enveomics/Scripts/FastA.interpose.pl +0 -93
  113. data/utils/enveomics/Scripts/FastA.length.pl +0 -38
  114. data/utils/enveomics/Scripts/FastA.mask.rb +0 -89
  115. data/utils/enveomics/Scripts/FastA.per_file.pl +0 -36
  116. data/utils/enveomics/Scripts/FastA.qlen.pl +0 -57
  117. data/utils/enveomics/Scripts/FastA.rename.pl +0 -65
  118. data/utils/enveomics/Scripts/FastA.revcom.pl +0 -23
  119. data/utils/enveomics/Scripts/FastA.sample.rb +0 -98
  120. data/utils/enveomics/Scripts/FastA.slider.pl +0 -85
  121. data/utils/enveomics/Scripts/FastA.split.pl +0 -55
  122. data/utils/enveomics/Scripts/FastA.split.rb +0 -79
  123. data/utils/enveomics/Scripts/FastA.subsample.pl +0 -131
  124. data/utils/enveomics/Scripts/FastA.tag.rb +0 -65
  125. data/utils/enveomics/Scripts/FastA.toFastQ.rb +0 -69
  126. data/utils/enveomics/Scripts/FastA.wrap.rb +0 -48
  127. data/utils/enveomics/Scripts/FastQ.filter.pl +0 -54
  128. data/utils/enveomics/Scripts/FastQ.interpose.pl +0 -90
  129. data/utils/enveomics/Scripts/FastQ.maskQual.rb +0 -89
  130. data/utils/enveomics/Scripts/FastQ.offset.pl +0 -90
  131. data/utils/enveomics/Scripts/FastQ.split.pl +0 -53
  132. data/utils/enveomics/Scripts/FastQ.tag.rb +0 -70
  133. data/utils/enveomics/Scripts/FastQ.test-error.rb +0 -81
  134. data/utils/enveomics/Scripts/FastQ.toFastA.awk +0 -24
  135. data/utils/enveomics/Scripts/GFF.catsbj.pl +0 -127
  136. data/utils/enveomics/Scripts/GenBank.add_fields.rb +0 -84
  137. data/utils/enveomics/Scripts/HMM.essential.rb +0 -351
  138. data/utils/enveomics/Scripts/HMM.haai.rb +0 -168
  139. data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +0 -83
  140. data/utils/enveomics/Scripts/JPlace.distances.rb +0 -88
  141. data/utils/enveomics/Scripts/JPlace.to_iToL.rb +0 -320
  142. data/utils/enveomics/Scripts/M5nr.getSequences.rb +0 -81
  143. data/utils/enveomics/Scripts/MeTaxa.distribution.pl +0 -198
  144. data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +0 -35
  145. data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +0 -49
  146. data/utils/enveomics/Scripts/NCBIacc2tax.rb +0 -92
  147. data/utils/enveomics/Scripts/Newick.autoprune.R +0 -27
  148. data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +0 -228
  149. data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +0 -32
  150. data/utils/enveomics/Scripts/RefSeq.download.bash +0 -48
  151. data/utils/enveomics/Scripts/SRA.download.bash +0 -55
  152. data/utils/enveomics/Scripts/TRIBS.plot-test.R +0 -36
  153. data/utils/enveomics/Scripts/TRIBS.test.R +0 -39
  154. data/utils/enveomics/Scripts/Table.barplot.R +0 -31
  155. data/utils/enveomics/Scripts/Table.df2dist.R +0 -30
  156. data/utils/enveomics/Scripts/Table.filter.pl +0 -61
  157. data/utils/enveomics/Scripts/Table.merge.pl +0 -77
  158. data/utils/enveomics/Scripts/Table.prefScore.R +0 -60
  159. data/utils/enveomics/Scripts/Table.replace.rb +0 -69
  160. data/utils/enveomics/Scripts/Table.round.rb +0 -63
  161. data/utils/enveomics/Scripts/Table.split.pl +0 -57
  162. data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +0 -227
  163. data/utils/enveomics/Scripts/VCF.KaKs.rb +0 -147
  164. data/utils/enveomics/Scripts/VCF.SNPs.rb +0 -88
  165. data/utils/enveomics/Scripts/aai.rb +0 -421
  166. data/utils/enveomics/Scripts/ani.rb +0 -362
  167. data/utils/enveomics/Scripts/anir.rb +0 -137
  168. data/utils/enveomics/Scripts/clust.rand.rb +0 -102
  169. data/utils/enveomics/Scripts/gi2tax.rb +0 -103
  170. data/utils/enveomics/Scripts/in_silico_GA_GI.pl +0 -96
  171. data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
  172. data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
  173. data/utils/enveomics/Scripts/lib/enveomics.R +0 -1
  174. data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +0 -293
  175. data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +0 -175
  176. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +0 -24
  177. data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +0 -17
  178. data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +0 -30
  179. data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +0 -253
  180. data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +0 -88
  181. data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +0 -182
  182. data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +0 -49
  183. data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +0 -74
  184. data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +0 -237
  185. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +0 -31
  186. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +0 -152
  187. data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +0 -3
  188. data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +0 -74
  189. data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +0 -135
  190. data/utils/enveomics/Scripts/ogs.annotate.rb +0 -88
  191. data/utils/enveomics/Scripts/ogs.core-pan.rb +0 -160
  192. data/utils/enveomics/Scripts/ogs.extract.rb +0 -125
  193. data/utils/enveomics/Scripts/ogs.mcl.rb +0 -186
  194. data/utils/enveomics/Scripts/ogs.rb +0 -104
  195. data/utils/enveomics/Scripts/ogs.stats.rb +0 -131
  196. data/utils/enveomics/Scripts/rbm-legacy.rb +0 -172
  197. data/utils/enveomics/Scripts/rbm.rb +0 -108
  198. data/utils/enveomics/Scripts/sam.filter.rb +0 -148
  199. data/utils/enveomics/Tests/Makefile +0 -10
  200. data/utils/enveomics/Tests/Mgen_M2288.faa +0 -3189
  201. data/utils/enveomics/Tests/Mgen_M2288.fna +0 -8282
  202. data/utils/enveomics/Tests/Mgen_M2321.fna +0 -8288
  203. data/utils/enveomics/Tests/Nequ_Kin4M.faa +0 -2970
  204. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
  205. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +0 -7
  206. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +0 -17
  207. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +0 -137
  208. data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +0 -123
  209. data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +0 -200
  210. data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +0 -55
  211. data/utils/enveomics/Tests/alkB.nwk +0 -1
  212. data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +0 -13
  213. data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +0 -17
  214. data/utils/enveomics/Tests/hiv1.faa +0 -59
  215. data/utils/enveomics/Tests/hiv1.fna +0 -134
  216. data/utils/enveomics/Tests/hiv2.faa +0 -70
  217. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +0 -233
  218. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +0 -1
  219. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +0 -233
  220. data/utils/enveomics/Tests/phyla_counts.tsv +0 -10
  221. data/utils/enveomics/Tests/primate_lentivirus.ogs +0 -11
  222. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +0 -9
  223. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +0 -8
  224. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +0 -6
  225. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +0 -9
  226. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +0 -6
  227. data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +0 -6
  228. data/utils/enveomics/build_enveomics_r.bash +0 -45
  229. data/utils/enveomics/enveomics.R/DESCRIPTION +0 -31
  230. data/utils/enveomics/enveomics.R/NAMESPACE +0 -39
  231. data/utils/enveomics/enveomics.R/R/autoprune.R +0 -155
  232. data/utils/enveomics/enveomics.R/R/barplot.R +0 -184
  233. data/utils/enveomics/enveomics.R/R/cliopts.R +0 -135
  234. data/utils/enveomics/enveomics.R/R/df2dist.R +0 -154
  235. data/utils/enveomics/enveomics.R/R/growthcurve.R +0 -331
  236. data/utils/enveomics/enveomics.R/R/prefscore.R +0 -79
  237. data/utils/enveomics/enveomics.R/R/recplot.R +0 -354
  238. data/utils/enveomics/enveomics.R/R/recplot2.R +0 -1631
  239. data/utils/enveomics/enveomics.R/R/tribs.R +0 -583
  240. data/utils/enveomics/enveomics.R/R/utils.R +0 -80
  241. data/utils/enveomics/enveomics.R/README.md +0 -81
  242. data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
  243. data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
  244. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -16
  245. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -16
  246. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -16
  247. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +0 -25
  248. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +0 -46
  249. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +0 -23
  250. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +0 -47
  251. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +0 -23
  252. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +0 -23
  253. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +0 -40
  254. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +0 -103
  255. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +0 -67
  256. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +0 -24
  257. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +0 -19
  258. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +0 -45
  259. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +0 -44
  260. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +0 -47
  261. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +0 -75
  262. data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +0 -50
  263. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +0 -44
  264. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +0 -139
  265. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +0 -45
  266. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +0 -24
  267. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +0 -77
  268. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +0 -25
  269. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +0 -21
  270. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +0 -19
  271. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +0 -19
  272. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +0 -47
  273. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +0 -29
  274. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +0 -18
  275. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +0 -45
  276. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +0 -36
  277. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +0 -19
  278. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +0 -19
  279. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +0 -27
  280. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +0 -52
  281. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +0 -17
  282. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +0 -51
  283. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +0 -43
  284. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +0 -82
  285. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +0 -59
  286. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +0 -27
  287. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +0 -36
  288. data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +0 -23
  289. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +0 -68
  290. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +0 -28
  291. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +0 -27
  292. data/utils/enveomics/enveomics.R/man/growth.curves.Rd +0 -14
  293. data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +0 -13
  294. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +0 -78
  295. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +0 -46
  296. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +0 -45
  297. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +0 -125
  298. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +0 -19
  299. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +0 -19
  300. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +0 -19
  301. data/utils/enveomics/globals.mk +0 -8
  302. data/utils/enveomics/manifest.json +0 -9
  303. data/utils/multitrim/Multitrim How-To.pdf +0 -0
  304. data/utils/multitrim/README.md +0 -67
  305. data/utils/multitrim/multitrim.py +0 -1555
  306. data/utils/multitrim/multitrim.yml +0 -13
@@ -1,221 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @author Luis M. Rodriguez-R
4
- # @license artistic license 2.0
5
-
6
- $VERSION = 1.0
7
- $:.push File.expand_path('../lib', __FILE__)
8
- require 'enveomics_rb/enveomics'
9
-
10
- o = {
11
- q: false, missing: '-', model: 'AUTO', removeinvar: false, undefined: '-.Xx?'
12
- }
13
-
14
- OptionParser.new do |opt|
15
- cmd = File.basename($0)
16
- opt.banner = <<~BANNER
17
-
18
- [Enveomics Collection: #{cmd} v#{$VERSION}]
19
-
20
- Concatenates several multiple alignments in FastA format into a single
21
- multiple alignment. The IDs of the sequences (or the ID prefixes, if using
22
- --ignore-after) must coincide across files.
23
-
24
- Usage: #{cmd} [options] aln1.fa aln2.fa ... > aln.fa
25
-
26
- BANNER
27
- opt.on(
28
- '-c', '--coords FILE',
29
- 'Output file of coordinates in RAxML-compliant format'
30
- ) { |v| o[:coords] = v }
31
- opt.on(
32
- '-i', '--ignore-after STRING',
33
- 'Remove everything in the IDs after the specified string'
34
- ) { |v| o[:ignoreafter] = v }
35
- opt.on(
36
- '-I', '--remove-invariable', 'Remove invariable sites',
37
- 'Note: Invariable sites are defined as columns with only one state and',
38
- 'undefined characters. Additional ambiguous characters may exist and',
39
- 'should be declared using --undefined'
40
- ) { |v| o[:removeinvar] = v }
41
- opt.on(
42
- '-u', '--missing-char CHAR',
43
- "Character denoting missing data. By default: '#{o[:missing]}'"
44
- ) do |v|
45
- if v.length != 1
46
- abort "-missing-char can only be denoted by single characters: #{v}"
47
- end
48
- o[:missing] = v
49
- end
50
- opt.on(
51
- '-m', '--model STRING',
52
- 'Name of the model to use if --coords is used. See RAxML docs;',
53
- 'supported values in v8+ include:',
54
- '~ For DNA alignments:',
55
- ' "DNA[F|X]", or "DNA[F|X]/3" (to estimate rates per codon position,',
56
- ' particular notation for this script)',
57
- '~ General protein alignments:',
58
- ' "AUTO" (default in this script), "DAYHOFF" (1978), "DCMUT" (MBE 2005;',
59
- ' 22(2):193-199), "JTT" (Nat 1992;358:86-89), "VT" (JCompBiol 2000;',
60
- ' 7(6):761-776), "BLOSUM62" (PNAS 1992;89:10915), and "LG" (MBE 2008;',
61
- ' 25(7):1307-1320)',
62
- '~ Specialized protein alignments:',
63
- ' "MTREV" (mitochondrial, JME 1996;42(4):459-468), "WAG" (globular, MBE',
64
- ' 2001;18(5):691-699), "RTREV" (retrovirus, JME 2002;55(1):65-73),',
65
- ' "CPREV" (chloroplast, JME 2000;50(4):348-358), and "MTMAM" (nuclear',
66
- ' mammal proteins, JME 1998;46(4):409-418)'
67
- ) { |v| o[:model] = v }
68
- opt.on(
69
- '--undefined STRING',
70
- 'All characters to be regarded as "undefined". It should include all',
71
- 'ambiguous and missing data chars. Ignored unless --remove-invariable',
72
- "By default: '#{o[:undefined]}'"
73
- ) { |v| o[:undefined] = v }
74
- opt.on('-q', '--quiet', 'Run quietly (no STDERR output)') { o[:q] = true }
75
- opt.on('-V', '--version', 'Returns version') { puts $VERSION ; exit }
76
- opt.on('-h', '--help', 'Display this screen') { puts opt ; exit }
77
- opt.separator ''
78
- end.parse!
79
- files = ARGV
80
- abort 'Alignment files are mandatory' if files.nil? || files.empty?
81
- $QUIET = o[:q]
82
-
83
- # Read individual gene alignments and return them as a single hash with genome
84
- # IDs as keys and arrays of single-line strings as values
85
- #
86
- # IDs are trimmed after the first occurrence of +ignoreafter+, if defined
87
- def read_alignments(files, ignoreafter = nil)
88
- aln = {}
89
- files.each_with_index do |file, i|
90
- key = nil
91
- File.open(file, 'r').each do |ln|
92
- ln.chomp!
93
- if ln =~ /^>(\S+)/
94
- key = $1
95
- key.sub!(/#{ignoreafter}.*/, '') if ignoreafter
96
- aln[key] ||= []
97
- aln[key][i] = ''
98
- else
99
- if key.nil?
100
- abort "Invalid FastA file: #{file}: Leading line not a def-line"
101
- end
102
- ln.gsub!(/\s/, '')
103
- aln[key][i] += ln
104
- end
105
- end
106
- abort "Empty alignment file: #{file}" if key.nil?
107
- end
108
- aln
109
- end
110
-
111
- # Remove invariable sites from the alignment hash +aln+, using +undefined+ as
112
- # a string including all characters representing undefined positions (e.g., X)
113
- #
114
- # Returns number of columns removed
115
- def remove_invariable(aln, undefined)
116
- invs = 0
117
- lengths = aln.values.first.map(&:length)
118
- undef_chars = undefined.chars
119
-
120
- lengths.each_with_index do |len, i|
121
- (0 .. len - 1).each do |pos|
122
- chr = nil
123
- inv = true
124
- aln.each_key do |key|
125
- next if aln[key][i].nil?
126
- chr = aln[key][i][pos] if chr.nil? || undefined.chars.include?(chr)
127
- if chr != aln[key][i][pos] && !undef_chars.include?(aln[key][i][pos])
128
- inv = false
129
- break
130
- end
131
- end
132
- if inv
133
- aln.each_key { |key| aln[key][i][pos] = '!' unless aln[key][i].nil? }
134
- lengths[i] -= 1
135
- invs += 1
136
- end
137
- end
138
- aln.each_key { |key| aln[key][i].gsub!('!', '') unless aln[key][i].nil? }
139
- end
140
- invs
141
- end
142
-
143
- # Concatenate the alignments hash +aln+ using the character +missing+ to
144
- # indicate missing alignments, and send each entry in the concatenated alignment
145
- # to +blk+ as two variables: key (name) and value (alignment string)
146
- #
147
- # Returns an array with the lengths of each individual alignment
148
- def concatenate(aln, missing, &blk)
149
- say 'Concatenating'
150
- lengths = aln.values.first.map(&:length)
151
- aln.each_key do |key|
152
- # Pad missing entries
153
- lengths.each_with_index { |len, i| aln[key][i] ||= missing * len }
154
-
155
- # Check length
156
- obs_len = aln[key].map(&:length)
157
- unless lengths == obs_len
158
- abort "Inconsistent lengths in '#{key}'\nexp: #{lengths}\nobs: #{obs_len}"
159
- end
160
-
161
- # Pass entry to the block and remove from alignment hash
162
- blk[key, aln[key].join('')]
163
- aln.delete(key)
164
- end
165
- lengths
166
- end
167
-
168
- # Save the coordinates in +file+ based on +files+ paths (for the names), and
169
- # using +lengths+ individual alignment lengths
170
- #
171
- # The saved format is RAxML coords, including the +model+ for each alignment
172
- def save_coords(file, names, lengths, model)
173
- File.open(file, 'w') do |fh|
174
- s = 0
175
- names.each_with_index do |name, i|
176
- l = lengths[i]
177
- next unless l > 0
178
- name += "_#{i}" while names.count(name) > 1
179
- if model =~ /(DNA.?)\/3/
180
- fh.puts "#{$1}, #{name}codon1 = #{s + 1}-#{s + l}\\3"
181
- fh.puts "#{$1}, #{name}codon2 = #{s + 2}-#{s + l}\\3"
182
- fh.puts "#{$1}, #{name}codon3 = #{s + 3}-#{s + l}\\3"
183
- else
184
- fh.puts "#{model}, #{name} = #{s + 1}-#{s + l}"
185
- end
186
- s += l
187
- end
188
- end
189
- end
190
-
191
- # ------ MAIN ------
192
- begin
193
- say 'Reading'
194
- alignments = read_alignments(files, o[:ignoreafter])
195
-
196
- if o[:removeinvar]
197
- say 'Removing invariable sites'
198
- inv = remove_invariable(alignments, o[:undefined])
199
- say " Removed #{inv} sites"
200
- end
201
-
202
- lengths = concatenate(alignments, o[:missing]) do |name, seq|
203
- puts ">#{name}", seq.gsub(/(.{1,60})/, "\\1\n")
204
- end
205
- say " #{lengths.inject(:+)} columns"
206
-
207
- unless o[:coords].nil?
208
- say 'Generating coordinates'
209
- names = files.map do |i|
210
- File.basename(i).gsub(/\..*/, '').gsub(/[^A-Za-z0-9_]/, '_')
211
- end
212
- save_coords(o[:coords], names, lengths, o[:model])
213
- end
214
-
215
- $stderr.puts 'Done' unless o[:q]
216
- rescue => err
217
- $stderr.puts "Exception: #{err}\n\n"
218
- err.backtrace.each { |l| $stderr.puts l + "\n" }
219
- err
220
- end
221
-
@@ -1,35 +0,0 @@
1
- #!/usr/bin/env perl
2
-
3
- #
4
- # @author: Luis M. Rodriguez-R
5
- # @update: Mar-23-2015
6
- # @license: artistic license 2.0
7
- #
8
-
9
- use Bio::AlignIO;
10
-
11
- my($iformat,$oformat) = @ARGV;
12
- ($iformat and $oformat) or die "
13
- Usage:
14
- $0 in-format out-format < in_file > output_file
15
-
16
- in-format Input file's format.
17
- out-format Output file's format.
18
- in_file Input file.
19
- out_file Output file.
20
-
21
- Example:
22
- # Re-format example.fa into Stockholm
23
- $0 fasta stockholm < example.fa > example.stk
24
-
25
- Supported formats are:
26
- bl2seq, clustalw, emboss, fasta, maf, mase, mega,
27
- meme, metafasta, msf, nexus, pfam, phylip, po,
28
- prodom, psi, selex, stockholm, XMFA, arp
29
-
30
- ";
31
-
32
- $in = Bio::AlignIO->new(-fh => \*STDIN, -format => $iformat);
33
- $out = Bio::AlignIO->new(-fh => \*STDOUT, -format => $oformat);
34
- while ( my $aln = $in->next_aln ) { $out->write_aln($aln) }
35
-
@@ -1,152 +0,0 @@
1
- #!/usr/bin/env perl
2
-
3
- #
4
- # @author: Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
5
- # @update: Nov-29-2015
6
- # @license: artistic license 2.0
7
- #
8
-
9
- use warnings;
10
- use strict;
11
- use Getopt::Std;
12
- use List::Util qw/min max sum/;
13
-
14
- sub VERSION_MESSAGE(){print "Alpha-diversity indices (enveomics)\n"}
15
- sub HELP_MESSAGE(){die "
16
- Description:
17
- Takes a table of OTU abundance in one or more samples and calculates the Rao
18
- (Q_alpha), Rao-Jost (Q_alpha_eqv), Shannon (Hprime), and inverse Simpson
19
- (1_lambda) indices of alpha diversity for each sample.
20
-
21
- To use it with Qiime OTU Tables, run it as:
22
- $0 -i OTU_Table.txt -h
23
-
24
- Usage:
25
- $0 [opts]
26
-
27
- -i <str> * Input table (columns:samples, rows:OTUs, first column:OTU
28
- names).
29
- -r <int> Number of rows to ignore. By default: 0.
30
- -c <int> Number of columns to ignore after the first column (i.e.,
31
- between the first column, containing the name of the categories,
32
- and the first column of abundance values). By default: 0.
33
- -C <int> Number of columns to ignore at the end of each line.
34
- By default: 0.
35
- -d <str> Delimiter. Supported escaped characters are: \"\\t\"
36
- (tabulation), and \"\\0\" (null bit). By default: \"\\t\".
37
- -h If set, the first row is assumed to have the names of the
38
- samples.
39
- -D <str> Distances file. A squared matrix (or bottom-left half matrix)
40
- with the distances between categories (OTUs or functions). The
41
- first column must contain the names of the categories, and it
42
- shouldn't have headers. If not set, all distances are assumed
43
- to be one. Only used for Rao.
44
- -R Do not calculate Rao indices. This significantly decreases the
45
- total running time. Note that Rao indices are highly susceptible
46
- to precision errors, and shouldn't be trusted for very big
47
- numbers.
48
- -q <int> Estimate the qD index (true diversity order q). By default: 0.
49
- --help This help message.
50
-
51
- * Mandatory.
52
-
53
- "}
54
-
55
- # Input arguments
56
- my %o;
57
- getopts('i:c:C:d:r:hD:Rq:', \%o);
58
-
59
- #$o{B} and (eval("use bignum; 1") or die "Cannot use bignum.\n");
60
- &HELP_MESSAGE() unless $o{i};
61
- $o{c} ||= 0;
62
- $o{C} ||= 0;
63
- $o{r} ||= 0;
64
- $o{d} ||= "\\t";
65
- $o{q} ||= 0;
66
-
67
- $o{d}="\t" if $o{d} eq "\\t";
68
- $o{d}="\0" if $o{d} eq "\\0";
69
-
70
- # Distance matrix
71
- my $D = {};
72
- if($o{D} and not $o{R}){
73
- my @Didx = ();
74
- open DIST, "<", $o{D} or die "Cannot read file: $o{D}: $!\n";
75
- while(<DIST>){
76
- chomp;
77
- my @d = split /\t/;
78
- my $idx = shift @d;
79
- push @Didx, $idx;
80
- $D->{ $idx } ||= {};
81
- $D->{ $idx }->{ $Didx[$_] } = $d[$_] for(0 .. $#d);
82
- }
83
- close DIST;
84
- undef @Didx;
85
- }
86
-
87
- # Abundance matrix
88
- my @names = ();
89
- my @cats = ();
90
- my @values = ();
91
- open TABLE, "<", $o{i} or die "Cannot open file: ".$o{i}.": $!\n";
92
- <TABLE> for (1 .. $o{r});
93
- if($o{h}){
94
- my $h = <TABLE>;
95
- $h or die "Empty table!\n";
96
- chomp $h;
97
- @names = split $o{d}, $h;
98
- shift @names for (0 .. $o{c});
99
- }
100
-
101
- while(<TABLE>){
102
- chomp;
103
- my @ln = split $o{d};
104
- push @cats, shift(@ln);
105
- shift @ln for (1 .. $o{c});
106
- pop @ln for (1 .. $o{C});
107
- push @{$values[$_] ||= []}, $ln[$_] for (0 .. $#ln);
108
- push @{$values[$#ln+1]}, sum(@ln);
109
- }
110
- close TABLE;
111
- $names[$#values] = "gamma";
112
-
113
- if($o{R}){
114
- print "".join($o{d}, qw/Sample Hprime 1_lambda qD/)."\n";
115
- }else{
116
- print "".join($o{d}, qw/Sample Q_alpha Q_alpha_eqv Hprime 1_lambda qD/)."\n";
117
- }
118
- for my $i (0 .. $#values){
119
- print "".(exists $names[$i] ? $names[$i] : $i).$o{d};
120
- my $N = sum @{$values[$i]};
121
- my $Q = 0;
122
- my $H = 0;
123
- my $l = 0;
124
- my $qD = 0 unless $o{q}==1;
125
- for my $ik (0 .. $#{$values[$i]}){
126
- unless($o{R}){
127
- my $Qi = 0;
128
- for my $jk (0 .. $#{$values[$i]}){
129
- my $dij = (!$o{D}) ? 1 :
130
- exists $D->{ $cats[$ik] }->{ $cats[$jk] } ?
131
- $D->{ $cats[$ik] }->{ $cats[$jk] } :
132
- exists $D->{ $cats[$jk] }->{ $cats[$ik] } ?
133
- $D->{ $cats[$jk] }->{ $cats[$ik] } :
134
- die "Cannot find distance between ".$cats[$ik].
135
- " and ".$cats[$jk].".\n";
136
- $Qi += $dij * ($values[$i]->[$ik]/$N) * ($values[$i]->[$jk]/$N);
137
- }
138
- $Q += $Qi;
139
- }
140
- my $pi = $N ? $values[$i]->[$ik]/$N : 0;
141
- $H -= $pi * log($pi) if $pi;
142
- $l += $pi**2;
143
- $qD += $pi * ($pi**($o{q}-1)) unless $o{q}==1 or not $pi;
144
- }
145
- $qD = $o{q}==1 ? exp($H) : 1/($qD**(1/($o{q}-1)));
146
- if($o{R}){
147
- print "".join($o{d}, $H, $l ? 1/$l : "Inf", $qD)."\n";
148
- }else{
149
- print "".join($o{d}, $Q, ($Q==1 ? "NA" : 1/(1-$Q)), $H, 1/$l, $qD)."\n";
150
- }
151
- }
152
-
@@ -1,93 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require 'optparse'
4
-
5
# Command-line options. :range is the central fraction of positions kept,
# :perseq and :length toggle extra output columns, :i is the input path.
o = {range: 0.5, perseq: false, length: false}
ARGV << '-h' if ARGV.empty?
OptionParser.new do |opt|
  opt.banner = "
Estimates the truncated average sequencing depth (TAD) from a BedGraph file.

IMPORTANT: This script doesn't consider zero-coverage positions if missing
from the file. If you produce your BedGraph file with bedtools genomecov and
want to consider zero-coverage position, be sure to use -bga (not -bg).

Usage: #{$0} [options]"
  opt.separator ''
  opt.on('-i', '--input PATH',
    'Input BedGraph file (mandatory).'){ |v| o[:i]=v }
  opt.on('-r', '--range FLOAT',
    'Central range to consider, between 0 and 1.',
    "By default: #{o[:range]} (inter-quartile range)."
  ){ |v| o[:range]=v.to_f }
  opt.on('-s', '--per-seq',
    'Calculate averages per reference sequence, not total.',
    'Assumes a sorted BedGraph file.'
  ){ |v| o[:perseq] = v }
  opt.on('-l', '--length',
    'Add sequence length to the output.'){ |v| o[:length] = v }
  opt.on('-h', '--help', 'Display this screen.') do
    puts opt
    exit
  end
  opt.separator ''
end.parse!
abort '-i is mandatory.' if o[:i].nil?
# A range outside [0, 1] yields a negative or oversized trimming size and a
# silently meaningless average, so reject it up front.
abort '-r must be between 0 and 1.' unless o[:range].between?(0.0, 1.0)
36
-
37
- def pad(d, idx, r)
38
- idx.each do |i|
39
- next if d[i].nil?
40
- d[i] -= r
41
- break unless d[i] < 0
42
- r = -d[i]
43
- d[i] = nil
44
- end
45
- d
46
- end
47
-
48
# Prints one tab-separated line with the truncated average depth.
#
# sq - sequence name, printed first when o[:perseq] is set
# d  - array indexed by depth whose values are position counts (nil = none);
#      trimmed in place by #pad
# ln - total number of positions, appended when o[:length] is set
# o  - options hash (:range, :perseq, :length)
def report(sq, d, ln, o)
  # Number of positions to discard from each end of the depth distribution
  tail_fraction = (1.0 - o[:range]) / 2.0
  r = (tail_fraction * ln).round

  # Trim the low-depth end, then the high-depth end
  d = pad(d, d.each_index.to_a, r)
  d = pad(d, d.each_index.to_a.reverse, r)

  # Average depth over the remaining positions. Trimming can leave entries
  # that are 0 rather than nil, so test the remaining total (not emptiness)
  # to avoid 0.0/0 => NaN.
  y = [0.0]
  positions = d.compact.inject(0, :+)
  if positions > 0
    depth_sum = 0
    d.each_with_index { |v, i| depth_sum += i * v unless v.nil? }
    y[0] = depth_sum.to_f / positions
  end

  # Report
  y.unshift(sq) if o[:perseq]
  y << ln if o[:length]
  puts y.join("\t")
end
69
-
70
# Read BedGraph
# d[depth] accumulates the number of positions seen at that depth and ln the
# total number of positions; both are reset after each per-sequence report.
d = []
ln = 0
pre_sq = nil
File.open(o[:i], "r") do |ifh|
  ifh.each_line do |i|
    # Skip comments, track/browser header lines, and blank lines; none of
    # them carry the four tab-separated fields parsed below.
    next if i =~ /^#/ || i =~ /^(?:track|browser) / || i.strip.empty?
    r = i.chomp.split("\t")
    sq = r.shift
    abort "Malformed BedGraph line: #{i.chomp}" if r.size < 3
    if o[:perseq] and !pre_sq.nil? and pre_sq!=sq
      # Sequence changed: flush the previous one (input assumed sorted)
      report(pre_sq, d, ln, o)
      d = []
      ln = 0
    end
    r.map! { |j| j.to_i }
    l = r[1]-r[0]   # interval length = end - start
    d[ r[2] ] ||= 0
    d[ r[2] ] += l
    ln += l
    pre_sq = sq
  end
end
report(pre_sq, d, ln, o)
93
-
@@ -1,71 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require 'optparse'
4
-
5
# Command-line options. :win is the window size in base pairs, :i the input.
o = {win: 1000}
ARGV << '-h' if ARGV.empty?
OptionParser.new do |opt|
  opt.banner = "
Estimates the sequencing depth per windows from a BedGraph file.

IMPORTANT: This script doesn't consider zero-coverage positions if missing
from the file. If you produce your BedGraph file with bedtools genomecov and
want to consider zero-coverage position, be sure to use -bga (not -bg).

Usage: #{$0} [options]"
  opt.separator ''
  opt.on('-i', '--input PATH',
    'Input BedGraph file (mandatory).'){ |v| o[:i]=v }
  opt.on('-w', '--win INT',
    'Window size, in base pairs.', "By default: #{o[:win]}."
  ){ |v| o[:win]=v.to_i }
  opt.on('-h', '--help', 'Display this screen.') do
    puts opt
    exit
  end
  opt.separator ''
end.parse!
abort '-i is mandatory.' if o[:i].nil?
# A window of zero or negative size (including non-numeric input, which
# to_i turns into 0) never lets the window end advance past the running
# length, so the reporting loop would never terminate.
abort '-w must be a positive integer.' unless o[:win] > 0
29
-
30
# Prints one tab-separated line for a window: start +a+, end +b+, the
# count-weighted average of the indexes of +d+ (0.0 when +d+ has no non-nil
# entries), and the comma-joined keys of +seqs+.
def report(d, a, b, seqs)
  counts = d.compact
  mean_depth =
    if counts.empty?
      0.0
    else
      weighted = 0
      d.each_with_index { |n, depth| weighted += depth * n unless n.nil? }
      weighted.to_f / counts.inject(:+)
    end

  puts [a, b, mean_depth, seqs.keys.join(',')].join("\t")
end
41
-
42
# Read BedGraph
# d[depth] holds the number of positions at that depth in the current window,
# ln the running number of positions read, a..b the current window, and seqs
# the sequence names contributing to it.
d = []
ln = 0
a = 1
seqs = {}
b = o[:win]
File.open(o[:i], "r") do |ifh|
  ifh.each_line do |i|
    # Skip comments, track/browser header lines, and blank lines; none of
    # them carry the four tab-separated fields parsed below.
    next if i =~ /^#/ || i =~ /^(?:track|browser) / || i.strip.empty?
    r = i.chomp.split("\t")
    sq = r.shift
    abort "Malformed BedGraph line: #{i.chomp}" if r.size < 3
    seqs[sq] = 1
    r.map!{ |j| j.to_i }
    l = r[1]-r[0]   # interval length = end - start
    d[ r[2] ] ||= 0
    d[ r[2] ] += l
    ln += l
    # Emit every window completed by this interval; positions beyond the
    # window end are carried over to the next window at the same depth.
    while ln >= b
      d[ r[2] ] -= (ln-b)
      report(d, a, b, seqs)
      seqs = {}
      seqs[ sq ] = 1 if ln > b
      d = []
      d[ r[2] ] = (ln-b)
      a = b + 1
      b = a + o[:win] - 1
    end
  end
end
71
-
@@ -1,102 +0,0 @@
1
- #!/usr/bin/env perl
2
- #
3
- # @authors Konstantinos Konstantinidis (initial version)
4
- # modified to work with the BLASTp 2.2.25+ m0 output by
5
- # Despina Tsementzi & Luis M. Rodriguez-R
6
- # @updated Dec-21-2015
7
- #
8
-
9
-
10
# Records are split at "Lambda " so that each read returns one chunk of the
# report (presumably one query, ending at its statistics table - confirm
# against the BLAST version in use).
$/ = "Lambda ";
use strict;

# State shared between the Query / cigar / Sbjct lines of an alignment row
my @query;
my @subject;
my @similarity;
my $length = 0;

my($cigar_chr, $blast) = @ARGV;

($cigar_chr and $blast) or die "
.Description:
Counts the different AA substitutions in the best hit blast alignments, from
a BLASTP pairwise format output (-outfmt 0 in BLAST+, -m 0 in legacy BLAST).

.Usage: $0 cigar_char blast.m0.txt > aa-subs.list

cigar_char Use '+' for similar substitutions, use '_' for non similar
substitutions
blast.m0.txt Blast in 'text' format (-outfmt/-m 0).
aa-subs.list A tab-delimited raw file with one substitution per row and
columns:
(1) Name-of-query_Name-of-subject
(2) AA-in-subject
(3) AA-in-query
(4) Total-Align-Length

";

# For each blast result (i.e., for each query)
open my $blast_fh, "<", $blast or die "Cannot read file: $blast: $!\n";
while (my $data = <$blast_fh>) {
    $data =~ s/\r//g;
    my ($data_q, @array_matches) = split(/>/, $data);
    my ($name_query) = ($data_q =~ /Query\= (\S+?)(?:_GENE|\s)/);
    # Legacy BLAST reports "(N letters)", BLAST+ reports "Length=N"
    my ($length_query) = ($data_q =~ /\(([\d,]+) letters/ );
    ($length_query) = ($data_q =~ /Length=([\d,]+)/) unless $length_query;
    # Chunks without a query length cannot pass the filters below
    next unless defined $length_query;
    $length_query =~ tr/,//d;

    # For each alignment (i.e., for each HSP),
    # note the "last" at the end of the block,
    # so only the best match is considered
    foreach my $data_f (@array_matches) {
        # Capture statistics
        my ($length_match) = ($data_f =~ /Identities = \d+\/(\d+)/);
        my ($identity_match) = ($data_f =~ /Identities = \d+\/\d+ \((\d+)%/);
        my ($target_name) = ($data_f =~ /^\s?(\S+)/);

        # If the alignment meets minimum requirements
        if ($length_query >30 && ($length_match/$length_query > 0.7) && $identity_match > 60) {
            # Spaces become '_' so that '_' can be used as the cigar
            # character for non-similar columns
            $data_f =~ tr/ /_/;
            my @array = split ("\n", $data_f);
            my $blanks = 0;
            my $prefix_size = 0;

            # For each line in the alignment
            for my $data_fff (@array) {
                if ($data_fff =~ /(Query[:_]_+\d+_+)([^_]+)/) {
                    # Query lines
                    $prefix_size = length($1);
                    $length = length($2);
                    @query = split (//, $2);
                }
                elsif ($data_fff =~ /^_{11}/) {
                    # Cigar lines
                    @similarity = split(//, substr($data_fff, $prefix_size, $length));
                }
                elsif ($data_fff =~ /Sbjct[:_]_+\d+_+([^_]+)/) {
                    # Subject lines
                    @subject = split(//, $1);
                    # For each alignment column; the valid indexes are
                    # 0 .. $length-1 (the original also visited $length)
                    for my $i (0 .. $length - 1) {
                        next unless defined $similarity[$i];
                        if ($similarity[$i] eq $cigar_chr) {
                            print "$name_query\_$target_name\t$subject[$i]\t$query[$i]\t$length_match\n";
                        }
                    }
                    undef @query;
                    undef @similarity;
                    undef @subject;
                }

                # Remove secondary alignments
                if ($data_fff =~ /^$/) {
                    $blanks++;
                    last if $blanks >= 3;
                }
                else {
                    $blanks = 0;
                }
            } # for my $data_fff (@array)
        } # if ($length_query >30 ...
        last; # <---- So it takes only the best match!
    } # foreach my $data_f (@array_matches)
} # while(my $data=<$blast_fh>)
close $blast_fh;
102
-