miga-base 1.2.15.2 → 1.2.15.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306)
  1. checksums.yaml +4 -4
  2. data/lib/miga/cli/action/download/gtdb.rb +4 -1
  3. data/lib/miga/cli/action/gtdb_get.rb +4 -0
  4. data/lib/miga/daemon.rb +4 -1
  5. data/lib/miga/lair.rb +6 -4
  6. data/lib/miga/remote_dataset/download.rb +3 -2
  7. data/lib/miga/remote_dataset.rb +25 -7
  8. data/lib/miga/taxonomy.rb +6 -0
  9. data/lib/miga/version.rb +2 -2
  10. metadata +6 -302
  11. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +0 -41964
  12. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +0 -32439
  13. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +0 -62056
  14. data/utils/FastAAI/FastAAI +0 -3659
  15. data/utils/FastAAI/FastAAI-legacy/FastAAI +0 -1336
  16. data/utils/FastAAI/FastAAI-legacy/kAAI_v1.0_virus.py +0 -1296
  17. data/utils/FastAAI/README.md +0 -84
  18. data/utils/enveomics/Docs/recplot2.md +0 -244
  19. data/utils/enveomics/Examples/aai-matrix.bash +0 -66
  20. data/utils/enveomics/Examples/ani-matrix.bash +0 -66
  21. data/utils/enveomics/Examples/essential-phylogeny.bash +0 -105
  22. data/utils/enveomics/Examples/unus-genome-phylogeny.bash +0 -100
  23. data/utils/enveomics/LICENSE.txt +0 -73
  24. data/utils/enveomics/Makefile +0 -52
  25. data/utils/enveomics/Manifest/Tasks/aasubs.json +0 -103
  26. data/utils/enveomics/Manifest/Tasks/blasttab.json +0 -790
  27. data/utils/enveomics/Manifest/Tasks/distances.json +0 -161
  28. data/utils/enveomics/Manifest/Tasks/fasta.json +0 -802
  29. data/utils/enveomics/Manifest/Tasks/fastq.json +0 -291
  30. data/utils/enveomics/Manifest/Tasks/graphics.json +0 -126
  31. data/utils/enveomics/Manifest/Tasks/mapping.json +0 -137
  32. data/utils/enveomics/Manifest/Tasks/ogs.json +0 -382
  33. data/utils/enveomics/Manifest/Tasks/other.json +0 -906
  34. data/utils/enveomics/Manifest/Tasks/remote.json +0 -355
  35. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +0 -650
  36. data/utils/enveomics/Manifest/Tasks/tables.json +0 -308
  37. data/utils/enveomics/Manifest/Tasks/trees.json +0 -68
  38. data/utils/enveomics/Manifest/Tasks/variants.json +0 -111
  39. data/utils/enveomics/Manifest/categories.json +0 -165
  40. data/utils/enveomics/Manifest/examples.json +0 -162
  41. data/utils/enveomics/Manifest/tasks.json +0 -4
  42. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +0 -69
  43. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +0 -1
  44. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +0 -1
  45. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +0 -1
  46. data/utils/enveomics/Pipelines/assembly.pbs/README.md +0 -189
  47. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +0 -112
  48. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +0 -23
  49. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +0 -44
  50. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +0 -50
  51. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +0 -37
  52. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +0 -68
  53. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +0 -49
  54. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +0 -80
  55. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +0 -57
  56. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +0 -63
  57. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +0 -38
  58. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +0 -73
  59. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +0 -21
  60. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +0 -72
  61. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +0 -98
  62. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +0 -1
  63. data/utils/enveomics/Pipelines/blast.pbs/README.md +0 -127
  64. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +0 -109
  65. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +0 -128
  66. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +0 -16
  67. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +0 -22
  68. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +0 -26
  69. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +0 -89
  70. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +0 -29
  71. data/utils/enveomics/Pipelines/idba.pbs/README.md +0 -49
  72. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +0 -95
  73. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +0 -56
  74. data/utils/enveomics/Pipelines/trim.pbs/README.md +0 -54
  75. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +0 -70
  76. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +0 -130
  77. data/utils/enveomics/README.md +0 -42
  78. data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +0 -171
  79. data/utils/enveomics/Scripts/Aln.cat.rb +0 -221
  80. data/utils/enveomics/Scripts/Aln.convert.pl +0 -35
  81. data/utils/enveomics/Scripts/AlphaDiversity.pl +0 -152
  82. data/utils/enveomics/Scripts/BedGraph.tad.rb +0 -93
  83. data/utils/enveomics/Scripts/BedGraph.window.rb +0 -71
  84. data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +0 -102
  85. data/utils/enveomics/Scripts/BlastTab.addlen.rb +0 -63
  86. data/utils/enveomics/Scripts/BlastTab.advance.bash +0 -48
  87. data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +0 -55
  88. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +0 -104
  89. data/utils/enveomics/Scripts/BlastTab.cogCat.rb +0 -76
  90. data/utils/enveomics/Scripts/BlastTab.filter.pl +0 -47
  91. data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +0 -194
  92. data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +0 -104
  93. data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +0 -157
  94. data/utils/enveomics/Scripts/BlastTab.recplot2.R +0 -48
  95. data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +0 -86
  96. data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +0 -119
  97. data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +0 -86
  98. data/utils/enveomics/Scripts/BlastTab.subsample.pl +0 -47
  99. data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +0 -114
  100. data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +0 -90
  101. data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +0 -123
  102. data/utils/enveomics/Scripts/Chao1.pl +0 -97
  103. data/utils/enveomics/Scripts/CharTable.classify.rb +0 -234
  104. data/utils/enveomics/Scripts/EBIseq2tax.rb +0 -83
  105. data/utils/enveomics/Scripts/FastA.N50.pl +0 -60
  106. data/utils/enveomics/Scripts/FastA.extract.rb +0 -152
  107. data/utils/enveomics/Scripts/FastA.filter.pl +0 -52
  108. data/utils/enveomics/Scripts/FastA.filterLen.pl +0 -28
  109. data/utils/enveomics/Scripts/FastA.filterN.pl +0 -60
  110. data/utils/enveomics/Scripts/FastA.fragment.rb +0 -100
  111. data/utils/enveomics/Scripts/FastA.gc.pl +0 -42
  112. data/utils/enveomics/Scripts/FastA.interpose.pl +0 -93
  113. data/utils/enveomics/Scripts/FastA.length.pl +0 -38
  114. data/utils/enveomics/Scripts/FastA.mask.rb +0 -89
  115. data/utils/enveomics/Scripts/FastA.per_file.pl +0 -36
  116. data/utils/enveomics/Scripts/FastA.qlen.pl +0 -57
  117. data/utils/enveomics/Scripts/FastA.rename.pl +0 -65
  118. data/utils/enveomics/Scripts/FastA.revcom.pl +0 -23
  119. data/utils/enveomics/Scripts/FastA.sample.rb +0 -98
  120. data/utils/enveomics/Scripts/FastA.slider.pl +0 -85
  121. data/utils/enveomics/Scripts/FastA.split.pl +0 -55
  122. data/utils/enveomics/Scripts/FastA.split.rb +0 -79
  123. data/utils/enveomics/Scripts/FastA.subsample.pl +0 -131
  124. data/utils/enveomics/Scripts/FastA.tag.rb +0 -65
  125. data/utils/enveomics/Scripts/FastA.toFastQ.rb +0 -69
  126. data/utils/enveomics/Scripts/FastA.wrap.rb +0 -48
  127. data/utils/enveomics/Scripts/FastQ.filter.pl +0 -54
  128. data/utils/enveomics/Scripts/FastQ.interpose.pl +0 -90
  129. data/utils/enveomics/Scripts/FastQ.maskQual.rb +0 -89
  130. data/utils/enveomics/Scripts/FastQ.offset.pl +0 -90
  131. data/utils/enveomics/Scripts/FastQ.split.pl +0 -53
  132. data/utils/enveomics/Scripts/FastQ.tag.rb +0 -70
  133. data/utils/enveomics/Scripts/FastQ.test-error.rb +0 -81
  134. data/utils/enveomics/Scripts/FastQ.toFastA.awk +0 -24
  135. data/utils/enveomics/Scripts/GFF.catsbj.pl +0 -127
  136. data/utils/enveomics/Scripts/GenBank.add_fields.rb +0 -84
  137. data/utils/enveomics/Scripts/HMM.essential.rb +0 -351
  138. data/utils/enveomics/Scripts/HMM.haai.rb +0 -168
  139. data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +0 -83
  140. data/utils/enveomics/Scripts/JPlace.distances.rb +0 -88
  141. data/utils/enveomics/Scripts/JPlace.to_iToL.rb +0 -320
  142. data/utils/enveomics/Scripts/M5nr.getSequences.rb +0 -81
  143. data/utils/enveomics/Scripts/MeTaxa.distribution.pl +0 -198
  144. data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +0 -35
  145. data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +0 -49
  146. data/utils/enveomics/Scripts/NCBIacc2tax.rb +0 -92
  147. data/utils/enveomics/Scripts/Newick.autoprune.R +0 -27
  148. data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +0 -228
  149. data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +0 -32
  150. data/utils/enveomics/Scripts/RefSeq.download.bash +0 -48
  151. data/utils/enveomics/Scripts/SRA.download.bash +0 -55
  152. data/utils/enveomics/Scripts/TRIBS.plot-test.R +0 -36
  153. data/utils/enveomics/Scripts/TRIBS.test.R +0 -39
  154. data/utils/enveomics/Scripts/Table.barplot.R +0 -31
  155. data/utils/enveomics/Scripts/Table.df2dist.R +0 -30
  156. data/utils/enveomics/Scripts/Table.filter.pl +0 -61
  157. data/utils/enveomics/Scripts/Table.merge.pl +0 -77
  158. data/utils/enveomics/Scripts/Table.prefScore.R +0 -60
  159. data/utils/enveomics/Scripts/Table.replace.rb +0 -69
  160. data/utils/enveomics/Scripts/Table.round.rb +0 -63
  161. data/utils/enveomics/Scripts/Table.split.pl +0 -57
  162. data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +0 -227
  163. data/utils/enveomics/Scripts/VCF.KaKs.rb +0 -147
  164. data/utils/enveomics/Scripts/VCF.SNPs.rb +0 -88
  165. data/utils/enveomics/Scripts/aai.rb +0 -421
  166. data/utils/enveomics/Scripts/ani.rb +0 -362
  167. data/utils/enveomics/Scripts/anir.rb +0 -137
  168. data/utils/enveomics/Scripts/clust.rand.rb +0 -102
  169. data/utils/enveomics/Scripts/gi2tax.rb +0 -103
  170. data/utils/enveomics/Scripts/in_silico_GA_GI.pl +0 -96
  171. data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
  172. data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
  173. data/utils/enveomics/Scripts/lib/enveomics.R +0 -1
  174. data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +0 -293
  175. data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +0 -175
  176. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +0 -24
  177. data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +0 -17
  178. data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +0 -30
  179. data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +0 -253
  180. data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +0 -88
  181. data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +0 -182
  182. data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +0 -49
  183. data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +0 -74
  184. data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +0 -237
  185. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +0 -31
  186. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +0 -152
  187. data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +0 -3
  188. data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +0 -74
  189. data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +0 -135
  190. data/utils/enveomics/Scripts/ogs.annotate.rb +0 -88
  191. data/utils/enveomics/Scripts/ogs.core-pan.rb +0 -160
  192. data/utils/enveomics/Scripts/ogs.extract.rb +0 -125
  193. data/utils/enveomics/Scripts/ogs.mcl.rb +0 -186
  194. data/utils/enveomics/Scripts/ogs.rb +0 -104
  195. data/utils/enveomics/Scripts/ogs.stats.rb +0 -131
  196. data/utils/enveomics/Scripts/rbm-legacy.rb +0 -172
  197. data/utils/enveomics/Scripts/rbm.rb +0 -108
  198. data/utils/enveomics/Scripts/sam.filter.rb +0 -148
  199. data/utils/enveomics/Tests/Makefile +0 -10
  200. data/utils/enveomics/Tests/Mgen_M2288.faa +0 -3189
  201. data/utils/enveomics/Tests/Mgen_M2288.fna +0 -8282
  202. data/utils/enveomics/Tests/Mgen_M2321.fna +0 -8288
  203. data/utils/enveomics/Tests/Nequ_Kin4M.faa +0 -2970
  204. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
  205. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +0 -7
  206. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +0 -17
  207. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +0 -137
  208. data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +0 -123
  209. data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +0 -200
  210. data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +0 -55
  211. data/utils/enveomics/Tests/alkB.nwk +0 -1
  212. data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +0 -13
  213. data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +0 -17
  214. data/utils/enveomics/Tests/hiv1.faa +0 -59
  215. data/utils/enveomics/Tests/hiv1.fna +0 -134
  216. data/utils/enveomics/Tests/hiv2.faa +0 -70
  217. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +0 -233
  218. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +0 -1
  219. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +0 -233
  220. data/utils/enveomics/Tests/phyla_counts.tsv +0 -10
  221. data/utils/enveomics/Tests/primate_lentivirus.ogs +0 -11
  222. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +0 -9
  223. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +0 -8
  224. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +0 -6
  225. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +0 -9
  226. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +0 -6
  227. data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +0 -6
  228. data/utils/enveomics/build_enveomics_r.bash +0 -45
  229. data/utils/enveomics/enveomics.R/DESCRIPTION +0 -31
  230. data/utils/enveomics/enveomics.R/NAMESPACE +0 -39
  231. data/utils/enveomics/enveomics.R/R/autoprune.R +0 -155
  232. data/utils/enveomics/enveomics.R/R/barplot.R +0 -184
  233. data/utils/enveomics/enveomics.R/R/cliopts.R +0 -135
  234. data/utils/enveomics/enveomics.R/R/df2dist.R +0 -154
  235. data/utils/enveomics/enveomics.R/R/growthcurve.R +0 -331
  236. data/utils/enveomics/enveomics.R/R/prefscore.R +0 -79
  237. data/utils/enveomics/enveomics.R/R/recplot.R +0 -354
  238. data/utils/enveomics/enveomics.R/R/recplot2.R +0 -1631
  239. data/utils/enveomics/enveomics.R/R/tribs.R +0 -583
  240. data/utils/enveomics/enveomics.R/R/utils.R +0 -80
  241. data/utils/enveomics/enveomics.R/README.md +0 -81
  242. data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
  243. data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
  244. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -16
  245. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -16
  246. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -16
  247. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +0 -25
  248. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +0 -46
  249. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +0 -23
  250. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +0 -47
  251. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +0 -23
  252. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +0 -23
  253. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +0 -40
  254. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +0 -103
  255. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +0 -67
  256. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +0 -24
  257. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +0 -19
  258. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +0 -45
  259. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +0 -44
  260. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +0 -47
  261. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +0 -75
  262. data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +0 -50
  263. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +0 -44
  264. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +0 -139
  265. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +0 -45
  266. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +0 -24
  267. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +0 -77
  268. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +0 -25
  269. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +0 -21
  270. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +0 -19
  271. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +0 -19
  272. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +0 -47
  273. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +0 -29
  274. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +0 -18
  275. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +0 -45
  276. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +0 -36
  277. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +0 -19
  278. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +0 -19
  279. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +0 -27
  280. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +0 -52
  281. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +0 -17
  282. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +0 -51
  283. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +0 -43
  284. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +0 -82
  285. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +0 -59
  286. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +0 -27
  287. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +0 -36
  288. data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +0 -23
  289. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +0 -68
  290. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +0 -28
  291. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +0 -27
  292. data/utils/enveomics/enveomics.R/man/growth.curves.Rd +0 -14
  293. data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +0 -13
  294. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +0 -78
  295. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +0 -46
  296. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +0 -45
  297. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +0 -125
  298. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +0 -19
  299. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +0 -19
  300. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +0 -19
  301. data/utils/enveomics/globals.mk +0 -8
  302. data/utils/enveomics/manifest.json +0 -9
  303. data/utils/multitrim/Multitrim How-To.pdf +0 -0
  304. data/utils/multitrim/README.md +0 -67
  305. data/utils/multitrim/multitrim.py +0 -1555
  306. data/utils/multitrim/multitrim.yml +0 -13
@@ -1,63 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- #
4
- # @author: Luis M. Rodriguez-R
5
- # @license: artistic license 2.0
6
- #
7
-
8
- require 'optparse'
9
-
10
- o = { sbj: false, q: false }
11
- ARGV << '-h' if ARGV.size == 0
12
- OptionParser.new do |opts|
13
- opts.banner = "
14
- Appends an extra column to a BLAST with the length of the query or the subject
15
- sequence. You can pipe two instances to add both:
16
- cat input.blast | #{$0} -f queries.fa | #{$0} -f subjects.fa -s > output.blast
17
-
18
- Usage: #{$0} [options] < input.blast > output.blast"
19
- opts.separator ''
20
- opts.separator 'Mandatory'
21
- opts.on('-f', '--fasta FILE', 'Path to the FastA file'){ |v| o[:fasta] = v }
22
- opts.separator ''
23
- opts.separator 'Options'
24
- opts.on('-s', '--subject',
25
- 'Use the subject column of the BLAST, by default the query column is used'
26
- ){ o[:sbj] = true }
27
- opts.on('-q', '--quiet', 'Run quietly (no STDERR output)'){ o[:q] = true }
28
- opts.on('-h', '--help', 'Display this screen') do
29
- puts opts
30
- exit
31
- end
32
- opts.separator ''
33
- end.parse!
34
- abort '-f is mandatory' if o[:fasta].nil?
35
-
36
- len = {}
37
- id = ''
38
- $stderr.puts "Reading FastA file: #{o[:fasta]}" unless o[:q]
39
- fh = File.open(o[:fasta], 'r')
40
- fh.each_line do |ln|
41
- defline = /^>(\S+)/.match(ln)
42
- if defline.nil?
43
- ln.gsub! /[^A-Za-z]/, ''
44
- abort 'Error: Unsupported format, expecting FastA' if len[id].nil?
45
- len[id] = len[id] + ln.size
46
- else
47
- id = defline[1]
48
- len[id] = 0
49
- end
50
- end
51
- fh.close
52
-
53
- unless o[:q]
54
- $stderr.puts 'Appending %s length column' % (o[:sbj] ? 'subject' : 'query')
55
- end
56
- ARGF.each_line do |ln|
57
- ln.chomp!
58
- row = ln.split /\t/
59
- id = o[:sbj] ? row[1] : row[0]
60
- abort "Impossible to find sequence of #{id}" if len[id].nil?
61
- puts "#{ln}\t#{len[id]}"
62
- end
63
-
@@ -1,48 +0,0 @@
1
- #!/bin/bash
2
-
3
- #
4
- # @author Luis M. Rodriguez-R
5
- # @update Mar-23-2016
6
- # @license artistic license 2.0
7
- #
8
-
9
- if [[ ! $2 ]] ; then
10
- echo "
11
- .DESCRIPTION
12
- Calculates the percentage of a partial BLAST result. The
13
- value produced slightly subestimates the actual advance,
14
- due to un-flushed output and trailing queries that could
15
- be processed but generate no results.
16
-
17
- .USAGE
18
- $0 blast.txt qry.fasta
19
-
20
- blast.txt Incomplete Tabular BLAST output.
21
- qry.fasta FastA file with query sequences.
22
- ";
23
- exit 1;
24
- fi
25
-
26
- if [[ ! -r $1 ]]; then
27
- echo "Cannot open file: $1";
28
- exit 1;
29
- fi
30
-
31
- if [[ ! -r $2 ]]; then
32
- echo "Cannot open file: $2";
33
- exit 1;
34
- fi
35
-
36
- LAST_Q=`tail -n 2 $1 | head -n 1 | awk '{print $1}'`
37
- LAST_Q_NO=`grep -n "^>$LAST_Q\\( \\|$\\)" $2 | sed -e 's/:.*//'`
38
- if [[ ! $LAST_Q_NO ]]; then
39
- echo "Cannot find sequence: $LAST_Q";
40
- echo "Make sure you are providing the right query file.";
41
- exit 1;
42
- fi
43
- TOTAL_Q_NO=`cat $2 | wc -l | sed -e 's/ *//'`
44
- let PERC=100*$LAST_Q_NO/$TOTAL_Q_NO
45
-
46
- echo "$PERC%: $LAST_Q_NO / $TOTAL_Q_NO"
47
- exit 0;
48
-
@@ -1,55 +0,0 @@
1
- #!/usr/bin/env perl
2
-
3
- #
4
- # @author: Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
5
- # @license: artistic license 2.0
6
- # @last_update: Mar-23-2015
7
- #
8
-
9
- use strict;
10
- use warnings;
11
-
12
- die "
13
- Usage:
14
- sort blast.txt ... | $0 > blast.bh.txt
15
- $0 blast_sorted.txt ... > blast.bh.txt
16
- $0 -h|--help|-?
17
-
18
- blast.txt ... One or more files in Tabular BLAST format.
19
- blast_sorted.txt ... One or more files in Tabular BLAST format pre-sorted.
20
- blast.bh.txt Output file in BLAST format containing best-hits only.
21
- -h|--help|-? Any of these flags trigger this help message and exits.
22
-
23
- NOTE: This script assumes that the BLAST is sorted. Because it can read
24
- from the STDIN, calling this script without arguments cause it to still until
25
- killed or until an EOF (^D) is presented.
26
-
27
- " if exists $ARGV[0] and $ARGV[0] =~ /^\-?\-(h(elp)?|\?)/i;
28
-
29
- my $last_qry = '';
30
- my @best_res;
31
-
32
- sub best_result($$){
33
- my($r1, $r2)=@_;
34
- return $r1 unless $r2;
35
- return $r1->[11] > $r2->[11] ? @$r1 : @$r2;
36
- }
37
-
38
- my $i=0;
39
- while(<>){
40
- chomp;
41
- #print STDERR " Reading entry $i... \r" unless $i%1000;
42
- my @res = split /\t/;
43
- die "\nCannot parse BLAST line $.: $_\n" unless exists $res[1];
44
- if($last_qry eq $res[0]){
45
- @best_res = &best_result(\@res, \@best_res);
46
- }else{
47
- print join("\t", @best_res), "\n" if $#best_res>0;
48
- @best_res = @res;
49
- $last_qry = $res[0];
50
- }
51
- }
52
- print join("\t", @best_res), "\n" if @best_res;
53
-
54
-
55
-
@@ -1,104 +0,0 @@
1
- #!/usr/bin/env perl
2
-
3
- # @author: Luis M. Rodriguez-R
4
- # @license: Artistic-2.0
5
-
6
- use warnings;
7
- use strict;
8
- use List::Util qw/min max/;
9
- use Getopt::Std;
10
-
11
- sub HELP_MESSAGE { die "
12
-
13
- Description:
14
- Generates a list of hits from a BLAST result concatenating the subject
15
- sequences. This can be used, e.g., to analyze BLAST results against
16
- draft genomes.
17
-
18
- Usage:
19
- $0 [options] seq.fa map.bls
20
-
21
- seq.fa Subject sequences (ref) in FastA format.
22
- map.bls Mapping of the reads to the reference in BLAST Tabular
23
- format.
24
-
25
- Options:
26
- -i <float> Minimum identity to report a result. By default: 70.
27
- -l <int> Minimum alignment length to report a result. By default: 60.
28
- -s The FastA provided is to be treated as a subset of the subject.
29
- By default, it expects all the BLAST subjects to be present in
30
- the FastA.
31
- -q Run quietly.
32
- -h Display this message and exit.
33
-
34
- This script creates two files using <map.bls> as prefix with extensions
35
- .rec (for the recruitment plot) and .lim (for the limits of the different
36
- sequences in <seq.fa>).
37
-
38
- ";}
39
-
40
- my %o;
41
- getopts('i:l:sqh', \%o);
42
- my($fa, $map) = @ARGV;
43
- ($fa and $map) or &HELP_MESSAGE;
44
- $o{h} and &HELP_MESSAGE;
45
- $o{i} ||= 70;
46
- $o{l} ||= 60;
47
-
48
- my %seq = ();
49
- my @seq = ();
50
- my $tot = 0;
51
-
52
- SEQ: {
53
- print STDERR "== Reading reference sequences\n" unless $o{q};
54
- open FA, "<", $fa or die "Cannot read the file: $fa: $!\n";
55
- my $cur_seq = '';
56
- while(<FA>){
57
- chomp;
58
- if(m/^>(\S+)/){
59
- my $c = $1;
60
- $seq{$c} = exists $seq{$cur_seq} ? $seq{$cur_seq}+1 : 1;
61
- push @seq, $c;
62
- $cur_seq = $c;
63
- }else{
64
- s/[^A-Za-z]//g;
65
- $seq{$cur_seq} += length $_;
66
- }
67
- }
68
- close FA;
69
- print STDERR " Found ".(scalar @seq)." sequences.\n" unless $o{q};
70
- }
71
-
72
- open LIM, ">", "$map.lim" or die "Cannot create the file: $map.lim: $!\n";
73
- my $l = 0;
74
- for my $s (@seq){
75
- print LIM "$s\t".(++$l)."\t$seq{$s}\n";
76
- ($l, $seq{$s}) = ($seq{$s}, $l);
77
- }
78
- close LIM;
79
-
80
- MAP: {
81
- print STDERR "== Reading mapping\n" unless $o{q};
82
- open BLS, "<", $map or die "Cannot read the file: $map: $!\n";
83
- open REC, ">", "$map.rec" or die "Cannot create the file: $map.rec: $!\n";
84
- RESULT: while(<BLS>){
85
- chomp;
86
- my @ln = split /\t/;
87
- $ln[11] or die "Cannot parse line $map:$.: $_\n";
88
- next unless $ln[3]>=$o{l};
89
- next unless $ln[2]>=$o{i};
90
- unless(exists $seq{$ln[1]}){
91
- die "Cannot find the subject sequence: $ln[1]\n" unless $o{s};
92
- next RESULT;
93
- }
94
- my $start = $seq{$ln[1]}+min($ln[8], $ln[9]);
95
- my $end = $seq{$ln[1]}+max($ln[8], $ln[9]);
96
- print REC "$start\t$end\t$ln[2]\t$ln[11]\t$ln[0]",
97
- (exists($ln[13])?"\t".($ln[2]*$ln[3]/min($ln[12],$ln[13]))."\t":
98
- exists($ln[12])?"\t".($ln[2]*$ln[3]/$ln[12])."\t":""),"\n";
99
- }
100
- close BLS;
101
- close REC;
102
- print STDERR " done.\n" unless $o{q};
103
- }
104
-
@@ -1,76 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- #
4
- # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
5
- # @update Mar-23-2016
6
- # @license artistic license 2.0
7
- #
8
-
9
- require "optparse"
10
-
11
- o = {:cog=>false, :desc=>false, :q=>false, :w=>true}
12
- ARGV << "-h" if ARGV.size==0
13
- OptionParser.new do |opts|
14
- opts.banner = "Replaces the COG gene IDs in a BLAST for the COG category."
15
- opts.separator ""
16
- opts.separator "Mandatory"
17
- opts.on("-w", "--whog FILE", "Path to the whog file."){ |v| o[:whog]=v }
18
- opts.on("-i", "--blast FILE",
19
- "Path to the Tabular BLAST file with COG IDs as subject."
20
- ){ |v| o[:blast]=v }
21
- opts.separator ""
22
- opts.separator "Optional"
23
- opts.on("-g", "--cog",
24
- "If set, returns the COG ID, not the COG category."){ o[:cog]=true }
25
- opts.on("-d", "--desc",
26
- "Includes COG description (requires -g/--cog)."){ o[:desc]=true }
27
- opts.on("-n", "--noverbose", "Run quietly, but show warnings."){ o[:q]=true }
28
- opts.on("-q", "--quiet", "Run quietly."){ o[:q]=true; o[:w]=false }
29
- opts.on("-h", "--help", "Display this screen") do
30
- puts opts
31
- exit
32
- end
33
- opts.separator ""
34
- end.parse!
35
-
36
- abort "-w/--whog is mandatory." if o[:whog].nil?
37
- abort "-i/--blast is mandatory." if o[:blast].nil?
38
-
39
- $stderr.puts "Parsing whog file." unless o[:q]
40
- cat = {}
41
- curCats = []
42
- fh = File.open o[:whog], "r"
43
- while ln=fh.gets
44
- ln.chomp!
45
- next if /^\s*$/.match ln
46
- if m=/^\[([A-Z]+)\] (COG\d+) (.*)/.match(ln)
47
- curCats = o[:cog] ? [ m[2]+(o[:desc]?" #{m[3]}":"") ] : m[1].split(//)
48
- elsif /^_+$/.match ln
49
- curCats = []
50
- elsif m=/^\s+(?:.+?:\s+)?(.*)/.match(ln)
51
- m[1].split(/\s+/).each do |g|
52
- cat[g] ||= []
53
- curCats.each { |i| cat[g] << i }
54
- end
55
- else
56
- abort "Impossible to parse line #{$.}: #{ln}"
57
- end
58
- end
59
- fh.close
60
-
61
- $stderr.puts "Parsing BLAST." unless o[:q]
62
- fh = File.open(o[:blast], "r")
63
- while ln=fh.gets
64
- row = ln.split(/\t/)
65
- if cat[ row[1] ].nil?
66
- $stderr.puts "Warning: line #{$.}: #{row[1]}: " +
67
- "Impossible to find category.\n" if o[:w]
68
- else
69
- cat[ row[1] ].each do |c|
70
- row[1] = c
71
- puts row.join("\t")
72
- end
73
- end
74
- end
75
- fh.close
76
-
@@ -1,47 +0,0 @@
1
- #!/usr/bin/env perl
2
- #
3
- # @author: Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
4
- # @update: Mar-23-2015
5
- # @license: artistic license 2.0
6
- #
7
-
8
- use warnings;
9
- use strict;
10
- use Getopt::Std;
11
-
12
- my %o;
13
- getopts('si', \%o);
14
- my($list, $blast) = @ARGV;
15
-
16
- ($list and $blast) or die "
17
- .Description:
18
- Extracts a subset of hits (queries or subjects) from a tabular BLAST.
19
-
20
- .Usage: $0 [options] list.txt blast.txt > subset.txt
21
-
22
- Options:
23
- -s If set, assumes that list.txt contains subject IDs.
24
- By default: assumes query IDs.
25
- -i If set, reports the inverse of the list (i.e., reports
26
- only hits absent in the list).
27
-
28
- list.txt List of IDs to extract.
29
- blast.txt Tabular BLAST file containing the superset of hits.
30
- subset.txt Tabulat BLAST file to be created.
31
-
32
- ";
33
-
34
- open LI, "<", $list or die "Cannot read file: $list: $!\n";
35
- my %li = map { chomp; $_ => 1 } <LI>;
36
- close LI;
37
-
38
- open BLAST, "<", $blast or die "Cannot read file: $blast: $!\n";
39
- while(my $ln = <BLAST>){
40
- chomp $ln;
41
- my @ln = split("\t", $ln);
42
- my $good = exists $li{$ln[ ($o{s} ? 1 : 0) ]};
43
- $good = not $good if $o{i};
44
- print "$ln\n" if $good;
45
- }
46
- close BLAST;
47
-
@@ -1,194 +0,0 @@
1
- #!/usr/bin/env perl
2
-
3
- use warnings;
4
- use strict;
5
- use LWP::Simple;
6
- use JSON;
7
- use File::Copy;
8
-
9
- my($blast, $cache_file, $max_cache) = @ARGV;
10
- ($blast) or die "
11
- Description:
12
- Takes a BLAST against KEGG_PEP (or KO) and retrieves the pathways in which the subject
13
- peptides are involved.
14
-
15
- Usage:
16
- $0 blast.txt[ cache_file] > output.txt
17
-
18
- blast.txt Input (filtered) BLAST file.
19
- cache_file (optional) File containing the saved cache. If unset, the
20
- cache won't be recoverable across instances of this script.
21
- It is strongly recommended to set a file. Multiple
22
- parallel instances of this script may use the same cache
23
- file.
24
- output.txt Tab-delimited output file, with the columns:
25
- o Query ID
26
- o Subject ID
27
- o Pathway ID
28
- o Pathway (reference) description
29
- o Organism
30
-
31
- ";
32
-
33
- $max_cache ||= 0;
34
- $cache_file ||= "";
35
-
36
- sub read_cache($){
37
- my ($cache_file) = @_;
38
- my $cache = {};
39
- my $n = 0;
40
- if($cache_file and -s $cache_file){
41
- local $/;
42
- my $json = "";
43
- while(-e "$cache_file.tmp"){
44
- print STDERR "Locked cache (read), waiting 1 sec.\n";
45
- sleep 1;
46
- }
47
- open CACHE, "<", $cache_file or die "Cannot read file: $cache_file: $!\n";
48
- while(<CACHE>){ $json.=$_ }
49
- close CACHE;
50
- $cache = decode_json($json);
51
- $n = scalar keys %$cache;
52
- }
53
- return ($cache, $n);
54
- }
55
-
56
- sub write_cache($$){
57
- my($cache, $cache_file) = @_;
58
- if($cache_file){
59
- # Get previously saved entries.
60
- my($cache2, $cache_n2) = &read_cache($cache_file);
61
- for my $k (keys %$cache2){
62
- $cache->{$k} ||= $cache2->{$k} unless $k eq "###:paths";
63
- }
64
- $cache->{'###:paths'} ||= {};
65
- for my $p (keys %{$cache2->{'###:paths'}}){
66
- $cache->{'###:paths'}->{$p} ||= $cache2->{'###:paths'}->{$p};
67
- }
68
- # Save merged cache.
69
- if(-s $cache_file){ copy $cache_file, "$cache_file.pre" or die "Cannot create file: $cache_file.tmp: $!\n" }
70
- my $json = encode_json($cache);
71
- while(-e "$cache_file.tmp"){
72
- print STDERR "Locked cache (write), waiting 1 sec.\n";
73
- sleep 1;
74
- }
75
- open CACHE, ">", "$cache_file.tmp" or die "Cannot create file: $cache_file.tmp: $!\n";
76
- print CACHE $json;
77
- close CACHE;
78
- copy "$cache_file.tmp", $cache_file or die "Cannot create file: $cache_file: $!\n";
79
- unlink "$cache_file.tmp" or die "Cannot unlink file: $cache_file.tmp: $!\n";
80
- }
81
- }
82
-
83
- sub download_pathways($$){
84
- my($cache, $ids) = @_;
85
- my @todownload = ();
86
- for my $id (@$ids){
87
- push @todownload, $id unless exists $cache->{'###:paths'}->{$id};
88
- }
89
- while($#todownload>=0){
90
- my @downloading = splice(@todownload, 0, 100);
91
- my $path = get "http://rest.kegg.jp/list/".join("+", @downloading);
92
- if($path){
93
- chomp $path;
94
- for my $p (split /\n/, $path){
95
- my @wl = split /\t/, $p;
96
- $wl[1] =~ s/ - /\t/;
97
- $cache->{'###:paths'}->{$wl[0]} = $wl[1];
98
- }
99
- }
100
- }
101
- return $cache;
102
- }
103
-
104
- sub download($$){
105
- my($cache, $todownload) = @_;
106
- $cache->{'###:paths'} ||= {};
107
- return $cache unless $#$todownload>=0;
108
- $cache->{$_} = [] for @$todownload;
109
- my $list = get "http://rest.kegg.jp/link/pathway/".join("+", @$todownload);
110
- $list ||= "";
111
- chomp $list;
112
- my @pathids = ();
113
- for my $res (split /\n/, $list){
114
- my @rel = split /\t/, $res;
115
- $#rel==1 or die "Unexpected number of columns:\n$res\n";
116
- my $id = $rel[1];
117
- push @pathids, $id;
118
- unless(exists $cache->{$rel[0]}){
119
- #print STDERR "Request/response difference in ID: ".$rel[0].", searching match.\n";
120
- for my $id (@$todownload){
121
- $rel[0] = $id if lc $id eq lc $rel[0];
122
- }
123
- die "Cannot find corresponding request.\n" unless exists $cache->{$rel[0]};
124
- }
125
- push @{ $cache->{$rel[0]} }, $id;
126
- }
127
- return &download_pathways($cache, \@pathids);
128
- }
129
-
130
- sub print_out($$){
131
- my($cache, $hits) = @_;
132
- for my $hit (@$hits){
133
- die "Impossible to find gene in cache: ".$hit->[1]."\n" unless exists $cache->{$hit->[1]};
134
- for my $path (@{$cache->{$hit->[1]}}){
135
- next if $path =~ /^path:ko\d/;
136
- unless(exists $cache->{'###:paths'}->{$path}){
137
- print STDERR "Cannot find pathway in cache: $path (from ".$hit->[1]."), emergency download\n";
138
- $cache = &download_pathways($cache, [$path]);
139
- die "Impossible to find pathway: $path.\n" unless exists $cache->{'###:paths'}->{$path};
140
- }
141
- print "", join("\t", $hit->[0], $hit->[1], $path, $cache->{'###:paths'}->{$path}), "\n";
142
- }
143
- }
144
- }
145
-
146
- print STDERR "Loading cache.\n";
147
- my ($cache, $n) = &read_cache($cache_file);
148
- print STDERR " $n entries loaded.\n";
149
- my @nopath = ();
150
- for my $k (keys %$cache){
151
- next if $k eq "###:paths";
152
- for my $p (@{ $cache->{$k} }){
153
- push @nopath, $p unless exists $cache->{'###:paths'}->{$p};
154
- }
155
- }
156
- if($#nopath>=0){
157
- print STDERR " Sanitizing ".@nopath." pathways in cache.\n";
158
- while($#nopath>=0){
159
- my @paths = ();
160
- for(1 .. 15){ push @paths, shift @nopath unless $#nopath==-1 }
161
- $cache = &download_pathways($cache, \@paths);
162
- }
163
- &write_cache($cache, $cache_file);
164
- }
165
-
166
- my $lines=0;
167
- my $downs=0;
168
- my @buff = ();
169
- my @todownload = ();
170
- print STDERR "Mapping genes.\n";
171
- open BLAST, "<", $blast or die "Cannot read file: $blast: $!\n";
172
- while(<BLAST>){
173
- chomp;
174
- my @l = split /\t/;
175
- print STDERR " Mapping line ".(++$lines).". \r";
176
- unless(($#todownload+2)%100){
177
- print STDERR "+\r";
178
- print STDERR " *\r" unless ++$downs%10;
179
- $cache = &download($cache, \@todownload);
180
- @todownload = ();
181
- &print_out($cache, \@buff);
182
- @buff = ();
183
- &write_cache($cache, $cache_file) unless $downs%10;
184
- }
185
- push @buff, \@l;
186
- push @todownload, $l[1] unless exists $cache->{$l[1]};
187
- }
188
- print STDERR "\nDone.\n";
189
- close BLAST;
190
-
191
- $cache = &download($cache, \@todownload);
192
- &print_out($cache, \@buff);
193
- &write_cache($cache, $cache_file);
194
-
@@ -1,104 +0,0 @@
1
- #!/usr/bin/env perl
2
-
3
- #
4
- # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
5
- # @update Mar-23-2015
6
- # @license artistic license 2.0
7
- #
8
-
9
- use warnings;
10
- use strict;
11
- use Getopt::Std;
12
-
13
-
14
- sub HELP_MESSAGE { die "
15
- Usage:
16
- $0 [options] genes.txt blast.txt ... > blast_metaxa.txt
17
-
18
- genes.gff2 File containing the genes in any supported format
19
- (see option -f).
20
- blast.txt ... One or more tabular BLAST files.
21
- blast_metaxa.txt Input file for MeTaxa.
22
-
23
- Options:
24
- -l <float> Minimum fraction of the gene aligned to consider a
25
- hit. By default: 0.75. Ignored if -f 'no'.
26
- -f <str> Format of the genes prediction. Any of:
27
- o gff2: GFF v2 as produced by MetaGeneMark.hmm.
28
- o gff3: GFF v3 with id field in the last column.
29
- o tab: Tabular file with columns gene, gene length,
30
- and contig.
31
- o no: Ignores genes file.
32
- By default: gff2.
33
- -q Run quietly.
34
- -h Display this message and exit.
35
-
36
- ";}
37
-
38
- my %o;
39
- getopts('l:f:qh',\%o);
40
- my($gff, @blasts) = @ARGV;
41
- ($gff and $#blasts>=0) or &HELP_MESSAGE;
42
- $o{h} and &HELP_MESSAGE;
43
- $o{f} ||= "gff2";
44
- $o{f} = lc $o{f};
45
- $o{l} ||= 0.75;
46
-
47
- my %gene;
48
- if($o{f} ne 'no'){
49
- print STDERR "Reading genes collection.\n" unless $o{q};
50
- open GFF, "<", $gff or die "Cannot read file: $gff: $!\n";
51
- while(<GFF>){
52
- next if /^#/;
53
- next if /^\s*$/;
54
- chomp;
55
- my @ln = split /\t/;
56
- if($o{f} eq 'gff2'){
57
- exists $ln[8] or die "Cannot parse line $.: $_\n";
58
- my $id = $ln[8];
59
- $id =~ s/gene_id /gene_id_/;
60
- $ln[0] =~ s/ .*//;
61
- $gene{$id} = [$ln[0], (1+$ln[4]-$ln[3])/3];
62
- }elsif($o{f} eq 'gff3'){
63
- exists $ln[8] or die "Cannot parse line $.: $_\n";
64
- $ln[8] =~ /id=([^;]+)/ or die "Cannot parse line $.: $_\n";
65
- my $id = $1;
66
- $ln[0] =~ s/ .*//;
67
- $gene{$id} = [$ln[0], (1+$ln[4]-$ln[3])/3];
68
- }elsif($o{f} eq 'tab'){
69
- exists $ln[2] or die "Cannot parse line $.: $_\n";
70
- $ln[1]+0 or die "$ln[0]: Length zero.\n";
71
- $gene{$ln[0]} = [$ln[2], $ln[1]/3];
72
- }else{
73
- die "Unsupported format: ".$o{f}.".\n";
74
- }
75
- }
76
- close GFF;
77
- }
78
-
79
- my $i=0;
80
- my $p=0;
81
- print STDERR "Generating MeTaxa input.\n" unless $o{q};
82
- for my $blast (@blasts){
83
- print STDERR " o $blast\n" unless $o{q};
84
- open BLAST, "<", $blast or die "Cannot read file: $blast: $!\n";
85
- while(<BLAST>){
86
- chomp;
87
- my @l = split /\t/;
88
- $i++;
89
- my $ctg;
90
- if($o{f} eq 'no'){
91
- $ctg = $l[0];
92
- }else{
93
- exists $gene{$l[0]} or die "Cannot find contig for gene $l[0].\n";
94
- next unless $l[3] >= $o{l}*$gene{$l[0]}->[1];
95
- $ctg = $gene{$l[0]}->[0];
96
- }
97
- $l[1] =~ m/gi\|(\d+)\|/ or die "Cannot parse GI in $l[1].\n";
98
- print "".join("\t", @l, $ctg, $l[0], $1)."\n";
99
- $p++;
100
- }
101
- close BLAST;
102
- }
103
- print STDERR " Found $i results, reported $p.\n" unless $o{q};
104
-