miga-base 1.2.15.2 → 1.2.15.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/cli/action/download/gtdb.rb +4 -1
  3. data/lib/miga/cli/action/gtdb_get.rb +4 -0
  4. data/lib/miga/daemon.rb +4 -1
  5. data/lib/miga/lair.rb +6 -4
  6. data/lib/miga/remote_dataset/download.rb +3 -2
  7. data/lib/miga/remote_dataset.rb +25 -7
  8. data/lib/miga/taxonomy.rb +6 -0
  9. data/lib/miga/version.rb +2 -2
  10. metadata +6 -302
  11. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +0 -41964
  12. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +0 -32439
  13. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +0 -62056
  14. data/utils/FastAAI/FastAAI +0 -3659
  15. data/utils/FastAAI/FastAAI-legacy/FastAAI +0 -1336
  16. data/utils/FastAAI/FastAAI-legacy/kAAI_v1.0_virus.py +0 -1296
  17. data/utils/FastAAI/README.md +0 -84
  18. data/utils/enveomics/Docs/recplot2.md +0 -244
  19. data/utils/enveomics/Examples/aai-matrix.bash +0 -66
  20. data/utils/enveomics/Examples/ani-matrix.bash +0 -66
  21. data/utils/enveomics/Examples/essential-phylogeny.bash +0 -105
  22. data/utils/enveomics/Examples/unus-genome-phylogeny.bash +0 -100
  23. data/utils/enveomics/LICENSE.txt +0 -73
  24. data/utils/enveomics/Makefile +0 -52
  25. data/utils/enveomics/Manifest/Tasks/aasubs.json +0 -103
  26. data/utils/enveomics/Manifest/Tasks/blasttab.json +0 -790
  27. data/utils/enveomics/Manifest/Tasks/distances.json +0 -161
  28. data/utils/enveomics/Manifest/Tasks/fasta.json +0 -802
  29. data/utils/enveomics/Manifest/Tasks/fastq.json +0 -291
  30. data/utils/enveomics/Manifest/Tasks/graphics.json +0 -126
  31. data/utils/enveomics/Manifest/Tasks/mapping.json +0 -137
  32. data/utils/enveomics/Manifest/Tasks/ogs.json +0 -382
  33. data/utils/enveomics/Manifest/Tasks/other.json +0 -906
  34. data/utils/enveomics/Manifest/Tasks/remote.json +0 -355
  35. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +0 -650
  36. data/utils/enveomics/Manifest/Tasks/tables.json +0 -308
  37. data/utils/enveomics/Manifest/Tasks/trees.json +0 -68
  38. data/utils/enveomics/Manifest/Tasks/variants.json +0 -111
  39. data/utils/enveomics/Manifest/categories.json +0 -165
  40. data/utils/enveomics/Manifest/examples.json +0 -162
  41. data/utils/enveomics/Manifest/tasks.json +0 -4
  42. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +0 -69
  43. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +0 -1
  44. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +0 -1
  45. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +0 -1
  46. data/utils/enveomics/Pipelines/assembly.pbs/README.md +0 -189
  47. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +0 -112
  48. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +0 -23
  49. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +0 -44
  50. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +0 -50
  51. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +0 -37
  52. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +0 -68
  53. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +0 -49
  54. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +0 -80
  55. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +0 -57
  56. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +0 -63
  57. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +0 -38
  58. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +0 -73
  59. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +0 -21
  60. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +0 -72
  61. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +0 -98
  62. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +0 -1
  63. data/utils/enveomics/Pipelines/blast.pbs/README.md +0 -127
  64. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +0 -109
  65. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +0 -128
  66. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +0 -16
  67. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +0 -22
  68. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +0 -26
  69. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +0 -89
  70. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +0 -29
  71. data/utils/enveomics/Pipelines/idba.pbs/README.md +0 -49
  72. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +0 -95
  73. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +0 -56
  74. data/utils/enveomics/Pipelines/trim.pbs/README.md +0 -54
  75. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +0 -70
  76. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +0 -130
  77. data/utils/enveomics/README.md +0 -42
  78. data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +0 -171
  79. data/utils/enveomics/Scripts/Aln.cat.rb +0 -221
  80. data/utils/enveomics/Scripts/Aln.convert.pl +0 -35
  81. data/utils/enveomics/Scripts/AlphaDiversity.pl +0 -152
  82. data/utils/enveomics/Scripts/BedGraph.tad.rb +0 -93
  83. data/utils/enveomics/Scripts/BedGraph.window.rb +0 -71
  84. data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +0 -102
  85. data/utils/enveomics/Scripts/BlastTab.addlen.rb +0 -63
  86. data/utils/enveomics/Scripts/BlastTab.advance.bash +0 -48
  87. data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +0 -55
  88. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +0 -104
  89. data/utils/enveomics/Scripts/BlastTab.cogCat.rb +0 -76
  90. data/utils/enveomics/Scripts/BlastTab.filter.pl +0 -47
  91. data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +0 -194
  92. data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +0 -104
  93. data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +0 -157
  94. data/utils/enveomics/Scripts/BlastTab.recplot2.R +0 -48
  95. data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +0 -86
  96. data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +0 -119
  97. data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +0 -86
  98. data/utils/enveomics/Scripts/BlastTab.subsample.pl +0 -47
  99. data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +0 -114
  100. data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +0 -90
  101. data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +0 -123
  102. data/utils/enveomics/Scripts/Chao1.pl +0 -97
  103. data/utils/enveomics/Scripts/CharTable.classify.rb +0 -234
  104. data/utils/enveomics/Scripts/EBIseq2tax.rb +0 -83
  105. data/utils/enveomics/Scripts/FastA.N50.pl +0 -60
  106. data/utils/enveomics/Scripts/FastA.extract.rb +0 -152
  107. data/utils/enveomics/Scripts/FastA.filter.pl +0 -52
  108. data/utils/enveomics/Scripts/FastA.filterLen.pl +0 -28
  109. data/utils/enveomics/Scripts/FastA.filterN.pl +0 -60
  110. data/utils/enveomics/Scripts/FastA.fragment.rb +0 -100
  111. data/utils/enveomics/Scripts/FastA.gc.pl +0 -42
  112. data/utils/enveomics/Scripts/FastA.interpose.pl +0 -93
  113. data/utils/enveomics/Scripts/FastA.length.pl +0 -38
  114. data/utils/enveomics/Scripts/FastA.mask.rb +0 -89
  115. data/utils/enveomics/Scripts/FastA.per_file.pl +0 -36
  116. data/utils/enveomics/Scripts/FastA.qlen.pl +0 -57
  117. data/utils/enveomics/Scripts/FastA.rename.pl +0 -65
  118. data/utils/enveomics/Scripts/FastA.revcom.pl +0 -23
  119. data/utils/enveomics/Scripts/FastA.sample.rb +0 -98
  120. data/utils/enveomics/Scripts/FastA.slider.pl +0 -85
  121. data/utils/enveomics/Scripts/FastA.split.pl +0 -55
  122. data/utils/enveomics/Scripts/FastA.split.rb +0 -79
  123. data/utils/enveomics/Scripts/FastA.subsample.pl +0 -131
  124. data/utils/enveomics/Scripts/FastA.tag.rb +0 -65
  125. data/utils/enveomics/Scripts/FastA.toFastQ.rb +0 -69
  126. data/utils/enveomics/Scripts/FastA.wrap.rb +0 -48
  127. data/utils/enveomics/Scripts/FastQ.filter.pl +0 -54
  128. data/utils/enveomics/Scripts/FastQ.interpose.pl +0 -90
  129. data/utils/enveomics/Scripts/FastQ.maskQual.rb +0 -89
  130. data/utils/enveomics/Scripts/FastQ.offset.pl +0 -90
  131. data/utils/enveomics/Scripts/FastQ.split.pl +0 -53
  132. data/utils/enveomics/Scripts/FastQ.tag.rb +0 -70
  133. data/utils/enveomics/Scripts/FastQ.test-error.rb +0 -81
  134. data/utils/enveomics/Scripts/FastQ.toFastA.awk +0 -24
  135. data/utils/enveomics/Scripts/GFF.catsbj.pl +0 -127
  136. data/utils/enveomics/Scripts/GenBank.add_fields.rb +0 -84
  137. data/utils/enveomics/Scripts/HMM.essential.rb +0 -351
  138. data/utils/enveomics/Scripts/HMM.haai.rb +0 -168
  139. data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +0 -83
  140. data/utils/enveomics/Scripts/JPlace.distances.rb +0 -88
  141. data/utils/enveomics/Scripts/JPlace.to_iToL.rb +0 -320
  142. data/utils/enveomics/Scripts/M5nr.getSequences.rb +0 -81
  143. data/utils/enveomics/Scripts/MeTaxa.distribution.pl +0 -198
  144. data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +0 -35
  145. data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +0 -49
  146. data/utils/enveomics/Scripts/NCBIacc2tax.rb +0 -92
  147. data/utils/enveomics/Scripts/Newick.autoprune.R +0 -27
  148. data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +0 -228
  149. data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +0 -32
  150. data/utils/enveomics/Scripts/RefSeq.download.bash +0 -48
  151. data/utils/enveomics/Scripts/SRA.download.bash +0 -55
  152. data/utils/enveomics/Scripts/TRIBS.plot-test.R +0 -36
  153. data/utils/enveomics/Scripts/TRIBS.test.R +0 -39
  154. data/utils/enveomics/Scripts/Table.barplot.R +0 -31
  155. data/utils/enveomics/Scripts/Table.df2dist.R +0 -30
  156. data/utils/enveomics/Scripts/Table.filter.pl +0 -61
  157. data/utils/enveomics/Scripts/Table.merge.pl +0 -77
  158. data/utils/enveomics/Scripts/Table.prefScore.R +0 -60
  159. data/utils/enveomics/Scripts/Table.replace.rb +0 -69
  160. data/utils/enveomics/Scripts/Table.round.rb +0 -63
  161. data/utils/enveomics/Scripts/Table.split.pl +0 -57
  162. data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +0 -227
  163. data/utils/enveomics/Scripts/VCF.KaKs.rb +0 -147
  164. data/utils/enveomics/Scripts/VCF.SNPs.rb +0 -88
  165. data/utils/enveomics/Scripts/aai.rb +0 -421
  166. data/utils/enveomics/Scripts/ani.rb +0 -362
  167. data/utils/enveomics/Scripts/anir.rb +0 -137
  168. data/utils/enveomics/Scripts/clust.rand.rb +0 -102
  169. data/utils/enveomics/Scripts/gi2tax.rb +0 -103
  170. data/utils/enveomics/Scripts/in_silico_GA_GI.pl +0 -96
  171. data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
  172. data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
  173. data/utils/enveomics/Scripts/lib/enveomics.R +0 -1
  174. data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +0 -293
  175. data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +0 -175
  176. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +0 -24
  177. data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +0 -17
  178. data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +0 -30
  179. data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +0 -253
  180. data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +0 -88
  181. data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +0 -182
  182. data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +0 -49
  183. data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +0 -74
  184. data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +0 -237
  185. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +0 -31
  186. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +0 -152
  187. data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +0 -3
  188. data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +0 -74
  189. data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +0 -135
  190. data/utils/enveomics/Scripts/ogs.annotate.rb +0 -88
  191. data/utils/enveomics/Scripts/ogs.core-pan.rb +0 -160
  192. data/utils/enveomics/Scripts/ogs.extract.rb +0 -125
  193. data/utils/enveomics/Scripts/ogs.mcl.rb +0 -186
  194. data/utils/enveomics/Scripts/ogs.rb +0 -104
  195. data/utils/enveomics/Scripts/ogs.stats.rb +0 -131
  196. data/utils/enveomics/Scripts/rbm-legacy.rb +0 -172
  197. data/utils/enveomics/Scripts/rbm.rb +0 -108
  198. data/utils/enveomics/Scripts/sam.filter.rb +0 -148
  199. data/utils/enveomics/Tests/Makefile +0 -10
  200. data/utils/enveomics/Tests/Mgen_M2288.faa +0 -3189
  201. data/utils/enveomics/Tests/Mgen_M2288.fna +0 -8282
  202. data/utils/enveomics/Tests/Mgen_M2321.fna +0 -8288
  203. data/utils/enveomics/Tests/Nequ_Kin4M.faa +0 -2970
  204. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
  205. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +0 -7
  206. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +0 -17
  207. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +0 -137
  208. data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +0 -123
  209. data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +0 -200
  210. data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +0 -55
  211. data/utils/enveomics/Tests/alkB.nwk +0 -1
  212. data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +0 -13
  213. data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +0 -17
  214. data/utils/enveomics/Tests/hiv1.faa +0 -59
  215. data/utils/enveomics/Tests/hiv1.fna +0 -134
  216. data/utils/enveomics/Tests/hiv2.faa +0 -70
  217. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +0 -233
  218. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +0 -1
  219. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +0 -233
  220. data/utils/enveomics/Tests/phyla_counts.tsv +0 -10
  221. data/utils/enveomics/Tests/primate_lentivirus.ogs +0 -11
  222. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +0 -9
  223. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +0 -8
  224. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +0 -6
  225. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +0 -9
  226. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +0 -6
  227. data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +0 -6
  228. data/utils/enveomics/build_enveomics_r.bash +0 -45
  229. data/utils/enveomics/enveomics.R/DESCRIPTION +0 -31
  230. data/utils/enveomics/enveomics.R/NAMESPACE +0 -39
  231. data/utils/enveomics/enveomics.R/R/autoprune.R +0 -155
  232. data/utils/enveomics/enveomics.R/R/barplot.R +0 -184
  233. data/utils/enveomics/enveomics.R/R/cliopts.R +0 -135
  234. data/utils/enveomics/enveomics.R/R/df2dist.R +0 -154
  235. data/utils/enveomics/enveomics.R/R/growthcurve.R +0 -331
  236. data/utils/enveomics/enveomics.R/R/prefscore.R +0 -79
  237. data/utils/enveomics/enveomics.R/R/recplot.R +0 -354
  238. data/utils/enveomics/enveomics.R/R/recplot2.R +0 -1631
  239. data/utils/enveomics/enveomics.R/R/tribs.R +0 -583
  240. data/utils/enveomics/enveomics.R/R/utils.R +0 -80
  241. data/utils/enveomics/enveomics.R/README.md +0 -81
  242. data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
  243. data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
  244. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -16
  245. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -16
  246. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -16
  247. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +0 -25
  248. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +0 -46
  249. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +0 -23
  250. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +0 -47
  251. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +0 -23
  252. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +0 -23
  253. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +0 -40
  254. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +0 -103
  255. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +0 -67
  256. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +0 -24
  257. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +0 -19
  258. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +0 -45
  259. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +0 -44
  260. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +0 -47
  261. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +0 -75
  262. data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +0 -50
  263. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +0 -44
  264. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +0 -139
  265. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +0 -45
  266. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +0 -24
  267. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +0 -77
  268. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +0 -25
  269. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +0 -21
  270. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +0 -19
  271. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +0 -19
  272. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +0 -47
  273. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +0 -29
  274. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +0 -18
  275. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +0 -45
  276. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +0 -36
  277. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +0 -19
  278. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +0 -19
  279. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +0 -27
  280. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +0 -52
  281. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +0 -17
  282. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +0 -51
  283. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +0 -43
  284. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +0 -82
  285. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +0 -59
  286. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +0 -27
  287. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +0 -36
  288. data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +0 -23
  289. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +0 -68
  290. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +0 -28
  291. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +0 -27
  292. data/utils/enveomics/enveomics.R/man/growth.curves.Rd +0 -14
  293. data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +0 -13
  294. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +0 -78
  295. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +0 -46
  296. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +0 -45
  297. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +0 -125
  298. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +0 -19
  299. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +0 -19
  300. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +0 -19
  301. data/utils/enveomics/globals.mk +0 -8
  302. data/utils/enveomics/manifest.json +0 -9
  303. data/utils/multitrim/Multitrim How-To.pdf +0 -0
  304. data/utils/multitrim/README.md +0 -67
  305. data/utils/multitrim/multitrim.py +0 -1555
  306. data/utils/multitrim/multitrim.yml +0 -13
@@ -1,157 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- #
4
- # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
5
- # @update: Jul-29-2015
6
- # @license artistic license 2.0
7
- #
8
-
9
- require 'optparse'
10
-
11
- opts = {:minscore=>0, :besthits=>0, :orient=>0, :sisprefix=>"_"}
12
- ARGV << '-h' if ARGV.size==0
13
- OptionParser.new do |opt|
14
- opt.separator "Identifies the best hits of paired-reads."
15
- opt.separator ""
16
- opt.on("-i", "--blast FILE", "Input BLAST file."){ |v| opts[:blast]=v }
17
- opt.on("-s", "--minscore FLOAT", "Minimum (summed) Bit-Score to consider a pair-match."){ |v| opts[:minscore] = v.to_f }
18
- opt.on("-b", "--besthits INT", "Outputs top best-hits only (use 0 to output all the paired hits)."){ |v| opts[:besthits]=v.to_i }
19
- opt.on("-o", "--orient INT", "Checks the orientation of the hit. Values are: 0, no checking; 1, same direction; 2,",
20
- "inwards; 3, outwards; 4, different direction (i.e., 2 or 3)."){ |v| opts[:orient]=v.to_i }
21
- opt.on("-p", "--sisprefix STR", "Sister read number prefix in the name of the reads. Escape characters as dots (\\.),",
22
- "parenthesis (\\(, \\), \\[, \\]), or other characters with special meaning in regular expressions",
23
- "(\\*, \\+, \\^, \\$, \\|). This prefix allows regular expressions (for example, use ':|\\.' to use any of",
24
- "colon or dot). Notice that the prefix will not be included in the base name reported in the output."){ |v| opts[:sisprefix]=v }
25
- opt.on("-h","--help","Display this screen") do
26
- puts opt
27
- exit
28
- end
29
- opt.separator ""
30
- opt.separator "Output:"
31
- opt.separator " Tab-delimited flat file, with the following columns:"
32
- opt.separator " 1. Query ID (without the \"sister\" identifier)."
33
- opt.separator " 2. Subject ID."
34
- opt.separator " 3. Bit score (summed from both sister reads)."
35
- opt.separator " 4/5. From/To (subject) coordinates for read 1."
36
- opt.separator " 6/7. From/To (subject) coordinates for read 2."
37
- opt.separator " 8. Reads orientation (1: same direction, 2: inwards, 3: outwards)."
38
- opt.separator " 9. Estimated insert size."
39
- opt.separator ""
40
- opt.separator "Important note: This script assumes that paired hits are next to each other."
41
- opt.separator " If this is not the case (e.g., because the blast was concatenated),"
42
- opt.separator " you must sort the input before running this script."
43
- opt.separator ""
44
- end.parse!
45
- abort "-i/--blast is mandatory." if opts[:blast].nil?
46
- abort "-i/--blast must exist." unless File.exists? opts[:blast]
47
-
48
- class SingleHit
49
- attr_reader :sbj, :score, :orient, :sfrom, :sto, :qfrom, :qto
50
- def initialize(blast_ln)
51
- blast_ln.chomp!
52
- ln = blast_ln.split("\t")
53
- @sbj = ln[1]
54
- @score = ln[11].to_f
55
- @qfrom = ln[6].to_i
56
- @qto = ln[7].to_i
57
- @sfrom = ln[8].to_i
58
- @sto = ln[9].to_i
59
- @orient = @sfrom < @sto ? 1 : -1;
60
- end
61
- end
62
- class DoubleHit
63
- attr_reader :name, :sbj, :score, :orient, :hitA, :hitB
64
- def initialize(name, hitA, hitB)
65
- raise "Trying to set DoubleHit from hits with different subjects" unless hitA.sbj == hitB.sbj
66
- @name = name
67
- @hitA = hitA
68
- @hitB = hitB
69
- @sbj = hitA.sbj
70
- @score = hitA.score + hitB.score
71
- @orient = (hitA.orient == hitB.orient ? 1:
72
- ((hitA.orient>0 and hitB.orient<0) ? 2: 3))
73
- end
74
- def to_s
75
- coords = [@hitA.sfrom, @hitB.sfrom, @hitA.sto, @hitB.sto]
76
- @name + "\t" + @sbj + "\t" + @score.to_s + "\t" +
77
- @hitA.sfrom.to_s + "\t" + @hitA.sto.to_s + "\t" +
78
- @hitB.sfrom.to_s + "\t" + @hitB.sto.to_s + "\t" +
79
- @orient.to_s + "\t" + (coords.max-coords.min).to_s + "\n"
80
- end
81
- end
82
- class PairedHits
83
- attr_reader :name, :hitsA, :hitsB
84
- @@minscore = 0
85
- @@orient = 0
86
- @@besthits = 0
87
- def initialize(name)
88
- @name = name
89
- @hitsA = []
90
- @hitsB = []
91
- @hits = []
92
- end
93
- def hits
94
- @hits = []
95
- # Search for paired hits
96
- @hitsA.each do |hitA|
97
- @hitsB.each do |hitB|
98
- if hitA.sbj == hitB.sbj
99
- hit = DoubleHit.new(@name, hitA, hitB)
100
- next if hit.score <= @@minscore # Minimum bit-score check
101
- next if ((1 .. 3).include?(@@orient) and @@orient != hit.orient) # "typical" orientation check
102
- next if (@@orient == 4 and not((2 .. 3).include?(hit.orient))) # "different-orientation" check
103
- @hits.push(hit)
104
- end
105
- end
106
- end
107
- # Sort the hits
108
- @hits.sort! {|x,y| x.score <=> y.score }
109
- if @@besthits==0
110
- @hits
111
- else
112
- @hits.take(@@besthits)
113
- end
114
- end
115
- def hitsX(x)
116
- if x == 1
117
- @hitsA
118
- else
119
- @hitsB
120
- end
121
- end
122
- # Class methods
123
- def PairedHits.minscore=(value)
124
- @@minscore = value
125
- end
126
- def PairedHits.orient=(value)
127
- @@orient = value
128
- end
129
- def PairedHits.besthits=(value)
130
- @@besthits = value
131
- end
132
- end
133
-
134
- PairedHits.minscore = opts[:minscore]
135
- PairedHits.orient = opts[:orient]
136
- PairedHits.besthits = opts[:besthits]
137
-
138
- begin
139
- f = File.open(opts[:blast], "r")
140
- currPair = PairedHits.new(" ")
141
- while(ln = f.gets)
142
- m = /^([^\s]*)(?:#{opts[:sisprefix]})([12])/.match(ln)
143
- raise "Impossible to parse read name in line #{$.} using sister prefix '#{opts[:sisprefix]}':\n#{ln}" unless m
144
- if m[1] != currPair.name
145
- currPair.hits.each { |hit| puts hit.to_s }
146
- currPair = PairedHits.new(m[1])
147
- end
148
- currPair.hitsX(m[2].to_i).push(SingleHit.new(ln));
149
- end
150
- currPair.hits.each { |hit| puts hit.to_s }
151
- f.close
152
- rescue => err
153
- $stderr.puts "Exception: #{err}\n\n"
154
- err.backtrace.each { |l| $stderr.puts l + "\n" }
155
- err
156
- end
157
-
@@ -1,48 +0,0 @@
1
- #!/usr/bin/env Rscript
2
-
3
- # @author Luis M. Rodriguez-R
4
- # @license Artistic-2.0
5
-
6
- #= Load stuff
7
- suppressPackageStartupMessages(library(enveomics.R))
8
- args <- commandArgs(trailingOnly = FALSE)
9
- enveomics_R <- file.path(dirname(
10
- sub("^--file=", "", args[grep("^--file=", args)])),
11
- "lib", "enveomics.R")
12
-
13
- #= Generate interface
14
- opt <- enve.cliopts(enve.recplot2,
15
- file.path(enveomics_R, "man", "enve.recplot2.Rd"),
16
- positional_arguments=c(1,4),
17
- usage="usage: %prog [options] output.Rdata [output.pdf [width height]]",
18
- mandatory=c("prefix"),
19
- o_desc=list(pos.breaks="Breaks in the positions histogram.",
20
- pos.breaks.tsv="File with (absolute) coordinates of breaks in the position histogram",
21
- id.breaks="Breaks in the identity histogram.",
22
- id.summary="Function summarizing the identity bins. By default: sum.",
23
- peaks.col="Color of peaks, mandatory for peak-finding (e.g., darkred).",
24
- peaks.method="Method to detect peaks; one of emauto, em, or mower."),
25
- p_desc=paste("","Produce recruitment plot objects provided that",
26
- "BlastTab.catsbj.pl has been previously executed.", sep="\n\t"),
27
- ignore=c("plot"),
28
- defaults=c(pos.breaks.tsv=NA, id.metric="identity", peaks.col=NA,
29
- peaks.method="emauto"))
30
-
31
- #= Run it!
32
- if(length(opt$args)>1){
33
- args = as.list(opt$args[-1])
34
- for(i in 2:3) if(length(args)>=i) args[[i]] <- as.numeric(args[[i]])
35
- do.call("pdf", args)
36
- }else{
37
- opt$options[["plot"]] <- FALSE
38
- }
39
- pc <- opt$options[["peaks.col"]]
40
- if(!is.na(pc) && pc=="NA") opt$options[["peaks.col"]] <- NA
41
- if(!is.null(opt$options[["peaks.method"]])){
42
- opt$options[["peaks.opts"]] <- list(method=opt$options[["peaks.method"]])
43
- opt$options[["peaks.method"]] <- NULL
44
- }
45
- rp <- do.call("enve.recplot2", opt$options)
46
- save(rp, file=opt$args[1])
47
- if(length(opt$args)>1) dev.off()
48
-
@@ -1,86 +0,0 @@
1
- #!/usr/bin/env perl
2
- #
3
- # @author: Luis M Rodriguez-R <lmrodriguezr at gmail dot com>
4
- # @license: artistic license 2.0
5
- # @update: Mar-23-2015
6
- #
7
-
8
- use strict;
9
- use warnings;
10
- use List::Util qw/min max sum/;
11
-
12
- my $fna = shift @ARGV;
13
- $fna or die "
14
- Usage:
15
- cat blast1... | $0 genes_or_ctgs.fna > genes_or_ctgs.cov
16
-
17
- blast1... One or more Tabular BLAST files of reads vs genes (or contigs).
18
- genes_or_ctgs.fna A FastA file containing the genes or the contigs (db).
19
- genes_or_ctgs.cov The output file.
20
-
21
- Output:
22
- A tab-delimited file with the following columns:
23
- 1. Subject ID
24
- 2. Average sequencing depth
25
- 3. Median sequencing depth
26
- 4. Number of mapped reads
27
- 5. Length of the subject sequence
28
-
29
- ";
30
-
31
- my $size = {};
32
- my $gene = {};
33
- my $reads = {};
34
-
35
- SIZE:{
36
- local $/=">";
37
- print STDERR "== Reading fasta\n";
38
- open FNA, "<", $fna or die "Cannot read the file: $fna: $!\n";
39
- my $i=0;
40
- while(<FNA>){
41
- chomp;
42
- my @g = split /\n/, $_, 2;
43
- next unless $g[1];
44
- #$g[1] =~ s/[^A-Za-z]//g;
45
- #$size->{$g[0]} = length $g[1];
46
- $g[0] =~ s/\s.*//;
47
- $size->{$g[0]} = ( $g[1] =~ tr/[A-Za-z]// );
48
- print STDERR " Measuring sequence ".($i).": $g[0] \r" unless ++$i%500;
49
- }
50
- close FNA;
51
- print STDERR " Found $i sequences".(" "x30)."\n";
52
- }
53
-
54
- MAP:{
55
- print STDERR "== Reading mapping\n";
56
- my $i=0;
57
- while(<>){
58
- my @ln = split /\t/;
59
- $gene->{$ln[1]} ||= [];
60
- for my $pos (min($ln[8], $ln[9]) .. max($ln[8], $ln[9])){ ($gene->{$ln[1]}->[$pos]||=0)++ }
61
- ($reads->{$ln[1]} ||= 0)++;
62
- print STDERR " Saving hit ".($i).": $ln[1] \r" unless ++$i%5000;
63
- }
64
- print STDERR " Found $i hits".(" "x30)."\n";
65
- }
66
-
67
- OUT:{
68
- print STDERR "== Creating output\n";
69
- my $i=0;
70
- for my $g (keys %$gene){
71
- $gene->{$g}->[$_] ||= 0 for (0 .. $size->{$g});
72
- my @sorted = sort {$a <=> $b} @{$gene->{$g}};
73
- die "Cannot find gene in $fna: $g.\n" unless exists $size->{$g};
74
- printf "%s\t%.6f\t%d\t%d\t%d\n", $g,
75
- sum(@{$gene->{$g}})/$size->{$g},
76
- $sorted[$#sorted/2],
77
- $reads->{$g},
78
- $size->{$g};
79
- delete $gene->{$g};
80
- print STDERR " Saving sequence $g:".($i)."\r" unless ++$i%500;
81
- }
82
- print STDERR " Saved $i sequences".(" "x30)."\n";
83
- }
84
-
85
- print STDERR " done.\n";
86
-
@@ -1,119 +0,0 @@
1
- #!/usr/bin/env perl
2
- #
3
- # @author: Luis M Rodriguez-R <lmrodriguezr at gmail dot com>
4
- # @license: artistic license 2.0
5
- # @update: Mar-23-2015
6
- #
7
-
8
- use strict;
9
- use warnings;
10
- use List::Util qw/min max sum/;
11
-
12
- my $fna = shift @ARGV;
13
- $fna or die "
14
- Description:
15
- Estimates the average sequencing depth of subject sequences (genes or contigs)
16
- assuming a Zero-Inflated Poisson distribution (ZIP) to correct for non-covered
17
- positions. It uses the corrected method of moments estimators (CMMEs) as described
18
- by Beckett et al [1]. Note that [1] has a mistake in eq. (2.4), that should be:
19
- pi-hat-MM = 1 - (X-bar / lambda-hat-MM)
20
-
21
- Also note that a more elaborated mixture distribution can arise from coverage
22
- histograms (e.g., see [2] for an additional correction called 'tail distribution'
23
- and mixtures involving negative binomial) so take these results cum grano salis.
24
-
25
- Usage:
26
- cat blast1... | $0 genes_or_ctgs.fna > genes_or_ctgs.cov
27
-
28
- blast1... One or more Tabular BLAST files of reads vs genes (or contigs).
29
- genes_or_ctgs.fna A FastA file containing the genes or the contigs (db).
30
- genes_or_ctgs.cov The output file.
31
-
32
- Output:
33
- A tab-delimited file with the following columns (the one you want is #2):
34
- 1. Subject ID
35
- 2. Estimated average sequencing depth (CMME lambda)
36
- 3. Zero-inflation (CMME pi)
37
- 4. Observed average sequencing depth
38
- 5. Observed median sequencing depth
39
- 6. Observed median sequencing depth excluding zeroes
40
- 7. Number of mapped reads
41
- 8. Length of the subject sequence
42
-
43
- References:
44
- [1] http://anisette.ucs.louisiana.edu/Academic/Sciences/MATH/stage/stat2012.pdf
45
- [2] Lindner et al, Bioinformatics, 2013.
46
-
47
- ";
48
-
49
# State shared by the blocks below, all keyed by subject ID:
#   $size  - sequence length (number of letters) read from the FastA file,
#   $gene  - array ref with the depth at each position of the subject,
#   $reads - number of hits against the subject.
my $size  = {};
my $gene  = {};
my $reads = {};

# Loads the length of every sequence in the FastA file $fna into $size. The
# key is the defline up to the first whitespace. Dies if the file cannot be
# opened.
SIZE:{
   # With '>' as record separator every read returns one FastA entry.
   local $/ = ">";
   print STDERR "== Reading fasta\n";
   open my $fasta_fh, "<", $fna or die "Cannot read the file: $fna: $!\n";
   my $i = 0;
   ENTRY: while(my $record = <$fasta_fh>){
      chomp $record;
      my ($name, $residues) = split /\n/, $record, 2;
      # Nothing to measure: leading empty record or defline-only entry.
      next ENTRY unless $residues;
      $name =~ s/\s.*//;
      # Count letters only. The list given to tr/// is literal, so it must
      # not include the brackets of a regex character class.
      $size->{$name} = ( $residues =~ tr/A-Za-z// );
      print STDERR " Measuring sequence ".($i).": $name \r" unless ++$i%500;
   }
   close $fasta_fh;
   print STDERR " Found $i sequences".(" "x30)."\n";
}
71
-
72
# Reads tabular BLAST hits (from STDIN or from the files left in @ARGV). For
# each hit, adds one to the depth of every subject position between columns
# 9 and 10 (stored in $gene) and to the hit count of the subject ($reads).
MAP:{
   print STDERR "== Reading mapping\n";
   my $i = 0;
   HIT: while(my $row = <>){
      chomp $row;
      # Ignore comment lines and rows too short to hold the coordinates;
      # they are not hits and would otherwise register a bogus subject.
      next HIT if $row =~ /^#/;
      my @ln = split /\t/, $row;
      next HIT if @ln < 10;
      my $sbj = $ln[1];
      # The coordinates come reversed for hits on the opposite strand.
      my ($lo, $hi) = $ln[8] <= $ln[9] ? @ln[8, 9] : @ln[9, 8];
      $gene->{$sbj} ||= [];
      $gene->{$sbj}->[$_]++ for $lo .. $hi;
      $reads->{$sbj}++;
      print STDERR " Saving hit ".($i).": $sbj \r" unless ++$i%5000;
   }
   print STDERR " Found $i hits".(" "x30)."\n";
}
84
-
85
# Prints to STDOUT one tab-delimited line per subject with hits:
#   1. subject ID, 2. estimated average depth (CMME lambda), 3. zero-inflation
#   (CMME pi), 4. observed average depth, 5. observed median depth,
#   6. observed median depth excluding zeroes, 7. mapped reads, 8. length.
# Subjects missing from the FastA file are reported with a warning and
# skipped. Dies if a subject has hits beyond its length.
OUT:{
   print STDERR "== Creating output\n";
   my $i = 0;
   for my $g (keys %$gene){
      unless(exists $size->{$g}){
         warn "Warning: Cannot find gene in $fna: $g.\n";
         next;
      }
      my $len = $size->{$g};
      my $cov = $gene->{$g};
      # Positions without hits have depth zero.
      $cov->[$_] ||= 0 for (0 .. $len);
      die "Hits out-of-boundaries in gene $g: $#{$cov} != $len.\n" if $#{$cov} != $len;

      # Hit coordinates are 1-based: slot 0 is not a position of the
      # sequence, so it is excluded from every statistic.
      my @depth     = @{$cov}[1 .. $len];
      my @sorted    = sort { $a <=> $b } @depth;
      my @sorted_nz = grep { $_ > 0 } @sorted;

      # Observed mean and sample variance of the depth.
      my $xbar = $len > 0 ? sum(0, @depth)/$len : 0;
      my $var  = $len > 1
         ? sum(0, map { ($_ - $xbar)**2 } @depth)/($len - 1)
         : 0;

      # Method-of-moments estimators of the Zero-Inflated Poisson:
      #   lambda = xbar + var/xbar - 1,   pi = 1 - xbar/lambda
      my $lambdaMM = $xbar > 0 ? $xbar + ($var/$xbar) - 1 : 0;
      my $piMM     = $lambdaMM == 0 ? 0 : 1 - $xbar/$lambdaMM;

      # Without overdispersion (variance not above the mean) there is no
      # evidence of zero-inflation: report the plain Poisson estimate.
      my $inflated = $var > $xbar;

      printf "%s\t%.6f\t%.6f\t%.6f\t%d\t%d\t%d\t%d\n", $g,
         ($inflated ? $lambdaMM : $xbar),
         ($inflated ? $piMM : 0),
         $xbar,
         (@sorted    ? $sorted[$#sorted/2]       : 0),
         (@sorted_nz ? $sorted_nz[$#sorted_nz/2] : 0),
         $reads->{$g},
         $len;
      # Release the per-position array as soon as it is reported.
      delete $gene->{$g};
      print STDERR " Saving sequence $g:".($i)." \r" unless ++$i%500;
   }
   print STDERR " Saved $i sequences".(" "x30)." \n";
}

print STDERR " done.\n";
119
-
@@ -1,86 +0,0 @@
1
- #!/usr/bin/env perl
2
- #
3
- # @author: Luis M Rodriguez-R <lmrodriguezr at gmail dot com>
4
- # @license: artistic license 2.0
5
- # @update: Mar-23-2015
6
- #
7
-
8
- use strict;
9
- use warnings;
10
- use List::Util qw/min max sum/;
11
-
12
- my $fna = shift @ARGV;
13
- $fna or die "
14
- Usage:
15
- cat blast1... | $0 genes_or_ctgs.fna > genes_or_ctgs.cov
16
-
17
- blast1... One or more Tabular BLAST files of reads vs genes (or contigs).
18
- genes_or_ctgs.fna A FastA file containing the genes or the contigs (db).
19
- genes_or_ctgs.cov The output file.
20
-
21
- Output:
22
- A tab-delimited file with the following columns:
23
- 1. Subject ID
24
- 2. Average sequencing depth
25
- 3. Number of mapped reads
26
- 4. Length of the subject sequence
27
-
28
- Note:
29
- The values reported by this script may differ from those of BlastTab.seqdepth.pl,
30
- because this script uses the aligned length of the read while BlastTab.seqdepth.pl
31
- uses the aligned length of the subject sequence.
32
-
33
- ";
34
-
35
# State shared by the blocks below, all keyed by subject ID:
#   $size  - sequence length (number of letters) read from the FastA file,
#   $gene  - summed aligned length of the hits against the subject,
#   $reads - number of hits against the subject.
my $size  = {};
my $gene  = {};
my $reads = {};

# Measures every sequence of the FastA file $fna and saves its length in
# $size under the defline truncated at the first whitespace. Dies if the
# file cannot be opened.
SIZE:{
   # One FastA entry per record.
   local $/ = ">";
   print STDERR "== Reading fasta\n";
   open my $in, "<", $fna or die "Cannot read the file: $fna: $!\n";
   my $i = 0;
   while(my $chunk = <$in>){
      chomp $chunk;
      my ($defline, $body) = split /\n/, $chunk, 2;
      # The record before the first '>' and entries lacking a sequence
      # carry nothing to measure.
      next unless $body;
      (my $id = $defline) =~ s/\s.*//;
      # tr/// counts the listed characters literally; square brackets are
      # left out so that only letters are counted.
      $size->{$id} = ( $body =~ tr/A-Za-z// );
      print STDERR " Measuring sequence ".($i).": $id \r" unless ++$i%500;
   }
   close $in;
   print STDERR " Found $i sequences".(" "x30)."\n";
}
57
-
58
# Reads tabular BLAST hits (from STDIN or from the files left in @ARGV) and
# adds, per subject (column 2), the length spanned by columns 7 and 8 to
# $gene and one to the hit count in $reads.
MAP:{
   print STDERR "== Reading mapping\n";
   my $i = 0;
   HIT: while(my $hit = <>){
      chomp $hit;
      # Comment lines and rows lacking the coordinate columns are not hits
      # and must not register a subject.
      next HIT if $hit =~ /^#/;
      my @ln = split /\t/, $hit;
      next HIT if @ln < 8;
      my ($sbj, $q_from, $q_to) = @ln[1, 6, 7];
      # Inclusive length of the aligned region, in either orientation.
      $gene->{$sbj} += abs($q_from - $q_to) + 1;
      $reads->{$sbj}++;
      print STDERR " Saving hit ".($i).": $sbj \r" unless ++$i%5000;
   }
   print STDERR " Found $i hits".(" "x30)."\n";
}
70
-
71
# Writes to STDOUT one tab-delimited line per subject with hits: subject ID,
# average depth (summed aligned length over subject length), number of
# mapped reads, and subject length. Dies if a subject with hits is absent
# from the FastA file.
OUT:{
   print STDERR "== Creating output\n";
   my $i = 0;
   foreach my $sbj (keys %$gene){
      exists $size->{$sbj} or die "Cannot find gene in $fna: $sbj.\n";
      my $len     = $size->{$sbj};
      my $aligned = $gene->{$sbj};
      my $mapped  = $reads->{$sbj};
      printf "%s\t%.6f\t%d\t%d\n", $sbj, $aligned/$len, $mapped, $len;
      # Progress report every 500 subjects.
      ++$i;
      print STDERR " Saving sequence $sbj:".($i)."\r" if $i%500 == 0;
   }
   print STDERR " Saved $i sequences".(" "x30)."\n";
}

print STDERR " done.\n";
86
-
@@ -1,47 +0,0 @@
1
- #!/usr/bin/env perl
2
- #
3
- # @author Luis M Rodriguez-R <lmrodriguezr at gmail dot com>
4
- # @license artistic license 2.0
5
- # @update Mar-23-2015
6
- #
7
-
8
- use strict;
9
- use warnings;
10
-
11
- my($blast, $fasta) = @ARGV;
12
- ($blast and $fasta) or die "
13
- Description:
14
- Filters a BLAST output including only the hits produced by
15
- any of the given sequences as query.
16
-
17
- Usage:
18
- $0 blast.tab sample.fa > out.tab
19
-
20
- blast.tab BLAST output to be filtered (tabular format).
21
- sample.fa Sequences to use as query.
22
- out.tab The filtered BLAST output (tabular format).
23
-
24
- ";
25
-
26
# Collect the IDs (defline up to the first whitespace) of the sequences in
# the FastA file; only hits with one of them as query are reported.
print STDERR "== Reading sequences\n";
my %is_query;
open my $fasta_fh, "<", $fasta or die "Cannot read the file: $fasta: $!\n";
while(my $line = <$fasta_fh>){
   $is_query{$1} = 1 if $line =~ /^>(\S+)/;
}
close $fasta_fh;
print STDERR " ".(scalar keys %is_query)." sequences to be used as query.\n";

# Copy to STDOUT the BLAST lines whose first column is one of the collected
# IDs. Comment lines are dropped and excluded from the totals.
print STDERR "== Reading BLAST\n";
my $total = 0;
my $kept  = 0;
open my $blast_fh, "<", $blast or die "Cannot read the file: $blast: $!\n";
ENTRY: while(my $ln = <$blast_fh>){
   next ENTRY if $ln =~ /^#/;
   $total++;
   my ($qry) = split /\t/, $ln;
   if(exists $is_query{$qry}){
      $kept++;
      print $ln;
   }
}
close $blast_fh;
print STDERR " Reported $kept entries out of $total.\n";
47
-
@@ -1,114 +0,0 @@
1
- #!/usr/bin/env perl
2
- #
3
- # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
4
- # @update Mar-23-2016
5
- # @license artistic license 2.0
6
- #
7
-
8
- use warnings;
9
- use strict;
10
- use Getopt::Std;
11
-
12
# Terminates the script with the usage message (sent to STDERR by die).
sub HELP_MESSAGE {
   die <<"USAGE";

.Description
Sums the weights of all the queries hitting each subject. Often (but not
necessarily) the BLAST files contain only best matches. The weights can be
any number, but a common use of this Script is to add up counts (weights are
integers). For example, in a BLAST of predicted genes vs some annotation
source, the weights could be the number of reads recruited by each gene.

.Usage:
$0 [options] blast... > out-file

blast... * One or more BLAST files.
out-file A two-columns tab-delimited file containing the summed weights
per hit.

-w <str> Weights file: A two-columns tab-delimited file containing the
name (column 1) and the weight (column 2) of each query.
-s <float> Minimum score. By default: 0.
-i <float> Minimum identity (in percentage). By default: 0.
-m <int> Maximum number of queries. Set to 0 for all. By default: 0.
-n Normalize weights by the number of hits per query.
-z Add zero when weight is not found (by default: doesn't list
them).
-q Run quietly.
-h Display this message and exit.

* Mandatory

.Note:
The weights (-w parameter) are optional, but its use is encouraged. When
weights are not passed, the script simply assumes all queries to be equally
weighted (unity), a result that can be faster to compute with, for example:
cat blast | cut -f 2 | sort | uniq -c | awk '{print \$2\"\\t\"\$1}' > out
It is equivalent to simply count the number of times that each subject
occurs.
USAGE
}
50
-
51
# Command-line options; the numeric thresholds default to zero (disabled).
my %o = ();
getopts('w:s:i:m:znqh', \%o);
HELP_MESSAGE() if $o{h};
$o{$_} ||= 0 for qw(s i m);

# Weight of each query, read from the two-column file given with -w.
my %count;
if($o{w}){
   print STDERR "Reading counts.\n" unless $o{q};
   open my $count_fh, "<", $o{w} or die "Cannot open file: $o{w}: $!\n";
   while(my $row = <$count_fh>){
      chomp $row;
      # Only the first two columns are used; rows without a weight are
      # ignored so that they cannot shift the name/weight pairing.
      my ($name, $weight) = split /\t/, $row;
      next unless defined $weight;
      $count{$name} = $weight;
   }
   close $count_fh;
}

print STDERR "Reading BLASTs.\n" unless $o{q};
my $qry = '';     # Query currently being buffered.
my $hits = 0;     # Number of hits buffered for that query.
my @buf = ();     # Buffered [subject, weight] pairs of that query.
my $qries = 0;    # Number of queries started so far.
my $noQry = 0;    # Number of distinct queries absent from the weights.
my %missing = (); # Queries already counted in $noQry.
my $ln1 = 0;      # Last column index of the first line, expected in all.
my %out = ();     # Summed weight per subject.

# Adds the buffered hits of the current query to %out (dividing the weight
# by the number of hits with -n) and empties the buffer, so that no hit can
# ever be added twice.
my $flush = sub {
   my $div = $o{n} ? $hits : 1;
   $out{$_->[0]} += $_->[1]/$div for @buf;
   @buf = ();
   $hits = 0;
};

# The files are processed as one continuous stream: the buffer is flushed
# when the query changes and once more after the last file.
BFILE: for my $blast (@ARGV){
   print STDERR " o $blast\n" unless $o{q};
   open my $blast_fh, "<", $blast or die "Cannot open file: $blast: $!\n";
   BLINE: while(my $line = <$blast_fh>){
      chomp $line;
      my @ln = split /\t/, $line;
      $ln1 ||= $#ln;
      die "Bad line $.: $line\n" unless $#ln==$ln1;
      next BLINE if ($o{s} and $ln[11]<$o{s}) or ($o{i} and $ln[2]<$o{i});
      unless(exists $count{$ln[0]}){
         $noQry++ unless $missing{$ln[0]}++;
         if(not $o{w}){
            # Without weights every query counts as one.
            $count{$ln[0]} = 1;
         }elsif($o{z}){
            $count{$ln[0]} = 0;
         }else{
            next BLINE;
         }
      }

      if($qry ne $ln[0]){
         $flush->();
         $qries++;
         # Stop only once the query after the last allowed one starts, so
         # that exactly -m queries are summed.
         last BFILE if $o{m} and $qries > $o{m};
         $qry = $ln[0];
      }

      push @buf, [$ln[1], $count{$ln[0]}];
      $hits++;
   }
   close $blast_fh;
}
$flush->();
print STDERR "Warning: Couldn't find $noQry queries\n" if $noQry and $o{w};

print "$_\t$out{$_}\n" for keys %out;
114
-