miga-base 1.2.15.2 → 1.2.15.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/cli/action/download/gtdb.rb +4 -1
  3. data/lib/miga/cli/action/gtdb_get.rb +4 -0
  4. data/lib/miga/daemon.rb +4 -1
  5. data/lib/miga/lair.rb +6 -4
  6. data/lib/miga/remote_dataset/download.rb +3 -2
  7. data/lib/miga/remote_dataset.rb +25 -7
  8. data/lib/miga/taxonomy.rb +6 -0
  9. data/lib/miga/version.rb +2 -2
  10. metadata +6 -302
  11. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +0 -41964
  12. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +0 -32439
  13. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +0 -62056
  14. data/utils/FastAAI/FastAAI +0 -3659
  15. data/utils/FastAAI/FastAAI-legacy/FastAAI +0 -1336
  16. data/utils/FastAAI/FastAAI-legacy/kAAI_v1.0_virus.py +0 -1296
  17. data/utils/FastAAI/README.md +0 -84
  18. data/utils/enveomics/Docs/recplot2.md +0 -244
  19. data/utils/enveomics/Examples/aai-matrix.bash +0 -66
  20. data/utils/enveomics/Examples/ani-matrix.bash +0 -66
  21. data/utils/enveomics/Examples/essential-phylogeny.bash +0 -105
  22. data/utils/enveomics/Examples/unus-genome-phylogeny.bash +0 -100
  23. data/utils/enveomics/LICENSE.txt +0 -73
  24. data/utils/enveomics/Makefile +0 -52
  25. data/utils/enveomics/Manifest/Tasks/aasubs.json +0 -103
  26. data/utils/enveomics/Manifest/Tasks/blasttab.json +0 -790
  27. data/utils/enveomics/Manifest/Tasks/distances.json +0 -161
  28. data/utils/enveomics/Manifest/Tasks/fasta.json +0 -802
  29. data/utils/enveomics/Manifest/Tasks/fastq.json +0 -291
  30. data/utils/enveomics/Manifest/Tasks/graphics.json +0 -126
  31. data/utils/enveomics/Manifest/Tasks/mapping.json +0 -137
  32. data/utils/enveomics/Manifest/Tasks/ogs.json +0 -382
  33. data/utils/enveomics/Manifest/Tasks/other.json +0 -906
  34. data/utils/enveomics/Manifest/Tasks/remote.json +0 -355
  35. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +0 -650
  36. data/utils/enveomics/Manifest/Tasks/tables.json +0 -308
  37. data/utils/enveomics/Manifest/Tasks/trees.json +0 -68
  38. data/utils/enveomics/Manifest/Tasks/variants.json +0 -111
  39. data/utils/enveomics/Manifest/categories.json +0 -165
  40. data/utils/enveomics/Manifest/examples.json +0 -162
  41. data/utils/enveomics/Manifest/tasks.json +0 -4
  42. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +0 -69
  43. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +0 -1
  44. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +0 -1
  45. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +0 -1
  46. data/utils/enveomics/Pipelines/assembly.pbs/README.md +0 -189
  47. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +0 -112
  48. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +0 -23
  49. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +0 -44
  50. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +0 -50
  51. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +0 -37
  52. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +0 -68
  53. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +0 -49
  54. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +0 -80
  55. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +0 -57
  56. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +0 -63
  57. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +0 -38
  58. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +0 -73
  59. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +0 -21
  60. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +0 -72
  61. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +0 -98
  62. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +0 -1
  63. data/utils/enveomics/Pipelines/blast.pbs/README.md +0 -127
  64. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +0 -109
  65. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +0 -128
  66. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +0 -16
  67. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +0 -22
  68. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +0 -26
  69. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +0 -89
  70. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +0 -29
  71. data/utils/enveomics/Pipelines/idba.pbs/README.md +0 -49
  72. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +0 -95
  73. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +0 -56
  74. data/utils/enveomics/Pipelines/trim.pbs/README.md +0 -54
  75. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +0 -70
  76. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +0 -130
  77. data/utils/enveomics/README.md +0 -42
  78. data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +0 -171
  79. data/utils/enveomics/Scripts/Aln.cat.rb +0 -221
  80. data/utils/enveomics/Scripts/Aln.convert.pl +0 -35
  81. data/utils/enveomics/Scripts/AlphaDiversity.pl +0 -152
  82. data/utils/enveomics/Scripts/BedGraph.tad.rb +0 -93
  83. data/utils/enveomics/Scripts/BedGraph.window.rb +0 -71
  84. data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +0 -102
  85. data/utils/enveomics/Scripts/BlastTab.addlen.rb +0 -63
  86. data/utils/enveomics/Scripts/BlastTab.advance.bash +0 -48
  87. data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +0 -55
  88. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +0 -104
  89. data/utils/enveomics/Scripts/BlastTab.cogCat.rb +0 -76
  90. data/utils/enveomics/Scripts/BlastTab.filter.pl +0 -47
  91. data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +0 -194
  92. data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +0 -104
  93. data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +0 -157
  94. data/utils/enveomics/Scripts/BlastTab.recplot2.R +0 -48
  95. data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +0 -86
  96. data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +0 -119
  97. data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +0 -86
  98. data/utils/enveomics/Scripts/BlastTab.subsample.pl +0 -47
  99. data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +0 -114
  100. data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +0 -90
  101. data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +0 -123
  102. data/utils/enveomics/Scripts/Chao1.pl +0 -97
  103. data/utils/enveomics/Scripts/CharTable.classify.rb +0 -234
  104. data/utils/enveomics/Scripts/EBIseq2tax.rb +0 -83
  105. data/utils/enveomics/Scripts/FastA.N50.pl +0 -60
  106. data/utils/enveomics/Scripts/FastA.extract.rb +0 -152
  107. data/utils/enveomics/Scripts/FastA.filter.pl +0 -52
  108. data/utils/enveomics/Scripts/FastA.filterLen.pl +0 -28
  109. data/utils/enveomics/Scripts/FastA.filterN.pl +0 -60
  110. data/utils/enveomics/Scripts/FastA.fragment.rb +0 -100
  111. data/utils/enveomics/Scripts/FastA.gc.pl +0 -42
  112. data/utils/enveomics/Scripts/FastA.interpose.pl +0 -93
  113. data/utils/enveomics/Scripts/FastA.length.pl +0 -38
  114. data/utils/enveomics/Scripts/FastA.mask.rb +0 -89
  115. data/utils/enveomics/Scripts/FastA.per_file.pl +0 -36
  116. data/utils/enveomics/Scripts/FastA.qlen.pl +0 -57
  117. data/utils/enveomics/Scripts/FastA.rename.pl +0 -65
  118. data/utils/enveomics/Scripts/FastA.revcom.pl +0 -23
  119. data/utils/enveomics/Scripts/FastA.sample.rb +0 -98
  120. data/utils/enveomics/Scripts/FastA.slider.pl +0 -85
  121. data/utils/enveomics/Scripts/FastA.split.pl +0 -55
  122. data/utils/enveomics/Scripts/FastA.split.rb +0 -79
  123. data/utils/enveomics/Scripts/FastA.subsample.pl +0 -131
  124. data/utils/enveomics/Scripts/FastA.tag.rb +0 -65
  125. data/utils/enveomics/Scripts/FastA.toFastQ.rb +0 -69
  126. data/utils/enveomics/Scripts/FastA.wrap.rb +0 -48
  127. data/utils/enveomics/Scripts/FastQ.filter.pl +0 -54
  128. data/utils/enveomics/Scripts/FastQ.interpose.pl +0 -90
  129. data/utils/enveomics/Scripts/FastQ.maskQual.rb +0 -89
  130. data/utils/enveomics/Scripts/FastQ.offset.pl +0 -90
  131. data/utils/enveomics/Scripts/FastQ.split.pl +0 -53
  132. data/utils/enveomics/Scripts/FastQ.tag.rb +0 -70
  133. data/utils/enveomics/Scripts/FastQ.test-error.rb +0 -81
  134. data/utils/enveomics/Scripts/FastQ.toFastA.awk +0 -24
  135. data/utils/enveomics/Scripts/GFF.catsbj.pl +0 -127
  136. data/utils/enveomics/Scripts/GenBank.add_fields.rb +0 -84
  137. data/utils/enveomics/Scripts/HMM.essential.rb +0 -351
  138. data/utils/enveomics/Scripts/HMM.haai.rb +0 -168
  139. data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +0 -83
  140. data/utils/enveomics/Scripts/JPlace.distances.rb +0 -88
  141. data/utils/enveomics/Scripts/JPlace.to_iToL.rb +0 -320
  142. data/utils/enveomics/Scripts/M5nr.getSequences.rb +0 -81
  143. data/utils/enveomics/Scripts/MeTaxa.distribution.pl +0 -198
  144. data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +0 -35
  145. data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +0 -49
  146. data/utils/enveomics/Scripts/NCBIacc2tax.rb +0 -92
  147. data/utils/enveomics/Scripts/Newick.autoprune.R +0 -27
  148. data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +0 -228
  149. data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +0 -32
  150. data/utils/enveomics/Scripts/RefSeq.download.bash +0 -48
  151. data/utils/enveomics/Scripts/SRA.download.bash +0 -55
  152. data/utils/enveomics/Scripts/TRIBS.plot-test.R +0 -36
  153. data/utils/enveomics/Scripts/TRIBS.test.R +0 -39
  154. data/utils/enveomics/Scripts/Table.barplot.R +0 -31
  155. data/utils/enveomics/Scripts/Table.df2dist.R +0 -30
  156. data/utils/enveomics/Scripts/Table.filter.pl +0 -61
  157. data/utils/enveomics/Scripts/Table.merge.pl +0 -77
  158. data/utils/enveomics/Scripts/Table.prefScore.R +0 -60
  159. data/utils/enveomics/Scripts/Table.replace.rb +0 -69
  160. data/utils/enveomics/Scripts/Table.round.rb +0 -63
  161. data/utils/enveomics/Scripts/Table.split.pl +0 -57
  162. data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +0 -227
  163. data/utils/enveomics/Scripts/VCF.KaKs.rb +0 -147
  164. data/utils/enveomics/Scripts/VCF.SNPs.rb +0 -88
  165. data/utils/enveomics/Scripts/aai.rb +0 -421
  166. data/utils/enveomics/Scripts/ani.rb +0 -362
  167. data/utils/enveomics/Scripts/anir.rb +0 -137
  168. data/utils/enveomics/Scripts/clust.rand.rb +0 -102
  169. data/utils/enveomics/Scripts/gi2tax.rb +0 -103
  170. data/utils/enveomics/Scripts/in_silico_GA_GI.pl +0 -96
  171. data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
  172. data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
  173. data/utils/enveomics/Scripts/lib/enveomics.R +0 -1
  174. data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +0 -293
  175. data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +0 -175
  176. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +0 -24
  177. data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +0 -17
  178. data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +0 -30
  179. data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +0 -253
  180. data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +0 -88
  181. data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +0 -182
  182. data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +0 -49
  183. data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +0 -74
  184. data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +0 -237
  185. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +0 -31
  186. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +0 -152
  187. data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +0 -3
  188. data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +0 -74
  189. data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +0 -135
  190. data/utils/enveomics/Scripts/ogs.annotate.rb +0 -88
  191. data/utils/enveomics/Scripts/ogs.core-pan.rb +0 -160
  192. data/utils/enveomics/Scripts/ogs.extract.rb +0 -125
  193. data/utils/enveomics/Scripts/ogs.mcl.rb +0 -186
  194. data/utils/enveomics/Scripts/ogs.rb +0 -104
  195. data/utils/enveomics/Scripts/ogs.stats.rb +0 -131
  196. data/utils/enveomics/Scripts/rbm-legacy.rb +0 -172
  197. data/utils/enveomics/Scripts/rbm.rb +0 -108
  198. data/utils/enveomics/Scripts/sam.filter.rb +0 -148
  199. data/utils/enveomics/Tests/Makefile +0 -10
  200. data/utils/enveomics/Tests/Mgen_M2288.faa +0 -3189
  201. data/utils/enveomics/Tests/Mgen_M2288.fna +0 -8282
  202. data/utils/enveomics/Tests/Mgen_M2321.fna +0 -8288
  203. data/utils/enveomics/Tests/Nequ_Kin4M.faa +0 -2970
  204. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
  205. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +0 -7
  206. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +0 -17
  207. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +0 -137
  208. data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +0 -123
  209. data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +0 -200
  210. data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +0 -55
  211. data/utils/enveomics/Tests/alkB.nwk +0 -1
  212. data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +0 -13
  213. data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +0 -17
  214. data/utils/enveomics/Tests/hiv1.faa +0 -59
  215. data/utils/enveomics/Tests/hiv1.fna +0 -134
  216. data/utils/enveomics/Tests/hiv2.faa +0 -70
  217. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +0 -233
  218. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +0 -1
  219. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +0 -233
  220. data/utils/enveomics/Tests/phyla_counts.tsv +0 -10
  221. data/utils/enveomics/Tests/primate_lentivirus.ogs +0 -11
  222. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +0 -9
  223. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +0 -8
  224. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +0 -6
  225. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +0 -9
  226. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +0 -6
  227. data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +0 -6
  228. data/utils/enveomics/build_enveomics_r.bash +0 -45
  229. data/utils/enveomics/enveomics.R/DESCRIPTION +0 -31
  230. data/utils/enveomics/enveomics.R/NAMESPACE +0 -39
  231. data/utils/enveomics/enveomics.R/R/autoprune.R +0 -155
  232. data/utils/enveomics/enveomics.R/R/barplot.R +0 -184
  233. data/utils/enveomics/enveomics.R/R/cliopts.R +0 -135
  234. data/utils/enveomics/enveomics.R/R/df2dist.R +0 -154
  235. data/utils/enveomics/enveomics.R/R/growthcurve.R +0 -331
  236. data/utils/enveomics/enveomics.R/R/prefscore.R +0 -79
  237. data/utils/enveomics/enveomics.R/R/recplot.R +0 -354
  238. data/utils/enveomics/enveomics.R/R/recplot2.R +0 -1631
  239. data/utils/enveomics/enveomics.R/R/tribs.R +0 -583
  240. data/utils/enveomics/enveomics.R/R/utils.R +0 -80
  241. data/utils/enveomics/enveomics.R/README.md +0 -81
  242. data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
  243. data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
  244. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -16
  245. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -16
  246. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -16
  247. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +0 -25
  248. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +0 -46
  249. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +0 -23
  250. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +0 -47
  251. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +0 -23
  252. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +0 -23
  253. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +0 -40
  254. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +0 -103
  255. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +0 -67
  256. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +0 -24
  257. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +0 -19
  258. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +0 -45
  259. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +0 -44
  260. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +0 -47
  261. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +0 -75
  262. data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +0 -50
  263. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +0 -44
  264. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +0 -139
  265. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +0 -45
  266. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +0 -24
  267. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +0 -77
  268. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +0 -25
  269. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +0 -21
  270. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +0 -19
  271. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +0 -19
  272. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +0 -47
  273. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +0 -29
  274. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +0 -18
  275. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +0 -45
  276. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +0 -36
  277. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +0 -19
  278. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +0 -19
  279. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +0 -27
  280. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +0 -52
  281. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +0 -17
  282. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +0 -51
  283. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +0 -43
  284. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +0 -82
  285. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +0 -59
  286. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +0 -27
  287. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +0 -36
  288. data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +0 -23
  289. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +0 -68
  290. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +0 -28
  291. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +0 -27
  292. data/utils/enveomics/enveomics.R/man/growth.curves.Rd +0 -14
  293. data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +0 -13
  294. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +0 -78
  295. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +0 -46
  296. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +0 -45
  297. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +0 -125
  298. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +0 -19
  299. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +0 -19
  300. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +0 -19
  301. data/utils/enveomics/globals.mk +0 -8
  302. data/utils/enveomics/manifest.json +0 -9
  303. data/utils/multitrim/Multitrim How-To.pdf +0 -0
  304. data/utils/multitrim/README.md +0 -67
  305. data/utils/multitrim/multitrim.py +0 -1555
  306. data/utils/multitrim/multitrim.yml +0 -13
@@ -1,69 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require 'optparse'
4
- require 'zlib'
5
-
6
- o = { qual: 31, encoding: 33 }
7
- ARGV << '-h' if ARGV.empty?
8
- OptionParser.new do |opts|
9
- opts.banner = "
10
- Creates a FastQ-compliant file from a FastA file.
11
-
12
- Usage: #{$0} [options]"
13
- opts.separator ''
14
- opts.separator 'Options'
15
- opts.on(
16
- '-i', '--in FILE', 'Input FastA file (supports .gz compression)'
17
- ) { |v| o[:in] = v }
18
- opts.on(
19
- '-o', '--out FILE', 'Output FastQ file (supports .gz compression)'
20
- ) { |v| o[:out] = v }
21
- opts.on(
22
- '-q', '--quality INT', Integer,
23
- 'PHRED quality score to use (fixed), in the range [-5, 41]',
24
- "By default: #{o[:qual]}"
25
- ) { |v| o[:qual] = v }
26
- opts.on(
27
- '--encoding INT', Integer,
28
- "Base encoding (33 or 64). By default: #{o[:encoding]}"
29
- ) { |v| o[:encoding] = v }
30
- opts.on('-h', '--help', 'Display this screen.') do
31
- puts opts
32
- exit
33
- end
34
- opts.separator ''
35
- end.parse!
36
- abort '-i is mandatory' if o[:in].nil?
37
- abort '-o is mandatory' if o[:out].nil?
38
- abort '-q must be in the range -5 .. 41' if o[:qual] < -5 || o[:qual] > 41
39
-
40
- # Determine quality character
41
- $qchar = (o[:qual] + o[:encoding]).chr
42
-
43
- # Create file handlers
44
- ifh = o[:in] =~ /\.gz$/ ?
45
- Zlib::GzipReader.open(o[:in]) : File.open(o[:in], 'r')
46
- ofh = o[:out] =~ /\.gz$/ ?
47
- Zlib::GzipWriter.open(o[:out]) : File.open(o[:out], 'w')
48
-
49
- def print_seq(ofh, id, seq)
50
- ofh.puts "@#{id}", seq, '+', $qchar * seq.length unless seq.empty?
51
- end
52
-
53
- # Generate FastQ
54
- id = ''
55
- seq = ''
56
- ifh.each_line do |ln|
57
- next if ln =~ /^;/
58
- if ln =~ /^>(.*)/
59
- print_seq(ofh, id, seq)
60
- seq = ''
61
- id = $1
62
- else
63
- seq += ln.chomp.upcase.gsub(/[^A-Z]/,'')
64
- end
65
- end
66
- print_seq(ofh, id, seq)
67
- ofh.close
68
- ifh.close
69
-
@@ -1,48 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require "optparse"
4
- o = {wrap:70}
5
- ARGV << "-h" if ARGV.empty?
6
- OptionParser.new do |opts|
7
- opts.banner = "
8
- Wraps sequences in a FastA to a given line length.
9
-
10
- Usage: #{$0} [options]"
11
- opts.separator ""
12
- opts.separator "Options"
13
- opts.on("-i", "--in FILE", "Input FastA file."){ |v| o[:in] = v }
14
- opts.on("-o", "--out FILE", "Output FastA file."){ |v| o[:out] = v }
15
- opts.on("-w", "--wrap INT",
16
- "Line length to wrap sequences. Use 0 to generate 1-line sequences.",
17
- "By default: #{o[:wrap]}."){ |v| o[:wrap] = v.to_i }
18
- opts.on("-h", "--help", "Display this screen.") do
19
- puts opts
20
- exit
21
- end
22
- opts.separator ""
23
- end.parse!
24
- abort "-i is mandatory" if o[:in].nil?
25
- abort "-o is mandatory" if o[:out].nil?
26
-
27
- def wrap_width(txt, len)
28
- return "" if txt.empty?
29
- return "#{txt}\n" if len==0
30
- txt.gsub(/(.{1,#{len}})/,"\\1\n")
31
- end
32
-
33
- ofh = File.open(o[:out], "w")
34
- File.open(o[:in], "r") do |ifh|
35
- bf = ""
36
- ifh.each_line do |ln|
37
- if ln =~ /^>/
38
- ofh.print wrap_width(bf, o[:wrap])
39
- ofh.puts ln
40
- bf = ""
41
- else
42
- ln.chomp!
43
- bf << ln
44
- end
45
- end
46
- ofh.print wrap_width(bf, o[:wrap])
47
- end
48
- ofh.close
@@ -1,54 +0,0 @@
1
- #!/usr/bin/env perl
2
- #
3
- # @author: Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
4
- # @update: Mar-23-2015
5
- # @license: artistic license 2.0
6
- #
7
-
8
- use warnings;
9
- use strict;
10
- use Getopt::Std;
11
-
12
- sub HELP_MESSAGE { die "
13
- .Description:
14
- Extracts a subset of sequences from a FastQ file.
15
-
16
- .Usage: $0 [options] list.txt seqs.fq > subset.fq
17
-
18
- [options]
19
- -r Reverse list. Extracts sequences NOT present in the list.
20
- -q Runs quietly.
21
- -h Prints this message and exits.
22
-
23
- [mandatory]
24
- list.txt List of sequences to extract.
25
- seqs.fq FastQ file containing the superset of sequences.
26
- subset.fq FastQ file to be created.
27
-
28
- " }
29
-
30
- my %o=();
31
- getopts('rhq', \%o);
32
- my($list, $fq) = @ARGV;
33
- ($list and $fq) or &HELP_MESSAGE;
34
- $o{h} and &HELP_MESSAGE;
35
-
36
- print STDERR "Reading list.\n" unless $o{q};
37
- open LI, "<", $list or die "Cannot read file: $list: $!\n";
38
- my %li = map { chomp; $_ => 1 } <LI>;
39
- close LI;
40
-
41
- print STDERR "Filtering FastQ.\n" unless $o{q};
42
- open FQ, "<", $fq or die "Cannot read file: $fq: $!\n";
43
- my $good = 0;
44
- while(my $ln = <FQ>){
45
- my @ln = ();
46
- $ln[$_] = <FQ> for 0 .. 2;
47
- chomp $ln;
48
- if($ln =~ m/^@((\S+).*)/){ $good = (exists $li{$1} or exists $li{">$1"} or exists $li{"\@$1"} or exists $li{$2} or exists $li{$ln}) }
49
- elsif($ln =~ m/^>/){ $good=0; print STDERR "Warning: Non-cannonical defline, line $.: $ln\n" }
50
- else{ $good=$o{r}; print STDERR "Warning: Non-cannonical defline, line $.: $ln\n" }
51
- print "".join("", "$ln\n", @ln) if (($good and not $o{r}) or ($o{r} and not $good));
52
- }
53
- close FQ;
54
-
@@ -1,90 +0,0 @@
1
- #!/usr/bin/env perl
2
-
3
- # @author Luis M. Rodriguez-R
4
- # @license artistic license 2.0
5
-
6
- use strict;
7
- use warnings;
8
- use Symbol;
9
-
10
- my $HELP = <<HELP
11
-
12
- Description:
13
- Interposes sequences in FastQ format from two files into one output file.
14
- If more than two files are provided, the script will interpose all the input
15
- files.
16
- Note that this script will check for the consistency of the names (assuming
17
- a pair of related reads contains the same name varying only in a trailing
18
- slash (/) followed by a digit. If you want to turn this feature off just
19
- set the -T option to zero. If you want to decrease the sampling period (to
20
- speed the script up) or increase it (to make it more sensitive to errors)
21
- just change the -T option accordingly.
22
-
23
- Usage:
24
- $0 [-T <int> ]<output_fastq> <input_fastq_1> <input_fastq_2> [additional input files...]
25
-
26
- Where,
27
- -T <int> : Optional. Integer indicating the sampling period for
28
- names evaluation (see Description above).
29
- By default: 1000.
30
- output_fastq : Output file
31
- input_fastq_1 : First FastQ file
32
- input_fastq_2 : Second FastQ file
33
- ... : Any additional FastQ files (or none)
34
-
35
- HELP
36
- ;
37
- my $eval_T = 1000;
38
- if(exists $ARGV[0] and exists $ARGV[1] and $ARGV[0] eq '-T'){
39
- $eval_T = $ARGV[1]+0;
40
- shift @ARGV;
41
- shift @ARGV;
42
- }
43
- my $out = shift @ARGV;
44
- my @in = @ARGV;
45
-
46
-
47
- die $HELP unless $out and $#in >= 1;
48
- open OUT, ">", $out or die "Unable to write on $out: $!\n";
49
- print "Output file: $out\n";
50
-
51
- my @in_fh = ();
52
-
53
- for my $k (0 .. $#in) {
54
- $in_fh[$k] = gensym;
55
- open $in_fh[$k], "<", $in[$k] or die "Unable to read $in[$k]: $!\n";
56
- print "Input file: $in[$k]\n";
57
- }
58
-
59
- my $i = 0;
60
- my $frl;
61
- LINE: while(1){
62
- my $name = "";
63
- print STDERR "\rEntry: $i " unless $i % 1000;
64
- FILE: for my $k (0 .. $#in_fh){
65
- my @ln = ();
66
- for my $l (0 .. 3){
67
- $ln[$l] = readline($in_fh[$k]);
68
- last LINE if $k==0 and $l==0 and (not defined $ln[$l]);
69
- defined $ln[$l] or die "Impossible to read next entry (line $.) from $in[$k]: $!\n";
70
- chomp $ln[$l];
71
- }
72
- if($eval_T and not $i % $eval_T){
73
- $ln[0] =~ m/^\@(.*?)\/\d+\s*$/ or die "Impossible to evaluate names!\n offending entry:\n$ln[0]\n";
74
- $name ||= $1;
75
- die "Inconsistent name!\n base name is $name\n offending entry is:\n$ln[0]\n" unless $1 eq $name;
76
- }
77
- unless($frl){
78
- $ln[0] =~ /^@/ or die "Unexpected format! (missing @)\n offending entry: $ln[0].\n";
79
- $ln[2] =~ /^\+/ or die "Unexpected format! (missing +)\n offending entry: $ln[0].\n";
80
- $frl = length $ln[1];
81
- }
82
- print OUT "".join("\n", @ln, "");
83
- }
84
- $i++;
85
- }
86
- print "\rNumber of entries: $i \nFirst read length: $frl\n";
87
- close OUT;
88
-
89
- for my $k(0..$#in_fh){print "ALERT: The file $in[$k] contains trailing entries\n" if defined readline($in_fh[$k])}
90
-
@@ -1,89 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- $VERSION = 1.2
4
- $:.push File.expand_path('../lib', __FILE__)
5
- require 'enveomics_rb/enveomics'
6
-
7
- o = { q: false, offset: 33, qual: 15, fasta: false }
8
- OptionParser.new do |opts|
9
- opts.version = $VERSION
10
- Enveomics.opt_banner(
11
- opts, 'Masks low-quality bases in a FastQ file',
12
- "#{File.basename($0)} -i in.fastq -o out.fastq [options]"
13
- )
14
-
15
- opts.separator 'Mandatory'
16
- opts.on(
17
- '-i', '--input FILE',
18
- 'Path to the FastQ file containing the sequences',
19
- 'Supports compression with .gz extension, use - for STDIN'
20
- ) { |v| o[:in] = v }
21
- opts.on(
22
- '-o', '--out FILE',
23
- 'Path to the output FastQ file',
24
- 'Supports compression with .gz extension, use - for STDOUT'
25
- ) { |v| o[:out] = v }
26
-
27
- opts.separator ''
28
- opts.separator 'Quality Options'
29
- opts.on(
30
- '-q', '--qual INT', Integer,
31
- "Minimum quality score to allow a base, by default: #{o[:qual]}"
32
- ) { |v| o[:qual] = v }
33
- opts.on(
34
- '--offset INT', Integer,
35
- "Q-score offset, by default: #{o[:offset]}"
36
- ) { |v| o[:offset] = v }
37
-
38
- opts.separator ''
39
- opts.separator 'Other Options'
40
- opts.on(
41
- '-a', '--fasta', 'Output sequences in FastA format'
42
- ) { |v| o[:fasta] = v }
43
- opts.on('-q', '--quiet', 'Run quietly (no STDERR output)') { o[:q] = true }
44
- opts.on('-h', '--help', 'Display this screen') do
45
- puts opts
46
- exit
47
- end
48
- opts.separator ''
49
- end.parse!
50
-
51
- raise Enveomics::OptionError.new('-i is mandatory') if o[:in].nil?
52
- raise Enveomics::OptionError.new('-o is mandatory') if o[:out].nil?
53
- $QUIET = o[:q]
54
-
55
- # Open in/out files
56
- say 'Reading FastQ file'
57
- ifh = reader(o[:in])
58
- ofh = writer(o[:out])
59
-
60
- # Parse and mask
61
- entry = []
62
- lno = 0
63
- ifh.each_line do |ln|
64
- lno += 1 # <- Gzip doesn't support $.
65
- case lno % 4
66
- when 1
67
- ln =~ /^@(\S+)/ or
68
- raise Enveomics::ParseError.new("Unexpected defline format: #{ln}")
69
- entry << ln
70
- when 2, 3
71
- entry << ln
72
- when 0
73
- entry << ln
74
- q = entry[3].chomp.split('').map { |i| (i.ord - o[:offset]) }
75
- q.map { |i| i < o[:qual] }.each_with_index { |i, k| entry[1][k] = 'N' if i }
76
- ofh.puts(o[:fasta] ? [entry[0].gsub(/^@/, '>'), entry[1]] : entry)
77
- entry = []
78
- end
79
- end
80
-
81
- # Finalize
82
- say " Lines: #{lno}"
83
- unless entry.empty?
84
- raise Enveomics::ParseError.new('Unexpected trailing lines in FastQ')
85
- end
86
- say " Sequences: #{lno / 4}"
87
- ifh.close
88
- ofh.close
89
-
@@ -1,90 +0,0 @@
1
- #!/usr/bin/env perl
2
- #
3
- # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
4
- # @update Mar-23-2015
5
- # @license artistic license 2.0
6
- #
7
-
8
- use warnings;
9
- use strict;
10
-
11
- my ($in, $off, $force) = @ARGV;
12
- $in or die "
13
- .Description:
14
- There are several FastQ formats (see http://en.wikipedia.org/wiki/FASTQ_format).
15
- This script takes a FastQ in any of them, identifies the type of FastQ (this is,
16
- the offset), and generates a FastQ with the given offset. Note that Solexa+64
17
- FastQ can cause problematic values when using the offset 33, since there is no
18
- equivalent in Phred+33 for negative values (the range of Solexa+64 is -5 to 40).
19
-
20
- .Usage:
21
- $0 in.fastq[ offset[ force]] > out.fastq
22
-
23
- in.fastq Input file in FastQ format (range is automatically detected).
24
- offset (optional) Offset to use for the output. Use 0 (zero) to detect
25
- the input format and exit. By default: 33.
26
- force (optional) If true, turns errors into warnings and continues.
27
- Out-of-range values are set to the closest range limit.
28
- out.fastq Output file in FastQ format with the specified offset.
29
-
30
- ";
31
-
32
- $off = 33 unless defined $off;
33
-
34
- my $in_off = 0;
35
- open IN, "<", $in or die "Cannot read file: $in: $!\n";
36
- GUESS_FORMAT: while(<IN>){
37
- unless($.%4){
38
- chomp;
39
- for my $chr (split //){
40
- my $o = ord $chr;
41
- if($o < 55){
42
- $in_off = 33;
43
- last GUESS_FORMAT;
44
- }elsif($o > 80){
45
- $in_off = 64;
46
- last GUESS_FORMAT;
47
- }
48
- }
49
- }
50
- }
51
- close IN;
52
- print STDERR "Detected input offset: Phred+$in_off\n";
53
- exit unless $off;
54
-
55
- my $Solexa64=0;
56
- die "Couldn't guess input format.\n" unless $in_off;
57
- open IN, "<", $in or die "Cannot read file: $in: $!\n";
58
- while(<IN>){
59
- if($in_off==$off or $.%4){
60
- print $_;
61
- }else{
62
- chomp;
63
- for my $chr (split //){
64
- my $score = ord($chr) - $in_off;
65
- my $err = '';
66
- if($score < -5){
67
- $err = "Out-of-range value $chr ($score) in line $..\n";
68
- $score = $off==64 ? -5 : 0;
69
- }elsif(!$Solexa64 and $score < 0){
70
- if($in_off==64){
71
- print STDERR "Format variant: Solexa+64\n";
72
- $Solexa64 = 1;
73
- }else{
74
- $err = "Out-of-range value $chr ($score) in line $..\n";
75
- $score = 0;
76
- }
77
- }elsif($score>41){
78
- $err = "Out-of-range value $chr ($score) in line $..\n";
79
- $score = 41;
80
- }
81
- if($err){
82
- if($force){ warn $err } else { die $err }
83
- }
84
- print chr( $score + $off );
85
- }
86
- print "\n";
87
- }
88
- }
89
- close IN;
90
-
@@ -1,53 +0,0 @@
1
- #!/usr/bin/env perl
2
- #
3
- # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
4
- # @license artistic license 2.0
5
- # @update Jul-05-2015
6
- #
7
-
8
- use warnings;
9
- use strict;
10
- use Symbol;
11
-
12
- my ($file, $base, $outN) = @ARGV;
13
-
14
- $outN ||= 2;
15
- ($file and $base) or die "
16
- Usage
17
- $0 in_file.fq out_base[ no_files]
18
-
19
- in_file.fq Input file in FastA format.
20
- out_base Prefix for the name of the output files. It will
21
- be appended with .<i>.fastq, where <i> is a consecutive
22
- number starting in 1.
23
- no_files Number of files to generate. By default: 2.
24
-
25
- ";
26
-
27
-
28
- my @outSym = ();
29
- for my $i (1 .. $outN){
30
- $outSym[$i-1] = gensym;
31
- open $outSym[$i-1], ">", "$base.$i.fastq" or die "I can not create the file: $base.$i.fa: $!\n";
32
- }
33
-
34
-
35
- my($i, $seq) = (-1, '');
36
- open FILE, "<", $file or die "I can not read the file: $file: $!\n";
37
- while(my $ln=<FILE>){
38
- if($.%4 == 1){
39
- print { $outSym[$i % $outN] } $seq if $seq;
40
- $i++;
41
- $seq = '';
42
- }
43
- $seq.=$ln;
44
- }
45
- print { $outSym[$i % $outN] } $seq if $seq;
46
- close FILE;
47
-
48
- for(my $j=0; $j<$outN; $j++){
49
- close $outSym[$j];
50
- }
51
-
52
- print STDERR "Sequences: ".($i+1)."\nFiles: $outN\n";
53
-
@@ -1,70 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # frozen_string_literal: true
4
-
5
- $:.push File.expand_path('../lib', __FILE__)
6
- require 'enveomics_rb/enveomics'
7
- $VERSION = 1.1
8
-
9
- o = { q: false, p: '', s: '' }
10
- OptionParser.new do |opts|
11
- opts.version = $VERSION
12
- Enveomics.opt_banner(
13
- opts, 'Generates easy-to-parse tagged reads from FastQ files',
14
- "#{File.basename($0)} -i in.fasta -o out.fasta [options]"
15
- )
16
-
17
- opts.separator 'Mandatory'
18
- opts.on(
19
- '-i', '--in FILE',
20
- 'Path to the FastQ file containing the sequences',
21
- 'Supports compression with .gz extension, use - for STDIN'
22
- ) { |v| o[:in] = v }
23
- opts.on(
24
- '-o', '--out FILE', 'Path to the FastQ to create',
25
- 'Supports compression with .gz extension, use - for STDOUT'
26
- ) { |v| o[:out] = v }
27
- opts.separator ''
28
- opts.separator 'ID options'
29
- opts.on('-p', '--prefix STR', 'Prefix to use in all IDs') { |v| o[:p] = v }
30
- opts.on('-s', '--suffix STR', 'Suffix to use in all IDs') { |v| o[:s] = v }
31
- opts.separator ''
32
- opts.separator 'Other Options'
33
- opts.on('-q', '--quiet', 'Run quietly (no STDERR output)') { o[:q] = true }
34
- opts.on('-h', '--help', 'Display this screen') { puts opts ; exit }
35
- opts.separator ''
36
- end.parse!
37
-
38
- raise Enveomics::OptionError.new('-i is mandatory') if o[:in].nil?
39
- raise Enveomics::OptionError.new('-o is mandatory') if o[:out].nil?
40
-
41
- begin
42
- ifh = reader(o[:in])
43
- ofh = writer(o[:out])
44
- i = 0
45
- lno = 0
46
- ifh.each do |ln|
47
- ln.chomp!
48
- lno += 1
49
- case lno % 4
50
- when 1
51
- ln =~ /^@/ or
52
- raise Enveomics::ParseError.new("Cannot parse line #{$.}: #{ln}")
53
- i += 1
54
- ofh.puts "@#{o[:p]}#{i}#{o[:s]}"
55
- when 3
56
- ln =~ /^\+/ or
57
- raise Enveomics::ParseError.new("Cannot parse line #{$.}: #{ln}")
58
- ofh.puts '+'
59
- else
60
- ofh.puts ln
61
- end
62
- end
63
- ifh.close
64
- ofh.close
65
- rescue => err
66
- $stderr.puts "Exception: #{err}\n\n"
67
- err.backtrace.each { |l| $stderr.puts l + "\n" }
68
- err
69
- end
70
-
@@ -1,81 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require 'optparse'
4
-
5
- o = {q:false, key:2}
6
- ARGV << '-h' if ARGV.empty?
7
- OptionParser.new do |opts|
8
- opts.banner = "
9
- Compares the estimated error of sequencing reads (Q-score) with
10
- observed mismatches (identity against a know reference sequence).
11
-
12
- Usage: #{$0} [options]"
13
- opts.separator ""
14
- opts.separator "Mandatory"
15
- opts.on("-f", "--fastq FILE",
16
- "Path to the FastQ file containing the sequences."){ |v| o[:fastq] = v }
17
- opts.on("-b", "--blast FILE",
18
- "Path to the tabular BLAST file mapping reads to reference sequences."
19
- ){ |v| o[:blast] = v }
20
- opts.on("-o", "--out FILE",
21
- "Path to the output tab-delimited file to create."){ |v| o[:out] = v }
22
- opts.separator ""
23
- opts.separator "Other Options"
24
- opts.on("-q", "--quiet", "Run quietly (no STDERR output)"){ o[:q] = TRUE }
25
- opts.on("-h", "--help", "Display this screen") do
26
- puts opts
27
- exit
28
- end
29
- opts.separator ""
30
- end.parse!
31
- abort "-f is mandatory" if o[:fastq].nil?
32
- abort "-b is mandatory" if o[:blast].nil?
33
- abort "-o is mandatory" if o[:out].nil?
34
-
35
- # Read the Q scores and estimate expected mismatches
36
- mm = {} # <- Hash with read IDs as key, and arrays as values:
37
- # [ expected mismatches, variance of mismatches, length ]
38
- $stderr.puts "Reading FastQ file" unless o[:q]
39
- File.open(o[:fastq], "r") do |fh|
40
- id = nil
41
- fh.each_line do |ln|
42
- case $.%4
43
- when 1
44
- ln =~ /^@(\S+)/ or raise "Unexpected defline format: #{ln}"
45
- id = $1
46
- $stderr.print " #{mm.size} reads...\r" unless o[:q]
47
- when 0
48
- ln.chomp!
49
- # I'm assuming ALWAYS Phred+33!!!
50
- p = ln.split('').map{ |i| (i.ord - 33).to_f }.map{ |q| 10.0**(-q/10.0) }
51
- mu = p.inject(:+)
52
- var = p.map{ |i| i*(1.0-i) }.inject(:+)
53
- mm[id] = [mu, var, p.size]
54
- end
55
- end
56
- $stderr.puts " Found: #{mm.size} reads." unless o[:q]
57
- end
58
-
59
- ofh = File.open(o[:out], "w")
60
- ofh.puts %w[id obs_subs obs_id aln_len obs_ins obs_del obs_gap mu var len].join("\t")
61
-
62
- # Read Identities and compare against expectation
63
- $stderr.puts "Reading Tabular BLAST file" unless o[:q]
64
- File.open(o[:blast], "r") do |fh|
65
- k = 0
66
- fh.each_line do |ln|
67
- r = ln.chomp.split("\t")
68
- id = r[0]
69
- next if mm[id].nil?
70
- k += 1
71
- $stderr.print " #{k} alignments...\r" unless o[:q]
72
- obs_m = r[4].to_i + (r[6].to_i - 1) + (mm[id][2] - r[7].to_i)
73
- obs_del = r[3].to_i - (r[7].to_i - r[6].to_i).abs
74
- obs_ins = r[3].to_i - (r[9].to_i - r[8].to_i).abs
75
- ofh.puts ([id, obs_m, r[2], r[7].to_i - r[6].to_i + 1,
76
- obs_ins, obs_del, r[5]] + mm[id]).join("\t")
77
- end
78
- $stderr.puts " Found #{k} alignments." unless o[:q]
79
- end
80
-
81
- ofh.close
@@ -1,24 +0,0 @@
1
- #!/usr/bin/env awk -f
2
- #
3
- # @author Luis M. Rodriguez-R
4
- # @update Dec-26-2015
5
- # @license artistic license 2.0
6
- #
7
-
8
- BEGIN {
9
- for (i = 0; i < ARGC; i++) {
10
- if(ARGV[i] == "--help"){
11
- print "Description:\n"
12
- print " Translates FastQ files into FastA.\n"
13
- print "Usage:\n"
14
- print " FastQ.toFastA.awk < in.fq > out.fa\n"
15
- exit
16
- }
17
- }
18
- }
19
-
20
- NR%4 == 1, NR%4 == 2 {
21
- if(NR%4 == 1){ gsub(/^@/,">") }
22
- print $0
23
- }
24
-