miga-base 0.7.26.0 → 0.7.26.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/version.rb +1 -1
  3. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +41964 -0
  4. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +32439 -0
  5. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +62056 -0
  6. data/utils/FastAAI/FastAAI/FastAAI +1336 -0
  7. data/utils/FastAAI/README.md +84 -0
  8. data/utils/FastAAI/kAAI_v1.0_virus.py +1296 -0
  9. data/utils/enveomics/Docs/recplot2.md +244 -0
  10. data/utils/enveomics/Examples/aai-matrix.bash +66 -0
  11. data/utils/enveomics/Examples/ani-matrix.bash +66 -0
  12. data/utils/enveomics/Examples/essential-phylogeny.bash +105 -0
  13. data/utils/enveomics/Examples/unus-genome-phylogeny.bash +100 -0
  14. data/utils/enveomics/LICENSE.txt +73 -0
  15. data/utils/enveomics/Makefile +52 -0
  16. data/utils/enveomics/Manifest/Tasks/aasubs.json +103 -0
  17. data/utils/enveomics/Manifest/Tasks/blasttab.json +786 -0
  18. data/utils/enveomics/Manifest/Tasks/distances.json +161 -0
  19. data/utils/enveomics/Manifest/Tasks/fasta.json +766 -0
  20. data/utils/enveomics/Manifest/Tasks/fastq.json +243 -0
  21. data/utils/enveomics/Manifest/Tasks/graphics.json +126 -0
  22. data/utils/enveomics/Manifest/Tasks/mapping.json +67 -0
  23. data/utils/enveomics/Manifest/Tasks/ogs.json +382 -0
  24. data/utils/enveomics/Manifest/Tasks/other.json +829 -0
  25. data/utils/enveomics/Manifest/Tasks/remote.json +355 -0
  26. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +501 -0
  27. data/utils/enveomics/Manifest/Tasks/tables.json +308 -0
  28. data/utils/enveomics/Manifest/Tasks/trees.json +68 -0
  29. data/utils/enveomics/Manifest/Tasks/variants.json +111 -0
  30. data/utils/enveomics/Manifest/categories.json +156 -0
  31. data/utils/enveomics/Manifest/examples.json +154 -0
  32. data/utils/enveomics/Manifest/tasks.json +4 -0
  33. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +69 -0
  34. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -0
  35. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -0
  36. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -0
  37. data/utils/enveomics/Pipelines/assembly.pbs/README.md +189 -0
  38. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +112 -0
  39. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +23 -0
  40. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +44 -0
  41. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +50 -0
  42. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +37 -0
  43. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +68 -0
  44. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +49 -0
  45. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +80 -0
  46. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +57 -0
  47. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +63 -0
  48. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +38 -0
  49. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +73 -0
  50. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +21 -0
  51. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +72 -0
  52. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +98 -0
  53. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -0
  54. data/utils/enveomics/Pipelines/blast.pbs/README.md +127 -0
  55. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +109 -0
  56. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +128 -0
  57. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +16 -0
  58. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +22 -0
  59. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +26 -0
  60. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +89 -0
  61. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +29 -0
  62. data/utils/enveomics/Pipelines/idba.pbs/README.md +49 -0
  63. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +95 -0
  64. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +56 -0
  65. data/utils/enveomics/Pipelines/trim.pbs/README.md +54 -0
  66. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +70 -0
  67. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +130 -0
  68. data/utils/enveomics/README.md +42 -0
  69. data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +171 -0
  70. data/utils/enveomics/Scripts/Aln.cat.rb +163 -0
  71. data/utils/enveomics/Scripts/Aln.convert.pl +35 -0
  72. data/utils/enveomics/Scripts/AlphaDiversity.pl +152 -0
  73. data/utils/enveomics/Scripts/BedGraph.tad.rb +93 -0
  74. data/utils/enveomics/Scripts/BedGraph.window.rb +71 -0
  75. data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +102 -0
  76. data/utils/enveomics/Scripts/BlastTab.addlen.rb +63 -0
  77. data/utils/enveomics/Scripts/BlastTab.advance.bash +48 -0
  78. data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +55 -0
  79. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +104 -0
  80. data/utils/enveomics/Scripts/BlastTab.cogCat.rb +76 -0
  81. data/utils/enveomics/Scripts/BlastTab.filter.pl +47 -0
  82. data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +194 -0
  83. data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +104 -0
  84. data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +157 -0
  85. data/utils/enveomics/Scripts/BlastTab.recplot2.R +48 -0
  86. data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +86 -0
  87. data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +119 -0
  88. data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +86 -0
  89. data/utils/enveomics/Scripts/BlastTab.subsample.pl +47 -0
  90. data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +114 -0
  91. data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +90 -0
  92. data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +101 -0
  93. data/utils/enveomics/Scripts/Chao1.pl +97 -0
  94. data/utils/enveomics/Scripts/CharTable.classify.rb +234 -0
  95. data/utils/enveomics/Scripts/EBIseq2tax.rb +83 -0
  96. data/utils/enveomics/Scripts/FastA.N50.pl +56 -0
  97. data/utils/enveomics/Scripts/FastA.extract.rb +152 -0
  98. data/utils/enveomics/Scripts/FastA.filter.pl +52 -0
  99. data/utils/enveomics/Scripts/FastA.filterLen.pl +28 -0
  100. data/utils/enveomics/Scripts/FastA.filterN.pl +60 -0
  101. data/utils/enveomics/Scripts/FastA.fragment.rb +92 -0
  102. data/utils/enveomics/Scripts/FastA.gc.pl +42 -0
  103. data/utils/enveomics/Scripts/FastA.interpose.pl +93 -0
  104. data/utils/enveomics/Scripts/FastA.length.pl +38 -0
  105. data/utils/enveomics/Scripts/FastA.mask.rb +89 -0
  106. data/utils/enveomics/Scripts/FastA.per_file.pl +36 -0
  107. data/utils/enveomics/Scripts/FastA.qlen.pl +57 -0
  108. data/utils/enveomics/Scripts/FastA.rename.pl +65 -0
  109. data/utils/enveomics/Scripts/FastA.revcom.pl +23 -0
  110. data/utils/enveomics/Scripts/FastA.sample.rb +83 -0
  111. data/utils/enveomics/Scripts/FastA.slider.pl +85 -0
  112. data/utils/enveomics/Scripts/FastA.split.pl +55 -0
  113. data/utils/enveomics/Scripts/FastA.split.rb +79 -0
  114. data/utils/enveomics/Scripts/FastA.subsample.pl +131 -0
  115. data/utils/enveomics/Scripts/FastA.tag.rb +65 -0
  116. data/utils/enveomics/Scripts/FastA.wrap.rb +48 -0
  117. data/utils/enveomics/Scripts/FastQ.filter.pl +54 -0
  118. data/utils/enveomics/Scripts/FastQ.interpose.pl +90 -0
  119. data/utils/enveomics/Scripts/FastQ.offset.pl +90 -0
  120. data/utils/enveomics/Scripts/FastQ.split.pl +53 -0
  121. data/utils/enveomics/Scripts/FastQ.tag.rb +63 -0
  122. data/utils/enveomics/Scripts/FastQ.test-error.rb +81 -0
  123. data/utils/enveomics/Scripts/FastQ.toFastA.awk +24 -0
  124. data/utils/enveomics/Scripts/GFF.catsbj.pl +127 -0
  125. data/utils/enveomics/Scripts/GenBank.add_fields.rb +84 -0
  126. data/utils/enveomics/Scripts/HMM.essential.rb +351 -0
  127. data/utils/enveomics/Scripts/HMM.haai.rb +168 -0
  128. data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +83 -0
  129. data/utils/enveomics/Scripts/JPlace.distances.rb +88 -0
  130. data/utils/enveomics/Scripts/JPlace.to_iToL.rb +320 -0
  131. data/utils/enveomics/Scripts/M5nr.getSequences.rb +81 -0
  132. data/utils/enveomics/Scripts/MeTaxa.distribution.pl +198 -0
  133. data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +35 -0
  134. data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +49 -0
  135. data/utils/enveomics/Scripts/NCBIacc2tax.rb +92 -0
  136. data/utils/enveomics/Scripts/Newick.autoprune.R +27 -0
  137. data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +228 -0
  138. data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +32 -0
  139. data/utils/enveomics/Scripts/RefSeq.download.bash +48 -0
  140. data/utils/enveomics/Scripts/SRA.download.bash +57 -0
  141. data/utils/enveomics/Scripts/TRIBS.plot-test.R +36 -0
  142. data/utils/enveomics/Scripts/TRIBS.test.R +39 -0
  143. data/utils/enveomics/Scripts/Table.barplot.R +31 -0
  144. data/utils/enveomics/Scripts/Table.df2dist.R +30 -0
  145. data/utils/enveomics/Scripts/Table.filter.pl +61 -0
  146. data/utils/enveomics/Scripts/Table.merge.pl +77 -0
  147. data/utils/enveomics/Scripts/Table.replace.rb +69 -0
  148. data/utils/enveomics/Scripts/Table.round.rb +63 -0
  149. data/utils/enveomics/Scripts/Table.split.pl +57 -0
  150. data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +227 -0
  151. data/utils/enveomics/Scripts/VCF.KaKs.rb +147 -0
  152. data/utils/enveomics/Scripts/VCF.SNPs.rb +88 -0
  153. data/utils/enveomics/Scripts/aai.rb +418 -0
  154. data/utils/enveomics/Scripts/ani.rb +362 -0
  155. data/utils/enveomics/Scripts/clust.rand.rb +102 -0
  156. data/utils/enveomics/Scripts/gi2tax.rb +103 -0
  157. data/utils/enveomics/Scripts/in_silico_GA_GI.pl +96 -0
  158. data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
  159. data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
  160. data/utils/enveomics/Scripts/lib/enveomics.R +1 -0
  161. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +24 -0
  162. data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +253 -0
  163. data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +182 -0
  164. data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +74 -0
  165. data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +237 -0
  166. data/utils/enveomics/Scripts/lib/enveomics_rb/stat.rb +30 -0
  167. data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +135 -0
  168. data/utils/enveomics/Scripts/ogs.annotate.rb +88 -0
  169. data/utils/enveomics/Scripts/ogs.core-pan.rb +160 -0
  170. data/utils/enveomics/Scripts/ogs.extract.rb +125 -0
  171. data/utils/enveomics/Scripts/ogs.mcl.rb +186 -0
  172. data/utils/enveomics/Scripts/ogs.rb +104 -0
  173. data/utils/enveomics/Scripts/ogs.stats.rb +131 -0
  174. data/utils/enveomics/Scripts/rbm.rb +146 -0
  175. data/utils/enveomics/Tests/Makefile +10 -0
  176. data/utils/enveomics/Tests/Mgen_M2288.faa +3189 -0
  177. data/utils/enveomics/Tests/Mgen_M2288.fna +8282 -0
  178. data/utils/enveomics/Tests/Mgen_M2321.fna +8288 -0
  179. data/utils/enveomics/Tests/Nequ_Kin4M.faa +2970 -0
  180. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
  181. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +7 -0
  182. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +17 -0
  183. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +137 -0
  184. data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +123 -0
  185. data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +200 -0
  186. data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +55 -0
  187. data/utils/enveomics/Tests/alkB.nwk +1 -0
  188. data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +13 -0
  189. data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +17 -0
  190. data/utils/enveomics/Tests/hiv1.faa +59 -0
  191. data/utils/enveomics/Tests/hiv1.fna +134 -0
  192. data/utils/enveomics/Tests/hiv2.faa +70 -0
  193. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +233 -0
  194. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +1 -0
  195. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +233 -0
  196. data/utils/enveomics/Tests/phyla_counts.tsv +10 -0
  197. data/utils/enveomics/Tests/primate_lentivirus.ogs +11 -0
  198. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +9 -0
  199. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +8 -0
  200. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +6 -0
  201. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +9 -0
  202. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +6 -0
  203. data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +6 -0
  204. data/utils/enveomics/build_enveomics_r.bash +45 -0
  205. data/utils/enveomics/enveomics.R/DESCRIPTION +31 -0
  206. data/utils/enveomics/enveomics.R/NAMESPACE +39 -0
  207. data/utils/enveomics/enveomics.R/R/autoprune.R +155 -0
  208. data/utils/enveomics/enveomics.R/R/barplot.R +184 -0
  209. data/utils/enveomics/enveomics.R/R/cliopts.R +135 -0
  210. data/utils/enveomics/enveomics.R/R/df2dist.R +154 -0
  211. data/utils/enveomics/enveomics.R/R/growthcurve.R +331 -0
  212. data/utils/enveomics/enveomics.R/R/recplot.R +354 -0
  213. data/utils/enveomics/enveomics.R/R/recplot2.R +1631 -0
  214. data/utils/enveomics/enveomics.R/R/tribs.R +583 -0
  215. data/utils/enveomics/enveomics.R/R/utils.R +50 -0
  216. data/utils/enveomics/enveomics.R/README.md +80 -0
  217. data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
  218. data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
  219. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +17 -0
  220. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +17 -0
  221. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +17 -0
  222. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +25 -0
  223. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +46 -0
  224. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -0
  225. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +47 -0
  226. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -0
  227. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -0
  228. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +32 -0
  229. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +91 -0
  230. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +57 -0
  231. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -0
  232. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -0
  233. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +39 -0
  234. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +38 -0
  235. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +40 -0
  236. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +67 -0
  237. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +37 -0
  238. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +122 -0
  239. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +45 -0
  240. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -0
  241. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +68 -0
  242. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -0
  243. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -0
  244. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -0
  245. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -0
  246. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +41 -0
  247. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -0
  248. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -0
  249. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +40 -0
  250. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -0
  251. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -0
  252. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -0
  253. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -0
  254. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +41 -0
  255. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -0
  256. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +43 -0
  257. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +37 -0
  258. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +74 -0
  259. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +59 -0
  260. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -0
  261. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +32 -0
  262. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +59 -0
  263. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -0
  264. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -0
  265. data/utils/enveomics/enveomics.R/man/growth.curves.Rd +14 -0
  266. data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +13 -0
  267. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +63 -0
  268. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +38 -0
  269. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +38 -0
  270. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +111 -0
  271. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -0
  272. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -0
  273. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -0
  274. data/utils/enveomics/globals.mk +8 -0
  275. data/utils/enveomics/manifest.json +9 -0
  276. metadata +277 -4
@@ -0,0 +1,48 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "optparse"
# Command-line settings. :wrap is the output line width; 0 keeps every
# sequence on a single line.
o = { wrap: 70 }

# Without any argument, behave as if help had been requested.
ARGV.push("-h") if ARGV.empty?

parser = OptionParser.new do |opts|
  opts.banner = "
Wraps sequences in a FastA to a given line length.

Usage: #{$0} [options]"
  opts.separator ""
  opts.separator "Options"
  opts.on("-i", "--in FILE", "Input FastA file.") { |path| o[:in] = path }
  opts.on("-o", "--out FILE", "Output FastA file.") { |path| o[:out] = path }
  opts.on(
    "-w", "--wrap INT",
    "Line length to wrap sequences. Use 0 to generate 1-line sequences.",
    "By default: #{o[:wrap]}."
  ) { |width| o[:wrap] = width.to_i }
  opts.on("-h", "--help", "Display this screen.") do
    puts opts
    exit
  end
  opts.separator ""
end
parser.parse!

# Both file paths are required.
abort "-i is mandatory" unless o[:in]
abort "-o is mandatory" unless o[:out]
26
+
# Splits a sequence into lines of at most +len+ characters.
#
# txt - sequence residues as one String (callers pass it without line breaks).
# len - maximum line width; zero or a negative value disables wrapping.
#
# Returns the wrapped text with every line terminated by "\n", or an empty
# String when +txt+ is empty, so that empty records emit nothing.
def wrap_width(txt, len)
  return "" if txt.empty?
  # A non-positive width cannot be wrapped to. Previously only 0 was handled
  # and a negative value was interpolated into the regex quantifier below,
  # which is not a valid interval.
  return "#{txt}\n" if len <= 0

  txt.gsub(/.{1,#{len}}/) { |chunk| "#{chunk}\n" }
end
32
+
# Stream the input FastA, buffering the residues of each record so they can
# be re-wrapped once the next defline (or the end of the file) is reached.
# The block form of File.open closes both handles even when reading or
# writing raises; the output handle used to be left open in that case.
File.open(o[:out], "w") do |ofh|
  File.open(o[:in], "r") do |ifh|
    buffer = ""
    ifh.each_line do |ln|
      if ln.start_with?(">")
        # New record: flush the previous sequence, then copy the defline as is.
        ofh.print wrap_width(buffer, o[:wrap])
        ofh.puts ln
        buffer = ""
      else
        buffer << ln.chomp
      end
    end
    # Flush the last record of the file.
    ofh.print wrap_width(buffer, o[:wrap])
  end
end
@@ -0,0 +1,54 @@
1
+ #!/usr/bin/env perl
2
+ #
3
+ # @author: Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
4
+ # @update: Mar-23-2015
5
+ # @license: artistic license 2.0
6
+ #
7
+
8
+ use warnings;
9
+ use strict;
10
+ use Getopt::Std;
11
+
# Prints the usage text on STDERR and ends the program with a failure status.
sub HELP_MESSAGE { die "
.Description:
Extracts a subset of sequences from a FastQ file.

.Usage: $0 [options] list.txt seqs.fq > subset.fq

[options]
-r Reverse list. Extracts sequences NOT present in the list.
-q Runs quietly.
-h Prints this message and exits.

[mandatory]
list.txt List of sequences to extract.
seqs.fq FastQ file containing the superset of sequences.
subset.fq FastQ file to be created.

" }

my %o = ();
getopts('rhq', \%o);
my ($list, $fq) = @ARGV;
($list and $fq) or HELP_MESSAGE();
$o{h} and HELP_MESSAGE();

# Load the requested identifiers (one per line) as hash keys.
print STDERR "Reading list.\n" unless $o{q};
open my $list_fh, "<", $list or die "Cannot read file: $list: $!\n";
my %li = map { chomp; $_ => 1 } <$list_fh>;
close $list_fh;

print STDERR "Filtering FastQ.\n" unless $o{q};
open my $fq_fh, "<", $fq or die "Cannot read file: $fq: $!\n";
while(my $ln = <$fq_fh>){
  chomp $ln;
  # Each entry is the defline plus the three lines that follow it.
  my @rest = map { scalar <$fq_fh> } 0 .. 2;
  if(grep { not defined $_ } @rest){
    # The file ended in the middle of an entry: report it instead of
    # printing a partial record built from undefined lines.
    print STDERR "Warning: Truncated entry at end of file, line $.: $ln\n";
    last;
  }

  my $emit;
  if($ln =~ m/^\@((\S+).*)/){
    # The list may hold the full defline (with or without the leading '@'
    # or a '>'), or just the identifier up to the first blank.
    my $listed = (exists $li{$1} or exists $li{">$1"} or exists $li{"\@$1"}
      or exists $li{$2} or exists $li{$ln});
    $emit = $o{r} ? !$listed : $listed;
  }elsif($ln =~ m/^>/){
    # FastA-style defline: treated as "not listed", hence emitted only
    # when the list is reversed (-r).
    print STDERR "Warning: Non-canonical defline, line $.: $ln\n";
    $emit = $o{r};
  }else{
    # Any other malformed defline is never emitted.
    print STDERR "Warning: Non-canonical defline, line $.: $ln\n";
    $emit = 0;
  }
  print join("", "$ln\n", @rest) if $emit;
}
close $fq_fh;
54
+
@@ -0,0 +1,90 @@
1
+ #!/usr/bin/env perl
2
+
3
+ # @author Luis M. Rodriguez-R
4
+ # @license artistic license 2.0
5
+
6
+ use strict;
7
+ use warnings;
8
+ use Symbol;
9
+
# Usage text, shown when fewer than one output and two input files are given.
my $HELP = <<HELP;

Description:
Interposes sequences in FastQ format from two files into one output file.
If more than two files are provided, the script will interpose all the input
files.
Note that this script will check for the consistency of the names (assuming
a pair of related reads contains the same name varying only in a trailing
slash (/) followed by a digit). If you want to turn this feature off just
set the -T option to zero. If you want to decrease the sampling period (to
speed the script up) or increase it (to make it more sensitive to errors)
just change the -T option accordingly.

Usage:
$0 [-T <int>] <output_fastq> <input_fastq_1> <input_fastq_2> [additional input files...]

Where,
-T <int> : Optional. Integer indicating the sampling period for
names evaluation (see Description above).
By default: 1000.
output_fastq : Output file
input_fastq_1 : First FastQ file
input_fastq_2 : Second FastQ file
... : Any additional FastQ files (or none)

HELP

# Sampling period of the name-consistency check; 0 disables the check.
my $eval_T = 1000;
if(exists $ARGV[0] and exists $ARGV[1] and $ARGV[0] eq '-T'){
  $eval_T = $ARGV[1]+0;
  splice @ARGV, 0, 2;
}
my $out = shift @ARGV;
my @in = @ARGV;

die $HELP unless $out and $#in >= 1;
open my $out_fh, ">", $out or die "Unable to write on $out: $!\n";
print "Output file: $out\n";

# One lexical read handle per input file, in command-line order.
my @in_fh = ();
for my $k (0 .. $#in) {
  open $in_fh[$k], "<", $in[$k] or die "Unable to read $in[$k]: $!\n";
  print "Input file: $in[$k]\n";
}

my $i = 0;   # Number of complete rounds (one entry from every file) written.
my $frl;     # Length of the first read; also marks the format check as done.
LINE: while(1){
  my $name = "";
  print STDERR "\rEntry: $i " unless $i % 1000;
  FILE: for my $k (0 .. $#in_fh){
    my @ln = ();
    for my $l (0 .. 3){
      $ln[$l] = readline($in_fh[$k]);
      # The first file running out at an entry boundary is the normal end.
      last LINE if $k==0 and $l==0 and (not defined $ln[$l]);
      unless(defined $ln[$l]){
        # $! is only meaningful after a failed read, not at a plain EOF.
        my $why = eof($in_fh[$k]) ? "unexpected end of file" : $!;
        die "Impossible to read next entry (line $.) from $in[$k]: $why\n";
      }
      chomp $ln[$l];
    }
    if($eval_T and not $i % $eval_T){
      # All files must share the base name (the part before '/<digit>').
      $ln[0] =~ m/^\@(.*?)\/\d+\s*$/ or die "Impossible to evaluate names!\n offending entry:\n$ln[0]\n";
      $name ||= $1;
      die "Inconsistent name!\n base name is $name\n offending entry is:\n$ln[0]\n" unless $1 eq $name;
    }
    unless($frl){
      $ln[0] =~ /^\@/ or die "Unexpected format! (missing @)\n offending entry: $ln[0].\n";
      $ln[2] =~ /^\+/ or die "Unexpected format! (missing +)\n offending entry: $ln[0].\n";
      $frl = length $ln[1];
    }
    print {$out_fh} join("\n", @ln, "");
  }
  $i++;
}
# $frl stays undefined when the first input has no entries at all.
print "\rNumber of entries: $i \nFirst read length: ".(defined $frl ? $frl : "NA")."\n";
# Buffered write errors (e.g. a full disk) only surface when closing.
close $out_fh or die "Unable to write on $out: $!\n";

# Any input that still has lines left is longer than the first file.
for my $k (0 .. $#in_fh){
  print "ALERT: The file $in[$k] contains trailing entries\n" if defined readline($in_fh[$k]);
}
90
+
@@ -0,0 +1,90 @@
1
+ #!/usr/bin/env perl
2
+ #
3
+ # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
4
+ # @update Mar-23-2015
5
+ # @license artistic license 2.0
6
+ #
7
+
8
+ use warnings;
9
+ use strict;
10
+
my ($in, $off, $force) = @ARGV;
die "
.Description:
There are several FastQ formats (see http://en.wikipedia.org/wiki/FASTQ_format).
This script takes a FastQ in any of them, identifies the type of FastQ (this is,
the offset), and generates a FastQ with the given offset. Note that Solexa+64
FastQ can cause problematic values when using the offset 33, since there is no
equivalent in Phred+33 for negative values (the range of Solexa+64 is -5 to 40).

.Usage:
$0 in.fastq[ offset[ force]] > out.fastq

in.fastq Input file in FastQ format (range is automatically detected).
offset (optional) Offset to use for the output. Use 0 (zero) to detect
the input format and exit. By default: 33.
force (optional) If true, turns errors into warnings and continues.
Out-of-range values are set to the closest range limit.
out.fastq Output file in FastQ format with the specified offset.

" unless $in;

$off = 33 unless defined $off;

# First pass: scan quality lines (every fourth line) until a character
# settles the input offset: a code below 55 means 33, above 80 means 64.
my $in_off = 0;
open my $probe, "<", $in or die "Cannot read file: $in: $!\n";
GUESS_FORMAT: while(my $line = <$probe>){
  next if $. % 4;
  chomp $line;
  for my $code (map { ord } split //, $line){
    if($code < 55){
      $in_off = 33;
    }elsif($code > 80){
      $in_off = 64;
    }else{
      next;
    }
    last GUESS_FORMAT;
  }
}
close $probe;
print STDERR "Detected input offset: Phred+$in_off\n";
exit unless $off;

# Second pass: copy the file, re-encoding quality lines when offsets differ.
my $Solexa64 = 0;
die "Couldn't guess input format.\n" unless $in_off;
open my $reader, "<", $in or die "Cannot read file: $in: $!\n";
while(my $line = <$reader>){
  # Non-quality lines, or any line when no conversion is needed, pass through.
  if($in_off==$off or $. % 4){
    print $line;
    next;
  }
  chomp $line;
  for my $chr (split //, $line){
    my $score = ord($chr) - $in_off;
    # Built before clamping so the message reports the value as read.
    my $complaint = "Out-of-range value $chr ($score) in line $..\n";
    my $err = '';
    if($score < -5){
      $err = $complaint;
      $score = $off==64 ? -5 : 0;
    }elsif(!$Solexa64 and $score < 0){
      if($in_off==64){
        # The first negative score in a +64 file marks it as Solexa+64.
        print STDERR "Format variant: Solexa+64\n";
        $Solexa64 = 1;
      }else{
        $err = $complaint;
        $score = 0;
      }
    }elsif($score>41){
      $err = $complaint;
      $score = 41;
    }
    if($err){
      die $err unless $force;
      warn $err;
    }
    print chr( $score + $off );
  }
  print "\n";
}
close $reader;
90
+
@@ -0,0 +1,53 @@
1
+ #!/usr/bin/env perl
2
+ #
3
+ # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
4
+ # @license artistic license 2.0
5
+ # @update Jul-05-2015
6
+ #
7
+
8
+ use warnings;
9
+ use strict;
10
+ use Symbol;
11
+
my ($file, $base, $outN) = @ARGV;

$outN ||= 2;
($file and $base) or die "
Usage
$0 in_file.fq out_base[ no_files]

in_file.fq Input file in FastQ format.
out_base Prefix for the name of the output files. It will
be appended with .<i>.fastq, where <i> is a consecutive
number starting in 1.
no_files Number of files to generate. By default: 2.

";
# Without at least one output file the distribution below cannot work.
$outN =~ /^[1-9]\d*$/ or die "The number of files must be a positive integer: $outN\n";

# One write handle per output file, named <out_base>.<i>.fastq.
my @outSym = ();
for my $i (1 .. $outN){
  open $outSym[$i-1], ">", "$base.$i.fastq" or die "I can not create the file: $base.$i.fastq: $!\n";
}

# Entries are taken as consecutive groups of four lines and dealt to the
# output files in turn: entry 1 to file 1, entry 2 to file 2, and so on.
my $entries = 0;
open my $in_fh, "<", $file or die "I can not read the file: $file: $!\n";
while(my $ln = <$in_fh>){
  $entries++ if $. % 4 == 1;
  print { $outSym[($entries - 1) % $outN] } $ln;
}
close $in_fh;

close $_ for @outSym;

print STDERR "Sequences: $entries\nFiles: $outN\n";
53
+
@@ -0,0 +1,63 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ #
4
+ # @author: Luis M. Rodriguez-R
5
+ # @update: Feb-06-2015
6
+ # @license: artistic license 2.0
7
+ #
8
+
9
+ require 'optparse'
10
+
11
+ o = {:q=>FALSE, :p=>"", :s=>""}
12
+ ARGV << '-h' if ARGV.size==0
13
+ OptionParser.new do |opts|
14
+ opts.banner = "
15
+ Generates easy-to-parse tagged reads from FastQ files.
16
+
17
+ Usage: #{$0} [options]"
18
+ opts.separator ""
19
+ opts.separator "Mandatory"
20
+ opts.on("-i", "--in FILE", "Path to the FastQ file containing the sequences."){ |v| o[:in] = v }
21
+ opts.on("-o", "--out FILE", "Path to the FastQ to create."){ |v| o[:out] = v }
22
+ opts.separator ""
23
+ opts.separator "ID options"
24
+ opts.on("-p", "--prefix STR", "Prefix to use in all IDs."){ |v| o[:p] = v }
25
+ opts.on("-s", "--suffix STR", "Suffix to use in all IDs."){ |v| o[:s] = v }
26
+ opts.separator ""
27
+ opts.separator "Other Options"
28
+ opts.on("-q", "--quiet", "Run quietly (no STDERR output)"){ o[:q] = TRUE }
29
+ opts.on("-h", "--help", "Display this screen") do
30
+ puts opts
31
+ exit
32
+ end
33
+ opts.separator ""
34
+ end.parse!
35
+ abort "-i is mandatory" if o[:in].nil?
36
+ abort "-o is mandatory" if o[:out].nil?
37
+
38
+ begin
39
+ ifh = File.open(o[:in], 'r');
40
+ ofh = File.open(o[:out], 'w');
41
+ i=0
42
+ while ln=ifh.gets
43
+ ln.chomp!
44
+ if $.%4==1 and not /^@/.match(ln).nil?
45
+ i+=1
46
+ ofh.puts "@#{o[:p]}#{i}#{o[:s]}"
47
+ elsif $.%4==2 or $.%4==0
48
+ ofh.puts ln
49
+ elsif $.%4==3 and not /^\+/.match(ln).nil?
50
+ ofh.puts "+"
51
+ else
52
+ abort "Impossible to parse line #{$.}: #{ln}.\n"
53
+ end
54
+ end
55
+ ifh.close
56
+ ofh.close
57
+ rescue => err
58
+ $stderr.puts "Exception: #{err}\n\n"
59
+ err.backtrace.each { |l| $stderr.puts l + "\n" }
60
+ err
61
+ end
62
+
63
+
@@ -0,0 +1,81 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'optparse'
4
+
# Command-line settings; :q silences progress messages on STDERR.
o = { q: false }
ARGV << '-h' if ARGV.empty?
OptionParser.new do |opts|
  opts.banner = "
Compares the estimated error of sequencing reads (Q-score) with
observed mismatches (identity against a known reference sequence).

Usage: #{$0} [options]"
  opts.separator ""
  opts.separator "Mandatory"
  opts.on("-f", "--fastq FILE",
    "Path to the FastQ file containing the sequences."){ |v| o[:fastq] = v }
  opts.on("-b", "--blast FILE",
    "Path to the tabular BLAST file mapping reads to reference sequences."
    ){ |v| o[:blast] = v }
  opts.on("-o", "--out FILE",
    "Path to the output tab-delimited file to create."){ |v| o[:out] = v }
  opts.separator ""
  opts.separator "Other Options"
  # `true` keyword: the TRUE constant is not defined in Ruby 3, so passing
  # -q used to raise NameError there.
  opts.on("-q", "--quiet", "Run quietly (no STDERR output)"){ o[:q] = true }
  opts.on("-h", "--help", "Display this screen") do
    puts opts
    exit
  end
  opts.separator ""
end.parse!
abort "-f is mandatory" if o[:fastq].nil?
abort "-b is mandatory" if o[:blast].nil?
abort "-o is mandatory" if o[:out].nil?

# Read the Q scores and estimate expected mismatches
mm = {} # <- Hash with read IDs as key, and arrays as values:
        #    [ expected mismatches, variance of mismatches, length ]
$stderr.puts "Reading FastQ file" unless o[:q]
File.open(o[:fastq], "r") do |fh|
  id = nil
  fh.each_line do |ln|
    case fh.lineno % 4
    when 1
      # Defline: the read ID is everything after '@' up to the first blank.
      ln =~ /^@(\S+)/ or raise "Unexpected defline format: #{ln}"
      id = $1
      $stderr.print " #{mm.size} reads...\r" unless o[:q]
    when 0
      # Quality line. I'm assuming ALWAYS Phred+33!!!
      # Each character becomes an error probability 10^(-Q/10).
      probs = ln.chomp.each_char.map do |chr|
        q = (chr.ord - 33).to_f
        10.0**(-q / 10.0)
      end
      mu = probs.inject(:+)
      var = probs.map { |pr| pr * (1.0 - pr) }.inject(:+)
      mm[id] = [mu, var, probs.size]
    end
  end
  $stderr.puts " Found: #{mm.size} reads." unless o[:q]
end

# Block form closes the output even if reading the BLAST file raises.
File.open(o[:out], "w") do |ofh|
  ofh.puts %w[id obs_subs obs_id aln_len obs_ins obs_del obs_gap mu var len].join("\t")

  # Read Identities and compare against expectation
  $stderr.puts "Reading Tabular BLAST file" unless o[:q]
  File.open(o[:blast], "r") do |fh|
    k = 0
    fh.each_line do |ln|
      r = ln.chomp.split("\t")
      id = r[0]
      # Alignments of reads absent from the FastQ file are ignored.
      next if mm[id].nil?
      k += 1
      $stderr.print " #{k} alignments...\r" unless o[:q]
      # Column 5 plus the read positions before column 7 and after column 8.
      obs_m = r[4].to_i + (r[6].to_i - 1) + (mm[id][2] - r[7].to_i)
      obs_del = r[3].to_i - (r[7].to_i - r[6].to_i).abs
      obs_ins = r[3].to_i - (r[9].to_i - r[8].to_i).abs
      row = [id, obs_m, r[2], r[7].to_i - r[6].to_i + 1,
             obs_ins, obs_del, r[5]] + mm[id]
      ofh.puts(row.join("\t"))
    end
    $stderr.puts " Found #{k} alignments." unless o[:q]
  end
end