miga-base 1.2.15.2 → 1.2.15.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/cli/action/download/gtdb.rb +4 -1
  3. data/lib/miga/cli/action/gtdb_get.rb +4 -0
  4. data/lib/miga/daemon.rb +4 -1
  5. data/lib/miga/lair.rb +6 -4
  6. data/lib/miga/remote_dataset/download.rb +3 -2
  7. data/lib/miga/remote_dataset.rb +25 -7
  8. data/lib/miga/taxonomy.rb +6 -0
  9. data/lib/miga/version.rb +2 -2
  10. metadata +6 -302
  11. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +0 -41964
  12. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +0 -32439
  13. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +0 -62056
  14. data/utils/FastAAI/FastAAI +0 -3659
  15. data/utils/FastAAI/FastAAI-legacy/FastAAI +0 -1336
  16. data/utils/FastAAI/FastAAI-legacy/kAAI_v1.0_virus.py +0 -1296
  17. data/utils/FastAAI/README.md +0 -84
  18. data/utils/enveomics/Docs/recplot2.md +0 -244
  19. data/utils/enveomics/Examples/aai-matrix.bash +0 -66
  20. data/utils/enveomics/Examples/ani-matrix.bash +0 -66
  21. data/utils/enveomics/Examples/essential-phylogeny.bash +0 -105
  22. data/utils/enveomics/Examples/unus-genome-phylogeny.bash +0 -100
  23. data/utils/enveomics/LICENSE.txt +0 -73
  24. data/utils/enveomics/Makefile +0 -52
  25. data/utils/enveomics/Manifest/Tasks/aasubs.json +0 -103
  26. data/utils/enveomics/Manifest/Tasks/blasttab.json +0 -790
  27. data/utils/enveomics/Manifest/Tasks/distances.json +0 -161
  28. data/utils/enveomics/Manifest/Tasks/fasta.json +0 -802
  29. data/utils/enveomics/Manifest/Tasks/fastq.json +0 -291
  30. data/utils/enveomics/Manifest/Tasks/graphics.json +0 -126
  31. data/utils/enveomics/Manifest/Tasks/mapping.json +0 -137
  32. data/utils/enveomics/Manifest/Tasks/ogs.json +0 -382
  33. data/utils/enveomics/Manifest/Tasks/other.json +0 -906
  34. data/utils/enveomics/Manifest/Tasks/remote.json +0 -355
  35. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +0 -650
  36. data/utils/enveomics/Manifest/Tasks/tables.json +0 -308
  37. data/utils/enveomics/Manifest/Tasks/trees.json +0 -68
  38. data/utils/enveomics/Manifest/Tasks/variants.json +0 -111
  39. data/utils/enveomics/Manifest/categories.json +0 -165
  40. data/utils/enveomics/Manifest/examples.json +0 -162
  41. data/utils/enveomics/Manifest/tasks.json +0 -4
  42. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +0 -69
  43. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +0 -1
  44. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +0 -1
  45. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +0 -1
  46. data/utils/enveomics/Pipelines/assembly.pbs/README.md +0 -189
  47. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +0 -112
  48. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +0 -23
  49. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +0 -44
  50. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +0 -50
  51. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +0 -37
  52. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +0 -68
  53. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +0 -49
  54. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +0 -80
  55. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +0 -57
  56. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +0 -63
  57. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +0 -38
  58. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +0 -73
  59. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +0 -21
  60. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +0 -72
  61. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +0 -98
  62. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +0 -1
  63. data/utils/enveomics/Pipelines/blast.pbs/README.md +0 -127
  64. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +0 -109
  65. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +0 -128
  66. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +0 -16
  67. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +0 -22
  68. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +0 -26
  69. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +0 -89
  70. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +0 -29
  71. data/utils/enveomics/Pipelines/idba.pbs/README.md +0 -49
  72. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +0 -95
  73. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +0 -56
  74. data/utils/enveomics/Pipelines/trim.pbs/README.md +0 -54
  75. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +0 -70
  76. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +0 -130
  77. data/utils/enveomics/README.md +0 -42
  78. data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +0 -171
  79. data/utils/enveomics/Scripts/Aln.cat.rb +0 -221
  80. data/utils/enveomics/Scripts/Aln.convert.pl +0 -35
  81. data/utils/enveomics/Scripts/AlphaDiversity.pl +0 -152
  82. data/utils/enveomics/Scripts/BedGraph.tad.rb +0 -93
  83. data/utils/enveomics/Scripts/BedGraph.window.rb +0 -71
  84. data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +0 -102
  85. data/utils/enveomics/Scripts/BlastTab.addlen.rb +0 -63
  86. data/utils/enveomics/Scripts/BlastTab.advance.bash +0 -48
  87. data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +0 -55
  88. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +0 -104
  89. data/utils/enveomics/Scripts/BlastTab.cogCat.rb +0 -76
  90. data/utils/enveomics/Scripts/BlastTab.filter.pl +0 -47
  91. data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +0 -194
  92. data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +0 -104
  93. data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +0 -157
  94. data/utils/enveomics/Scripts/BlastTab.recplot2.R +0 -48
  95. data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +0 -86
  96. data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +0 -119
  97. data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +0 -86
  98. data/utils/enveomics/Scripts/BlastTab.subsample.pl +0 -47
  99. data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +0 -114
  100. data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +0 -90
  101. data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +0 -123
  102. data/utils/enveomics/Scripts/Chao1.pl +0 -97
  103. data/utils/enveomics/Scripts/CharTable.classify.rb +0 -234
  104. data/utils/enveomics/Scripts/EBIseq2tax.rb +0 -83
  105. data/utils/enveomics/Scripts/FastA.N50.pl +0 -60
  106. data/utils/enveomics/Scripts/FastA.extract.rb +0 -152
  107. data/utils/enveomics/Scripts/FastA.filter.pl +0 -52
  108. data/utils/enveomics/Scripts/FastA.filterLen.pl +0 -28
  109. data/utils/enveomics/Scripts/FastA.filterN.pl +0 -60
  110. data/utils/enveomics/Scripts/FastA.fragment.rb +0 -100
  111. data/utils/enveomics/Scripts/FastA.gc.pl +0 -42
  112. data/utils/enveomics/Scripts/FastA.interpose.pl +0 -93
  113. data/utils/enveomics/Scripts/FastA.length.pl +0 -38
  114. data/utils/enveomics/Scripts/FastA.mask.rb +0 -89
  115. data/utils/enveomics/Scripts/FastA.per_file.pl +0 -36
  116. data/utils/enveomics/Scripts/FastA.qlen.pl +0 -57
  117. data/utils/enveomics/Scripts/FastA.rename.pl +0 -65
  118. data/utils/enveomics/Scripts/FastA.revcom.pl +0 -23
  119. data/utils/enveomics/Scripts/FastA.sample.rb +0 -98
  120. data/utils/enveomics/Scripts/FastA.slider.pl +0 -85
  121. data/utils/enveomics/Scripts/FastA.split.pl +0 -55
  122. data/utils/enveomics/Scripts/FastA.split.rb +0 -79
  123. data/utils/enveomics/Scripts/FastA.subsample.pl +0 -131
  124. data/utils/enveomics/Scripts/FastA.tag.rb +0 -65
  125. data/utils/enveomics/Scripts/FastA.toFastQ.rb +0 -69
  126. data/utils/enveomics/Scripts/FastA.wrap.rb +0 -48
  127. data/utils/enveomics/Scripts/FastQ.filter.pl +0 -54
  128. data/utils/enveomics/Scripts/FastQ.interpose.pl +0 -90
  129. data/utils/enveomics/Scripts/FastQ.maskQual.rb +0 -89
  130. data/utils/enveomics/Scripts/FastQ.offset.pl +0 -90
  131. data/utils/enveomics/Scripts/FastQ.split.pl +0 -53
  132. data/utils/enveomics/Scripts/FastQ.tag.rb +0 -70
  133. data/utils/enveomics/Scripts/FastQ.test-error.rb +0 -81
  134. data/utils/enveomics/Scripts/FastQ.toFastA.awk +0 -24
  135. data/utils/enveomics/Scripts/GFF.catsbj.pl +0 -127
  136. data/utils/enveomics/Scripts/GenBank.add_fields.rb +0 -84
  137. data/utils/enveomics/Scripts/HMM.essential.rb +0 -351
  138. data/utils/enveomics/Scripts/HMM.haai.rb +0 -168
  139. data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +0 -83
  140. data/utils/enveomics/Scripts/JPlace.distances.rb +0 -88
  141. data/utils/enveomics/Scripts/JPlace.to_iToL.rb +0 -320
  142. data/utils/enveomics/Scripts/M5nr.getSequences.rb +0 -81
  143. data/utils/enveomics/Scripts/MeTaxa.distribution.pl +0 -198
  144. data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +0 -35
  145. data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +0 -49
  146. data/utils/enveomics/Scripts/NCBIacc2tax.rb +0 -92
  147. data/utils/enveomics/Scripts/Newick.autoprune.R +0 -27
  148. data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +0 -228
  149. data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +0 -32
  150. data/utils/enveomics/Scripts/RefSeq.download.bash +0 -48
  151. data/utils/enveomics/Scripts/SRA.download.bash +0 -55
  152. data/utils/enveomics/Scripts/TRIBS.plot-test.R +0 -36
  153. data/utils/enveomics/Scripts/TRIBS.test.R +0 -39
  154. data/utils/enveomics/Scripts/Table.barplot.R +0 -31
  155. data/utils/enveomics/Scripts/Table.df2dist.R +0 -30
  156. data/utils/enveomics/Scripts/Table.filter.pl +0 -61
  157. data/utils/enveomics/Scripts/Table.merge.pl +0 -77
  158. data/utils/enveomics/Scripts/Table.prefScore.R +0 -60
  159. data/utils/enveomics/Scripts/Table.replace.rb +0 -69
  160. data/utils/enveomics/Scripts/Table.round.rb +0 -63
  161. data/utils/enveomics/Scripts/Table.split.pl +0 -57
  162. data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +0 -227
  163. data/utils/enveomics/Scripts/VCF.KaKs.rb +0 -147
  164. data/utils/enveomics/Scripts/VCF.SNPs.rb +0 -88
  165. data/utils/enveomics/Scripts/aai.rb +0 -421
  166. data/utils/enveomics/Scripts/ani.rb +0 -362
  167. data/utils/enveomics/Scripts/anir.rb +0 -137
  168. data/utils/enveomics/Scripts/clust.rand.rb +0 -102
  169. data/utils/enveomics/Scripts/gi2tax.rb +0 -103
  170. data/utils/enveomics/Scripts/in_silico_GA_GI.pl +0 -96
  171. data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
  172. data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
  173. data/utils/enveomics/Scripts/lib/enveomics.R +0 -1
  174. data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +0 -293
  175. data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +0 -175
  176. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +0 -24
  177. data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +0 -17
  178. data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +0 -30
  179. data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +0 -253
  180. data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +0 -88
  181. data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +0 -182
  182. data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +0 -49
  183. data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +0 -74
  184. data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +0 -237
  185. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +0 -31
  186. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +0 -152
  187. data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +0 -3
  188. data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +0 -74
  189. data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +0 -135
  190. data/utils/enveomics/Scripts/ogs.annotate.rb +0 -88
  191. data/utils/enveomics/Scripts/ogs.core-pan.rb +0 -160
  192. data/utils/enveomics/Scripts/ogs.extract.rb +0 -125
  193. data/utils/enveomics/Scripts/ogs.mcl.rb +0 -186
  194. data/utils/enveomics/Scripts/ogs.rb +0 -104
  195. data/utils/enveomics/Scripts/ogs.stats.rb +0 -131
  196. data/utils/enveomics/Scripts/rbm-legacy.rb +0 -172
  197. data/utils/enveomics/Scripts/rbm.rb +0 -108
  198. data/utils/enveomics/Scripts/sam.filter.rb +0 -148
  199. data/utils/enveomics/Tests/Makefile +0 -10
  200. data/utils/enveomics/Tests/Mgen_M2288.faa +0 -3189
  201. data/utils/enveomics/Tests/Mgen_M2288.fna +0 -8282
  202. data/utils/enveomics/Tests/Mgen_M2321.fna +0 -8288
  203. data/utils/enveomics/Tests/Nequ_Kin4M.faa +0 -2970
  204. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
  205. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +0 -7
  206. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +0 -17
  207. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +0 -137
  208. data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +0 -123
  209. data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +0 -200
  210. data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +0 -55
  211. data/utils/enveomics/Tests/alkB.nwk +0 -1
  212. data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +0 -13
  213. data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +0 -17
  214. data/utils/enveomics/Tests/hiv1.faa +0 -59
  215. data/utils/enveomics/Tests/hiv1.fna +0 -134
  216. data/utils/enveomics/Tests/hiv2.faa +0 -70
  217. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +0 -233
  218. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +0 -1
  219. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +0 -233
  220. data/utils/enveomics/Tests/phyla_counts.tsv +0 -10
  221. data/utils/enveomics/Tests/primate_lentivirus.ogs +0 -11
  222. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +0 -9
  223. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +0 -8
  224. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +0 -6
  225. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +0 -9
  226. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +0 -6
  227. data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +0 -6
  228. data/utils/enveomics/build_enveomics_r.bash +0 -45
  229. data/utils/enveomics/enveomics.R/DESCRIPTION +0 -31
  230. data/utils/enveomics/enveomics.R/NAMESPACE +0 -39
  231. data/utils/enveomics/enveomics.R/R/autoprune.R +0 -155
  232. data/utils/enveomics/enveomics.R/R/barplot.R +0 -184
  233. data/utils/enveomics/enveomics.R/R/cliopts.R +0 -135
  234. data/utils/enveomics/enveomics.R/R/df2dist.R +0 -154
  235. data/utils/enveomics/enveomics.R/R/growthcurve.R +0 -331
  236. data/utils/enveomics/enveomics.R/R/prefscore.R +0 -79
  237. data/utils/enveomics/enveomics.R/R/recplot.R +0 -354
  238. data/utils/enveomics/enveomics.R/R/recplot2.R +0 -1631
  239. data/utils/enveomics/enveomics.R/R/tribs.R +0 -583
  240. data/utils/enveomics/enveomics.R/R/utils.R +0 -80
  241. data/utils/enveomics/enveomics.R/README.md +0 -81
  242. data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
  243. data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
  244. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -16
  245. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -16
  246. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -16
  247. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +0 -25
  248. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +0 -46
  249. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +0 -23
  250. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +0 -47
  251. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +0 -23
  252. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +0 -23
  253. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +0 -40
  254. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +0 -103
  255. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +0 -67
  256. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +0 -24
  257. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +0 -19
  258. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +0 -45
  259. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +0 -44
  260. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +0 -47
  261. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +0 -75
  262. data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +0 -50
  263. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +0 -44
  264. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +0 -139
  265. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +0 -45
  266. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +0 -24
  267. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +0 -77
  268. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +0 -25
  269. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +0 -21
  270. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +0 -19
  271. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +0 -19
  272. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +0 -47
  273. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +0 -29
  274. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +0 -18
  275. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +0 -45
  276. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +0 -36
  277. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +0 -19
  278. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +0 -19
  279. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +0 -27
  280. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +0 -52
  281. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +0 -17
  282. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +0 -51
  283. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +0 -43
  284. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +0 -82
  285. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +0 -59
  286. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +0 -27
  287. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +0 -36
  288. data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +0 -23
  289. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +0 -68
  290. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +0 -28
  291. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +0 -27
  292. data/utils/enveomics/enveomics.R/man/growth.curves.Rd +0 -14
  293. data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +0 -13
  294. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +0 -78
  295. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +0 -46
  296. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +0 -45
  297. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +0 -125
  298. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +0 -19
  299. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +0 -19
  300. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +0 -19
  301. data/utils/enveomics/globals.mk +0 -8
  302. data/utils/enveomics/manifest.json +0 -9
  303. data/utils/multitrim/Multitrim How-To.pdf +0 -0
  304. data/utils/multitrim/README.md +0 -67
  305. data/utils/multitrim/multitrim.py +0 -1555
  306. data/utils/multitrim/multitrim.yml +0 -13
@@ -1,95 +0,0 @@
1
- #!/bin/bash
2
-
3
- if [[ "$1" == "" || "$1" == "-h" || "$2" == "" ]] ; then
4
- echo "
5
- Usage: ./RUNME.bash folder data_type [max_jobs]
6
-
7
- folder Path to the folder containing the 04.trimmed_fasta folder. The
8
- trimmed reads must be in interposed FastA format, and filenames
9
- must follow the format: <name>.CoupledReads.fa, where <name> is
10
- the name of the sample. If non-paired, the filenames must follow
11
- the format: <name>.SingleReads.fa. If both suffixes are found
12
- for the same <name> prefix, they are both used.
13
- data_type Type of datasets in the project. One of: mg (for metagenomes),
14
- scg (for single-cell genomes), g (for traditional genomes), or t
15
- (for transcriptomes).
16
- max_jobs (optional) Maximum number of jobs to run in parallel. This
17
- number can be increased, but bear in mind that this process is
18
- highly I/O-intensive, and likely to crash or significantly slow
19
- down the hard drive if many jobs are running simultaneously. By
20
- default: 5.
21
- " >&2
22
- exit 1
23
- fi
24
- TYPE=$2
25
- if [[ "$TYPE" != "g" && "$TYPE" != "mg" && "$TYPE" != "scg" \
26
- && "$TYPE" != "t" ]] ; then
27
- echo "Unsupported data type: $TYPE." >&2
28
- exit 1
29
- fi
30
- if [[ "$3" == "" ]] ; then
31
- MAX=5
32
- else
33
- let MAX=$3+0
34
- fi
35
-
36
- dir=$(readlink -f $1)
37
- pac=$(dirname $(readlink -f $0))
38
- cwd=$(pwd)
39
-
40
- cd $dir
41
- if [[ ! -e 04.trimmed_fasta ]] ; then
42
- echo "Cannot locate the 04.trimmed_fasta directory, aborting..." >&2
43
- exit 1
44
- fi
45
- for i in 05.assembly ; do
46
- [[ -d $i ]] || mkdir $i
47
- done
48
-
49
- k=0
50
- for i in $dir/04.trimmed_fasta/*.SingleReads.fa ; do
51
- b=$(basename $i .SingleReads.fa)
52
- touch $dir/04.trimmed_fasta/$b.CoupledReads.fa
53
- done
54
-
55
- for i in $dir/04.trimmed_fasta/*.CoupledReads.fa ; do
56
- b=$(basename $i .CoupledReads.fa)
57
- [[ -d $dir/05.assembly/$b ]] && continue
58
- EXTRA=""
59
- EXTRA_MSG=""
60
- if [[ $k -ge $MAX ]] ; then
61
- let prek=$k-$MAX
62
- EXTRA="-W depend=afterany:${jids[$prek]}"
63
- EXTRA_MSG=" (waiting for ${jids[$prek]})"
64
- fi
65
-
66
- # Predict time (in hours)
67
- SIZE_M=$(($(ls -pl 04.trimmed_fasta/$b.CoupledReads.fa \
68
- | awk '{print $5}')/1000000))
69
- let TIME_H=6+$SIZE_M*2/1000
70
- let RAM_G=20+$SIZE_M*20/1000
71
-
72
- # Find the right queue
73
- if [[ $TIME_H -lt 12 ]] ; then
74
- QUEUE="-q iw-shared-6 -l walltime=12:00:00"
75
- elif [[ $TIME_H -lt 120 ]] ; then
76
- QUEUE="-q microcluster -l walltime=120:00:00"
77
- else
78
- QUEUE="-q microcluster -l walltime=2000:00:00"
79
- fi
80
-
81
- # Launch job
82
- mkdir $dir/05.assembly/$b
83
- OPTS="SAMPLE=$b,FOLDER=$dir,TYPE=$TYPE"
84
- if [[ -s $dir/04.trimmed_fasta/$b.SingleReads.fa ]] ; then
85
- OPTS="$OPTS,FA=$dir/04.trimmed_fasta/$b.SingleReads.fa"
86
- [[ -s $dir/04.trimmed_fasta/$b.CoupledReads.fa ]] \
87
- && OPTS="$OPTS,FA_RL2=$dir/04.trimmed_fasta/$b.CoupledReads.fa"
88
- else
89
- OPTS="$OPTS,FA=$dir/04.trimmed_fasta/$b.CoupledReads.fa"
90
- fi
91
- jids[$k]=$(qsub -v "$OPTS" -N "IDBA-$b" -l "mem=${RAM_G}g" \
92
- $QUEUE $EXTRA $pac/run.pbs | grep .)
93
- echo "$b: ${jids[$k]}$EXTRA_MSG"
94
- let k=$k+1
95
- done
@@ -1,56 +0,0 @@
1
- #!/bin/bash
2
- #PBS -l nodes=1:ppn=10
3
- #PBS -k eo
4
-
5
- module load idba/1.1.1
6
-
7
- b=$SAMPLE
8
- shared=/nv/gpfs-gateway-pace1/project/bio-konstantinidis/shared3
9
- enve=$shared/apps/enveomics/Scripts
10
- THR=10
11
-
12
- #---------------------------------------------------------
13
-
14
- echo "==[ 05.assembly: $(date) ]"
15
- cd $FOLDER/05.assembly
16
-
17
- CMD=""
18
- case "$TYPE" in
19
- *g)
20
- CMD="idba_ud" ;;
21
- t)
22
- CMD="idba_tran" ;;
23
- *)
24
- echo "Unsupported data type: $TYPE" >&2
25
- exit 1
26
- ;;
27
- esac
28
- CMD="$CMD --pre_correction -r $FA -o $SAMPLE --num_threads $THR"
29
- [[ -n "$FA_RL2" ]] && CMD="$CMD --read_level_2 $FA_RL2"
30
- [[ -n "$FA_RL3" ]] && CMD="$CMD --read_level_3 $FA_RL3"
31
- [[ -n "$FA_RL4" ]] && CMD="$CMD --read_level_4 $FA_RL4"
32
- [[ -n "$FA_RL5" ]] && CMD="$CMD --read_level_5 $FA_RL5"
33
-
34
- time $CMD
35
-
36
- rm $SAMPLE/kmer
37
- rm $SAMPLE/graph-*.fa
38
- rm $SAMPLE/align-*
39
- rm $SAMPLE/local-contig-*.fa
40
- rm $SAMPLE/contig-*.fa
41
-
42
- if [[ -s $SAMPLE/scaffold.fa ]] ; then
43
- ln -s $SAMPLE/scaffold.fa $SAMPLE.AllContigs.fna
44
- else
45
- ln -s $SAMPLE/contig.fa $SAMPLE.AllContigs.fna
46
- fi
47
- time $enve/FastA.length.pl $SAMPLE.AllContigs.fna | awk '$2>=500{print $1}' \
48
- > $SAMPLE.LargeContigs.ids
49
- time $enve/FastA.filter.pl $SAMPLE.LargeContigs.ids $SAMPLE.AllContigs.fna \
50
- > $SAMPLE.LargeContigs.fna
51
- rm $SAMPLE.LargeContigs.ids
52
-
53
- #---------------------------------------------------------
54
-
55
- echo "Done: $(date)."
56
-
@@ -1,54 +0,0 @@
1
- @author: Luis Miguel Rodriguez-R <lmrodriguezr at gmail dot com>
2
-
3
- @update: Oct-30-2014
4
-
5
- @license: artistic 2.0
6
-
7
- @status: auto
8
-
9
- @pbs: yes
10
-
11
- # IMPORTANT
12
-
13
- This pipeline was developed for the [PACE cluster](http://pace.gatech.edu/). You
14
- are free to use it in other platforms with adequate adjustments.
15
-
16
- # PURPOSE
17
-
18
- Performs various trimming and quality-control analyses over raw reads.
19
-
20
- # HELP
21
-
22
- 1. Files preparation:
23
-
24
- 1.1. Obtain the enveomics package in the cluster. You can use:
25
- `git clone https://github.com/lmrodriguezr/enveomics.git`
26
-
27
- 1.2. Prepare the raw reads in FastQ format. Files must be raw, not zipped or packaged.
28
- Filenames must conform the format: <name>.<sis>.fastq, where <name> is the name
29
- of the sample, and <sis> is 1 or 2 indicating which sister read the file contains.
30
- Use only '1' as <sis> if you have single reads.
31
-
32
- 1.3. Gather all the FastQ files into the same folder.
33
-
34
- 2. Pipeline execution:
35
-
36
- 2.1. Simply execute `./RUNME.bash <dir>`, where <dir> is the folder containing
37
- the FastQ files.
38
-
39
- 3. What to expect:
40
-
41
- By the end of the run, you should find the following folders:
42
-
43
- 3.1. *01.raw_reads*: Gzip'ed raw FastQ files.
44
-
45
- 3.2. *02.trimmed_reads*: Trimmed and clipped reads. For each sample, there should be
46
- nine files for paired-end, and two for single-reads.
47
-
48
- 3.3. *03.read_quality*: Quality reports. For each sample, there should be two directories,
49
- one with SolexaQA++ information, another with FastQC information.
50
-
51
- 3.4. *04.trimmed_fasta*: Trimmed and clipped in FastA format (and gzip'ed, in the case of
52
- individual files for paired-end).
53
-
54
-
@@ -1,70 +0,0 @@
1
- #!/bin/bash
2
-
3
- if [[ "$1" == "" || "$1" == "-h" ]] ; then
4
- echo "
5
- Usage: ./RUNME.bash folder [clipper [max_jobs]]
6
-
7
- folder Path to the folder containing the raw reads. The raw reads must be in FastQ format,
8
- and filenames must follow the format: <name>.<sis>.fastq, where <name> is the name
9
- of the sample, and <sis> is 1 or 2 indicating which sister read the file contains.
10
- Use only '1' as <sis> if you have single reads.
11
- clipper (optional) One of: trimmomatic, scythe, or none. By default: scythe.
12
- max_jobs (optional) Maximum number of jobs to run in parallel. This number can be increased,
13
- but bear in mind that this process is highly I/O-intensive, and likely to crash or
14
- significantly slow down the hard drive if many jobs are running simultaneously. By
15
- default: 5.
16
- " >&2 ;
17
- exit 1 ;
18
- fi ;
19
- CLIPPER=$2
20
- if [[ "$CLIPPER" == "" ]] ; then
21
- CLIPPER="scythe"
22
- fi ;
23
- if [[ "$3" == "" ]] ; then
24
- MAX=5 ;
25
- else
26
- let MAX=$3+0 ;
27
- fi ;
28
-
29
- dir=$(readlink -f $1) ;
30
- pac=$(dirname $(readlink -f $0)) ;
31
- cwd=$(pwd) ;
32
-
33
- cd $dir ;
34
- for i in 01.raw_reads 02.trimmed_reads 03.read_quality 04.trimmed_fasta zz.info ; do
35
- if [[ ! -d $i ]] ; then mkdir $i ; fi ;
36
- done ;
37
-
38
- k=0 ;
39
- for i in $dir/*.1.fastq ; do
40
- EXTRA="" ;
41
- EXTRA_MSG="" ;
42
- if [[ $k -ge $MAX ]] ; then
43
- let prek=$k-$MAX ;
44
- EXTRA="-W depend=afterany:${jids[$prek]}" ;
45
- EXTRA_MSG=" (waiting for ${jids[$prek]})"
46
- fi ;
47
- b=$(basename $i .1.fastq) ;
48
- mv $b.[12].fastq 01.raw_reads/ ;
49
- # Predict time (in hours)
50
- SIZE_M=$(($(ls -pl 01.raw_reads/$b.1.fastq | awk '{print $5}')/1000000)) ;
51
- let TIME_H=$SIZE_M*5/1000 ;
52
- [[ -e 01.raw_reads/$b.2.fastq ]] || let TIME_H=$TIME_H/2 ;
53
- let RAM_G=$SIZE_M*8/1000 ;
54
- [[ $RAM_G -lt 10 ]] && RAM_G=10 ;
55
-
56
- # Find the right queue
57
- if [[ $TIME_H -lt 12 ]] ; then
58
- QUEUE="-q iw-shared-6 -l walltime=12:00:00" ;
59
- elif [[ $TIME_H -lt 120 ]] ; then
60
- QUEUE="-q microcluster -l walltime=120:00:00" ;
61
- else
62
- QUEUE="-q microcluster -l walltime=2000:00:00" ;
63
- fi ;
64
- # Launch job
65
- jids[$k]=$(qsub -v "SAMPLE=$b,FOLDER=$dir,CLIPPER=$CLIPPER" -N "Trim-$b" -l "mem=${RAM_G}g" $QUEUE $EXTRA $pac/run.pbs | grep .) ;
66
- echo "$b: ${jids[$k]}$EXTRA_MSG" ;
67
- let k=$k+1 ;
68
- done ;
69
-
70
-
@@ -1,130 +0,0 @@
1
#!/bin/bash
#PBS -l mem=10g
#PBS -l nodes=1:ppn=1
#PBS -k eo

# Trims, clips, and quality-checks the reads of a single sample.
#
# Environment variables read by this script:
#   SAMPLE   Base name of the sample; reads are 01.raw_reads/$SAMPLE.[12].fastq
#            (the .2.fastq file is optional).
#   FOLDER   Project folder holding 01.raw_reads, 02.trimmed_reads,
#            03.read_quality, 04.trimmed_fasta, and zz.info.
#   CLIPPER  "trimmomatic" or "scythe"; any other value skips adapter clipping.
#
# Every `cd` is guarded: the stages below delete and move files by relative
# glob, so continuing from the wrong directory would be destructive.

module load fastqc/0.11.2
module load scythe/0.993

if [[ -z "$SAMPLE" || -z "$FOLDER" ]] ; then
  echo "SAMPLE and FOLDER must be set." >&2 ;
  exit 1 ;
fi ;

shared=/gpfs/pace1/project/bio-konstantinidis/shared3
b=$SAMPLE ;
sqa=$shared/bin/SolexaQA++
scythe=scythe
enve=$shared/apps/enveomics/Scripts
trim=$shared/apps/Trimmomatic-0.32/trimmomatic-0.32.jar
SEadapters=$shared/apps/Trimmomatic-0.32/adapters/ALL-SE_PE.fa
PEadapters=$shared/apps/Trimmomatic-0.32/adapters/ALL-PE.fa

#---------------------------------------------------------

echo "==[ 02.trimmed_reads: $(date) ]" ;
cd "$FOLDER/02.trimmed_reads" || exit 1 ;

# Tag read names with the sample name and the mate suffix
time "$enve/FastQ.tag.rb" -i "../01.raw_reads/$b.1.fastq" -p "$b-" -s "/1" -o "$b.1.fastq" ;
[[ -e "../01.raw_reads/$b.2.fastq" ]] && time "$enve/FastQ.tag.rb" -i "../01.raw_reads/$b.2.fastq" -p "$b-" -s "/2" -o "$b.2.fastq" ;

# Read count over the whole file; mean length over the first 10,000 reads
RAW_READS=$(paste - - - - < "$b.1.fastq" | wc -l | sed -e 's/ *//') ;
RAW_LENGTH=$(head -n 40000 "$b.1.fastq" | paste - - - - | awk 'BEGIN{FS="\\t"}{SUM+=length($2)}END{print SUM/NR}') ;

time "$sqa" dynamictrim "$b".[12].fastq -h 20 -d . ;
time "$sqa" lengthsort "$b".[12].fastq.trimmed -l 50 -d . ;

if [[ "$CLIPPER" == "trimmomatic" ]] ; then
  if [[ -e "$b.2.fastq.trimmed.paired" ]] ; then
    time java -jar "$trim" PE -threads 1 \
      "$b.1.fastq.trimmed.paired" \
      "$b.2.fastq.trimmed.paired" \
      "$b.1.clipped.fastq" "$b.1.clipped.single.fastq" \
      "$b.2.clipped.fastq" "$b.2.clipped.single.fastq" \
      "ILLUMINACLIP:$PEadapters:2:30:10" MINLEN:50
  else
    time java -jar "$trim" SE -threads 1 \
      "$b.1.fastq.trimmed.single" "$b.1.clipped.fastq" \
      "ILLUMINACLIP:$SEadapters:2:30:10" MINLEN:50
  fi ;
elif [[ "$CLIPPER" == "scythe" ]]; then
  if [[ -e "$b.2.fastq.trimmed.paired" ]] ; then
    "$scythe" -a "$PEadapters" "$b.1.fastq.trimmed.paired" > "$b.1.clipped.all.fastq" ;
    "$scythe" -a "$PEadapters" "$b.2.fastq.trimmed.paired" > "$b.2.clipped.all.fastq" ;
    time "$sqa" lengthsort "$b".[12].clipped.all.fastq -l 50 -d . ;
    rm "$b".[12].clipped.all.fastq ;
    [[ -e "$b.1.clipped.all.fastq.single" ]] && mv "$b.1.clipped.all.fastq.single" "$b.1.clipped.single.fastq" ;
    [[ -e "$b.2.clipped.all.fastq.single" ]] && mv "$b.2.clipped.all.fastq.single" "$b.2.clipped.single.fastq" ;
    mv "$b.1.clipped.all.fastq.paired" "$b.1.clipped.fastq" ;
    mv "$b.2.clipped.all.fastq.paired" "$b.2.clipped.fastq" ;
    rm "$b.1.clipped.all.fastq.summary.txt" "$b.1.clipped.all.fastq.summary.txt.pdf" &>/dev/null ;
  else
    # NOTE(review): single-end scythe uses the PE adapter set, whereas
    # single-end trimmomatic uses SEadapters -- confirm this is intended.
    "$scythe" -a "$PEadapters" "$b.1.fastq.trimmed.single" > "$b.1.clipped.all.fastq" ;
    time "$sqa" lengthsort "$b.1.clipped.all.fastq" -l 50 -d . ;
    rm "$b.1.clipped.all.fastq" ;
    mv "$b.1.clipped.all.fastq.single" "$b.1.clipped.fastq" ;
  fi ;
  rm "$b".[12].*.discard &>/dev/null ;
else
  # No clipping: expose the trimmed reads under the clipped names
  if [[ -e "$b.2.fastq.trimmed.paired" ]] ; then
    ln -s "$b.1.fastq.trimmed.paired" "$b.1.clipped.fastq" ;
    ln -s "$b.2.fastq.trimmed.paired" "$b.2.clipped.fastq" ;
  else
    ln -s "$b.1.fastq.trimmed.single" "$b.1.clipped.fastq" ;
  fi ;
fi ;

TRIMMED_READS=$(paste - - - - < "$b.1.clipped.fastq" | wc -l | sed -e 's/ *//') ;
TRIMMED_LENGTH=$(head -n 40000 "$b.1.clipped.fastq" | paste - - - - | awk 'BEGIN{FS="\\t"}{SUM+=length($2)}END{print SUM/NR}') ;

#---------------------------------------------------------

echo "==[ 03.read_quality: $(date) ]" ;
cd "$FOLDER/03.read_quality" || exit 1 ;
if [ ! -d "$b.fastqc" ] ; then mkdir "$b.fastqc" ; fi ;
perl "$(which fastqc)" ../02.trimmed_reads/"$b".[12].clipped.fastq -o "$b.fastqc" ;

if [ ! -d "$b" ] ; then mkdir "$b" ; fi ;
time "$sqa" analysis ../01.raw_reads/"$b".[12].fastq -h 20 -d "$b" -v -m ;
rm "$b"/*.segments ;
mv ../02.trimmed_reads/"$b".[12].fastq_trimmed.segments* "$b"/
mv ../02.trimmed_reads/"$b".[12].fastq.trimmed.summary.txt* "$b"/


cd "$FOLDER/02.trimmed_reads" || exit 1 ;
# -f: the scythe branch has already removed every *.discard file
rm -f "$b".[12].fastq.trimmed.discard ;
rm "$b".[12].fastq.trimmed ;
rm "$b".[12].fastq ;

#---------------------------------------------------------

echo "==[ 04.trimmed_fasta: $(date) ]" ;
cd "$FOLDER/04.trimmed_fasta" || exit 1 ;
paste - - - - < "../02.trimmed_reads/$b.1.clipped.fastq" | awk 'BEGIN{FS="\\t"}{print ">"substr($1,2)"\\n"$2}' > "$b.1.fasta" ;
if [[ -e "../02.trimmed_reads/$b.2.clipped.fastq" ]] ; then
  paste - - - - < "../02.trimmed_reads/$b.2.clipped.fastq" | awk 'BEGIN{FS="\\t"}{print ">"substr($1,2)"\\n"$2}' > "$b.2.fasta" ;
  time "$enve/FastA.interpose.pl" "$b.CoupledReads.fa" "$b".[12].fasta ;
  time gzip "$b.2.fasta" ;
  time gzip "$b.1.fasta" ;
else
  mv "$b.1.fasta" "$b.SingleReads.fa" ;
fi ;

#---------------------------------------------------------

echo "==[ zz.info: $(date) ]" ;
cd "$FOLDER/zz.info" || exit 1 ;
echo "
RAW_LENGTH: $RAW_LENGTH
RAW_READS: $RAW_READS
TRIMMED_LENGTH: $TRIMMED_LENGTH
TRIMMED_READS: $TRIMMED_READS
" > "$b.summary.txt" ;

#---------------------------------------------------------

echo "==[ 01.raw_reads: $(date) ]"
cd "$FOLDER/01.raw_reads" || exit 1 ;
for i in "$b".[12].fastq ; do
  time gzip "$i" ;
done ;

#---------------------------------------------------------

echo "Done: $(date)." ;
130
-
@@ -1,42 +0,0 @@
1
- # Enveomics Collection
2
-
3
- Scripts and reference libraries at [Kostas lab](http://enve-omics.gatech.edu).
4
-
5
- ## Prerequisites
6
-
7
- The enveomics collection as a whole has very modest requirements, essentially a
8
- *nix system with `bash`, `perl`, `ruby`, and `R`. Some scripts may require
9
- additional libraries, or even external Software, but you'll be forewarned about
10
- these requirements in the documentation accompanying each script. If you prefer,
11
- you can also use the Graphical User Interface (GUI), that comes with additional
12
- tests to let you know if your system is ready to use any given script.
13
-
14
- ## Graphical User Interface (GUI)
15
-
16
- The enveomics collection now has a graphical user interface! To learn more,
17
- please visit [enveomics-gui](https://github.com/lmrodriguezr/enveomics-gui).
18
-
19
- ## License
20
-
21
- The files in this repository are licensed under the terms of the
22
- Artistic License 2.0, except when otherwise noted.
23
-
24
- You can find a copy of the license in [LICENSE.txt](LICENSE.txt) or at
25
- http://www.perlfoundation.org/artistic_license_2_0.
26
-
27
- ## Documentation
28
-
29
- Most scripts in this repository are self-documented. However,
30
- more extensive documentation (and some discussion) can be found at the
31
- [documentation website](http://enve-omics.ce.gatech.edu/enveomics/docs).
32
- Additional documentation for recruitment plots can be found
33
- [here](Docs/recplot2.md).
34
-
35
- ## Citation
36
-
37
- If you use any of the utilities in the Enveomics Collection in your research
38
- please cite:
39
-
40
- > Rodriguez-R LM & Konstantinidis KT (2016). The enveomics collection: a toolbox
41
- > for specialized analyses of microbial genomes and metagenomes.
42
- > [PeerJ Preprints 4:e1900v1](https://peerj.com/preprints/1900/).
@@ -1,171 +0,0 @@
1
- #!/usr/bin/env ruby
2
- #
3
- # @author Luis M. Rodriguez-R
4
- # @update Dec-21-2015
5
- # @license artistic license 2.0
6
- #
7
-
8
- $:.push File.expand_path(File.dirname(__FILE__) + "/lib")
9
- require "enveomics_rb/enveomics"
10
-
11
# Command-line options: defaults first, then overridden by the parser below.
#   :file          input AAsubs file (mandatory, checked after parsing)
#   :obs/:boot/:null  output paths (defaulted after parsing)
#   :bootstraps / :permutations  number of resampling rounds
#   :overwrite     recompute outputs that already exist
#   :q             run without STDERR progress messages
o = {permutations: 1000, bootstraps: 1000, overwrite: false}
parser = OptionParser.new do |opt|
  # The help text says log10 because the estimates are computed with
  # Math.log(x, 10) and written under a "log10_AB" header.
  opt.banner = "
    Estimates the log10-ratio of different amino acids in homologous sites using
    an AAsubs file (see BlastPairwise.AAsubs.pl). It provides the point
    estimation (.obs file), the bootstrap of the estimation (.boot file) and the
    null model based on label-permutation (.null file).

    Usage: #{$0} [options]".gsub(/^ +/,"")
  opt.separator ""
  opt.separator "Mandatory"
  opt.on("-i", "--input FILE",
    "Input file in AAsubs format (see BlastPairwise.AAsubs.pl)."
    ){ |v| o[:file] = v}
  opt.separator ""
  opt.separator "Output files"
  opt.on("-O", "--obs-file FILE",
    "Output file with the log10-ratios per amino acid.",
    "By default, '--input value'.obs."
    ){ |v| o[:obs] = v }
  opt.on("-B", "--bootstrap-file FILE",
    "Output file with the bootstrap results of log10-ratios per amino acid.",
    "By default, '--input value'.boot."
    ){ |v| o[:boot] = v }
  opt.on("-N", "--null-file FILE",
    "Output file with the permutation results of log10-ratios per amino acid.",
    "By default, '--input value'.null."
    ){ |v| o[:null] = v }
  opt.on("--overwrite",
    "Overwrite existing files. By default, skip steps if the files already" +
    " exist."){ |v| o[:overwrite] = v }
  opt.separator ""
  opt.separator "Parameters"
  opt.on("-b", "--bootstraps INT",
    "Number of bootstraps to run. By default: #{o[:bootstraps]}."
    ){ |v| o[:bootstraps] = v.to_i }
  opt.on("-p", "--permutations INT",
    "Number of permutations to run. By default: #{o[:permutations]}."
    ){ |v| o[:permutations] = v.to_i }
  # `true` literal: the legacy TRUE constant is deprecated and no longer
  # defined by recent Ruby releases, where it raises NameError.
  opt.on("-q", "--quiet", "Run quietly (no STDERR output)."){ o[:q] = true }
  opt.on("-h", "--help", "Display this screen.") do
    puts opt
    exit
  end
  opt.separator ""
end
parser.parse!
57
-
58
# Validate the mandatory input and derive the default output paths
abort "--input is mandatory" if o[:file].nil?
# Amino acid codes tallied by the script (X included as a regular symbol)
ALPHABET = %w(A C D E F G H I K L M N P Q R S T V W Y X)
# Each output defaults to the input path plus its own extension
%i[obs boot null].each { |ext| o[ext] ||= "#{o[:file]}.#{ext}" }
64
-
65
- # Functions
66
# Log10-ratio of total counts between two samples, one value per amino acid.
#
# @param a [Hash] amino acid => array of per-protein counts (numerator)
# @param b [Hash] amino acid => array of per-protein counts (denominator)
# @return [Array<Float>] log10(sum(a[aa]) / sum(b[aa])) in ALPHABET order
def dist_summary(a, b)
  ALPHABET.map do |aa|
    total_a = a[aa].reduce(0, :+)
    total_b = b[aa].reduce(0, :+)
    Math.log(total_a.to_f / total_b, 10)
  end
end
71
# Builds a fresh sample: every amino acid in ALPHABET mapped to its own
# empty array of per-protein counts.
#
# @return [Hash] amino acid => []
def empty_sample
  ALPHABET.each_with_object({}) { |aa, sample| sample[aa] = [] }
end
74
-
75
# Tally, per protein, how often each amino acid occurs in each dataset.
# sample_A counts column 2 of the input, sample_B counts column 3; a new
# protein starts whenever the label in column 1 changes.
$stderr.puts "Initializing." unless o[:q]
sample_A = empty_sample
sample_B = empty_sample
last_label = nil
prot_index = -1

# Read file
$stderr.puts "Reading input file." unless o[:q]
File.open(o[:file], "r") do |ifh|
  ifh.each_line do |line|
    row = line.chomp.split(/\t/)
    unless row.first == last_label
      # New protein: open a zeroed slot for every amino acid
      prot_index += 1
      last_label = row.first
      ALPHABET.each do |aa|
        sample_A[aa][prot_index] = 0
        sample_B[aa][prot_index] = 0
      end
    end
    {1 => sample_A, 2 => sample_B}.each do |col, sample|
      residue = row[col]
      # '-' and '*' are skipped rather than counted
      next if %w(- *).include? residue
      abort "Unknown amino acid in line #{$.}: '#{residue}'." unless
        ALPHABET.include? residue
      sample[residue][prot_index] += 1
    end
  end
end
$stderr.puts " > Found #{prot_index+1} proteins." unless o[:q]
$stderr.puts " > Saving #{o[:obs]}" unless o[:q]
observed = dist_summary(sample_A, sample_B)
File.open(o[:obs], "w") do |fh|
  fh.puts ["AA", "log10_AB"].join("\t")
  ALPHABET.zip(observed).each { |pair| fh.puts pair.join("\t") }
end
114
-
115
# Permutations: null model built by randomly relabeling the two datasets.
# Skipped when the output already has content, unless --overwrite is set.
if File.size?(o[:null]) && !o[:overwrite]
  $stderr.puts "Skipping permutations." unless o[:q]
else
  $stderr.puts "Permutating." unless o[:q]
  permut_sum = o[:permutations].times.map do
    relabeled_a = empty_sample
    relabeled_b = empty_sample
    (0 .. prot_index).each do |j|
      # Copy counts of the protein
      ALPHABET.each do |aa|
        relabeled_a[aa][j] = sample_A[aa][j]
        relabeled_b[aa][j] = sample_B[aa][j]
      end
      # Swap labels at random: one coin flip per protein, exchanging the
      # two accumulators as a whole
      relabeled_a, relabeled_b = relabeled_b, relabeled_a if rand(2) == 1
    end
    dist_summary(relabeled_a, relabeled_b)
  end
  $stderr.puts " > Performed #{o[:permutations]} permutations." unless o[:q]
  $stderr.puts " > Saving #{o[:null]}" unless o[:q]
  File.open(o[:null], "w") do |fh|
    fh.puts ALPHABET.join("\t")
    permut_sum.each { |row| fh.puts row.join("\t") }
  end
end
142
-
143
# Bootstraps: re-estimate the ratios on proteins resampled with replacement.
# Skipped when the output already has content, unless --overwrite is set.
if File.size?(o[:boot]) && !o[:overwrite]
  $stderr.puts "Skipping bootstraps." unless o[:q]
else
  $stderr.puts "Bootstrapping." unless o[:q]
  boot_sum = o[:bootstraps].times.map do
    resampled_a = empty_sample
    resampled_b = empty_sample
    (0 .. prot_index).each do |j|
      # Sample randomly with replacement
      picked = rand(prot_index + 1)
      # Copy counts of the picked protein into slot j of both samples
      ALPHABET.each do |aa|
        resampled_a[aa][j] = sample_A[aa][picked]
        resampled_b[aa][j] = sample_B[aa][picked]
      end
    end
    dist_summary(resampled_a, resampled_b)
  end
  $stderr.puts " > Performed #{o[:bootstraps]} bootstraps." unless o[:q]
  $stderr.puts " > Saving #{o[:boot]}" unless o[:q]
  File.open(o[:boot], "w") do |fh|
    fh.puts ALPHABET.join("\t")
    boot_sum.each { |row| fh.puts row.join("\t") }
  end
end

$stderr.puts "Done. Yayyy!" unless o[:q]