miga-base 0.7.25.2 → 0.7.25.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (277) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/version.rb +2 -2
  3. data/utils/distance/runner.rb +2 -1
  4. metadata +5 -278
  5. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +0 -41964
  6. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +0 -32439
  7. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +0 -62056
  8. data/utils/FastAAI/FastAAI/FastAAI +0 -1336
  9. data/utils/FastAAI/README.md +0 -84
  10. data/utils/FastAAI/kAAI_v1.0_virus.py +0 -1296
  11. data/utils/enveomics/Docs/recplot2.md +0 -244
  12. data/utils/enveomics/Examples/aai-matrix.bash +0 -66
  13. data/utils/enveomics/Examples/ani-matrix.bash +0 -66
  14. data/utils/enveomics/Examples/essential-phylogeny.bash +0 -105
  15. data/utils/enveomics/Examples/unus-genome-phylogeny.bash +0 -100
  16. data/utils/enveomics/LICENSE.txt +0 -73
  17. data/utils/enveomics/Makefile +0 -52
  18. data/utils/enveomics/Manifest/Tasks/aasubs.json +0 -103
  19. data/utils/enveomics/Manifest/Tasks/blasttab.json +0 -786
  20. data/utils/enveomics/Manifest/Tasks/distances.json +0 -161
  21. data/utils/enveomics/Manifest/Tasks/fasta.json +0 -766
  22. data/utils/enveomics/Manifest/Tasks/fastq.json +0 -243
  23. data/utils/enveomics/Manifest/Tasks/graphics.json +0 -126
  24. data/utils/enveomics/Manifest/Tasks/mapping.json +0 -67
  25. data/utils/enveomics/Manifest/Tasks/ogs.json +0 -382
  26. data/utils/enveomics/Manifest/Tasks/other.json +0 -829
  27. data/utils/enveomics/Manifest/Tasks/remote.json +0 -355
  28. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +0 -501
  29. data/utils/enveomics/Manifest/Tasks/tables.json +0 -308
  30. data/utils/enveomics/Manifest/Tasks/trees.json +0 -68
  31. data/utils/enveomics/Manifest/Tasks/variants.json +0 -111
  32. data/utils/enveomics/Manifest/categories.json +0 -156
  33. data/utils/enveomics/Manifest/examples.json +0 -154
  34. data/utils/enveomics/Manifest/tasks.json +0 -4
  35. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +0 -69
  36. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +0 -1
  37. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +0 -1
  38. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +0 -1
  39. data/utils/enveomics/Pipelines/assembly.pbs/README.md +0 -189
  40. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +0 -112
  41. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +0 -23
  42. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +0 -44
  43. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +0 -50
  44. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +0 -37
  45. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +0 -68
  46. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +0 -49
  47. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +0 -80
  48. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +0 -57
  49. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +0 -63
  50. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +0 -38
  51. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +0 -73
  52. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +0 -21
  53. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +0 -72
  54. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +0 -98
  55. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +0 -1
  56. data/utils/enveomics/Pipelines/blast.pbs/README.md +0 -127
  57. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +0 -109
  58. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +0 -128
  59. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +0 -16
  60. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +0 -22
  61. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +0 -26
  62. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +0 -89
  63. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +0 -29
  64. data/utils/enveomics/Pipelines/idba.pbs/README.md +0 -49
  65. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +0 -95
  66. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +0 -56
  67. data/utils/enveomics/Pipelines/trim.pbs/README.md +0 -54
  68. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +0 -70
  69. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +0 -130
  70. data/utils/enveomics/README.md +0 -42
  71. data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +0 -171
  72. data/utils/enveomics/Scripts/Aln.cat.rb +0 -163
  73. data/utils/enveomics/Scripts/Aln.convert.pl +0 -35
  74. data/utils/enveomics/Scripts/AlphaDiversity.pl +0 -152
  75. data/utils/enveomics/Scripts/BedGraph.tad.rb +0 -93
  76. data/utils/enveomics/Scripts/BedGraph.window.rb +0 -71
  77. data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +0 -102
  78. data/utils/enveomics/Scripts/BlastTab.addlen.rb +0 -63
  79. data/utils/enveomics/Scripts/BlastTab.advance.bash +0 -48
  80. data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +0 -55
  81. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +0 -104
  82. data/utils/enveomics/Scripts/BlastTab.cogCat.rb +0 -76
  83. data/utils/enveomics/Scripts/BlastTab.filter.pl +0 -47
  84. data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +0 -194
  85. data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +0 -104
  86. data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +0 -157
  87. data/utils/enveomics/Scripts/BlastTab.recplot2.R +0 -48
  88. data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +0 -86
  89. data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +0 -119
  90. data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +0 -86
  91. data/utils/enveomics/Scripts/BlastTab.subsample.pl +0 -47
  92. data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +0 -114
  93. data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +0 -90
  94. data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +0 -101
  95. data/utils/enveomics/Scripts/Chao1.pl +0 -97
  96. data/utils/enveomics/Scripts/CharTable.classify.rb +0 -234
  97. data/utils/enveomics/Scripts/EBIseq2tax.rb +0 -83
  98. data/utils/enveomics/Scripts/FastA.N50.pl +0 -56
  99. data/utils/enveomics/Scripts/FastA.extract.rb +0 -152
  100. data/utils/enveomics/Scripts/FastA.filter.pl +0 -52
  101. data/utils/enveomics/Scripts/FastA.filterLen.pl +0 -28
  102. data/utils/enveomics/Scripts/FastA.filterN.pl +0 -60
  103. data/utils/enveomics/Scripts/FastA.fragment.rb +0 -92
  104. data/utils/enveomics/Scripts/FastA.gc.pl +0 -42
  105. data/utils/enveomics/Scripts/FastA.interpose.pl +0 -93
  106. data/utils/enveomics/Scripts/FastA.length.pl +0 -38
  107. data/utils/enveomics/Scripts/FastA.mask.rb +0 -89
  108. data/utils/enveomics/Scripts/FastA.per_file.pl +0 -36
  109. data/utils/enveomics/Scripts/FastA.qlen.pl +0 -57
  110. data/utils/enveomics/Scripts/FastA.rename.pl +0 -65
  111. data/utils/enveomics/Scripts/FastA.revcom.pl +0 -23
  112. data/utils/enveomics/Scripts/FastA.sample.rb +0 -83
  113. data/utils/enveomics/Scripts/FastA.slider.pl +0 -85
  114. data/utils/enveomics/Scripts/FastA.split.pl +0 -55
  115. data/utils/enveomics/Scripts/FastA.split.rb +0 -79
  116. data/utils/enveomics/Scripts/FastA.subsample.pl +0 -131
  117. data/utils/enveomics/Scripts/FastA.tag.rb +0 -65
  118. data/utils/enveomics/Scripts/FastA.wrap.rb +0 -48
  119. data/utils/enveomics/Scripts/FastQ.filter.pl +0 -54
  120. data/utils/enveomics/Scripts/FastQ.interpose.pl +0 -90
  121. data/utils/enveomics/Scripts/FastQ.offset.pl +0 -90
  122. data/utils/enveomics/Scripts/FastQ.split.pl +0 -53
  123. data/utils/enveomics/Scripts/FastQ.tag.rb +0 -63
  124. data/utils/enveomics/Scripts/FastQ.test-error.rb +0 -81
  125. data/utils/enveomics/Scripts/FastQ.toFastA.awk +0 -24
  126. data/utils/enveomics/Scripts/GFF.catsbj.pl +0 -127
  127. data/utils/enveomics/Scripts/GenBank.add_fields.rb +0 -84
  128. data/utils/enveomics/Scripts/HMM.essential.rb +0 -351
  129. data/utils/enveomics/Scripts/HMM.haai.rb +0 -168
  130. data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +0 -83
  131. data/utils/enveomics/Scripts/JPlace.distances.rb +0 -88
  132. data/utils/enveomics/Scripts/JPlace.to_iToL.rb +0 -320
  133. data/utils/enveomics/Scripts/M5nr.getSequences.rb +0 -81
  134. data/utils/enveomics/Scripts/MeTaxa.distribution.pl +0 -198
  135. data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +0 -35
  136. data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +0 -49
  137. data/utils/enveomics/Scripts/NCBIacc2tax.rb +0 -92
  138. data/utils/enveomics/Scripts/Newick.autoprune.R +0 -27
  139. data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +0 -228
  140. data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +0 -32
  141. data/utils/enveomics/Scripts/RefSeq.download.bash +0 -48
  142. data/utils/enveomics/Scripts/SRA.download.bash +0 -57
  143. data/utils/enveomics/Scripts/TRIBS.plot-test.R +0 -36
  144. data/utils/enveomics/Scripts/TRIBS.test.R +0 -39
  145. data/utils/enveomics/Scripts/Table.barplot.R +0 -31
  146. data/utils/enveomics/Scripts/Table.df2dist.R +0 -30
  147. data/utils/enveomics/Scripts/Table.filter.pl +0 -61
  148. data/utils/enveomics/Scripts/Table.merge.pl +0 -77
  149. data/utils/enveomics/Scripts/Table.replace.rb +0 -69
  150. data/utils/enveomics/Scripts/Table.round.rb +0 -63
  151. data/utils/enveomics/Scripts/Table.split.pl +0 -57
  152. data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +0 -227
  153. data/utils/enveomics/Scripts/VCF.KaKs.rb +0 -147
  154. data/utils/enveomics/Scripts/VCF.SNPs.rb +0 -88
  155. data/utils/enveomics/Scripts/aai.rb +0 -418
  156. data/utils/enveomics/Scripts/ani.rb +0 -362
  157. data/utils/enveomics/Scripts/clust.rand.rb +0 -102
  158. data/utils/enveomics/Scripts/gi2tax.rb +0 -103
  159. data/utils/enveomics/Scripts/in_silico_GA_GI.pl +0 -96
  160. data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
  161. data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
  162. data/utils/enveomics/Scripts/lib/enveomics.R +0 -1
  163. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +0 -24
  164. data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +0 -253
  165. data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +0 -182
  166. data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +0 -74
  167. data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +0 -237
  168. data/utils/enveomics/Scripts/lib/enveomics_rb/stat.rb +0 -30
  169. data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +0 -135
  170. data/utils/enveomics/Scripts/ogs.annotate.rb +0 -88
  171. data/utils/enveomics/Scripts/ogs.core-pan.rb +0 -160
  172. data/utils/enveomics/Scripts/ogs.extract.rb +0 -125
  173. data/utils/enveomics/Scripts/ogs.mcl.rb +0 -186
  174. data/utils/enveomics/Scripts/ogs.rb +0 -104
  175. data/utils/enveomics/Scripts/ogs.stats.rb +0 -131
  176. data/utils/enveomics/Scripts/rbm.rb +0 -146
  177. data/utils/enveomics/Tests/Makefile +0 -10
  178. data/utils/enveomics/Tests/Mgen_M2288.faa +0 -3189
  179. data/utils/enveomics/Tests/Mgen_M2288.fna +0 -8282
  180. data/utils/enveomics/Tests/Mgen_M2321.fna +0 -8288
  181. data/utils/enveomics/Tests/Nequ_Kin4M.faa +0 -2970
  182. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
  183. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +0 -7
  184. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +0 -17
  185. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +0 -137
  186. data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +0 -123
  187. data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +0 -200
  188. data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +0 -55
  189. data/utils/enveomics/Tests/alkB.nwk +0 -1
  190. data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +0 -13
  191. data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +0 -17
  192. data/utils/enveomics/Tests/hiv1.faa +0 -59
  193. data/utils/enveomics/Tests/hiv1.fna +0 -134
  194. data/utils/enveomics/Tests/hiv2.faa +0 -70
  195. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +0 -233
  196. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +0 -1
  197. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +0 -233
  198. data/utils/enveomics/Tests/phyla_counts.tsv +0 -10
  199. data/utils/enveomics/Tests/primate_lentivirus.ogs +0 -11
  200. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +0 -9
  201. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +0 -8
  202. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +0 -6
  203. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +0 -9
  204. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +0 -6
  205. data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +0 -6
  206. data/utils/enveomics/build_enveomics_r.bash +0 -45
  207. data/utils/enveomics/enveomics.R/DESCRIPTION +0 -31
  208. data/utils/enveomics/enveomics.R/NAMESPACE +0 -39
  209. data/utils/enveomics/enveomics.R/R/autoprune.R +0 -155
  210. data/utils/enveomics/enveomics.R/R/barplot.R +0 -184
  211. data/utils/enveomics/enveomics.R/R/cliopts.R +0 -135
  212. data/utils/enveomics/enveomics.R/R/df2dist.R +0 -154
  213. data/utils/enveomics/enveomics.R/R/growthcurve.R +0 -331
  214. data/utils/enveomics/enveomics.R/R/recplot.R +0 -354
  215. data/utils/enveomics/enveomics.R/R/recplot2.R +0 -1631
  216. data/utils/enveomics/enveomics.R/R/tribs.R +0 -583
  217. data/utils/enveomics/enveomics.R/R/utils.R +0 -50
  218. data/utils/enveomics/enveomics.R/README.md +0 -80
  219. data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
  220. data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
  221. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -17
  222. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -17
  223. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -17
  224. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +0 -25
  225. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +0 -46
  226. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +0 -23
  227. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +0 -47
  228. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +0 -23
  229. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +0 -23
  230. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +0 -32
  231. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +0 -91
  232. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +0 -57
  233. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +0 -24
  234. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +0 -19
  235. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +0 -39
  236. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +0 -38
  237. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +0 -40
  238. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +0 -67
  239. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +0 -37
  240. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +0 -122
  241. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +0 -45
  242. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +0 -24
  243. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +0 -68
  244. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +0 -25
  245. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +0 -21
  246. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +0 -19
  247. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +0 -19
  248. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +0 -41
  249. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +0 -29
  250. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +0 -18
  251. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +0 -40
  252. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +0 -36
  253. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +0 -19
  254. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +0 -19
  255. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +0 -27
  256. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +0 -41
  257. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +0 -17
  258. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +0 -43
  259. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +0 -37
  260. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +0 -74
  261. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +0 -59
  262. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +0 -27
  263. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +0 -32
  264. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +0 -59
  265. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +0 -28
  266. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +0 -27
  267. data/utils/enveomics/enveomics.R/man/growth.curves.Rd +0 -14
  268. data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +0 -13
  269. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +0 -63
  270. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +0 -38
  271. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +0 -38
  272. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +0 -111
  273. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +0 -19
  274. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +0 -19
  275. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +0 -19
  276. data/utils/enveomics/globals.mk +0 -8
  277. data/utils/enveomics/manifest.json +0 -9
@@ -1,93 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require 'optparse'
4
-
5
- o = {range: 0.5, perseq: false, length: false}
6
- ARGV << '-h' if ARGV.empty?
7
- OptionParser.new do |opt|
8
- opt.banner = "
9
- Estimates the truncated average sequencing depth (TAD) from a BedGraph file.
10
-
11
- IMPORTANT: This script doesn't consider zero-coverage positions if missing
12
- from the file. If you produce your BedGraph file with bedtools genomecov and
13
- want to consider zero-coverage position, be sure to use -bga (not -bg).
14
-
15
- Usage: #{$0} [options]"
16
- opt.separator ''
17
- opt.on('-i', '--input PATH',
18
- 'Input BedGraph file (mandatory).'){ |v| o[:i]=v }
19
- opt.on('-r', '--range FLOAT',
20
- 'Central range to consider, between 0 and 1.',
21
- "By default: #{o[:range]} (inter-quartile range)."
22
- ){ |v| o[:range]=v.to_f }
23
- opt.on('-s', '--per-seq',
24
- 'Calculate averages per reference sequence, not total.',
25
- 'Assumes a sorted BedGraph file.'
26
- ){ |v| o[:perseq] = v }
27
- opt.on('-l', '--length',
28
- 'Add sequence length to the output.'){ |v| o[:length] = v }
29
- opt.on('-h', '--help', 'Display this screen.') do
30
- puts opt
31
- exit
32
- end
33
- opt.separator ''
34
- end.parse!
35
- abort '-i is mandatory.' if o[:i].nil?
36
-
37
- def pad(d, idx, r)
38
- idx.each do |i|
39
- next if d[i].nil?
40
- d[i] -= r
41
- break unless d[i] < 0
42
- r = -d[i]
43
- d[i] = nil
44
- end
45
- d
46
- end
47
-
48
- def report(sq, d, ln, o)
49
- # Estimate padding ranges
50
- pad = (1.0-o[:range])/2.0
51
- r = (pad*ln).round
52
-
53
- # Pad
54
- d = pad(d, d.each_index.to_a, r+0)
55
- d = pad(d, d.each_index.to_a.reverse, r+0)
56
-
57
- # Average
58
- y = [0.0]
59
- unless d.compact.empty?
60
- s = d.each_with_index.to_a.map{ |v,i| v.nil? ? 0 : i*v }.inject(0,:+)
61
- y[0] = s.to_f/d.compact.inject(:+)
62
- end
63
-
64
- # Report
65
- y.unshift(sq) if o[:perseq]
66
- y << ln if o[:length]
67
- puts y.join("\t")
68
- end
69
-
70
- # Read BedGraph
71
- d = []
72
- ln = 0
73
- pre_sq = nil
74
- File.open(o[:i], "r") do |ifh|
75
- ifh.each_line do |i|
76
- next if i =~ /^#/
77
- r = i.chomp.split("\t")
78
- sq = r.shift
79
- if o[:perseq] and !pre_sq.nil? and pre_sq!=sq
80
- report(pre_sq, d, ln, o)
81
- d = []
82
- ln = 0
83
- end
84
- r.map! { |j| j.to_i }
85
- l = r[1]-r[0]
86
- d[ r[2] ] ||= 0
87
- d[ r[2] ] += l
88
- ln += l
89
- pre_sq = sq
90
- end
91
- end
92
- report(pre_sq, d, ln, o)
93
-
@@ -1,71 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require 'optparse'
4
-
5
- o = {win: 1000}
6
- ARGV << '-h' if ARGV.empty?
7
- OptionParser.new do |opt|
8
- opt.banner = "
9
- Estimates the sequencing depth per windows from a BedGraph file.
10
-
11
- IMPORTANT: This script doesn't consider zero-coverage positions if missing
12
- from the file. If you produce your BedGraph file with bedtools genomecov and
13
- want to consider zero-coverage position, be sure to use -bga (not -bg).
14
-
15
- Usage: #{$0} [options]"
16
- opt.separator ''
17
- opt.on('-i', '--input PATH',
18
- 'Input BedGraph file (mandatory).'){ |v| o[:i]=v }
19
- opt.on('-w', '--win INT',
20
- 'Window size, in base pairs.', "By default: #{o[:win]}."
21
- ){ |v| o[:win]=v.to_i }
22
- opt.on('-h', '--help', 'Display this screen.') do
23
- puts opt
24
- exit
25
- end
26
- opt.separator ''
27
- end.parse!
28
- abort '-i is mandatory.' if o[:i].nil?
29
-
30
- def report(d, a, b, seqs)
31
- # Average
32
- y = 0.0
33
- unless d.compact.empty?
34
- s = d.each_with_index.to_a.map{ |v,i| v.nil? ? 0 : i*v }.inject(0,:+)
35
- y = s.to_f/d.compact.inject(:+)
36
- end
37
-
38
- # Report
39
- puts [a, b, y, seqs.keys.join(",")].join("\t")
40
- end
41
-
42
- # Read BedGraph
43
- d = []
44
- ln = 0
45
- a = 1
46
- seqs = {}
47
- b = o[:win]
48
- File.open(o[:i], "r") do |ifh|
49
- ifh.each_line do |i|
50
- next if i =~ /^#/
51
- r = i.chomp.split("\t")
52
- sq = r.shift
53
- seqs[sq] = 1
54
- r.map!{ |j| j.to_i }
55
- l = r[1]-r[0]
56
- d[ r[2] ] ||= 0
57
- d[ r[2] ] += l
58
- ln += l
59
- while ln >= b
60
- d[ r[2] ] -= (ln-b)
61
- report(d, a, b, seqs)
62
- seqs = {}
63
- seqs[ sq ] = 1 if ln > b
64
- d = []
65
- d[ r[2] ] = (ln-b)
66
- a = b + 1
67
- b = a + o[:win] - 1
68
- end
69
- end
70
- end
71
-
@@ -1,102 +0,0 @@
1
- #!/usr/bin/env perl
2
- #
3
- # @authors Konstantinos Konstantinidis (initial version)
4
- # modified to work with the BLASTp 2.2.25+ m0 output by
5
- # Despina Tsementzi & Luis M. Rodriguez-R
6
- # @updated Dec-21-2015
7
- #
8
-
9
-
10
- $/ = "Lambda ";
11
- use strict;
12
- my %hash_depth;
13
-
14
- my @query;
15
- my @subject;
16
- my @similarity;
17
- my $length = "0";
18
-
19
- my($cigar_chr, $blast) = @ARGV;
20
-
21
- ($cigar_chr and $blast) or die "
22
- .Description:
23
- Counts the different AA substitutions in the best hit blast alignments, from
24
- a BLASTP pairwise format output (-outfmt 0 in BLAST+, -m 0 in legacy BLAST).
25
-
26
- .Usage: $0 cigar_char blast.m0.txt > aa-subs.list
27
-
28
- cigar_char Use '+' for similar substitutions, use '_' for non similar
29
- substitutions
30
- blast.m0.txt Blast in 'text' format (-outfmt/-m 0).
31
- aa-subs.list A tab-delimited raw file with one substitution per row and
32
- columns:
33
- (1) Name-of-query_Name-of-subject
34
- (2) AA-in-subject
35
- (3) AA-in-query
36
- (4) Total-Align-Length
37
-
38
- ";
39
-
40
- # For each blast result (i.e., for each query)
41
- open BLAST, "<", $blast or die "Cannot read file: $blast: $!\n";
42
- while(my $data=<BLAST>) {
43
- $data =~ s/\r//g;
44
- my ($data_q, @array_matches) = split(/>/,$data);
45
- my ($name_query) = ($data_q =~ /Query\= (\S+?)(?:_GENE|\s)/);
46
- my ($length_query) = ($data_q =~ /\(([\d,]+) letters/ );
47
- ($length_query) = ($data_q =~ /Length=([\d,]+)/) unless $length_query;
48
- $length_query =~ tr/,//d;
49
-
50
- # For each alignment (i.e., for each HSP),
51
- # note the "last" at the end of the block,
52
- # so only the best match is considered
53
- foreach my $data_f (@array_matches) {
54
- # Capture statistics
55
- my ($length_match) = ($data_f =~ /Identities = \d+\/(\d+)/);
56
- my ($identity_match) = ($data_f =~ /Identities = \d+\/\d+ \((\d+)%/);
57
- my ($target_name) = ($data_f =~ /^\s?(\S+)/);
58
-
59
- # If the alignment meets minimum requirements
60
- if ($length_query >30 && ($length_match/$length_query > 0.7) && $identity_match > 60) {
61
- $data_f =~ tr/ /_/;
62
- my @array = split ("\n", $data_f);
63
- my $blanks = 0;
64
- my $prefix_size = 0;
65
-
66
- # For each line in the alignment
67
- for my $data_fff (@array) {
68
- if ($data_fff =~ /(Query[:_]_+\d+_+)([^_]+)/){
69
- # Query lines
70
- $prefix_size = length($1);
71
- $length = length($2);
72
- @query = split (//, $2);
73
- }elsif ($data_fff =~ /^_{11}/){
74
- # Cigar lines
75
- @similarity = split(//, substr($data_fff, $prefix_size, $length));
76
- }elsif ($data_fff =~ /Sbjct[:_]_+\d+_+([^_]+)/){
77
- # Subject lines
78
- @subject = split(//, $1);
79
- # For each alignment column
80
- for(my $i=0; $i <= $length; $i++){
81
- if ($similarity[$i] eq $cigar_chr) {
82
- print "$name_query\_$target_name\t$subject[$i]\t$query[$i]\t$length_match\n";
83
- }
84
- }
85
- undef @query;
86
- undef @similarity;
87
- undef @subject;
88
- }
89
-
90
- # Remove secondary alignments
91
- if ($data_fff =~ /^$/){
92
- $blanks++;
93
- last if $blanks >= 3;
94
- }else{
95
- $blanks=0;
96
- }
97
- } # for my $data_fff (@array)
98
- } # if ($length_query >30 ...
99
- last; # <---- So it takes only the best match!
100
- } # foreach my $data_f (@array_matches)
101
- } # while(my $data=<>)
102
-
@@ -1,63 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- #
4
- # @author: Luis M. Rodriguez-R
5
- # @license: artistic license 2.0
6
- #
7
-
8
- require 'optparse'
9
-
10
- o = { sbj: false, q: false }
11
- ARGV << '-h' if ARGV.size == 0
12
- OptionParser.new do |opts|
13
- opts.banner = "
14
- Appends an extra column to a BLAST with the length of the query or the subject
15
- sequence. You can pipe two instances to add both:
16
- cat input.blast | #{$0} -f queries.fa | #{$0} -f subjects.fa -s > output.blast
17
-
18
- Usage: #{$0} [options] < input.blast > output.blast"
19
- opts.separator ''
20
- opts.separator 'Mandatory'
21
- opts.on('-f', '--fasta FILE', 'Path to the FastA file'){ |v| o[:fasta] = v }
22
- opts.separator ''
23
- opts.separator 'Options'
24
- opts.on('-s', '--subject',
25
- 'Use the subject column of the BLAST, by default the query column is used'
26
- ){ o[:sbj] = true }
27
- opts.on('-q', '--quiet', 'Run quietly (no STDERR output)'){ o[:q] = true }
28
- opts.on('-h', '--help', 'Display this screen') do
29
- puts opts
30
- exit
31
- end
32
- opts.separator ''
33
- end.parse!
34
- abort '-f is mandatory' if o[:fasta].nil?
35
-
36
- len = {}
37
- id = ''
38
- $stderr.puts "Reading FastA file: #{o[:fasta]}" unless o[:q]
39
- fh = File.open(o[:fasta], 'r')
40
- fh.each_line do |ln|
41
- defline = /^>(\S+)/.match(ln)
42
- if defline.nil?
43
- ln.gsub! /[^A-Za-z]/, ''
44
- abort 'Error: Unsupported format, expecting FastA' if len[id].nil?
45
- len[id] = len[id] + ln.size
46
- else
47
- id = defline[1]
48
- len[id] = 0
49
- end
50
- end
51
- fh.close
52
-
53
- unless o[:q]
54
- $stderr.puts 'Appending %s length column' % (o[:sbj] ? 'subject' : 'query')
55
- end
56
- ARGF.each_line do |ln|
57
- ln.chomp!
58
- row = ln.split /\t/
59
- id = o[:sbj] ? row[1] : row[0]
60
- abort "Impossible to find sequence of #{id}" if len[id].nil?
61
- puts "#{ln}\t#{len[id]}"
62
- end
63
-
@@ -1,48 +0,0 @@
1
- #!/bin/bash
2
-
3
- #
4
- # @author Luis M. Rodriguez-R
5
- # @update Mar-23-2016
6
- # @license artistic license 2.0
7
- #
8
-
9
- if [[ ! $2 ]] ; then
10
- echo "
11
- .DESCRIPTION
12
- Calculates the percentage of a partial BLAST result. The
13
- value produced slightly subestimates the actual advance,
14
- due to un-flushed output and trailing queries that could
15
- be processed but generate no results.
16
-
17
- .USAGE
18
- $0 blast.txt qry.fasta
19
-
20
- blast.txt Incomplete Tabular BLAST output.
21
- qry.fasta FastA file with query sequences.
22
- ";
23
- exit 1;
24
- fi
25
-
26
- if [[ ! -r $1 ]]; then
27
- echo "Cannot open file: $1";
28
- exit 1;
29
- fi
30
-
31
- if [[ ! -r $2 ]]; then
32
- echo "Cannot open file: $2";
33
- exit 1;
34
- fi
35
-
36
- LAST_Q=`tail -n 2 $1 | head -n 1 | awk '{print $1}'`
37
- LAST_Q_NO=`grep -n "^>$LAST_Q\\( \\|$\\)" $2 | sed -e 's/:.*//'`
38
- if [[ ! $LAST_Q_NO ]]; then
39
- echo "Cannot find sequence: $LAST_Q";
40
- echo "Make sure you are providing the right query file.";
41
- exit 1;
42
- fi
43
- TOTAL_Q_NO=`cat $2 | wc -l | sed -e 's/ *//'`
44
- let PERC=100*$LAST_Q_NO/$TOTAL_Q_NO
45
-
46
- echo "$PERC%: $LAST_Q_NO / $TOTAL_Q_NO"
47
- exit 0;
48
-
@@ -1,55 +0,0 @@
1
- #!/usr/bin/env perl
2
-
3
- #
4
- # @author: Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
5
- # @license: artistic license 2.0
6
- # @last_update: Mar-23-2015
7
- #
8
-
9
- use strict;
10
- use warnings;
11
-
12
- die "
13
- Usage:
14
- sort blast.txt ... | $0 > blast.bh.txt
15
- $0 blast_sorted.txt ... > blast.bh.txt
16
- $0 -h|--help|-?
17
-
18
- blast.txt ... One or more files in Tabular BLAST format.
19
- blast_sorted.txt ... One or more files in Tabular BLAST format pre-sorted.
20
- blast.bh.txt Output file in BLAST format containing best-hits only.
21
- -h|--help|-? Any of these flags trigger this help message and exits.
22
-
23
- NOTE: This script assumes that the BLAST is sorted. Because it can read
24
- from the STDIN, calling this script without arguments cause it to still until
25
- killed or until an EOF (^D) is presented.
26
-
27
- " if exists $ARGV[0] and $ARGV[0] =~ /^\-?\-(h(elp)?|\?)/i;
28
-
29
- my $last_qry = '';
30
- my @best_res;
31
-
32
- sub best_result($$){
33
- my($r1, $r2)=@_;
34
- return $r1 unless $r2;
35
- return $r1->[11] > $r2->[11] ? @$r1 : @$r2;
36
- }
37
-
38
- my $i=0;
39
- while(<>){
40
- chomp;
41
- #print STDERR " Reading entry $i... \r" unless $i%1000;
42
- my @res = split /\t/;
43
- die "\nCannot parse BLAST line $.: $_\n" unless exists $res[1];
44
- if($last_qry eq $res[0]){
45
- @best_res = &best_result(\@res, \@best_res);
46
- }else{
47
- print join("\t", @best_res), "\n" if $#best_res>0;
48
- @best_res = @res;
49
- $last_qry = $res[0];
50
- }
51
- }
52
- print join("\t", @best_res), "\n" if @best_res;
53
-
54
-
55
-
@@ -1,104 +0,0 @@
1
- #!/usr/bin/env perl
2
-
3
- # @author: Luis M. Rodriguez-R
4
- # @license: Artistic-2.0
5
-
6
- use warnings;
7
- use strict;
8
- use List::Util qw/min max/;
9
- use Getopt::Std;
10
-
11
- sub HELP_MESSAGE { die "
12
-
13
- Description:
14
- Generates a list of hits from a BLAST result concatenating the subject
15
- sequences. This can be used, e.g., to analyze BLAST results against
16
- draft genomes.
17
-
18
- Usage:
19
- $0 [options] seq.fa map.bls
20
-
21
- seq.fa Subject sequences (ref) in FastA format.
22
- map.bls Mapping of the reads to the reference in BLAST Tabular
23
- format.
24
-
25
- Options:
26
- -i <float> Minimum identity to report a result. By default: 70.
27
- -l <int> Minimum alignment length to report a result. By default: 60.
28
- -s The FastA provided is to be treated as a subset of the subject.
29
- By default, it expects all the BLAST subjects to be present in
30
- the FastA.
31
- -q Run quietly.
32
- -h Display this message and exit.
33
-
34
- This script creates two files using <map.bls> as prefix with extensions
35
- .rec (for the recruitment plot) and .lim (for the limits of the different
36
- sequences in <seq.fa>).
37
-
38
- ";}
39
-
40
- my %o;
41
- getopts('i:l:sqh', \%o);
42
- my($fa, $map) = @ARGV;
43
- ($fa and $map) or &HELP_MESSAGE;
44
- $o{h} and &HELP_MESSAGE;
45
- $o{i} ||= 70;
46
- $o{l} ||= 60;
47
-
48
- my %seq = ();
49
- my @seq = ();
50
- my $tot = 0;
51
-
52
- SEQ: {
53
- print STDERR "== Reading reference sequences\n" unless $o{q};
54
- open FA, "<", $fa or die "Cannot read the file: $fa: $!\n";
55
- my $cur_seq = '';
56
- while(<FA>){
57
- chomp;
58
- if(m/^>(\S+)/){
59
- my $c = $1;
60
- $seq{$c} = exists $seq{$cur_seq} ? $seq{$cur_seq}+1 : 1;
61
- push @seq, $c;
62
- $cur_seq = $c;
63
- }else{
64
- s/[^A-Za-z]//g;
65
- $seq{$cur_seq} += length $_;
66
- }
67
- }
68
- close FA;
69
- print STDERR " Found ".(scalar @seq)." sequences.\n" unless $o{q};
70
- }
71
-
72
- open LIM, ">", "$map.lim" or die "Cannot create the file: $map.lim: $!\n";
73
- my $l = 0;
74
- for my $s (@seq){
75
- print LIM "$s\t".(++$l)."\t$seq{$s}\n";
76
- ($l, $seq{$s}) = ($seq{$s}, $l);
77
- }
78
- close LIM;
79
-
80
- MAP: {
81
- print STDERR "== Reading mapping\n" unless $o{q};
82
- open BLS, "<", $map or die "Cannot read the file: $map: $!\n";
83
- open REC, ">", "$map.rec" or die "Cannot create the file: $map.rec: $!\n";
84
- RESULT: while(<BLS>){
85
- chomp;
86
- my @ln = split /\t/;
87
- $ln[11] or die "Cannot parse line $map:$.: $_\n";
88
- next unless $ln[3]>=$o{l};
89
- next unless $ln[2]>=$o{i};
90
- unless(exists $seq{$ln[1]}){
91
- die "Cannot find the subject sequence: $ln[1]\n" unless $o{s};
92
- next RESULT;
93
- }
94
- my $start = $seq{$ln[1]}+min($ln[8], $ln[9]);
95
- my $end = $seq{$ln[1]}+max($ln[8], $ln[9]);
96
- print REC "$start\t$end\t$ln[2]\t$ln[11]\t$ln[0]",
97
- (exists($ln[13])?"\t".($ln[2]*$ln[3]/min($ln[12],$ln[13]))."\t":
98
- exists($ln[12])?"\t".($ln[2]*$ln[3]/$ln[12])."\t":""),"\n";
99
- }
100
- close BLS;
101
- close REC;
102
- print STDERR " done.\n" unless $o{q};
103
- }
104
-