miga-base 0.7.25.2 → 0.7.25.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (277) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/version.rb +2 -2
  3. data/utils/distance/runner.rb +2 -1
  4. metadata +5 -278
  5. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +0 -41964
  6. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +0 -32439
  7. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +0 -62056
  8. data/utils/FastAAI/FastAAI/FastAAI +0 -1336
  9. data/utils/FastAAI/README.md +0 -84
  10. data/utils/FastAAI/kAAI_v1.0_virus.py +0 -1296
  11. data/utils/enveomics/Docs/recplot2.md +0 -244
  12. data/utils/enveomics/Examples/aai-matrix.bash +0 -66
  13. data/utils/enveomics/Examples/ani-matrix.bash +0 -66
  14. data/utils/enveomics/Examples/essential-phylogeny.bash +0 -105
  15. data/utils/enveomics/Examples/unus-genome-phylogeny.bash +0 -100
  16. data/utils/enveomics/LICENSE.txt +0 -73
  17. data/utils/enveomics/Makefile +0 -52
  18. data/utils/enveomics/Manifest/Tasks/aasubs.json +0 -103
  19. data/utils/enveomics/Manifest/Tasks/blasttab.json +0 -786
  20. data/utils/enveomics/Manifest/Tasks/distances.json +0 -161
  21. data/utils/enveomics/Manifest/Tasks/fasta.json +0 -766
  22. data/utils/enveomics/Manifest/Tasks/fastq.json +0 -243
  23. data/utils/enveomics/Manifest/Tasks/graphics.json +0 -126
  24. data/utils/enveomics/Manifest/Tasks/mapping.json +0 -67
  25. data/utils/enveomics/Manifest/Tasks/ogs.json +0 -382
  26. data/utils/enveomics/Manifest/Tasks/other.json +0 -829
  27. data/utils/enveomics/Manifest/Tasks/remote.json +0 -355
  28. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +0 -501
  29. data/utils/enveomics/Manifest/Tasks/tables.json +0 -308
  30. data/utils/enveomics/Manifest/Tasks/trees.json +0 -68
  31. data/utils/enveomics/Manifest/Tasks/variants.json +0 -111
  32. data/utils/enveomics/Manifest/categories.json +0 -156
  33. data/utils/enveomics/Manifest/examples.json +0 -154
  34. data/utils/enveomics/Manifest/tasks.json +0 -4
  35. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +0 -69
  36. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +0 -1
  37. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +0 -1
  38. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +0 -1
  39. data/utils/enveomics/Pipelines/assembly.pbs/README.md +0 -189
  40. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +0 -112
  41. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +0 -23
  42. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +0 -44
  43. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +0 -50
  44. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +0 -37
  45. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +0 -68
  46. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +0 -49
  47. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +0 -80
  48. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +0 -57
  49. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +0 -63
  50. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +0 -38
  51. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +0 -73
  52. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +0 -21
  53. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +0 -72
  54. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +0 -98
  55. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +0 -1
  56. data/utils/enveomics/Pipelines/blast.pbs/README.md +0 -127
  57. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +0 -109
  58. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +0 -128
  59. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +0 -16
  60. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +0 -22
  61. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +0 -26
  62. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +0 -89
  63. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +0 -29
  64. data/utils/enveomics/Pipelines/idba.pbs/README.md +0 -49
  65. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +0 -95
  66. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +0 -56
  67. data/utils/enveomics/Pipelines/trim.pbs/README.md +0 -54
  68. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +0 -70
  69. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +0 -130
  70. data/utils/enveomics/README.md +0 -42
  71. data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +0 -171
  72. data/utils/enveomics/Scripts/Aln.cat.rb +0 -163
  73. data/utils/enveomics/Scripts/Aln.convert.pl +0 -35
  74. data/utils/enveomics/Scripts/AlphaDiversity.pl +0 -152
  75. data/utils/enveomics/Scripts/BedGraph.tad.rb +0 -93
  76. data/utils/enveomics/Scripts/BedGraph.window.rb +0 -71
  77. data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +0 -102
  78. data/utils/enveomics/Scripts/BlastTab.addlen.rb +0 -63
  79. data/utils/enveomics/Scripts/BlastTab.advance.bash +0 -48
  80. data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +0 -55
  81. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +0 -104
  82. data/utils/enveomics/Scripts/BlastTab.cogCat.rb +0 -76
  83. data/utils/enveomics/Scripts/BlastTab.filter.pl +0 -47
  84. data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +0 -194
  85. data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +0 -104
  86. data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +0 -157
  87. data/utils/enveomics/Scripts/BlastTab.recplot2.R +0 -48
  88. data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +0 -86
  89. data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +0 -119
  90. data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +0 -86
  91. data/utils/enveomics/Scripts/BlastTab.subsample.pl +0 -47
  92. data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +0 -114
  93. data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +0 -90
  94. data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +0 -101
  95. data/utils/enveomics/Scripts/Chao1.pl +0 -97
  96. data/utils/enveomics/Scripts/CharTable.classify.rb +0 -234
  97. data/utils/enveomics/Scripts/EBIseq2tax.rb +0 -83
  98. data/utils/enveomics/Scripts/FastA.N50.pl +0 -56
  99. data/utils/enveomics/Scripts/FastA.extract.rb +0 -152
  100. data/utils/enveomics/Scripts/FastA.filter.pl +0 -52
  101. data/utils/enveomics/Scripts/FastA.filterLen.pl +0 -28
  102. data/utils/enveomics/Scripts/FastA.filterN.pl +0 -60
  103. data/utils/enveomics/Scripts/FastA.fragment.rb +0 -92
  104. data/utils/enveomics/Scripts/FastA.gc.pl +0 -42
  105. data/utils/enveomics/Scripts/FastA.interpose.pl +0 -93
  106. data/utils/enveomics/Scripts/FastA.length.pl +0 -38
  107. data/utils/enveomics/Scripts/FastA.mask.rb +0 -89
  108. data/utils/enveomics/Scripts/FastA.per_file.pl +0 -36
  109. data/utils/enveomics/Scripts/FastA.qlen.pl +0 -57
  110. data/utils/enveomics/Scripts/FastA.rename.pl +0 -65
  111. data/utils/enveomics/Scripts/FastA.revcom.pl +0 -23
  112. data/utils/enveomics/Scripts/FastA.sample.rb +0 -83
  113. data/utils/enveomics/Scripts/FastA.slider.pl +0 -85
  114. data/utils/enveomics/Scripts/FastA.split.pl +0 -55
  115. data/utils/enveomics/Scripts/FastA.split.rb +0 -79
  116. data/utils/enveomics/Scripts/FastA.subsample.pl +0 -131
  117. data/utils/enveomics/Scripts/FastA.tag.rb +0 -65
  118. data/utils/enveomics/Scripts/FastA.wrap.rb +0 -48
  119. data/utils/enveomics/Scripts/FastQ.filter.pl +0 -54
  120. data/utils/enveomics/Scripts/FastQ.interpose.pl +0 -90
  121. data/utils/enveomics/Scripts/FastQ.offset.pl +0 -90
  122. data/utils/enveomics/Scripts/FastQ.split.pl +0 -53
  123. data/utils/enveomics/Scripts/FastQ.tag.rb +0 -63
  124. data/utils/enveomics/Scripts/FastQ.test-error.rb +0 -81
  125. data/utils/enveomics/Scripts/FastQ.toFastA.awk +0 -24
  126. data/utils/enveomics/Scripts/GFF.catsbj.pl +0 -127
  127. data/utils/enveomics/Scripts/GenBank.add_fields.rb +0 -84
  128. data/utils/enveomics/Scripts/HMM.essential.rb +0 -351
  129. data/utils/enveomics/Scripts/HMM.haai.rb +0 -168
  130. data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +0 -83
  131. data/utils/enveomics/Scripts/JPlace.distances.rb +0 -88
  132. data/utils/enveomics/Scripts/JPlace.to_iToL.rb +0 -320
  133. data/utils/enveomics/Scripts/M5nr.getSequences.rb +0 -81
  134. data/utils/enveomics/Scripts/MeTaxa.distribution.pl +0 -198
  135. data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +0 -35
  136. data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +0 -49
  137. data/utils/enveomics/Scripts/NCBIacc2tax.rb +0 -92
  138. data/utils/enveomics/Scripts/Newick.autoprune.R +0 -27
  139. data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +0 -228
  140. data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +0 -32
  141. data/utils/enveomics/Scripts/RefSeq.download.bash +0 -48
  142. data/utils/enveomics/Scripts/SRA.download.bash +0 -57
  143. data/utils/enveomics/Scripts/TRIBS.plot-test.R +0 -36
  144. data/utils/enveomics/Scripts/TRIBS.test.R +0 -39
  145. data/utils/enveomics/Scripts/Table.barplot.R +0 -31
  146. data/utils/enveomics/Scripts/Table.df2dist.R +0 -30
  147. data/utils/enveomics/Scripts/Table.filter.pl +0 -61
  148. data/utils/enveomics/Scripts/Table.merge.pl +0 -77
  149. data/utils/enveomics/Scripts/Table.replace.rb +0 -69
  150. data/utils/enveomics/Scripts/Table.round.rb +0 -63
  151. data/utils/enveomics/Scripts/Table.split.pl +0 -57
  152. data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +0 -227
  153. data/utils/enveomics/Scripts/VCF.KaKs.rb +0 -147
  154. data/utils/enveomics/Scripts/VCF.SNPs.rb +0 -88
  155. data/utils/enveomics/Scripts/aai.rb +0 -418
  156. data/utils/enveomics/Scripts/ani.rb +0 -362
  157. data/utils/enveomics/Scripts/clust.rand.rb +0 -102
  158. data/utils/enveomics/Scripts/gi2tax.rb +0 -103
  159. data/utils/enveomics/Scripts/in_silico_GA_GI.pl +0 -96
  160. data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
  161. data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
  162. data/utils/enveomics/Scripts/lib/enveomics.R +0 -1
  163. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +0 -24
  164. data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +0 -253
  165. data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +0 -182
  166. data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +0 -74
  167. data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +0 -237
  168. data/utils/enveomics/Scripts/lib/enveomics_rb/stat.rb +0 -30
  169. data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +0 -135
  170. data/utils/enveomics/Scripts/ogs.annotate.rb +0 -88
  171. data/utils/enveomics/Scripts/ogs.core-pan.rb +0 -160
  172. data/utils/enveomics/Scripts/ogs.extract.rb +0 -125
  173. data/utils/enveomics/Scripts/ogs.mcl.rb +0 -186
  174. data/utils/enveomics/Scripts/ogs.rb +0 -104
  175. data/utils/enveomics/Scripts/ogs.stats.rb +0 -131
  176. data/utils/enveomics/Scripts/rbm.rb +0 -146
  177. data/utils/enveomics/Tests/Makefile +0 -10
  178. data/utils/enveomics/Tests/Mgen_M2288.faa +0 -3189
  179. data/utils/enveomics/Tests/Mgen_M2288.fna +0 -8282
  180. data/utils/enveomics/Tests/Mgen_M2321.fna +0 -8288
  181. data/utils/enveomics/Tests/Nequ_Kin4M.faa +0 -2970
  182. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
  183. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +0 -7
  184. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +0 -17
  185. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +0 -137
  186. data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +0 -123
  187. data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +0 -200
  188. data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +0 -55
  189. data/utils/enveomics/Tests/alkB.nwk +0 -1
  190. data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +0 -13
  191. data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +0 -17
  192. data/utils/enveomics/Tests/hiv1.faa +0 -59
  193. data/utils/enveomics/Tests/hiv1.fna +0 -134
  194. data/utils/enveomics/Tests/hiv2.faa +0 -70
  195. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +0 -233
  196. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +0 -1
  197. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +0 -233
  198. data/utils/enveomics/Tests/phyla_counts.tsv +0 -10
  199. data/utils/enveomics/Tests/primate_lentivirus.ogs +0 -11
  200. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +0 -9
  201. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +0 -8
  202. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +0 -6
  203. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +0 -9
  204. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +0 -6
  205. data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +0 -6
  206. data/utils/enveomics/build_enveomics_r.bash +0 -45
  207. data/utils/enveomics/enveomics.R/DESCRIPTION +0 -31
  208. data/utils/enveomics/enveomics.R/NAMESPACE +0 -39
  209. data/utils/enveomics/enveomics.R/R/autoprune.R +0 -155
  210. data/utils/enveomics/enveomics.R/R/barplot.R +0 -184
  211. data/utils/enveomics/enveomics.R/R/cliopts.R +0 -135
  212. data/utils/enveomics/enveomics.R/R/df2dist.R +0 -154
  213. data/utils/enveomics/enveomics.R/R/growthcurve.R +0 -331
  214. data/utils/enveomics/enveomics.R/R/recplot.R +0 -354
  215. data/utils/enveomics/enveomics.R/R/recplot2.R +0 -1631
  216. data/utils/enveomics/enveomics.R/R/tribs.R +0 -583
  217. data/utils/enveomics/enveomics.R/R/utils.R +0 -50
  218. data/utils/enveomics/enveomics.R/README.md +0 -80
  219. data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
  220. data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
  221. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -17
  222. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -17
  223. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -17
  224. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +0 -25
  225. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +0 -46
  226. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +0 -23
  227. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +0 -47
  228. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +0 -23
  229. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +0 -23
  230. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +0 -32
  231. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +0 -91
  232. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +0 -57
  233. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +0 -24
  234. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +0 -19
  235. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +0 -39
  236. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +0 -38
  237. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +0 -40
  238. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +0 -67
  239. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +0 -37
  240. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +0 -122
  241. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +0 -45
  242. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +0 -24
  243. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +0 -68
  244. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +0 -25
  245. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +0 -21
  246. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +0 -19
  247. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +0 -19
  248. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +0 -41
  249. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +0 -29
  250. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +0 -18
  251. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +0 -40
  252. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +0 -36
  253. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +0 -19
  254. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +0 -19
  255. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +0 -27
  256. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +0 -41
  257. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +0 -17
  258. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +0 -43
  259. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +0 -37
  260. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +0 -74
  261. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +0 -59
  262. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +0 -27
  263. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +0 -32
  264. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +0 -59
  265. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +0 -28
  266. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +0 -27
  267. data/utils/enveomics/enveomics.R/man/growth.curves.Rd +0 -14
  268. data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +0 -13
  269. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +0 -63
  270. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +0 -38
  271. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +0 -38
  272. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +0 -111
  273. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +0 -19
  274. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +0 -19
  275. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +0 -19
  276. data/utils/enveomics/globals.mk +0 -8
  277. data/utils/enveomics/manifest.json +0 -9
@@ -1,48 +0,0 @@
1
- #!/usr/bin/env Rscript
2
-
3
- # @author Luis M. Rodriguez-R
4
- # @license Artistic-2.0
5
-
6
- #= Load stuff
7
- suppressPackageStartupMessages(library(enveomics.R))
8
- args <- commandArgs(trailingOnly = FALSE)
9
- enveomics_R <- file.path(dirname(
10
- sub("^--file=", "", args[grep("^--file=", args)])),
11
- "lib", "enveomics.R")
12
-
13
- #= Generate interface
14
- opt <- enve.cliopts(enve.recplot2,
15
- file.path(enveomics_R, "man", "enve.recplot2.Rd"),
16
- positional_arguments=c(1,4),
17
- usage="usage: %prog [options] output.Rdata [output.pdf [width height]]",
18
- mandatory=c("prefix"),
19
- o_desc=list(pos.breaks="Breaks in the positions histogram.",
20
- pos.breaks.tsv="File with (absolute) coordinates of breaks in the position histogram",
21
- id.breaks="Breaks in the identity histogram.",
22
- id.summary="Function summarizing the identity bins. By default: sum.",
23
- peaks.col="Color of peaks, mandatory for peak-finding (e.g., darkred).",
24
- peaks.method="Method to detect peaks; one of emauto, em, or mower."),
25
- p_desc=paste("","Produce recruitment plot objects provided that",
26
- "BlastTab.catsbj.pl has been previously executed.", sep="\n\t"),
27
- ignore=c("plot"),
28
- defaults=c(pos.breaks.tsv=NA, id.metric="identity", peaks.col=NA,
29
- peaks.method="emauto"))
30
-
31
- #= Run it!
32
- if(length(opt$args)>1){
33
- args = as.list(opt$args[-1])
34
- for(i in 2:3) if(length(args)>=i) args[[i]] <- as.numeric(args[[i]])
35
- do.call("pdf", args)
36
- }else{
37
- opt$options[["plot"]] <- FALSE
38
- }
39
- pc <- opt$options[["peaks.col"]]
40
- if(!is.na(pc) && pc=="NA") opt$options[["peaks.col"]] <- NA
41
- if(!is.null(opt$options[["peaks.method"]])){
42
- opt$options[["peaks.opts"]] <- list(method=opt$options[["peaks.method"]])
43
- opt$options[["peaks.method"]] <- NULL
44
- }
45
- rp <- do.call("enve.recplot2", opt$options)
46
- save(rp, file=opt$args[1])
47
- if(length(opt$args)>1) dev.off()
48
-
@@ -1,86 +0,0 @@
1
- #!/usr/bin/env perl
2
- #
3
- # @author: Luis M Rodriguez-R <lmrodriguezr at gmail dot com>
4
- # @license: artistic license 2.0
5
- # @update: Mar-23-2015
6
- #
7
-
8
- use strict;
9
- use warnings;
10
- use List::Util qw/min max sum/;
11
-
12
- my $fna = shift @ARGV;
13
- $fna or die "
14
- Usage:
15
- cat blast1... | $0 genes_or_ctgs.fna > genes_or_ctgs.cov
16
-
17
- blast1... One or more Tabular BLAST files of reads vs genes (or contigs).
18
- genes_or_ctgs.fna A FastA file containing the genes or the contigs (db).
19
- genes_or_ctgs.cov The output file.
20
-
21
- Output:
22
- A tab-delimited file with the following columns:
23
- 1. Subject ID
24
- 2. Average sequencing depth
25
- 3. Median sequencing depth
26
- 4. Number of mapped reads
27
- 5. Length of the subject sequence
28
-
29
- ";
30
-
31
- my $size = {};
32
- my $gene = {};
33
- my $reads = {};
34
-
35
- SIZE:{
36
- local $/=">";
37
- print STDERR "== Reading fasta\n";
38
- open FNA, "<", $fna or die "Cannot read the file: $fna: $!\n";
39
- my $i=0;
40
- while(<FNA>){
41
- chomp;
42
- my @g = split /\n/, $_, 2;
43
- next unless $g[1];
44
- #$g[1] =~ s/[^A-Za-z]//g;
45
- #$size->{$g[0]} = length $g[1];
46
- $g[0] =~ s/\s.*//;
47
- $size->{$g[0]} = ( $g[1] =~ tr/[A-Za-z]// );
48
- print STDERR " Measuring sequence ".($i).": $g[0] \r" unless ++$i%500;
49
- }
50
- close FNA;
51
- print STDERR " Found $i sequences".(" "x30)."\n";
52
- }
53
-
54
- MAP:{
55
- print STDERR "== Reading mapping\n";
56
- my $i=0;
57
- while(<>){
58
- my @ln = split /\t/;
59
- $gene->{$ln[1]} ||= [];
60
- for my $pos (min($ln[8], $ln[9]) .. max($ln[8], $ln[9])){ ($gene->{$ln[1]}->[$pos]||=0)++ }
61
- ($reads->{$ln[1]} ||= 0)++;
62
- print STDERR " Saving hit ".($i).": $ln[1] \r" unless ++$i%5000;
63
- }
64
- print STDERR " Found $i hits".(" "x30)."\n";
65
- }
66
-
67
- OUT:{
68
- print STDERR "== Creating output\n";
69
- my $i=0;
70
- for my $g (keys %$gene){
71
- $gene->{$g}->[$_] ||= 0 for (0 .. $size->{$g});
72
- my @sorted = sort {$a <=> $b} @{$gene->{$g}};
73
- die "Cannot find gene in $fna: $g.\n" unless exists $size->{$g};
74
- printf "%s\t%.6f\t%d\t%d\t%d\n", $g,
75
- sum(@{$gene->{$g}})/$size->{$g},
76
- $sorted[$#sorted/2],
77
- $reads->{$g},
78
- $size->{$g};
79
- delete $gene->{$g};
80
- print STDERR " Saving sequence $g:".($i)."\r" unless ++$i%500;
81
- }
82
- print STDERR " Saved $i sequences".(" "x30)."\n";
83
- }
84
-
85
- print STDERR " done.\n";
86
-
@@ -1,119 +0,0 @@
1
- #!/usr/bin/env perl
2
- #
3
- # @author: Luis M Rodriguez-R <lmrodriguezr at gmail dot com>
4
- # @license: artistic license 2.0
5
- # @update: Mar-23-2015
6
- #
7
-
8
- use strict;
9
- use warnings;
10
- use List::Util qw/min max sum/;
11
-
12
- my $fna = shift @ARGV;
13
- $fna or die "
14
- Description:
15
- Estimates the average sequencing depth of subject sequences (genes or contigs)
16
- assuming a Zero-Inflated Poisson distribution (ZIP) to correct for non-covered
17
- positions. It uses the corrected method of moments estimators (CMMEs) as described
18
- by Beckett et al [1]. Note that [1] has a mistake in eq. (2.4), that should be:
19
- pi-hat-MM = 1 - (X-bar / lambda-hat-MM)
20
-
21
- Also note that a more elaborated mixture distribution can arise from coverage
22
- histograms (e.g., see [2] for an additional correction called 'tail distribution'
23
- and mixtures involving negative binomial) so take these results cum grano salis.
24
-
25
- Usage:
26
- cat blast1... | $0 genes_or_ctgs.fna > genes_or_ctgs.cov
27
-
28
- blast1... One or more Tabular BLAST files of reads vs genes (or contigs).
29
- genes_or_ctgs.fna A FastA file containing the genes or the contigs (db).
30
- genes_or_ctgs.cov The output file.
31
-
32
- Output:
33
- A tab-delimited file with the following columns (the one you want is #2):
34
- 1. Subject ID
35
- 2. Estimated average sequencing depth (CMME lambda)
36
- 3. Zero-inflation (CMME pi)
37
- 4. Observed average sequencing depth
38
- 5. Observed median sequencing depth
39
- 6. Observed median sequencing depth excluding zeroes
40
- 7. Number of mapped reads
41
- 8. Length of the subject sequence
42
-
43
- References:
44
- [1] http://anisette.ucs.louisiana.edu/Academic/Sciences/MATH/stage/stat2012.pdf
45
- [2] Lindner et al, Bioinformatics, 2013.
46
-
47
- ";
48
-
49
- my $size = {};
50
- my $gene = {};
51
- my $reads = {};
52
-
53
- SIZE:{
54
- local $/=">";
55
- print STDERR "== Reading fasta\n";
56
- open FNA, "<", $fna or die "Cannot read the file: $fna: $!\n";
57
- my $i=0;
58
- while(<FNA>){
59
- chomp;
60
- my @g = split /\n/, $_, 2;
61
- next unless $g[1];
62
- #$g[1] =~ s/[^A-Za-z]//g;
63
- #$size->{$g[0]} = length $g[1];
64
- $g[0] =~ s/\s.*//;
65
- $size->{$g[0]} = ( $g[1] =~ tr/[A-Za-z]// );
66
- print STDERR " Measuring sequence ".($i).": $g[0] \r" unless ++$i%500;
67
- }
68
- close FNA;
69
- print STDERR " Found $i sequences".(" "x30)."\n";
70
- }
71
-
72
- MAP:{
73
- print STDERR "== Reading mapping\n";
74
- my $i=0;
75
- while(<>){
76
- my @ln = split /\t/;
77
- $gene->{$ln[1]} ||= [];
78
- for my $pos (min($ln[8], $ln[9]) .. max($ln[8], $ln[9])){ ($gene->{$ln[1]}->[$pos]||=0)++ }
79
- ($reads->{$ln[1]} ||= 0)++;
80
- print STDERR " Saving hit ".($i).": $ln[1] \r" unless ++$i%5000;
81
- }
82
- print STDERR " Found $i hits".(" "x30)."\n";
83
- }
84
-
85
- OUT:{
86
- print STDERR "== Creating output\n";
87
- my $i=0;
88
- for my $g (keys %$gene){
89
- unless(exists $size->{$g}){
90
- warn "Warning: Cannot find gene in $fna: $g.\n";
91
- next;
92
- }
93
- $gene->{$g}->[$_] ||= 0 for (0 .. $size->{$g});
94
- die "Hits out-of-boundaries in gene $g: $#{$gene->{$g}} != $size->{$g}.\n" if $#{$gene->{$g}} != $size->{$g};
95
- my @sorted = sort {$a <=> $b} @{$gene->{$g}};
96
- my @sorted_nz = grep { $_>0 } @sorted;
97
- my $xbar = sum(@{$gene->{$g}})/$size->{$g};
98
- my $xsqbar = sum(map { ($_ - $xbar)**2 } @{$gene->{$g}})/($size->{$g}-1);
99
- my $var = $xsqbar - $xbar**2;
100
- my $lambdaMM = $xbar + ($var/$xbar) - 1;
101
- my $piMM = $lambdaMM==0 ? 0 : 1 - $xbar/$lambdaMM;
102
- printf "%s\t%.6f\t%.6f\t%.6f\t%d\t%d\t%d\t%d\n", $g,
103
- ($xbar >= $var ? $xbar : $lambdaMM),
104
- ($xbar >= $var ? 0 : $piMM),
105
- #$lambdaMM,
106
- #$piMM,
107
- sum(@{$gene->{$g}})/$size->{$g},
108
- $sorted[$#sorted/2],
109
- $sorted_nz[$#sorted_nz/2],
110
- $reads->{$g},
111
- $size->{$g};
112
- delete $gene->{$g};
113
- print STDERR " Saving sequence $g:".($i)." \r" unless ++$i%500;
114
- }
115
- print STDERR " Saved $i sequences".(" "x30)." \n";
116
- }
117
-
118
- print STDERR " done.\n";
119
-
@@ -1,86 +0,0 @@
1
- #!/usr/bin/env perl
2
- #
3
- # @author: Luis M Rodriguez-R <lmrodriguezr at gmail dot com>
4
- # @license: artistic license 2.0
5
- # @update: Mar-23-2015
6
- #
7
-
8
- use strict;
9
- use warnings;
10
- use List::Util qw/min max sum/;
11
-
12
- my $fna = shift @ARGV;
13
- $fna or die "
14
- Usage:
15
- cat blast1... | $0 genes_or_ctgs.fna > genes_or_ctgs.cov
16
-
17
- blast1... One or more Tabular BLAST files of reads vs genes (or contigs).
18
- genes_or_ctgs.fna A FastA file containing the genes or the contigs (db).
19
- genes_or_ctgs.cov The output file.
20
-
21
- Output:
22
- A tab-delimited file with the following columns:
23
- 1. Subject ID
24
- 2. Average sequencing depth
25
- 3. Number of mapped reads
26
- 4. Length of the subject sequence
27
-
28
- Note:
29
- The values reported by this script may differ from those of BlastTab.seqdepth.pl,
30
- because this script uses the aligned length of the read while BlastTab.seqdepth.pl
31
- uses the aligned length of the subject sequence.
32
-
33
- ";
34
-
35
- my $size = {};
36
- my $gene = {};
37
- my $reads = {};
38
-
39
- SIZE:{
40
- local $/=">";
41
- print STDERR "== Reading fasta\n";
42
- open FNA, "<", $fna or die "Cannot read the file: $fna: $!\n";
43
- my $i=0;
44
- while(<FNA>){
45
- chomp;
46
- my @g = split /\n/, $_, 2;
47
- next unless $g[1];
48
- #$g[1] =~ s/[^A-Za-z]//g;
49
- #$size->{$g[0]} = length $g[1];
50
- $g[0] =~ s/\s.*//;
51
- $size->{$g[0]} = ( $g[1] =~ tr/[A-Za-z]// );
52
- print STDERR " Measuring sequence ".($i).": $g[0] \r" unless ++$i%500;
53
- }
54
- close FNA;
55
- print STDERR " Found $i sequences".(" "x30)."\n";
56
- }
57
-
58
- MAP:{
59
- print STDERR "== Reading mapping\n";
60
- my $i=0;
61
- while(<>){
62
- my @ln = split /\t/;
63
- $gene->{$ln[1]} ||= 0;
64
- $gene->{$ln[1]} += abs($ln[6]-$ln[7])+1;
65
- ($reads->{$ln[1]} ||= 0)++;
66
- print STDERR " Saving hit ".($i).": $ln[1] \r" unless ++$i%5000;
67
- }
68
- print STDERR " Found $i hits".(" "x30)."\n";
69
- }
70
-
71
- OUT:{
72
- print STDERR "== Creating output\n";
73
- my $i=0;
74
- for my $g (keys %$gene){
75
- die "Cannot find gene in $fna: $g.\n" unless exists $size->{$g};
76
- printf "%s\t%.6f\t%d\t%d\n", $g,
77
- $gene->{$g}/$size->{$g},
78
- $reads->{$g},
79
- $size->{$g};
80
- print STDERR " Saving sequence $g:".($i)."\r" unless ++$i%500;
81
- }
82
- print STDERR " Saved $i sequences".(" "x30)."\n";
83
- }
84
-
85
- print STDERR " done.\n";
86
-
@@ -1,47 +0,0 @@
1
- #!/usr/bin/env perl
2
- #
3
- # @author Luis M Rodriguez-R <lmrodriguezr at gmail dot com>
4
- # @license artistic license 2.0
5
- # @update Mar-23-2015
6
- #
7
-
8
- use strict;
9
- use warnings;
10
-
11
- my($blast, $fasta) = @ARGV;
12
- ($blast and $fasta) or die "
13
- Description:
14
- Filters a BLAST output including only the hits produced by
15
- any of the given sequences as query.
16
-
17
- Usage:
18
- $0 blast.tab sample.fa > out.tab
19
-
20
- blast.tab BLAST output to be filtered (tabular format).
21
- sample.fa Sequences to use as query.
22
- out.tab The filtered BLAST output (tabular format).
23
-
24
- ";
25
-
26
- print STDERR "== Reading sequences\n";
27
- my $seq = {};
28
- open FASTA, "<", $fasta or die "Cannot read the file: $fasta: $!\n";
29
- while(<FASTA>){
30
- next unless /^>(\S+)/;
31
- $seq->{$1} = 1;
32
- }
33
- close FASTA;
34
- print STDERR " ".(scalar keys %$seq)." sequences to be used as query.\n";
35
-
36
- print STDERR "== Reading BLAST\n";
37
- my ($N,$n)=(0,0);
38
- open BLAST, "<", $blast or die "Cannot read the file: $blast: $!\n";
39
- while(my $ln = <BLAST>){
40
- next if $ln=~/^#/;
41
- $N++; my ($qry) = split /\t/, $ln;
42
- next unless exists $seq->{$qry};
43
- $n++; print $ln;
44
- }
45
- close BLAST;
46
- print STDERR " Reported $n entries out of $N.\n";
47
-
@@ -1,114 +0,0 @@
1
- #!/usr/bin/env perl
2
- #
3
- # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
4
- # @update Mar-23-2016
5
- # @license artistic license 2.0
6
- #
7
-
8
- use warnings;
9
- use strict;
10
- use Getopt::Std;
11
-
12
- sub HELP_MESSAGE {
13
- die "
14
- .Description
15
- Sums the weights of all the queries hitting each subject. Often (but not
16
- necessarily) the BLAST files contain only best matches. The weights can be
17
- any number, but a common use of this Script is to add up counts (weights are
18
- integers). For example, in a BLAST of predicted genes vs some annotation
19
- source, the weights could be the number of reads recruited by each gene.
20
-
21
- .Usage:
22
- $0 [options] blast... > out-file
23
-
24
- blast... * One or more BLAST files.
25
- out-file A two-columns tab-delimited file containing the summed weights
26
- per hit.
27
-
28
- -w <str> Weights file: A two-columns tab-delimited file containing the
29
- name (column 1) and the weight (column 2) of each query.
30
- -s <float> Minimum score. By default: 0.
31
- -i <float> Minimum identity (in percentage). By default: 0.
32
- -m <int> Maximum number of queries. Set to 0 for all. By default: 0.
33
- -n Normalize weights by the number of hits per query.
34
- -z Add zero when weight is not found (by default: doesn't list
35
- them).
36
- -q Run quietly.
37
- -h Display this message and exit.
38
-
39
- * Mandatory
40
-
41
- .Note:
42
- The weights (-w parameter) are optional, but its use is encouraged. When
43
- weights are not passed, the script simply assumes all queries to be equally
44
- weighted (unity), a result that can be faster to compute with, for example:
45
- cat blast | cut -f 2 | sort | uniq -c | awk '{print \$2\"\\t\"\$1}' > out
46
- It is equivalent to simply count the number of times that each subject
47
- occurs.
48
- "
49
- }
50
-
51
- my %o = ();
52
- getopts('w:s:i:m:znqh', \%o);
53
- $o{h} and &HELP_MESSAGE;
54
- $o{s}||=0;
55
- $o{i}||=0;
56
- $o{m}||=0;
57
-
58
- my %count;
59
- if($o{w}){
60
- print STDERR "Reading counts.\n" unless $o{q};
61
- open COUNT, "<", $o{w} or die "Cannot open file: $o{w}: $!\n";
62
- %count = map {split /\t/} <COUNT>;
63
- close COUNT;
64
- }
65
-
66
- print STDERR "Reading BLASTs.\n" unless $o{q};
67
- my $qry = '';
68
- my $hits = 0;
69
- my @buf = ();
70
- my $qries = 0;
71
- my $noQry = 0;
72
- my $ln1 = 0;
73
- my %out = ();
74
- BFILE: for my $blast (@ARGV){
75
- print STDERR " o $blast\n" unless $o{q};
76
- open BLAST, "<", $blast or die "Cannot open file: $blast: $!\n";
77
- BLINE: while(<BLAST>){
78
- chomp;
79
- my @ln = split /\t/;
80
- $ln1 ||= $#ln;
81
- die "Bad line $.: $_\n" unless $#ln==$ln1;
82
- next if ($o{s} and $ln[11]<$o{s}) or ($o{i} and $ln[2]<$o{i});
83
- unless(exists $count{$ln[0]}){
84
- $noQry++;
85
- if(not $o{w}){
86
- $count{$ln[0]}=1;
87
- }elsif($o{z}){
88
- $count{$ln[0]}=0;
89
- }else{
90
- next BLINE;
91
- }
92
- }
93
-
94
- if($qry ne $ln[0]){
95
- $qries++;
96
- ($out{$_->[0]}||=0) += ($_->[1]/($o{n}?$hits:1)) for @buf;
97
- last BFILE if $o{m} and $qries >= $o{m};
98
- @buf = ();
99
- $qry = $ln[0];
100
- $hits = 0;
101
- }
102
-
103
- push @buf, [$ln[1], $count{$ln[0]}];
104
- $hits++;
105
- }
106
- ($out{$_->[0]}||=0) += ($_->[1]/($o{n}?$hits:1)) for @buf;
107
- close BLAST;
108
- }
109
- print STDERR "Warning: Couldn't find $noQry queries\n" if $noQry and $o{w};
110
-
111
- for my $h (keys %out){
112
- print "$h\t".$out{$h}."\n";
113
- }
114
-