miga-base 0.7.26.0 → 0.7.26.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/version.rb +1 -1
  3. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +41964 -0
  4. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +32439 -0
  5. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +62056 -0
  6. data/utils/FastAAI/FastAAI/FastAAI +1336 -0
  7. data/utils/FastAAI/README.md +84 -0
  8. data/utils/FastAAI/kAAI_v1.0_virus.py +1296 -0
  9. data/utils/enveomics/Docs/recplot2.md +244 -0
  10. data/utils/enveomics/Examples/aai-matrix.bash +66 -0
  11. data/utils/enveomics/Examples/ani-matrix.bash +66 -0
  12. data/utils/enveomics/Examples/essential-phylogeny.bash +105 -0
  13. data/utils/enveomics/Examples/unus-genome-phylogeny.bash +100 -0
  14. data/utils/enveomics/LICENSE.txt +73 -0
  15. data/utils/enveomics/Makefile +52 -0
  16. data/utils/enveomics/Manifest/Tasks/aasubs.json +103 -0
  17. data/utils/enveomics/Manifest/Tasks/blasttab.json +786 -0
  18. data/utils/enveomics/Manifest/Tasks/distances.json +161 -0
  19. data/utils/enveomics/Manifest/Tasks/fasta.json +766 -0
  20. data/utils/enveomics/Manifest/Tasks/fastq.json +243 -0
  21. data/utils/enveomics/Manifest/Tasks/graphics.json +126 -0
  22. data/utils/enveomics/Manifest/Tasks/mapping.json +67 -0
  23. data/utils/enveomics/Manifest/Tasks/ogs.json +382 -0
  24. data/utils/enveomics/Manifest/Tasks/other.json +829 -0
  25. data/utils/enveomics/Manifest/Tasks/remote.json +355 -0
  26. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +501 -0
  27. data/utils/enveomics/Manifest/Tasks/tables.json +308 -0
  28. data/utils/enveomics/Manifest/Tasks/trees.json +68 -0
  29. data/utils/enveomics/Manifest/Tasks/variants.json +111 -0
  30. data/utils/enveomics/Manifest/categories.json +156 -0
  31. data/utils/enveomics/Manifest/examples.json +154 -0
  32. data/utils/enveomics/Manifest/tasks.json +4 -0
  33. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +69 -0
  34. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -0
  35. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -0
  36. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -0
  37. data/utils/enveomics/Pipelines/assembly.pbs/README.md +189 -0
  38. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +112 -0
  39. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +23 -0
  40. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +44 -0
  41. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +50 -0
  42. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +37 -0
  43. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +68 -0
  44. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +49 -0
  45. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +80 -0
  46. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +57 -0
  47. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +63 -0
  48. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +38 -0
  49. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +73 -0
  50. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +21 -0
  51. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +72 -0
  52. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +98 -0
  53. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -0
  54. data/utils/enveomics/Pipelines/blast.pbs/README.md +127 -0
  55. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +109 -0
  56. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +128 -0
  57. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +16 -0
  58. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +22 -0
  59. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +26 -0
  60. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +89 -0
  61. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +29 -0
  62. data/utils/enveomics/Pipelines/idba.pbs/README.md +49 -0
  63. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +95 -0
  64. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +56 -0
  65. data/utils/enveomics/Pipelines/trim.pbs/README.md +54 -0
  66. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +70 -0
  67. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +130 -0
  68. data/utils/enveomics/README.md +42 -0
  69. data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +171 -0
  70. data/utils/enveomics/Scripts/Aln.cat.rb +163 -0
  71. data/utils/enveomics/Scripts/Aln.convert.pl +35 -0
  72. data/utils/enveomics/Scripts/AlphaDiversity.pl +152 -0
  73. data/utils/enveomics/Scripts/BedGraph.tad.rb +93 -0
  74. data/utils/enveomics/Scripts/BedGraph.window.rb +71 -0
  75. data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +102 -0
  76. data/utils/enveomics/Scripts/BlastTab.addlen.rb +63 -0
  77. data/utils/enveomics/Scripts/BlastTab.advance.bash +48 -0
  78. data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +55 -0
  79. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +104 -0
  80. data/utils/enveomics/Scripts/BlastTab.cogCat.rb +76 -0
  81. data/utils/enveomics/Scripts/BlastTab.filter.pl +47 -0
  82. data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +194 -0
  83. data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +104 -0
  84. data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +157 -0
  85. data/utils/enveomics/Scripts/BlastTab.recplot2.R +48 -0
  86. data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +86 -0
  87. data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +119 -0
  88. data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +86 -0
  89. data/utils/enveomics/Scripts/BlastTab.subsample.pl +47 -0
  90. data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +114 -0
  91. data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +90 -0
  92. data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +101 -0
  93. data/utils/enveomics/Scripts/Chao1.pl +97 -0
  94. data/utils/enveomics/Scripts/CharTable.classify.rb +234 -0
  95. data/utils/enveomics/Scripts/EBIseq2tax.rb +83 -0
  96. data/utils/enveomics/Scripts/FastA.N50.pl +56 -0
  97. data/utils/enveomics/Scripts/FastA.extract.rb +152 -0
  98. data/utils/enveomics/Scripts/FastA.filter.pl +52 -0
  99. data/utils/enveomics/Scripts/FastA.filterLen.pl +28 -0
  100. data/utils/enveomics/Scripts/FastA.filterN.pl +60 -0
  101. data/utils/enveomics/Scripts/FastA.fragment.rb +92 -0
  102. data/utils/enveomics/Scripts/FastA.gc.pl +42 -0
  103. data/utils/enveomics/Scripts/FastA.interpose.pl +93 -0
  104. data/utils/enveomics/Scripts/FastA.length.pl +38 -0
  105. data/utils/enveomics/Scripts/FastA.mask.rb +89 -0
  106. data/utils/enveomics/Scripts/FastA.per_file.pl +36 -0
  107. data/utils/enveomics/Scripts/FastA.qlen.pl +57 -0
  108. data/utils/enveomics/Scripts/FastA.rename.pl +65 -0
  109. data/utils/enveomics/Scripts/FastA.revcom.pl +23 -0
  110. data/utils/enveomics/Scripts/FastA.sample.rb +83 -0
  111. data/utils/enveomics/Scripts/FastA.slider.pl +85 -0
  112. data/utils/enveomics/Scripts/FastA.split.pl +55 -0
  113. data/utils/enveomics/Scripts/FastA.split.rb +79 -0
  114. data/utils/enveomics/Scripts/FastA.subsample.pl +131 -0
  115. data/utils/enveomics/Scripts/FastA.tag.rb +65 -0
  116. data/utils/enveomics/Scripts/FastA.wrap.rb +48 -0
  117. data/utils/enveomics/Scripts/FastQ.filter.pl +54 -0
  118. data/utils/enveomics/Scripts/FastQ.interpose.pl +90 -0
  119. data/utils/enveomics/Scripts/FastQ.offset.pl +90 -0
  120. data/utils/enveomics/Scripts/FastQ.split.pl +53 -0
  121. data/utils/enveomics/Scripts/FastQ.tag.rb +63 -0
  122. data/utils/enveomics/Scripts/FastQ.test-error.rb +81 -0
  123. data/utils/enveomics/Scripts/FastQ.toFastA.awk +24 -0
  124. data/utils/enveomics/Scripts/GFF.catsbj.pl +127 -0
  125. data/utils/enveomics/Scripts/GenBank.add_fields.rb +84 -0
  126. data/utils/enveomics/Scripts/HMM.essential.rb +351 -0
  127. data/utils/enveomics/Scripts/HMM.haai.rb +168 -0
  128. data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +83 -0
  129. data/utils/enveomics/Scripts/JPlace.distances.rb +88 -0
  130. data/utils/enveomics/Scripts/JPlace.to_iToL.rb +320 -0
  131. data/utils/enveomics/Scripts/M5nr.getSequences.rb +81 -0
  132. data/utils/enveomics/Scripts/MeTaxa.distribution.pl +198 -0
  133. data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +35 -0
  134. data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +49 -0
  135. data/utils/enveomics/Scripts/NCBIacc2tax.rb +92 -0
  136. data/utils/enveomics/Scripts/Newick.autoprune.R +27 -0
  137. data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +228 -0
  138. data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +32 -0
  139. data/utils/enveomics/Scripts/RefSeq.download.bash +48 -0
  140. data/utils/enveomics/Scripts/SRA.download.bash +57 -0
  141. data/utils/enveomics/Scripts/TRIBS.plot-test.R +36 -0
  142. data/utils/enveomics/Scripts/TRIBS.test.R +39 -0
  143. data/utils/enveomics/Scripts/Table.barplot.R +31 -0
  144. data/utils/enveomics/Scripts/Table.df2dist.R +30 -0
  145. data/utils/enveomics/Scripts/Table.filter.pl +61 -0
  146. data/utils/enveomics/Scripts/Table.merge.pl +77 -0
  147. data/utils/enveomics/Scripts/Table.replace.rb +69 -0
  148. data/utils/enveomics/Scripts/Table.round.rb +63 -0
  149. data/utils/enveomics/Scripts/Table.split.pl +57 -0
  150. data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +227 -0
  151. data/utils/enveomics/Scripts/VCF.KaKs.rb +147 -0
  152. data/utils/enveomics/Scripts/VCF.SNPs.rb +88 -0
  153. data/utils/enveomics/Scripts/aai.rb +418 -0
  154. data/utils/enveomics/Scripts/ani.rb +362 -0
  155. data/utils/enveomics/Scripts/clust.rand.rb +102 -0
  156. data/utils/enveomics/Scripts/gi2tax.rb +103 -0
  157. data/utils/enveomics/Scripts/in_silico_GA_GI.pl +96 -0
  158. data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
  159. data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
  160. data/utils/enveomics/Scripts/lib/enveomics.R +1 -0
  161. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +24 -0
  162. data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +253 -0
  163. data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +182 -0
  164. data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +74 -0
  165. data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +237 -0
  166. data/utils/enveomics/Scripts/lib/enveomics_rb/stat.rb +30 -0
  167. data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +135 -0
  168. data/utils/enveomics/Scripts/ogs.annotate.rb +88 -0
  169. data/utils/enveomics/Scripts/ogs.core-pan.rb +160 -0
  170. data/utils/enveomics/Scripts/ogs.extract.rb +125 -0
  171. data/utils/enveomics/Scripts/ogs.mcl.rb +186 -0
  172. data/utils/enveomics/Scripts/ogs.rb +104 -0
  173. data/utils/enveomics/Scripts/ogs.stats.rb +131 -0
  174. data/utils/enveomics/Scripts/rbm.rb +146 -0
  175. data/utils/enveomics/Tests/Makefile +10 -0
  176. data/utils/enveomics/Tests/Mgen_M2288.faa +3189 -0
  177. data/utils/enveomics/Tests/Mgen_M2288.fna +8282 -0
  178. data/utils/enveomics/Tests/Mgen_M2321.fna +8288 -0
  179. data/utils/enveomics/Tests/Nequ_Kin4M.faa +2970 -0
  180. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
  181. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +7 -0
  182. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +17 -0
  183. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +137 -0
  184. data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +123 -0
  185. data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +200 -0
  186. data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +55 -0
  187. data/utils/enveomics/Tests/alkB.nwk +1 -0
  188. data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +13 -0
  189. data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +17 -0
  190. data/utils/enveomics/Tests/hiv1.faa +59 -0
  191. data/utils/enveomics/Tests/hiv1.fna +134 -0
  192. data/utils/enveomics/Tests/hiv2.faa +70 -0
  193. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +233 -0
  194. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +1 -0
  195. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +233 -0
  196. data/utils/enveomics/Tests/phyla_counts.tsv +10 -0
  197. data/utils/enveomics/Tests/primate_lentivirus.ogs +11 -0
  198. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +9 -0
  199. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +8 -0
  200. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +6 -0
  201. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +9 -0
  202. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +6 -0
  203. data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +6 -0
  204. data/utils/enveomics/build_enveomics_r.bash +45 -0
  205. data/utils/enveomics/enveomics.R/DESCRIPTION +31 -0
  206. data/utils/enveomics/enveomics.R/NAMESPACE +39 -0
  207. data/utils/enveomics/enveomics.R/R/autoprune.R +155 -0
  208. data/utils/enveomics/enveomics.R/R/barplot.R +184 -0
  209. data/utils/enveomics/enveomics.R/R/cliopts.R +135 -0
  210. data/utils/enveomics/enveomics.R/R/df2dist.R +154 -0
  211. data/utils/enveomics/enveomics.R/R/growthcurve.R +331 -0
  212. data/utils/enveomics/enveomics.R/R/recplot.R +354 -0
  213. data/utils/enveomics/enveomics.R/R/recplot2.R +1631 -0
  214. data/utils/enveomics/enveomics.R/R/tribs.R +583 -0
  215. data/utils/enveomics/enveomics.R/R/utils.R +50 -0
  216. data/utils/enveomics/enveomics.R/README.md +80 -0
  217. data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
  218. data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
  219. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +17 -0
  220. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +17 -0
  221. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +17 -0
  222. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +25 -0
  223. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +46 -0
  224. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -0
  225. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +47 -0
  226. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -0
  227. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -0
  228. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +32 -0
  229. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +91 -0
  230. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +57 -0
  231. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -0
  232. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -0
  233. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +39 -0
  234. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +38 -0
  235. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +40 -0
  236. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +67 -0
  237. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +37 -0
  238. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +122 -0
  239. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +45 -0
  240. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -0
  241. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +68 -0
  242. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -0
  243. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -0
  244. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -0
  245. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -0
  246. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +41 -0
  247. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -0
  248. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -0
  249. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +40 -0
  250. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -0
  251. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -0
  252. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -0
  253. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -0
  254. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +41 -0
  255. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -0
  256. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +43 -0
  257. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +37 -0
  258. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +74 -0
  259. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +59 -0
  260. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -0
  261. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +32 -0
  262. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +59 -0
  263. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -0
  264. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -0
  265. data/utils/enveomics/enveomics.R/man/growth.curves.Rd +14 -0
  266. data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +13 -0
  267. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +63 -0
  268. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +38 -0
  269. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +38 -0
  270. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +111 -0
  271. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -0
  272. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -0
  273. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -0
  274. data/utils/enveomics/globals.mk +8 -0
  275. data/utils/enveomics/manifest.json +9 -0
  276. metadata +277 -4
@@ -0,0 +1,27 @@
1
+ % Generated by roxygen2: do not edit by hand
2
+ % Please edit documentation in R/recplot2.R
3
+ \name{enve.recplot2.findPeaks.__emauto_one}
4
+ \alias{enve.recplot2.findPeaks.__emauto_one}
5
+ \title{Enveomics: Recruitment Plot (2) EMauto Peak Finder - Internal Ancillary Function}
6
+ \usage{
7
+ enve.recplot2.findPeaks.__emauto_one(x, comp, do_crit, best, verbose, ...)
8
+ }
9
+ \arguments{
10
+ \item{x}{\code{\link{enve.RecPlot2}} object}
11
+
12
+ \item{comp}{Components}
13
+
14
+ \item{do_crit}{Function estimating the criterion}
15
+
16
+ \item{best}{Best solution thus far}
17
+
18
+ \item{verbose}{If verbose}
19
+
20
+ \item{...}{Additional parameters for \code{\link{enve.recplot2.findPeaks.em}}}
21
+ }
22
+ \description{
23
+ Internal ancillary function (see \code{\link{enve.recplot2.findPeaks.emauto}}).
24
+ }
25
+ \author{
26
+ Luis M. Rodriguez-R [aut, cre]
27
+ }
@@ -0,0 +1,41 @@
1
+ % Generated by roxygen2: do not edit by hand
2
+ % Please edit documentation in R/recplot2.R
3
+ \name{enve.recplot2.findPeaks.__mow_one}
4
+ \alias{enve.recplot2.findPeaks.__mow_one}
5
+ \title{Enveomics: Recruitment Plot (2) Mowing Peak Finder - Internal Ancillary Function 1}
6
+ \usage{
7
+ enve.recplot2.findPeaks.__mow_one(lsd1, min.points, quant.est, mlv.opts,
8
+ fitdist.opts, with.skewness, optim.rounds, optim.epsilon, n.total,
9
+ merge.logdist, verbose, log)
10
+ }
11
+ \arguments{
12
+ \item{lsd1}{Vector of log-transformed sequencing depths}
13
+
14
+ \item{min.points}{Minimum number of points}
15
+
16
+ \item{quant.est}{Quantile estimate}
17
+
18
+ \item{mlv.opts}{List of options for \code{mlv}}
19
+
20
+ \item{fitdist.opts}{List of options for \code{fitdist}}
21
+
22
+ \item{with.skewness}{If skewed-normal should be used}
23
+
24
+ \item{optim.rounds}{Maximum number of optimization rounds}
25
+
26
+ \item{optim.epsilon}{Minimum difference considered negligible}
27
+
28
+ \item{n.total}{Global number of windows}
29
+
30
+ \item{merge.logdist}{Attempted \code{merge.logdist} parameter}
31
+
32
+ \item{verbose}{If verbose}
33
+
34
+ \item{log}{If log-transformed depths}
35
+ }
36
+ \description{
37
+ Internal ancillary function (see \code{\link{enve.recplot2.findPeaks.mower}}).
38
+ }
39
+ \author{
40
+ Luis M. Rodriguez-R [aut, cre]
41
+ }
@@ -0,0 +1,17 @@
1
+ % Generated by roxygen2: do not edit by hand
2
+ % Please edit documentation in R/recplot2.R
3
+ \name{enve.recplot2.findPeaks.__mower}
4
+ \alias{enve.recplot2.findPeaks.__mower}
5
+ \title{Enveomics: Recruitment Plot (2) Mowing Peak Finder - Internal Ancillary Function 2}
6
+ \usage{
7
+ enve.recplot2.findPeaks.__mower(peaks.opts)
8
+ }
9
+ \arguments{
10
+ \item{peaks.opts}{List of options for \code{\link{enve.recplot2.findPeaks.__mow_one}}}
11
+ }
12
+ \description{
13
+ Internal ancillary function (see \code{\link{enve.recplot2.findPeaks.mower}}).
14
+ }
15
+ \author{
16
+ Luis M. Rodriguez-R [aut, cre]
17
+ }
@@ -0,0 +1,43 @@
1
+ % Generated by roxygen2: do not edit by hand
2
+ % Please edit documentation in R/recplot2.R
3
+ \name{enve.recplot2.findPeaks.em}
4
+ \alias{enve.recplot2.findPeaks.em}
5
+ \title{Enveomics: Recruitment Plot (2) EM Peak Finder}
6
+ \usage{
7
+ enve.recplot2.findPeaks.em(x, max.iter = 1000, ll.diff.res = 1e-08,
8
+ components = 2, rm.top = 0.05, verbose = FALSE, init, log = TRUE)
9
+ }
10
+ \arguments{
11
+ \item{x}{An \code{\link{enve.RecPlot2}} object.}
12
+
13
+ \item{max.iter}{Maximum number of EM iterations.}
14
+
15
+ \item{ll.diff.res}{Maximum Log-Likelihood difference to be considered as convergent.}
16
+
17
+ \item{components}{Number of distributions assumed in the mixture.}
18
+
19
+ \item{rm.top}{Top-values to remove before finding peaks, as a quantile probability.
20
+ This step is useful to remove highly conserved regions, but can be
21
+ turned off by setting \code{rm.top=0}. The quantile is determined
22
+ \strong{after} removing zero-coverage windows.}
23
+
24
+ \item{verbose}{Display (mostly debugging) information.}
25
+
26
+ \item{init}{Initialization parameters. By default, these are derived from k-means
27
+ clustering. A named list with vectors for \code{mu}, \code{sd}, and
28
+ \code{alpha}, each of length \code{components}.}
29
+
30
+ \item{log}{Logical value indicating if the estimations should be performed in
31
+ natural logarithm units. Do not change unless you know what you're
32
+ doing.}
33
+ }
34
+ \value{
35
+ Returns a list of \code{\link{enve.RecPlot2.Peak}} objects.
36
+ }
37
+ \description{
38
+ Identifies peaks in the population histogram using a Gaussian Mixture
39
+ Model Expectation Maximization (GMM-EM) method.
40
+ }
41
+ \author{
42
+ Luis M. Rodriguez-R [aut, cre]
43
+ }
@@ -0,0 +1,37 @@
1
+ % Generated by roxygen2: do not edit by hand
2
+ % Please edit documentation in R/recplot2.R
3
+ \name{enve.recplot2.findPeaks.emauto}
4
+ \alias{enve.recplot2.findPeaks.emauto}
5
+ \title{Enveomics: Recruitment Plot (2) EMauto Peak Finder}
6
+ \usage{
7
+ enve.recplot2.findPeaks.emauto(x, components = seq(1, 5),
8
+ criterion = "aic", merge.tol = 2L, verbose = FALSE, ...)
9
+ }
10
+ \arguments{
11
+ \item{x}{An \code{\link{enve.RecPlot2}} object.}
12
+
13
+ \item{components}{A vector of number of components to evaluate.}
14
+
15
+ \item{criterion}{Criterion to use for components selection. Must be one of:
16
+ \code{aic} (Akaike Information Criterion), \code{bic} or \code{sbc}
17
+ (Bayesian Information Criterion or Schwarz Criterion).}
18
+
19
+ \item{merge.tol}{When attempting to merge peaks with very similar sequencing depth, use
20
+ this number of significant digits (in log-scale).}
21
+
22
+ \item{verbose}{Display (mostly debugging) information.}
23
+
24
+ \item{...}{Any additional parameters supported by
25
+ \code{\link{enve.recplot2.findPeaks.em}}.}
26
+ }
27
+ \value{
28
+ Returns a list of \code{\link{enve.RecPlot2.Peak}} objects.
29
+ }
30
+ \description{
31
+ Identifies peaks in the population histogram using a Gaussian Mixture
32
+ Model Expectation Maximization (GMM-EM) method with number of components
33
+ automatically detected.
34
+ }
35
+ \author{
36
+ Luis M. Rodriguez-R [aut, cre]
37
+ }
@@ -0,0 +1,74 @@
1
+ % Generated by roxygen2: do not edit by hand
2
+ % Please edit documentation in R/recplot2.R
3
+ \name{enve.recplot2.findPeaks.mower}
4
+ \alias{enve.recplot2.findPeaks.mower}
5
+ \title{Enveomics: Recruitment Plot (2) Mowing Peak Finder}
6
+ \usage{
7
+ enve.recplot2.findPeaks.mower(x, min.points = 10, quant.est = c(0.002,
8
+ 0.998), mlv.opts = list(method = "parzen"),
9
+ fitdist.opts.sn = list(distr = "sn", method = "qme", probs = c(0.1,
10
+ 0.5, 0.8), start = list(omega = 1, alpha = -1), lower = c(0, -Inf,
11
+ -Inf)), fitdist.opts.norm = list(distr = "norm", method = "qme", probs
12
+ = c(0.4, 0.6), start = list(sd = 1), lower = c(0, -Inf)),
13
+ rm.top = 0.05, with.skewness = TRUE, optim.rounds = 200,
14
+ optim.epsilon = 1e-04, merge.logdist = log(1.75), verbose = FALSE,
15
+ log = TRUE)
16
+ }
17
+ \arguments{
18
+ \item{x}{An \code{\link{enve.RecPlot2}} object.}
19
+
20
+ \item{min.points}{Minimum number of points in the quantile-estimation-range
21
+ \code{(quant.est)} to estimate a peak.}
22
+
23
+ \item{quant.est}{Range of quantiles to be used in the estimation of a peak's
24
+ parameters.}
25
+
26
+ \item{mlv.opts}{Ignored. For backwards compatibility.}
27
+
28
+ \item{fitdist.opts.sn}{Options passed to \code{fitdist} to estimate the standard deviation if
29
+ \code{with.skewness=TRUE}. Note that the \code{start} parameter will be
30
+ amended with \code{xi=estimated} mode for each peak.}
31
+
32
+ \item{fitdist.opts.norm}{Options passed to \code{fitdist} to estimate the standard deviation if
33
+ \code{with.skewness=FALSE}. Note that the \code{start} parameter will be
34
+ amended with \code{mean=estimated} mode for each peak.}
35
+
36
+ \item{rm.top}{Top-values to remove before finding peaks, as a quantile probability.
37
+ This step is useful to remove highly conserved regions, but can be
38
+ turned off by setting \code{rm.top=0}. The quantile is determined
39
+ \strong{after} removing zero-coverage windows.}
40
+
41
+ \item{with.skewness}{Allow skewness correction of the peaks. Typically, the
42
+ sequencing-depth distribution for a single peak is left-skewed, due
43
+ partly (but not exclusively) to fragmentation and mapping sensitivity.
44
+ See \emph{Lindner et al 2013, Bioinformatics 29(10):1260-7} for an
45
+ alternative solution for the first problem (fragmentation) called
46
+ "tail distribution".}
47
+
48
+ \item{optim.rounds}{Maximum rounds of peak optimization.}
49
+
50
+ \item{optim.epsilon}{Trace change at which optimization stops (unless \code{optim.rounds} is
51
+ reached first). The trace change is estimated as the sum of square
52
+ differences between parameters in one round and those from two rounds
53
+ earlier (to avoid infinite loops from approximation).}
54
+
55
+ \item{merge.logdist}{Maximum value of \code{|log-ratio|} between centrality parameters in peaks
56
+ to attempt merging. The default of ~0.56 corresponds to a maximum
57
+ difference of 75\%.}
58
+
59
+ \item{verbose}{Display (mostly debugging) information.}
60
+
61
+ \item{log}{Logical value indicating if the estimations should be performed in
62
+ natural logarithm units. Do not change unless you know what you're
63
+ doing.}
64
+ }
65
+ \value{
66
+ Returns a list of \code{\link{enve.RecPlot2.Peak}} objects.
67
+ }
68
+ \description{
69
+ Identifies peaks in the population histogram potentially indicating
70
+ sub-population mixtures, using a custom distribution-mowing method.
71
+ }
72
+ \author{
73
+ Luis M. Rodriguez-R [aut, cre]
74
+ }
@@ -0,0 +1,59 @@
1
+ % Generated by roxygen2: do not edit by hand
2
+ % Please edit documentation in R/recplot2.R
3
+ \docType{class}
4
+ \name{enve.RecPlot2.Peak-class}
5
+ \alias{enve.RecPlot2.Peak-class}
6
+ \alias{enve.RecPlot2.Peak}
7
+ \title{Enveomics: Recruitment Plot (2) Peak - S4 Class}
8
+ \description{
9
+ Enve-omics representation of a peak in the sequencing depth histogram
10
+ of a Recruitment plot (see \code{\link{enve.recplot2.findPeaks}}).
11
+ }
12
+ \section{Slots}{
13
+
14
+ \describe{
15
+ \item{\code{dist}}{\code{(character)}
16
+ Distribution of the peak. Currently supported: \code{norm} (normal) and \code{sn}
17
+ (skew-normal).}
18
+
19
+ \item{\code{values}}{\code{(numeric)}
20
+ Sequencing depth values predicted to conform the peak.}
21
+
22
+ \item{\code{values.res}}{\code{(numeric)}
23
+ Sequencing depth values not explained by this or previously identified
24
+ peaks.}
25
+
26
+ \item{\code{mode}}{\code{(numeric)}
27
+ Seed-value of mode anchoring the peak.}
28
+
29
+ \item{\code{param.hat}}{\code{(list)}
30
+ Parameters of the distribution. A list of two values if dist=\code{norm} (sd
31
+ and mean), or three values if dist=\code{sn} (omega=scale, alpha=shape, and
32
+ xi=location). Note that the "dispersion" parameter is always first and
33
+ the "location" parameter is always last.}
34
+
35
+ \item{\code{n.hat}}{\code{(numeric)}
36
+ Number of bins estimated to be explained by this peak. This should
37
+ ideally be equal to the length of \code{values}, but it's not an integer.}
38
+
39
+ \item{\code{n.total}}{\code{(numeric)}
40
+ Total number of bins from which the peak was extracted. I.e., total
41
+ number of position bins with non-zero sequencing depth in the recruitment
42
+ plot (regardless of peak count).}
43
+
44
+ \item{\code{err.res}}{\code{(numeric)}
45
+ Error left after adding the peak (mower) or log-likelihood (em or emauto).}
46
+
47
+ \item{\code{merge.logdist}}{\code{(numeric)}
48
+ Attempted \code{merge.logdist} parameter.}
49
+
50
+ \item{\code{seq.depth}}{\code{(numeric)}
51
+ Best estimate available for the sequencing depth of the peak (centrality).}
52
+
53
+ \item{\code{log}}{\code{(logical)}
54
+ Indicates if the estimation was performed in natural logarithm space.}
55
+ }}
56
+
57
+ \author{
58
+ Luis M. Rodriguez-R [aut, cre]
59
+ }
@@ -0,0 +1,27 @@
1
+ % Generated by roxygen2: do not edit by hand
2
+ % Please edit documentation in R/recplot2.R
3
+ \name{enve.recplot2.seqdepth}
4
+ \alias{enve.recplot2.seqdepth}
5
+ \title{Enveomics: Recruitment Plot (2) Sequencing Depth}
6
+ \usage{
7
+ enve.recplot2.seqdepth(x, sel, low.identity = FALSE)
8
+ }
9
+ \arguments{
10
+ \item{x}{\code{\link{enve.RecPlot2}} object.}
11
+
12
+ \item{sel}{Window(s) for which the sequencing depth is to be calculated. If not
13
+ passed, it returns the sequencing depth of all windows.}
14
+
15
+ \item{low.identity}{A logical indicating if the sequencing depth is to be estimated only
16
+ with low-identity matches. By default, only high-identity matches are
17
+ used.}
18
+ }
19
+ \value{
20
+ Returns a numeric vector of sequencing depths (in bp/bp).
21
+ }
22
+ \description{
23
+ Calculate the sequencing depth of the given window(s).
24
+ }
25
+ \author{
26
+ Luis M. Rodriguez-R [aut, cre]
27
+ }
@@ -0,0 +1,32 @@
1
+ % Generated by roxygen2: do not edit by hand
2
+ % Please edit documentation in R/recplot2.R
3
+ \name{enve.recplot2.windowDepthThreshold}
4
+ \alias{enve.recplot2.windowDepthThreshold}
5
+ \title{Enveomics: Recruitment Plot (2) Window Depth Threshold}
6
+ \usage{
7
+ enve.recplot2.windowDepthThreshold(rp, peak, lower.tail = TRUE,
8
+ significance = 0.05)
9
+ }
10
+ \arguments{
11
+ \item{rp}{Recruitment plot, an \code{\link{enve.RecPlot2}} object.}
12
+
13
+ \item{peak}{Peak, an \code{\link{enve.RecPlot2.Peak}} object. If list, it is assumed to be a
14
+ list of \code{\link{enve.RecPlot2.Peak}} objects, in which case the core peak is
15
+ used (see \code{\link{enve.recplot2.corePeak}}).}
16
+
17
+ \item{lower.tail}{If \code{FALSE}, it returns windows significantly above the peak in
18
+ sequencing depth.}
19
+
20
+ \item{significance}{Significance threshold (alpha) to select windows.}
21
+ }
22
+ \value{
23
+ Returns a float. The units are depth if the peaks were estimated in
24
+ linear scale, or log-depth otherwise (\code{peak$log}).
25
+ }
26
+ \description{
27
+ Identifies the threshold below which windows should be identified as
28
+ variable or absent.
29
+ }
30
+ \author{
31
+ Luis M. Rodriguez-R [aut, cre]
32
+ }
@@ -0,0 +1,59 @@
1
+ % Generated by roxygen2: do not edit by hand
2
+ % Please edit documentation in R/tribs.R
3
+ \name{enve.tribs}
4
+ \alias{enve.tribs}
5
+ \title{Enveomics: TRIBS}
6
+ \usage{
7
+ enve.tribs(dist, selection = labels(dist), replicates = 1000,
8
+ summary.fx = median, dist.method = "euclidean", subsamples = seq(0,
9
+ 1, by = 0.01), dimensions = ceiling(length(selection) * 0.05),
10
+ metaMDS.opts = list(), threads = 2, verbosity = 1, points,
11
+ pre.tribs)
12
+ }
13
+ \arguments{
14
+ \item{dist}{Distances as a \code{dist} object.}
15
+
16
+ \item{selection}{Objects to include in the subsample. By default, all objects are
17
+ selected.}
18
+
19
+ \item{replicates}{Number of replications per point.}
20
+
21
+ \item{summary.fx}{Function to summarize the distance distributions in a given replicate. By
22
+ default, the median distance is estimated.}
23
+
24
+ \item{dist.method}{Distance method between random points and samples in the transformed
25
+ space. See \code{dist}.}
26
+
27
+ \item{subsamples}{Subsampling fractions.}
28
+
29
+ \item{dimensions}{Dimensions to use in the NMDS. By default, 5\% of the selection length.}
30
+
31
+ \item{metaMDS.opts}{Any additional options to pass to metaMDS, as \code{list}.}
32
+
33
+ \item{threads}{Number of threads to use.}
34
+
35
+ \item{verbosity}{Verbosity. Use 0 to run quietly, increase for additional information.}
36
+
37
+ \item{points}{Optional. If passed, the MDS step is skipped and this object is used
38
+ instead. It can be the \code{$points} slot of class \code{metaMDS}
39
+ (from \code{vegan}).
40
+ It must be a matrix or matrix-coercible object, with samples as rows and
41
+ dimensions as columns.}
42
+
43
+ \item{pre.tribs}{Optional. If passed, the points are recovered from this object (except if
44
+ \code{points} is also passed). This should be an \code{\link{enve.TRIBS}} object
45
+ estimated on the same objects (the selection is unimportant).}
46
+ }
47
+ \value{
48
+ Returns an \code{\link{enve.TRIBS}} object.
49
+ }
50
+ \description{
51
+ Subsample any objects in "distance space" to reduce the effect of
52
+ sample-clustering. This function was originally designed to subsample
53
+ genomes in "phylogenetic distance space", a clear case of strong
54
+ clustering bias in sampling, by Luis M. Rodriguez-R and Michael R
55
+ Weigand.
56
+ }
57
+ \author{
58
+ Luis M. Rodriguez-R [aut, cre]
59
+ }
@@ -0,0 +1,28 @@
1
+ % Generated by roxygen2: do not edit by hand
2
+ % Please edit documentation in R/tribs.R
3
+ \name{enve.tribs.test}
4
+ \alias{enve.tribs.test}
5
+ \title{Enveomics: TRIBS Test}
6
+ \usage{
7
+ enve.tribs.test(dist, selection, bins = 50, ...)
8
+ }
9
+ \arguments{
10
+ \item{dist}{Distances as \code{dist} object.}
11
+
12
+ \item{selection}{Selection defining the subset.}
13
+
14
+ \item{bins}{Number of bins to evaluate in the range of distances.}
15
+
16
+ \item{...}{Any other parameters supported by \code{\link{enve.tribs}},
17
+ except \code{subsamples}.}
18
+ }
19
+ \value{
20
+ Returns an \code{\link{enve.TRIBStest}} object.
21
+ }
22
+ \description{
23
+ Estimates the empirical difference between all the distances in a set of
24
+ objects and a subset, together with its statistical significance.
25
+ }
26
+ \author{
27
+ Luis M. Rodriguez-R [aut, cre]
28
+ }