miga-base 0.7.26.0 → 1.0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (337) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/_data/aai-intax.blast.tsv.gz +0 -0
  3. data/lib/miga/_data/aai-intax.diamond.tsv.gz +0 -0
  4. data/lib/miga/_data/aai-novel.blast.tsv.gz +0 -0
  5. data/lib/miga/_data/aai-novel.diamond.tsv.gz +0 -0
  6. data/lib/miga/cli/action/classify_wf.rb +2 -2
  7. data/lib/miga/cli/action/derep_wf.rb +1 -1
  8. data/lib/miga/cli/action/doctor.rb +57 -14
  9. data/lib/miga/cli/action/doctor/base.rb +47 -23
  10. data/lib/miga/cli/action/init.rb +11 -7
  11. data/lib/miga/cli/action/init/files_helper.rb +1 -0
  12. data/lib/miga/cli/action/ncbi_get.rb +3 -3
  13. data/lib/miga/cli/action/tax_dist.rb +2 -2
  14. data/lib/miga/cli/action/wf.rb +5 -4
  15. data/lib/miga/common.rb +1 -0
  16. data/lib/miga/daemon.rb +11 -4
  17. data/lib/miga/dataset/result.rb +10 -6
  18. data/lib/miga/json.rb +5 -4
  19. data/lib/miga/metadata.rb +5 -1
  20. data/lib/miga/parallel.rb +36 -0
  21. data/lib/miga/project.rb +8 -8
  22. data/lib/miga/project/base.rb +4 -4
  23. data/lib/miga/project/result.rb +2 -2
  24. data/lib/miga/sqlite.rb +10 -2
  25. data/lib/miga/version.rb +23 -9
  26. data/scripts/aai_distances.bash +16 -18
  27. data/scripts/ani_distances.bash +16 -17
  28. data/scripts/assembly.bash +31 -16
  29. data/scripts/haai_distances.bash +3 -27
  30. data/scripts/miga.bash +6 -4
  31. data/scripts/p.bash +1 -1
  32. data/scripts/read_quality.bash +9 -18
  33. data/scripts/trimmed_fasta.bash +14 -30
  34. data/scripts/trimmed_reads.bash +36 -36
  35. data/test/parallel_test.rb +31 -0
  36. data/test/project_test.rb +2 -1
  37. data/test/remote_dataset_test.rb +1 -1
  38. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +41964 -0
  39. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +32439 -0
  40. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +62056 -0
  41. data/utils/FastAAI/FastAAI/FastAAI +1336 -0
  42. data/utils/FastAAI/README.md +84 -0
  43. data/utils/FastAAI/kAAI_v1.0_virus.py +1296 -0
  44. data/utils/distance/commands.rb +1 -0
  45. data/utils/distance/database.rb +0 -1
  46. data/utils/distance/runner.rb +2 -4
  47. data/utils/enveomics/Docs/recplot2.md +244 -0
  48. data/utils/enveomics/Examples/aai-matrix.bash +66 -0
  49. data/utils/enveomics/Examples/ani-matrix.bash +66 -0
  50. data/utils/enveomics/Examples/essential-phylogeny.bash +105 -0
  51. data/utils/enveomics/Examples/unus-genome-phylogeny.bash +100 -0
  52. data/utils/enveomics/LICENSE.txt +73 -0
  53. data/utils/enveomics/Makefile +52 -0
  54. data/utils/enveomics/Manifest/Tasks/aasubs.json +103 -0
  55. data/utils/enveomics/Manifest/Tasks/blasttab.json +786 -0
  56. data/utils/enveomics/Manifest/Tasks/distances.json +161 -0
  57. data/utils/enveomics/Manifest/Tasks/fasta.json +802 -0
  58. data/utils/enveomics/Manifest/Tasks/fastq.json +291 -0
  59. data/utils/enveomics/Manifest/Tasks/graphics.json +126 -0
  60. data/utils/enveomics/Manifest/Tasks/mapping.json +137 -0
  61. data/utils/enveomics/Manifest/Tasks/ogs.json +382 -0
  62. data/utils/enveomics/Manifest/Tasks/other.json +906 -0
  63. data/utils/enveomics/Manifest/Tasks/remote.json +355 -0
  64. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +638 -0
  65. data/utils/enveomics/Manifest/Tasks/tables.json +308 -0
  66. data/utils/enveomics/Manifest/Tasks/trees.json +68 -0
  67. data/utils/enveomics/Manifest/Tasks/variants.json +111 -0
  68. data/utils/enveomics/Manifest/categories.json +165 -0
  69. data/utils/enveomics/Manifest/examples.json +154 -0
  70. data/utils/enveomics/Manifest/tasks.json +4 -0
  71. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +69 -0
  72. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -0
  73. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -0
  74. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -0
  75. data/utils/enveomics/Pipelines/assembly.pbs/README.md +189 -0
  76. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +112 -0
  77. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +23 -0
  78. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +44 -0
  79. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +50 -0
  80. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +37 -0
  81. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +68 -0
  82. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +49 -0
  83. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +80 -0
  84. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +57 -0
  85. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +63 -0
  86. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +38 -0
  87. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +73 -0
  88. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +21 -0
  89. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +72 -0
  90. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +98 -0
  91. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -0
  92. data/utils/enveomics/Pipelines/blast.pbs/README.md +127 -0
  93. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +109 -0
  94. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +128 -0
  95. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +16 -0
  96. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +22 -0
  97. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +26 -0
  98. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +89 -0
  99. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +29 -0
  100. data/utils/enveomics/Pipelines/idba.pbs/README.md +49 -0
  101. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +95 -0
  102. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +56 -0
  103. data/utils/enveomics/Pipelines/trim.pbs/README.md +54 -0
  104. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +70 -0
  105. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +130 -0
  106. data/utils/enveomics/README.md +42 -0
  107. data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +171 -0
  108. data/utils/enveomics/Scripts/Aln.cat.rb +221 -0
  109. data/utils/enveomics/Scripts/Aln.convert.pl +35 -0
  110. data/utils/enveomics/Scripts/AlphaDiversity.pl +152 -0
  111. data/utils/enveomics/Scripts/BedGraph.tad.rb +93 -0
  112. data/utils/enveomics/Scripts/BedGraph.window.rb +71 -0
  113. data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +102 -0
  114. data/utils/enveomics/Scripts/BlastTab.addlen.rb +63 -0
  115. data/utils/enveomics/Scripts/BlastTab.advance.bash +48 -0
  116. data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +55 -0
  117. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +104 -0
  118. data/utils/enveomics/Scripts/BlastTab.cogCat.rb +76 -0
  119. data/utils/enveomics/Scripts/BlastTab.filter.pl +47 -0
  120. data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +194 -0
  121. data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +104 -0
  122. data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +157 -0
  123. data/utils/enveomics/Scripts/BlastTab.recplot2.R +48 -0
  124. data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +86 -0
  125. data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +119 -0
  126. data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +86 -0
  127. data/utils/enveomics/Scripts/BlastTab.subsample.pl +47 -0
  128. data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +114 -0
  129. data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +90 -0
  130. data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +101 -0
  131. data/utils/enveomics/Scripts/Chao1.pl +97 -0
  132. data/utils/enveomics/Scripts/CharTable.classify.rb +234 -0
  133. data/utils/enveomics/Scripts/EBIseq2tax.rb +83 -0
  134. data/utils/enveomics/Scripts/FastA.N50.pl +60 -0
  135. data/utils/enveomics/Scripts/FastA.extract.rb +152 -0
  136. data/utils/enveomics/Scripts/FastA.filter.pl +52 -0
  137. data/utils/enveomics/Scripts/FastA.filterLen.pl +28 -0
  138. data/utils/enveomics/Scripts/FastA.filterN.pl +60 -0
  139. data/utils/enveomics/Scripts/FastA.fragment.rb +100 -0
  140. data/utils/enveomics/Scripts/FastA.gc.pl +42 -0
  141. data/utils/enveomics/Scripts/FastA.interpose.pl +93 -0
  142. data/utils/enveomics/Scripts/FastA.length.pl +38 -0
  143. data/utils/enveomics/Scripts/FastA.mask.rb +89 -0
  144. data/utils/enveomics/Scripts/FastA.per_file.pl +36 -0
  145. data/utils/enveomics/Scripts/FastA.qlen.pl +57 -0
  146. data/utils/enveomics/Scripts/FastA.rename.pl +65 -0
  147. data/utils/enveomics/Scripts/FastA.revcom.pl +23 -0
  148. data/utils/enveomics/Scripts/FastA.sample.rb +98 -0
  149. data/utils/enveomics/Scripts/FastA.slider.pl +85 -0
  150. data/utils/enveomics/Scripts/FastA.split.pl +55 -0
  151. data/utils/enveomics/Scripts/FastA.split.rb +79 -0
  152. data/utils/enveomics/Scripts/FastA.subsample.pl +131 -0
  153. data/utils/enveomics/Scripts/FastA.tag.rb +65 -0
  154. data/utils/enveomics/Scripts/FastA.toFastQ.rb +69 -0
  155. data/utils/enveomics/Scripts/FastA.wrap.rb +48 -0
  156. data/utils/enveomics/Scripts/FastQ.filter.pl +54 -0
  157. data/utils/enveomics/Scripts/FastQ.interpose.pl +90 -0
  158. data/utils/enveomics/Scripts/FastQ.maskQual.rb +89 -0
  159. data/utils/enveomics/Scripts/FastQ.offset.pl +90 -0
  160. data/utils/enveomics/Scripts/FastQ.split.pl +53 -0
  161. data/utils/enveomics/Scripts/FastQ.tag.rb +70 -0
  162. data/utils/enveomics/Scripts/FastQ.test-error.rb +81 -0
  163. data/utils/enveomics/Scripts/FastQ.toFastA.awk +24 -0
  164. data/utils/enveomics/Scripts/GFF.catsbj.pl +127 -0
  165. data/utils/enveomics/Scripts/GenBank.add_fields.rb +84 -0
  166. data/utils/enveomics/Scripts/HMM.essential.rb +351 -0
  167. data/utils/enveomics/Scripts/HMM.haai.rb +168 -0
  168. data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +83 -0
  169. data/utils/enveomics/Scripts/JPlace.distances.rb +88 -0
  170. data/utils/enveomics/Scripts/JPlace.to_iToL.rb +320 -0
  171. data/utils/enveomics/Scripts/M5nr.getSequences.rb +81 -0
  172. data/utils/enveomics/Scripts/MeTaxa.distribution.pl +198 -0
  173. data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +35 -0
  174. data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +49 -0
  175. data/utils/enveomics/Scripts/NCBIacc2tax.rb +92 -0
  176. data/utils/enveomics/Scripts/Newick.autoprune.R +27 -0
  177. data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +228 -0
  178. data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +32 -0
  179. data/utils/enveomics/Scripts/RefSeq.download.bash +48 -0
  180. data/utils/enveomics/Scripts/SRA.download.bash +55 -0
  181. data/utils/enveomics/Scripts/TRIBS.plot-test.R +36 -0
  182. data/utils/enveomics/Scripts/TRIBS.test.R +39 -0
  183. data/utils/enveomics/Scripts/Table.barplot.R +31 -0
  184. data/utils/enveomics/Scripts/Table.df2dist.R +30 -0
  185. data/utils/enveomics/Scripts/Table.filter.pl +61 -0
  186. data/utils/enveomics/Scripts/Table.merge.pl +77 -0
  187. data/utils/enveomics/Scripts/Table.prefScore.R +60 -0
  188. data/utils/enveomics/Scripts/Table.replace.rb +69 -0
  189. data/utils/enveomics/Scripts/Table.round.rb +63 -0
  190. data/utils/enveomics/Scripts/Table.split.pl +57 -0
  191. data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +227 -0
  192. data/utils/enveomics/Scripts/VCF.KaKs.rb +147 -0
  193. data/utils/enveomics/Scripts/VCF.SNPs.rb +88 -0
  194. data/utils/enveomics/Scripts/aai.rb +419 -0
  195. data/utils/enveomics/Scripts/ani.rb +362 -0
  196. data/utils/enveomics/Scripts/anir.rb +137 -0
  197. data/utils/enveomics/Scripts/clust.rand.rb +102 -0
  198. data/utils/enveomics/Scripts/gi2tax.rb +103 -0
  199. data/utils/enveomics/Scripts/in_silico_GA_GI.pl +96 -0
  200. data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
  201. data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
  202. data/utils/enveomics/Scripts/lib/enveomics.R +1 -0
  203. data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +293 -0
  204. data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +175 -0
  205. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +24 -0
  206. data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +17 -0
  207. data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +30 -0
  208. data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +253 -0
  209. data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +63 -0
  210. data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +182 -0
  211. data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +49 -0
  212. data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +74 -0
  213. data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +237 -0
  214. data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +3 -0
  215. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +31 -0
  216. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +152 -0
  217. data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +73 -0
  218. data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +135 -0
  219. data/utils/enveomics/Scripts/ogs.annotate.rb +88 -0
  220. data/utils/enveomics/Scripts/ogs.core-pan.rb +160 -0
  221. data/utils/enveomics/Scripts/ogs.extract.rb +125 -0
  222. data/utils/enveomics/Scripts/ogs.mcl.rb +186 -0
  223. data/utils/enveomics/Scripts/ogs.rb +104 -0
  224. data/utils/enveomics/Scripts/ogs.stats.rb +131 -0
  225. data/utils/enveomics/Scripts/rbm-legacy.rb +172 -0
  226. data/utils/enveomics/Scripts/rbm.rb +100 -0
  227. data/utils/enveomics/Scripts/sam.filter.rb +148 -0
  228. data/utils/enveomics/Tests/Makefile +10 -0
  229. data/utils/enveomics/Tests/Mgen_M2288.faa +3189 -0
  230. data/utils/enveomics/Tests/Mgen_M2288.fna +8282 -0
  231. data/utils/enveomics/Tests/Mgen_M2321.fna +8288 -0
  232. data/utils/enveomics/Tests/Nequ_Kin4M.faa +2970 -0
  233. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
  234. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +7 -0
  235. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +17 -0
  236. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +137 -0
  237. data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +123 -0
  238. data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +200 -0
  239. data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +55 -0
  240. data/utils/enveomics/Tests/alkB.nwk +1 -0
  241. data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +13 -0
  242. data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +17 -0
  243. data/utils/enveomics/Tests/hiv1.faa +59 -0
  244. data/utils/enveomics/Tests/hiv1.fna +134 -0
  245. data/utils/enveomics/Tests/hiv2.faa +70 -0
  246. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +233 -0
  247. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +1 -0
  248. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +233 -0
  249. data/utils/enveomics/Tests/phyla_counts.tsv +10 -0
  250. data/utils/enveomics/Tests/primate_lentivirus.ogs +11 -0
  251. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +9 -0
  252. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +8 -0
  253. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +6 -0
  254. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +9 -0
  255. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +6 -0
  256. data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +6 -0
  257. data/utils/enveomics/build_enveomics_r.bash +45 -0
  258. data/utils/enveomics/enveomics.R/DESCRIPTION +31 -0
  259. data/utils/enveomics/enveomics.R/NAMESPACE +39 -0
  260. data/utils/enveomics/enveomics.R/R/autoprune.R +155 -0
  261. data/utils/enveomics/enveomics.R/R/barplot.R +184 -0
  262. data/utils/enveomics/enveomics.R/R/cliopts.R +135 -0
  263. data/utils/enveomics/enveomics.R/R/df2dist.R +154 -0
  264. data/utils/enveomics/enveomics.R/R/growthcurve.R +331 -0
  265. data/utils/enveomics/enveomics.R/R/prefscore.R +79 -0
  266. data/utils/enveomics/enveomics.R/R/recplot.R +354 -0
  267. data/utils/enveomics/enveomics.R/R/recplot2.R +1631 -0
  268. data/utils/enveomics/enveomics.R/R/tribs.R +583 -0
  269. data/utils/enveomics/enveomics.R/R/utils.R +80 -0
  270. data/utils/enveomics/enveomics.R/README.md +81 -0
  271. data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
  272. data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
  273. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +16 -0
  274. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +16 -0
  275. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +16 -0
  276. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +25 -0
  277. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +46 -0
  278. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -0
  279. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +47 -0
  280. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -0
  281. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -0
  282. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +40 -0
  283. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +103 -0
  284. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +67 -0
  285. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -0
  286. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -0
  287. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +45 -0
  288. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +44 -0
  289. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +47 -0
  290. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +75 -0
  291. data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +50 -0
  292. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +44 -0
  293. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +139 -0
  294. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +45 -0
  295. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -0
  296. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +77 -0
  297. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -0
  298. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -0
  299. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -0
  300. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -0
  301. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +47 -0
  302. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -0
  303. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -0
  304. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +45 -0
  305. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -0
  306. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -0
  307. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -0
  308. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -0
  309. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +52 -0
  310. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -0
  311. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +51 -0
  312. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +43 -0
  313. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +82 -0
  314. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +59 -0
  315. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -0
  316. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +36 -0
  317. data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +23 -0
  318. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +68 -0
  319. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -0
  320. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -0
  321. data/utils/enveomics/enveomics.R/man/growth.curves.Rd +14 -0
  322. data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +13 -0
  323. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +78 -0
  324. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +46 -0
  325. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +45 -0
  326. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +125 -0
  327. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -0
  328. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -0
  329. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -0
  330. data/utils/enveomics/globals.mk +8 -0
  331. data/utils/enveomics/manifest.json +9 -0
  332. data/utils/multitrim/Multitrim How-To.pdf +0 -0
  333. data/utils/multitrim/README.md +67 -0
  334. data/utils/multitrim/multitrim.py +1555 -0
  335. data/utils/multitrim/multitrim.yml +13 -0
  336. data/utils/requirements.txt +4 -3
  337. metadata +304 -3
@@ -0,0 +1,29 @@
1
+ % Generated by roxygen2: do not edit by hand
2
+ % Please edit documentation in R/recplot2.R
3
+ \name{enve.recplot2.coordinates}
4
+ \alias{enve.recplot2.coordinates}
5
+ \title{Enveomics: Recruitment Plot (2) Coordinates}
6
+ \usage{
7
+ enve.recplot2.coordinates(x, bins)
8
+ }
9
+ \arguments{
10
+ \item{x}{\code{\link{enve.RecPlot2}} object.}
11
+
12
+ \item{bins}{Vector of selected bins to return. It can be a vector of logical values
13
+ with length \code{length(x$pos.breaks)-1} or a vector of integers. If
14
+ missing, returns the coordinates of all windows.}
15
+ }
16
+ \value{
17
+ Returns a data.frame with five columns: \code{name.from} (character),
18
+ \code{pos.from} (numeric), \code{name.to} (character), \code{pos.to}
19
+ (numeric), and \code{seq.name} (character).
20
+ The first two correspond to sequence and position of the start point of the
21
+ bin. The next two correspond to the sequence and position of the end point of
22
+ the bin. The last one indicates the name of the sequence (if defined).
23
+ }
24
+ \description{
25
+ Returns the sequence name and coordinates of the requested position bins.
26
+ }
27
+ \author{
28
+ Luis M. Rodriguez-R [aut, cre]
29
+ }
@@ -0,0 +1,18 @@
1
+ % Generated by roxygen2: do not edit by hand
2
+ % Please edit documentation in R/recplot2.R
3
+ \name{enve.recplot2.corePeak}
4
+ \alias{enve.recplot2.corePeak}
5
+ \title{Enveomics: Recruitment Plot (2) Core Peak Finder}
6
+ \usage{
7
+ enve.recplot2.corePeak(x)
8
+ }
9
+ \arguments{
10
+ \item{x}{\code{list} of \code{\link{enve.RecPlot2.Peak}} objects.}
11
+ }
12
+ \description{
13
+ Finds the peak in a list of peaks that is most likely to represent the
14
+ "core genome" of a population.
15
+ }
16
+ \author{
17
+ Luis M. Rodriguez-R [aut, cre]
18
+ }
@@ -0,0 +1,45 @@
1
+ % Generated by roxygen2: do not edit by hand
2
+ % Please edit documentation in R/recplot2.R
3
+ \name{enve.recplot2.extractWindows}
4
+ \alias{enve.recplot2.extractWindows}
5
+ \title{Enveomics: Recruitment Plot (2) Extract Windows}
6
+ \usage{
7
+ enve.recplot2.extractWindows(
8
+ rp,
9
+ peak,
10
+ lower.tail = TRUE,
11
+ significance = 0.05,
12
+ seq.names = FALSE
13
+ )
14
+ }
15
+ \arguments{
16
+ \item{rp}{Recruitment plot, a \code{\link{enve.RecPlot2}} object.}
17
+
18
+ \item{peak}{Peak, an \code{\link{enve.RecPlot2.Peak}} object. If a list, it is assumed to be a
19
+ list of \code{\link{enve.RecPlot2.Peak}} objects, in which case the core peak is
20
+ used (see \code{\link{enve.recplot2.corePeak}}).}
21
+
22
+ \item{lower.tail}{If \code{FALSE}, it returns windows significantly above the peak in
23
+ sequencing depth.}
24
+
25
+ \item{significance}{Significance threshold (alpha) to select windows.}
26
+
27
+ \item{seq.names}{Returns subject sequence names instead of a vector of Booleans. If
28
+ the recruitment plot was generated with named position bins (e.g., using
29
+ \code{pos.breaks=0} or a two-column \code{pos.breaks.tsv}), it returns a
30
+ vector of characters (the sequence identifiers), otherwise it returns a
31
+ data.frame with a name column and two columns of coordinates.}
32
+ }
33
+ \value{
34
+ Returns a vector of logicals if \code{seq.names = FALSE}.
35
+ If \code{seq.names = TRUE}, it returns a data.frame with five columns:
36
+ \code{name.from}, \code{pos.from}, \code{name.to}, \code{pos.to}, and
37
+ \code{seq.name} (see \code{\link{enve.recplot2.coordinates}}).
38
+ }
39
+ \description{
40
+ Extract windows significantly below (or above) the peak in sequencing
41
+ depth.
42
+ }
43
+ \author{
44
+ Luis M. Rodriguez-R [aut, cre]
45
+ }
@@ -0,0 +1,36 @@
1
+ % Generated by roxygen2: do not edit by hand
2
+ % Please edit documentation in R/recplot2.R
3
+ \name{enve.recplot2.findPeaks}
4
+ \alias{enve.recplot2.findPeaks}
5
+ \title{Enveomics: Recruitment Plot (2) Peak Finder}
6
+ \usage{
7
+ enve.recplot2.findPeaks(x, method = "emauto", ...)
8
+ }
9
+ \arguments{
10
+ \item{x}{An \code{\link{enve.RecPlot2}} object.}
11
+
12
+ \item{method}{Peak-finder method. This should be one of:
13
+ \itemize{
14
+ \item \strong{emauto}
15
+ (Expectation-Maximization with auto-selection of components)
16
+ \item \strong{em}
17
+ (Expectation-Maximization)
18
+ \item \strong{mower}
19
+ (Custom distribution-mowing method)
20
+ }}
21
+
22
+ \item{...}{Any additional parameters supported by
23
+ the function implementing the selected \code{method} (e.g., \code{\link{enve.recplot2.findPeaks.emauto}}).}
24
+ }
25
+ \value{
26
+ Returns a list of \code{\link{enve.RecPlot2.Peak}} objects.
27
+ }
28
+ \description{
29
+ Identifies peaks in the population histogram potentially indicating
30
+ sub-population mixtures.
31
+ }
32
+ \author{
33
+ Luis M. Rodriguez-R [aut, cre]
34
+
35
+
36
+ }
@@ -0,0 +1,19 @@
1
+ % Generated by roxygen2: do not edit by hand
2
+ % Please edit documentation in R/recplot2.R
3
+ \name{enve.recplot2.findPeaks.__em_e}
4
+ \alias{enve.recplot2.findPeaks.__em_e}
5
+ \title{Enveomics: Recruitment Plot (2) EM Peak Finder - Internal Ancillary Function Expectation}
6
+ \usage{
7
+ enve.recplot2.findPeaks.__em_e(x, theta)
8
+ }
9
+ \arguments{
10
+ \item{x}{Vector of log-transformed sequencing depths}
11
+
12
+ \item{theta}{Parameters list}
13
+ }
14
+ \description{
15
+ Internal ancillary function (see \code{\link{enve.recplot2.findPeaks.em}}).
16
+ }
17
+ \author{
18
+ Luis M. Rodriguez-R [aut, cre]
19
+ }
@@ -0,0 +1,19 @@
1
+ % Generated by roxygen2: do not edit by hand
2
+ % Please edit documentation in R/recplot2.R
3
+ \name{enve.recplot2.findPeaks.__em_m}
4
+ \alias{enve.recplot2.findPeaks.__em_m}
5
+ \title{Enveomics: Recruitment Plot (2) EM Peak Finder - Internal Ancillary Function Maximization}
6
+ \usage{
7
+ enve.recplot2.findPeaks.__em_m(x, posterior)
8
+ }
9
+ \arguments{
10
+ \item{x}{Vector of log-transformed sequencing depths}
11
+
12
+ \item{posterior}{Posterior probability}
13
+ }
14
+ \description{
15
+ Internal ancillary function (see \code{\link{enve.recplot2.findPeaks.em}}).
16
+ }
17
+ \author{
18
+ Luis M. Rodriguez-R [aut, cre]
19
+ }
@@ -0,0 +1,27 @@
1
+ % Generated by roxygen2: do not edit by hand
2
+ % Please edit documentation in R/recplot2.R
3
+ \name{enve.recplot2.findPeaks.__emauto_one}
4
+ \alias{enve.recplot2.findPeaks.__emauto_one}
5
+ \title{Enveomics: Recruitment Plot (2) EMauto Peak Finder - Internal Ancillary Function}
6
+ \usage{
7
+ enve.recplot2.findPeaks.__emauto_one(x, comp, do_crit, best, verbose, ...)
8
+ }
9
+ \arguments{
10
+ \item{x}{\code{\link{enve.RecPlot2}} object}
11
+
12
+ \item{comp}{Components}
13
+
14
+ \item{do_crit}{Function estimating the criterion}
15
+
16
+ \item{best}{Best solution thus far}
17
+
18
+ \item{verbose}{If verbose}
19
+
20
+ \item{...}{Additional parameters for \code{\link{enve.recplot2.findPeaks.em}}}
21
+ }
22
+ \description{
23
+ Internal ancillary function (see \code{\link{enve.recplot2.findPeaks.emauto}}).
24
+ }
25
+ \author{
26
+ Luis M. Rodriguez-R [aut, cre]
27
+ }
@@ -0,0 +1,52 @@
1
+ % Generated by roxygen2: do not edit by hand
2
+ % Please edit documentation in R/recplot2.R
3
+ \name{enve.recplot2.findPeaks.__mow_one}
4
+ \alias{enve.recplot2.findPeaks.__mow_one}
5
+ \title{Enveomics: Recruitment Plot (2) Mowing Peak Finder - Internal Ancillary Function 1}
6
+ \usage{
7
+ enve.recplot2.findPeaks.__mow_one(
8
+ lsd1,
9
+ min.points,
10
+ quant.est,
11
+ mlv.opts,
12
+ fitdist.opts,
13
+ with.skewness,
14
+ optim.rounds,
15
+ optim.epsilon,
16
+ n.total,
17
+ merge.logdist,
18
+ verbose,
19
+ log
20
+ )
21
+ }
22
+ \arguments{
23
+ \item{lsd1}{Vector of log-transformed sequencing depths}
24
+
25
+ \item{min.points}{Minimum number of points}
26
+
27
+ \item{quant.est}{Quantile estimate}
28
+
29
+ \item{mlv.opts}{List of options for \code{mlv}}
30
+
31
+ \item{fitdist.opts}{List of options for \code{fitdist}}
32
+
33
+ \item{with.skewness}{If skewed-normal should be used}
34
+
35
+ \item{optim.rounds}{Maximum number of optimization rounds}
36
+
37
+ \item{optim.epsilon}{Minimum difference considered negligible}
38
+
39
+ \item{n.total}{Global number of windows}
40
+
41
+ \item{merge.logdist}{Attempted \code{merge.logdist} parameter}
42
+
43
+ \item{verbose}{If verbose}
44
+
45
+ \item{log}{If log-transformed depths}
46
+ }
47
+ \description{
48
+ Internal ancillary function (see \code{\link{enve.recplot2.findPeaks.mower}}).
49
+ }
50
+ \author{
51
+ Luis M. Rodriguez-R [aut, cre]
52
+ }
@@ -0,0 +1,17 @@
1
+ % Generated by roxygen2: do not edit by hand
2
+ % Please edit documentation in R/recplot2.R
3
+ \name{enve.recplot2.findPeaks.__mower}
4
+ \alias{enve.recplot2.findPeaks.__mower}
5
+ \title{Enveomics: Recruitment Plot (2) Mowing Peak Finder - Internal Ancillary Function 2}
6
+ \usage{
7
+ enve.recplot2.findPeaks.__mower(peaks.opts)
8
+ }
9
+ \arguments{
10
+ \item{peaks.opts}{List of options for \code{\link{enve.recplot2.findPeaks.__mow_one}}}
11
+ }
12
+ \description{
13
+ Internal ancillary function (see \code{\link{enve.recplot2.findPeaks.mower}}).
14
+ }
15
+ \author{
16
+ Luis M. Rodriguez-R [aut, cre]
17
+ }
@@ -0,0 +1,51 @@
1
+ % Generated by roxygen2: do not edit by hand
2
+ % Please edit documentation in R/recplot2.R
3
+ \name{enve.recplot2.findPeaks.em}
4
+ \alias{enve.recplot2.findPeaks.em}
5
+ \title{Enveomics: Recruitment Plot (2) EM Peak Finder}
6
+ \usage{
7
+ enve.recplot2.findPeaks.em(
8
+ x,
9
+ max.iter = 1000,
10
+ ll.diff.res = 1e-08,
11
+ components = 2,
12
+ rm.top = 0.05,
13
+ verbose = FALSE,
14
+ init,
15
+ log = TRUE
16
+ )
17
+ }
18
+ \arguments{
19
+ \item{x}{An \code{\link{enve.RecPlot2}} object.}
20
+
21
+ \item{max.iter}{Maximum number of EM iterations.}
22
+
23
+ \item{ll.diff.res}{Maximum Log-Likelihood difference to be considered as convergent.}
24
+
25
+ \item{components}{Number of distributions assumed in the mixture.}
26
+
27
+ \item{rm.top}{Top-values to remove before finding peaks, as a quantile probability.
28
+ This step is useful to remove highly conserved regions, but can be
29
+ turned off by setting \code{rm.top=0}. The quantile is determined
30
+ \strong{after} removing zero-coverage windows.}
31
+
32
+ \item{verbose}{Display (mostly debugging) information.}
33
+
34
+ \item{init}{Initialization parameters. By default, these are derived from k-means
35
+ clustering. A named list with vectors for \code{mu}, \code{sd}, and
36
+ \code{alpha}, each of length \code{components}.}
37
+
38
+ \item{log}{Logical value indicating if the estimations should be performed in
39
+ natural logarithm units. Do not change unless you know what you're
40
+ doing.}
41
+ }
42
+ \value{
43
+ Returns a list of \code{\link{enve.RecPlot2.Peak}} objects.
44
+ }
45
+ \description{
46
+ Identifies peaks in the population histogram using a Gaussian Mixture
47
+ Model Expectation Maximization (GMM-EM) method.
48
+ }
49
+ \author{
50
+ Luis M. Rodriguez-R [aut, cre]
51
+ }
@@ -0,0 +1,43 @@
1
+ % Generated by roxygen2: do not edit by hand
2
+ % Please edit documentation in R/recplot2.R
3
+ \name{enve.recplot2.findPeaks.emauto}
4
+ \alias{enve.recplot2.findPeaks.emauto}
5
+ \title{Enveomics: Recruitment Plot (2) Emauto Peak Finder}
6
+ \usage{
7
+ enve.recplot2.findPeaks.emauto(
8
+ x,
9
+ components = seq(1, 5),
10
+ criterion = "aic",
11
+ merge.tol = 2L,
12
+ verbose = FALSE,
13
+ ...
14
+ )
15
+ }
16
+ \arguments{
17
+ \item{x}{An \code{\link{enve.RecPlot2}} object.}
18
+
19
+ \item{components}{A vector of number of components to evaluate.}
20
+
21
+ \item{criterion}{Criterion to use for components selection. Must be one of:
22
+ \code{aic} (Akaike Information Criterion), \code{bic} or \code{sbc}
23
+ (Bayesian Information Criterion or Schwarz Criterion).}
24
+
25
+ \item{merge.tol}{When attempting to merge peaks with very similar sequencing depth, use
26
+ this number of significant digits (in log-scale).}
27
+
28
+ \item{verbose}{Display (mostly debugging) information.}
29
+
30
+ \item{...}{Any additional parameters supported by
31
+ \code{\link{enve.recplot2.findPeaks.em}}.}
32
+ }
33
+ \value{
34
+ Returns a list of \code{\link{enve.RecPlot2.Peak}} objects.
35
+ }
36
+ \description{
37
+ Identifies peaks in the population histogram using a Gaussian Mixture
38
+ Model Expectation Maximization (GMM-EM) method with number of components
39
+ automatically detected.
40
+ }
41
+ \author{
42
+ Luis M. Rodriguez-R [aut, cre]
43
+ }
@@ -0,0 +1,82 @@
1
+ % Generated by roxygen2: do not edit by hand
2
+ % Please edit documentation in R/recplot2.R
3
+ \name{enve.recplot2.findPeaks.mower}
4
+ \alias{enve.recplot2.findPeaks.mower}
5
+ \title{Enveomics: Recruitment Plot (2) Mowing Peak Finder}
6
+ \usage{
7
+ enve.recplot2.findPeaks.mower(
8
+ x,
9
+ min.points = 10,
10
+ quant.est = c(0.002, 0.998),
11
+ mlv.opts = list(method = "parzen"),
12
+ fitdist.opts.sn = list(distr = "sn", method = "qme", probs = c(0.1, 0.5, 0.8), start =
13
+ list(omega = 1, alpha = -1), lower = c(0, -Inf, -Inf)),
14
+ fitdist.opts.norm = list(distr = "norm", method = "qme", probs = c(0.4, 0.6), start =
15
+ list(sd = 1), lower = c(0, -Inf)),
16
+ rm.top = 0.05,
17
+ with.skewness = TRUE,
18
+ optim.rounds = 200,
19
+ optim.epsilon = 1e-04,
20
+ merge.logdist = log(1.75),
21
+ verbose = FALSE,
22
+ log = TRUE
23
+ )
24
+ }
25
+ \arguments{
26
+ \item{x}{An \code{\link{enve.RecPlot2}} object.}
27
+
28
+ \item{min.points}{Minimum number of points in the quantile-estimation-range
29
+ \code{(quant.est)} to estimate a peak.}
30
+
31
+ \item{quant.est}{Range of quantiles to be used in the estimation of a peak's
32
+ parameters.}
33
+
34
+ \item{mlv.opts}{Ignored. For backwards compatibility.}
35
+
36
+ \item{fitdist.opts.sn}{Options passed to \code{fitdist} to estimate the standard deviation if
37
+ \code{with.skewness=TRUE}. Note that the \code{start} parameter will be
38
+ amended with \code{xi} set to the estimated mode for each peak.}
39
+
40
+ \item{fitdist.opts.norm}{Options passed to \code{fitdist} to estimate the standard deviation if
41
+ \code{with.skewness=FALSE}. Note that the \code{start} parameter will be
42
+ amended with \code{mean} set to the estimated mode for each peak.}
43
+
44
+ \item{rm.top}{Top-values to remove before finding peaks, as a quantile probability.
45
+ This step is useful to remove highly conserved regions, but can be
46
+ turned off by setting \code{rm.top=0}. The quantile is determined
47
+ \strong{after} removing zero-coverage windows.}
48
+
49
+ \item{with.skewness}{Allow skewness correction of the peaks. Typically, the
50
+ sequencing-depth distribution for a single peak is left-skewed, due
51
+ partly (but not exclusively) to fragmentation and mapping sensitivity.
52
+ See \emph{Lindner et al 2013, Bioinformatics 29(10):1260-7} for an
53
+ alternative solution for the first problem (fragmentation) called
54
+ "tail distribution".}
55
+
56
+ \item{optim.rounds}{Maximum rounds of peak optimization.}
57
+
58
+ \item{optim.epsilon}{Trace change at which optimization stops (unless \code{optim.rounds} is
59
+ reached first). The trace change is estimated as the sum of square
60
+ differences between parameters in one round and those from two rounds
61
+ earlier (to avoid infinite loops from approximation).}
62
+
63
+ \item{merge.logdist}{Maximum value of \code{|log-ratio|} between centrality parameters in peaks
64
+ to attempt merging. The default of ~0.56 corresponds to a maximum
65
+ difference of 75\%.}
66
+
67
+ \item{verbose}{Display (mostly debugging) information.}
68
+
69
+ \item{log}{Logical value indicating if the estimations should be performed in
70
+ natural logarithm units. Do not change unless you know what you're
71
+ doing.}
72
+ }
73
+ \value{
74
+ Returns a list of \code{\link{enve.RecPlot2.Peak}} objects.
75
+ }
76
+ \description{
77
+ Identifies peaks in the population histogram potentially indicating
78
+ sub-population mixtures, using a custom distribution-mowing method.
79
+ }
80
+ \author{
81
+ Luis M. Rodriguez-R [aut, cre]
82
+ }