miga-base 0.7.26.0 → 1.0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (337) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/_data/aai-intax.blast.tsv.gz +0 -0
  3. data/lib/miga/_data/aai-intax.diamond.tsv.gz +0 -0
  4. data/lib/miga/_data/aai-novel.blast.tsv.gz +0 -0
  5. data/lib/miga/_data/aai-novel.diamond.tsv.gz +0 -0
  6. data/lib/miga/cli/action/classify_wf.rb +2 -2
  7. data/lib/miga/cli/action/derep_wf.rb +1 -1
  8. data/lib/miga/cli/action/doctor.rb +57 -14
  9. data/lib/miga/cli/action/doctor/base.rb +47 -23
  10. data/lib/miga/cli/action/init.rb +11 -7
  11. data/lib/miga/cli/action/init/files_helper.rb +1 -0
  12. data/lib/miga/cli/action/ncbi_get.rb +3 -3
  13. data/lib/miga/cli/action/tax_dist.rb +2 -2
  14. data/lib/miga/cli/action/wf.rb +5 -4
  15. data/lib/miga/common.rb +1 -0
  16. data/lib/miga/daemon.rb +11 -4
  17. data/lib/miga/dataset/result.rb +10 -6
  18. data/lib/miga/json.rb +5 -4
  19. data/lib/miga/metadata.rb +5 -1
  20. data/lib/miga/parallel.rb +36 -0
  21. data/lib/miga/project.rb +8 -8
  22. data/lib/miga/project/base.rb +4 -4
  23. data/lib/miga/project/result.rb +2 -2
  24. data/lib/miga/sqlite.rb +10 -2
  25. data/lib/miga/version.rb +23 -9
  26. data/scripts/aai_distances.bash +16 -18
  27. data/scripts/ani_distances.bash +16 -17
  28. data/scripts/assembly.bash +31 -16
  29. data/scripts/haai_distances.bash +3 -27
  30. data/scripts/miga.bash +6 -4
  31. data/scripts/p.bash +1 -1
  32. data/scripts/read_quality.bash +9 -18
  33. data/scripts/trimmed_fasta.bash +14 -30
  34. data/scripts/trimmed_reads.bash +36 -36
  35. data/test/parallel_test.rb +31 -0
  36. data/test/project_test.rb +2 -1
  37. data/test/remote_dataset_test.rb +1 -1
  38. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +41964 -0
  39. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +32439 -0
  40. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +62056 -0
  41. data/utils/FastAAI/FastAAI/FastAAI +1336 -0
  42. data/utils/FastAAI/README.md +84 -0
  43. data/utils/FastAAI/kAAI_v1.0_virus.py +1296 -0
  44. data/utils/distance/commands.rb +1 -0
  45. data/utils/distance/database.rb +0 -1
  46. data/utils/distance/runner.rb +2 -4
  47. data/utils/enveomics/Docs/recplot2.md +244 -0
  48. data/utils/enveomics/Examples/aai-matrix.bash +66 -0
  49. data/utils/enveomics/Examples/ani-matrix.bash +66 -0
  50. data/utils/enveomics/Examples/essential-phylogeny.bash +105 -0
  51. data/utils/enveomics/Examples/unus-genome-phylogeny.bash +100 -0
  52. data/utils/enveomics/LICENSE.txt +73 -0
  53. data/utils/enveomics/Makefile +52 -0
  54. data/utils/enveomics/Manifest/Tasks/aasubs.json +103 -0
  55. data/utils/enveomics/Manifest/Tasks/blasttab.json +786 -0
  56. data/utils/enveomics/Manifest/Tasks/distances.json +161 -0
  57. data/utils/enveomics/Manifest/Tasks/fasta.json +802 -0
  58. data/utils/enveomics/Manifest/Tasks/fastq.json +291 -0
  59. data/utils/enveomics/Manifest/Tasks/graphics.json +126 -0
  60. data/utils/enveomics/Manifest/Tasks/mapping.json +137 -0
  61. data/utils/enveomics/Manifest/Tasks/ogs.json +382 -0
  62. data/utils/enveomics/Manifest/Tasks/other.json +906 -0
  63. data/utils/enveomics/Manifest/Tasks/remote.json +355 -0
  64. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +638 -0
  65. data/utils/enveomics/Manifest/Tasks/tables.json +308 -0
  66. data/utils/enveomics/Manifest/Tasks/trees.json +68 -0
  67. data/utils/enveomics/Manifest/Tasks/variants.json +111 -0
  68. data/utils/enveomics/Manifest/categories.json +165 -0
  69. data/utils/enveomics/Manifest/examples.json +154 -0
  70. data/utils/enveomics/Manifest/tasks.json +4 -0
  71. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +69 -0
  72. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -0
  73. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -0
  74. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -0
  75. data/utils/enveomics/Pipelines/assembly.pbs/README.md +189 -0
  76. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +112 -0
  77. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +23 -0
  78. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +44 -0
  79. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +50 -0
  80. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +37 -0
  81. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +68 -0
  82. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +49 -0
  83. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +80 -0
  84. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +57 -0
  85. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +63 -0
  86. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +38 -0
  87. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +73 -0
  88. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +21 -0
  89. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +72 -0
  90. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +98 -0
  91. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -0
  92. data/utils/enveomics/Pipelines/blast.pbs/README.md +127 -0
  93. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +109 -0
  94. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +128 -0
  95. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +16 -0
  96. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +22 -0
  97. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +26 -0
  98. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +89 -0
  99. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +29 -0
  100. data/utils/enveomics/Pipelines/idba.pbs/README.md +49 -0
  101. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +95 -0
  102. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +56 -0
  103. data/utils/enveomics/Pipelines/trim.pbs/README.md +54 -0
  104. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +70 -0
  105. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +130 -0
  106. data/utils/enveomics/README.md +42 -0
  107. data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +171 -0
  108. data/utils/enveomics/Scripts/Aln.cat.rb +221 -0
  109. data/utils/enveomics/Scripts/Aln.convert.pl +35 -0
  110. data/utils/enveomics/Scripts/AlphaDiversity.pl +152 -0
  111. data/utils/enveomics/Scripts/BedGraph.tad.rb +93 -0
  112. data/utils/enveomics/Scripts/BedGraph.window.rb +71 -0
  113. data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +102 -0
  114. data/utils/enveomics/Scripts/BlastTab.addlen.rb +63 -0
  115. data/utils/enveomics/Scripts/BlastTab.advance.bash +48 -0
  116. data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +55 -0
  117. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +104 -0
  118. data/utils/enveomics/Scripts/BlastTab.cogCat.rb +76 -0
  119. data/utils/enveomics/Scripts/BlastTab.filter.pl +47 -0
  120. data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +194 -0
  121. data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +104 -0
  122. data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +157 -0
  123. data/utils/enveomics/Scripts/BlastTab.recplot2.R +48 -0
  124. data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +86 -0
  125. data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +119 -0
  126. data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +86 -0
  127. data/utils/enveomics/Scripts/BlastTab.subsample.pl +47 -0
  128. data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +114 -0
  129. data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +90 -0
  130. data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +101 -0
  131. data/utils/enveomics/Scripts/Chao1.pl +97 -0
  132. data/utils/enveomics/Scripts/CharTable.classify.rb +234 -0
  133. data/utils/enveomics/Scripts/EBIseq2tax.rb +83 -0
  134. data/utils/enveomics/Scripts/FastA.N50.pl +60 -0
  135. data/utils/enveomics/Scripts/FastA.extract.rb +152 -0
  136. data/utils/enveomics/Scripts/FastA.filter.pl +52 -0
  137. data/utils/enveomics/Scripts/FastA.filterLen.pl +28 -0
  138. data/utils/enveomics/Scripts/FastA.filterN.pl +60 -0
  139. data/utils/enveomics/Scripts/FastA.fragment.rb +100 -0
  140. data/utils/enveomics/Scripts/FastA.gc.pl +42 -0
  141. data/utils/enveomics/Scripts/FastA.interpose.pl +93 -0
  142. data/utils/enveomics/Scripts/FastA.length.pl +38 -0
  143. data/utils/enveomics/Scripts/FastA.mask.rb +89 -0
  144. data/utils/enveomics/Scripts/FastA.per_file.pl +36 -0
  145. data/utils/enveomics/Scripts/FastA.qlen.pl +57 -0
  146. data/utils/enveomics/Scripts/FastA.rename.pl +65 -0
  147. data/utils/enveomics/Scripts/FastA.revcom.pl +23 -0
  148. data/utils/enveomics/Scripts/FastA.sample.rb +98 -0
  149. data/utils/enveomics/Scripts/FastA.slider.pl +85 -0
  150. data/utils/enveomics/Scripts/FastA.split.pl +55 -0
  151. data/utils/enveomics/Scripts/FastA.split.rb +79 -0
  152. data/utils/enveomics/Scripts/FastA.subsample.pl +131 -0
  153. data/utils/enveomics/Scripts/FastA.tag.rb +65 -0
  154. data/utils/enveomics/Scripts/FastA.toFastQ.rb +69 -0
  155. data/utils/enveomics/Scripts/FastA.wrap.rb +48 -0
  156. data/utils/enveomics/Scripts/FastQ.filter.pl +54 -0
  157. data/utils/enveomics/Scripts/FastQ.interpose.pl +90 -0
  158. data/utils/enveomics/Scripts/FastQ.maskQual.rb +89 -0
  159. data/utils/enveomics/Scripts/FastQ.offset.pl +90 -0
  160. data/utils/enveomics/Scripts/FastQ.split.pl +53 -0
  161. data/utils/enveomics/Scripts/FastQ.tag.rb +70 -0
  162. data/utils/enveomics/Scripts/FastQ.test-error.rb +81 -0
  163. data/utils/enveomics/Scripts/FastQ.toFastA.awk +24 -0
  164. data/utils/enveomics/Scripts/GFF.catsbj.pl +127 -0
  165. data/utils/enveomics/Scripts/GenBank.add_fields.rb +84 -0
  166. data/utils/enveomics/Scripts/HMM.essential.rb +351 -0
  167. data/utils/enveomics/Scripts/HMM.haai.rb +168 -0
  168. data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +83 -0
  169. data/utils/enveomics/Scripts/JPlace.distances.rb +88 -0
  170. data/utils/enveomics/Scripts/JPlace.to_iToL.rb +320 -0
  171. data/utils/enveomics/Scripts/M5nr.getSequences.rb +81 -0
  172. data/utils/enveomics/Scripts/MeTaxa.distribution.pl +198 -0
  173. data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +35 -0
  174. data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +49 -0
  175. data/utils/enveomics/Scripts/NCBIacc2tax.rb +92 -0
  176. data/utils/enveomics/Scripts/Newick.autoprune.R +27 -0
  177. data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +228 -0
  178. data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +32 -0
  179. data/utils/enveomics/Scripts/RefSeq.download.bash +48 -0
  180. data/utils/enveomics/Scripts/SRA.download.bash +55 -0
  181. data/utils/enveomics/Scripts/TRIBS.plot-test.R +36 -0
  182. data/utils/enveomics/Scripts/TRIBS.test.R +39 -0
  183. data/utils/enveomics/Scripts/Table.barplot.R +31 -0
  184. data/utils/enveomics/Scripts/Table.df2dist.R +30 -0
  185. data/utils/enveomics/Scripts/Table.filter.pl +61 -0
  186. data/utils/enveomics/Scripts/Table.merge.pl +77 -0
  187. data/utils/enveomics/Scripts/Table.prefScore.R +60 -0
  188. data/utils/enveomics/Scripts/Table.replace.rb +69 -0
  189. data/utils/enveomics/Scripts/Table.round.rb +63 -0
  190. data/utils/enveomics/Scripts/Table.split.pl +57 -0
  191. data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +227 -0
  192. data/utils/enveomics/Scripts/VCF.KaKs.rb +147 -0
  193. data/utils/enveomics/Scripts/VCF.SNPs.rb +88 -0
  194. data/utils/enveomics/Scripts/aai.rb +419 -0
  195. data/utils/enveomics/Scripts/ani.rb +362 -0
  196. data/utils/enveomics/Scripts/anir.rb +137 -0
  197. data/utils/enveomics/Scripts/clust.rand.rb +102 -0
  198. data/utils/enveomics/Scripts/gi2tax.rb +103 -0
  199. data/utils/enveomics/Scripts/in_silico_GA_GI.pl +96 -0
  200. data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
  201. data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
  202. data/utils/enveomics/Scripts/lib/enveomics.R +1 -0
  203. data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +293 -0
  204. data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +175 -0
  205. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +24 -0
  206. data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +17 -0
  207. data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +30 -0
  208. data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +253 -0
  209. data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +63 -0
  210. data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +182 -0
  211. data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +49 -0
  212. data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +74 -0
  213. data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +237 -0
  214. data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +3 -0
  215. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +31 -0
  216. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +152 -0
  217. data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +73 -0
  218. data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +135 -0
  219. data/utils/enveomics/Scripts/ogs.annotate.rb +88 -0
  220. data/utils/enveomics/Scripts/ogs.core-pan.rb +160 -0
  221. data/utils/enveomics/Scripts/ogs.extract.rb +125 -0
  222. data/utils/enveomics/Scripts/ogs.mcl.rb +186 -0
  223. data/utils/enveomics/Scripts/ogs.rb +104 -0
  224. data/utils/enveomics/Scripts/ogs.stats.rb +131 -0
  225. data/utils/enveomics/Scripts/rbm-legacy.rb +172 -0
  226. data/utils/enveomics/Scripts/rbm.rb +100 -0
  227. data/utils/enveomics/Scripts/sam.filter.rb +148 -0
  228. data/utils/enveomics/Tests/Makefile +10 -0
  229. data/utils/enveomics/Tests/Mgen_M2288.faa +3189 -0
  230. data/utils/enveomics/Tests/Mgen_M2288.fna +8282 -0
  231. data/utils/enveomics/Tests/Mgen_M2321.fna +8288 -0
  232. data/utils/enveomics/Tests/Nequ_Kin4M.faa +2970 -0
  233. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
  234. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +7 -0
  235. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +17 -0
  236. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +137 -0
  237. data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +123 -0
  238. data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +200 -0
  239. data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +55 -0
  240. data/utils/enveomics/Tests/alkB.nwk +1 -0
  241. data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +13 -0
  242. data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +17 -0
  243. data/utils/enveomics/Tests/hiv1.faa +59 -0
  244. data/utils/enveomics/Tests/hiv1.fna +134 -0
  245. data/utils/enveomics/Tests/hiv2.faa +70 -0
  246. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +233 -0
  247. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +1 -0
  248. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +233 -0
  249. data/utils/enveomics/Tests/phyla_counts.tsv +10 -0
  250. data/utils/enveomics/Tests/primate_lentivirus.ogs +11 -0
  251. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +9 -0
  252. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +8 -0
  253. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +6 -0
  254. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +9 -0
  255. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +6 -0
  256. data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +6 -0
  257. data/utils/enveomics/build_enveomics_r.bash +45 -0
  258. data/utils/enveomics/enveomics.R/DESCRIPTION +31 -0
  259. data/utils/enveomics/enveomics.R/NAMESPACE +39 -0
  260. data/utils/enveomics/enveomics.R/R/autoprune.R +155 -0
  261. data/utils/enveomics/enveomics.R/R/barplot.R +184 -0
  262. data/utils/enveomics/enveomics.R/R/cliopts.R +135 -0
  263. data/utils/enveomics/enveomics.R/R/df2dist.R +154 -0
  264. data/utils/enveomics/enveomics.R/R/growthcurve.R +331 -0
  265. data/utils/enveomics/enveomics.R/R/prefscore.R +79 -0
  266. data/utils/enveomics/enveomics.R/R/recplot.R +354 -0
  267. data/utils/enveomics/enveomics.R/R/recplot2.R +1631 -0
  268. data/utils/enveomics/enveomics.R/R/tribs.R +583 -0
  269. data/utils/enveomics/enveomics.R/R/utils.R +80 -0
  270. data/utils/enveomics/enveomics.R/README.md +81 -0
  271. data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
  272. data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
  273. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +16 -0
  274. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +16 -0
  275. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +16 -0
  276. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +25 -0
  277. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +46 -0
  278. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -0
  279. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +47 -0
  280. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -0
  281. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -0
  282. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +40 -0
  283. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +103 -0
  284. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +67 -0
  285. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -0
  286. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -0
  287. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +45 -0
  288. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +44 -0
  289. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +47 -0
  290. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +75 -0
  291. data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +50 -0
  292. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +44 -0
  293. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +139 -0
  294. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +45 -0
  295. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -0
  296. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +77 -0
  297. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -0
  298. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -0
  299. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -0
  300. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -0
  301. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +47 -0
  302. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -0
  303. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -0
  304. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +45 -0
  305. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -0
  306. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -0
  307. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -0
  308. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -0
  309. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +52 -0
  310. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -0
  311. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +51 -0
  312. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +43 -0
  313. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +82 -0
  314. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +59 -0
  315. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -0
  316. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +36 -0
  317. data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +23 -0
  318. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +68 -0
  319. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -0
  320. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -0
  321. data/utils/enveomics/enveomics.R/man/growth.curves.Rd +14 -0
  322. data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +13 -0
  323. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +78 -0
  324. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +46 -0
  325. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +45 -0
  326. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +125 -0
  327. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -0
  328. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -0
  329. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -0
  330. data/utils/enveomics/globals.mk +8 -0
  331. data/utils/enveomics/manifest.json +9 -0
  332. data/utils/multitrim/Multitrim How-To.pdf +0 -0
  333. data/utils/multitrim/README.md +67 -0
  334. data/utils/multitrim/multitrim.py +1555 -0
  335. data/utils/multitrim/multitrim.yml +13 -0
  336. data/utils/requirements.txt +4 -3
  337. metadata +304 -3
@@ -0,0 +1,80 @@
1
+ #' Enveomics: Color Alpha
2
+ #'
3
+ #' Modify alpha in a color (or vector of colors).
4
+ #'
5
+ #' @param col Color or vector of colors. It can be any value supported by
6
+ #' \code{\link[grDevices]{col2rgb}}, such as \code{darkred} or \code{#009988}.
7
+ #' @param alpha Alpha value to add to the color, from 0 to 1.
8
+ #'
9
+ #' @return Returns a color or a vector of colors in \emph{hex} notation,
10
+ #' including \code{alpha}.
11
+ #'
12
+ #' @author Luis M. Rodriguez-R [aut, cre]
13
+ #'
14
+ #' @export
15
+
16
enve.col.alpha <- function(col, alpha = 1/2) {
  # Adds an alpha (opacity) value to a color or vector of colors.
  #   col   : any color specification accepted by col2rgb().
  #   alpha : opacity to apply, from 0 to 1.
  # Returns one hex color string (including alpha) per input color.
  with.alpha <- function(x) {
    # col2rgb() reports channels on a 0-255 scale while rgb() defaults to a
    # 0-1 scale, so the divisor must be 255 (not 256) for full intensity to
    # map back to exactly 1.
    channels <- c(x[1:3] / 255, alpha)
    do.call(rgb, as.list(channels))
  }
  return(apply(col2rgb(col), 2, with.alpha))
}
24
+
25
+ #' Enveomics: Truncate
26
+ #'
27
+ #' Removes the \code{n} highest and lowest values from a vector, and applies a
28
+ #' summary function. The value of \code{n} is determined such that the central
29
+ #' range is used, corresponding to the \code{f} fraction of values.
30
+ #'
31
+ #' @param x A vector of numbers.
32
+ #' @param f The fraction of values to retain.
33
+ #' @param FUN Summary function to apply to the vectors. To obtain the
34
+ #' truncated vector itself, use \code{c}.
35
+ #'
36
+ #' @return Returns the summary \code{(FUN)} of the truncated vector.
37
+ #'
38
+ #' @author Luis M. Rodriguez-R [aut, cre]
39
+ #'
40
+ #' @export
41
+
42
enve.truncate <- function(x, f = 0.95, FUN = mean) {
  # Summarizes the central fraction `f` of the values in `x`.
  #   x   : vector of numbers.
  #   f   : fraction of values to retain.
  #   FUN : summary function applied to the retained values (use `c` to get
  #         the truncated vector itself).
  # Returns FUN() of the sorted vector after dropping the `n` lowest and the
  # `n` highest values.
  n <- round(length(x) * (1 - f) / 2)
  kept <- sort(x)
  # Only trim when there is something to trim: with n == 0, seq(1, 0) and
  # seq(length(x) + 1, length(x)) count downwards and would wrongly drop the
  # first and last values.
  if (n > 0) {
    extremes <- c(seq_len(n), seq(length(x) + 1 - n, length(x)))
    kept <- kept[-extremes]
  }
  return(FUN(kept))
}
51
+
52
+ #' Enveomics: Selection vector
53
+ #'
54
+ #' Normalizes a selection vector \code{sel} to a logical vector with indexes
55
+ #' from \code{dim.names}.
56
+ #'
57
+ #' @param sel A vector of numbers, characters, or booleans.
58
+ #' @param dim.names A vector of names from which to select.
59
+ #'
60
+ #' @return Returns a logical vector with the same length as \code{dim.names}.
61
+ #'
62
+ #' @author Luis M. Rodriguez-R [aut, cre]
63
+ #'
64
+ #' @export
65
+
66
enve.selvector <- function(sel, dim.names) {
  # Normalizes the selection `sel` into a logical vector aligned with
  # `dim.names`.
  #   sel       : logical mask, numeric indexes, or names to select.
  #   dim.names : vector of names from which to select.
  # Returns a logical vector with one entry per element of `dim.names`, and
  # stops with an error if `sel` cannot be mapped onto `dim.names`.
  if (is.logical(sel)) {
    if (length(sel) != length(dim.names))
      stop('sel is logical but differs in length from dim.names')
    sel
  } else if (is.numeric(sel)) {
    # Check the length first so an empty selection never reaches
    # max(numeric(0)), which warns and yields -Inf.
    if (length(sel) > 0 && max(sel) > length(dim.names))
      stop('sel includes numeric index beyond the length of dim.names')
    # seq_along() is empty for empty dim.names, whereas 1:length() would
    # produce c(1, 0) and a result of the wrong length.
    seq_along(dim.names) %in% sel
  } else {
    if (any(!sel %in% dim.names))
      stop('sel includes character index missing from dim.names')
    dim.names %in% sel
  }
}
@@ -0,0 +1,81 @@
1
+ # enveomics.R
2
+
3
+ ## Installing `enveomics.R`
4
+ To install the latest version of `enveomics.R` uploaded to CRAN, execute in R:
5
+
6
+ ```R
7
+ install.packages('enveomics.R')
8
+ ```
9
+
10
+ To install the current developer version of `enveomics.R`, execute in R:
11
+
12
+ ```R
13
+ install.packages('devtools')
14
+ library('devtools')
15
+ install_github('lmrodriguezr/enveomics', subdir='enveomics.R')
16
+ ```
17
+
18
+ ## Using `enveomics.R`
19
+ To load enveomics.R, simply execute:
20
+
21
+ ```R
22
+ library(enveomics.R);
23
+ ```
24
+
25
+ And open help messages using any of the following commands:
26
+
27
+ ```R
28
+ ?enveomics.R
29
+ ?enve.barplot
30
+ ?enve.recplot2
31
+ ?enve.recplot2.compareIdentities
32
+ ?enve.recplot2.changeCutoff
33
+ ?enve.recplot2.findPeaks
34
+ ?enve.recplot2.corePeak
35
+ ?enve.recplot2.windowDepthThreshold
36
+ ?enve.recplot2.extractWindows
37
+ ?enve.recplot2.coordinates
38
+ ?enve.recplot2.seqdepth
39
+ ?enve.recplot2.ANIr
40
+ ?enve.prune.dist
41
+ ?enve.tribs
42
+ ?enve.tribs.test
43
+ ?enve.growthcurve
44
+ ?enve.col.alpha
45
+ ?enve.truncate
46
+ ```
47
+
48
+ You can run some examples using these libraries in the
49
+ [enveomics-GUI](https://github.com/lmrodriguezr/enveomics-gui).
50
+
51
+ For additional information on recruitment plots, see the
52
+ [Recruitment plots working document](https://github.com/lmrodriguezr/enveomics/blob/master/Docs/recplot2.md).
53
+
54
+ ## Changelog
55
+ * 1.8.0: New functions `enve.selvector` and `enve.prefscore`.
56
+ * 1.7.1: Improved efficiency of `enve.df2dist` about five-fold.
57
+ * 1.7.0: Uniformized output for `enve.recplot2.extractWindows` and
58
+ `enve.recplot2.coordinates` to ease automation. Thanks to Tomeu Viver and
59
+ Roth Conrad for troubleshooting.
60
+ * 1.6.0: Speed up in recplot2 with proper structure manipulation
61
+ (by: Kenji Gerhardt). Also, default value for `id.breaks` was changed from
62
+ 300 to 60.
63
+ * 1.5.0: Modernized documentation, now in ROxygen2 (by: Tatyana Kiryutina)
64
+ * 1.4.4: Removes modeest library as requirement, and replaces mower peak-finder
65
+ initialization to median (instead of mode).
66
+ * 1.4.2: Solved bug #36.
67
+ * 1.4.0: New option `pos.breaks.tsv` for `enve.recplot2`.
68
+ * 1.3.4: Gracefully handles and plots recruitment plots with insufficient data
69
+ to find peaks.
70
+ * 1.3.3: New function `enve.recplot2.windowDepthThreshold`.
71
+ * 1.3.2: New option `panel.fun` for `plot.enve.RecPlot2`.
72
+ * 1.3.1: New function enve.truncate.
73
+ * 1.3: Several bug fixes and new utilities for recruitment plots (recplot2).
74
+ * 1.1.0: New function enve.growthcurve and related class enve.GrowthCurve
75
+ with S3 methods plot and summary.
76
+ * 1.0.2: Fine-tuned default parameters in enve.recplot2.findPeaks and
77
+ solved a minor bug in enve.recplot2 that caused failures in low-coverage
78
+ datasets when using too many threads.
79
+ * 1.0.1: enve.recplot2 now supports pos.breaks=0 to define a
80
+ bin per subject sequence.
81
+
@@ -0,0 +1,16 @@
1
+ % Generated by roxygen2: do not edit by hand
2
+ % Please edit documentation in R/growthcurve.R
3
+ \name{$,enve.GrowthCurve-method}
4
+ \alias{$,enve.GrowthCurve-method}
5
+ \title{Attribute accessor}
6
+ \usage{
7
+ \S4method{$}{enve.GrowthCurve}(x, name)
8
+ }
9
+ \arguments{
10
+ \item{x}{Object}
11
+
12
+ \item{name}{Attribute name}
13
+ }
14
+ \description{
15
+ Attribute accessor
16
+ }
@@ -0,0 +1,16 @@
1
+ % Generated by roxygen2: do not edit by hand
2
+ % Please edit documentation in R/recplot2.R
3
+ \name{$,enve.RecPlot2-method}
4
+ \alias{$,enve.RecPlot2-method}
5
+ \title{Attribute accessor}
6
+ \usage{
7
+ \S4method{$}{enve.RecPlot2}(x, name)
8
+ }
9
+ \arguments{
10
+ \item{x}{Object}
11
+
12
+ \item{name}{Attribute name}
13
+ }
14
+ \description{
15
+ Attribute accessor
16
+ }
@@ -0,0 +1,16 @@
1
+ % Generated by roxygen2: do not edit by hand
2
+ % Please edit documentation in R/recplot2.R
3
+ \name{$,enve.RecPlot2.Peak-method}
4
+ \alias{$,enve.RecPlot2.Peak-method}
5
+ \title{Attribute accessor}
6
+ \usage{
7
+ \S4method{$}{enve.RecPlot2.Peak}(x, name)
8
+ }
9
+ \arguments{
10
+ \item{x}{Object}
11
+
12
+ \item{name}{Attribute name}
13
+ }
14
+ \description{
15
+ Attribute accessor
16
+ }
@@ -0,0 +1,25 @@
1
+ % Generated by roxygen2: do not edit by hand
2
+ % Please edit documentation in R/growthcurve.R
3
+ \docType{class}
4
+ \name{enve.GrowthCurve-class}
5
+ \alias{enve.GrowthCurve-class}
6
+ \alias{enve.GrowthCurve}
7
+ \title{Enveomics: Growth Curve S4 Class}
8
+ \description{
9
+ Enve-omics representation of fitted growth curves.
10
+ }
11
+ \section{Slots}{
12
+
13
+ \describe{
14
+ \item{\code{design}}{\code{(array)} Experimental design of the experiment.}
15
+
16
+ \item{\code{models}}{\code{(list)} Fitted growth curve models.}
17
+
18
+ \item{\code{predict}}{\code{(list)} Fitted growth curve values.}
19
+
20
+ \item{\code{call}}{\code{(call)} Call producing this object.}
21
+ }}
22
+
23
+ \author{
24
+ Luis M. Rodriguez-R [aut, cre]
25
+ }
@@ -0,0 +1,46 @@
1
+ % Generated by roxygen2: do not edit by hand
2
+ % Please edit documentation in R/tribs.R
3
+ \docType{class}
4
+ \name{enve.TRIBS-class}
5
+ \alias{enve.TRIBS-class}
6
+ \alias{enve.TRIBS}
7
+ \title{Enveomics: TRIBS S4 Class}
8
+ \description{
9
+ Enve-omics representation of "Transformed-space Resampling In Biased Sets
10
+ (TRIBS)". This object represents sets of distances between objects,
11
+ sampled nearly-uniformly at random in "distance space". Subsampling
12
+ without selection is trivial, since both the distance space and the
13
+ selection occur in the same transformed space. However, it's useful to
14
+ compare randomly subsampled sets against a selected set of objects. This
15
+ is intended to identify overdispersion or overclustering (see
16
+ \code{\link{enve.TRIBStest}}) of a subset against the entire collection of objects
17
+ with minimum impact of sampling biases. This object can be produced by
18
+ \code{\link{enve.tribs}} and supports S4 methods \code{plot} and \code{summary}.
19
+ }
20
+ \section{Slots}{
21
+
22
+ \describe{
23
+ \item{\code{distance}}{\code{(numeric)} Centrality measurement of the distances
24
+ between the selected objects (without subsampling).}
25
+
26
+ \item{\code{points}}{\code{(matrix)} Position of the different objects in distance
27
+ space.}
28
+
29
+ \item{\code{distances}}{\code{(matrix)} Subsampled distances, where the rows are
30
+ replicates and the columns are subsampling levels.}
31
+
32
+ \item{\code{spaceSize}}{\code{(numeric)} Number of objects.}
33
+
34
+ \item{\code{selSize}}{\code{(numeric)} Number of selected objects.}
35
+
36
+ \item{\code{dimensions}}{\code{(numeric)} Number of dimensions in the distance space.}
37
+
38
+ \item{\code{subsamples}}{\code{(numeric)} Subsampling levels (as fractions, from
39
+ 0 to 1).}
40
+
41
+ \item{\code{call}}{\code{(call)} Call producing this object.}
42
+ }}
43
+
44
+ \author{
45
+ Luis M. Rodriguez-R [aut, cre]
46
+ }
@@ -0,0 +1,23 @@
1
+ % Generated by roxygen2: do not edit by hand
2
+ % Please edit documentation in R/tribs.R
3
+ \name{enve.TRIBS.merge}
4
+ \alias{enve.TRIBS.merge}
5
+ \title{Enveomics: TRIBS Merge}
6
+ \usage{
7
+ enve.TRIBS.merge(x, y)
8
+ }
9
+ \arguments{
10
+ \item{x}{First \code{\link{enve.TRIBS}} object.}
11
+
12
+ \item{y}{Second \code{\link{enve.TRIBS}} object.}
13
+ }
14
+ \value{
15
+ Returns an \code{\link{enve.TRIBS}} object.
16
+ }
17
+ \description{
18
+ Merges two \code{\link{enve.TRIBS}} objects generated from the same objects at
19
+ different subsampling levels.
20
+ }
21
+ \author{
22
+ Luis M. Rodriguez-R [aut, cre]
23
+ }
@@ -0,0 +1,47 @@
1
+ % Generated by roxygen2: do not edit by hand
2
+ % Please edit documentation in R/tribs.R
3
+ \docType{class}
4
+ \name{enve.TRIBStest-class}
5
+ \alias{enve.TRIBStest-class}
6
+ \alias{enve.TRIBStest}
7
+ \title{Enveomics: TRIBS Test S4 Class}
8
+ \description{
9
+ Test of significance of overclustering or overdispersion in a selected
10
+ set of objects with respect to the entire set (see \code{\link{enve.TRIBS}}). This
11
+ object can be produced by \code{\link{enve.tribs.test}} and supports S4 methods
12
+ \code{plot} and \code{summary}.
13
+ }
14
+ \section{Slots}{
15
+
16
+ \describe{
17
+ \item{\code{pval.gt}}{\code{(numeric)}
18
+ P-value for the overdispersion test.}
19
+
20
+ \item{\code{pval.lt}}{\code{(numeric)}
21
+ P-value for the overclustering test.}
22
+
23
+ \item{\code{all.dist}}{\code{(numeric)}
24
+ Empiric PDF of distances for the entire dataset (subsampled at selection
25
+ size).}
26
+
27
+ \item{\code{sel.dist}}{\code{(numeric)}
28
+ Empiric PDF of distances for the selected objects (without subsampling).}
29
+
30
+ \item{\code{diff.dist}}{\code{(numeric)}
31
+ Empiric PDF of the difference between \code{all.dist} and \code{sel.dist}.
32
+ The p-values are estimated by comparing areas in this PDF greater than and
33
+ lesser than zero.}
34
+
35
+ \item{\code{dist.mids}}{\code{(numeric)}
36
+ Midpoints of the empiric PDFs of distances.}
37
+
38
+ \item{\code{diff.mids}}{\code{(numeric)}
39
+ Midpoints of the empiric PDF of difference of distances.}
40
+
41
+ \item{\code{call}}{\code{(call)}
42
+ Call producing this object.}
43
+ }}
44
+
45
+ \author{
46
+ Luis M. Rodriguez-R [aut, cre]
47
+ }
@@ -0,0 +1,23 @@
1
+ % Generated by roxygen2: do not edit by hand
2
+ % Please edit documentation in R/autoprune.R
3
+ \name{enve.__prune.iter}
4
+ \alias{enve.__prune.iter}
5
+ \title{Enveomics: Prune Iter (Internal Function)}
6
+ \usage{
7
+ enve.__prune.iter(t, dist, min_dist, quiet)
8
+ }
9
+ \arguments{
10
+ \item{t}{A \strong{phylo} object}
11
+
12
+ \item{dist}{Cophenetic distance matrix}
13
+
14
+ \item{min_dist}{Minimum distance}
15
+
16
+ \item{quiet}{If running quietly}
17
+ }
18
+ \description{
19
+ Internal function for \code{\link{enve.prune.dist}}.
20
+ }
21
+ \author{
22
+ Luis M. Rodriguez-R [aut, cre]
23
+ }
@@ -0,0 +1,23 @@
1
+ % Generated by roxygen2: do not edit by hand
2
+ % Please edit documentation in R/autoprune.R
3
+ \name{enve.__prune.reduce}
4
+ \alias{enve.__prune.reduce}
5
+ \title{Enveomics: Prune Reduce (Internal Function)}
6
+ \usage{
7
+ enve.__prune.reduce(t, nodes, min_dist, quiet)
8
+ }
9
+ \arguments{
10
+ \item{t}{A \strong{phylo} object}
11
+
12
+ \item{nodes}{Vector of nodes}
13
+
14
+ \item{min_dist}{Minimum distance}
15
+
16
+ \item{quiet}{If running quietly}
17
+ }
18
+ \description{
19
+ Internal function for \code{\link{enve.prune.dist}}.
20
+ }
21
+ \author{
22
+ Luis M. Rodriguez-R [aut, cre]
23
+ }
@@ -0,0 +1,40 @@
1
+ % Generated by roxygen2: do not edit by hand
2
+ % Please edit documentation in R/tribs.R
3
+ \name{enve.__tribs}
4
+ \alias{enve.__tribs}
5
+ \title{Enveomics: TRIBS - Internal Ancillary Function}
6
+ \usage{
7
+ enve.__tribs(
8
+ rep,
9
+ frx,
10
+ selection,
11
+ dimensions,
12
+ dots,
13
+ dist.method,
14
+ summary.fx,
15
+ dist
16
+ )
17
+ }
18
+ \arguments{
19
+ \item{rep}{Replicates}
20
+
21
+ \item{frx}{Fraction}
22
+
23
+ \item{selection}{Selection}
24
+
25
+ \item{dimensions}{Dimensions}
26
+
27
+ \item{dots}{Sampling points}
28
+
29
+ \item{dist.method}{Distance method}
30
+
31
+ \item{summary.fx}{Summary function}
32
+
33
+ \item{dist}{Distance}
34
+ }
35
+ \description{
36
+ Internal ancillary function (see \code{\link{enve.tribs}}).
37
+ }
38
+ \author{
39
+ Luis M. Rodriguez-R [aut, cre]
40
+ }