miga-base 0.7.26.0 → 1.0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (337) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/_data/aai-intax.blast.tsv.gz +0 -0
  3. data/lib/miga/_data/aai-intax.diamond.tsv.gz +0 -0
  4. data/lib/miga/_data/aai-novel.blast.tsv.gz +0 -0
  5. data/lib/miga/_data/aai-novel.diamond.tsv.gz +0 -0
  6. data/lib/miga/cli/action/classify_wf.rb +2 -2
  7. data/lib/miga/cli/action/derep_wf.rb +1 -1
  8. data/lib/miga/cli/action/doctor.rb +57 -14
  9. data/lib/miga/cli/action/doctor/base.rb +47 -23
  10. data/lib/miga/cli/action/init.rb +11 -7
  11. data/lib/miga/cli/action/init/files_helper.rb +1 -0
  12. data/lib/miga/cli/action/ncbi_get.rb +3 -3
  13. data/lib/miga/cli/action/tax_dist.rb +2 -2
  14. data/lib/miga/cli/action/wf.rb +5 -4
  15. data/lib/miga/common.rb +1 -0
  16. data/lib/miga/daemon.rb +11 -4
  17. data/lib/miga/dataset/result.rb +10 -6
  18. data/lib/miga/json.rb +5 -4
  19. data/lib/miga/metadata.rb +5 -1
  20. data/lib/miga/parallel.rb +36 -0
  21. data/lib/miga/project.rb +8 -8
  22. data/lib/miga/project/base.rb +4 -4
  23. data/lib/miga/project/result.rb +2 -2
  24. data/lib/miga/sqlite.rb +10 -2
  25. data/lib/miga/version.rb +23 -9
  26. data/scripts/aai_distances.bash +16 -18
  27. data/scripts/ani_distances.bash +16 -17
  28. data/scripts/assembly.bash +31 -16
  29. data/scripts/haai_distances.bash +3 -27
  30. data/scripts/miga.bash +6 -4
  31. data/scripts/p.bash +1 -1
  32. data/scripts/read_quality.bash +9 -18
  33. data/scripts/trimmed_fasta.bash +14 -30
  34. data/scripts/trimmed_reads.bash +36 -36
  35. data/test/parallel_test.rb +31 -0
  36. data/test/project_test.rb +2 -1
  37. data/test/remote_dataset_test.rb +1 -1
  38. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +41964 -0
  39. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +32439 -0
  40. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +62056 -0
  41. data/utils/FastAAI/FastAAI/FastAAI +1336 -0
  42. data/utils/FastAAI/README.md +84 -0
  43. data/utils/FastAAI/kAAI_v1.0_virus.py +1296 -0
  44. data/utils/distance/commands.rb +1 -0
  45. data/utils/distance/database.rb +0 -1
  46. data/utils/distance/runner.rb +2 -4
  47. data/utils/enveomics/Docs/recplot2.md +244 -0
  48. data/utils/enveomics/Examples/aai-matrix.bash +66 -0
  49. data/utils/enveomics/Examples/ani-matrix.bash +66 -0
  50. data/utils/enveomics/Examples/essential-phylogeny.bash +105 -0
  51. data/utils/enveomics/Examples/unus-genome-phylogeny.bash +100 -0
  52. data/utils/enveomics/LICENSE.txt +73 -0
  53. data/utils/enveomics/Makefile +52 -0
  54. data/utils/enveomics/Manifest/Tasks/aasubs.json +103 -0
  55. data/utils/enveomics/Manifest/Tasks/blasttab.json +786 -0
  56. data/utils/enveomics/Manifest/Tasks/distances.json +161 -0
  57. data/utils/enveomics/Manifest/Tasks/fasta.json +802 -0
  58. data/utils/enveomics/Manifest/Tasks/fastq.json +291 -0
  59. data/utils/enveomics/Manifest/Tasks/graphics.json +126 -0
  60. data/utils/enveomics/Manifest/Tasks/mapping.json +137 -0
  61. data/utils/enveomics/Manifest/Tasks/ogs.json +382 -0
  62. data/utils/enveomics/Manifest/Tasks/other.json +906 -0
  63. data/utils/enveomics/Manifest/Tasks/remote.json +355 -0
  64. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +638 -0
  65. data/utils/enveomics/Manifest/Tasks/tables.json +308 -0
  66. data/utils/enveomics/Manifest/Tasks/trees.json +68 -0
  67. data/utils/enveomics/Manifest/Tasks/variants.json +111 -0
  68. data/utils/enveomics/Manifest/categories.json +165 -0
  69. data/utils/enveomics/Manifest/examples.json +154 -0
  70. data/utils/enveomics/Manifest/tasks.json +4 -0
  71. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +69 -0
  72. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -0
  73. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -0
  74. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -0
  75. data/utils/enveomics/Pipelines/assembly.pbs/README.md +189 -0
  76. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +112 -0
  77. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +23 -0
  78. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +44 -0
  79. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +50 -0
  80. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +37 -0
  81. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +68 -0
  82. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +49 -0
  83. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +80 -0
  84. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +57 -0
  85. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +63 -0
  86. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +38 -0
  87. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +73 -0
  88. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +21 -0
  89. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +72 -0
  90. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +98 -0
  91. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -0
  92. data/utils/enveomics/Pipelines/blast.pbs/README.md +127 -0
  93. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +109 -0
  94. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +128 -0
  95. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +16 -0
  96. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +22 -0
  97. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +26 -0
  98. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +89 -0
  99. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +29 -0
  100. data/utils/enveomics/Pipelines/idba.pbs/README.md +49 -0
  101. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +95 -0
  102. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +56 -0
  103. data/utils/enveomics/Pipelines/trim.pbs/README.md +54 -0
  104. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +70 -0
  105. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +130 -0
  106. data/utils/enveomics/README.md +42 -0
  107. data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +171 -0
  108. data/utils/enveomics/Scripts/Aln.cat.rb +221 -0
  109. data/utils/enveomics/Scripts/Aln.convert.pl +35 -0
  110. data/utils/enveomics/Scripts/AlphaDiversity.pl +152 -0
  111. data/utils/enveomics/Scripts/BedGraph.tad.rb +93 -0
  112. data/utils/enveomics/Scripts/BedGraph.window.rb +71 -0
  113. data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +102 -0
  114. data/utils/enveomics/Scripts/BlastTab.addlen.rb +63 -0
  115. data/utils/enveomics/Scripts/BlastTab.advance.bash +48 -0
  116. data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +55 -0
  117. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +104 -0
  118. data/utils/enveomics/Scripts/BlastTab.cogCat.rb +76 -0
  119. data/utils/enveomics/Scripts/BlastTab.filter.pl +47 -0
  120. data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +194 -0
  121. data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +104 -0
  122. data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +157 -0
  123. data/utils/enveomics/Scripts/BlastTab.recplot2.R +48 -0
  124. data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +86 -0
  125. data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +119 -0
  126. data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +86 -0
  127. data/utils/enveomics/Scripts/BlastTab.subsample.pl +47 -0
  128. data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +114 -0
  129. data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +90 -0
  130. data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +101 -0
  131. data/utils/enveomics/Scripts/Chao1.pl +97 -0
  132. data/utils/enveomics/Scripts/CharTable.classify.rb +234 -0
  133. data/utils/enveomics/Scripts/EBIseq2tax.rb +83 -0
  134. data/utils/enveomics/Scripts/FastA.N50.pl +60 -0
  135. data/utils/enveomics/Scripts/FastA.extract.rb +152 -0
  136. data/utils/enveomics/Scripts/FastA.filter.pl +52 -0
  137. data/utils/enveomics/Scripts/FastA.filterLen.pl +28 -0
  138. data/utils/enveomics/Scripts/FastA.filterN.pl +60 -0
  139. data/utils/enveomics/Scripts/FastA.fragment.rb +100 -0
  140. data/utils/enveomics/Scripts/FastA.gc.pl +42 -0
  141. data/utils/enveomics/Scripts/FastA.interpose.pl +93 -0
  142. data/utils/enveomics/Scripts/FastA.length.pl +38 -0
  143. data/utils/enveomics/Scripts/FastA.mask.rb +89 -0
  144. data/utils/enveomics/Scripts/FastA.per_file.pl +36 -0
  145. data/utils/enveomics/Scripts/FastA.qlen.pl +57 -0
  146. data/utils/enveomics/Scripts/FastA.rename.pl +65 -0
  147. data/utils/enveomics/Scripts/FastA.revcom.pl +23 -0
  148. data/utils/enveomics/Scripts/FastA.sample.rb +98 -0
  149. data/utils/enveomics/Scripts/FastA.slider.pl +85 -0
  150. data/utils/enveomics/Scripts/FastA.split.pl +55 -0
  151. data/utils/enveomics/Scripts/FastA.split.rb +79 -0
  152. data/utils/enveomics/Scripts/FastA.subsample.pl +131 -0
  153. data/utils/enveomics/Scripts/FastA.tag.rb +65 -0
  154. data/utils/enveomics/Scripts/FastA.toFastQ.rb +69 -0
  155. data/utils/enveomics/Scripts/FastA.wrap.rb +48 -0
  156. data/utils/enveomics/Scripts/FastQ.filter.pl +54 -0
  157. data/utils/enveomics/Scripts/FastQ.interpose.pl +90 -0
  158. data/utils/enveomics/Scripts/FastQ.maskQual.rb +89 -0
  159. data/utils/enveomics/Scripts/FastQ.offset.pl +90 -0
  160. data/utils/enveomics/Scripts/FastQ.split.pl +53 -0
  161. data/utils/enveomics/Scripts/FastQ.tag.rb +70 -0
  162. data/utils/enveomics/Scripts/FastQ.test-error.rb +81 -0
  163. data/utils/enveomics/Scripts/FastQ.toFastA.awk +24 -0
  164. data/utils/enveomics/Scripts/GFF.catsbj.pl +127 -0
  165. data/utils/enveomics/Scripts/GenBank.add_fields.rb +84 -0
  166. data/utils/enveomics/Scripts/HMM.essential.rb +351 -0
  167. data/utils/enveomics/Scripts/HMM.haai.rb +168 -0
  168. data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +83 -0
  169. data/utils/enveomics/Scripts/JPlace.distances.rb +88 -0
  170. data/utils/enveomics/Scripts/JPlace.to_iToL.rb +320 -0
  171. data/utils/enveomics/Scripts/M5nr.getSequences.rb +81 -0
  172. data/utils/enveomics/Scripts/MeTaxa.distribution.pl +198 -0
  173. data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +35 -0
  174. data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +49 -0
  175. data/utils/enveomics/Scripts/NCBIacc2tax.rb +92 -0
  176. data/utils/enveomics/Scripts/Newick.autoprune.R +27 -0
  177. data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +228 -0
  178. data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +32 -0
  179. data/utils/enveomics/Scripts/RefSeq.download.bash +48 -0
  180. data/utils/enveomics/Scripts/SRA.download.bash +55 -0
  181. data/utils/enveomics/Scripts/TRIBS.plot-test.R +36 -0
  182. data/utils/enveomics/Scripts/TRIBS.test.R +39 -0
  183. data/utils/enveomics/Scripts/Table.barplot.R +31 -0
  184. data/utils/enveomics/Scripts/Table.df2dist.R +30 -0
  185. data/utils/enveomics/Scripts/Table.filter.pl +61 -0
  186. data/utils/enveomics/Scripts/Table.merge.pl +77 -0
  187. data/utils/enveomics/Scripts/Table.prefScore.R +60 -0
  188. data/utils/enveomics/Scripts/Table.replace.rb +69 -0
  189. data/utils/enveomics/Scripts/Table.round.rb +63 -0
  190. data/utils/enveomics/Scripts/Table.split.pl +57 -0
  191. data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +227 -0
  192. data/utils/enveomics/Scripts/VCF.KaKs.rb +147 -0
  193. data/utils/enveomics/Scripts/VCF.SNPs.rb +88 -0
  194. data/utils/enveomics/Scripts/aai.rb +419 -0
  195. data/utils/enveomics/Scripts/ani.rb +362 -0
  196. data/utils/enveomics/Scripts/anir.rb +137 -0
  197. data/utils/enveomics/Scripts/clust.rand.rb +102 -0
  198. data/utils/enveomics/Scripts/gi2tax.rb +103 -0
  199. data/utils/enveomics/Scripts/in_silico_GA_GI.pl +96 -0
  200. data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
  201. data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
  202. data/utils/enveomics/Scripts/lib/enveomics.R +1 -0
  203. data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +293 -0
  204. data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +175 -0
  205. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +24 -0
  206. data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +17 -0
  207. data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +30 -0
  208. data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +253 -0
  209. data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +63 -0
  210. data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +182 -0
  211. data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +49 -0
  212. data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +74 -0
  213. data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +237 -0
  214. data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +3 -0
  215. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +31 -0
  216. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +152 -0
  217. data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +73 -0
  218. data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +135 -0
  219. data/utils/enveomics/Scripts/ogs.annotate.rb +88 -0
  220. data/utils/enveomics/Scripts/ogs.core-pan.rb +160 -0
  221. data/utils/enveomics/Scripts/ogs.extract.rb +125 -0
  222. data/utils/enveomics/Scripts/ogs.mcl.rb +186 -0
  223. data/utils/enveomics/Scripts/ogs.rb +104 -0
  224. data/utils/enveomics/Scripts/ogs.stats.rb +131 -0
  225. data/utils/enveomics/Scripts/rbm-legacy.rb +172 -0
  226. data/utils/enveomics/Scripts/rbm.rb +100 -0
  227. data/utils/enveomics/Scripts/sam.filter.rb +148 -0
  228. data/utils/enveomics/Tests/Makefile +10 -0
  229. data/utils/enveomics/Tests/Mgen_M2288.faa +3189 -0
  230. data/utils/enveomics/Tests/Mgen_M2288.fna +8282 -0
  231. data/utils/enveomics/Tests/Mgen_M2321.fna +8288 -0
  232. data/utils/enveomics/Tests/Nequ_Kin4M.faa +2970 -0
  233. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
  234. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +7 -0
  235. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +17 -0
  236. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +137 -0
  237. data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +123 -0
  238. data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +200 -0
  239. data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +55 -0
  240. data/utils/enveomics/Tests/alkB.nwk +1 -0
  241. data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +13 -0
  242. data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +17 -0
  243. data/utils/enveomics/Tests/hiv1.faa +59 -0
  244. data/utils/enveomics/Tests/hiv1.fna +134 -0
  245. data/utils/enveomics/Tests/hiv2.faa +70 -0
  246. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +233 -0
  247. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +1 -0
  248. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +233 -0
  249. data/utils/enveomics/Tests/phyla_counts.tsv +10 -0
  250. data/utils/enveomics/Tests/primate_lentivirus.ogs +11 -0
  251. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +9 -0
  252. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +8 -0
  253. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +6 -0
  254. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +9 -0
  255. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +6 -0
  256. data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +6 -0
  257. data/utils/enveomics/build_enveomics_r.bash +45 -0
  258. data/utils/enveomics/enveomics.R/DESCRIPTION +31 -0
  259. data/utils/enveomics/enveomics.R/NAMESPACE +39 -0
  260. data/utils/enveomics/enveomics.R/R/autoprune.R +155 -0
  261. data/utils/enveomics/enveomics.R/R/barplot.R +184 -0
  262. data/utils/enveomics/enveomics.R/R/cliopts.R +135 -0
  263. data/utils/enveomics/enveomics.R/R/df2dist.R +154 -0
  264. data/utils/enveomics/enveomics.R/R/growthcurve.R +331 -0
  265. data/utils/enveomics/enveomics.R/R/prefscore.R +79 -0
  266. data/utils/enveomics/enveomics.R/R/recplot.R +354 -0
  267. data/utils/enveomics/enveomics.R/R/recplot2.R +1631 -0
  268. data/utils/enveomics/enveomics.R/R/tribs.R +583 -0
  269. data/utils/enveomics/enveomics.R/R/utils.R +80 -0
  270. data/utils/enveomics/enveomics.R/README.md +81 -0
  271. data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
  272. data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
  273. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +16 -0
  274. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +16 -0
  275. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +16 -0
  276. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +25 -0
  277. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +46 -0
  278. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -0
  279. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +47 -0
  280. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -0
  281. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -0
  282. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +40 -0
  283. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +103 -0
  284. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +67 -0
  285. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -0
  286. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -0
  287. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +45 -0
  288. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +44 -0
  289. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +47 -0
  290. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +75 -0
  291. data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +50 -0
  292. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +44 -0
  293. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +139 -0
  294. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +45 -0
  295. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -0
  296. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +77 -0
  297. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -0
  298. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -0
  299. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -0
  300. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -0
  301. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +47 -0
  302. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -0
  303. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -0
  304. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +45 -0
  305. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -0
  306. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -0
  307. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -0
  308. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -0
  309. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +52 -0
  310. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -0
  311. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +51 -0
  312. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +43 -0
  313. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +82 -0
  314. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +59 -0
  315. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -0
  316. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +36 -0
  317. data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +23 -0
  318. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +68 -0
  319. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -0
  320. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -0
  321. data/utils/enveomics/enveomics.R/man/growth.curves.Rd +14 -0
  322. data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +13 -0
  323. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +78 -0
  324. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +46 -0
  325. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +45 -0
  326. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +125 -0
  327. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -0
  328. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -0
  329. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -0
  330. data/utils/enveomics/globals.mk +8 -0
  331. data/utils/enveomics/manifest.json +9 -0
  332. data/utils/multitrim/Multitrim How-To.pdf +0 -0
  333. data/utils/multitrim/README.md +67 -0
  334. data/utils/multitrim/multitrim.py +1555 -0
  335. data/utils/multitrim/multitrim.yml +13 -0
  336. data/utils/requirements.txt +4 -3
  337. metadata +304 -3
@@ -0,0 +1,331 @@
1
+ #==============> Define S4 classes
2
+
3
#' Enveomics: Growth Curve S4 Class
#'
#' Enve-omics representation of fitted growth curves.
#'
#' @slot design \code{(array)} Experimental design of the experiment.
#' @slot models \code{(list)} Fitted growth curve models.
#' @slot predict \code{(list)} Fitted growth curve values.
#' @slot call \code{(call)} Call producing this object.
#'
#' @author Luis M. Rodriguez-R [aut, cre]
#'
#' @exportClass enve.GrowthCurve

enve.GrowthCurve <- setClass(
  "enve.GrowthCurve",
  # `slots` replaces the deprecated `representation()` argument; the slot
  # names, order, and classes are unchanged.
  slots = c(
    design = "array",
    models = "list",
    predict = "list",
    call = "call"
  ),
  package = "enveomics.R"
)
23
+
24
#' Attribute accessor
#'
#' Makes \code{x$name} return the attribute called \code{name} of an
#' \code{enve.GrowthCurve} object, as retrieved by \code{attr}.
#'
#' @param x Object
#' @param name Attribute name
setMethod("$", "enve.GrowthCurve", function(x, name) {
  attr(x, name)
})
29
+
30
#==============> Define S4 methods

#' Enveomics: Plot of Growth Curve
#'
#' Plots an \code{\link{enve.GrowthCurve}} object.
#'
#' @param x An \code{\link{enve.GrowthCurve}} object to plot.
#' @param col Base colors to use for the different samples. Can be recycled.
#' By default, grey for one sample or rainbow colors for more than one.
#' @param pt.alpha Color alpha for the observed data points, using \code{col}
#' as a base.
#' @param ln.alpha Color alpha for the fitted growth curve, using \code{col}
#' as a base.
#' @param ln.lwd Line width for the fitted curve.
#' @param ln.lty Line type for the fitted curve.
#' @param band.alpha Color alpha for the confidence interval band of the
#' fitted growth curve, using \code{col} as a base.
#' @param band.density Density of the filling pattern in the interval band.
#' If \code{NULL}, a solid color is used.
#' @param band.angle Angle of the density filling pattern in the interval
#' band. Ignored if \code{band.density} is \code{NULL}.
#' @param xp.alpha Color alpha for the line connecting individual experiments,
#' using \code{col} as a base.
#' @param xp.lwd Width of line for the experiments.
#' @param xp.lty Type of line for the experiments.
#' @param pch Point character for observed data points.
#' @param new Should a new plot be generated? If \code{FALSE}, the existing
#' canvas is used.
#' @param legend Should the plot include a legend? If \code{FALSE}, no legend
#' is added. If \code{TRUE}, a legend is added in the bottom-right corner.
#' Otherwise, a legend is added in the position specified as \code{xy.coords}.
#' @param add.params Should the legend include the parameters of the fitted
#' model?
#' @param ... Any other graphic parameters.
#'
#' @author Luis M. Rodriguez-R [aut, cre]
#'
#' @method plot enve.GrowthCurve
#' @export

plot.enve.GrowthCurve <- function(
  x,
  col,
  pt.alpha = 0.9,
  ln.alpha = 1.0,
  ln.lwd = 1,
  ln.lty = 1,
  band.alpha = 0.4,
  band.density = NULL,
  band.angle = 45,
  xp.alpha = 0.5,
  xp.lwd = 1,
  xp.lty = 1,
  pch = 19,
  new = TRUE,
  legend = new,
  add.params = FALSE,
  ...
) {
  n.samples <- length(x$design)

  # Default colors: grey for a single sample, rainbow otherwise (as documented
  # for `col`). The previous test compared against zero samples, so a single
  # sample never received the grey default.
  if (missing(col)) {
    col <-
      if (n.samples <= 1) grey(0.2)
      else rainbow(n.samples, v = 3 / 5, s = 3 / 5)
  }

  if (new) {
    # Initiate an empty canvas large enough for both the observed values and
    # the upper bound of the fitted interval
    od.fit.max <- max(
      vapply(x$predict, function(p) max(p[, "upr"]), numeric(1))
    )
    od.obs.max <- max(
      vapply(x$models, function(m) max(m$data[, "od"]), numeric(1))
    )
    opts <- list(...)
    plot.defaults <- list(
      xlab = "Time", ylab = "Density",
      xlim = range(x$predict[[1]][, "t"]),
      ylim = c(0, max(od.fit.max, od.obs.max))
    )
    # Options passed through `...` take precedence over the defaults
    for (i in names(plot.defaults)) {
      if (is.null(opts[[i]])) opts[[i]] <- plot.defaults[[i]]
    }
    opts[["x"]] <- 1
    opts[["type"]] <- "n"
    do.call(plot, opts)
  }

  # Graphic defaults, recycled to one value per sample
  pch <- rep(pch, length.out = n.samples)
  col <- rep(col, length.out = n.samples)
  pt.col <- enve.col2alpha(col, pt.alpha)
  ln.col <- enve.col2alpha(col, ln.alpha)
  band.col <- enve.col2alpha(col, band.alpha)
  xp.col <- enve.col2alpha(col, xp.alpha)
  band.angle <- rep(band.angle, length.out = n.samples)
  if (!all(is.null(band.density))) {
    band.density <- rep(band.density, length.out = n.samples)
  }

  for (i in seq_len(n.samples)) {
    # Observed data: points, plus one connecting line per replicate
    d <- x$models[[i]]$data
    points(d[, "t"], d[, "od"], pch = pch[i], col = pt.col[i])
    for (j in unique(d[, "replicate"])) {
      sel <- d[, "replicate"] == j
      lines(d[sel, "t"], d[sel, "od"], col = xp.col[i], lwd = xp.lwd,
            lty = xp.lty)
    }
    # Fitted growth curves, drawn only for models that converged
    if (x$models[[i]]$convInfo$isConv) {
      d <- x$predict[[i]]
      lines(d[, "t"], d[, "fit"], col = ln.col[i], lwd = ln.lwd, lty = ln.lty)
      # Closed polygon: lower bound left-to-right, upper bound right-to-left
      polygon(c(d[, "t"], rev(d[, "t"])), c(d[, "lwr"], rev(d[, "upr"])),
              border = NA, col = band.col[i], density = band.density[i],
              angle = band.angle[i])
    }
  }

  # `legend` is either a logical switch or a position understood by legend()
  if (!all(is.logical(legend)) || legend) {
    if (all(is.logical(legend))) legend <- "bottomright"
    legend.txt <- names(x$design)
    if (add.params) {
      for (p in names(coef(x$models[[1]]))) {
        legend.txt <- paste(
          legend.txt, ", ", p, "=",
          vapply(x$models, function(m) signif(coef(m)[p], 2), numeric(1)),
          sep = ""
        )
      }
    }
    legend(legend, legend = legend.txt, pch = pch, col = ln.col)
  }
}
153
+
154
#' Enveomics: Summary of Growth Curve
#'
#' Summary of an \code{\link{enve.GrowthCurve}} object.
#'
#' Prints, for each sample in the design, the fitted coefficients (or the
#' stop message if the model did not converge) together with the number of
#' observations and replicates, followed by the call stored in the object.
#'
#' @param object An \code{\link{enve.GrowthCurve}} object.
#' @param ... No additional parameters are currently supported.
#'
#' @author Luis M. Rodriguez-R [aut, cre]
#'
#' @method summary enve.GrowthCurve
#' @export

summary.enve.GrowthCurve <- function(object, ...) {
  rule <- "------------------------------------------\n"

  cat("===[ enve.GrowthCurves ]------------------\n")
  for (sample in names(object$design)) {
    model <- object$models[[sample]]
    cat(sample, ":\n", sep = "")
    if (!model$convInfo$isConv) {
      cat(" Model didn't converge:\n ",
          model$convInfo$stopMessage, "\n", sep = "")
    } else {
      # One line per fitted coefficient
      for (param in names(coef(model))) {
        cat(" - ", param, " = ", coef(model)[param], "\n", sep = "")
      }
    }
    n.replicates <- length(unique(model$data[, "replicate"]))
    cat(" ", nrow(model$data), " observations, ",
        n.replicates, " replicates.\n",
        sep = "")
  }
  cat(rule)
  cat("call:", as.character(attr(object, "call")), "\n")
  cat(rule)
}
191
+
192
#' Enveomics: Growth Curve
#'
#' Calculates growth curves using the logistic growth function.
#'
#' @param x Data frame (or coercible) containing the observed growth data
#' (e.g., O.D. values). Each column is an independent growth curve and each
#' row is a time point. \code{NA}'s are allowed.
#' @param times Vector with the times at which each row was taken. By default,
#' all rows are assumed to be part of constantly periodic measurements.
#' @param triplicates If \code{TRUE}, the columns are assumed to be sorted by
#' sample with three replicates by sample. It requires a number of columns
#' multiple of 3.
#' @param design Experimental design of the data. An \strong{array} of mode list
#' with sample names as index and the list of column names in each sample as
#' the values. By default, each column is assumed to be an independent sample
#' if \code{triplicates} is \code{FALSE}, or every three columns are assumed
#' to be a sample if \code{triplicates} is \code{TRUE}. In the latter case,
#' samples are named after the first column of each triplet.
#' @param new.times Values of time for the fitted curve.
#' @param level Confidence (or prediction) interval in the fitted curve.
#' @param interval Type of interval to be calculated for the fitted curve.
#' @param plot Should the growth curve be plotted?
#' @param FUN Function to fit. By default: logistic growth with parameters
#' \code{K}: carrying capacity,
#' \code{r}: intrinsic growth rate, and
#' \code{P0}: Initial population.
#' @param nls.opt Any additional options passed to \code{nls}.
#' @param ... Any additional parameters to be passed to
#' \code{plot.enve.GrowthCurve}.
#'
#' @return Returns an \code{\link{enve.GrowthCurve}} object.
#'
#' @author Luis M. Rodriguez-R [aut, cre]
#'
#' @examples
#' # Load data
#' data("growth.curves", package="enveomics.R", envir=environment())
#' # Generate growth curves with different colors
#' g <- enve.growthcurve(growth.curves[,-1], growth.curves[,1], triplicates=TRUE)
#' # Generate black-and-white growth curves with different symbols
#' plot(g, pch=15:17, col="black", band.density=45, band.angle=c(-45,45,0))
#'
#' @export

#==============> Core functions
enve.growthcurve <- structure(function(
  x,
  times = 1:nrow(x),
  triplicates = FALSE,
  design,
  new.times = seq(min(times), max(times), length.out = length(times) * 10),
  level = 0.95,
  interval = c("confidence", "prediction"),
  plot = TRUE,
  FUN = function(t, K, r, P0) K * P0 * exp(r * t) / (K + P0 * (exp(r * t) - 1)),
  nls.opt = list(),
  ...
) {

  # Arguments
  if (missing(design)) {
    if (triplicates) {
      # Fail early with a clear message instead of a length mismatch in tapply
      if (ncol(x) %% 3 != 0) {
        stop("`triplicates = TRUE` requires a number of columns multiple of 3",
             call. = FALSE)
      }
      # Group every three consecutive columns under the name of the first one
      design <- tapply(
        colnames(x), colnames(x)[rep(1:(ncol(x) / 3) * 3 - 2, each = 3)], c,
        simplify = FALSE
      )
    } else {
      design <- tapply(colnames(x), colnames(x), c, simplify = FALSE)
    }
  }
  mod <- list()
  fit <- list()
  interval <- match.arg(interval)

  # The model formula refers to `enve._growth.fx` by name. The local NULL
  # binding is kept from the original code; `<<-` skips it and assigns FUN in
  # an enclosing environment, where function lookup finds it during fitting.
  enve._growth.fx <- NULL
  enve._growth.fx <<- FUN
  # Reset the non-local binding on any exit, including errors raised while
  # fitting or predicting, so FUN is not left behind.
  on.exit(enve._growth.fx <<- NULL, add = TRUE)

  for (sample in names(design)) {
    # Stack all replicates of this sample into a single long table
    od <- c()
    for (col in design[[sample]]) {
      od <- c(od, x[, col])
    }
    data <- data.frame(
      t = rep(times, length(design[[sample]])),
      od = od,
      replicate = rep(1:length(design[[sample]]), each = length(times))
    )
    data <- data[!is.na(data$od), ]

    # Options for nls: user-provided values take precedence over the defaults
    opts <- nls.opt
    opts[["data"]] <- data
    opt.defaults <- list(
      formula = od ~ enve._growth.fx(t, K, r, P0),
      algorithm = "port", lower = list(P0 = 1e-16),
      control = nls.control(warnOnly = TRUE),
      start = list(
        K = 2 * max(data$od),
        r = length(times) / max(data$t),
        P0 = min(data$od[data$od > 0])
      )
    )
    for (i in names(opt.defaults)) {
      if (is.null(opts[[i]])) {
        opts[[i]] <- opt.defaults[[i]]
      }
    }

    mod[[sample]] <- do.call(nls, opts)
    fit[[sample]] <- cbind(
      t = new.times,
      predFit(mod[[sample]], level = level, interval = interval,
              newdata = data.frame(t = new.times))
    )
  }

  gc <- new("enve.GrowthCurve",
            design = design, models = mod, predict = fit,
            call = match.call())
  if (plot) plot(gc, ...)
  return(gc)
}, ex = function() {
  # Load data
  data("growth.curves", package = "enveomics.R", envir = environment())
  # Generate growth curves with different colors
  g <- enve.growthcurve(growth.curves[, -1], growth.curves[, 1],
                        triplicates = TRUE)
  # Generate black-and-white growth curves with different symbols
  plot(g, pch = 15:17, col = "black", band.density = 45,
       band.angle = c(-45, 45, 0))
})
307
+
308
#' Enveomics: Color to Alpha
#'
#' Takes a vector of colors and sets the alpha.
#'
#' @param x A vector of any value base colors.
#' @param alpha Alpha level to set (in the 0-1 range).
#'
#' @return A character vector of hexadecimal colors including the alpha
#' channel, with the same names as \code{x}, or \code{NULL} if \code{x} is
#' empty.
#'
#' @author Luis M. Rodriguez-R [aut, cre]
#'
#' @export

enve.col2alpha <- function(
  x,
  alpha
) {
  # An empty input keeps returning NULL, as before
  if (length(x) == 0) return(NULL)

  # col2rgb() reports channels in the 0-255 range, so 255 (not 256) is the
  # divisor that maps them onto the 0-1 range expected by rgb(). Dividing by
  # 256 darkened every color slightly (e.g., white became "#FEFEFE").
  channels <- col2rgb(x) / 255
  out <- rgb(channels["red", ], channels["green", ], channels["blue", ],
             alpha = alpha)
  names(out) <- names(x)
  out
}
@@ -0,0 +1,79 @@
1
#' Enveomics: Pref Score
#'
#' Estimate preference score of species based on occupancy in biased sample sets
#'
#' @param x
#' Occupancy matrix (logical or numeric binary) with species as rows and samples
#' as columns
#' @param set
#' Vector indicating samples in the test set. It can be any selection vector:
#' boolean (same length as the number of columns in \code{x}), or numeric or
#' character vector with indexes of the \code{x} columns.
#' @param ignore
#' Vector indicating species to ignore. It can be any selection vector with
#' respect to the rows in \code{x} (see \code{set}).
#' @param signif.thr
#' Absolute value of the significance threshold. If missing, it is set to
#' \code{1 + 100 / n}, where \code{n} is the length of the species selection
#' vector.
#' @param plot Indicates if a plot should be generated
#' @param col.above Color for points significantly above zero
#' @param col.equal Color for points not significantly different from zero
#' @param col.below Color for points significantly below zero
#' @param ... Any additional parameters supported by \code{plot}
#'
#' @return
#' Returns a named vector of preference scores. As a side effect, it prints a
#' table with the number of species below (\code{<}), within (\code{=}), and
#' above (\code{>}) the significance threshold, and the total (\code{Tot}).
#'
#' @author Luis M. Rodriguez-R [aut, cre]
#'
#' @export

enve.prefscore <- function
    (
      x,
      set,
      ignore = NULL,
      signif.thr,
      plot = TRUE,
      col.above = rgb(148, 17, 0, maxColorValue = 255),
      col.equal = rgb(189, 189, 189, maxColorValue = 255),
      col.below = rgb(47, 84, 150, maxColorValue = 255),
      ...
    ) {
  # Normalize classes and filter universe
  x <- !!as.matrix(x)
  if(is.null(colnames(x))) colnames(x) <- seq_len(ncol(x))
  if(is.null(rownames(x))) rownames(x) <- seq_len(nrow(x))
  # NOTE(review): enve.selvector is defined elsewhere; it is used here as if it
  # returned a logical selection vector over the given names -- confirm.
  set <- enve.selvector(set, colnames(x))
  universe <- !enve.selvector(ignore, rownames(x))
  # drop = FALSE keeps the matrix structure even if a single species remains,
  # as required by rowSums/colSums below
  x.u <- x[universe, , drop = FALSE]
  if(missing(signif.thr)) signif.thr <- 1 + 100 / length(universe)

  # Base (null) probabilities
  p_a <- (rowSums(x.u) + 1) / (ncol(x.u) + 2)
  p_b <- (colSums(x.u) + 1) / (nrow(x.u) + 2)
  p_p <- p_a %*% t(p_b)

  # Set preference score
  # (drop = FALSE: the test set or its complement may contain a single sample)
  expected <- (rowSums(p_p[, set, drop = FALSE]) -
    rowSums(p_p[, !set, drop = FALSE])) / sum(p_p)
  observed <- (rowSums(x.u[, set, drop = FALSE]) -
    rowSums(x.u[, !set, drop = FALSE])) / sum(x.u)
  y <- observed / abs(expected)
  names(y) <- rownames(x)[universe]
  # Classes: 1 = significantly below, 2 = not significant, 3 = significantly
  # above
  y.code <- cut(y, c(-Inf, -signif.thr, signif.thr, Inf), 1:3)

  # Plot
  if(plot) {
    idx <- seq_len(nrow(x))[universe]
    # Colors are listed in the order of the y.code classes (below, equal,
    # above), so that col.above is applied to scores above the threshold
    opts.def <- list(x = idx, y = y, ylim = c(-1, 1) * max(abs(y)),
      xlab = 'Species', ylab = 'Preference score', xlim = c(0, nrow(x)+1),
      col = c(col.below, col.equal, col.above)[y.code],
      las = 1, xaxs = 'i', pch = 15)
    # Options explicitly passed by the caller take precedence over defaults
    opts <- list(...)
    for(i in names(opts.def)) if(is.null(opts[[i]])) opts[[i]] <- opts.def[[i]]
    do.call('plot', opts)
    abline(h = 0, lty = 1, col = rgb(0, 0, 0, 1/4))
    abline(h = c(-1, 1) * signif.thr, lty = 2, col = rgb(0, 0, 0, 1/4))
  }

  # Print and return
  print(table(c(c('<', '=', '>')[y.code], rep('Tot', length(y.code)))))
  cat('---------\n')
  return(y)
}
@@ -0,0 +1,354 @@
1
#' Enveomics: Recruitment Plots
#'
#' @description
#' Produces recruitment plots provided that BlastTab.catsbj.pl has
#' been previously executed. Requires the \pkg{gplots} library.
#'
#' @param prefix
#' Path to the prefix of the BlastTab.catsbj.pl output files. At
#' least the files \strong{.rec} and \strong{.lim} must exist with this prefix.
#' @param id.min
#' Minimum identity to be considered. By default, the minimum detected
#' identity. This value is a percentage.
#' @param id.max
#' Maximum identity to be considered. By default, 100\% for the identity
#' metrics and the maximum observed value for bit score.
#' @param id.binsize
#' Size of the identity bins (vertical histograms). By default, 0.1 for
#' identity metrics and 5 for bit score.
#' @param id.splines
#' Smoothing parameter for the splines in the identity histogram. Zero (0) for
#' no splines. A generally good value is 1/2. If non-zero, requires the
#' \pkg{stats} package.
#' @param id.metric
#' Metric of identity to be used (Y-axis).
#' It can be any unambiguous prefix of:
#' \itemize{
#'   \item "identity"
#'   \item "corrected identity"
#'   \item "bit score"}
#' @param id.summary
#' Method used to build the identity histogram (Horizontal axis of the right
#' panel). It can be any unambiguous prefix of:
#' \itemize{
#'   \item "sum"
#'   \item "average"
#'   \item "median"
#'   \item "90\% lower bound"
#'   \item "90\% upper bound"
#'   \item "95\% lower bound"
#'   \item "95\% upper bound" }
#' The last four options
#' correspond to the upper and lower boundaries of the 90\% and 95\% empirical
#' confidence intervals.
#' @param pos.min
#' Minimum (leftmost) position in the reference (concatenated) genome (in bp).
#' @param pos.max
#' Maximum (rightmost) position in the reference (concatenated) genome (in bp).
#' By default: Length of the genome.
#' @param pos.binsize
#' Size of the position bins (horizontal histograms) in bp.
#' @param pos.splines
#' Smoothing parameter for the splines in the position histogram. Zero (0) for
#' no splines. If non-zero, requires the stats package.
#' @param rec.col1
#' Lightest color in the recruitment plot.
#' @param rec.col2
#' Darkest color in the recruitment plot.
#' @param main
#' Title of the plot.
#' @param contig.col
#' Color of the Contig boundaries. Set to \code{NA} to ignore Contig boundaries.
#' @param ret.recplot
#' Indicates if the matrix of the recruitment plot is to be returned.
#' @param ret.hist
#' Indicates if the values of the identity and position histograms are to be
#' returned.
#' @param ret.mode
#' Ignored, for backwards compatibility.
#' @param id.cutoff
#' Minimum identity to consider an alignment as "top". By default, it is 95 for
#' the identity metrics and 95\% of the best scoring alignment for bit score.
#' @param verbose
#' Indicates if the function should report the advance.
#' @param ...
#' Any additional graphic parameters to be passed to plot for all panels except
#' the recruitment plot (lower-left).
#'
#' @return
#'
#' Returns a list with the following elements:
#'
#' \describe{
#'   \item{\code{pos.marks}}{Boundaries of the position histogram bins
#'     (in Mbp).}
#'   \item{\code{id.marks}}{Boundaries of the identity histogram bins.}
#'   \item{\code{recplot}}{Matrix containing the recruitment plot values
#'     (if \code{ret.recplot=TRUE}).}
#'   \item{\code{id.mean}}{Mean identity.}
#'   \item{\code{id.median}}{Median identity.}
#'   \item{\code{id.hist}}{Values of the identity histogram (if
#'     \code{ret.hist=TRUE}).}
#'   \item{\code{pos.hist.low}}{Values of the position histogram (depth) with
#'     "low" identity (i.e., below id.cutoff) (if \code{ret.hist=TRUE}).}
#'   \item{\code{pos.hist.top}}{Values of the position histogram (depth) with
#'     "top" identity (i.e., above id.cutoff) (if \code{ret.hist=TRUE}).}
#'   \item{\code{id.max}}{Value of \code{id.max}. This is returned because
#'     \code{id.max=NULL} may vary.}
#'   \item{\code{id.cutoff}}{Effective identity cutoff: the identity mark at
#'     the lower boundary of the "top" identity bins. This is returned because
#'     \code{id.cutoff=NULL} may vary.}
#'   \item{\code{seqdepth.mean.top}}{Average sequencing depth with identity
#'     above \code{id.cutoff}.}
#'   \item{\code{seqdepth.mean.low}}{Average sequencing depth with identity
#'     below \code{id.cutoff}.}
#'   \item{\code{seqdepth.mean}}{Average sequencing depth without identity
#'     filtering.}
#'   \item{\code{seqdepth.median.top}}{Median sequencing depth with identity
#'     above \code{id.cutoff}.}
#'   \item{\code{seqdepth.median.low}}{Median sequencing depth with identity
#'     below \code{id.cutoff}.}
#'   \item{\code{seqdepth.median}}{Median sequencing depth without identity
#'     filtering.}
#'   \item{\code{id.metric}}{Full name of the used identity metric.}
#'   \item{\code{id.summary}}{Name of the summary method used to build the
#'     identity plot.}}
#'
#' The mode of the identity (\code{id.mode}) is no longer computed nor
#' returned.
#'
#' @author Luis M. Rodriguez-R [aut, cre]
#'
#' @export

enve.recplot <- structure(function(
      prefix,

      # Id. hist.
      id.min=NULL,
      id.max=NULL,
      id.binsize=NULL,
      id.splines=0,
      id.metric='id',
      id.summary='sum',

      # Pos. hist.
      pos.min=1,
      pos.max=NULL,
      pos.binsize=1e3,
      pos.splines=0,

      # Rec. plot
      rec.col1='white',
      rec.col2='black',

      # General
      main=NULL,
      contig.col=grey(0.85),

      # Return
      ret.recplot=FALSE,
      ret.hist=FALSE,
      ret.mode=FALSE,

      # General
      id.cutoff=NULL,
      verbose=TRUE,
      ...
    ){

  # Settings
  METRICS <- c('identity', 'corrected identity', 'bit score')
  # The order of SUMMARY must match the branches of "Configure ID summary"
  SUMMARY <- c('sum', 'average', 'median', '90% lower bound',
    '90% upper bound', '95% lower bound', '95% upper bound')
  if(is.null(prefix)) stop('Parameter prefix is mandatory.')
  if(!requireNamespace("gplots", quietly=TRUE))
    stop('Unavailable gplots library.')

  # Read files
  if(verbose) cat("Reading files.\n")
  rec <- read.table(paste0(prefix, '.rec'), sep="\t", comment.char='',
    quote='')
  lim <- read.table(paste0(prefix, '.lim'), sep="\t", comment.char='',
    quote='')

  # Configure ID summary
  # charmatch() yields NA for no match and 0 for an ambiguous prefix, so both
  # error conditions can be told apart
  id.summary <- charmatch(id.summary, SUMMARY)
  if(is.na(id.summary)) stop('Invalid identity summary.')
  if(id.summary == 0) stop('Ambiguous identity summary.')
  if(id.summary==1){
    id.summary.func <- function(x) colSums(x)
    id.summary.name <- 'sum'
  }else if(id.summary==2){
    id.summary.func <- function(x) colMeans(x)
    id.summary.name <- 'mean'
  }else if(id.summary==3){
    id.summary.func <- function(x) apply(x,2,median)
    id.summary.name <- 'median'
  }else if(id.summary==4){
    id.summary.func <- function(x) apply(x,2,quantile,probs=0.05,names=FALSE)
    id.summary.name <- '90% LB'
  }else if(id.summary==5){
    id.summary.func <- function(x) apply(x,2,quantile,probs=0.95,names=FALSE)
    id.summary.name <- '90% UB'
  }else if(id.summary==6){
    id.summary.func <- function(x) apply(x,2,quantile,probs=0.025,names=FALSE)
    id.summary.name <- '95% LB'
  }else if(id.summary==7){
    id.summary.func <- function(x) apply(x,2,quantile,probs=0.975,names=FALSE)
    id.summary.name <- '95% UB'
  }

  # Configure metrics
  # id.reccol is the column of the .rec table holding the selected metric
  id.metric <- charmatch(id.metric, METRICS)
  if(is.na(id.metric)) stop('Invalid identity metric.')
  if(id.metric == 0) stop('Ambiguous identity metric.')
  if(id.metric==1){
    id.reccol <- 3
    id.shortname <- 'Id.'
    id.fullname <- 'Identity'
    id.units <- '%'
    id.hallmarks <- seq(0, 100, by=5)
    if(is.null(id.max)) id.max <- 100
    if(is.null(id.cutoff)) id.cutoff <- 95
    if(is.null(id.binsize)) id.binsize <- 0.1
  }else if(id.metric==2){
    if(ncol(rec)<6)
      stop("Requesting corrected identity, but .rec file doesn't have 6th column")
    id.reccol <- 6
    id.shortname <- 'cId.'
    id.fullname <- 'Corrected identity'
    id.units <- '%'
    id.hallmarks <- seq(0, 100, by=5)
    if(is.null(id.max)) id.max <- 100
    if(is.null(id.cutoff)) id.cutoff <- 95
    if(is.null(id.binsize)) id.binsize <- 0.1
  }else if(id.metric==3){
    id.reccol <- 4
    id.shortname <- 'BSc.'
    id.fullname <- 'Bit score'
    id.units <- 'bits'
    max.bs <- max(rec[, id.reccol])
    id.hallmarks <- seq(0, max.bs*1.2, by=50)
    if(is.null(id.max)) id.max <- max.bs
    if(is.null(id.cutoff)) id.cutoff <- 0.95 * max.bs
    if(is.null(id.binsize)) id.binsize <- 5
  }
  if(is.null(id.min)) id.min <- min(rec[, id.reccol])
  if(is.null(pos.max)) pos.max <- max(lim[, 3])
  id.lim <- c(id.min, id.max)
  pos.lim <- c(pos.min, pos.max)/1e6
  id.breaks <- round((id.max-id.min)/id.binsize)
  pos.breaks <- round((pos.max-pos.min)/pos.binsize)
  if(is.null(main)) main <- paste0('Recruitment plot of ', prefix)
  # Bin boundaries (positions are expressed in Mbp)
  pos.marks <- seq(pos.min, pos.max, length.out=pos.breaks+1)/1e6
  id.marks <- seq(id.min, id.max, length.out=id.breaks+1)
  # Number of identity marks above the cutoff
  id.topclasses <- 0
  for(i in length(id.marks):1)
    if(id.marks[i]>id.cutoff) id.topclasses <- id.topclasses + 1

  # Set-up image
  # Panels are drawn in this order: 1. Recruitment plot (lower-left),
  # 2. identity histogram (lower-right), 3. position histogram (upper-left),
  # and 4. legend (upper-right)
  layout(matrix(c(3,4,1,2), nrow=2, byrow=TRUE), widths=c(2,1), heights=c(1,2))
  out <- list()

  # Recruitment plot
  if(verbose) cat("Rec. plot.\n")
  par(mar=c(5,4,0,0)+0.1)
  # Counts of aligned positions per (position bin, identity bin)
  rec.hist <- matrix(0, nrow=pos.breaks, ncol=id.breaks)
  for(i in 1:nrow(rec)){
    id.class <- ceiling((id.breaks)*((rec[i, id.reccol]-id.min)/(id.max-id.min)))
    if(id.class<=id.breaks && id.class>0){
      # Columns 1 and 2 of the .rec table are used as the first and last
      # position covered by the alignment
      for(pos in rec[i, 1]:rec[i, 2]){
        pos.class <- ceiling((pos.breaks)*((pos-pos.min)/(pos.max-pos.min)))
        if(pos.class<=pos.breaks && pos.class>0)
          rec.hist[pos.class, id.class] <- rec.hist[pos.class, id.class]+1
      }
    }
  }
  # Indexes of the identity bins considered "top" (the last id.topclasses bins)
  id.top <- c((1-id.topclasses):0) + id.breaks
  rec.col <- gplots::colorpanel(256, rec.col1, rec.col2)
  image(x=pos.marks, y=id.marks, z=log10(rec.hist),
    breaks=seq(0, log10(max(rec.hist)), length.out=1+length(rec.col)),
    col=rec.col,
    xlim=pos.lim, ylim=id.lim, xlab='Position in genome (Mbp)',
    ylab=paste0(id.fullname, ' (',id.units,')'), xaxs='i', yaxs='r')
  if(!is.na(contig.col))
    abline(v=c(lim$V2, lim$V3)/1e6, lty=1, col=contig.col)
  abline(h=id.hallmarks, lty=2, col=grey(0.7))
  abline(h=id.marks[id.top[1]], lty=3, col=grey(0.5))
  legend('bottomleft', 'Rec. plot', bg=rgb(1,1,1,2/3))
  out <- c(out, list(pos.marks=pos.marks, id.marks=id.marks))
  if(ret.recplot) out <- c(out, list(recplot=rec.hist))

  # Identity histogram
  if(verbose) cat(id.shortname, " hist.\n", sep='')
  par(mar=c(5,0,0,2)+0.1)
  id.hist <- id.summary.func(rec.hist)
  plot(1, type='n', xlim=c(1, max(id.hist)), ylim=id.lim, ylab='', yaxt='n',
    xlab=paste('Sequences (bp),', id.summary.name), log='x', ...)
  # Duplicate each value to draw the histogram as a step function
  id.x <- rep(id.marks, each=2)[2:(id.breaks*2+1)]
  id.f <- rep(id.hist, each=2)[1:(id.breaks*2)]
  if(sum(id.f)>0){
    lines(id.f, id.x, lwd=ifelse(id.splines>0, 1/2, 2), type='o', pch='.')
    if(id.splines>0){
      id.spline <- smooth.spline(id.x[id.f>0], log(id.f[id.f>0]),
        spar=id.splines)
      lines(exp(id.spline$y), id.spline$x, lwd=2)
    }
  }

  abline(h=id.hallmarks, lty=2, col=grey(0.7))
  abline(h=id.marks[id.top[1]], lty=3, col=grey(0.5))
  legend('bottomright', paste(id.shortname, 'histogram'), bg=rgb(1,1,1,2/3))
  out <- c(out, list(id.mean=mean(rec[, id.reccol])))
  out <- c(out, list(id.median=median(rec[, id.reccol])))
  if(ret.hist) out <- c(out, list(id.hist=id.hist))

  # Position histogram
  if(verbose) cat("Pos. hist.\n")
  par(mar=c(0,4,4,0)+0.1)
  # h1: depth in "top" identity bins, h2: depth in the remaining ("low") bins
  h1 <- rep(0, nrow(rec.hist))
  h2 <- rep(0, nrow(rec.hist))
  pos.winsize <- (pos.max-pos.min+1)/pos.breaks
  if(sum(rec.hist[, id.top])>0)
    h1 <- rowSums(matrix(rec.hist[, id.top], nrow=nrow(rec.hist)))/pos.winsize
  if(sum(rec.hist[,-id.top])>0)
    h2 <- rowSums(matrix(rec.hist[,-id.top], nrow=nrow(rec.hist)))/pos.winsize

  # The Y-axis is logarithmic, hence the limits must be strictly positive
  ymin <- min(1, h1[h1>0], h2[h2>0])
  ymax <- max(10, h1, h2)
  if(is.na(ymin) || ymin<=0) ymin <- 1e-10
  if(is.na(ymax) || ymax<=0) ymax <- 1
  plot(1, type='n', xlab='', xaxt='n', ylab='Sequencing depth (X)', log='y',
    xlim=pos.lim, ylim=c(ymin, ymax), xaxs='i', main=main, ...)
  if(!is.na(contig.col))
    abline(v=c(lim[,2], lim[,3])/1e6, lty=1, col=contig.col)
  abline(h=10^c(0:5), lty=2, col=grey(0.7))
  if(sum(h2)>0){
    h2.x <- rep(pos.marks, each=2)[2:(pos.breaks*2+1)]
    h2.y <- rep(h2, each=2)[1:(pos.breaks*2)]
    lines(h2.x, h2.y, lwd=ifelse(pos.splines>0, 1/2, 2), col=grey(0.5))
    if(pos.splines>0){
      h2.spline <- smooth.spline(h2.x[h2.y>0], log(h2.y[h2.y>0]),
        spar=pos.splines)
      lines(h2.spline$x, exp(h2.spline$y), lwd=2, col=grey(0.5))
    }
    if(ret.hist) out <- c(out, list(pos.hist.low=h2.y))
  }
  if(sum(h1)>0){
    h1.x <- rep(pos.marks, each=2)[2:(pos.breaks*2+1)]
    h1.y <- rep(h1, each=2)[1:(pos.breaks*2)]
    lines(h1.x, h1.y, lwd=ifelse(pos.splines>0, 1/2, 2), col=grey(0))
    if(pos.splines>0){
      h1.spline <- smooth.spline(h1.x[h1.y>0], log(h1.y[h1.y>0]),
        spar=pos.splines)
      lines(h1.spline$x, exp(h1.spline$y), lwd=2, col=grey(0))
    }
    if(ret.hist) out <- c(out, list(pos.hist.top=h1.y))
  }
  legend('topleft', 'Pos. histogram', bg=rgb(1,1,1,2/3))
  out <- c(out, list(id.max=id.max, id.cutoff=id.marks[id.top[1]]))
  out <- c(out, list(seqdepth.mean.top=mean(h1)))
  out <- c(out, list(seqdepth.mean.low=mean(h2)))
  out <- c(out, list(seqdepth.mean=mean(h1+h2)))
  out <- c(out, list(seqdepth.median.top=median(h1)))
  out <- c(out, list(seqdepth.median.low=median(h2)))
  out <- c(out, list(seqdepth.median=median(h1+h2)))
  out <- c(out, list(id.metric=id.fullname))
  out <- c(out, list(id.summary=id.summary.name))

  # Legend
  par(mar=c(0,0,4,2)+0.1)
  plot(1, type='n', xlab='', xaxt='n', ylab='', yaxt='n', xlim=c(0,1),
    ylim=c(0,1), xaxs='r', yaxs='i', ...)
  text(1/2, 5/6,
    labels=paste0('Reads per ', signif((pos.max-pos.min)/pos.breaks, 2),
      ' bp (rec. plot)'),
    pos=3)
  # Color scale: 100 adjacent color cells with a label every 10 cells
  leg.col <- gplots::colorpanel(100, rec.col1, rec.col2)
  leg.lab <- signif(10^seq(0, log10(max(rec.hist)), length.out=10), 2)
  for(i in 1:10){
    for(j in 1:10){
      k <- (i-1)*10 + j
      polygon(c(k-1, k, k, k-1)/100, c(2/3, 2/3, 5/6, 5/6), border=leg.col[k],
        col=leg.col[k])
    }
    text((i-0.5)/10, 2/3, labels=paste(leg.lab[i], ''), srt=90, pos=2,
      offset=0, cex=3/4)
  }
  legend('bottom',
    legend=c('Contig boundary', 'Hallmark', paste(id.fullname, 'cutoff'),
      paste0('Pos. hist.: ',id.shortname,' > ',
        signif(id.marks[id.top[1]],2),id.units),
      paste0('Pos. hist.: ',id.shortname,' < ',
        signif(id.marks[id.top[1]],2),id.units)),
    ncol=2, col=grey(c(0.85, 0.7, 0.5, 0, 0.5)), lty=c(1,2,3,1,1),
    lwd=c(1,1,1,2,2), bty='n', inset=0.05, cex=5/6)
  return(out)
})
354
+