miga-base 0.7.26.0 → 1.0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (337)
  1. checksums.yaml +4 -4
  2. data/lib/miga/_data/aai-intax.blast.tsv.gz +0 -0
  3. data/lib/miga/_data/aai-intax.diamond.tsv.gz +0 -0
  4. data/lib/miga/_data/aai-novel.blast.tsv.gz +0 -0
  5. data/lib/miga/_data/aai-novel.diamond.tsv.gz +0 -0
  6. data/lib/miga/cli/action/classify_wf.rb +2 -2
  7. data/lib/miga/cli/action/derep_wf.rb +1 -1
  8. data/lib/miga/cli/action/doctor.rb +57 -14
  9. data/lib/miga/cli/action/doctor/base.rb +47 -23
  10. data/lib/miga/cli/action/init.rb +11 -7
  11. data/lib/miga/cli/action/init/files_helper.rb +1 -0
  12. data/lib/miga/cli/action/ncbi_get.rb +3 -3
  13. data/lib/miga/cli/action/tax_dist.rb +2 -2
  14. data/lib/miga/cli/action/wf.rb +5 -4
  15. data/lib/miga/common.rb +1 -0
  16. data/lib/miga/daemon.rb +11 -4
  17. data/lib/miga/dataset/result.rb +10 -6
  18. data/lib/miga/json.rb +5 -4
  19. data/lib/miga/metadata.rb +5 -1
  20. data/lib/miga/parallel.rb +36 -0
  21. data/lib/miga/project.rb +8 -8
  22. data/lib/miga/project/base.rb +4 -4
  23. data/lib/miga/project/result.rb +2 -2
  24. data/lib/miga/sqlite.rb +10 -2
  25. data/lib/miga/version.rb +23 -9
  26. data/scripts/aai_distances.bash +16 -18
  27. data/scripts/ani_distances.bash +16 -17
  28. data/scripts/assembly.bash +31 -16
  29. data/scripts/haai_distances.bash +3 -27
  30. data/scripts/miga.bash +6 -4
  31. data/scripts/p.bash +1 -1
  32. data/scripts/read_quality.bash +9 -18
  33. data/scripts/trimmed_fasta.bash +14 -30
  34. data/scripts/trimmed_reads.bash +36 -36
  35. data/test/parallel_test.rb +31 -0
  36. data/test/project_test.rb +2 -1
  37. data/test/remote_dataset_test.rb +1 -1
  38. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +41964 -0
  39. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +32439 -0
  40. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +62056 -0
  41. data/utils/FastAAI/FastAAI/FastAAI +1336 -0
  42. data/utils/FastAAI/README.md +84 -0
  43. data/utils/FastAAI/kAAI_v1.0_virus.py +1296 -0
  44. data/utils/distance/commands.rb +1 -0
  45. data/utils/distance/database.rb +0 -1
  46. data/utils/distance/runner.rb +2 -4
  47. data/utils/enveomics/Docs/recplot2.md +244 -0
  48. data/utils/enveomics/Examples/aai-matrix.bash +66 -0
  49. data/utils/enveomics/Examples/ani-matrix.bash +66 -0
  50. data/utils/enveomics/Examples/essential-phylogeny.bash +105 -0
  51. data/utils/enveomics/Examples/unus-genome-phylogeny.bash +100 -0
  52. data/utils/enveomics/LICENSE.txt +73 -0
  53. data/utils/enveomics/Makefile +52 -0
  54. data/utils/enveomics/Manifest/Tasks/aasubs.json +103 -0
  55. data/utils/enveomics/Manifest/Tasks/blasttab.json +786 -0
  56. data/utils/enveomics/Manifest/Tasks/distances.json +161 -0
  57. data/utils/enveomics/Manifest/Tasks/fasta.json +802 -0
  58. data/utils/enveomics/Manifest/Tasks/fastq.json +291 -0
  59. data/utils/enveomics/Manifest/Tasks/graphics.json +126 -0
  60. data/utils/enveomics/Manifest/Tasks/mapping.json +137 -0
  61. data/utils/enveomics/Manifest/Tasks/ogs.json +382 -0
  62. data/utils/enveomics/Manifest/Tasks/other.json +906 -0
  63. data/utils/enveomics/Manifest/Tasks/remote.json +355 -0
  64. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +638 -0
  65. data/utils/enveomics/Manifest/Tasks/tables.json +308 -0
  66. data/utils/enveomics/Manifest/Tasks/trees.json +68 -0
  67. data/utils/enveomics/Manifest/Tasks/variants.json +111 -0
  68. data/utils/enveomics/Manifest/categories.json +165 -0
  69. data/utils/enveomics/Manifest/examples.json +154 -0
  70. data/utils/enveomics/Manifest/tasks.json +4 -0
  71. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +69 -0
  72. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -0
  73. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -0
  74. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -0
  75. data/utils/enveomics/Pipelines/assembly.pbs/README.md +189 -0
  76. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +112 -0
  77. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +23 -0
  78. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +44 -0
  79. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +50 -0
  80. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +37 -0
  81. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +68 -0
  82. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +49 -0
  83. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +80 -0
  84. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +57 -0
  85. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +63 -0
  86. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +38 -0
  87. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +73 -0
  88. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +21 -0
  89. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +72 -0
  90. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +98 -0
  91. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -0
  92. data/utils/enveomics/Pipelines/blast.pbs/README.md +127 -0
  93. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +109 -0
  94. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +128 -0
  95. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +16 -0
  96. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +22 -0
  97. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +26 -0
  98. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +89 -0
  99. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +29 -0
  100. data/utils/enveomics/Pipelines/idba.pbs/README.md +49 -0
  101. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +95 -0
  102. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +56 -0
  103. data/utils/enveomics/Pipelines/trim.pbs/README.md +54 -0
  104. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +70 -0
  105. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +130 -0
  106. data/utils/enveomics/README.md +42 -0
  107. data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +171 -0
  108. data/utils/enveomics/Scripts/Aln.cat.rb +221 -0
  109. data/utils/enveomics/Scripts/Aln.convert.pl +35 -0
  110. data/utils/enveomics/Scripts/AlphaDiversity.pl +152 -0
  111. data/utils/enveomics/Scripts/BedGraph.tad.rb +93 -0
  112. data/utils/enveomics/Scripts/BedGraph.window.rb +71 -0
  113. data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +102 -0
  114. data/utils/enveomics/Scripts/BlastTab.addlen.rb +63 -0
  115. data/utils/enveomics/Scripts/BlastTab.advance.bash +48 -0
  116. data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +55 -0
  117. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +104 -0
  118. data/utils/enveomics/Scripts/BlastTab.cogCat.rb +76 -0
  119. data/utils/enveomics/Scripts/BlastTab.filter.pl +47 -0
  120. data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +194 -0
  121. data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +104 -0
  122. data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +157 -0
  123. data/utils/enveomics/Scripts/BlastTab.recplot2.R +48 -0
  124. data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +86 -0
  125. data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +119 -0
  126. data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +86 -0
  127. data/utils/enveomics/Scripts/BlastTab.subsample.pl +47 -0
  128. data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +114 -0
  129. data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +90 -0
  130. data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +101 -0
  131. data/utils/enveomics/Scripts/Chao1.pl +97 -0
  132. data/utils/enveomics/Scripts/CharTable.classify.rb +234 -0
  133. data/utils/enveomics/Scripts/EBIseq2tax.rb +83 -0
  134. data/utils/enveomics/Scripts/FastA.N50.pl +60 -0
  135. data/utils/enveomics/Scripts/FastA.extract.rb +152 -0
  136. data/utils/enveomics/Scripts/FastA.filter.pl +52 -0
  137. data/utils/enveomics/Scripts/FastA.filterLen.pl +28 -0
  138. data/utils/enveomics/Scripts/FastA.filterN.pl +60 -0
  139. data/utils/enveomics/Scripts/FastA.fragment.rb +100 -0
  140. data/utils/enveomics/Scripts/FastA.gc.pl +42 -0
  141. data/utils/enveomics/Scripts/FastA.interpose.pl +93 -0
  142. data/utils/enveomics/Scripts/FastA.length.pl +38 -0
  143. data/utils/enveomics/Scripts/FastA.mask.rb +89 -0
  144. data/utils/enveomics/Scripts/FastA.per_file.pl +36 -0
  145. data/utils/enveomics/Scripts/FastA.qlen.pl +57 -0
  146. data/utils/enveomics/Scripts/FastA.rename.pl +65 -0
  147. data/utils/enveomics/Scripts/FastA.revcom.pl +23 -0
  148. data/utils/enveomics/Scripts/FastA.sample.rb +98 -0
  149. data/utils/enveomics/Scripts/FastA.slider.pl +85 -0
  150. data/utils/enveomics/Scripts/FastA.split.pl +55 -0
  151. data/utils/enveomics/Scripts/FastA.split.rb +79 -0
  152. data/utils/enveomics/Scripts/FastA.subsample.pl +131 -0
  153. data/utils/enveomics/Scripts/FastA.tag.rb +65 -0
  154. data/utils/enveomics/Scripts/FastA.toFastQ.rb +69 -0
  155. data/utils/enveomics/Scripts/FastA.wrap.rb +48 -0
  156. data/utils/enveomics/Scripts/FastQ.filter.pl +54 -0
  157. data/utils/enveomics/Scripts/FastQ.interpose.pl +90 -0
  158. data/utils/enveomics/Scripts/FastQ.maskQual.rb +89 -0
  159. data/utils/enveomics/Scripts/FastQ.offset.pl +90 -0
  160. data/utils/enveomics/Scripts/FastQ.split.pl +53 -0
  161. data/utils/enveomics/Scripts/FastQ.tag.rb +70 -0
  162. data/utils/enveomics/Scripts/FastQ.test-error.rb +81 -0
  163. data/utils/enveomics/Scripts/FastQ.toFastA.awk +24 -0
  164. data/utils/enveomics/Scripts/GFF.catsbj.pl +127 -0
  165. data/utils/enveomics/Scripts/GenBank.add_fields.rb +84 -0
  166. data/utils/enveomics/Scripts/HMM.essential.rb +351 -0
  167. data/utils/enveomics/Scripts/HMM.haai.rb +168 -0
  168. data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +83 -0
  169. data/utils/enveomics/Scripts/JPlace.distances.rb +88 -0
  170. data/utils/enveomics/Scripts/JPlace.to_iToL.rb +320 -0
  171. data/utils/enveomics/Scripts/M5nr.getSequences.rb +81 -0
  172. data/utils/enveomics/Scripts/MeTaxa.distribution.pl +198 -0
  173. data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +35 -0
  174. data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +49 -0
  175. data/utils/enveomics/Scripts/NCBIacc2tax.rb +92 -0
  176. data/utils/enveomics/Scripts/Newick.autoprune.R +27 -0
  177. data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +228 -0
  178. data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +32 -0
  179. data/utils/enveomics/Scripts/RefSeq.download.bash +48 -0
  180. data/utils/enveomics/Scripts/SRA.download.bash +55 -0
  181. data/utils/enveomics/Scripts/TRIBS.plot-test.R +36 -0
  182. data/utils/enveomics/Scripts/TRIBS.test.R +39 -0
  183. data/utils/enveomics/Scripts/Table.barplot.R +31 -0
  184. data/utils/enveomics/Scripts/Table.df2dist.R +30 -0
  185. data/utils/enveomics/Scripts/Table.filter.pl +61 -0
  186. data/utils/enveomics/Scripts/Table.merge.pl +77 -0
  187. data/utils/enveomics/Scripts/Table.prefScore.R +60 -0
  188. data/utils/enveomics/Scripts/Table.replace.rb +69 -0
  189. data/utils/enveomics/Scripts/Table.round.rb +63 -0
  190. data/utils/enveomics/Scripts/Table.split.pl +57 -0
  191. data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +227 -0
  192. data/utils/enveomics/Scripts/VCF.KaKs.rb +147 -0
  193. data/utils/enveomics/Scripts/VCF.SNPs.rb +88 -0
  194. data/utils/enveomics/Scripts/aai.rb +419 -0
  195. data/utils/enveomics/Scripts/ani.rb +362 -0
  196. data/utils/enveomics/Scripts/anir.rb +137 -0
  197. data/utils/enveomics/Scripts/clust.rand.rb +102 -0
  198. data/utils/enveomics/Scripts/gi2tax.rb +103 -0
  199. data/utils/enveomics/Scripts/in_silico_GA_GI.pl +96 -0
  200. data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
  201. data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
  202. data/utils/enveomics/Scripts/lib/enveomics.R +1 -0
  203. data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +293 -0
  204. data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +175 -0
  205. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +24 -0
  206. data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +17 -0
  207. data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +30 -0
  208. data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +253 -0
  209. data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +63 -0
  210. data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +182 -0
  211. data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +49 -0
  212. data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +74 -0
  213. data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +237 -0
  214. data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +3 -0
  215. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +31 -0
  216. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +152 -0
  217. data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +73 -0
  218. data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +135 -0
  219. data/utils/enveomics/Scripts/ogs.annotate.rb +88 -0
  220. data/utils/enveomics/Scripts/ogs.core-pan.rb +160 -0
  221. data/utils/enveomics/Scripts/ogs.extract.rb +125 -0
  222. data/utils/enveomics/Scripts/ogs.mcl.rb +186 -0
  223. data/utils/enveomics/Scripts/ogs.rb +104 -0
  224. data/utils/enveomics/Scripts/ogs.stats.rb +131 -0
  225. data/utils/enveomics/Scripts/rbm-legacy.rb +172 -0
  226. data/utils/enveomics/Scripts/rbm.rb +100 -0
  227. data/utils/enveomics/Scripts/sam.filter.rb +148 -0
  228. data/utils/enveomics/Tests/Makefile +10 -0
  229. data/utils/enveomics/Tests/Mgen_M2288.faa +3189 -0
  230. data/utils/enveomics/Tests/Mgen_M2288.fna +8282 -0
  231. data/utils/enveomics/Tests/Mgen_M2321.fna +8288 -0
  232. data/utils/enveomics/Tests/Nequ_Kin4M.faa +2970 -0
  233. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
  234. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +7 -0
  235. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +17 -0
  236. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +137 -0
  237. data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +123 -0
  238. data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +200 -0
  239. data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +55 -0
  240. data/utils/enveomics/Tests/alkB.nwk +1 -0
  241. data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +13 -0
  242. data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +17 -0
  243. data/utils/enveomics/Tests/hiv1.faa +59 -0
  244. data/utils/enveomics/Tests/hiv1.fna +134 -0
  245. data/utils/enveomics/Tests/hiv2.faa +70 -0
  246. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +233 -0
  247. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +1 -0
  248. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +233 -0
  249. data/utils/enveomics/Tests/phyla_counts.tsv +10 -0
  250. data/utils/enveomics/Tests/primate_lentivirus.ogs +11 -0
  251. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +9 -0
  252. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +8 -0
  253. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +6 -0
  254. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +9 -0
  255. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +6 -0
  256. data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +6 -0
  257. data/utils/enveomics/build_enveomics_r.bash +45 -0
  258. data/utils/enveomics/enveomics.R/DESCRIPTION +31 -0
  259. data/utils/enveomics/enveomics.R/NAMESPACE +39 -0
  260. data/utils/enveomics/enveomics.R/R/autoprune.R +155 -0
  261. data/utils/enveomics/enveomics.R/R/barplot.R +184 -0
  262. data/utils/enveomics/enveomics.R/R/cliopts.R +135 -0
  263. data/utils/enveomics/enveomics.R/R/df2dist.R +154 -0
  264. data/utils/enveomics/enveomics.R/R/growthcurve.R +331 -0
  265. data/utils/enveomics/enveomics.R/R/prefscore.R +79 -0
  266. data/utils/enveomics/enveomics.R/R/recplot.R +354 -0
  267. data/utils/enveomics/enveomics.R/R/recplot2.R +1631 -0
  268. data/utils/enveomics/enveomics.R/R/tribs.R +583 -0
  269. data/utils/enveomics/enveomics.R/R/utils.R +80 -0
  270. data/utils/enveomics/enveomics.R/README.md +81 -0
  271. data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
  272. data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
  273. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +16 -0
  274. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +16 -0
  275. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +16 -0
  276. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +25 -0
  277. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +46 -0
  278. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -0
  279. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +47 -0
  280. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -0
  281. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -0
  282. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +40 -0
  283. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +103 -0
  284. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +67 -0
  285. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -0
  286. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -0
  287. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +45 -0
  288. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +44 -0
  289. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +47 -0
  290. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +75 -0
  291. data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +50 -0
  292. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +44 -0
  293. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +139 -0
  294. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +45 -0
  295. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -0
  296. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +77 -0
  297. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -0
  298. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -0
  299. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -0
  300. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -0
  301. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +47 -0
  302. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -0
  303. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -0
  304. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +45 -0
  305. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -0
  306. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -0
  307. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -0
  308. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -0
  309. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +52 -0
  310. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -0
  311. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +51 -0
  312. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +43 -0
  313. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +82 -0
  314. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +59 -0
  315. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -0
  316. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +36 -0
  317. data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +23 -0
  318. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +68 -0
  319. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -0
  320. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -0
  321. data/utils/enveomics/enveomics.R/man/growth.curves.Rd +14 -0
  322. data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +13 -0
  323. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +78 -0
  324. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +46 -0
  325. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +45 -0
  326. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +125 -0
  327. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -0
  328. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -0
  329. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -0
  330. data/utils/enveomics/globals.mk +8 -0
  331. data/utils/enveomics/manifest.json +9 -0
  332. data/utils/multitrim/Multitrim How-To.pdf +0 -0
  333. data/utils/multitrim/README.md +67 -0
  334. data/utils/multitrim/multitrim.py +1555 -0
  335. data/utils/multitrim/multitrim.yml +13 -0
  336. data/utils/requirements.txt +4 -3
  337. metadata +304 -3
@@ -0,0 +1,31 @@
#!/usr/bin/env Rscript
#
# @author Luis M. Rodriguez-R
# @update Dec-29-2015
# @license artistic license 2.0
#
# Command-line wrapper around enve.barplot(): opens a PDF device on the first
# positional argument (optionally followed by width and height), draws the
# barplot with the parsed options, and closes the device.
#

#= Locate the enveomics.R sources shipped next to this script and load them
args <- commandArgs(trailingOnly = FALSE)
script_path <- sub("^--file=", "", args[grep("^--file=", args)])
enveomics_R <- file.path(dirname(script_path), "lib", "enveomics.R")
for (r_source in c("cliopts.R", "utils.R", "barplot.R")) {
  source(file.path(enveomics_R, "R", r_source))
}

#= Build the command-line interface from the function's Rd documentation
opt <- enve.cliopts(
  enve.barplot,
  file.path(enveomics_R, "man", "enve.barplot.Rd"),
  positional_arguments = c(1, 3),
  usage = "usage: %prog [options] output.pdf [width height]",
  mandatory = c("x"),
  vectorize = c("sizes", "order", "col"),
  number = c("sizes", "order"),
  o_desc = list(
    x = "A tab-delimited file containing header (first row) and row names (first column)."
  )
)

#= Open the PDF device, plot, and close
# Positional arguments 2 and 3 (width and height), when given, arrive as
# strings and must be numeric before they are handed to pdf().
args <- as.list(opt$args)
for (pos in 2:3) {
  if (length(args) >= pos) {
    args[[pos]] <- as.numeric(args[[pos]])
  }
}
do.call("pdf", args)
do.call("enve.barplot", opt$options)
dev.off()
@@ -0,0 +1,30 @@
#!/usr/bin/env Rscript
#
# @author Luis M. Rodriguez-R
# @update Jan-04-2016
# @license artistic license 2.0
#
# Command-line wrapper around enve.df2dist(): reads a tab-delimited table
# (option x), passes it with the remaining options to enve.df2dist(), and
# writes the result as a tab-delimited matrix to the positional argument.
#

#= Locate the enveomics.R sources shipped next to this script and load them
args <- commandArgs(trailingOnly = FALSE)
script_path <- sub("^--file=", "", args[grep("^--file=", args)])
enveomics_R <- file.path(dirname(script_path), "lib", "enveomics.R")
for (r_source in c("cliopts.R", "df2dist.R")) {
  source(file.path(enveomics_R, "R", r_source))
}

#= Build the command-line interface from the function's Rd documentation
opt <- enve.cliopts(
  enve.df2dist,
  file.path(enveomics_R, "man", "enve.df2dist.Rd"),
  positional_arguments = 1,
  usage = "usage: %prog [options] output.mat",
  mandatory = c("x"),
  number = c("default.d", "max.sim"),
  o_desc = list(x = "A tab-delimited table with the distances."),
  p_desc = "Transform a tab-delimited list of distances into a squared matrix."
)

#= Load the input table, transform it, and save the matrix
# Option x arrives as a file path; replace it with the parsed data frame
# before forwarding the whole option list to enve.df2dist().
input_path <- opt$options[["x"]]
opt$options[["x"]] <- read.table(
  input_path,
  header = TRUE,
  sep = "\t",
  as.is = TRUE
)
dist <- do.call("enve.df2dist", opt$options)
dist_matrix <- as.matrix(dist)
write.table(
  dist_matrix,
  opt$args[1],
  quote = FALSE,
  sep = "\t",
  col.names = NA
)
@@ -0,0 +1,61 @@
#!/usr/bin/env perl
#
# @author: Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
# @update: Mar-23-2015
# @license: artistic license 2.0
#
# Extracts the rows of a table whose key column is present in (or, with -i,
# absent from) a list of IDs. By default rows are printed in the order of the
# list; with -n (implied by -i) they are printed in the order of the table.
#

use warnings;
use strict;
use Getopt::Std;

my %o;
getopts('k:s:ihn', \%o);
my($list, $table) = @ARGV;

($list and $table) or die "
.Description:
   Extracts (and re-orders) a subset of rows from a raw table.

.Usage: $0 [options] list.txt table.txt > subset.txt

   Options:
   -k <int>     Column of the table to use as key to filter. By default, 1.
   -s <str>     String to use as separation between columns. By default,
                tabulation.
   -i           If set, reports the inverse of the list (i.e., reports only rows
                absent in the list). Implies -n.
   -h           Keep first row of the table (header) untouched.
   -n           No re-order. The output has the same order of the table. By
                default, it prints in the order of the list.

   list.txt     List of IDs to extract.
   table.txt    Table file containing the superset.
   subset.txt   Table file to be created.

";

$o{k} ||= 1;
$o{k} =~ /\A[1-9][0-9]*\z/
   or die "Invalid key column (-k), expected a positive integer: $o{k}\n";
$o{s} = "\t" unless defined $o{s} and length $o{s};
$o{n} = 1 if $o{i};

# The separator is documented as a plain string, so it is matched literally;
# otherwise values such as '|' or '.' would be interpreted as regex syntax.
my $sep_re  = quotemeta $o{s};
my $key_idx = $o{k} - 1;
my $header  = "";

# First pass: index the "reference" file by key. Without -n this is the table
# (indexed by column -k, so rows can be emitted in list order); with -n it is
# the list (indexed by its first column, used only for membership).
my $ref_file    = $o{n} ? $list : $table;
my $ref_key_idx = $o{n} ? 0 : $key_idx;
my %row_for;
open my $ref_fh, "<", $ref_file or die "Cannot read file: $ref_file: $!\n";
if($o{h} and not $o{n}){
   $header = <$ref_fh>;
   $header = "" unless defined $header; # Empty table: nothing to echo
}
while(my $line = <$ref_fh>){
   chomp $line;
   next unless length $line; # Skip blank lines, but keep a literal "0"
   my @fields = split /$sep_re/, $line, -1;
   my $key = $fields[$ref_key_idx];
   next unless defined $key; # Row too short to contain the key column
   $row_for{$key} = $line;   # Later duplicates replace earlier ones
}
close $ref_fh;

# Second pass: stream the other file and print the selected rows.
my $in_file    = $o{n} ? $table : $list;
my $in_key_idx = $o{n} ? $key_idx : 0;
open my $in_fh, "<", $in_file or die "Cannot read file: $in_file: $!\n";
if($o{h} and $o{n}){
   $header = <$in_fh>;
   $header = "" unless defined $header;
}
print $header;
while(my $line = <$in_fh>){
   chomp $line;
   next unless length $line; # Skip blank lines, but keep a literal "0"
   my @fields = split /$sep_re/, $line, -1;
   my $key    = $fields[$in_key_idx];
   my $found  = (defined $key and exists $row_for{$key}) ? 1 : 0;
   $found = !$found if $o{i};
   next unless $found;
   # With -n the streamed line is the table row itself; otherwise the
   # streamed line is a list entry and the table row comes from the index.
   print(($o{n} ? $line : $row_for{$key}), "\n");
}
close $in_fh;

@@ -0,0 +1,77 @@
#!/usr/bin/env perl
#
# @author: Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
# @update: Sep-20-2015
# @license: artistic license 2.0
#
# Merges several two-column lists (ID, value) into a single table with one
# row per ID and one column per input file.
#

use warnings;
use strict;
use Getopt::Std;

my %o;
getopts('si:o:ne:h:H:r:', \%o);
my @files = @ARGV;

$#files>0 or die "
.Description:
   Merges multiple (two-column) lists into one table.

.Usage:
   $0 [options] files... > output.txt

   Options:
   -s        Values are read as Strings. By default, values are read as numbers.
   -i <str>  Input field-delimiter. By default: tabulation (\"\\t\").
   -o <str>  Output field-delimiter. By default: tabulation (\"\\t\").
   -n        No-header. By default, the header is determined by the file names.
   -e <str>  Default string when no value is found. By default, the \"empty\" value
             is 0 if values are numeric (i.e., unless -s is set) or an empty string
             otherwise.
   -h <str>  Header of the first column, containing the IDs. By default: \"Tag\".
   -H <str>  Format of filenames capturing the column header in the first capturing
             parenthesis. Non-capturing parentheses can be defined as (?:...). By
             default: \"(?:.*/)?([^\\.]+)\", which captures the part of the basename
             of the file before the first dot (if any).
   -r <int>  Number of leading rows to ignore in the input files. Zero by default.

";

# Defaults are applied only when the option is missing (or an empty string
# where emptiness is meaningless), so explicit values such as -e 0 are kept.
$o{i} = "\t" unless defined $o{i} and length $o{i};
$o{o} = "\t" unless defined $o{o} and length $o{o};
$o{e} = ($o{s} ? "" : 0) unless defined $o{e};
$o{h} = "Tag" unless defined $o{h} and length $o{h};
$o{H} = "(?:.*/)?([^\\.]+)" unless defined $o{H} and length $o{H};
$o{r} = 0 unless defined $o{r} and length $o{r};
$o{r} =~ /\A[0-9]+\z/
   or die "Invalid number of rows to ignore (-r): $o{r}\n";

# The input delimiter is a plain string, so it is matched literally.
my $in_sep_re = quotemeta $o{i};

my %values_for; # ID => array of values, indexed by file position
my @ids;        # IDs in order of first appearance, for reproducible output

print $o{h} unless $o{n};
for my $i (0 .. $#files){
   my $file = $files[$i];
   unless($o{n}){
      $file =~ m/$o{H}/ or die "Filename '$file' doesn't match format '$o{H}'.";
      # A user-supplied format without capturing parentheses yields no tag.
      my $tag = defined $1 ? $1 : "";
      print $o{o}.$tag;
   }
   open my $in, "<", $file or die "Cannot read file: $file: $!\n";
   my $row = 0;
   while(my $line = <$in>){
      next if ++$row <= $o{r};
      chomp $line;
      next unless length $line; # Blank lines carry no ID
      my @fields = split /$in_sep_re/, $line;
      my($id, $value) = @fields[0, 1];
      next unless defined $id;
      # In numeric mode a missing value counts as zero.
      $value = (defined $value ? $value : 0) + 0 unless $o{s};
      push @ids, $id unless exists $values_for{$id};
      $values_for{$id}->[$i] = $value;
   }
   close $in;
}
print "\n" unless $o{n};

for my $id (@ids){
   my @cells = map {
      defined $values_for{$id}->[$_] ? $values_for{$id}->[$_] : $o{e}
   } 0 .. $#files;
   print join($o{o}, $id, @cells), "\n";
}

@@ -0,0 +1,60 @@
#!/usr/bin/env Rscript
#
# Command-line wrapper around enve.prefscore(): reads the presence/absence
# table and the sample/species lists, optionally opens a PDF device for the
# plot, runs enve.prefscore() and saves its result as a tab-delimited file.
#

#= Locate the enveomics.R sources shipped next to this script and load them
args <- commandArgs(trailingOnly = FALSE)
script_path <- sub('^--file=', '', args[grep('^--file=', args)])
enveomics_R <- file.path(dirname(script_path), 'lib', 'enveomics.R')
for(r_source in c('cliopts.R', 'utils.R', 'prefscore.R')) {
  source(file.path(enveomics_R, 'R', r_source))
}

#= Build the command-line interface from the function's Rd documentation
opt <- enve.cliopts(
  enve.prefscore,
  file.path(enveomics_R, 'man', 'enve.prefscore.Rd'),
  positional_arguments = c(1, 4),
  usage = 'usage: %prog [options] output.tsv [output.pdf [width height]]',
  mandatory = c('x', 'set'),
  number = c('signif.thr'),
  ignore = c('plot'),
  o_desc = list(
    x = 'A tab-delimited table of presence/absence (1/0) with species as rows and samples as columns.',
    set = 'A list of sample names that constitute the test set, one per line',
    ignore = 'A list of species to exclude from the analysis, one per line'
  )
)

#= Replace file-path options with their parsed contents
# Reads a headerless tab-delimited file and returns its first column.
read_first_column <- function(path) {
  tab <- read.table(path, header = FALSE, sep = '\t', as.is = TRUE)
  tab[, 1]
}

opt$options[['x']] <- read.table(
  opt$options[['x']],
  header = TRUE,
  row.names = 1,
  sep = '\t'
)
opt$options[['set']] <- read_first_column(opt$options[['set']])
if(!is.null(opt$options[['ignore']])) {
  opt$options[['ignore']] <- read_first_column(opt$options[['ignore']])
}

#= Open the PDF device if a second positional argument was given
with_pdf <- length(opt$args) > 1
if(with_pdf) {
  # Everything after output.tsv goes to pdf(); width and height (positions
  # 2 and 3 of that list) arrive as strings and must be numeric.
  args <- as.list(opt$args[-1])
  for(pos in 2:3) {
    if(length(args) >= pos) {
      args[[pos]] <- as.numeric(args[[pos]])
    }
  }
  do.call('pdf', args)
} else {
  opt$options[['plot']] <- FALSE
}

#= Run it and save the result
y <- do.call('enve.prefscore', opt$options)
write.table(y, opt$args[1], quote = FALSE, sep = '\t', col.names = FALSE)
if(with_pdf) invisible(dev.off())
@@ -0,0 +1,69 @@
#!/usr/bin/env ruby

#
# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
# @update Feb 01 2016
# @license artistic license 2.0
#
# Replaces the values of one column of a delimited table using a two-column
# mapping file (key, replacement). Keys absent from the mapping file are
# replaced by the --unknown string.
#

require "optparse"

o = {delimiter: "\t", key: 1, default: ""}
ARGV << "-h" if ARGV.size==0
OptionParser.new do |opts|
  opts.banner = "\nReplaces a field in a table using a mapping file."
  opts.on("-m", "--map FILE",
    "Mapping file with two columns (key and replacement)."){ |v| o[:map] = v }
  opts.on("-i", "--in FILE", "Input table."){ |v| o[:in] = v }
  opts.on("-o", "--out FILE", "Output table."){ |v| o[:out] = v }
  opts.on("-k", "--key INT",
    "Column to replace in --in. By default: 1."){ |v| o[:key] = v.to_i }
  opts.on("-u", "--unknown STR",
    "String to use whenever the key is not found in --map."
    ){ |v| o[:default] = v }
  opts.on("-d", "--delimiter STR",
    "String delimiting columns. By default, tabulation."
    ){ |v| o[:delimiter] = v }
  opts.on("-h", "--help", "Display this screen") do
    puts opts
    exit
  end
  opts.separator ""
end.parse!
abort "-m is mandatory" if o[:map].nil?
abort "-i is mandatory" if o[:in].nil?
abort "-o is mandatory" if o[:out].nil?
# Columns are 1-based; zero or negative values would index from the end.
abort "-k must be a positive integer" if o[:key] < 1

begin
  key_idx = o[:key] - 1

  # Read mapping file: first column is the key, second the replacement
  map = {}
  File.open(o[:map], "r") do |mfh|
    mfh.each_line do |ln|
      row = ln.chomp.split(o[:delimiter])
      map[ row[0] ] = row[1]
    end
  end

  # Process table. Block-form File.open closes both handles even on error.
  File.open(o[:in], "r") do |ifh|
    File.open(o[:out], "w") do |ofh|
      ifh.each_line do |ln|
        # Limit -1 keeps trailing empty columns so rows are not truncated
        row = ln.chomp.split(o[:delimiter], -1)
        v = map[ row[key_idx] ]
        v = o[:default] if v.nil?
        row[key_idx] = v
        ofh.puts(row.join(o[:delimiter]))
      end
    end
  end
rescue => err
  $stderr.puts "Exception: #{err}\n\n"
  err.backtrace.each { |l| $stderr.puts " - " + l + "\n" }
  # Report the failure to the caller instead of exiting with status 0
  exit 1
end
@@ -0,0 +1,63 @@
#!/usr/bin/env ruby

#
# @author: Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
# @update: Feb 04 2015
# @license: artistic license 2.0
#
# Rounds (or floors / ceils) every numeric cell of a delimited table and
# copies non-numeric cells unchanged.
#

require 'optparse'

o = {:ndigits=>0, :action=>:round, :delimiter=>"\t"}
ARGV << '-h' if ARGV.size==0
OptionParser.new do |opts|
  opts.banner = "\nRounds numbers in a table."
  opts.on("-i", "--in FILE", "Input table."){ |v| o[:in] = v}
  opts.on("-o", "--out FILE", "Output table."){ |v| o[:out] = v }
  opts.on("-n", "--ndigits INT",
    "Number of decimal digits. By default: #{o[:ndigits]}"
    ){ |v| o[:ndigits] = v.to_i }
  opts.on("-f", "--floor",
    "Floors the values instead of rounding them. Ignores -n."
    ){ o[:action] = :floor }
  opts.on("-c", "--ceil",
    "Ceils the values instead of rounding them. Ignores -n."
    ){ o[:action] = :ceil }
  opts.on("-d", "--delimiter STR",
    "String delimiting columns. By default, tabulation."
    ){ |v| o[:delimiter] = v }
  opts.on("-h", "--help", "Display this screen") do
    puts opts
    exit
  end
  opts.separator ""
end.parse!
abort "-i is mandatory" if o[:in].nil?
abort "-o is mandatory" if o[:out].nil?

# Returns +str+ parsed as a finite Float, or nil if it is not a number.
# The parsed value itself is returned so that detection and conversion
# always agree (String#to_f would turn e.g. "0x1A" into 0.0).
def finite_number(str)
  num = Float(str)
  num.finite? ? num : nil
rescue ArgumentError, TypeError
  nil
end

begin
  # Block-form File.open closes both handles even if an error is raised
  File.open(o[:in], "r") do |ifh|
    File.open(o[:out], "w") do |ofh|
      ifh.each_line do |ln|
        # Limit -1 keeps trailing empty columns so rows are not truncated
        cells = ln.chomp.split(o[:delimiter], -1).map do |value|
          num = finite_number(value)
          if num.nil?
            value
          else
            case o[:action]
            when :round then num.round(o[:ndigits])
            when :floor then num.floor
            when :ceil  then num.ceil
            end.to_s
          end
        end
        ofh.puts(cells.join(o[:delimiter]))
      end
    end
  end
rescue => err
  $stderr.puts "Exception: #{err}\n\n"
  err.backtrace.each { |l| $stderr.puts " - " + l + "\n" }
  # Report the failure to the caller instead of exiting with status 0
  exit 1
end
@@ -0,0 +1,57 @@
#!/usr/bin/env perl
#
# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
# @update Feb-01-2016
# @license artistic license 2.0
#
# Splits a multi-column table into one two-column list (row ID, value) per
# column. The first row names the output files; the first column holds the
# row IDs.
#

use warnings;
use strict;
use Getopt::Std;
use Symbol;

my %o;
getopts('i:o:d:e:h', \%o);
my $file = shift @ARGV;

($file and not $o{h}) or die "
.Description:
   Split a file with multiple columns into multiple two-columns lists.

.Usage:
   $0 [options] file

   Options:
   -i <str>  Input field-delimiter. By default: tabulation (\"\\t\").
   -o <str>  Prefix of the output files. By default: no prefix (\"\").
   -d <str>  Output directory. By default: current directory (\"\").
   -h        Display this message.

";
$o{i} = "\t" unless defined $o{i} and length $o{i};
$o{o} = "" unless defined $o{o};
$o{o} = $o{d}."/".$o{o} if $o{d};

# The delimiter is a plain string, so it is matched literally.
my $delim_re = quotemeta $o{i};

my $seen_header = 0;
my @out_fhs;   # One output handle per column, in column order
my @out_paths; # Matching file names, for error messages
open my $in, "<", $file or die "Cannot read file: $file: $!\n";
while(my $line = <$in>){
   chomp $line;
   my @row = split /$delim_re/, $line;
   my $id = shift @row;
   if($seen_header){
      if(@row > @out_fhs){
         # No output file exists for columns beyond those named in the header
         warn "Line $. has more columns than the header, ignoring the excess.\n";
         $#row = $#out_fhs;
      }
      for my $i (0 .. $#row){
         # NOTE(review): false values are skipped, which drops empty cells but
         # also cells that are exactly "0" -- confirm this is intended.
         next unless $row[$i];
         print { $out_fhs[$i] } $id.$o{i}.$row[$i]."\n";
      }
   }else{
      # First row: create one output file per column name
      $seen_header = 1;
      for my $name (@row){
         (my $safe = $name) =~ s/[\.\/:]/_/g; # Keep names filesystem-friendly
         my $path = $o{o}.$safe.".txt";
         open my $fh, ">", $path or die "Cannot create file: $path: $!\n";
         push @out_fhs, $fh;
         push @out_paths, $path;
      }
   }
}
close $in;
# Buffered write errors only surface at close, so check every output file.
for my $i (0 .. $#out_fhs){
   close $out_fhs[$i] or die "Cannot write file: $out_paths[$i]: $!\n";
}
