miga-base 0.7.26.0 → 1.0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (337)
  1. checksums.yaml +4 -4
  2. data/lib/miga/_data/aai-intax.blast.tsv.gz +0 -0
  3. data/lib/miga/_data/aai-intax.diamond.tsv.gz +0 -0
  4. data/lib/miga/_data/aai-novel.blast.tsv.gz +0 -0
  5. data/lib/miga/_data/aai-novel.diamond.tsv.gz +0 -0
  6. data/lib/miga/cli/action/classify_wf.rb +2 -2
  7. data/lib/miga/cli/action/derep_wf.rb +1 -1
  8. data/lib/miga/cli/action/doctor.rb +57 -14
  9. data/lib/miga/cli/action/doctor/base.rb +47 -23
  10. data/lib/miga/cli/action/init.rb +11 -7
  11. data/lib/miga/cli/action/init/files_helper.rb +1 -0
  12. data/lib/miga/cli/action/ncbi_get.rb +3 -3
  13. data/lib/miga/cli/action/tax_dist.rb +2 -2
  14. data/lib/miga/cli/action/wf.rb +5 -4
  15. data/lib/miga/common.rb +1 -0
  16. data/lib/miga/daemon.rb +11 -4
  17. data/lib/miga/dataset/result.rb +10 -6
  18. data/lib/miga/json.rb +5 -4
  19. data/lib/miga/metadata.rb +5 -1
  20. data/lib/miga/parallel.rb +36 -0
  21. data/lib/miga/project.rb +8 -8
  22. data/lib/miga/project/base.rb +4 -4
  23. data/lib/miga/project/result.rb +2 -2
  24. data/lib/miga/sqlite.rb +10 -2
  25. data/lib/miga/version.rb +23 -9
  26. data/scripts/aai_distances.bash +16 -18
  27. data/scripts/ani_distances.bash +16 -17
  28. data/scripts/assembly.bash +31 -16
  29. data/scripts/haai_distances.bash +3 -27
  30. data/scripts/miga.bash +6 -4
  31. data/scripts/p.bash +1 -1
  32. data/scripts/read_quality.bash +9 -18
  33. data/scripts/trimmed_fasta.bash +14 -30
  34. data/scripts/trimmed_reads.bash +36 -36
  35. data/test/parallel_test.rb +31 -0
  36. data/test/project_test.rb +2 -1
  37. data/test/remote_dataset_test.rb +1 -1
  38. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +41964 -0
  39. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +32439 -0
  40. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +62056 -0
  41. data/utils/FastAAI/FastAAI/FastAAI +1336 -0
  42. data/utils/FastAAI/README.md +84 -0
  43. data/utils/FastAAI/kAAI_v1.0_virus.py +1296 -0
  44. data/utils/distance/commands.rb +1 -0
  45. data/utils/distance/database.rb +0 -1
  46. data/utils/distance/runner.rb +2 -4
  47. data/utils/enveomics/Docs/recplot2.md +244 -0
  48. data/utils/enveomics/Examples/aai-matrix.bash +66 -0
  49. data/utils/enveomics/Examples/ani-matrix.bash +66 -0
  50. data/utils/enveomics/Examples/essential-phylogeny.bash +105 -0
  51. data/utils/enveomics/Examples/unus-genome-phylogeny.bash +100 -0
  52. data/utils/enveomics/LICENSE.txt +73 -0
  53. data/utils/enveomics/Makefile +52 -0
  54. data/utils/enveomics/Manifest/Tasks/aasubs.json +103 -0
  55. data/utils/enveomics/Manifest/Tasks/blasttab.json +786 -0
  56. data/utils/enveomics/Manifest/Tasks/distances.json +161 -0
  57. data/utils/enveomics/Manifest/Tasks/fasta.json +802 -0
  58. data/utils/enveomics/Manifest/Tasks/fastq.json +291 -0
  59. data/utils/enveomics/Manifest/Tasks/graphics.json +126 -0
  60. data/utils/enveomics/Manifest/Tasks/mapping.json +137 -0
  61. data/utils/enveomics/Manifest/Tasks/ogs.json +382 -0
  62. data/utils/enveomics/Manifest/Tasks/other.json +906 -0
  63. data/utils/enveomics/Manifest/Tasks/remote.json +355 -0
  64. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +638 -0
  65. data/utils/enveomics/Manifest/Tasks/tables.json +308 -0
  66. data/utils/enveomics/Manifest/Tasks/trees.json +68 -0
  67. data/utils/enveomics/Manifest/Tasks/variants.json +111 -0
  68. data/utils/enveomics/Manifest/categories.json +165 -0
  69. data/utils/enveomics/Manifest/examples.json +154 -0
  70. data/utils/enveomics/Manifest/tasks.json +4 -0
  71. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +69 -0
  72. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -0
  73. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -0
  74. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -0
  75. data/utils/enveomics/Pipelines/assembly.pbs/README.md +189 -0
  76. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +112 -0
  77. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +23 -0
  78. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +44 -0
  79. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +50 -0
  80. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +37 -0
  81. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +68 -0
  82. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +49 -0
  83. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +80 -0
  84. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +57 -0
  85. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +63 -0
  86. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +38 -0
  87. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +73 -0
  88. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +21 -0
  89. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +72 -0
  90. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +98 -0
  91. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -0
  92. data/utils/enveomics/Pipelines/blast.pbs/README.md +127 -0
  93. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +109 -0
  94. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +128 -0
  95. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +16 -0
  96. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +22 -0
  97. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +26 -0
  98. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +89 -0
  99. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +29 -0
  100. data/utils/enveomics/Pipelines/idba.pbs/README.md +49 -0
  101. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +95 -0
  102. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +56 -0
  103. data/utils/enveomics/Pipelines/trim.pbs/README.md +54 -0
  104. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +70 -0
  105. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +130 -0
  106. data/utils/enveomics/README.md +42 -0
  107. data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +171 -0
  108. data/utils/enveomics/Scripts/Aln.cat.rb +221 -0
  109. data/utils/enveomics/Scripts/Aln.convert.pl +35 -0
  110. data/utils/enveomics/Scripts/AlphaDiversity.pl +152 -0
  111. data/utils/enveomics/Scripts/BedGraph.tad.rb +93 -0
  112. data/utils/enveomics/Scripts/BedGraph.window.rb +71 -0
  113. data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +102 -0
  114. data/utils/enveomics/Scripts/BlastTab.addlen.rb +63 -0
  115. data/utils/enveomics/Scripts/BlastTab.advance.bash +48 -0
  116. data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +55 -0
  117. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +104 -0
  118. data/utils/enveomics/Scripts/BlastTab.cogCat.rb +76 -0
  119. data/utils/enveomics/Scripts/BlastTab.filter.pl +47 -0
  120. data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +194 -0
  121. data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +104 -0
  122. data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +157 -0
  123. data/utils/enveomics/Scripts/BlastTab.recplot2.R +48 -0
  124. data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +86 -0
  125. data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +119 -0
  126. data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +86 -0
  127. data/utils/enveomics/Scripts/BlastTab.subsample.pl +47 -0
  128. data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +114 -0
  129. data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +90 -0
  130. data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +101 -0
  131. data/utils/enveomics/Scripts/Chao1.pl +97 -0
  132. data/utils/enveomics/Scripts/CharTable.classify.rb +234 -0
  133. data/utils/enveomics/Scripts/EBIseq2tax.rb +83 -0
  134. data/utils/enveomics/Scripts/FastA.N50.pl +60 -0
  135. data/utils/enveomics/Scripts/FastA.extract.rb +152 -0
  136. data/utils/enveomics/Scripts/FastA.filter.pl +52 -0
  137. data/utils/enveomics/Scripts/FastA.filterLen.pl +28 -0
  138. data/utils/enveomics/Scripts/FastA.filterN.pl +60 -0
  139. data/utils/enveomics/Scripts/FastA.fragment.rb +100 -0
  140. data/utils/enveomics/Scripts/FastA.gc.pl +42 -0
  141. data/utils/enveomics/Scripts/FastA.interpose.pl +93 -0
  142. data/utils/enveomics/Scripts/FastA.length.pl +38 -0
  143. data/utils/enveomics/Scripts/FastA.mask.rb +89 -0
  144. data/utils/enveomics/Scripts/FastA.per_file.pl +36 -0
  145. data/utils/enveomics/Scripts/FastA.qlen.pl +57 -0
  146. data/utils/enveomics/Scripts/FastA.rename.pl +65 -0
  147. data/utils/enveomics/Scripts/FastA.revcom.pl +23 -0
  148. data/utils/enveomics/Scripts/FastA.sample.rb +98 -0
  149. data/utils/enveomics/Scripts/FastA.slider.pl +85 -0
  150. data/utils/enveomics/Scripts/FastA.split.pl +55 -0
  151. data/utils/enveomics/Scripts/FastA.split.rb +79 -0
  152. data/utils/enveomics/Scripts/FastA.subsample.pl +131 -0
  153. data/utils/enveomics/Scripts/FastA.tag.rb +65 -0
  154. data/utils/enveomics/Scripts/FastA.toFastQ.rb +69 -0
  155. data/utils/enveomics/Scripts/FastA.wrap.rb +48 -0
  156. data/utils/enveomics/Scripts/FastQ.filter.pl +54 -0
  157. data/utils/enveomics/Scripts/FastQ.interpose.pl +90 -0
  158. data/utils/enveomics/Scripts/FastQ.maskQual.rb +89 -0
  159. data/utils/enveomics/Scripts/FastQ.offset.pl +90 -0
  160. data/utils/enveomics/Scripts/FastQ.split.pl +53 -0
  161. data/utils/enveomics/Scripts/FastQ.tag.rb +70 -0
  162. data/utils/enveomics/Scripts/FastQ.test-error.rb +81 -0
  163. data/utils/enveomics/Scripts/FastQ.toFastA.awk +24 -0
  164. data/utils/enveomics/Scripts/GFF.catsbj.pl +127 -0
  165. data/utils/enveomics/Scripts/GenBank.add_fields.rb +84 -0
  166. data/utils/enveomics/Scripts/HMM.essential.rb +351 -0
  167. data/utils/enveomics/Scripts/HMM.haai.rb +168 -0
  168. data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +83 -0
  169. data/utils/enveomics/Scripts/JPlace.distances.rb +88 -0
  170. data/utils/enveomics/Scripts/JPlace.to_iToL.rb +320 -0
  171. data/utils/enveomics/Scripts/M5nr.getSequences.rb +81 -0
  172. data/utils/enveomics/Scripts/MeTaxa.distribution.pl +198 -0
  173. data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +35 -0
  174. data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +49 -0
  175. data/utils/enveomics/Scripts/NCBIacc2tax.rb +92 -0
  176. data/utils/enveomics/Scripts/Newick.autoprune.R +27 -0
  177. data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +228 -0
  178. data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +32 -0
  179. data/utils/enveomics/Scripts/RefSeq.download.bash +48 -0
  180. data/utils/enveomics/Scripts/SRA.download.bash +55 -0
  181. data/utils/enveomics/Scripts/TRIBS.plot-test.R +36 -0
  182. data/utils/enveomics/Scripts/TRIBS.test.R +39 -0
  183. data/utils/enveomics/Scripts/Table.barplot.R +31 -0
  184. data/utils/enveomics/Scripts/Table.df2dist.R +30 -0
  185. data/utils/enveomics/Scripts/Table.filter.pl +61 -0
  186. data/utils/enveomics/Scripts/Table.merge.pl +77 -0
  187. data/utils/enveomics/Scripts/Table.prefScore.R +60 -0
  188. data/utils/enveomics/Scripts/Table.replace.rb +69 -0
  189. data/utils/enveomics/Scripts/Table.round.rb +63 -0
  190. data/utils/enveomics/Scripts/Table.split.pl +57 -0
  191. data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +227 -0
  192. data/utils/enveomics/Scripts/VCF.KaKs.rb +147 -0
  193. data/utils/enveomics/Scripts/VCF.SNPs.rb +88 -0
  194. data/utils/enveomics/Scripts/aai.rb +419 -0
  195. data/utils/enveomics/Scripts/ani.rb +362 -0
  196. data/utils/enveomics/Scripts/anir.rb +137 -0
  197. data/utils/enveomics/Scripts/clust.rand.rb +102 -0
  198. data/utils/enveomics/Scripts/gi2tax.rb +103 -0
  199. data/utils/enveomics/Scripts/in_silico_GA_GI.pl +96 -0
  200. data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
  201. data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
  202. data/utils/enveomics/Scripts/lib/enveomics.R +1 -0
  203. data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +293 -0
  204. data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +175 -0
  205. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +24 -0
  206. data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +17 -0
  207. data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +30 -0
  208. data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +253 -0
  209. data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +63 -0
  210. data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +182 -0
  211. data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +49 -0
  212. data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +74 -0
  213. data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +237 -0
  214. data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +3 -0
  215. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +31 -0
  216. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +152 -0
  217. data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +73 -0
  218. data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +135 -0
  219. data/utils/enveomics/Scripts/ogs.annotate.rb +88 -0
  220. data/utils/enveomics/Scripts/ogs.core-pan.rb +160 -0
  221. data/utils/enveomics/Scripts/ogs.extract.rb +125 -0
  222. data/utils/enveomics/Scripts/ogs.mcl.rb +186 -0
  223. data/utils/enveomics/Scripts/ogs.rb +104 -0
  224. data/utils/enveomics/Scripts/ogs.stats.rb +131 -0
  225. data/utils/enveomics/Scripts/rbm-legacy.rb +172 -0
  226. data/utils/enveomics/Scripts/rbm.rb +100 -0
  227. data/utils/enveomics/Scripts/sam.filter.rb +148 -0
  228. data/utils/enveomics/Tests/Makefile +10 -0
  229. data/utils/enveomics/Tests/Mgen_M2288.faa +3189 -0
  230. data/utils/enveomics/Tests/Mgen_M2288.fna +8282 -0
  231. data/utils/enveomics/Tests/Mgen_M2321.fna +8288 -0
  232. data/utils/enveomics/Tests/Nequ_Kin4M.faa +2970 -0
  233. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
  234. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +7 -0
  235. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +17 -0
  236. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +137 -0
  237. data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +123 -0
  238. data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +200 -0
  239. data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +55 -0
  240. data/utils/enveomics/Tests/alkB.nwk +1 -0
  241. data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +13 -0
  242. data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +17 -0
  243. data/utils/enveomics/Tests/hiv1.faa +59 -0
  244. data/utils/enveomics/Tests/hiv1.fna +134 -0
  245. data/utils/enveomics/Tests/hiv2.faa +70 -0
  246. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +233 -0
  247. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +1 -0
  248. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +233 -0
  249. data/utils/enveomics/Tests/phyla_counts.tsv +10 -0
  250. data/utils/enveomics/Tests/primate_lentivirus.ogs +11 -0
  251. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +9 -0
  252. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +8 -0
  253. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +6 -0
  254. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +9 -0
  255. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +6 -0
  256. data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +6 -0
  257. data/utils/enveomics/build_enveomics_r.bash +45 -0
  258. data/utils/enveomics/enveomics.R/DESCRIPTION +31 -0
  259. data/utils/enveomics/enveomics.R/NAMESPACE +39 -0
  260. data/utils/enveomics/enveomics.R/R/autoprune.R +155 -0
  261. data/utils/enveomics/enveomics.R/R/barplot.R +184 -0
  262. data/utils/enveomics/enveomics.R/R/cliopts.R +135 -0
  263. data/utils/enveomics/enveomics.R/R/df2dist.R +154 -0
  264. data/utils/enveomics/enveomics.R/R/growthcurve.R +331 -0
  265. data/utils/enveomics/enveomics.R/R/prefscore.R +79 -0
  266. data/utils/enveomics/enveomics.R/R/recplot.R +354 -0
  267. data/utils/enveomics/enveomics.R/R/recplot2.R +1631 -0
  268. data/utils/enveomics/enveomics.R/R/tribs.R +583 -0
  269. data/utils/enveomics/enveomics.R/R/utils.R +80 -0
  270. data/utils/enveomics/enveomics.R/README.md +81 -0
  271. data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
  272. data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
  273. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +16 -0
  274. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +16 -0
  275. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +16 -0
  276. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +25 -0
  277. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +46 -0
  278. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -0
  279. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +47 -0
  280. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -0
  281. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -0
  282. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +40 -0
  283. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +103 -0
  284. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +67 -0
  285. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -0
  286. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -0
  287. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +45 -0
  288. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +44 -0
  289. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +47 -0
  290. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +75 -0
  291. data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +50 -0
  292. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +44 -0
  293. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +139 -0
  294. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +45 -0
  295. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -0
  296. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +77 -0
  297. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -0
  298. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -0
  299. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -0
  300. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -0
  301. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +47 -0
  302. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -0
  303. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -0
  304. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +45 -0
  305. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -0
  306. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -0
  307. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -0
  308. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -0
  309. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +52 -0
  310. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -0
  311. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +51 -0
  312. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +43 -0
  313. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +82 -0
  314. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +59 -0
  315. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -0
  316. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +36 -0
  317. data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +23 -0
  318. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +68 -0
  319. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -0
  320. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -0
  321. data/utils/enveomics/enveomics.R/man/growth.curves.Rd +14 -0
  322. data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +13 -0
  323. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +78 -0
  324. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +46 -0
  325. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +45 -0
  326. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +125 -0
  327. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -0
  328. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -0
  329. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -0
  330. data/utils/enveomics/globals.mk +8 -0
  331. data/utils/enveomics/manifest.json +9 -0
  332. data/utils/multitrim/Multitrim How-To.pdf +0 -0
  333. data/utils/multitrim/README.md +67 -0
  334. data/utils/multitrim/multitrim.py +1555 -0
  335. data/utils/multitrim/multitrim.yml +13 -0
  336. data/utils/requirements.txt +4 -3
  337. metadata +304 -3
data/lib/miga/metadata.rb CHANGED
@@ -105,7 +105,11 @@ class MiGA::Metadata < MiGA::MiGA
105
105
  ##
106
106
  # Return the value of +k+ in #data
107
107
  def [](k)
108
- data[k.to_sym]
108
+ if k.to_s =~ /(.+):(.+)/
109
+ data[$1.to_sym]&.fetch($2)
110
+ else
111
+ data[k.to_sym]
112
+ end
109
113
  end
110
114
 
111
115
  ##
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ ##
4
+ # Parallel execution in MiGA.
5
+ class MiGA::Parallel < MiGA::MiGA
6
+ class << self
7
+ ##
8
+ # Executes the passed block with the thread number as argument (0-numbered)
9
+ # in +threads+ processes
10
+ def process(threads)
11
+ threads.times do |i|
12
+ Process.fork { yield(i) }
13
+ end
14
+ Process.waitall
15
+ end
16
+
17
+ ##
18
+ # Distributes +enum+ across +threads+ and calls the passed block with args:
19
+ # 1. Unitary object from +enum+
20
+ # 2. Index of the unitary object
21
+ # 3. Index of the acting thread
22
+ def distribute(enum, threads, &blk)
23
+ process(threads) { |thr| thread_enum(enum, threads, thr, &blk) }
24
+ end
25
+
26
+ ##
27
+ # Enum through +enum+ executing the passed block only for thread with index
28
+ # +thr+, one of +threads+ threads. The passed block has the same arguments
29
+ # as the one in +#distribute+
30
+ def thread_enum(enum, threads, thr)
31
+ enum.each_with_index do |obj, idx|
32
+ yield(obj, idx, thr) if idx % threads == thr
33
+ end
34
+ end
35
+ end
36
+ end
data/lib/miga/project.rb CHANGED
@@ -42,18 +42,18 @@ class MiGA::Project < MiGA::MiGA
42
42
  # Create an empty project
43
43
  def create
44
44
  unless MiGA::MiGA.initialized?
45
- raise 'Impossible to create project in uninitialized MiGA.'
45
+ warn 'Projects cannot be processed yet, first run: miga init'
46
46
  end
47
47
 
48
- dirs = [path] + @@FOLDERS.map { |d| "#{path}/#{d}" } +
49
- @@DATA_FOLDERS.map { |d| "#{path}/data/#{d}" }
50
- dirs.each { |d| Dir.mkdir(d) unless Dir.exist? d }
48
+ dirs = @@FOLDERS.map { |d| File.join(path, d) }
49
+ dirs += @@DATA_FOLDERS.map { |d| File.join(path, 'data', d) }
50
+ dirs.each { |d| FileUtils.mkdir_p(d) }
51
51
  @metadata = MiGA::Metadata.new(
52
- File.expand_path('miga.project.json', path),
53
- { datasets: [], name: File.basename(path) }
52
+ File.join(path, 'miga.project.json'),
53
+ datasets: [], name: File.basename(path)
54
54
  )
55
- d_path = File.expand_path('daemon/daemon.json', path)
56
- File.open(d_path, 'w') { |fh| fh.puts '{}' } unless File.exist? d_path
55
+ d_path = File.join(path, 'daemon', 'daemon.json')
56
+ File.open(d_path, 'w') { |fh| fh.puts '{}' } unless File.exist?(d_path)
57
57
  pull_hook :on_create
58
58
  self.load
59
59
  end
@@ -131,15 +131,15 @@ module MiGA::Project::Base
131
131
  },
132
132
  haai_p: {
133
133
  desc: 'Value of aai.rb -p on hAAI', type: String,
134
- default: proc { |project| project.clade? ? 'no' : 'blast+' },
135
- in: %w[fastaai blast+ blast blat diamond no]
134
+ default: proc { |project| project.clade? ? 'no' : 'fastaai' },
135
+ in: %w[blast+ blast blat diamond fastaai no]
136
136
  },
137
137
  aai_p: {
138
- desc: 'Value of aai.rb -p on AAI', default: 'blast+', type: String,
138
+ desc: 'Value of aai.rb -p on AAI', default: 'diamond', type: String,
139
139
  in: %w[blast+ blast blat diamond]
140
140
  },
141
141
  ani_p: {
142
- desc: 'Value of ani.rb -p on ANI', default: 'blast+', type: String,
142
+ desc: 'Value of ani.rb -p on ANI', default: 'fastani', type: String,
143
143
  in: %w[blast+ blast blat fastani]
144
144
  },
145
145
  max_try: {
@@ -55,12 +55,12 @@ module MiGA::Project::Result
55
55
  ##
56
56
  # Add result of any type +:*_distances+ at +base+ (no +_opts+ supported).
57
57
  def add_result_distances(base, _opts)
58
- return nil unless result_files_exist?(base, %w[.Rdata .log .txt])
58
+ return nil unless result_files_exist?(base, %w[.Rdata .txt])
59
59
 
60
60
  r = MiGA::Result.new("#{base}.json")
61
61
  r.add_file(:rdata, 'miga-project.Rdata')
62
62
  r.add_file(:matrix, 'miga-project.txt')
63
- r.add_file(:log, 'miga-project.log')
63
+ r.add_file(:log, 'miga-project.log') # Legacy file
64
64
  r.add_file(:hist, 'miga-project.hist')
65
65
  r
66
66
  end
data/lib/miga/sqlite.rb CHANGED
@@ -37,12 +37,20 @@ class MiGA::SQLite < MiGA::MiGA
37
37
  # Executes +cmd+ and returns the result
38
38
  def run(*cmd)
39
39
  busy_attempts ||= 0
40
- conn = SQLite3::Database.new(path)
41
- conn.execute(*cmd)
40
+ io_attempts ||= 0
41
+ y = nil
42
+ SQLite3::Database.new(path) { |conn| y = conn.execute(*cmd) }
43
+ y
42
44
  rescue SQLite3::BusyException => e
43
45
  busy_attempts += 1
44
46
  raise "Database busy #{path}: #{e.message}" if busy_attempts >= 3
45
47
 
48
+ sleep(1)
49
+ retry
50
+ rescue SQLite3::IOException => e
51
+ io_attempts += 1
52
+ raise "Database I/O error #{path}: #{e.message}" if io_attempts >= 3
53
+
46
54
  sleep(1)
47
55
  retry
48
56
  end
data/lib/miga/version.rb CHANGED
@@ -9,23 +9,33 @@ module MiGA
9
9
  # Current version of MiGA. An Array with three values:
10
10
  # - Float representing the major.minor version.
11
11
  # - Integer representing gem releases of the current version.
12
- # - Integer representing minor changes that require new version number.
13
- VERSION = [0.7, 26, 0].freeze
12
+ # - String indicating release status:
13
+ # - rc* release candidate, not released as gem
14
+ # - [0-9]+ stable release, released as gem
15
+ VERSION = [1.0, 0, 1].freeze
14
16
 
15
17
  ##
16
18
  # Nickname for the current major.minor version.
17
- VERSION_NAME = 'lithograph'
19
+ VERSION_NAME = 'prima'
18
20
 
19
21
  ##
20
22
  # Date of the current gem release.
21
- VERSION_DATE = Date.new(2021, 3, 1)
23
+ VERSION_DATE = Date.new(2021, 4, 13)
22
24
 
23
25
  ##
24
- # Reference of MiGA.
25
- CITATION = 'Rodriguez-R et al (2018). ' \
26
- 'The Microbial Genomes Atlas (MiGA) webserver: taxonomic and gene ' \
27
- 'diversity analysis of Archaea and Bacteria at the whole genome level. ' \
28
- 'Nucleic Acids Research 46(W1):W282-W288. doi:10.1093/nar/gky467.'
26
+ # References of MiGA
27
+ CITATION = []
28
+ CITATION << <<~REF
29
+ Rodriguez-R et al (2018). The Microbial Genomes Atlas (MiGA) webserver:
30
+ taxonomic and gene diversity analysis of Archaea and Bacteria at the whole
31
+ genome level. Nucleic Acids Research 46(W1):W282-W288.
32
+ doi:10.1093/nar/gky467.
33
+ REF
34
+ CITATION << <<~REF
35
+ Rodriguez-R et al (2020). Classifying prokaryotic genomes using the
36
+ Microbial Genomes Atlas (MiGA) webserver. Bergey's Manual of Systematics
37
+ of Archaea and Bacteria.
38
+ REF
29
39
  end
30
40
 
31
41
  class MiGA::MiGA
@@ -58,6 +68,10 @@ class MiGA::MiGA
58
68
  ##
59
69
  # Reference of MiGA
60
70
  def self.CITATION
71
+ CITATION.map { |i| "- #{i}" }.join
72
+ end
73
+
74
+ def self.CITATION_ARRAY
61
75
  CITATION
62
76
  end
63
77
  end
@@ -9,34 +9,32 @@ DIR="$PROJECT/data/09.distances/02.aai"
9
9
  # Initialize
10
10
  miga_start_project_step "$DIR"
11
11
 
12
- echo -n "" > miga-project.log
13
- DS=$(miga ls -P "$PROJECT" --ref --no-multi --active)
14
-
15
12
  # Extract values
16
13
  rm -f miga-project.txt
14
+ SQL="SELECT seq1, seq2, aai, sd, n, omega from aai;"
15
+ DS=$(miga ls -P "$PROJECT" --ref --no-multi --active)
17
16
  (
18
- echo "metric a b value sd n omega" | tr " " "\\t"
17
+ echo "a b value sd n omega" | tr " " "\\t"
19
18
  for i in $DS ; do
20
- echo "SELECT CASE WHEN omega!=0 THEN 'AAI' ELSE 'hAAI_AAI' END," \
21
- " seq1, seq2, aai, sd, n, omega from aai;" \
22
- | sqlite3 "$DIR/$i.db" | tr "\\|" "\\t"
23
- echo "$i" >> miga-project.log
19
+ echo "$SQL" | sqlite3 "$DIR/$i.db" | tr "\\|" "\\t"
24
20
  done
25
21
  ) | gzip -9c > miga-project.txt.gz
26
22
 
27
23
  # R-ify
28
- echo "
29
- aai <- read.table(gzfile('miga-project.txt.gz'), sep='\\t', h=T, as.is=TRUE);
30
- save(aai, file='miga-project.Rdata');
31
- if(sum(aai[,'a'] != aai[,'b']) > 0){
32
- h <- hist(aai[aai[,'a'] != aai[,'b'], 'value'], breaks=100, plot=FALSE);
24
+ cat <<R | R --vanilla
25
+ file <- gzfile('miga-project.txt.gz')
26
+ aai <- read.table(file, sep = '\t', header = TRUE, as.is = TRUE)
27
+ save(aai, file = 'miga-project.Rdata')
28
+ if(sum(aai[, 'a'] != aai[, 'b']) > 0) {
29
+ h <- hist(aai[aai[, 'a'] != aai[, 'b'], 'value'], breaks = 100, plot = FALSE)
30
+ len <- length(h[['breaks']])
33
31
  write.table(
34
- cbind(h[['breaks']][-length(h[['breaks']])],
35
- h[['breaks']][-1], h[['counts']]),
36
- file='miga-project.hist', quote=FALSE, sep='\\t',
37
- col.names=FALSE, row.names=FALSE);
32
+ cbind(h[['breaks']][-len], h[['breaks']][-1], h[['counts']]),
33
+ file = 'miga-project.hist', quote = FALSE, sep = '\t',
34
+ col.names = FALSE, row.names = FALSE
35
+ )
38
36
  }
39
- " | R --vanilla
37
+ R
40
38
 
41
39
  # Finalize
42
40
  miga_end_project_step "$DIR"
@@ -9,33 +9,32 @@ DIR="$PROJECT/data/09.distances/03.ani"
9
9
  # Initialize
10
10
  miga_start_project_step "$DIR"
11
11
 
12
- echo -n "" > miga-project.log
13
- DS=$(miga ls -P "$PROJECT" --ref --no-multi --active)
14
-
15
12
  # Extract values
16
13
  rm -f miga-project.txt
14
+ SQL="SELECT seq1, seq2, ani, sd, n, omega from ani;"
15
+ DS=$(miga ls -P "$PROJECT" --ref --no-multi --active)
17
16
  (
18
- echo "metric a b value sd n omega" | tr " " "\\t"
17
+ echo "a b value sd n omega" | tr " " "\\t"
19
18
  for i in $DS ; do
20
- echo "SELECT 'ANI', seq1, seq2, ani, sd, n, omega from ani ;" \
21
- | sqlite3 "$DIR/$i.db" | tr "\\|" "\\t"
22
- echo "$i" >> miga-project.log
19
+ echo "$SQL" | sqlite3 "$DIR/$i.db" | tr "\\|" "\\t"
23
20
  done
24
21
  ) | gzip -9c > miga-project.txt.gz
25
22
 
26
23
  # R-ify
27
- echo "
28
- ani <- read.table(gzfile('miga-project.txt.gz'), sep='\\t', h=T, as.is=TRUE);
29
- save(ani, file='miga-project.Rdata');
30
- if(sum(ani[,'a'] != ani[,'b']) > 0){
31
- h <- hist(ani[ani[,'a'] != ani[,'b'], 'value'], breaks=100, plot=FALSE);
24
+ cat <<R | R --vanilla
25
+ file <- gzfile('miga-project.txt.gz')
26
+ ani <- read.table(file, sep = '\t', header = TRUE, as.is = TRUE)
27
+ save(ani, file = 'miga-project.Rdata')
28
+ if(sum(ani[, 'a'] != ani[, 'b']) > 0) {
29
+ h <- hist(ani[ani[, 'a'] != ani[, 'b'], 'value'], breaks = 100, plot = FALSE)
30
+ len <- length(h[['breaks']])
32
31
  write.table(
33
- cbind(h[['breaks']][-length(h[['breaks']])],
34
- h[['breaks']][-1], h[['counts']]),
35
- file='miga-project.hist', quote=FALSE, sep='\\t',
36
- col.names=FALSE, row.names=FALSE);
32
+ cbind(h[['breaks']][-len], h[['breaks']][-1], h[['counts']]),
33
+ file = 'miga-project.hist', quote = FALSE, sep = '\t',
34
+ col.names = FALSE, row.names = FALSE
35
+ )
37
36
  }
38
- " | R --vanilla
37
+ R
39
38
 
40
39
  # Finalize
41
40
  miga_end_project_step "$DIR"
@@ -11,30 +11,44 @@ miga date > "$DATASET.start"
11
11
 
12
12
  # Interpose (if needed)
13
13
  TF="../04.trimmed_fasta"
14
- if [[ -s "$TF/$DATASET.1.fasta" \
15
- && -s "$TF/$DATASET.2.fasta" \
16
- && ! -s "$TF/$DATASET.CoupledReads.fa" ]] ; then
17
- FastA.interpose.pl "$TF/$DATASET.CoupledReads.fa" "$TF/$DATASET".[12].fasta
18
- gzip -9 -f "$TF/$DATASET.1.fasta"
19
- gzip -9 -f "$TF/$DATASET.2.fasta"
20
- miga add_result -P "$PROJECT" -D "$DATASET" -r trimmed_fasta -f
14
+ b=$DATASET
15
+ if [[ -s "$TF/${b}.2.fasta" || -s "$TF/${b}.2.fasta.gz" ]] ; then
16
+ cr="$TF/${b}.CoupledReads.fa"
17
+ if [[ ! -s "$cr" && ! -s "${cr}.gz" ]] ; then
18
+ for s in 1 2 ; do
19
+ if [[ -s "$TF/${b}.${s}.fasta" ]] ; then
20
+ ln -s "$TF/${b}.${s}.fasta" "${b}.${s}.tmp"
21
+ else
22
+ gzip -cd "$TF/${b}.${s}.fasta.gz" > "${b}.${s}.tmp"
23
+ fi
24
+ done
25
+ FastA.interpose.pl "$cr" "$b".[12].tmp
26
+ rm "$b".[12].tmp
27
+ miga add_result -P "$PROJECT" -D "$DATASET" -r trimmed_fasta -f
28
+ fi
21
29
  fi
22
30
 
31
+ # Gzip (if needed)
32
+ for i in SingleReads CoupledReads ; do
33
+ base="$TF/${DATASET}.${i}.fa"
34
+ if [[ -e "$base" && ! -s "${base}.gz" ]] ; then
35
+ gzip -9f "$base"
36
+ miga add_result -P "$PROJECT" -D "$DATASET" -r trimmed_fasta -f
37
+ fi
38
+ done
39
+
23
40
  # Assemble
24
- FA="$TF/$DATASET.CoupledReads.fa"
25
- [[ -e "$FA" ]] || FA="$FA.gz"
26
- [[ -e "$FA" ]] || FA="../04.trimmed_fasta/$DATASET.SingleReads.fa"
27
- [[ -e "$FA" ]] || FA="$FA.gz"
41
+ FA="$TF/${DATASET}.CoupledReads.fa.gz"
42
+ [[ -e "$FA" ]] || FA="$TF/${DATASET}.SingleReads.fa.gz"
28
43
  RD="r"
29
44
  [[ $FA == *.SingleReads.fa* ]] && RD="l"
30
- idba_ud --pre_correction -$RD "$FA" -o "$DATASET" --num_threads "$CORES" || true
45
+ gzip -cd "$FA" \
46
+ | idba_ud --pre_correction -$RD /dev/stdin \
47
+ -o "$DATASET" --num_threads "$CORES" || true
31
48
  [[ -s "$DATASET/contig.fa" ]] || exit 1
32
49
 
33
50
  # Clean
34
- (
35
- cd "$DATASET"
36
- rm kmer graph-*.fa align-* local-contig-*.fa contig-*.fa
37
- )
51
+ ( cd "$DATASET" && rm kmer graph-*.fa align-* local-contig-*.fa contig-*.fa )
38
52
 
39
53
  # Extract
40
54
  if [[ -s "$DATASET/scaffold.fa" ]] ; then
@@ -49,3 +63,4 @@ FastA.length.pl "$DATASET.AllContigs.fna" | awk '$2>=1000{print $1}' \
49
63
  # Finalize
50
64
  miga date > "$DATASET.done"
51
65
  miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
66
+
@@ -12,34 +12,10 @@ miga_start_project_step "$DIR"
12
12
  # Cleanup databases
13
13
  ruby -I "$MIGA/lib" "$MIGA/utils/cleanup-databases.rb" "$PROJECT" "$CORES"
14
14
 
15
- # Run hAAI
15
+ # No real need for hAAI distributions at all
16
16
  echo -n "" > miga-project.log
17
- DS=$(miga ls -P "$PROJECT" --ref --no-multi --active)
18
-
19
- # Extract values
20
- rm -f miga-project.txt
21
- (
22
- echo "metric a b value sd n omega" | tr " " "\\t"
23
- for i in $DS ; do
24
- echo "SELECT 'hAAI', seq1, seq2, aai, sd, n, omega from aai ;" \
25
- | sqlite3 "$DIR/$i.db" | tr "\\|" "\\t"
26
- echo "$i" >> miga-project.log
27
- done
28
- ) | gzip -9c > miga-project.txt.gz
29
-
30
- # R-ify
31
- echo "
32
- haai <- read.table(gzfile('miga-project.txt.gz'), sep='\\t', h=T, as.is=TRUE);
33
- save(haai, file='miga-project.Rdata');
34
- if(sum(haai[,'a'] != haai[,'b']) > 0){
35
- h <- hist(haai[haai[,'a'] != haai[,'b'], 'value'], breaks=100, plot=FALSE);
36
- write.table(
37
- cbind(h[['breaks']][-length(h[['breaks']])],
38
- h[['breaks']][-1], h[['counts']]),
39
- file='miga-project.hist', quote=FALSE, sep='\\t',
40
- col.names=FALSE, row.names=FALSE);
41
- }
42
- " | R --vanilla
17
+ echo -n "" > miga-project.txt
18
+ echo "aai <- NULL; save(aai, file = 'miga-project.Rdata')" | R --vanilla
43
19
 
44
20
  # Finalize
45
21
  miga_end_project_step "$DIR"
data/scripts/miga.bash CHANGED
@@ -7,9 +7,11 @@ SCRIPT=${SCRIPT:-$(basename "$0" .bash)}
7
7
  # shellcheck source=/dev/null
8
8
  . "$MIGA_HOME/.miga_rc"
9
9
 
10
- # Ensure submodules are first in PATH
11
- export PATH="$MIGA/bin:$MIGA/utils/enveomics/Scripts:$PATH"
12
- export PATH="$MIGA/utils/FastAAI/FastAAI:$PATH"
10
+ # Ensure MiGA & submodules are first in PATH
11
+ export PATH="$MIGA/bin:$PATH"
12
+ for util in enveomics/Scripts FastAAI/FastAAI multitrim ; do
13
+ export PATH="$MIGA/utils/$util:$PATH"
14
+ done
13
15
 
14
16
  # Ancillary functions
15
17
  function exists { [[ -e "$1" ]] ; }
@@ -38,7 +40,7 @@ if [[ "$SCRIPT" != "d" && "$SCRIPT" != "p" ]] ; then
38
40
  echo ""
39
41
  echo "######[ $SCRIPT ]######"
40
42
  echo "# Date: $(miga date)"
41
- echo "# Host: $(hostname)"
43
+ echo "# Host: $(hostname) [$CORES]"
42
44
  echo "# MiGA: $MIGA"
43
45
  echo "# Project: $PROJECT"
44
46
  if [[ -n $DATASET ]] ; then
data/scripts/p.bash CHANGED
@@ -12,7 +12,7 @@ while true ; do
12
12
  if [[ "$res" == "$last_res" ]] ; then
13
13
  let k=$k+1
14
14
  if [[ $k -gt 10 ]] ; then
15
- miga new --update -P "$PROJECT" \
15
+ miga edit -P "$PROJECT" \
16
16
  -m "run_$res=false,warn=Too many failed attempts to run $res"
17
17
  fi
18
18
  else
@@ -6,28 +6,19 @@ SCRIPT="read_quality"
6
6
  . "$MIGA/scripts/miga.bash" || exit 1
7
7
  cd "$PROJECT/data/03.read_quality"
8
8
 
9
- b=$DATASET
10
-
11
9
  # Initialize
12
10
  miga date > "$DATASET.start"
13
11
 
14
- # FastQC
15
- [[ -d "$b.fastqc" ]] || mkdir "$b.fastqc"
16
- fastqc "../02.trimmed_reads/$b".[12].clipped.fastq -o "$b.fastqc"
17
-
18
- # SolexaQA++
19
- [[ -d "$b.solexaqa" ]] || mkdir "$b.solexaqa"
20
- exists "../02.trimmed_reads/$b".[12].*.pdf \
21
- && mv "../02.trimmed_reads/$b".[12].*.pdf "$b.solexaqa/"
22
-
23
- # Clean 02.trimmed_reads
24
- rm -f "../02.trimmed_reads/$b".[12].fastq_trimmed.segments
25
- rm -f "../02.trimmed_reads/$b".[12].fastq.trimmed.paired
26
- rm -f "../02.trimmed_reads/$b".[12].fastq.trimmed.single
27
- rm -f "../02.trimmed_reads/$b".[12].fastq.trimmed
28
- rm -f "../02.trimmed_reads/$b".[12].fastq
29
- miga add_result -P "$PROJECT" -D "$DATASET" -r trimmed_reads -f
12
+ # Gzip (if necessary)
13
+ for s in 1 2 ; do
14
+ in="../02.trimmed_reads/${DATASET}.${s}.clipped.fastq"
15
+ if [[ -s "$in" ]] ; then
16
+ gzip -9f "$in"
17
+ miga add_result -P "$PROJECT" -D "$DATASET" -r trimmed_reads -f
18
+ fi
19
+ done
30
20
 
31
21
  # Finalize
32
22
  miga date > "$DATASET.done"
33
23
  miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
24
+