miga-base 0.7.26.0 → 1.0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (337) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/_data/aai-intax.blast.tsv.gz +0 -0
  3. data/lib/miga/_data/aai-intax.diamond.tsv.gz +0 -0
  4. data/lib/miga/_data/aai-novel.blast.tsv.gz +0 -0
  5. data/lib/miga/_data/aai-novel.diamond.tsv.gz +0 -0
  6. data/lib/miga/cli/action/classify_wf.rb +2 -2
  7. data/lib/miga/cli/action/derep_wf.rb +1 -1
  8. data/lib/miga/cli/action/doctor.rb +57 -14
  9. data/lib/miga/cli/action/doctor/base.rb +47 -23
  10. data/lib/miga/cli/action/init.rb +11 -7
  11. data/lib/miga/cli/action/init/files_helper.rb +1 -0
  12. data/lib/miga/cli/action/ncbi_get.rb +3 -3
  13. data/lib/miga/cli/action/tax_dist.rb +2 -2
  14. data/lib/miga/cli/action/wf.rb +5 -4
  15. data/lib/miga/common.rb +1 -0
  16. data/lib/miga/daemon.rb +11 -4
  17. data/lib/miga/dataset/result.rb +10 -6
  18. data/lib/miga/json.rb +5 -4
  19. data/lib/miga/metadata.rb +5 -1
  20. data/lib/miga/parallel.rb +36 -0
  21. data/lib/miga/project.rb +8 -8
  22. data/lib/miga/project/base.rb +4 -4
  23. data/lib/miga/project/result.rb +2 -2
  24. data/lib/miga/sqlite.rb +10 -2
  25. data/lib/miga/version.rb +23 -9
  26. data/scripts/aai_distances.bash +16 -18
  27. data/scripts/ani_distances.bash +16 -17
  28. data/scripts/assembly.bash +31 -16
  29. data/scripts/haai_distances.bash +3 -27
  30. data/scripts/miga.bash +6 -4
  31. data/scripts/p.bash +1 -1
  32. data/scripts/read_quality.bash +9 -18
  33. data/scripts/trimmed_fasta.bash +14 -30
  34. data/scripts/trimmed_reads.bash +36 -36
  35. data/test/parallel_test.rb +31 -0
  36. data/test/project_test.rb +2 -1
  37. data/test/remote_dataset_test.rb +1 -1
  38. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +41964 -0
  39. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +32439 -0
  40. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +62056 -0
  41. data/utils/FastAAI/FastAAI/FastAAI +1336 -0
  42. data/utils/FastAAI/README.md +84 -0
  43. data/utils/FastAAI/kAAI_v1.0_virus.py +1296 -0
  44. data/utils/distance/commands.rb +1 -0
  45. data/utils/distance/database.rb +0 -1
  46. data/utils/distance/runner.rb +2 -4
  47. data/utils/enveomics/Docs/recplot2.md +244 -0
  48. data/utils/enveomics/Examples/aai-matrix.bash +66 -0
  49. data/utils/enveomics/Examples/ani-matrix.bash +66 -0
  50. data/utils/enveomics/Examples/essential-phylogeny.bash +105 -0
  51. data/utils/enveomics/Examples/unus-genome-phylogeny.bash +100 -0
  52. data/utils/enveomics/LICENSE.txt +73 -0
  53. data/utils/enveomics/Makefile +52 -0
  54. data/utils/enveomics/Manifest/Tasks/aasubs.json +103 -0
  55. data/utils/enveomics/Manifest/Tasks/blasttab.json +786 -0
  56. data/utils/enveomics/Manifest/Tasks/distances.json +161 -0
  57. data/utils/enveomics/Manifest/Tasks/fasta.json +802 -0
  58. data/utils/enveomics/Manifest/Tasks/fastq.json +291 -0
  59. data/utils/enveomics/Manifest/Tasks/graphics.json +126 -0
  60. data/utils/enveomics/Manifest/Tasks/mapping.json +137 -0
  61. data/utils/enveomics/Manifest/Tasks/ogs.json +382 -0
  62. data/utils/enveomics/Manifest/Tasks/other.json +906 -0
  63. data/utils/enveomics/Manifest/Tasks/remote.json +355 -0
  64. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +638 -0
  65. data/utils/enveomics/Manifest/Tasks/tables.json +308 -0
  66. data/utils/enveomics/Manifest/Tasks/trees.json +68 -0
  67. data/utils/enveomics/Manifest/Tasks/variants.json +111 -0
  68. data/utils/enveomics/Manifest/categories.json +165 -0
  69. data/utils/enveomics/Manifest/examples.json +154 -0
  70. data/utils/enveomics/Manifest/tasks.json +4 -0
  71. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +69 -0
  72. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -0
  73. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -0
  74. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -0
  75. data/utils/enveomics/Pipelines/assembly.pbs/README.md +189 -0
  76. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +112 -0
  77. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +23 -0
  78. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +44 -0
  79. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +50 -0
  80. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +37 -0
  81. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +68 -0
  82. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +49 -0
  83. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +80 -0
  84. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +57 -0
  85. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +63 -0
  86. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +38 -0
  87. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +73 -0
  88. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +21 -0
  89. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +72 -0
  90. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +98 -0
  91. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -0
  92. data/utils/enveomics/Pipelines/blast.pbs/README.md +127 -0
  93. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +109 -0
  94. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +128 -0
  95. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +16 -0
  96. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +22 -0
  97. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +26 -0
  98. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +89 -0
  99. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +29 -0
  100. data/utils/enveomics/Pipelines/idba.pbs/README.md +49 -0
  101. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +95 -0
  102. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +56 -0
  103. data/utils/enveomics/Pipelines/trim.pbs/README.md +54 -0
  104. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +70 -0
  105. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +130 -0
  106. data/utils/enveomics/README.md +42 -0
  107. data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +171 -0
  108. data/utils/enveomics/Scripts/Aln.cat.rb +221 -0
  109. data/utils/enveomics/Scripts/Aln.convert.pl +35 -0
  110. data/utils/enveomics/Scripts/AlphaDiversity.pl +152 -0
  111. data/utils/enveomics/Scripts/BedGraph.tad.rb +93 -0
  112. data/utils/enveomics/Scripts/BedGraph.window.rb +71 -0
  113. data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +102 -0
  114. data/utils/enveomics/Scripts/BlastTab.addlen.rb +63 -0
  115. data/utils/enveomics/Scripts/BlastTab.advance.bash +48 -0
  116. data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +55 -0
  117. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +104 -0
  118. data/utils/enveomics/Scripts/BlastTab.cogCat.rb +76 -0
  119. data/utils/enveomics/Scripts/BlastTab.filter.pl +47 -0
  120. data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +194 -0
  121. data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +104 -0
  122. data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +157 -0
  123. data/utils/enveomics/Scripts/BlastTab.recplot2.R +48 -0
  124. data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +86 -0
  125. data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +119 -0
  126. data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +86 -0
  127. data/utils/enveomics/Scripts/BlastTab.subsample.pl +47 -0
  128. data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +114 -0
  129. data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +90 -0
  130. data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +101 -0
  131. data/utils/enveomics/Scripts/Chao1.pl +97 -0
  132. data/utils/enveomics/Scripts/CharTable.classify.rb +234 -0
  133. data/utils/enveomics/Scripts/EBIseq2tax.rb +83 -0
  134. data/utils/enveomics/Scripts/FastA.N50.pl +60 -0
  135. data/utils/enveomics/Scripts/FastA.extract.rb +152 -0
  136. data/utils/enveomics/Scripts/FastA.filter.pl +52 -0
  137. data/utils/enveomics/Scripts/FastA.filterLen.pl +28 -0
  138. data/utils/enveomics/Scripts/FastA.filterN.pl +60 -0
  139. data/utils/enveomics/Scripts/FastA.fragment.rb +100 -0
  140. data/utils/enveomics/Scripts/FastA.gc.pl +42 -0
  141. data/utils/enveomics/Scripts/FastA.interpose.pl +93 -0
  142. data/utils/enveomics/Scripts/FastA.length.pl +38 -0
  143. data/utils/enveomics/Scripts/FastA.mask.rb +89 -0
  144. data/utils/enveomics/Scripts/FastA.per_file.pl +36 -0
  145. data/utils/enveomics/Scripts/FastA.qlen.pl +57 -0
  146. data/utils/enveomics/Scripts/FastA.rename.pl +65 -0
  147. data/utils/enveomics/Scripts/FastA.revcom.pl +23 -0
  148. data/utils/enveomics/Scripts/FastA.sample.rb +98 -0
  149. data/utils/enveomics/Scripts/FastA.slider.pl +85 -0
  150. data/utils/enveomics/Scripts/FastA.split.pl +55 -0
  151. data/utils/enveomics/Scripts/FastA.split.rb +79 -0
  152. data/utils/enveomics/Scripts/FastA.subsample.pl +131 -0
  153. data/utils/enveomics/Scripts/FastA.tag.rb +65 -0
  154. data/utils/enveomics/Scripts/FastA.toFastQ.rb +69 -0
  155. data/utils/enveomics/Scripts/FastA.wrap.rb +48 -0
  156. data/utils/enveomics/Scripts/FastQ.filter.pl +54 -0
  157. data/utils/enveomics/Scripts/FastQ.interpose.pl +90 -0
  158. data/utils/enveomics/Scripts/FastQ.maskQual.rb +89 -0
  159. data/utils/enveomics/Scripts/FastQ.offset.pl +90 -0
  160. data/utils/enveomics/Scripts/FastQ.split.pl +53 -0
  161. data/utils/enveomics/Scripts/FastQ.tag.rb +70 -0
  162. data/utils/enveomics/Scripts/FastQ.test-error.rb +81 -0
  163. data/utils/enveomics/Scripts/FastQ.toFastA.awk +24 -0
  164. data/utils/enveomics/Scripts/GFF.catsbj.pl +127 -0
  165. data/utils/enveomics/Scripts/GenBank.add_fields.rb +84 -0
  166. data/utils/enveomics/Scripts/HMM.essential.rb +351 -0
  167. data/utils/enveomics/Scripts/HMM.haai.rb +168 -0
  168. data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +83 -0
  169. data/utils/enveomics/Scripts/JPlace.distances.rb +88 -0
  170. data/utils/enveomics/Scripts/JPlace.to_iToL.rb +320 -0
  171. data/utils/enveomics/Scripts/M5nr.getSequences.rb +81 -0
  172. data/utils/enveomics/Scripts/MeTaxa.distribution.pl +198 -0
  173. data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +35 -0
  174. data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +49 -0
  175. data/utils/enveomics/Scripts/NCBIacc2tax.rb +92 -0
  176. data/utils/enveomics/Scripts/Newick.autoprune.R +27 -0
  177. data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +228 -0
  178. data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +32 -0
  179. data/utils/enveomics/Scripts/RefSeq.download.bash +48 -0
  180. data/utils/enveomics/Scripts/SRA.download.bash +55 -0
  181. data/utils/enveomics/Scripts/TRIBS.plot-test.R +36 -0
  182. data/utils/enveomics/Scripts/TRIBS.test.R +39 -0
  183. data/utils/enveomics/Scripts/Table.barplot.R +31 -0
  184. data/utils/enveomics/Scripts/Table.df2dist.R +30 -0
  185. data/utils/enveomics/Scripts/Table.filter.pl +61 -0
  186. data/utils/enveomics/Scripts/Table.merge.pl +77 -0
  187. data/utils/enveomics/Scripts/Table.prefScore.R +60 -0
  188. data/utils/enveomics/Scripts/Table.replace.rb +69 -0
  189. data/utils/enveomics/Scripts/Table.round.rb +63 -0
  190. data/utils/enveomics/Scripts/Table.split.pl +57 -0
  191. data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +227 -0
  192. data/utils/enveomics/Scripts/VCF.KaKs.rb +147 -0
  193. data/utils/enveomics/Scripts/VCF.SNPs.rb +88 -0
  194. data/utils/enveomics/Scripts/aai.rb +419 -0
  195. data/utils/enveomics/Scripts/ani.rb +362 -0
  196. data/utils/enveomics/Scripts/anir.rb +137 -0
  197. data/utils/enveomics/Scripts/clust.rand.rb +102 -0
  198. data/utils/enveomics/Scripts/gi2tax.rb +103 -0
  199. data/utils/enveomics/Scripts/in_silico_GA_GI.pl +96 -0
  200. data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
  201. data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
  202. data/utils/enveomics/Scripts/lib/enveomics.R +1 -0
  203. data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +293 -0
  204. data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +175 -0
  205. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +24 -0
  206. data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +17 -0
  207. data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +30 -0
  208. data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +253 -0
  209. data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +63 -0
  210. data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +182 -0
  211. data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +49 -0
  212. data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +74 -0
  213. data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +237 -0
  214. data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +3 -0
  215. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +31 -0
  216. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +152 -0
  217. data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +73 -0
  218. data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +135 -0
  219. data/utils/enveomics/Scripts/ogs.annotate.rb +88 -0
  220. data/utils/enveomics/Scripts/ogs.core-pan.rb +160 -0
  221. data/utils/enveomics/Scripts/ogs.extract.rb +125 -0
  222. data/utils/enveomics/Scripts/ogs.mcl.rb +186 -0
  223. data/utils/enveomics/Scripts/ogs.rb +104 -0
  224. data/utils/enveomics/Scripts/ogs.stats.rb +131 -0
  225. data/utils/enveomics/Scripts/rbm-legacy.rb +172 -0
  226. data/utils/enveomics/Scripts/rbm.rb +100 -0
  227. data/utils/enveomics/Scripts/sam.filter.rb +148 -0
  228. data/utils/enveomics/Tests/Makefile +10 -0
  229. data/utils/enveomics/Tests/Mgen_M2288.faa +3189 -0
  230. data/utils/enveomics/Tests/Mgen_M2288.fna +8282 -0
  231. data/utils/enveomics/Tests/Mgen_M2321.fna +8288 -0
  232. data/utils/enveomics/Tests/Nequ_Kin4M.faa +2970 -0
  233. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
  234. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +7 -0
  235. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +17 -0
  236. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +137 -0
  237. data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +123 -0
  238. data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +200 -0
  239. data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +55 -0
  240. data/utils/enveomics/Tests/alkB.nwk +1 -0
  241. data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +13 -0
  242. data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +17 -0
  243. data/utils/enveomics/Tests/hiv1.faa +59 -0
  244. data/utils/enveomics/Tests/hiv1.fna +134 -0
  245. data/utils/enveomics/Tests/hiv2.faa +70 -0
  246. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +233 -0
  247. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +1 -0
  248. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +233 -0
  249. data/utils/enveomics/Tests/phyla_counts.tsv +10 -0
  250. data/utils/enveomics/Tests/primate_lentivirus.ogs +11 -0
  251. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +9 -0
  252. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +8 -0
  253. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +6 -0
  254. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +9 -0
  255. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +6 -0
  256. data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +6 -0
  257. data/utils/enveomics/build_enveomics_r.bash +45 -0
  258. data/utils/enveomics/enveomics.R/DESCRIPTION +31 -0
  259. data/utils/enveomics/enveomics.R/NAMESPACE +39 -0
  260. data/utils/enveomics/enveomics.R/R/autoprune.R +155 -0
  261. data/utils/enveomics/enveomics.R/R/barplot.R +184 -0
  262. data/utils/enveomics/enveomics.R/R/cliopts.R +135 -0
  263. data/utils/enveomics/enveomics.R/R/df2dist.R +154 -0
  264. data/utils/enveomics/enveomics.R/R/growthcurve.R +331 -0
  265. data/utils/enveomics/enveomics.R/R/prefscore.R +79 -0
  266. data/utils/enveomics/enveomics.R/R/recplot.R +354 -0
  267. data/utils/enveomics/enveomics.R/R/recplot2.R +1631 -0
  268. data/utils/enveomics/enveomics.R/R/tribs.R +583 -0
  269. data/utils/enveomics/enveomics.R/R/utils.R +80 -0
  270. data/utils/enveomics/enveomics.R/README.md +81 -0
  271. data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
  272. data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
  273. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +16 -0
  274. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +16 -0
  275. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +16 -0
  276. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +25 -0
  277. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +46 -0
  278. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -0
  279. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +47 -0
  280. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -0
  281. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -0
  282. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +40 -0
  283. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +103 -0
  284. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +67 -0
  285. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -0
  286. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -0
  287. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +45 -0
  288. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +44 -0
  289. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +47 -0
  290. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +75 -0
  291. data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +50 -0
  292. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +44 -0
  293. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +139 -0
  294. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +45 -0
  295. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -0
  296. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +77 -0
  297. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -0
  298. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -0
  299. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -0
  300. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -0
  301. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +47 -0
  302. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -0
  303. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -0
  304. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +45 -0
  305. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -0
  306. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -0
  307. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -0
  308. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -0
  309. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +52 -0
  310. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -0
  311. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +51 -0
  312. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +43 -0
  313. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +82 -0
  314. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +59 -0
  315. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -0
  316. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +36 -0
  317. data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +23 -0
  318. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +68 -0
  319. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -0
  320. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -0
  321. data/utils/enveomics/enveomics.R/man/growth.curves.Rd +14 -0
  322. data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +13 -0
  323. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +78 -0
  324. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +46 -0
  325. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +45 -0
  326. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +125 -0
  327. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -0
  328. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -0
  329. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -0
  330. data/utils/enveomics/globals.mk +8 -0
  331. data/utils/enveomics/manifest.json +9 -0
  332. data/utils/multitrim/Multitrim How-To.pdf +0 -0
  333. data/utils/multitrim/README.md +67 -0
  334. data/utils/multitrim/multitrim.py +1555 -0
  335. data/utils/multitrim/multitrim.yml +13 -0
  336. data/utils/requirements.txt +4 -3
  337. metadata +304 -3
@@ -0,0 +1,109 @@
1
+ #!/bin/bash
2
+
3
+ ##################### VARIABLES
4
+ # Find the directory of the pipeline
5
+ if [[ "$PDIR" == "" ]] ; then PDIR=$(dirname "$(readlink -f "$0")"); fi ; # quoted so a script path containing spaces resolves as one word
6
+ CWD=$(pwd)
7
+
8
+ # Load config
9
+ if [[ "$PROJ" == "" ]] ; then PROJ="$1" ; fi
10
+ if [[ "$TASK" == "" ]] ; then TASK="$2" ; fi
11
+ if [[ "$TASK" == "" ]] ; then TASK="check" ; fi
12
+ NAMES=$(ls "$PDIR"/CONFIG.*.bash | sed -e 's/.*CONFIG\./ o /' | sed -e 's/\.bash//'); # directory quoted, glob left unquoted so it still expands
13
+ if [[ "$PROJ" == "" ]] ; then
14
+ if [[ "$HELP" == "" ]] ; then
15
+ echo "
16
+ Usage:
17
+ $0 name task
18
+
19
+ name The name of the run. CONFIG.name.bash must exist.
20
+ task The action to perform. One of:
21
+ o run: Executes the BLAST.
22
+ o check: Indicates the progress of the task (default).
23
+ o pause: Cancels running jobs (resume using run).
24
+ o dry: Checks that the parameters are correct, but doesn't run.
25
+ o eo: Review all eo files produced in the project.
26
+
27
+ See $PDIR/README.md for more information.
28
+
29
+ Available names are:
30
+ $NAMES
31
+ " >&2
32
+ else
33
+ echo "$HELP
34
+ Available names are:
35
+ $NAMES
36
+ " >&2
37
+ fi
38
+ exit 1
39
+ fi
40
+ if [[ ! -e "$PDIR/CONFIG.$PROJ.bash" ]] ; then
41
+ echo "$0: Error: Impossible to find $PDIR/CONFIG.$PROJ.bash, available names are:
42
+ $NAMES" >&2
43
+ exit 1
44
+ fi
45
+ source "$PDIR/CONFIG.$PROJ.bash" ;
46
+ SCRATCH="$SCRATCH_DIR/$PROJ" ;
47
+ MINVARS="PDIR=$PDIR,SCRATCH=$SCRATCH,PROJ=$PROJ" ;
48
+ case $QUEUE in
49
+ bioforce-6)
50
+ MAX_H=120 ;;
51
+ iw-shared-6)
52
+ MAX_H=12 ;;
53
+ micro-largedata)
54
+ MAX_H=120 ;;
55
+ biocluster-6 | biohimem-6 | microcluster)
56
+ MAX_H=240 ;;
57
+ *)
58
+ echo "Unrecognized queue: $QUEUE." >&2 ;
59
+ exit 1 ;;
60
+ esac ;
61
+
62
+ ##################### FUNCTIONS
63
+ function REGISTER_JOB { # Appends a status message for a step and, when a job ID is given, records that job under log/active
64
+ local STEP=$1 # step identifier; names the status file under $SCRATCH/log/status
65
+ local SUBSTEP=$2 # sub-step label written into the active-job record
66
+ local MESSAGE=$3 # text appended to the step's status file
67
+ local JOBID=$4 # optional job ID; empty means no active-job record is written
68
+
69
+ if [[ "$JOBID" != "" ]] ; then
70
+ MESSAGE="$MESSAGE [$JOBID]" ; # tag the status message with the job ID
71
+ echo "$STEP: $SUBSTEP: $(date)" >> "$SCRATCH/log/active/$JOBID" ; # one file per job, named after the job ID
72
+ fi
73
+ echo "$MESSAGE." >> "$SCRATCH/log/status/$STEP" ; # a trailing period is always appended
74
+ }
75
+
76
+ function LAUNCH_JOB { # Sources a submission script from the eo log directory, registers the resulting job(s) and prints the new job ID
77
+ local STEP=$1 # step identifier passed on to REGISTER_JOB
78
+ local SUBSTEP=$2 # sub-step label passed on to REGISTER_JOB
79
+ local MESSAGE=$3 # status message registered for the new job
80
+ local BASHFILE=$4 # script to source; expected to set NEW_JOBID and optionally SENTINEL_JOBID
81
+
82
+ cd "$SCRATCH/log/eo" || exit 1 ; # abort rather than submit from the wrong directory
83
+ date >> "$SCRATCH/etc/trials" ; # each launch counts as one trial
84
+ source "$BASHFILE" || exit 1 ;
85
+ cd "$CWD" ; # quoted so a working directory with spaces is restored correctly
86
+ if [[ "$SENTINEL_JOBID" != "" ]] ; then
87
+ REGISTER_JOB "$STEP" "$SUBSTEP" "Guarding job $NEW_JOBID" "$SENTINEL_JOBID" ;
88
+ fi ;
89
+ REGISTER_JOB "$STEP" "$SUBSTEP" "$MESSAGE" "$NEW_JOBID" ;
90
+ echo "$NEW_JOBID" ;
91
+ }
92
+
93
+ function JOB_DONE { # Marks a step as finished: logs "Done.", creates its success marker and resets the trials file
94
+ STEP=$1 # NOTE(review): not declared local, unlike REGISTER_JOB and LAUNCH_JOB, so it overwrites any STEP in the caller's scope; confirm whether that is intended
95
+
96
+ echo "Done." >> "$SCRATCH/log/status/$STEP" ;
97
+ touch "$SCRATCH/success/$STEP" ; # marker file named after the step
98
+ echo -n '# ' > "$SCRATCH/etc/trials" ; # overwrites the trials file with '# ' and no trailing newline
99
+ }
100
+
101
+ ##################### RUN
102
+ # Execute task
103
+ if [[ ! -e "$PDIR/TASK.$TASK.bash" ]] ; then
104
+ echo "Unrecognized task: $TASK." >&2 ;
105
+ exit 1 ;
106
+ else
107
+ source "$PDIR/TASK.$TASK.bash"
108
+ fi
109
+
@@ -0,0 +1,128 @@
1
+ #!/bin/bash
2
+
3
+ ##################### RUN
4
+ # Check if it was sourced from RUNME.bash
5
+ if [[ "$PDIR" == "" ]] ; then
6
+ echo "$0: Error: This file is not stand-alone." >&2
7
+ echo " Execute RUNME.bash as described in the README.txt file" >&2
8
+ exit 1
9
+ fi
10
+
11
+ # Check if the project exists
12
+ if [[ ! -d "$SCRATCH" ]] ; then
13
+ echo "The project $PROJ doesn't exist at $SCRATCH_DIR." >&2
14
+ echo " Execute '$PDIR/RUNME.bash $PROJ run' first." >&2
15
+ exit 1
16
+ fi
17
+
18
+ # Get log:
19
+ echo "==[ Running tasks ]=="
20
+ for i in $(ls $SCRATCH/log/status/* 2>/dev/null) ; do
21
+ echo " $(basename $i): $(tail -n 1 $i)"
22
+ done
23
+ echo ""
24
+
25
+ # Get active jobs:
26
+ echo "==[ Active jobs ]=="
27
+ job_r=0
28
+ job_i=0
29
+ job_c=0
30
+ for i in $(ls $SCRATCH/log/active/* 2>/dev/null) ; do
31
+ jid=$(basename $i)
32
+ stat=$(qstat -f1 $jid 2>&1)
33
+ state=$(echo "$stat" | grep '^ *job_state = ' | sed -e 's/.*job_state = //')
34
+ case $state in
35
+ C)
36
+ code=$(echo "$stat" | grep '^ *exit_status = ' | sed -e 's/.*exit_status = //')
37
+ if [[ "$code" == "0" ]] ; then
38
+ mv "$i" "$SCRATCH/log/done/"
39
+ let job_c=$job_c+1
40
+ else
41
+ echo "Warning: Job $jid ($(cat $i|tr -d '\n')) failed with code $code." >&2
42
+ echo " see errors at: $(echo "$stat" | grep '^ *Error_Path = ' | sed -e 's/.*Error_Path = //')"
43
+ mv "$i" "$SCRATCH/log/failed/"
44
+ fi ;;
45
+ R)
46
+ echo " Running: $jid: $(cat "$i")"
47
+ let job_r=$job_r+1 ;;
48
+ [HQW])
49
+ echo " Idle: $jid: $(cat "$i")"
50
+ let job_i=$job_i+1 ;;
51
+ E)
52
+ echo " Canceling: $jid: $(cat "$i")" ;;
53
+ *)
54
+ tmp_err=$(echo "$stat" | grep ERROR)
55
+ if [[ "$tmp_err" == "" ]] ; then
56
+ echo "Warning: Unrecognized state: $jid: $state." >&2
57
+ echo " Please report this problem." >&2
58
+ else
59
+ echo " Error: $jid: $tmp_err"
60
+ fi ;;
61
+ esac
62
+ #subjobs=$(echo "$stat" | grep 'Sub-jobs:' | sed -e 's/.*: *//')
63
+ #if [[ "$subjobs" -gt 0 ]] ; then
64
+ # echo "$stat" | grep '^ *\(Sub-jobs\|Active\|Eligible\|Blocked\|Completed\):' | sed -e 's/^ *//' | sed -e 's/ *//' | tr '\n' ' ' | sed -e 's/^/ /'
65
+ # echo
66
+ #fi
67
+ done
68
+ if [[ $job_c -gt 0 ]] ; then
69
+ echo ""
70
+ echo " Completed since last check: $job_c."
71
+ fi
72
+ if [[ $job_r -gt 0 || $job_i -gt 0 ]] ; then
73
+ echo ""
74
+ echo " Running jobs: $job_r."
75
+ echo " Idle jobs: $job_i."
76
+ fi
77
+ echo ""
78
+
79
+ # Auto-trials
80
+ echo "==[ Auto-trials ]=="
81
+ if [[ -e "$SCRATCH/etc/trials" ]] ; then
82
+ trials=$(cat "$SCRATCH/etc/trials" | wc -l | sed -e 's/ //g')
83
+ if [[ $trials -gt 1 ]] ; then
84
+ echo " $trials trials attempted:"
85
+ else
86
+ echo " No recent failures in the current step, job launched:"
87
+ fi
88
+ cat "$SCRATCH/etc/trials" | sed -e 's/^/ o /' | sed -e 's/# $/No active trials\n/g'
89
+ fi
90
+ echo ""
91
+
92
+ # Step-specific checks:
93
+ echo "==[ Step summary ]=="
94
+ todo=1
95
+ if [[ -e "$SCRATCH/success/00" ]] ; then
96
+ echo " Successful project initialization."
97
+ if [[ -e "$SCRATCH/success/01" ]] ; then
98
+ echo " Successful input preparation."
99
+ if [[ -e "$SCRATCH/success/02" ]] ; then
100
+ echo " Successful BLAST execution."
101
+ if [[ -e "$SCRATCH/success/03" ]] ; then # was a repeated check of 02; presumably step 03 (concatenation) writes this marker, confirm against 03.pbs.bash
102
+ echo " Successful concatenation."
103
+ echo " Project finished successfully!"
104
+ todo=0
105
+ else
106
+ echo " Concatenating results."
107
+ fi
108
+ else
109
+ echo " Running BLAST."
110
+ fi
111
+ else
112
+ echo " Preparing input."
113
+ fi
114
+ else
115
+ echo " Initializing project."
116
+ fi
117
+
118
+ if [[ "$todo" -eq 1 && $job_r -eq 0 && $job_i -eq 0 ]] ; then
119
+ echo " Job currently paused. To resume, execute:"
120
+ echo " $PDIR/RUNME.bash $PROJ run"
121
+ fi
122
+ echo
123
+
124
+ # Entire log
125
+ echo "==[ Complete log ]=="
126
+ for i in $(ls $SCRATCH/log/status/* 2>/dev/null) ; do
127
+ cat "$i" | sed -e "s/^/ $(basename $i): /"
128
+ done
@@ -0,0 +1,16 @@
1
+
2
+ [[ "$QUEUE" == "" ]] && echo "Undefined QUEUE" >&2 && exit 1;
3
+ [[ "$MAX_JOBS" == "" ]] && echo "Undefined MAX_JOBS" >&2 && exit 1;
4
+ [[ "$PPN" == "" ]] && echo "Undefined PPN" >&2 && exit 1;
5
+ [[ "$RAM" == "" ]] && echo "Undefined RAM" >&2 && exit 1;
6
+ [[ "$SCRATCH_DIR" == "" ]] && echo "Undefined SCRATCH_DIR" >&2 && exit 1;
7
+ [[ "$INPUT" == "" ]] && echo "Undefined INPUT" >&2 && exit 1;
8
+ [[ "$DB" == "" ]] && echo "Undefined DB" >&2 && exit 1;
9
+ [[ "$PROGRAM" == "" ]] && echo "Undefined PROGRAM" >&2 && exit 1;
10
+ [[ "$MAX_TRIALS" == "" ]] && echo "Undefined MAX_TRIALS" >&2 && exit 1;
11
+ [[ "$(type -t BEGIN)" == "function" ]] || { echo "Undefined function BEGIN" >&2 ; exit 1 ; } ; # braces, not a subshell, so exit really stops the script; message goes to stderr like the variable checks
12
+ [[ "$(type -t BEFORE_BLAST)" == "function" ]] || { echo "Undefined function BEFORE_BLAST" >&2 ; exit 1 ; } ;
13
+ [[ "$(type -t RUN_BLAST)" == "function" ]] || { echo "Undefined function RUN_BLAST" >&2 ; exit 1 ; } ;
14
+ [[ "$(type -t AFTER_BLAST)" == "function" ]] || { echo "Undefined function AFTER_BLAST" >&2 ; exit 1 ; } ;
15
+ [[ "$(type -t END)" == "function" ]] || { echo "Undefined function END" >&2 ; exit 1 ; } ;
16
+
@@ -0,0 +1,22 @@
1
+ #!/bin/bash
2
+
3
+ ##################### RUN
4
+ # Check if it was sourced from RUNME.bash
5
+ if [[ "$PDIR" == "" ]] ; then
6
+ echo "$0: Error: This file is not stand-alone." >&2
7
+ echo " Execute RUNME.bash as described in the README.txt file" >&2 ;
8
+ exit 1 ;
9
+ fi ;
10
+
11
+ # Check if the project exists
12
+ if [[ ! -d "$SCRATCH" ]] ; then
13
+ echo "The project $PROJ doesn't exist at $SCRATCH_DIR." >&2 ;
14
+ echo " Execute '$PDIR/RUNME.bash $PROJ run' first." >&2 ;
15
+ exit 1 ;
16
+ fi ;
17
+
18
+ # Review errors
19
+ (echo -e "==[ Last 10 lines of all e files ]==\nPress q to exit\n" ; tail -n 10 $SCRATCH/log/eo/*.e* ) | less
20
+ # Review output
21
+ (echo -e "==[ Last 100 lines of all o files ]==\nPress q to exit\n" ; tail -n 100 $SCRATCH/log/eo/*.o* ) | less
22
+
@@ -0,0 +1,26 @@
1
+ #!/bin/bash
2
+
3
+ ##################### RUN
4
+ # Check if it was sourced from RUNME.bash
5
+ if [[ "$PDIR" == "" ]] ; then
6
+ echo "$0: Error: This file is not stand-alone." >&2
7
+ echo " Execute RUNME.bash as described in the README.txt file" >&2 ;
8
+ exit 1 ;
9
+ fi ;
10
+
11
+ # Get active jobs:
12
+ echo "======[ check ]======"
13
+ job_r=0;
14
+ job_i=0;
15
+ job_c=0;
16
+
17
+ echo "======[ pause ]======"
18
+ for i in $(ls $SCRATCH/log/active/* 2>/dev/null) ; do
19
+ jid=$(basename "$i") ; # resolve the job ID first so the message below names the job actually being cancelled
20
+ echo " Pausing $jid." ;
21
+ qdel "$jid" ;
22
+ done ;
23
+
24
+ # Restart auto-trials
25
+ echo -n > "$SCRATCH/etc/trials" ;
26
+
@@ -0,0 +1,89 @@
1
+ #!/bin/bash
2
+
3
+ ##################### RUN
4
+ # Refuse to run stand-alone: PDIR must be set (expected to come from RUNME.bash)
5
+ if [[ -z "$PDIR" ]] ; then
6
+ echo "$0: Error: This file is not stand-alone. Execute RUNME.bash as described in the README.txt file" >&2
7
+ exit 1
8
+ fi
9
+
10
+ # Halt once the trials log holds MAX_TRIALS lines or more
11
+ trials=0 ;
12
+ if [[ -e "$SCRATCH/etc/trials" ]] ; then
13
+ trials=$(wc -l < "$SCRATCH/etc/trials" | sed -e 's/ //g');
14
+ if [[ "$trials" -ge "$MAX_TRIALS" ]] ; then
15
+ echo "The maximum number of trials was already attempted, halting." >&2 ;
16
+ exit 1 ;
17
+ fi ;
18
+ fi ;
19
+
20
+ # Make sure the scratch directory is in place, aborting if it cannot be created
21
+ [[ -d "$SCRATCH" ]] || mkdir -p "$SCRATCH" || exit 1 ;
22
+
23
+ if [[ ! -e "$SCRATCH/success/00" ]] ; then # step 00 runs only while success/00 does not exist
24
+ # 00. Initialize the project: create the scratch layout and write the per-step submission scripts
25
+ echo "00. Initializing project." >&2 ;
26
+ mkdir -p "$SCRATCH/tmp" "$SCRATCH/etc" "$SCRATCH/results" "$SCRATCH/success" || exit 1 ;
27
+ mkdir -p "$SCRATCH/log/active" "$SCRATCH/log/done" "$SCRATCH/log/failed" || exit 1 ;
28
+ mkdir -p "$SCRATCH/log/status" "$SCRATCH/log/eo" || exit 1 ;
29
+ echo "Preparing structure." >> "$SCRATCH/log/status/00" ;
30
+ # Build 01.bash: first line submits 01.pbs.bash and stores the qsub output in NEW_JOBID; second line (appended) submits sentinel.pbs.bash with depend=afterany on that job. \$ is escaped so the substitutions are written literally into the file
31
+ echo "NEW_JOBID=\$(qsub -q '$QUEUE' -l 'walltime=$MAX_H:00:00,mem=$RAM' -v '$MINVARS' -N '$PROJ-01' \\
32
+ '$PDIR/01.pbs.bash'|tr -d '\\n')" \
33
+ > "$SCRATCH/etc/01.bash" || exit 1 ;
34
+ echo "SENTINEL_JOBID=\$(qsub -q '$QUEUE' -l 'walltime=2:00:00' -W \"depend=afterany:\$NEW_JOBID\" \\
35
+ -v \"$MINVARS,STEP=01,AFTERJOB=\$NEW_JOBID\" -N '$PROJ-01-sentinel' '$PDIR/sentinel.pbs.bash'|tr -d '\\n')" \
36
+ >> "$SCRATCH/etc/01.bash" || exit 1 ;
37
+ # Build 02.bash: same two-line layout, but 02.pbs.bash is submitted with -t '1-$MAX_JOBS' and nodes=1:ppn=$PPN, and the sentinel waits with depend=afteranyarray
38
+ echo "NEW_JOBID=\$(qsub -q '$QUEUE' -l 'walltime=$MAX_H:00:00,mem=$RAM,nodes=1:ppn=$PPN' \\
39
+ -v '$MINVARS' -N '$PROJ-02' -t '1-$MAX_JOBS' '$PDIR/02.pbs.bash'|tr -d '\\n')" \
40
+ > "$SCRATCH/etc/02.bash" \
41
+ || exit 1 ;
42
+ echo "SENTINEL_JOBID=\$(qsub -q '$QUEUE' -l 'walltime=2:00:00' -W \"depend=afteranyarray:\$NEW_JOBID\" \\
43
+ -v \"$MINVARS,STEP=02,AFTERJOB=\$NEW_JOBID\" -N '$PROJ-02-sentinel' '$PDIR/sentinel.pbs.bash'|tr -d '\\n')" \
44
+ >> "$SCRATCH/etc/02.bash" \
45
+ || exit 1 ;
46
+ # Build 03.bash: same layout as 01.bash, submitting 03.pbs.bash followed by its sentinel (depend=afterany)
47
+ echo "NEW_JOBID=\$(qsub -q '$QUEUE' -l 'walltime=$MAX_H:00:00,mem=$RAM' -v '$MINVARS' -N '$PROJ-03' \\
48
+ '$PDIR/03.pbs.bash'|tr -d '\\n')" \
49
+ > "$SCRATCH/etc/03.bash" || exit 1 ;
50
+ echo "SENTINEL_JOBID=\$(qsub -q '$QUEUE' -l 'walltime=2:00:00' -W \"depend=afterany:\$NEW_JOBID\" \\
51
+ -v \"$MINVARS,STEP=03,AFTERJOB=\$NEW_JOBID\" -N '$PROJ-03-sentinel' '$PDIR/sentinel.pbs.bash'|tr -d '\\n')" \
52
+ >> "$SCRATCH/etc/03.bash" || exit 1 ;
53
+
54
+ JOB_DONE "00" ; # JOB_DONE is defined outside this file; presumably it records step 00 as successful (TODO confirm)
55
+ fi ;
56
+
57
+ if ! [[ -e "$SCRATCH/success/01" ]] ; then
58
+ # Step 01 has no success marker yet: launch input preparation
59
+ echo "01. Preparing input." >&2 ;
60
+ JOB01=$(LAUNCH_JOB "01" "00" "Preparing input files" "$SCRATCH/etc/01.bash") ;
61
+ echo " New job: $JOB01." >&2 ;
62
+ else
63
+ if ! [[ -e "$SCRATCH/success/02" ]] ; then
64
+ # Step 02 has no success marker yet: launch the BLAST jobs
65
+ echo "02. Launching BLAST." >&2 ;
66
+ JOB02=$(LAUNCH_JOB "02" "00" "Running BLAST" "$SCRATCH/etc/02.bash") ;
67
+ echo " New job: $JOB02." >&2 ;
68
+ # On resubmission, delete the sub-jobs whose success marker already exists
69
+ cleaned=0
70
+ echo -n " Cleaning completed sub-jobs on $JOB02: " >&2 ;
71
+ for jobi in $(seq 1 $MAX_JOBS) ; do
72
+ if [[ -e "$SCRATCH/success/02.$jobi" ]] ; then
73
+ qdel "${JOB02}[$jobi]" > /dev/null 2>&1 ;
74
+ cleaned=$((cleaned+1)) ;
75
+ fi ;
76
+ done ;
77
+ echo "$cleaned sub-jobs completed." >&2 ;
78
+ else
79
+ if ! [[ -e "$SCRATCH/success/03" ]] ; then
80
+ # Step 03 has no success marker yet: launch the final concatenation
81
+ echo "03. Finalizing." >&2 ;
82
+ JOB03=$(LAUNCH_JOB "03" "00" "Concatenating results" "$SCRATCH/etc/03.bash") ;
83
+ echo " New job: $JOB03." >&2 ;
84
+ else
85
+ echo "Project complete, nothing to run." ;
86
+ fi ;
87
+ fi ;
88
+ fi ;
89
+
@@ -0,0 +1,29 @@
1
+ # blast.pbs pipeline
2
+ # Sentinel script
3
+
4
+ echo "Sentinel script after $AFTERJOB" ;
5
+
6
+ # Step-specific checks: for step 02, mark the step done only when every sub-job left a success marker
7
+ if [[ "$STEP" == "02" ]] ; then
8
+ # Read configuration from the scratch directory; stop if it cannot be entered rather than sourcing from the wrong place
9
+ cd "$SCRATCH" || { echo "Cannot change directory to $SCRATCH" >&2 ; exit 1 ; } ;
10
+ TASK="dry" ; # presumably makes RUNME.bash load its settings without running a task (TODO confirm)
11
+ source "$PDIR/RUNME.bash" ;
12
+
13
+ # Check tasks: count the sub-jobs 1..MAX_JOBS that have no success/02.<i> marker
14
+ INCOMPLETE=0;
15
+ for i in $(seq 1 $MAX_JOBS) ; do
16
+ if [[ ! -e "$SCRATCH/success/02.$i" ]] ; then
17
+ INCOMPLETE=$((INCOMPLETE+1)) ;
18
+ fi ;
19
+ done
20
+ if [[ $INCOMPLETE -eq 0 ]] ; then
21
+ JOB_DONE "02" ;
22
+ else
23
+ echo "$INCOMPLETE incomplete jobs, re-launching step 02." ;
24
+ fi ;
25
+ fi
26
+
27
+ # Continue the workflow: RUNME.bash decides which step to launch next
28
+ "$PDIR/RUNME.bash" "$PROJ" run || exit 1 ;
29
+
@@ -0,0 +1,49 @@
1
+ @author: Luis Miguel Rodriguez-R <lmrodriguezr at gmail dot com>
2
+
3
+ @update: Feb-26-2015
4
+
5
+ @license: artistic 2.0
6
+
7
+ @status: auto
8
+
9
+ @pbs: yes
10
+
11
+ # IMPORTANT
12
+
13
+ This pipeline was developed for the [PACE cluster](http://pace.gatech.edu/). You
14
+ are free to use it on other platforms with the appropriate adjustments.
15
+
16
+ # PURPOSE
17
+
18
+ Performs assembly using IDBA-UD, designed for Single-Cell Genomics and Metagenomics.
19
+
20
+ # HELP
21
+
22
+ 1. Files preparation:
23
+
24
+ 1.1. Obtain the enveomics package in the cluster. You can use:
25
+ `git clone https://github.com/lmrodriguezr/enveomics.git`
26
+
27
+ 1.2. Prepare the trimmed reads (e.g., use trim.pbs) in interposed FastA format. Files
28
+ must be raw, not zipped or packaged. Filenames must conform to the format:
29
+ <name>.CoupledReads.fa, where <name> is the name of the sample. Locate all the
30
+ files within a folder named 04.trimmed_fasta, within your project folder. If you
31
+ used trim.pbs, no further action is necessary.
32
+
33
+ 2. Pipeline execution:
34
+
35
+ 2.1. Simply execute `./RUNME.bash <dir> <data_type>`, where `<dir>` is the folder containing
36
+ the 04.trimmed_fasta folder, and `<data_type>` is a supported type of data (see help
37
+ message running `./RUNME.bash` without arguments).
38
+
39
+ 3. What to expect:
40
+
41
+ By the end of the run, you should find the folder *05.assembly*, including the following
42
+ files for each dataset:
43
+
44
+ 3.1. `<dataset>`: The IDBA output folder.
45
+
46
+ 3.2. `<dataset>.AllContigs.fna`: All contigs longer than 200bp in FastA format.
47
+
48
+ 3.3. `<dataset>.LargeContigs.fna`: Contigs longer than 500bp in FastA format.
49
+