miga-base 0.7.26.0 → 1.0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (337) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/_data/aai-intax.blast.tsv.gz +0 -0
  3. data/lib/miga/_data/aai-intax.diamond.tsv.gz +0 -0
  4. data/lib/miga/_data/aai-novel.blast.tsv.gz +0 -0
  5. data/lib/miga/_data/aai-novel.diamond.tsv.gz +0 -0
  6. data/lib/miga/cli/action/classify_wf.rb +2 -2
  7. data/lib/miga/cli/action/derep_wf.rb +1 -1
  8. data/lib/miga/cli/action/doctor.rb +57 -14
  9. data/lib/miga/cli/action/doctor/base.rb +47 -23
  10. data/lib/miga/cli/action/init.rb +11 -7
  11. data/lib/miga/cli/action/init/files_helper.rb +1 -0
  12. data/lib/miga/cli/action/ncbi_get.rb +3 -3
  13. data/lib/miga/cli/action/tax_dist.rb +2 -2
  14. data/lib/miga/cli/action/wf.rb +5 -4
  15. data/lib/miga/common.rb +1 -0
  16. data/lib/miga/daemon.rb +11 -4
  17. data/lib/miga/dataset/result.rb +10 -6
  18. data/lib/miga/json.rb +5 -4
  19. data/lib/miga/metadata.rb +5 -1
  20. data/lib/miga/parallel.rb +36 -0
  21. data/lib/miga/project.rb +8 -8
  22. data/lib/miga/project/base.rb +4 -4
  23. data/lib/miga/project/result.rb +2 -2
  24. data/lib/miga/sqlite.rb +10 -2
  25. data/lib/miga/version.rb +23 -9
  26. data/scripts/aai_distances.bash +16 -18
  27. data/scripts/ani_distances.bash +16 -17
  28. data/scripts/assembly.bash +31 -16
  29. data/scripts/haai_distances.bash +3 -27
  30. data/scripts/miga.bash +6 -4
  31. data/scripts/p.bash +1 -1
  32. data/scripts/read_quality.bash +9 -18
  33. data/scripts/trimmed_fasta.bash +14 -30
  34. data/scripts/trimmed_reads.bash +36 -36
  35. data/test/parallel_test.rb +31 -0
  36. data/test/project_test.rb +2 -1
  37. data/test/remote_dataset_test.rb +1 -1
  38. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +41964 -0
  39. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +32439 -0
  40. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +62056 -0
  41. data/utils/FastAAI/FastAAI/FastAAI +1336 -0
  42. data/utils/FastAAI/README.md +84 -0
  43. data/utils/FastAAI/kAAI_v1.0_virus.py +1296 -0
  44. data/utils/distance/commands.rb +1 -0
  45. data/utils/distance/database.rb +0 -1
  46. data/utils/distance/runner.rb +2 -4
  47. data/utils/enveomics/Docs/recplot2.md +244 -0
  48. data/utils/enveomics/Examples/aai-matrix.bash +66 -0
  49. data/utils/enveomics/Examples/ani-matrix.bash +66 -0
  50. data/utils/enveomics/Examples/essential-phylogeny.bash +105 -0
  51. data/utils/enveomics/Examples/unus-genome-phylogeny.bash +100 -0
  52. data/utils/enveomics/LICENSE.txt +73 -0
  53. data/utils/enveomics/Makefile +52 -0
  54. data/utils/enveomics/Manifest/Tasks/aasubs.json +103 -0
  55. data/utils/enveomics/Manifest/Tasks/blasttab.json +786 -0
  56. data/utils/enveomics/Manifest/Tasks/distances.json +161 -0
  57. data/utils/enveomics/Manifest/Tasks/fasta.json +802 -0
  58. data/utils/enveomics/Manifest/Tasks/fastq.json +291 -0
  59. data/utils/enveomics/Manifest/Tasks/graphics.json +126 -0
  60. data/utils/enveomics/Manifest/Tasks/mapping.json +137 -0
  61. data/utils/enveomics/Manifest/Tasks/ogs.json +382 -0
  62. data/utils/enveomics/Manifest/Tasks/other.json +906 -0
  63. data/utils/enveomics/Manifest/Tasks/remote.json +355 -0
  64. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +638 -0
  65. data/utils/enveomics/Manifest/Tasks/tables.json +308 -0
  66. data/utils/enveomics/Manifest/Tasks/trees.json +68 -0
  67. data/utils/enveomics/Manifest/Tasks/variants.json +111 -0
  68. data/utils/enveomics/Manifest/categories.json +165 -0
  69. data/utils/enveomics/Manifest/examples.json +154 -0
  70. data/utils/enveomics/Manifest/tasks.json +4 -0
  71. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +69 -0
  72. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -0
  73. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -0
  74. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -0
  75. data/utils/enveomics/Pipelines/assembly.pbs/README.md +189 -0
  76. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +112 -0
  77. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +23 -0
  78. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +44 -0
  79. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +50 -0
  80. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +37 -0
  81. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +68 -0
  82. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +49 -0
  83. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +80 -0
  84. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +57 -0
  85. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +63 -0
  86. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +38 -0
  87. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +73 -0
  88. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +21 -0
  89. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +72 -0
  90. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +98 -0
  91. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -0
  92. data/utils/enveomics/Pipelines/blast.pbs/README.md +127 -0
  93. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +109 -0
  94. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +128 -0
  95. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +16 -0
  96. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +22 -0
  97. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +26 -0
  98. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +89 -0
  99. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +29 -0
  100. data/utils/enveomics/Pipelines/idba.pbs/README.md +49 -0
  101. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +95 -0
  102. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +56 -0
  103. data/utils/enveomics/Pipelines/trim.pbs/README.md +54 -0
  104. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +70 -0
  105. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +130 -0
  106. data/utils/enveomics/README.md +42 -0
  107. data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +171 -0
  108. data/utils/enveomics/Scripts/Aln.cat.rb +221 -0
  109. data/utils/enveomics/Scripts/Aln.convert.pl +35 -0
  110. data/utils/enveomics/Scripts/AlphaDiversity.pl +152 -0
  111. data/utils/enveomics/Scripts/BedGraph.tad.rb +93 -0
  112. data/utils/enveomics/Scripts/BedGraph.window.rb +71 -0
  113. data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +102 -0
  114. data/utils/enveomics/Scripts/BlastTab.addlen.rb +63 -0
  115. data/utils/enveomics/Scripts/BlastTab.advance.bash +48 -0
  116. data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +55 -0
  117. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +104 -0
  118. data/utils/enveomics/Scripts/BlastTab.cogCat.rb +76 -0
  119. data/utils/enveomics/Scripts/BlastTab.filter.pl +47 -0
  120. data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +194 -0
  121. data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +104 -0
  122. data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +157 -0
  123. data/utils/enveomics/Scripts/BlastTab.recplot2.R +48 -0
  124. data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +86 -0
  125. data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +119 -0
  126. data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +86 -0
  127. data/utils/enveomics/Scripts/BlastTab.subsample.pl +47 -0
  128. data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +114 -0
  129. data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +90 -0
  130. data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +101 -0
  131. data/utils/enveomics/Scripts/Chao1.pl +97 -0
  132. data/utils/enveomics/Scripts/CharTable.classify.rb +234 -0
  133. data/utils/enveomics/Scripts/EBIseq2tax.rb +83 -0
  134. data/utils/enveomics/Scripts/FastA.N50.pl +60 -0
  135. data/utils/enveomics/Scripts/FastA.extract.rb +152 -0
  136. data/utils/enveomics/Scripts/FastA.filter.pl +52 -0
  137. data/utils/enveomics/Scripts/FastA.filterLen.pl +28 -0
  138. data/utils/enveomics/Scripts/FastA.filterN.pl +60 -0
  139. data/utils/enveomics/Scripts/FastA.fragment.rb +100 -0
  140. data/utils/enveomics/Scripts/FastA.gc.pl +42 -0
  141. data/utils/enveomics/Scripts/FastA.interpose.pl +93 -0
  142. data/utils/enveomics/Scripts/FastA.length.pl +38 -0
  143. data/utils/enveomics/Scripts/FastA.mask.rb +89 -0
  144. data/utils/enveomics/Scripts/FastA.per_file.pl +36 -0
  145. data/utils/enveomics/Scripts/FastA.qlen.pl +57 -0
  146. data/utils/enveomics/Scripts/FastA.rename.pl +65 -0
  147. data/utils/enveomics/Scripts/FastA.revcom.pl +23 -0
  148. data/utils/enveomics/Scripts/FastA.sample.rb +98 -0
  149. data/utils/enveomics/Scripts/FastA.slider.pl +85 -0
  150. data/utils/enveomics/Scripts/FastA.split.pl +55 -0
  151. data/utils/enveomics/Scripts/FastA.split.rb +79 -0
  152. data/utils/enveomics/Scripts/FastA.subsample.pl +131 -0
  153. data/utils/enveomics/Scripts/FastA.tag.rb +65 -0
  154. data/utils/enveomics/Scripts/FastA.toFastQ.rb +69 -0
  155. data/utils/enveomics/Scripts/FastA.wrap.rb +48 -0
  156. data/utils/enveomics/Scripts/FastQ.filter.pl +54 -0
  157. data/utils/enveomics/Scripts/FastQ.interpose.pl +90 -0
  158. data/utils/enveomics/Scripts/FastQ.maskQual.rb +89 -0
  159. data/utils/enveomics/Scripts/FastQ.offset.pl +90 -0
  160. data/utils/enveomics/Scripts/FastQ.split.pl +53 -0
  161. data/utils/enveomics/Scripts/FastQ.tag.rb +70 -0
  162. data/utils/enveomics/Scripts/FastQ.test-error.rb +81 -0
  163. data/utils/enveomics/Scripts/FastQ.toFastA.awk +24 -0
  164. data/utils/enveomics/Scripts/GFF.catsbj.pl +127 -0
  165. data/utils/enveomics/Scripts/GenBank.add_fields.rb +84 -0
  166. data/utils/enveomics/Scripts/HMM.essential.rb +351 -0
  167. data/utils/enveomics/Scripts/HMM.haai.rb +168 -0
  168. data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +83 -0
  169. data/utils/enveomics/Scripts/JPlace.distances.rb +88 -0
  170. data/utils/enveomics/Scripts/JPlace.to_iToL.rb +320 -0
  171. data/utils/enveomics/Scripts/M5nr.getSequences.rb +81 -0
  172. data/utils/enveomics/Scripts/MeTaxa.distribution.pl +198 -0
  173. data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +35 -0
  174. data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +49 -0
  175. data/utils/enveomics/Scripts/NCBIacc2tax.rb +92 -0
  176. data/utils/enveomics/Scripts/Newick.autoprune.R +27 -0
  177. data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +228 -0
  178. data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +32 -0
  179. data/utils/enveomics/Scripts/RefSeq.download.bash +48 -0
  180. data/utils/enveomics/Scripts/SRA.download.bash +55 -0
  181. data/utils/enveomics/Scripts/TRIBS.plot-test.R +36 -0
  182. data/utils/enveomics/Scripts/TRIBS.test.R +39 -0
  183. data/utils/enveomics/Scripts/Table.barplot.R +31 -0
  184. data/utils/enveomics/Scripts/Table.df2dist.R +30 -0
  185. data/utils/enveomics/Scripts/Table.filter.pl +61 -0
  186. data/utils/enveomics/Scripts/Table.merge.pl +77 -0
  187. data/utils/enveomics/Scripts/Table.prefScore.R +60 -0
  188. data/utils/enveomics/Scripts/Table.replace.rb +69 -0
  189. data/utils/enveomics/Scripts/Table.round.rb +63 -0
  190. data/utils/enveomics/Scripts/Table.split.pl +57 -0
  191. data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +227 -0
  192. data/utils/enveomics/Scripts/VCF.KaKs.rb +147 -0
  193. data/utils/enveomics/Scripts/VCF.SNPs.rb +88 -0
  194. data/utils/enveomics/Scripts/aai.rb +419 -0
  195. data/utils/enveomics/Scripts/ani.rb +362 -0
  196. data/utils/enveomics/Scripts/anir.rb +137 -0
  197. data/utils/enveomics/Scripts/clust.rand.rb +102 -0
  198. data/utils/enveomics/Scripts/gi2tax.rb +103 -0
  199. data/utils/enveomics/Scripts/in_silico_GA_GI.pl +96 -0
  200. data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
  201. data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
  202. data/utils/enveomics/Scripts/lib/enveomics.R +1 -0
  203. data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +293 -0
  204. data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +175 -0
  205. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +24 -0
  206. data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +17 -0
  207. data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +30 -0
  208. data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +253 -0
  209. data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +63 -0
  210. data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +182 -0
  211. data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +49 -0
  212. data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +74 -0
  213. data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +237 -0
  214. data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +3 -0
  215. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +31 -0
  216. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +152 -0
  217. data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +73 -0
  218. data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +135 -0
  219. data/utils/enveomics/Scripts/ogs.annotate.rb +88 -0
  220. data/utils/enveomics/Scripts/ogs.core-pan.rb +160 -0
  221. data/utils/enveomics/Scripts/ogs.extract.rb +125 -0
  222. data/utils/enveomics/Scripts/ogs.mcl.rb +186 -0
  223. data/utils/enveomics/Scripts/ogs.rb +104 -0
  224. data/utils/enveomics/Scripts/ogs.stats.rb +131 -0
  225. data/utils/enveomics/Scripts/rbm-legacy.rb +172 -0
  226. data/utils/enveomics/Scripts/rbm.rb +100 -0
  227. data/utils/enveomics/Scripts/sam.filter.rb +148 -0
  228. data/utils/enveomics/Tests/Makefile +10 -0
  229. data/utils/enveomics/Tests/Mgen_M2288.faa +3189 -0
  230. data/utils/enveomics/Tests/Mgen_M2288.fna +8282 -0
  231. data/utils/enveomics/Tests/Mgen_M2321.fna +8288 -0
  232. data/utils/enveomics/Tests/Nequ_Kin4M.faa +2970 -0
  233. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
  234. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +7 -0
  235. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +17 -0
  236. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +137 -0
  237. data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +123 -0
  238. data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +200 -0
  239. data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +55 -0
  240. data/utils/enveomics/Tests/alkB.nwk +1 -0
  241. data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +13 -0
  242. data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +17 -0
  243. data/utils/enveomics/Tests/hiv1.faa +59 -0
  244. data/utils/enveomics/Tests/hiv1.fna +134 -0
  245. data/utils/enveomics/Tests/hiv2.faa +70 -0
  246. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +233 -0
  247. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +1 -0
  248. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +233 -0
  249. data/utils/enveomics/Tests/phyla_counts.tsv +10 -0
  250. data/utils/enveomics/Tests/primate_lentivirus.ogs +11 -0
  251. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +9 -0
  252. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +8 -0
  253. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +6 -0
  254. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +9 -0
  255. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +6 -0
  256. data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +6 -0
  257. data/utils/enveomics/build_enveomics_r.bash +45 -0
  258. data/utils/enveomics/enveomics.R/DESCRIPTION +31 -0
  259. data/utils/enveomics/enveomics.R/NAMESPACE +39 -0
  260. data/utils/enveomics/enveomics.R/R/autoprune.R +155 -0
  261. data/utils/enveomics/enveomics.R/R/barplot.R +184 -0
  262. data/utils/enveomics/enveomics.R/R/cliopts.R +135 -0
  263. data/utils/enveomics/enveomics.R/R/df2dist.R +154 -0
  264. data/utils/enveomics/enveomics.R/R/growthcurve.R +331 -0
  265. data/utils/enveomics/enveomics.R/R/prefscore.R +79 -0
  266. data/utils/enveomics/enveomics.R/R/recplot.R +354 -0
  267. data/utils/enveomics/enveomics.R/R/recplot2.R +1631 -0
  268. data/utils/enveomics/enveomics.R/R/tribs.R +583 -0
  269. data/utils/enveomics/enveomics.R/R/utils.R +80 -0
  270. data/utils/enveomics/enveomics.R/README.md +81 -0
  271. data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
  272. data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
  273. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +16 -0
  274. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +16 -0
  275. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +16 -0
  276. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +25 -0
  277. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +46 -0
  278. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -0
  279. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +47 -0
  280. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -0
  281. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -0
  282. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +40 -0
  283. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +103 -0
  284. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +67 -0
  285. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -0
  286. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -0
  287. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +45 -0
  288. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +44 -0
  289. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +47 -0
  290. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +75 -0
  291. data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +50 -0
  292. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +44 -0
  293. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +139 -0
  294. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +45 -0
  295. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -0
  296. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +77 -0
  297. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -0
  298. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -0
  299. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -0
  300. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -0
  301. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +47 -0
  302. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -0
  303. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -0
  304. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +45 -0
  305. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -0
  306. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -0
  307. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -0
  308. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -0
  309. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +52 -0
  310. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -0
  311. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +51 -0
  312. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +43 -0
  313. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +82 -0
  314. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +59 -0
  315. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -0
  316. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +36 -0
  317. data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +23 -0
  318. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +68 -0
  319. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -0
  320. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -0
  321. data/utils/enveomics/enveomics.R/man/growth.curves.Rd +14 -0
  322. data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +13 -0
  323. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +78 -0
  324. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +46 -0
  325. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +45 -0
  326. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +125 -0
  327. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -0
  328. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -0
  329. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -0
  330. data/utils/enveomics/globals.mk +8 -0
  331. data/utils/enveomics/manifest.json +9 -0
  332. data/utils/multitrim/Multitrim How-To.pdf +0 -0
  333. data/utils/multitrim/README.md +67 -0
  334. data/utils/multitrim/multitrim.py +1555 -0
  335. data/utils/multitrim/multitrim.yml +13 -0
  336. data/utils/requirements.txt +4 -3
  337. metadata +304 -3
@@ -0,0 +1,95 @@
#!/bin/bash
#
# Queues one IDBA assembly job (run.pbs) per sample found in
# <folder>/04.trimmed_fasta, using PBS job dependencies so that at most
# max_jobs of the submitted jobs are eligible to run at the same time.
#
# Arguments:
#   $1  folder     Project folder containing 04.trimmed_fasta.
#   $2  data_type  One of: mg, scg, g, t.
#   $3  max_jobs   (optional) Positive integer, by default 5.
#
# Samples that already have a folder in 05.assembly are skipped.

if [[ "$1" == "" || "$1" == "-h" || "$2" == "" ]] ; then
  echo "
   Usage: ./RUNME.bash folder data_type [max_jobs]

   folder      Path to the folder containing the 04.trimmed_fasta folder. The
               trimmed reads must be in interposed FastA format, and filenames
               must follow the format: <name>.CoupledReads.fa, where <name> is
               the name of the sample. If non-paired, the filenames must follow
               the format: <name>.SingleReads.fa. If both suffixes are found
               for the same <name> prefix, they are both used.
   data_type   Type of datasets in the project. One of: mg (for metagenomes),
               scg (for single-cell genomes), g (for traditional genomes), or t
               (for transcriptomes).
   max_jobs    (optional) Maximum number of jobs to run in parallel. This
               number can be increased, but bear in mind that this process is
               highly I/O-intensive, and likely to crash or significantly slow
               down the hard drive if many jobs are running simultaneously. By
               default: 5.
   " >&2
  exit 1
fi

TYPE=$2
if [[ "$TYPE" != "g" && "$TYPE" != "mg" && "$TYPE" != "scg" \
      && "$TYPE" != "t" ]] ; then
  echo "Unsupported data type: $TYPE." >&2
  exit 1
fi

# A value below 1 would make every job depend on a job ID that has not been
# recorded yet, so reject anything that is not a positive integer.
MAX=${3:-5}
if [[ ! "$MAX" =~ ^[0-9]+$ ]] || (( 10#$MAX < 1 )) ; then
  echo "max_jobs must be a positive integer: $MAX." >&2
  exit 1
fi
MAX=$(( 10#$MAX ))

dir=$(readlink -f "$1")
pac=$(dirname "$(readlink -f "$0")")

if [[ -z "$dir" || ! -d "$dir" ]] ; then
  echo "Cannot locate the folder: $1, aborting..." >&2
  exit 1
fi
cd "$dir" || exit 1
if [[ ! -e 04.trimmed_fasta ]] ; then
  echo "Cannot locate the 04.trimmed_fasta directory, aborting..." >&2
  exit 1
fi
mkdir -p 05.assembly || exit 1

# Without nullglob an unmatched pattern is iterated literally, which would
# create a file called '*.CoupledReads.fa' and queue a job for sample '*'.
shopt -s nullglob

# Make sure every single-reads sample has a (possibly empty) coupled-reads
# file, so that the main loop below picks up all samples.
for i in "$dir"/04.trimmed_fasta/*.SingleReads.fa ; do
  b=$(basename "$i" .SingleReads.fa)
  touch "$dir/04.trimmed_fasta/$b.CoupledReads.fa"
done

jids=()
k=0
for i in "$dir"/04.trimmed_fasta/*.CoupledReads.fa ; do
  b=$(basename "$i" .CoupledReads.fa)
  [[ -d "$dir/05.assembly/$b" ]] && continue
  coupled="$dir/04.trimmed_fasta/$b.CoupledReads.fa"
  single="$dir/04.trimmed_fasta/$b.SingleReads.fa"

  # Once MAX jobs are queued, job k waits for job k-MAX to finish
  EXTRA=()
  EXTRA_MSG=""
  if (( k >= MAX )) ; then
    prek=$(( k - MAX ))
    EXTRA=(-W "depend=afterany:${jids[$prek]}")
    EXTRA_MSG=" (waiting for ${jids[$prek]})"
  fi

  # Predict time (in hours) and RAM (in Gb) from the input size (in Mb).
  # Both read files are counted: the coupled file is only an empty
  # placeholder for samples with single reads alone.
  bytes=$(wc -c < "$coupled")
  [[ -e "$single" ]] && bytes=$(( bytes + $(wc -c < "$single") ))
  SIZE_M=$(( bytes / 1000000 ))
  TIME_H=$(( 6 + SIZE_M * 2 / 1000 ))
  RAM_G=$(( 20 + SIZE_M * 20 / 1000 ))

  # Find the right queue
  if (( TIME_H < 12 )) ; then
    QUEUE=(-q iw-shared-6 -l walltime=12:00:00)
  elif (( TIME_H < 120 )) ; then
    QUEUE=(-q microcluster -l walltime=120:00:00)
  else
    QUEUE=(-q microcluster -l walltime=2000:00:00)
  fi

  # Launch job
  mkdir "$dir/05.assembly/$b" || continue
  OPTS="SAMPLE=$b,FOLDER=$dir,TYPE=$TYPE"
  if [[ -s "$single" ]] ; then
    OPTS="$OPTS,FA=$single"
    if [[ -s "$coupled" ]] ; then
      OPTS="$OPTS,FA_RL2=$coupled"
    fi
  else
    OPTS="$OPTS,FA=$coupled"
  fi
  jid=$(qsub -v "$OPTS" -N "IDBA-$b" -l "mem=${RAM_G}g" \
        "${QUEUE[@]}" "${EXTRA[@]}" "$pac/run.pbs" | grep .)
  if [[ -z "$jid" ]] ; then
    # Do not record an empty job ID (later jobs would depend on nothing),
    # and remove the sample folder so a re-run retries this sample.
    echo "$b: job submission failed, skipping." >&2
    rmdir "$dir/05.assembly/$b"
    continue
  fi
  jids[$k]=$jid
  echo "$b: ${jids[$k]}$EXTRA_MSG"
  k=$(( k + 1 ))
done
@@ -0,0 +1,56 @@
#!/bin/bash
#PBS -l nodes=1:ppn=10
#PBS -k eo
#
# PBS job that assembles one sample with IDBA and extracts its contigs of
# 500 bp or longer.
#
# Environment (expected from `qsub -v`):
#   SAMPLE   Name of the sample, used as output folder and file prefix.
#   FOLDER   Project folder containing 05.assembly.
#   TYPE     Data type: any value ending in 'g' runs idba_ud, 't' runs
#            idba_tran.
#   FA       Main reads file (passed as -r).
#   FA_RL2 .. FA_RL5  (optional) Additional reads files, passed as
#            --read_level_2 .. --read_level_5.
#
# Outputs, in $FOLDER/05.assembly:
#   $SAMPLE/                  IDBA output folder (intermediate files removed).
#   $SAMPLE.AllContigs.fna    Symlink to scaffold.fa, or contig.fa otherwise.
#   $SAMPLE.LargeContigs.fna  Sequences of length 500 or more.

module load idba/1.1.1

shared=/nv/gpfs-gateway-pace1/project/bio-konstantinidis/shared3
enve=$shared/apps/enveomics/Scripts
THR=10 # NOTE(review): presumably meant to match ppn above; keep in sync

#---------------------------------------------------------

echo "==[ 05.assembly: $(date) ]"
# Abort if the folder is missing: the clean-up below must not run elsewhere
cd "$FOLDER/05.assembly" || exit 1

case "$TYPE" in
  *g)
    CMD=(idba_ud) ;;
  t)
    CMD=(idba_tran) ;;
  *)
    echo "Unsupported data type: $TYPE" >&2
    exit 1
    ;;
esac
# The command is kept as an array so paths with spaces stay single arguments
CMD+=(--pre_correction -r "$FA" -o "$SAMPLE" --num_threads "$THR")
for level in 2 3 4 5 ; do
  var="FA_RL$level"
  if [[ -n "${!var}" ]] ; then
    CMD+=("--read_level_$level" "${!var}")
  fi
done

# The exit status is deliberately not checked: whatever contigs were written
# are still used below.
time "${CMD[@]}"

# Remove intermediate files; -f because not all of them are always produced
rm -f "$SAMPLE"/kmer
rm -f "$SAMPLE"/graph-*.fa
rm -f "$SAMPLE"/align-*
rm -f "$SAMPLE"/local-contig-*.fa
rm -f "$SAMPLE"/contig-*.fa

# Prefer scaffolds over contigs; -f replaces a link left by a previous run
if [[ -s "$SAMPLE/scaffold.fa" ]] ; then
  ln -sf "$SAMPLE/scaffold.fa" "$SAMPLE.AllContigs.fna"
elif [[ -s "$SAMPLE/contig.fa" ]] ; then
  ln -sf "$SAMPLE/contig.fa" "$SAMPLE.AllContigs.fna"
else
  echo "No scaffold.fa or contig.fa found for $SAMPLE, aborting..." >&2
  exit 1
fi
time "$enve/FastA.length.pl" "$SAMPLE.AllContigs.fna" \
  | awk '$2>=500{print $1}' > "$SAMPLE.LargeContigs.ids"
time "$enve/FastA.filter.pl" "$SAMPLE.LargeContigs.ids" \
  "$SAMPLE.AllContigs.fna" > "$SAMPLE.LargeContigs.fna"
rm "$SAMPLE.LargeContigs.ids"

#---------------------------------------------------------

echo "Done: $(date)."
56
+
@@ -0,0 +1,54 @@
1
+ @author: Luis Miguel Rodriguez-R <lmrodriguezr at gmail dot com>
2
+
3
+ @update: Oct-30-2014
4
+
5
+ @license: artistic 2.0
6
+
7
+ @status: auto
8
+
9
+ @pbs: yes
10
+
11
+ # IMPORTANT
12
+
13
+ This pipeline was developed for the [PACE cluster](http://pace.gatech.edu/). You
14
+ are free to use it in other platforms with adequate adjustments.
15
+
16
+ # PURPOSE
17
+
18
+ Performs various trimming and quality-control analyses over raw reads.
19
+
20
+ # HELP
21
+
22
+ 1. Files preparation:
23
+
24
+ 1.1. Obtain the enveomics package in the cluster. You can use:
25
+ `git clone https://github.com/lmrodriguezr/enveomics.git`
26
+
27
+ 1.2. Prepare the raw reads in FastQ format. Files must be raw, not zipped or packaged.
28
+ Filenames must conform to the format: <name>.<sis>.fastq, where <name> is the name
29
+ of the sample, and <sis> is 1 or 2 indicating which sister read the file contains.
30
+ Use only '1' as <sis> if you have single reads.
31
+
32
+ 1.3. Gather all the FastQ files into the same folder.
33
+
34
+ 2. Pipeline execution:
35
+
36
+ 2.1. Simply execute `./RUNME.bash <dir>`, where <dir> is the folder containing
37
+ the FastQ files.
38
+
39
+ 3. What to expect:
40
+
41
+ By the end of the run, you should find the following folders:
42
+
43
+ 3.1. *01.raw_reads*: Gzip'ed raw FastQ files.
44
+
45
+ 3.2. *02.trimmed_reads*: Trimmed and clipped reads. For each sample, there should be
46
+ nine files for paired-end, and two for single-reads.
47
+
48
+ 3.3. *03.read_quality*: Quality reports. For each sample, there should be two directories,
49
+ one with SolexaQA++ information, another with FastQC information.
50
+
51
+ 3.4. *04.trimmed_fasta*: Trimmed and clipped reads in FastA format (and gzip'ed, in the case of
52
+ individual files for paired-end).
53
+
54
+
@@ -0,0 +1,70 @@
#!/bin/bash
#
# Moves the raw FastQ files of every sample in <folder> into 01.raw_reads and
# queues one trimming job (run.pbs) per sample, using PBS job dependencies so
# that at most max_jobs of the submitted jobs are eligible to run at the same
# time.
#
# Arguments:
#   $1  folder    Folder containing the <name>.<sis>.fastq files.
#   $2  clipper   (optional) By default: scythe.
#   $3  max_jobs  (optional) Positive integer, by default 5.

if [[ "$1" == "" || "$1" == "-h" ]] ; then
  echo "
   Usage: ./RUNME.bash folder [clipper [max_jobs]]

   folder      Path to the folder containing the raw reads. The raw reads must be in FastQ format,
               and filenames must follow the format: <name>.<sis>.fastq, where <name> is the name
               of the sample, and <sis> is 1 or 2 indicating which sister read the file contains.
               Use only '1' as <sis> if you have single reads.
   clipper     (optional) One of: trimmomatic, scythe, or none. By default: scythe.
   max_jobs    (optional) Maximum number of jobs to run in parallel. This number can be increased,
               but bear in mind that this process is highly I/O-intensive, and likely to crash or
               significantly slow down the hard drive if many jobs are running simultaneously. By
               default: 5.
   " >&2
  exit 1
fi

CLIPPER=${2:-scythe}

# A value below 1 would make every job depend on a job ID that has not been
# recorded yet, so reject anything that is not a positive integer.
MAX=${3:-5}
if [[ ! "$MAX" =~ ^[0-9]+$ ]] || (( 10#$MAX < 1 )) ; then
  echo "max_jobs must be a positive integer: $MAX." >&2
  exit 1
fi
MAX=$(( 10#$MAX ))

dir=$(readlink -f "$1")
pac=$(dirname "$(readlink -f "$0")")

if [[ -z "$dir" || ! -d "$dir" ]] ; then
  echo "Cannot locate the folder: $1, aborting..." >&2
  exit 1
fi
cd "$dir" || exit 1
mkdir -p 01.raw_reads 02.trimmed_reads 03.read_quality 04.trimmed_fasta \
  zz.info || exit 1

# Without nullglob an unmatched pattern is iterated literally, which would
# try to process a sample called '*'.
shopt -s nullglob

jids=()
k=0
for i in "$dir"/*.1.fastq ; do
  # Once MAX jobs are queued, job k waits for job k-MAX to finish
  EXTRA=()
  EXTRA_MSG=""
  if (( k >= MAX )) ; then
    prek=$(( k - MAX ))
    EXTRA=(-W "depend=afterany:${jids[$prek]}")
    EXTRA_MSG=" (waiting for ${jids[$prek]})"
  fi

  b=$(basename "$i" .1.fastq)
  if ! mv "$b".[12].fastq 01.raw_reads/ ; then
    echo "$b: cannot move the reads to 01.raw_reads, skipping." >&2
    continue
  fi

  # Predict time (in hours) and RAM (in Gb) from the size (in Mb) of the
  # first sister file; single-read samples take half the time.
  SIZE_M=$(( $(wc -c < "01.raw_reads/$b.1.fastq") / 1000000 ))
  TIME_H=$(( SIZE_M * 5 / 1000 ))
  [[ -e "01.raw_reads/$b.2.fastq" ]] || TIME_H=$(( TIME_H / 2 ))
  RAM_G=$(( SIZE_M * 8 / 1000 ))
  (( RAM_G < 10 )) && RAM_G=10

  # Find the right queue
  if (( TIME_H < 12 )) ; then
    QUEUE=(-q iw-shared-6 -l walltime=12:00:00)
  elif (( TIME_H < 120 )) ; then
    QUEUE=(-q microcluster -l walltime=120:00:00)
  else
    QUEUE=(-q microcluster -l walltime=2000:00:00)
  fi

  # Launch job
  jid=$(qsub -v "SAMPLE=$b,FOLDER=$dir,CLIPPER=$CLIPPER" -N "Trim-$b" \
        -l "mem=${RAM_G}g" "${QUEUE[@]}" "${EXTRA[@]}" "$pac/run.pbs" \
        | grep .)
  if [[ -z "$jid" ]] ; then
    # Do not record an empty job ID: later jobs would depend on nothing
    echo "$b: job submission failed, skipping." >&2
    continue
  fi
  jids[$k]=$jid
  echo "$b: ${jids[$k]}$EXTRA_MSG"
  k=$(( k + 1 ))
done
69
+
70
+
@@ -0,0 +1,130 @@
1
+ #!/bin/bash
2
+ #PBS -l mem=10g
3
+ #PBS -l nodes=1:ppn=1
4
+ #PBS -k eo
5
+ # Quality-trims, adapter-clips and quality-reports the reads of one sample, then writes them as FastA plus a summary file.
6
+ module load fastqc/0.11.2
7
+ module load scythe/0.993
8
+ # Environment inputs: SAMPLE (read-file prefix), FOLDER (directory holding 01.raw_reads ... zz.info), CLIPPER (trimmomatic, scythe, or any other value for no clipping).
9
+ shared=/gpfs/pace1/project/bio-konstantinidis/shared3
10
+ b=$SAMPLE ;
11
+ sqa=$shared/bin/SolexaQA++
12
+ scythe=scythe
13
+ enve=$shared/apps/enveomics/Scripts
14
+ trim=$shared/apps/Trimmomatic-0.32/trimmomatic-0.32.jar
15
+ SEadapters=$shared/apps/Trimmomatic-0.32/adapters/ALL-SE_PE.fa
16
+ PEadapters=$shared/apps/Trimmomatic-0.32/adapters/ALL-PE.fa
17
+
18
+ #---------------------------------------------------------
19
+
20
+ echo "==[ 02.trimmed_reads: $(date) ]" ;
21
+ cd "$FOLDER/02.trimmed_reads" || exit 1 ;
22
+ # Work on copies passed through FastQ.tag.rb (-p/-s are presumably a read-name prefix and suffix; confirm).
23
+ time "$enve/FastQ.tag.rb" -i "../01.raw_reads/$b.1.fastq" -p "$b-" -s "/1" -o "$b.1.fastq" ;
24
+ [[ -e "../01.raw_reads/$b.2.fastq" ]] && time "$enve/FastQ.tag.rb" -i "../01.raw_reads/$b.2.fastq" -p "$b-" -s "/2" -o "$b.2.fastq" ;
25
+ # Read count is lines/4; mean length is taken over the first 40000 lines (10000 reads) and reported as 0 when there are none.
26
+ RAW_READS=$(paste - - - - < "$b.1.fastq" | wc -l | sed -e 's/ *//') ;
27
+ RAW_LENGTH=$(head -n 40000 "$b.1.fastq" | paste - - - - | awk 'BEGIN{FS="\\t"}{SUM+=length($2)}END{print (NR ? SUM/NR : 0)}') ;
28
+
29
+ time "$sqa" dynamictrim "$b".[12].fastq -h 20 -d . ;
30
+ time "$sqa" lengthsort "$b".[12].fastq.trimmed -l 50 -d . ;
31
+ # Adapter clipping: every branch leaves its result in $b.[12].clipped.fastq.
32
+ if [[ "$CLIPPER" == "trimmomatic" ]] ; then
33
+   if [[ -e "$b.2.fastq.trimmed.paired" ]] ; then
34
+     time java -jar "$trim" PE -threads 1 \
35
+       "$b.1.fastq.trimmed.paired" \
36
+       "$b.2.fastq.trimmed.paired" \
37
+       "$b.1.clipped.fastq" "$b.1.clipped.single.fastq" \
38
+       "$b.2.clipped.fastq" "$b.2.clipped.single.fastq" \
39
+       "ILLUMINACLIP:$PEadapters:2:30:10" MINLEN:50
40
+   else
41
+     time java -jar "$trim" SE -threads 1 \
42
+       "$b.1.fastq.trimmed.single" "$b.1.clipped.fastq" \
43
+       "ILLUMINACLIP:$SEadapters:2:30:10" MINLEN:50
44
+   fi ;
45
+ elif [[ "$CLIPPER" == "scythe" ]]; then
46
+   if [[ -e "$b.2.fastq.trimmed.paired" ]] ; then
47
+     "$scythe" -a "$PEadapters" "$b.1.fastq.trimmed.paired" > "$b.1.clipped.all.fastq" ;
48
+     "$scythe" -a "$PEadapters" "$b.2.fastq.trimmed.paired" > "$b.2.clipped.all.fastq" ;
49
+     time "$sqa" lengthsort "$b".[12].clipped.all.fastq -l 50 -d . ;
50
+     rm "$b".[12].clipped.all.fastq ;
51
+     [[ -e "$b.1.clipped.all.fastq.single" ]] && mv "$b.1.clipped.all.fastq.single" "$b.1.clipped.single.fastq" ;
52
+     [[ -e "$b.2.clipped.all.fastq.single" ]] && mv "$b.2.clipped.all.fastq.single" "$b.2.clipped.single.fastq" ;
53
+     mv "$b.1.clipped.all.fastq.paired" "$b.1.clipped.fastq" ;
54
+     mv "$b.2.clipped.all.fastq.paired" "$b.2.clipped.fastq" ;
55
+     rm "$b.1.clipped.all.fastq.summary.txt" "$b.1.clipped.all.fastq.summary.txt.pdf" &>/dev/null ;
56
+   else
57
+     "$scythe" -a "$PEadapters" "$b.1.fastq.trimmed.single" > "$b.1.clipped.all.fastq" ;
58
+     time "$sqa" lengthsort "$b.1.clipped.all.fastq" -l 50 -d . ;
59
+     rm "$b.1.clipped.all.fastq" ;
60
+     mv "$b.1.clipped.all.fastq.single" "$b.1.clipped.fastq" ;
61
+   fi ;
62
+   rm "$b".[12].*.discard &>/dev/null ;
63
+ else
64
+   if [[ -e "$b.2.fastq.trimmed.paired" ]] ; then
65
+     ln -s "$b.1.fastq.trimmed.paired" "$b.1.clipped.fastq" ;
66
+     ln -s "$b.2.fastq.trimmed.paired" "$b.2.clipped.fastq" ;
67
+   else
68
+     ln -s "$b.1.fastq.trimmed.single" "$b.1.clipped.fastq" ;
69
+   fi ;
70
+ fi ;
71
+
72
+ TRIMMED_READS=$(paste - - - - < "$b.1.clipped.fastq" | wc -l | sed -e 's/ *//') ;
73
+ TRIMMED_LENGTH=$(head -n 40000 "$b.1.clipped.fastq" | paste - - - - | awk 'BEGIN{FS="\\t"}{SUM+=length($2)}END{print (NR ? SUM/NR : 0)}') ;
74
+
75
+ #---------------------------------------------------------
76
+
77
+ echo "==[ 03.read_quality: $(date) ]" ;
78
+ cd "$FOLDER/03.read_quality" || exit 1 ;
79
+ [[ -d "$b.fastqc" ]] || mkdir "$b.fastqc" ;
80
+ perl "$(which fastqc)" ../02.trimmed_reads/"$b".[12].clipped.fastq -o "$b.fastqc" ;
81
+
82
+ [[ -d "$b" ]] || mkdir "$b" ;
83
+ time "$sqa" analysis ../01.raw_reads/"$b".[12].fastq -h 20 -d "$b" -v -m ;
84
+ rm "$b"/*.segments ;
85
+ mv ../02.trimmed_reads/"$b".[12].fastq_trimmed.segments* "$b/"
86
+ mv ../02.trimmed_reads/"$b".[12].fastq.trimmed.summary.txt* "$b/"
87
+
88
+ # Remove intermediates; stop if the directory is unreachable so the rm globs never run somewhere else.
89
+ cd "$FOLDER/02.trimmed_reads" || exit 1 ;
90
+ rm "$b".[12].fastq.trimmed.discard ;
91
+ rm "$b".[12].fastq.trimmed ;
92
+ rm "$b".[12].fastq ;
93
+
94
+ #---------------------------------------------------------
95
+ # NOTE(review): in awk, "\\n" is a backslash followed by n, not a newline; confirm the doubled backslashes below are an artifact of this listing.
96
+ echo "==[ 04.trimmed_fasta: $(date) ]" ;
97
+ cd "$FOLDER/04.trimmed_fasta" || exit 1 ;
98
+ paste - - - - < "../02.trimmed_reads/$b.1.clipped.fastq" | awk 'BEGIN{FS="\\t"}{print ">"substr($1,2)"\\n"$2}' > "$b.1.fasta" ;
99
+ if [[ -e "../02.trimmed_reads/$b.2.clipped.fastq" ]] ; then
100
+   paste - - - - < "../02.trimmed_reads/$b.2.clipped.fastq" | awk 'BEGIN{FS="\\t"}{print ">"substr($1,2)"\\n"$2}' > "$b.2.fasta" ;
101
+   time "$enve/FastA.interpose.pl" "$b.CoupledReads.fa" "$b".[12].fasta ;
102
+   time gzip "$b.2.fasta" ;
103
+   time gzip "$b.1.fasta" ;
104
+ else
105
+   mv "$b.1.fasta" "$b.SingleReads.fa" ;
106
+ fi ;
107
+
108
+ #---------------------------------------------------------
109
+
110
+ echo "==[ zz.info: $(date) ]" ;
111
+ cd "$FOLDER/zz.info" || exit 1 ;
112
+ echo "
113
+ RAW_LENGTH: $RAW_LENGTH
114
+ RAW_READS: $RAW_READS
115
+ TRIMMED_LENGTH: $TRIMMED_LENGTH
116
+ TRIMMED_READS: $TRIMMED_READS
117
+ " > "$b.summary.txt" ;
118
+
119
+ #---------------------------------------------------------
120
+
121
+ echo "==[ 01.raw_reads: $(date) ]"
122
+ cd "$FOLDER/01.raw_reads" || exit 1 ;
123
+ for i in "$b".[12].fastq ; do
124
+   time gzip "$i" ;
125
+ done ;
126
+
127
+ #---------------------------------------------------------
128
+
129
+ echo "Done: $(date)." ;
130
+
@@ -0,0 +1,42 @@
1
+ # Enveomics Collection
2
+
3
+ Scripts and reference libraries at [Kostas lab](http://enve-omics.gatech.edu).
4
+
5
+ ## Prerequisites
6
+
7
+ The enveomics collection as a whole has very modest requirements, essentially a
8
+ *nix system with `bash`, `perl`, `ruby`, and `R`. Some scripts may require
9
+ additional libraries, or even external software, but you'll be forewarned about
10
+ these requirements in the documentation accompanying each script. If you prefer,
11
+ you can also use the Graphical User Interface (GUI), which comes with additional
12
+ tests to let you know if your system is ready to use any given script.
13
+
14
+ ## Graphical User Interface (GUI)
15
+
16
+ The enveomics collection now has a graphical user interface! To learn more,
17
+ please visit [enveomics-gui](https://github.com/lmrodriguezr/enveomics-gui).
18
+
19
+ ## License
20
+
21
+ The files in this repository are licensed under the terms of the
22
+ Artistic License 2.0, except when otherwise noted.
23
+
24
+ You can find a copy of the license in [LICENSE.txt](LICENSE.txt) or at
25
+ http://www.perlfoundation.org/artistic_license_2_0.
26
+
27
+ ## Documentation
28
+
29
+ Most scripts in this repository are self-documented. However,
30
+ more extensive documentation (and some discussion) can be found at the
31
+ [documentation website](http://enve-omics.ce.gatech.edu/enveomics/docs).
32
+ Additional documentation for recruitment plots can be found
33
+ [here](Docs/recplot2.md).
34
+
35
+ ## Citation
36
+
37
+ If you use any of the utilities in the Enveomics Collection in your research
38
+ please cite:
39
+
40
+ > Rodriguez-R LM & Konstantinidis KT (2016). The enveomics collection: a toolbox
41
+ > for specialized analyses of microbial genomes and metagenomes.
42
+ > [PeerJ Preprints 4:e1900v1](https://peerj.com/preprints/1900/).