miga-base 1.2.15.2 → 1.2.15.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/cli/action/download/gtdb.rb +4 -1
  3. data/lib/miga/cli/action/gtdb_get.rb +4 -0
  4. data/lib/miga/daemon.rb +4 -1
  5. data/lib/miga/lair.rb +6 -4
  6. data/lib/miga/remote_dataset/download.rb +3 -2
  7. data/lib/miga/remote_dataset.rb +25 -7
  8. data/lib/miga/taxonomy.rb +6 -0
  9. data/lib/miga/version.rb +2 -2
  10. metadata +6 -302
  11. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +0 -41964
  12. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +0 -32439
  13. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +0 -62056
  14. data/utils/FastAAI/FastAAI +0 -3659
  15. data/utils/FastAAI/FastAAI-legacy/FastAAI +0 -1336
  16. data/utils/FastAAI/FastAAI-legacy/kAAI_v1.0_virus.py +0 -1296
  17. data/utils/FastAAI/README.md +0 -84
  18. data/utils/enveomics/Docs/recplot2.md +0 -244
  19. data/utils/enveomics/Examples/aai-matrix.bash +0 -66
  20. data/utils/enveomics/Examples/ani-matrix.bash +0 -66
  21. data/utils/enveomics/Examples/essential-phylogeny.bash +0 -105
  22. data/utils/enveomics/Examples/unus-genome-phylogeny.bash +0 -100
  23. data/utils/enveomics/LICENSE.txt +0 -73
  24. data/utils/enveomics/Makefile +0 -52
  25. data/utils/enveomics/Manifest/Tasks/aasubs.json +0 -103
  26. data/utils/enveomics/Manifest/Tasks/blasttab.json +0 -790
  27. data/utils/enveomics/Manifest/Tasks/distances.json +0 -161
  28. data/utils/enveomics/Manifest/Tasks/fasta.json +0 -802
  29. data/utils/enveomics/Manifest/Tasks/fastq.json +0 -291
  30. data/utils/enveomics/Manifest/Tasks/graphics.json +0 -126
  31. data/utils/enveomics/Manifest/Tasks/mapping.json +0 -137
  32. data/utils/enveomics/Manifest/Tasks/ogs.json +0 -382
  33. data/utils/enveomics/Manifest/Tasks/other.json +0 -906
  34. data/utils/enveomics/Manifest/Tasks/remote.json +0 -355
  35. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +0 -650
  36. data/utils/enveomics/Manifest/Tasks/tables.json +0 -308
  37. data/utils/enveomics/Manifest/Tasks/trees.json +0 -68
  38. data/utils/enveomics/Manifest/Tasks/variants.json +0 -111
  39. data/utils/enveomics/Manifest/categories.json +0 -165
  40. data/utils/enveomics/Manifest/examples.json +0 -162
  41. data/utils/enveomics/Manifest/tasks.json +0 -4
  42. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +0 -69
  43. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +0 -1
  44. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +0 -1
  45. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +0 -1
  46. data/utils/enveomics/Pipelines/assembly.pbs/README.md +0 -189
  47. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +0 -112
  48. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +0 -23
  49. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +0 -44
  50. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +0 -50
  51. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +0 -37
  52. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +0 -68
  53. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +0 -49
  54. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +0 -80
  55. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +0 -57
  56. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +0 -63
  57. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +0 -38
  58. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +0 -73
  59. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +0 -21
  60. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +0 -72
  61. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +0 -98
  62. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +0 -1
  63. data/utils/enveomics/Pipelines/blast.pbs/README.md +0 -127
  64. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +0 -109
  65. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +0 -128
  66. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +0 -16
  67. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +0 -22
  68. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +0 -26
  69. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +0 -89
  70. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +0 -29
  71. data/utils/enveomics/Pipelines/idba.pbs/README.md +0 -49
  72. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +0 -95
  73. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +0 -56
  74. data/utils/enveomics/Pipelines/trim.pbs/README.md +0 -54
  75. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +0 -70
  76. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +0 -130
  77. data/utils/enveomics/README.md +0 -42
  78. data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +0 -171
  79. data/utils/enveomics/Scripts/Aln.cat.rb +0 -221
  80. data/utils/enveomics/Scripts/Aln.convert.pl +0 -35
  81. data/utils/enveomics/Scripts/AlphaDiversity.pl +0 -152
  82. data/utils/enveomics/Scripts/BedGraph.tad.rb +0 -93
  83. data/utils/enveomics/Scripts/BedGraph.window.rb +0 -71
  84. data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +0 -102
  85. data/utils/enveomics/Scripts/BlastTab.addlen.rb +0 -63
  86. data/utils/enveomics/Scripts/BlastTab.advance.bash +0 -48
  87. data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +0 -55
  88. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +0 -104
  89. data/utils/enveomics/Scripts/BlastTab.cogCat.rb +0 -76
  90. data/utils/enveomics/Scripts/BlastTab.filter.pl +0 -47
  91. data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +0 -194
  92. data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +0 -104
  93. data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +0 -157
  94. data/utils/enveomics/Scripts/BlastTab.recplot2.R +0 -48
  95. data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +0 -86
  96. data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +0 -119
  97. data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +0 -86
  98. data/utils/enveomics/Scripts/BlastTab.subsample.pl +0 -47
  99. data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +0 -114
  100. data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +0 -90
  101. data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +0 -123
  102. data/utils/enveomics/Scripts/Chao1.pl +0 -97
  103. data/utils/enveomics/Scripts/CharTable.classify.rb +0 -234
  104. data/utils/enveomics/Scripts/EBIseq2tax.rb +0 -83
  105. data/utils/enveomics/Scripts/FastA.N50.pl +0 -60
  106. data/utils/enveomics/Scripts/FastA.extract.rb +0 -152
  107. data/utils/enveomics/Scripts/FastA.filter.pl +0 -52
  108. data/utils/enveomics/Scripts/FastA.filterLen.pl +0 -28
  109. data/utils/enveomics/Scripts/FastA.filterN.pl +0 -60
  110. data/utils/enveomics/Scripts/FastA.fragment.rb +0 -100
  111. data/utils/enveomics/Scripts/FastA.gc.pl +0 -42
  112. data/utils/enveomics/Scripts/FastA.interpose.pl +0 -93
  113. data/utils/enveomics/Scripts/FastA.length.pl +0 -38
  114. data/utils/enveomics/Scripts/FastA.mask.rb +0 -89
  115. data/utils/enveomics/Scripts/FastA.per_file.pl +0 -36
  116. data/utils/enveomics/Scripts/FastA.qlen.pl +0 -57
  117. data/utils/enveomics/Scripts/FastA.rename.pl +0 -65
  118. data/utils/enveomics/Scripts/FastA.revcom.pl +0 -23
  119. data/utils/enveomics/Scripts/FastA.sample.rb +0 -98
  120. data/utils/enveomics/Scripts/FastA.slider.pl +0 -85
  121. data/utils/enveomics/Scripts/FastA.split.pl +0 -55
  122. data/utils/enveomics/Scripts/FastA.split.rb +0 -79
  123. data/utils/enveomics/Scripts/FastA.subsample.pl +0 -131
  124. data/utils/enveomics/Scripts/FastA.tag.rb +0 -65
  125. data/utils/enveomics/Scripts/FastA.toFastQ.rb +0 -69
  126. data/utils/enveomics/Scripts/FastA.wrap.rb +0 -48
  127. data/utils/enveomics/Scripts/FastQ.filter.pl +0 -54
  128. data/utils/enveomics/Scripts/FastQ.interpose.pl +0 -90
  129. data/utils/enveomics/Scripts/FastQ.maskQual.rb +0 -89
  130. data/utils/enveomics/Scripts/FastQ.offset.pl +0 -90
  131. data/utils/enveomics/Scripts/FastQ.split.pl +0 -53
  132. data/utils/enveomics/Scripts/FastQ.tag.rb +0 -70
  133. data/utils/enveomics/Scripts/FastQ.test-error.rb +0 -81
  134. data/utils/enveomics/Scripts/FastQ.toFastA.awk +0 -24
  135. data/utils/enveomics/Scripts/GFF.catsbj.pl +0 -127
  136. data/utils/enveomics/Scripts/GenBank.add_fields.rb +0 -84
  137. data/utils/enveomics/Scripts/HMM.essential.rb +0 -351
  138. data/utils/enveomics/Scripts/HMM.haai.rb +0 -168
  139. data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +0 -83
  140. data/utils/enveomics/Scripts/JPlace.distances.rb +0 -88
  141. data/utils/enveomics/Scripts/JPlace.to_iToL.rb +0 -320
  142. data/utils/enveomics/Scripts/M5nr.getSequences.rb +0 -81
  143. data/utils/enveomics/Scripts/MeTaxa.distribution.pl +0 -198
  144. data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +0 -35
  145. data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +0 -49
  146. data/utils/enveomics/Scripts/NCBIacc2tax.rb +0 -92
  147. data/utils/enveomics/Scripts/Newick.autoprune.R +0 -27
  148. data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +0 -228
  149. data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +0 -32
  150. data/utils/enveomics/Scripts/RefSeq.download.bash +0 -48
  151. data/utils/enveomics/Scripts/SRA.download.bash +0 -55
  152. data/utils/enveomics/Scripts/TRIBS.plot-test.R +0 -36
  153. data/utils/enveomics/Scripts/TRIBS.test.R +0 -39
  154. data/utils/enveomics/Scripts/Table.barplot.R +0 -31
  155. data/utils/enveomics/Scripts/Table.df2dist.R +0 -30
  156. data/utils/enveomics/Scripts/Table.filter.pl +0 -61
  157. data/utils/enveomics/Scripts/Table.merge.pl +0 -77
  158. data/utils/enveomics/Scripts/Table.prefScore.R +0 -60
  159. data/utils/enveomics/Scripts/Table.replace.rb +0 -69
  160. data/utils/enveomics/Scripts/Table.round.rb +0 -63
  161. data/utils/enveomics/Scripts/Table.split.pl +0 -57
  162. data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +0 -227
  163. data/utils/enveomics/Scripts/VCF.KaKs.rb +0 -147
  164. data/utils/enveomics/Scripts/VCF.SNPs.rb +0 -88
  165. data/utils/enveomics/Scripts/aai.rb +0 -421
  166. data/utils/enveomics/Scripts/ani.rb +0 -362
  167. data/utils/enveomics/Scripts/anir.rb +0 -137
  168. data/utils/enveomics/Scripts/clust.rand.rb +0 -102
  169. data/utils/enveomics/Scripts/gi2tax.rb +0 -103
  170. data/utils/enveomics/Scripts/in_silico_GA_GI.pl +0 -96
  171. data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
  172. data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
  173. data/utils/enveomics/Scripts/lib/enveomics.R +0 -1
  174. data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +0 -293
  175. data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +0 -175
  176. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +0 -24
  177. data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +0 -17
  178. data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +0 -30
  179. data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +0 -253
  180. data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +0 -88
  181. data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +0 -182
  182. data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +0 -49
  183. data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +0 -74
  184. data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +0 -237
  185. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +0 -31
  186. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +0 -152
  187. data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +0 -3
  188. data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +0 -74
  189. data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +0 -135
  190. data/utils/enveomics/Scripts/ogs.annotate.rb +0 -88
  191. data/utils/enveomics/Scripts/ogs.core-pan.rb +0 -160
  192. data/utils/enveomics/Scripts/ogs.extract.rb +0 -125
  193. data/utils/enveomics/Scripts/ogs.mcl.rb +0 -186
  194. data/utils/enveomics/Scripts/ogs.rb +0 -104
  195. data/utils/enveomics/Scripts/ogs.stats.rb +0 -131
  196. data/utils/enveomics/Scripts/rbm-legacy.rb +0 -172
  197. data/utils/enveomics/Scripts/rbm.rb +0 -108
  198. data/utils/enveomics/Scripts/sam.filter.rb +0 -148
  199. data/utils/enveomics/Tests/Makefile +0 -10
  200. data/utils/enveomics/Tests/Mgen_M2288.faa +0 -3189
  201. data/utils/enveomics/Tests/Mgen_M2288.fna +0 -8282
  202. data/utils/enveomics/Tests/Mgen_M2321.fna +0 -8288
  203. data/utils/enveomics/Tests/Nequ_Kin4M.faa +0 -2970
  204. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
  205. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +0 -7
  206. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +0 -17
  207. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +0 -137
  208. data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +0 -123
  209. data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +0 -200
  210. data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +0 -55
  211. data/utils/enveomics/Tests/alkB.nwk +0 -1
  212. data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +0 -13
  213. data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +0 -17
  214. data/utils/enveomics/Tests/hiv1.faa +0 -59
  215. data/utils/enveomics/Tests/hiv1.fna +0 -134
  216. data/utils/enveomics/Tests/hiv2.faa +0 -70
  217. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +0 -233
  218. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +0 -1
  219. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +0 -233
  220. data/utils/enveomics/Tests/phyla_counts.tsv +0 -10
  221. data/utils/enveomics/Tests/primate_lentivirus.ogs +0 -11
  222. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +0 -9
  223. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +0 -8
  224. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +0 -6
  225. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +0 -9
  226. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +0 -6
  227. data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +0 -6
  228. data/utils/enveomics/build_enveomics_r.bash +0 -45
  229. data/utils/enveomics/enveomics.R/DESCRIPTION +0 -31
  230. data/utils/enveomics/enveomics.R/NAMESPACE +0 -39
  231. data/utils/enveomics/enveomics.R/R/autoprune.R +0 -155
  232. data/utils/enveomics/enveomics.R/R/barplot.R +0 -184
  233. data/utils/enveomics/enveomics.R/R/cliopts.R +0 -135
  234. data/utils/enveomics/enveomics.R/R/df2dist.R +0 -154
  235. data/utils/enveomics/enveomics.R/R/growthcurve.R +0 -331
  236. data/utils/enveomics/enveomics.R/R/prefscore.R +0 -79
  237. data/utils/enveomics/enveomics.R/R/recplot.R +0 -354
  238. data/utils/enveomics/enveomics.R/R/recplot2.R +0 -1631
  239. data/utils/enveomics/enveomics.R/R/tribs.R +0 -583
  240. data/utils/enveomics/enveomics.R/R/utils.R +0 -80
  241. data/utils/enveomics/enveomics.R/README.md +0 -81
  242. data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
  243. data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
  244. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -16
  245. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -16
  246. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -16
  247. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +0 -25
  248. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +0 -46
  249. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +0 -23
  250. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +0 -47
  251. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +0 -23
  252. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +0 -23
  253. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +0 -40
  254. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +0 -103
  255. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +0 -67
  256. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +0 -24
  257. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +0 -19
  258. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +0 -45
  259. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +0 -44
  260. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +0 -47
  261. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +0 -75
  262. data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +0 -50
  263. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +0 -44
  264. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +0 -139
  265. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +0 -45
  266. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +0 -24
  267. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +0 -77
  268. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +0 -25
  269. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +0 -21
  270. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +0 -19
  271. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +0 -19
  272. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +0 -47
  273. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +0 -29
  274. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +0 -18
  275. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +0 -45
  276. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +0 -36
  277. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +0 -19
  278. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +0 -19
  279. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +0 -27
  280. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +0 -52
  281. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +0 -17
  282. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +0 -51
  283. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +0 -43
  284. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +0 -82
  285. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +0 -59
  286. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +0 -27
  287. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +0 -36
  288. data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +0 -23
  289. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +0 -68
  290. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +0 -28
  291. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +0 -27
  292. data/utils/enveomics/enveomics.R/man/growth.curves.Rd +0 -14
  293. data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +0 -13
  294. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +0 -78
  295. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +0 -46
  296. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +0 -45
  297. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +0 -125
  298. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +0 -19
  299. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +0 -19
  300. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +0 -19
  301. data/utils/enveomics/globals.mk +0 -8
  302. data/utils/enveomics/manifest.json +0 -9
  303. data/utils/multitrim/Multitrim How-To.pdf +0 -0
  304. data/utils/multitrim/README.md +0 -67
  305. data/utils/multitrim/multitrim.py +0 -1555
  306. data/utils/multitrim/multitrim.yml +0 -13
@@ -1,57 +0,0 @@
1
- #!/usr/bin/env perl
2
- #
3
- # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
4
- # @update Dec-22-2015
5
- # @license artistic license 2.0
6
- #
7
- use strict;
8
- use warnings;
9
- use List::Util qw/sum min max/;
10
-
11
- my ($seqs, $minlen) = @ARGV;
12
- $seqs or die "
13
- Description:
14
- Calculates the quartiles of the length in a set of sequences. The Q2 is
15
- also known as the median. Q0 is the minimum length, and Q4 is the maximum
16
- length. It also calculates TOTAL, the added length of the sequences in
17
- the file, and AVG, the average length.
18
-
19
- Usage:
20
- $0 seqs.fa[ minlen]
21
-
22
- seqs.fa A FastA file containing the sequences.
23
- minlen (optional) The minimum length to take into consideration.
24
- By default: 0.
25
-
26
- ";
27
- $minlen ||= 0;
28
-
29
- # Read files
30
- my @len = ();
31
- open FA, "<", $seqs or die "Cannot open file: $seqs: $!\n";
32
- my $def = '';
33
- my $len = 0;
34
- while(<FA>){
35
- next if /^;/;
36
- if(m/^>(\S+)\s?/){
37
- push(@len, int($len)) if $def and not $len<$minlen;
38
- $def = $1;
39
- $len = 0;
40
- }else{
41
- s/[^A-Za-z]//g;
42
- $len+= length $_;
43
- }
44
- }
45
- push(@len, int($len)) if $def and not $len<$minlen;
46
- close FA;
47
-
48
- # Sort and estimates quantiles
49
- @len = sort { $a <=> $b } @len;
50
- for my $q (0 .. 4){
51
- my $ii = int(my $i = $#len*$q/4);
52
- print "Q$q: ".($i==$ii ? $len[$i] : ($len[$ii]+$len[$ii+1])/2 )."\n";
53
- }
54
- my $sum = sum @len;
55
- print "N: ".scalar(@len)."\n";
56
- print "TOTAL: $sum\n";
57
- print "AVG: ".($sum/scalar(@len))."\n";
@@ -1,65 +0,0 @@
1
- #!/usr/bin/env perl
2
- #
3
- # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
4
- # @update Oct-07-2015
5
- # @license artistic license 2.0
6
- #
7
-
8
- use warnings;
9
- use strict;
10
- use Getopt::Std;
11
-
12
- sub HELP_MESSAGE { die "
13
- .Description:
14
- Renames a set of sequences in FastA format.
15
-
16
- .Usage: $0 [options] list.txt seqs.fa > renamed.fa
17
-
18
- [options]
19
- -f Filter list. Ignores sequences NOT present in the list.
20
- -q Runs quietly.
21
- -h Prints this message and exits.
22
-
23
- [mandatory]
24
- list.txt Tab-delimited list of sequences, with the original ID in the
25
- first column and the ID to use in the second.
26
- seqs.fa FastA file containing the superset of sequences.
27
- renamed.fa FastA file to be created.
28
-
29
- " }
30
-
31
- my %o=();
32
- getopts('fhq', \%o);
33
- my($list, $fa) = @ARGV;
34
- ($list and $fa) or &HELP_MESSAGE;
35
- $o{h} and &HELP_MESSAGE;
36
-
37
- print STDERR "Reading list.\n" unless $o{q};
38
- open LI, "<", $list or die "Cannot read file: $list: $!\n";
39
- my %li = map { my $l=$_; chomp $l; my @r=split(/\t/,$l); $r[0] => $r[1] } <LI>;
40
- close LI;
41
-
42
- print STDERR "Renaming FastA.\n" unless $o{q};
43
- open FA, "<", $fa or die "Cannot read file: $fa: $!\n";
44
- my $good = 0;
45
- while(my $ln = <FA>){
46
- next if $ln =~ /^;/;
47
- chomp $ln;
48
- if($ln =~ m/^>((\S+).*)/){
49
- my $rep=0;
50
- $rep = ">".$li{$ln} if exists $li{$ln};
51
- $rep = ">".$li{$1} if exists $li{$1} and not $rep;
52
- $rep = ">".$li{">$1"} if exists $li{">$1"} and not $rep;
53
- $rep = ">".$li{$2} if exists $li{$2} and not $rep;
54
- if($rep){
55
- $ln = $rep;
56
- $good = 1;
57
- }
58
- }elsif($ln =~ m/^>/){
59
- $good=0;
60
- print STDERR "Warning: Non-cannonical defline, line $.: $ln\n";
61
- }
62
- print "$ln\n" if $good or not $o{f};
63
- }
64
- close FA;
65
-
@@ -1,23 +0,0 @@
1
- #!/usr/bin/env perl
2
- #
3
- # @author: Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
4
- # @update: Dec-25-2015
5
- # @license: artistic license 2.0
6
- #
7
- use strict;
8
- use warnings;
9
- use Bio::SeqIO;
10
-
11
- ($ARGV[0] and $ARGV[0] =~ /--?h(elp)?/) and die "
12
- Description:
13
- Reverse-complement sequences in FastA format.
14
-
15
- Usage:
16
- $0 < input.fa > output.fa
17
-
18
- ";
19
-
20
- my @len = ();
21
- my $seqI = Bio::SeqIO->new(-fh => \*STDIN, -format=>"FastA");
22
- my $seqO = Bio::SeqIO->new(-fh => \*STDOUT, -format=>"FastA");
23
- while(my $seq = $seqI->next_seq){ $seqO->write_seq($seq->revcom) }
@@ -1,98 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # frozen_string_literal: false
4
-
5
- $VERSION = 1.0
6
- $:.push File.expand_path('../lib', __FILE__)
7
- require 'enveomics_rb/enveomics'
8
-
9
- o = { q: false, rep: false }
10
-
11
- OptionParser.new do |opt|
12
- Enveomics.opt_banner(
13
- opt, 'Samples a random set of sequences from a multi-FastA file',
14
- "#{File.basename($0)} -i seq.fa -o 10pc.fa -f 0.1 [options]"
15
- )
16
- opt.separator 'Mandatory'
17
- opt.on(
18
- '-i', '--in PATH',
19
- 'Input FastA file',
20
- 'Supports compression with .gz extension, use - for STDIN'
21
- ) { |v| o[:i] = v }
22
- opt.on(
23
- '-o', '--out PATH',
24
- 'Output FastA file',
25
- 'Supports compression with .gz extension, use - for STDOUT'
26
- ) { |v| o[:o] = v }
27
- opt.on(
28
- '-f', '--fraction FLOAT', Float,
29
- 'Fraction of sequences to sample [0-1].',
30
- 'Mandatory unless -c is provided.'
31
- ) { |v| o[:f] = v }
32
- opt.separator ''
33
-
34
- opt.separator 'Options'
35
- opt.on(
36
- '-c', '--number INT', Integer,
37
- 'Number of sequences to sample',
38
- 'Mandatory unless -f is provided'
39
- ) { |v| o[:n] = v }
40
- opt.on('-r', '--replacement','Sample with replacement') { |v| o[:rep] = v }
41
- opt.on('-q', '--quiet', 'Run quietly (no STDERR output)') { o[:q] = true }
42
- opt.on('-h', '--help', 'Display this screen.') do
43
- puts opt
44
- exit
45
- end
46
- opt.separator ''
47
- end.parse!
48
-
49
- raise Enveomics::OptionError.new('-i is mandatory') if o[:i].nil?
50
- raise Enveomics::OptionError.new('-o is mandatory') if o[:o].nil?
51
- if o[:f].nil? && o[:n].nil?
52
- raise Enveomics::OptionError.new('-f or -n is mandatory')
53
- end
54
- $QUIET = o[:q]
55
-
56
- # Functions to parse sequences
57
- def do_stuff(id, sq)
58
- return if id.nil? or sq.empty?
59
- @n_in += 1
60
- sq.gsub!(/[^A-Za-z]/, '')
61
- i = 0
62
- @coll.extract(id, sq).each do |new_sq|
63
- @ofh.puts ">#{id}:#{i += 1}"
64
- @ofh.puts new_sq
65
- @n_out += 1
66
- end
67
- end
68
-
69
- # Parse sequences
70
- say 'Parsing sequences'
71
- seq = []
72
- fh = reader(o[:i])
73
- id = nil
74
- sq = ''
75
- fh.each do |ln|
76
- next if ln =~ /^;/
77
- if ln =~ /^>(.+)/
78
- seq << [id, sq] unless id.nil?
79
- id = $1
80
- sq = ''
81
- else
82
- sq << ln
83
- end
84
- end
85
- seq << [id, sq] unless id.nil?
86
- fh.close
87
- say "Input sequences: #{seq.size}"
88
-
89
- o[:n] ||= (seq.size * o[:f]).round
90
- seq_o = o[:rep] ? o[:n].times.map { seq.sample } : seq.sample(o[:n])
91
- fh = writer(o[:o])
92
- seq_o.each do |i|
93
- fh.puts ">#{i[0]}"
94
- fh.puts i[1]
95
- end
96
- fh.close
97
- say "Output sequences: #{seq_o.size}"
98
-
@@ -1,85 +0,0 @@
1
- #!/usr/bin/env perl
2
- #
3
- # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
4
- # @update Oct-07-2015
5
- # @license artistic license 2.0
6
- #
7
-
8
- use strict;
9
- use List::Util qw/min/;
10
-
11
- push @ARGV, undef unless $#ARGV%2;
12
- my %params = @ARGV;
13
-
14
- die "
15
- Usage:
16
- $0 [options] < in.fa > out.fa
17
-
18
- in.fa Input file in FastA format.
19
- out.fa Output file in FastA format.
20
-
21
- Options:
22
- -seq <str> Input file. If not set, it is expected to be in STDIN.
23
- -out <str> Output file. If not set, it is sent to STDOUT.
24
- -win <int> Window size. By default: 18.
25
- -step <int> Step size. By default: 1.
26
- -lerr <int> Expected error in chunks length. By default: 2.
27
- -comm <1|0> Generate FastA comments (leaded by semi-colon) to separate
28
- input sequences. By default: 0.
29
- -short <1|0> Use chunks shorter than the window size 'as is'. By
30
- default: 0 (discard those chunks).
31
- -h Displays this help message and exits.
32
-
33
- " if exists $params{'--help'} or
34
- exists $params{'-h'} or exists $params{'-help'};
35
-
36
- if($params{'-seq'}){
37
- open SEQ, "<", $params{'-seq'} or
38
- die "I can not open '".$params{'-seq'}."': $!\n";
39
- }else{
40
- *SEQ = *STDIN;
41
- print STDERR "Please input your sequence, and hit ".
42
- "Intro and Ctrl+D when you are done:\n";
43
- }
44
-
45
- if($params{'-out'}){
46
- open OUT, ">", $params{'-out'} or
47
- die "I can not open '".$params{'-out'}."': $!\n";
48
- }else{
49
- *OUT = *STDOUT;
50
- }
51
-
52
- $params{'-win'} ||= 18;
53
- $params{'-step'} ||= 1;
54
- $params{'-lerr'} ||= 2;
55
- $params{'-comm'} ||= 0;
56
- $params{'-short'} ||= 0;
57
-
58
- my $win = $params{'-win'}+0;
59
- my $stp = $params{'-step'}+0;
60
- my $lerr = $params{'-lerr'}+0;
61
- my $buffer = "";
62
- my $i = 0;
63
- while(<SEQ>){
64
- next if /^;/;
65
- chomp;
66
- if(m/^>/){
67
- print OUT ">", ++$i, "\n", $buffer, "\n" if
68
- $params{'-short'}==1 and $buffer;
69
- $buffer = "";
70
- print OUT ";--- INPUT: $_ ---\n" unless $params{'-comm'}==0;
71
- next;
72
- }
73
- s/[^A-Za-z]//g;
74
- $buffer.= $_;
75
- while(length($buffer) >= $win){
76
- print OUT ">", ++$i, "\n",
77
- substr($buffer, 0, $win+int(rand($lerr*2)-$lerr)), "\n";
78
- $buffer = substr $buffer, $stp;
79
- }
80
- }
81
- close SEQ if $params{'-seq'};
82
- close OUT if $params{'-out'};
83
- print STDERR "$i chunks produced.\n";
84
-
85
-
@@ -1,55 +0,0 @@
1
- #!/usr/bin/env perl
2
- #
3
- # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
4
- # @update Oct-13-2015
5
- # @license artistic license 2.0
6
- #
7
-
8
- use warnings;
9
- use strict;
10
- use Symbol;
11
-
12
- my ($file, $base, $outN) = @ARGV;
13
-
14
- $outN ||= 12;
15
- ($file and $base) or die "
16
- Usage
17
- $0 in_file.fa out_base[ no_files]
18
-
19
- in_file.fa Input file in FastA format.
20
- out_base Prefix for the name of the output files. It will
21
- be appended with .<i>.fa, where <i> is a consecutive
22
- number starting in 1.
23
- no_files Number of files to generate. By default: 12.
24
-
25
- ";
26
-
27
-
28
- my @outSym = ();
29
- for my $i (1 .. $outN){
30
- $outSym[$i-1] = gensym;
31
- open $outSym[$i-1], ">", "$base.$i.fa" or
32
- die "I can not create the file: $base.$i.fa: $!\n";
33
- }
34
-
35
-
36
- my($i, $seq) = (-1, '');
37
- open FILE, "<", $file or die "I can not read the file: $file: $!\n";
38
- while(my $ln=<FILE>){
39
- next if $ln=~/^;/;
40
- if($ln =~ m/^>/){
41
- print { $outSym[$i % $outN] } $seq if $seq;
42
- $i++;
43
- $seq = '';
44
- }
45
- $seq.=$ln;
46
- }
47
- print { $outSym[$i % $outN] } $seq if $seq;
48
- close FILE;
49
-
50
- for(my $j=0; $j<$outN; $j++){
51
- close $outSym[$j];
52
- }
53
-
54
- print STDERR "Sequences: ".($i+1)."\nFiles: $outN\n";
55
-
@@ -1,79 +0,0 @@
1
- #!/usr/bin/env ruby
2
- #
3
- # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
4
- # @license Artistic-2.0
5
- #
6
-
7
- require "optparse"
8
-
9
- o = {q:false, n:12, lett:false, dc:false, z:false, out:"%s.%s.fa"}
10
- ARGV << "-h" if ARGV.size==0
11
-
12
- OptionParser.new do |opt|
13
- opt.banner = "
14
- Evenly splits a multi-FastA file into multiple multi-FastA files.
15
-
16
- Usage: #{$0} [options]"
17
- opt.separator ""
18
- opt.separator "Mandatory"
19
- opt.on("-i", "--input PATH", "Input FastA file."){ |v| o[:i] = v}
20
- opt.on("-p", "--prefix PATH", "Prefix of output FastA files."){ |v| o[:p] = v}
21
- opt.separator ""
22
- opt.separator "Options"
23
- opt.on("-n", "--number INT",
24
- "Number of output files to produce. By default: #{o[:n]}."
25
- ){ |v| o[:n] = v.to_i }
26
- opt.on("-z", "--zero-padded",
27
- "Use zero-padded numbers as output index."){ o[:lett]=false; o[:z]=true }
28
- opt.on("-l", "--lowercase-letters",
29
- "Use lowercase letters as output index."){ o[:lett]=true ; o[:dc]=true }
30
- opt.on("-u", "--uppercase-letters",
31
- "Use uppercase letters as output index."){ o[:lett]=true }
32
- opt.on("-o", "--out STR",
33
- "Format of output filenames, where %s are replaced by prefix and index.",
34
- "By default: #{o[:out]}."){ |v| o[:out] = v }
35
- opt.on("-q", "--quiet", "Run quietly (no STDERR output)."){ o[:q] = TRUE }
36
- opt.on("-h", "--help", "Display this screen.") do
37
- puts opt
38
- exit
39
- end
40
- opt.separator ""
41
- end.parse!
42
- abort "-i is mandatory." if o[:i].nil?
43
- abort "-p is mandatory." if o[:p].nil?
44
-
45
- ofh = []
46
- idx = if o[:lett]
47
- k = Math::log(o[:n], 26).ceil
48
- r = o[:dc] ? ["a","z"] : ["A","Z"]
49
- ((r[0]*k) .. (r[1]*k)).first(o[:n])
50
- elsif o[:z]
51
- k = Math::log(o[:n], 10).ceil
52
- (1 .. o[:n]).map{ |i| "%0#{k}d" % i }
53
- else
54
- (1 .. o[:n]).map{ |i| i.to_s }
55
- end
56
- idx.each do |i|
57
- fn = o[:out] % [o[:p], i]
58
- ofh << File.open(fn, "w")
59
- end
60
-
61
- i = -1
62
- seq = ""
63
- File.open(o[:i], "r") do |ifh|
64
- ifh.each_line do |ln|
65
- next if ln =~ /^;/
66
- if ln =~ /^>/
67
- ofh[i % o[:n]].print seq
68
- i += 1
69
- seq = ""
70
- end
71
- seq << ln
72
- end
73
- ofh[i % o[:n]].print seq
74
- end
75
-
76
- ofh.each{ |i| i.close }
77
-
78
- $stderr.puts "Sequences: #{i+1}.", "Files: #{o[:n]}." unless o[:q]
79
-
@@ -1,131 +0,0 @@
1
- #!/usr/bin/env perl
2
- #
3
- # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
4
- # @update Oct-07-2015
5
- # @license artistic license 2.0
6
- #
7
-
8
- use strict;
9
- use warnings;
10
- use List::Util qw| max min sum |;
11
- use Getopt::Std;
12
- use Symbol;
13
-
14
- my %o;
15
- getopts('f:r:o:Fzhq', \%o);
16
-
17
- my $HELP = <<HELP
18
-
19
- Description:
20
- Subsamples a set of sequences.
21
-
22
- Usage:
23
- # IMPORTANT: options *MUST* precede the input file(s).
24
- $0 [options] input.fa...
25
-
26
- Where,
27
- input.fa... : File (or files) containing the sequences.
28
-
29
- Options:
30
- -f <float> : Fraction of the library to be sampled (as percentage). It can
31
- include several values (separated by comma), as well as ranges
32
- of values in the form 'from-to/by'. For example, the -f value
33
- 1-5/1,10-50/10,75,99 will produce 12 subsamples with expected
34
- fractions 1%, 2%, 3%, 4%, 5%, 10%, 20%, 30%, 40%, 50%, 75%,
35
- and 99%. By default: 10.
36
- -r <int> : Number of replicates per fraction. By default: 1.
37
- -o <str> : Prefix of the output files to be created. The output files
38
- will have a suffix of the form '.fraction-replicate.fa', where
39
- 'fraction' is the percentage sampled and 'replicate' is an
40
- increasing integer for replicates of the same fraction. By
41
- default: Path to the input file.
42
- -F : Force overwriting output file(s).
43
- -z : Include leading zeroes in the numeric parts of the output
44
- files (e.g., file.002.50-01.fa instead of file.2.50-1.fa), so
45
- that alphabetic sorting of files reflects the sampled
46
- fraction.
47
- -q : Run quietly.
48
- -h : Displays this message and exits.
49
-
50
- HELP
51
- ;
52
-
53
- sub thousands($){ my $i=shift; $i=~s/(\d{1,3}?)(?=(\d{3})+$)/$1,/g; $i }
54
- my @in = @ARGV;
55
- $o{f} ||= '10';
56
- $o{r} ||= 1;
57
- $o{o} ||= $in[0];
58
- $#in>=0 or die $HELP;
59
-
60
- my $samples = {};
61
- my $sample_no=0;
62
- my $format = ($o{z} ? "%s\.%08s\-%02i.fa" : "%s.%s-%s.fa");
63
- for my $value (split /,/, $o{f}){
64
- my $from = $value;
65
- my $to = $value;
66
- my $by = 1;
67
- if($value =~ m/^([^-]+)-([^\/]+)\/(.+)$/){
68
- $from = $1;
69
- $to = $2;
70
- $by = $3;
71
- ($from,$to) = ($to,$from) if $from > $to;
72
- }
73
- for(my $p=$from; $p<=$to; $p+=$by){
74
- die "Percentage out of the [0,100] range: $p\n" if $p>100 or $p<0;
75
- $samples->{$p} ||= [];
76
- for (1 .. $o{r}){
77
- my $r = $#{$samples->{$p}}+2;
78
- my $file = sprintf $format, $o{o}, sprintf("%.4f", $p), $r;
79
- die "File exists: $file.\n" if !$o{F} and -e $file;
80
- $samples->{$p}->[$r-1] = [$p, 0, gensym(), $file];
81
- open $samples->{$p}->[$r-1]->[2], ">", $file;
82
- $sample_no++;
83
- }
84
- }
85
- }
86
- print STDERR "Open samples: $sample_no.\n" unless $o{q};
87
-
88
- my $sprob = ($o{s} || '10');
89
-
90
- die $HELP unless $sprob and $#in>=0;
91
- $o{'h'} and die $HELP;
92
-
93
- my $N = 0;
94
- my @ck = qw(*... **.. ***. .*** ..** ...*);
95
- SAMPLING: {
96
- local $/ = "\n>";
97
- print STDERR "Sampling sequences.\n" unless $o{q};
98
- FILE: for my $in (@in){
99
- open IN, '<', $in or die "I can not open $in: $!\n";
100
- SEQ: while(my $seq = <IN>){
101
- $N++;
102
- $seq =~ s/^>?/>/;
103
- $seq =~ s/>$//;
104
- $seq =~ s/^;.*//gm;
105
- PERC: for my $sperc (values %$samples){
106
- SAMPLE: for my $sample (@$sperc){
107
- if($sample->[0] > rand 100){
108
- $sample->[1]++;
109
- print { qualify_to_ref $sample->[2] } $seq;
110
- }
111
- }
112
- }
113
- print STDERR " [".$ck[($N/5000)%@ck]."] ".&thousands($N).
114
- " seqs. \r" unless $o{q} or $N%5000;
115
- }
116
- close IN;
117
- }
118
- }
119
-
120
- print STDERR " Total sequences: ".&thousands($N).". \n" unless $o{q};
121
- for my $p (values %$samples){
122
- for my $s (@$p){
123
- printf STDERR "
124
- Sample file: %s
125
- Sampled sequences: %d
126
- Sampled fraction: %.2f%%\n",
127
- $s->[3], $s->[1], $s->[1]*100/$N unless $o{q};
128
- close $s->[2];
129
- }
130
- }
131
-
@@ -1,65 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @author Luis M. Rodriguez-R
4
- # @license artistic license 2.0
5
-
6
- require 'optparse'
7
-
8
- o = {q: false, p: '', s: '', d: false}
9
- ARGV << '-h' if ARGV.size==0
10
- OptionParser.new do |opts|
11
- opts.banner = "
12
- Generates easy-to-parse tagged reads from FastA files.
13
-
14
- Usage: #{$0} [options]"
15
- opts.separator ''
16
- opts.separator 'Mandatory'
17
- opts.on('-i', '--in FILE',
18
- 'Path to the FastA file containing the sequences.'){ |v| o[:in] = v }
19
- opts.on('-o', '--out FILE',
20
- 'Path to the FastA to create.'){ |v| o[:out] = v }
21
- opts.separator ''
22
- opts.separator 'ID options'
23
- opts.on('-p', '--prefix STR', 'Prefix to use in all IDs.'){ |v| o[:p] = v }
24
- opts.on('-s', '--suffix STR', 'Suffix to use in all IDs.'){ |v| o[:s] = v }
25
- opts.on('-d', '--defline',
26
- 'Keep the original defline after a space.'){ o[:d] = true }
27
- opts.on('-l', '--list FILE',
28
- 'Reads a list of IDS.'){ |v| o[:l] = v }
29
- opts.separator ''
30
- opts.separator 'Other Options'
31
- opts.on('-q', '--quiet', 'Run quietly (no STDERR output)'){ o[:q] = true }
32
- opts.on('-h', '--help', 'Display this screen') do
33
- puts opts
34
- exit
35
- end
36
- opts.separator ''
37
- end.parse!
38
- abort '-i is mandatory' if o[:in].nil?
39
- abort '-o is mandatory' if o[:out].nil?
40
-
41
- begin
42
- list = o[:l].nil? ? nil :
43
- File.readlines(o[:l]).map{ |i| i.chomp.gsub(/^>/, '') }
44
- ofh = File.open(o[:out], 'w')
45
- i = 0
46
- File.open(o[:in], 'r') do |ifh|
47
- ifh.each do |ln|
48
- ln.chomp!
49
- next if ln =~ /^;/
50
- unless /^>/.match(ln).nil?
51
- i += 1
52
- new_id = o[:l].nil? ? i : list.shift
53
- ofh.puts ">#{o[:p]}#{new_id}#{o[:s]}#{o[:d]?" #{ln[1, ln.size-1]}":''}"
54
- else
55
- ofh.puts ln
56
- end
57
- end
58
- end
59
- ofh.close
60
- rescue => err
61
- $stderr.puts "Exception: #{err}\n\n"
62
- err.backtrace.each { |l| $stderr.puts l + "\n" }
63
- err
64
- end
65
-