miga-base 1.2.15.2 → 1.2.15.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/cli/action/download/gtdb.rb +4 -1
  3. data/lib/miga/cli/action/gtdb_get.rb +4 -0
  4. data/lib/miga/daemon.rb +4 -1
  5. data/lib/miga/lair.rb +6 -4
  6. data/lib/miga/remote_dataset/download.rb +3 -2
  7. data/lib/miga/remote_dataset.rb +25 -7
  8. data/lib/miga/taxonomy.rb +6 -0
  9. data/lib/miga/version.rb +2 -2
  10. metadata +6 -302
  11. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +0 -41964
  12. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +0 -32439
  13. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +0 -62056
  14. data/utils/FastAAI/FastAAI +0 -3659
  15. data/utils/FastAAI/FastAAI-legacy/FastAAI +0 -1336
  16. data/utils/FastAAI/FastAAI-legacy/kAAI_v1.0_virus.py +0 -1296
  17. data/utils/FastAAI/README.md +0 -84
  18. data/utils/enveomics/Docs/recplot2.md +0 -244
  19. data/utils/enveomics/Examples/aai-matrix.bash +0 -66
  20. data/utils/enveomics/Examples/ani-matrix.bash +0 -66
  21. data/utils/enveomics/Examples/essential-phylogeny.bash +0 -105
  22. data/utils/enveomics/Examples/unus-genome-phylogeny.bash +0 -100
  23. data/utils/enveomics/LICENSE.txt +0 -73
  24. data/utils/enveomics/Makefile +0 -52
  25. data/utils/enveomics/Manifest/Tasks/aasubs.json +0 -103
  26. data/utils/enveomics/Manifest/Tasks/blasttab.json +0 -790
  27. data/utils/enveomics/Manifest/Tasks/distances.json +0 -161
  28. data/utils/enveomics/Manifest/Tasks/fasta.json +0 -802
  29. data/utils/enveomics/Manifest/Tasks/fastq.json +0 -291
  30. data/utils/enveomics/Manifest/Tasks/graphics.json +0 -126
  31. data/utils/enveomics/Manifest/Tasks/mapping.json +0 -137
  32. data/utils/enveomics/Manifest/Tasks/ogs.json +0 -382
  33. data/utils/enveomics/Manifest/Tasks/other.json +0 -906
  34. data/utils/enveomics/Manifest/Tasks/remote.json +0 -355
  35. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +0 -650
  36. data/utils/enveomics/Manifest/Tasks/tables.json +0 -308
  37. data/utils/enveomics/Manifest/Tasks/trees.json +0 -68
  38. data/utils/enveomics/Manifest/Tasks/variants.json +0 -111
  39. data/utils/enveomics/Manifest/categories.json +0 -165
  40. data/utils/enveomics/Manifest/examples.json +0 -162
  41. data/utils/enveomics/Manifest/tasks.json +0 -4
  42. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +0 -69
  43. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +0 -1
  44. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +0 -1
  45. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +0 -1
  46. data/utils/enveomics/Pipelines/assembly.pbs/README.md +0 -189
  47. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +0 -112
  48. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +0 -23
  49. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +0 -44
  50. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +0 -50
  51. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +0 -37
  52. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +0 -68
  53. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +0 -49
  54. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +0 -80
  55. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +0 -57
  56. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +0 -63
  57. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +0 -38
  58. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +0 -73
  59. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +0 -21
  60. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +0 -72
  61. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +0 -98
  62. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +0 -1
  63. data/utils/enveomics/Pipelines/blast.pbs/README.md +0 -127
  64. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +0 -109
  65. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +0 -128
  66. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +0 -16
  67. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +0 -22
  68. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +0 -26
  69. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +0 -89
  70. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +0 -29
  71. data/utils/enveomics/Pipelines/idba.pbs/README.md +0 -49
  72. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +0 -95
  73. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +0 -56
  74. data/utils/enveomics/Pipelines/trim.pbs/README.md +0 -54
  75. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +0 -70
  76. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +0 -130
  77. data/utils/enveomics/README.md +0 -42
  78. data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +0 -171
  79. data/utils/enveomics/Scripts/Aln.cat.rb +0 -221
  80. data/utils/enveomics/Scripts/Aln.convert.pl +0 -35
  81. data/utils/enveomics/Scripts/AlphaDiversity.pl +0 -152
  82. data/utils/enveomics/Scripts/BedGraph.tad.rb +0 -93
  83. data/utils/enveomics/Scripts/BedGraph.window.rb +0 -71
  84. data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +0 -102
  85. data/utils/enveomics/Scripts/BlastTab.addlen.rb +0 -63
  86. data/utils/enveomics/Scripts/BlastTab.advance.bash +0 -48
  87. data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +0 -55
  88. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +0 -104
  89. data/utils/enveomics/Scripts/BlastTab.cogCat.rb +0 -76
  90. data/utils/enveomics/Scripts/BlastTab.filter.pl +0 -47
  91. data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +0 -194
  92. data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +0 -104
  93. data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +0 -157
  94. data/utils/enveomics/Scripts/BlastTab.recplot2.R +0 -48
  95. data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +0 -86
  96. data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +0 -119
  97. data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +0 -86
  98. data/utils/enveomics/Scripts/BlastTab.subsample.pl +0 -47
  99. data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +0 -114
  100. data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +0 -90
  101. data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +0 -123
  102. data/utils/enveomics/Scripts/Chao1.pl +0 -97
  103. data/utils/enveomics/Scripts/CharTable.classify.rb +0 -234
  104. data/utils/enveomics/Scripts/EBIseq2tax.rb +0 -83
  105. data/utils/enveomics/Scripts/FastA.N50.pl +0 -60
  106. data/utils/enveomics/Scripts/FastA.extract.rb +0 -152
  107. data/utils/enveomics/Scripts/FastA.filter.pl +0 -52
  108. data/utils/enveomics/Scripts/FastA.filterLen.pl +0 -28
  109. data/utils/enveomics/Scripts/FastA.filterN.pl +0 -60
  110. data/utils/enveomics/Scripts/FastA.fragment.rb +0 -100
  111. data/utils/enveomics/Scripts/FastA.gc.pl +0 -42
  112. data/utils/enveomics/Scripts/FastA.interpose.pl +0 -93
  113. data/utils/enveomics/Scripts/FastA.length.pl +0 -38
  114. data/utils/enveomics/Scripts/FastA.mask.rb +0 -89
  115. data/utils/enveomics/Scripts/FastA.per_file.pl +0 -36
  116. data/utils/enveomics/Scripts/FastA.qlen.pl +0 -57
  117. data/utils/enveomics/Scripts/FastA.rename.pl +0 -65
  118. data/utils/enveomics/Scripts/FastA.revcom.pl +0 -23
  119. data/utils/enveomics/Scripts/FastA.sample.rb +0 -98
  120. data/utils/enveomics/Scripts/FastA.slider.pl +0 -85
  121. data/utils/enveomics/Scripts/FastA.split.pl +0 -55
  122. data/utils/enveomics/Scripts/FastA.split.rb +0 -79
  123. data/utils/enveomics/Scripts/FastA.subsample.pl +0 -131
  124. data/utils/enveomics/Scripts/FastA.tag.rb +0 -65
  125. data/utils/enveomics/Scripts/FastA.toFastQ.rb +0 -69
  126. data/utils/enveomics/Scripts/FastA.wrap.rb +0 -48
  127. data/utils/enveomics/Scripts/FastQ.filter.pl +0 -54
  128. data/utils/enveomics/Scripts/FastQ.interpose.pl +0 -90
  129. data/utils/enveomics/Scripts/FastQ.maskQual.rb +0 -89
  130. data/utils/enveomics/Scripts/FastQ.offset.pl +0 -90
  131. data/utils/enveomics/Scripts/FastQ.split.pl +0 -53
  132. data/utils/enveomics/Scripts/FastQ.tag.rb +0 -70
  133. data/utils/enveomics/Scripts/FastQ.test-error.rb +0 -81
  134. data/utils/enveomics/Scripts/FastQ.toFastA.awk +0 -24
  135. data/utils/enveomics/Scripts/GFF.catsbj.pl +0 -127
  136. data/utils/enveomics/Scripts/GenBank.add_fields.rb +0 -84
  137. data/utils/enveomics/Scripts/HMM.essential.rb +0 -351
  138. data/utils/enveomics/Scripts/HMM.haai.rb +0 -168
  139. data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +0 -83
  140. data/utils/enveomics/Scripts/JPlace.distances.rb +0 -88
  141. data/utils/enveomics/Scripts/JPlace.to_iToL.rb +0 -320
  142. data/utils/enveomics/Scripts/M5nr.getSequences.rb +0 -81
  143. data/utils/enveomics/Scripts/MeTaxa.distribution.pl +0 -198
  144. data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +0 -35
  145. data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +0 -49
  146. data/utils/enveomics/Scripts/NCBIacc2tax.rb +0 -92
  147. data/utils/enveomics/Scripts/Newick.autoprune.R +0 -27
  148. data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +0 -228
  149. data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +0 -32
  150. data/utils/enveomics/Scripts/RefSeq.download.bash +0 -48
  151. data/utils/enveomics/Scripts/SRA.download.bash +0 -55
  152. data/utils/enveomics/Scripts/TRIBS.plot-test.R +0 -36
  153. data/utils/enveomics/Scripts/TRIBS.test.R +0 -39
  154. data/utils/enveomics/Scripts/Table.barplot.R +0 -31
  155. data/utils/enveomics/Scripts/Table.df2dist.R +0 -30
  156. data/utils/enveomics/Scripts/Table.filter.pl +0 -61
  157. data/utils/enveomics/Scripts/Table.merge.pl +0 -77
  158. data/utils/enveomics/Scripts/Table.prefScore.R +0 -60
  159. data/utils/enveomics/Scripts/Table.replace.rb +0 -69
  160. data/utils/enveomics/Scripts/Table.round.rb +0 -63
  161. data/utils/enveomics/Scripts/Table.split.pl +0 -57
  162. data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +0 -227
  163. data/utils/enveomics/Scripts/VCF.KaKs.rb +0 -147
  164. data/utils/enveomics/Scripts/VCF.SNPs.rb +0 -88
  165. data/utils/enveomics/Scripts/aai.rb +0 -421
  166. data/utils/enveomics/Scripts/ani.rb +0 -362
  167. data/utils/enveomics/Scripts/anir.rb +0 -137
  168. data/utils/enveomics/Scripts/clust.rand.rb +0 -102
  169. data/utils/enveomics/Scripts/gi2tax.rb +0 -103
  170. data/utils/enveomics/Scripts/in_silico_GA_GI.pl +0 -96
  171. data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
  172. data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
  173. data/utils/enveomics/Scripts/lib/enveomics.R +0 -1
  174. data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +0 -293
  175. data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +0 -175
  176. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +0 -24
  177. data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +0 -17
  178. data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +0 -30
  179. data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +0 -253
  180. data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +0 -88
  181. data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +0 -182
  182. data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +0 -49
  183. data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +0 -74
  184. data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +0 -237
  185. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +0 -31
  186. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +0 -152
  187. data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +0 -3
  188. data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +0 -74
  189. data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +0 -135
  190. data/utils/enveomics/Scripts/ogs.annotate.rb +0 -88
  191. data/utils/enveomics/Scripts/ogs.core-pan.rb +0 -160
  192. data/utils/enveomics/Scripts/ogs.extract.rb +0 -125
  193. data/utils/enveomics/Scripts/ogs.mcl.rb +0 -186
  194. data/utils/enveomics/Scripts/ogs.rb +0 -104
  195. data/utils/enveomics/Scripts/ogs.stats.rb +0 -131
  196. data/utils/enveomics/Scripts/rbm-legacy.rb +0 -172
  197. data/utils/enveomics/Scripts/rbm.rb +0 -108
  198. data/utils/enveomics/Scripts/sam.filter.rb +0 -148
  199. data/utils/enveomics/Tests/Makefile +0 -10
  200. data/utils/enveomics/Tests/Mgen_M2288.faa +0 -3189
  201. data/utils/enveomics/Tests/Mgen_M2288.fna +0 -8282
  202. data/utils/enveomics/Tests/Mgen_M2321.fna +0 -8288
  203. data/utils/enveomics/Tests/Nequ_Kin4M.faa +0 -2970
  204. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
  205. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +0 -7
  206. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +0 -17
  207. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +0 -137
  208. data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +0 -123
  209. data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +0 -200
  210. data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +0 -55
  211. data/utils/enveomics/Tests/alkB.nwk +0 -1
  212. data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +0 -13
  213. data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +0 -17
  214. data/utils/enveomics/Tests/hiv1.faa +0 -59
  215. data/utils/enveomics/Tests/hiv1.fna +0 -134
  216. data/utils/enveomics/Tests/hiv2.faa +0 -70
  217. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +0 -233
  218. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +0 -1
  219. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +0 -233
  220. data/utils/enveomics/Tests/phyla_counts.tsv +0 -10
  221. data/utils/enveomics/Tests/primate_lentivirus.ogs +0 -11
  222. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +0 -9
  223. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +0 -8
  224. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +0 -6
  225. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +0 -9
  226. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +0 -6
  227. data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +0 -6
  228. data/utils/enveomics/build_enveomics_r.bash +0 -45
  229. data/utils/enveomics/enveomics.R/DESCRIPTION +0 -31
  230. data/utils/enveomics/enveomics.R/NAMESPACE +0 -39
  231. data/utils/enveomics/enveomics.R/R/autoprune.R +0 -155
  232. data/utils/enveomics/enveomics.R/R/barplot.R +0 -184
  233. data/utils/enveomics/enveomics.R/R/cliopts.R +0 -135
  234. data/utils/enveomics/enveomics.R/R/df2dist.R +0 -154
  235. data/utils/enveomics/enveomics.R/R/growthcurve.R +0 -331
  236. data/utils/enveomics/enveomics.R/R/prefscore.R +0 -79
  237. data/utils/enveomics/enveomics.R/R/recplot.R +0 -354
  238. data/utils/enveomics/enveomics.R/R/recplot2.R +0 -1631
  239. data/utils/enveomics/enveomics.R/R/tribs.R +0 -583
  240. data/utils/enveomics/enveomics.R/R/utils.R +0 -80
  241. data/utils/enveomics/enveomics.R/README.md +0 -81
  242. data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
  243. data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
  244. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -16
  245. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -16
  246. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -16
  247. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +0 -25
  248. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +0 -46
  249. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +0 -23
  250. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +0 -47
  251. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +0 -23
  252. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +0 -23
  253. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +0 -40
  254. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +0 -103
  255. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +0 -67
  256. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +0 -24
  257. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +0 -19
  258. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +0 -45
  259. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +0 -44
  260. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +0 -47
  261. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +0 -75
  262. data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +0 -50
  263. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +0 -44
  264. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +0 -139
  265. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +0 -45
  266. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +0 -24
  267. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +0 -77
  268. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +0 -25
  269. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +0 -21
  270. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +0 -19
  271. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +0 -19
  272. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +0 -47
  273. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +0 -29
  274. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +0 -18
  275. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +0 -45
  276. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +0 -36
  277. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +0 -19
  278. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +0 -19
  279. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +0 -27
  280. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +0 -52
  281. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +0 -17
  282. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +0 -51
  283. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +0 -43
  284. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +0 -82
  285. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +0 -59
  286. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +0 -27
  287. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +0 -36
  288. data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +0 -23
  289. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +0 -68
  290. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +0 -28
  291. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +0 -27
  292. data/utils/enveomics/enveomics.R/man/growth.curves.Rd +0 -14
  293. data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +0 -13
  294. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +0 -78
  295. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +0 -46
  296. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +0 -45
  297. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +0 -125
  298. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +0 -19
  299. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +0 -19
  300. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +0 -19
  301. data/utils/enveomics/globals.mk +0 -8
  302. data/utils/enveomics/manifest.json +0 -9
  303. data/utils/multitrim/Multitrim How-To.pdf +0 -0
  304. data/utils/multitrim/README.md +0 -67
  305. data/utils/multitrim/multitrim.py +0 -1555
  306. data/utils/multitrim/multitrim.yml +0 -13
@@ -1,152 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- #
4
- # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
5
- # @license Artistic-2.0
6
- #
7
-
8
- require 'optparse'
9
-
10
- o = {q: false}
11
- ARGV << '-h' if ARGV.size==0
12
-
13
- OptionParser.new do |opt|
14
- opt.banner = "
15
- Extracts a list of sequences and/or coordinates from multi-FastA files.
16
-
17
- Usage: #{$0} [options]"
18
- opt.separator ''
19
- opt.separator 'Mandatory'
20
- opt.on('-i', '--in PATH', 'Input FastA file.'){ |v| o[:i] = v }
21
- opt.on('-o', '--out PATH', 'Output FastA file.'){ |v| o[:o] = v }
22
- opt.on('-c', '--coords STRING',
23
- 'Comma-delimited list of coordinates (mandatory unless -C is passed).',
24
- 'The format of the coordinates is "SEQ:FROM..TO" or "SEQ:FROM~LEN":',
25
- 'SEQ: Sequence ID, or * (asterisk) to extract range from all sequences',
26
- 'FROM: Integer, position of the first base to include (can be negative)',
27
- 'TO: Integer, last base to include (can be negative)',
28
- 'LEN: Length of the range to extract'
29
- ){ |v| o[:c] = v }
30
- opt.separator ''
31
- opt.separator 'Options'
32
- opt.on('-C', '--coords-file PATH',
33
- 'File containing the coordinates, one per line.',
34
- 'Each line must follow the format described for -c.'){ |v| o[:C] = v }
35
- opt.on('-q', '--quiet', 'Run quietly (no STDERR output).'){ o[:q] = true }
36
- opt.on('-h', '--help', 'Display this screen.') do
37
- puts opt
38
- exit
39
- end
40
- opt.separator ''
41
- end.parse!
42
- abort '-i is mandatory.' if o[:i].nil?
43
- abort '-o is mandatory.' if o[:o].nil?
44
- abort '-c is mandatory.' if o[:c].nil? and o[:C].nil?
45
-
46
- # Classses to parse coordinates
47
- class SeqCoords
48
- attr :id, :from, :to, :length, :str
49
- def initialize(str)
50
- @str = str
51
- m = /(\S+):(-?\d+)(~|\.\.)(-?\d+)/.match str
52
- raise "Cannot parse coordinates: #{str}" if m.nil?
53
- @id = m[1]
54
- @from = m[2].to_i
55
- if m[3] == '~'
56
- @length = m[4].to_i
57
- else
58
- @to = m[4].to_i
59
- end
60
- end
61
-
62
- def extract(id, seq)
63
- return nil unless concerns? id
64
- from_i = from > 0 ? from : seq.length + 1 + from
65
- if to.nil?
66
- seq[from_i, length]
67
- else
68
- to_i = to > 0 ? to : seq.length + 1 + to
69
- seq[from_i .. to_i]
70
- end
71
- end
72
-
73
- def concerns?(seq_id)
74
- return true if id == '*'
75
- return id == seq_id
76
- end
77
- end
78
-
79
- class SeqCoordsCollection
80
- class << self
81
- def from_str(str)
82
- c = new
83
- str.split(',').each { |i| c << SeqCoords.new(i) }
84
- c
85
- end
86
- def from_file(path)
87
- c = new
88
- File.open(path, 'r') do |fh|
89
- fh.each{ |i| c << SeqCoords.new(i.chomp) }
90
- end
91
- c
92
- end
93
- end
94
-
95
- attr :collection
96
-
97
- def initialize
98
- @collection = []
99
- end
100
-
101
- def <<(coords)
102
- @collection << coords
103
- end
104
-
105
- def extract(id, seq)
106
- @collection.map{ |c| c.extract(id, seq) }.compact
107
- end
108
- end
109
-
110
- # Functions to parse sequences
111
- def do_stuff(id, sq)
112
- return if id.nil? or sq.empty?
113
- @n_in += 1
114
- sq.gsub!(/[^A-Za-z]/, '')
115
- i = 0
116
- @coll.extract(id, sq).each do |new_sq|
117
- @ofh.puts ">#{id}:#{i += 1}"
118
- @ofh.puts new_sq
119
- @n_out += 1
120
- end
121
- end
122
-
123
- # Parse coordinates
124
- $stderr.puts 'Parsing coordinates' unless o[:q]
125
- @coll = o[:c].nil? ? SeqCoordsCollection.from_file(o[:C]) :
126
- SeqCoordsCollection.from_str(o[:c])
127
- $stderr.puts " Coordinates found: #{@coll.collection.size}"
128
-
129
- # Parse sequences
130
- $stderr.puts 'Parsing sequences' unless o[:q]
131
- @n_in = 0
132
- @n_out = 0
133
- @ofh = File.open(o[:o], 'w')
134
- File.open(o[:i], 'r') do |fh|
135
- id = nil
136
- sq = ''
137
- fh.each do |ln|
138
- next if ln =~ /^;/
139
- if ln =~ /^>(\S+)/
140
- id = $1
141
- do_stuff(id, sq)
142
- sq = ''
143
- else
144
- sq << ln
145
- end
146
- end
147
- do_stuff(id, sq)
148
- end
149
- @ofh.close
150
- $stderr.puts " Input sequences: #{@n_in}"
151
- $stderr.puts " Output fragments: #{@n_out}"
152
-
@@ -1,52 +0,0 @@
1
- #!/usr/bin/env perl
2
- #
3
- # @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
4
- # @update Oct-07-2015
5
- # @license artistic license 2.0
6
- #
7
-
8
- use warnings;
9
- use strict;
10
- use Getopt::Std;
11
-
12
- sub HELP_MESSAGE { die "
13
- .Description:
14
- Extracts a subset of sequences from a FastA file.
15
-
16
- .Usage: $0 [options] list.txt seqs.fa > subset.fa
17
-
18
- [options]
19
- -r Reverse list. Extracts sequences NOT present in the list.
20
- -q Runs quietly.
21
- -h Prints this message and exits.
22
-
23
- [mandatory]
24
- list.txt List of sequences to extract.
25
- seqs.fa FastA file containing the superset of sequences.
26
- subset.fa FastA file to be created.
27
-
28
- " }
29
-
30
- my %o=();
31
- getopts('rhq', \%o);
32
- my($list, $fa) = @ARGV;
33
- ($list and $fa) or &HELP_MESSAGE;
34
- $o{h} and &HELP_MESSAGE;
35
-
36
- print STDERR "Reading list.\n" unless $o{q};
37
- open LI, "<", $list or die "Cannot read file: $list: $!\n";
38
- my %li = map { chomp; $_ => 1 } <LI>;
39
- close LI;
40
-
41
- print STDERR "Filtering FastA.\n" unless $o{q};
42
- open FA, "<", $fa or die "Cannot read file: $fa: $!\n";
43
- my $good = 0;
44
- while(my $ln = <FA>){
45
- next if $ln =~ /^;/;
46
- chomp $ln;
47
- if($ln =~ m/^>((\S+).*)/){ $good = (exists $li{$1} or exists $li{">$1"} or exists $li{$2} or exists $li{$ln}) }
48
- elsif($ln =~ m/^>/){ $good=$o{r}; print STDERR "Warning: Non-cannonical defline, line $.: $ln\n" }
49
- print "$ln\n" if (($good and not $o{r}) or ($o{r} and not $good));
50
- }
51
- close FA;
52
-
@@ -1,28 +0,0 @@
1
- #!/usr/bin/env perl
2
-
3
- use warnings;
4
- use strict;
5
- use Bio::SeqIO;
6
-
7
- my $file = $ARGV[0];
8
- my $min = $ARGV[1];
9
- ($file and $min) or die <<HELP
10
-
11
- This script will filter a multi fastA file by length
12
-
13
- Usage "perl $0 fastafile minlenght "
14
- HELP
15
- ;
16
- my $seq_in = Bio::SeqIO->new( -format => 'fasta',-file => $file);
17
-
18
- while( my $seq1 = $seq_in->next_seq() ) {
19
-
20
- my $id = $seq1->primary_id;
21
- chomp $id;
22
- my $seq = $seq1->seq;
23
- chomp $seq;
24
- my $lseq = length($seq);
25
- if($lseq>=$min){
26
- print ">$id","\n",$seq,"\n";
27
- }
28
- }
@@ -1,60 +0,0 @@
1
- #!/usr/bin/env perl
2
- #
3
- # @author Luis M. Rodriguez-R
4
- # @update Oct-07-2015
5
- # @license artistic license 2.0
6
- #
7
-
8
- use warnings;
9
- use strict;
10
-
11
- my($file, $content, $stretch) = @ARGV;
12
- $file or die <<HELP
13
-
14
- Description:
15
- Filter sequences by N-content and presence of long homopolymers.
16
- Usage:
17
- $0 sequences.fa [content [stretch]] > filtered.fa
18
- Where:
19
- sequences.fa Input file in FastA format
20
- content A number between 0 and 1 indicating the maximum proportion of Ns
21
- (1 to turn off, 0.5 by default)
22
- stretch A number indicating the maximum number of consecutive identical
23
- nucleotides allowed (0 to turn off, 100 by default)
24
- filtered.fa Filtered set of sequences.
25
-
26
- HELP
27
- ;
28
- ($content ||= 0.5)+=0;
29
- ($stretch ||= 100)+=0;
30
-
31
- my $good = 0;
32
- my $N = 0;
33
-
34
- FASTA: {
35
- local $/ = "\n>";
36
- open FILE, "<", $file or die "I can not open the file: $file: $!\n";
37
- SEQ: while(<FILE>){
38
- $N++;
39
- s/^;.*//gm;
40
- s/>//g;
41
- my($n,$s) = split /\n/, $_, 2;
42
- (my $clean = $s) =~ s/[^ACTGN]//g;
43
- if($content < 1){
44
- (my $Ns = $clean) =~ s/[^N]//g;
45
- next SEQ if length($Ns)>length($clean)*$content;
46
- }
47
- if($stretch > 0){
48
- for my $nuc (qw(A C T G N)){
49
- next SEQ if $clean =~ m/[$nuc]{$stretch}/;
50
- }
51
- }
52
- print ">$n\n$s\n";
53
- $good++;
54
- }
55
- close FILE;
56
- print STDERR "Total sequences: $N\nAfter filtering: $good\n";
57
- }
58
-
59
-
60
-
@@ -1,100 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # frozen_string_literal: true
4
-
5
- $:.push File.expand_path('../lib', __FILE__)
6
- require 'enveomics_rb/enveomics'
7
- require 'enveomics_rb/stats'
8
- $VERSION = 1.0
9
-
10
- o = { q: false, completeness: nil, minlen: 500, shuffle: true }
11
- OptionParser.new do |opts|
12
- opts.version = $VERSION
13
- Enveomics.opt_banner(
14
- opts, 'Simulates incomplete (fragmented) drafts from complete genomes',
15
- "#{File.basename($0)} -i in.fasta -o out.fasta -c 0.5 [options]"
16
- )
17
-
18
- opts.separator 'Mandatory'
19
- opts.on(
20
- '-i', '--in FILE',
21
- 'Path to the FastA file containing the complete sequences',
22
- 'Supports compression with .gz extension, use - for STDIN'
23
- ) { |v| o[:in] = v }
24
- opts.on(
25
- '-o', '--out FILE', 'Path to the FastA to create',
26
- 'Supports compression with .gz extension, use - for STDOUT'
27
- ) { |v| o[:out] = v }
28
- opts.on(
29
- '-c', '--completeness FLOAT',
30
- 'Fraction of genome completeness to simulate from 0 to 1'
31
- ) { |v| o[:completeness] = v.to_f }
32
-
33
- opts.separator ''
34
- opts.separator 'Options'
35
- opts.on(
36
- '-m', '--minlen INT',
37
- "Minimum fragment length to report. By default: #{o[:minlen]}"
38
- ) { |v| o[:minlen] = v.to_i }
39
- opts.on(
40
- '-s', '--sorted', 'Keep fragments sorted as in the input file',
41
- 'By default, fragments are shuffled'
42
- ) { |v| o[:shuffle] = !v }
43
- opts.on('-q', '--quiet', 'Run quietly (no STDERR output)') { o[:q] = true }
44
- opts.on('-h', '--help', 'Display this screen') { puts opts ; exit }
45
- opts.separator ''
46
- end.parse!
47
-
48
- raise Enveomics::OptionError.new('-i is mandatory') if o[:in].nil?
49
- raise Enveomics::OptionError.new('-o is mandatory') if o[:out].nil?
50
- raise Enveomics::OptionError.new('-c is mandatory') if o[:completeness].nil?
51
-
52
- begin
53
- # Read input sequences
54
- g_id = []
55
- g_seq = []
56
- ifh = reader(o[:in])
57
- id = ''
58
- ifh.each_line do |ln|
59
- if ln =~ /^>(\S*)/
60
- g_id << $1
61
- g_seq << ''
62
- else
63
- g_seq[g_seq.size - 1] += ln.gsub(/[^A-Za-z]/, '')
64
- end
65
- end
66
- ifh.close
67
-
68
- # Fragment genomes
69
- f = {}
70
- binlen = [1, (o[:minlen].to_f/(1.5**2)).ceil].max
71
- p = [0.001, [1.0, 1.0 - (o[:completeness]/1.25 + 0.1)].min].max
72
- while !g_seq.empty?
73
- id = g_id.shift
74
- seq = g_seq.shift
75
- gL = seq.length
76
- while !seq.empty?
77
- rand_x =
78
- Enveomics::Stats.r_geom(p).to_f + Enveomics::Stats.r_unif(-0.5, 0.5)
79
- fL = [0, (rand_x * binlen).round].max
80
- f["#{f.size+1}_#{id}"] = seq[0, fL] if fL >= o[:minlen]
81
- seq = seq[(fL + 1) .. -1]
82
- seq = '' if seq.nil?
83
- end
84
- end
85
-
86
- # Save output
87
- k = f.keys
88
- k.shuffle! if o[:shuffle]
89
- ofh = writer(o[:out])
90
- k.each do |id|
91
- ofh.puts ">#{id}"
92
- ofh.puts f[id].gsub(/(\S{50})/, "\\1\n")
93
- end
94
- ofh.close
95
- rescue => err
96
- $stderr.puts "Exception: #{err}\n\n"
97
- err.backtrace.each { |l| $stderr.puts l + "\n" }
98
- err
99
- end
100
-
@@ -1,42 +0,0 @@
1
- #!/usr/bin/env perl
2
- #
3
- # @author Luis M Rodriguez-R
4
- # @update Mar-23-2016
5
- # @license artistic license 2.0
6
- #
7
-
8
- use warnings;
9
- use strict;
10
-
11
- $#ARGV>=0 or die "
12
- Usage:
13
- $0 seqs.fa... > gc.txt
14
-
15
- seqs.fa One or more FastA files.
16
- gc.txt A table with the G+C content of the sequences.
17
-
18
- ";
19
-
20
- for my $fa (@ARGV){
21
- open FA, "<", $fa or die "Cannot open file: $fa: $!\n";
22
- my $def = "";
23
- my $len = 0;
24
- my $gc = 0;
25
- while(<FA>){
26
- next if /^;/;
27
- if(m/^>(\S*)/){
28
- print "$def\t".($gc/$len)."\n" if $len;
29
- $def = $1;
30
- $len = 0;
31
- $gc = 0;
32
- }else{
33
- s/[^ACTGactg]//g;
34
- $len += length $_;
35
- s/[^GC]//g;
36
- $gc += length $_;
37
- }
38
- }
39
- print "$def\t".($gc/$len)."\n" if $len;
40
- close FA;
41
- }
42
-
@@ -1,93 +0,0 @@
1
- #!/usr/bin/env perl
2
-
3
- # @author Luis M. Rodriguez-R
4
- # @license artistic license 2.0
5
-
6
- use strict;
7
- use warnings;
8
- use Symbol;
9
-
10
- my $HELP = <<HELP
11
-
12
- Description:
13
- Interposes sequences in FastA format from two files into one output file.
14
- If more than two files are provided, the script will interpose all the input
15
- files.
16
- Note that this script will check for the consistency of the names (assuming
17
- a pair of related reads contains the same name varying only in a trailing
18
- slash (/) followed by a digit. If you want to turn this feature off just
19
- set the -T option to zero. If you want to decrease the sampling period (to
20
- speed the script up) or increase it (to make it more sensitive to errors)
21
- just change the -T option accordingly.
22
-
23
- Usage:
24
- $0 [-T <int> ]<output_fasta> <input_fasta_1> <input_fasta_2> [additional input files...]
25
-
26
- Where,
27
- -T <int> : Optional. Integer indicating the sampling period for
28
- names evaluation (see Description above).
29
- By default: 1000.
30
- output_fasta : Output file
31
- input_fasta_1 : First FastA file
32
- input_fasta_2 : Second FastA file
33
- ... : Any additional FastA files (or none)
34
-
35
- HELP
36
- ;
37
- my $eval_T = 1000;
38
- if(exists $ARGV[0] and exists $ARGV[1] and $ARGV[0] eq '-T'){
39
- $eval_T = $ARGV[1]+0;
40
- shift @ARGV;
41
- shift @ARGV;
42
- }
43
- my $out = shift @ARGV;
44
- my @in = @ARGV;
45
- $/ = "\n>";
46
-
47
- die $HELP unless $out and $#in >= 1;
48
- open OUT, ">", $out or die "Unable to write on $out: $!\n";
49
- print "Output file: $out\n";
50
-
51
- my @in_fh = ();
52
-
53
- for my $k (0 .. $#in) {
54
- $in_fh[$k] = gensym;
55
- open $in_fh[$k], "<", $in[$k] or die "Unable to read $in[$k]: $!\n";
56
- print "Input file: $in[$k]\n";
57
- }
58
-
59
- my $i = 0;
60
- my $frl;
61
- LINE: while(1){
62
- my $name = "";
63
- print STDERR "\rEntry: $i " unless $i % 1000;
64
- FILE: for my $k (0 .. $#in_fh){
65
- my $ln = readline($in_fh[$k]);
66
- last LINE if $k==0 and not defined $ln;
67
- defined $ln or die "Impossible to read next entry ($.) from $in[$k]: $!\n";
68
- $ln =~ s/^\>?/>/;
69
- $ln =~ s/\>$//;
70
- $ln =~ s/^;.*//gm;
71
- if($eval_T and not $i % $eval_T){
72
- unless($name){
73
- $ln =~ m/^>(.*?)[\/ \\_]\d+/ or die "Impossible to evaluate names!\n offending entry:\n$ln\n";
74
- $name = $1;
75
- }
76
- die "Inconsistent name!\n base name is $name\n offending entry is:\n$ln\n" unless $ln =~ /^>$name/;
77
- }
78
- unless($frl){
79
- $ln =~ m/^>.*?\n(.*?)\n/ or die "Unexpected format!\n offending entry:\n$ln\n";
80
- my $i = $ln;
81
- $i =~ s/^>.*?\n//;
82
- $i =~ s/\n//g;
83
- $frl = length $i;
84
- }
85
- print OUT $ln;
86
- }
87
- $i++;
88
- }
89
- print "\rNumber of entries: $i \nFirst read length: $frl\n";
90
- close OUT;
91
-
92
- for my $k(0..$#in_fh){print "ALERT: The file $in[$k] contains trailing entries\n" if defined readline($in_fh[$k])}
93
-
@@ -1,38 +0,0 @@
1
- #!/usr/bin/env perl
2
- #
3
- # @author Luis M Rodriguez-R
4
- # @update Oct-07-2015
5
- # @license artistic license 2.0
6
- #
7
-
8
- use warnings;
9
- use strict;
10
-
11
- $#ARGV>=0 or die "
12
- Usage:
13
- $0 seqs.fa... > length.txt
14
-
15
- seqs.fa One or more FastA files.
16
- length.txt A table with the lengths of the sequences.
17
-
18
- ";
19
-
20
- for my $fa (@ARGV){
21
- open FA, "<", $fa or die "Cannot open file: $fa: $!\n";
22
- my $def = '';
23
- my $len = 0;
24
- while(<FA>){
25
- next if /^;/;
26
- if(m/^>(\S+)\s?/){
27
- print "$def\t$len\n" if $def;
28
- $def = $1;
29
- $len = 0;
30
- }else{
31
- s/[^A-Za-z]//g;
32
- $len+= length $_;
33
- }
34
- }
35
- print "$def\t$len\n" if $def;
36
- close FA;
37
- }
38
-
@@ -1,89 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require 'optparse'
4
- o = {x: 'N', trim: false, wrap: 70}
5
- ARGV << '-h' if ARGV.empty?
6
- OptionParser.new do |opts|
7
- opts.banner = "
8
- Mask sequence region(s) in a FastA file.
9
-
10
- Usage: #{$0} [options]"
11
- opts.separator ''
12
- opts.separator 'Mandatory'
13
- opts.on('-i', '--in FILE', 'Input FastA file.'){ |v| o[:in] = v }
14
- opts.on('-o', '--out FILE', 'Output FastA file.'){ |v| o[:out] = v }
15
- opts.on('-r', '--regions REG1,REG2,...', Array,
16
- 'Regions to mask separated by commas.',
17
- 'Each region must be in the format "sequence_id:from..to"'
18
- ){ |v| o[:reg] = v }
19
- opts.separator ''
20
- opts.separator 'Options'
21
- opts.on('-x', '--symbol CHAR',
22
- 'Character used to mask the region(s)',
23
- "By default: #{o[:x]}."){ |v| o[:x] = v }
24
- opts.on('-t', '--trim',
25
- 'Trim masked regions extending to the edge of a sequence'
26
- ){ |v| o[:trim] = v }
27
- opts.on('-w', '--wrap INT',
28
- 'Line length to wrap sequences. Use 0 to generate 1-line sequences.',
29
- "By default: #{o[:wrap]}."){ |v| o[:wrap] = v.to_i }
30
- opts.on('-h', '--help', 'Display this screen.') do
31
- puts opts
32
- exit
33
- end
34
- opts.separator ''
35
- end.parse!
36
- abort '-i is mandatory' if o[:in].nil?
37
- abort '-o is mandatory' if o[:out].nil?
38
- abort '-r is mandatory' if o[:reg].nil?
39
-
40
- def wrap_width(txt, len)
41
- return "" if txt.empty?
42
- return "#{txt}\n" if len==0
43
- txt.gsub(/(.{1,#{len}})/,"\\1\n")
44
- end
45
-
46
- # Read input sequences
47
- sq = {}
48
- File.open(o[:in], 'r') do |ifh|
49
- bf = ''
50
- ifh.each('>') do |i|
51
- (dln, seq) = i.split(/[\n\r]+/, 2)
52
- next if seq.nil?
53
- id = dln.gsub(/\s.*/, '')
54
- seq.gsub!(/[\s>]/, '')
55
- sq[id] = [dln, seq]
56
- end
57
- end
58
-
59
- # Parse coordinates and mask regions
60
- last_id = nil
61
- o[:reg].each do |i|
62
- m = i.match(/^(?:(.+):)?(\d+)\.\.(\d+)$/) or
63
- abort "Unexpected region format: #{i}"
64
- r = [m[1], m[2].to_i-1, m[3].to_i-1]
65
- if r[0].nil?
66
- abort "Region missing sequence ID: #{i}" if last_id.nil?
67
- r[0] = last_id
68
- end
69
- last_id = r[0]
70
- sq[r[0]] or abort "Cannot find sequence #{r[0]}"
71
- r[1] <= r[2] or abort "Malformed range: #{i}"
72
- if r[1] < 0 or r[2] > sq[r[0]][1].size
73
- abort "Range extends beyond the edge of the sequence: #{i}"
74
- end
75
- sq[r[0]][1][r[1] .. r[2]] = o[:x]*(1+r[2]-r[1])
76
- end
77
-
78
- # Trim sequences and generate output
79
- ofh = File.open(o[:out], 'w')
80
- sq.each do |_k,v|
81
- ofh.puts ">#{v[0]}"
82
- if o[:trim]
83
- v[1].gsub!(/^#{o[:x]}+/,'')
84
- v[1].gsub!(/#{o[:x]}+$/,'')
85
- end
86
- ofh.print wrap_width(v[1], o[:wrap])
87
- end
88
- ofh.close
89
-
@@ -1,36 +0,0 @@
1
- #!/usr/bin/env perl
2
- #
3
- # @author Luis M Rodriguez-R
4
- # @update Mar-17-2016
5
- # @license artistic license 2.0
6
- #
7
-
8
- use warnings;
9
- use strict;
10
-
11
- $#ARGV>=1 or die "
12
- Usage:
13
- $0 outdir seqs.fa...
14
-
15
- outdir Output directory for the individual files.
16
- seqs.fa One or more FastA files.
17
-
18
- ";
19
-
20
- my $dir = shift @ARGV;
21
-
22
- for my $fa (@ARGV){
23
- open FA, "<", $fa or die "Cannot open file: $fa: $!\n";
24
- my $file = '';
25
- while(<FA>){
26
- next if /^;/;
27
- if(m/^>(\S+)\s?/){
28
- close ONE if $file;
29
- $file = $dir."/".$1.".fasta";
30
- open ONE, ">", $file or die "Cannot open file: $file: $!\n";
31
- }
32
- print ONE $_ if $file;
33
- }
34
- close ONE if $file;
35
- }
36
-