miga-base 0.7.26.0 → 1.0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (337) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/_data/aai-intax.blast.tsv.gz +0 -0
  3. data/lib/miga/_data/aai-intax.diamond.tsv.gz +0 -0
  4. data/lib/miga/_data/aai-novel.blast.tsv.gz +0 -0
  5. data/lib/miga/_data/aai-novel.diamond.tsv.gz +0 -0
  6. data/lib/miga/cli/action/classify_wf.rb +2 -2
  7. data/lib/miga/cli/action/derep_wf.rb +1 -1
  8. data/lib/miga/cli/action/doctor.rb +57 -14
  9. data/lib/miga/cli/action/doctor/base.rb +47 -23
  10. data/lib/miga/cli/action/init.rb +11 -7
  11. data/lib/miga/cli/action/init/files_helper.rb +1 -0
  12. data/lib/miga/cli/action/ncbi_get.rb +3 -3
  13. data/lib/miga/cli/action/tax_dist.rb +2 -2
  14. data/lib/miga/cli/action/wf.rb +5 -4
  15. data/lib/miga/common.rb +1 -0
  16. data/lib/miga/daemon.rb +11 -4
  17. data/lib/miga/dataset/result.rb +10 -6
  18. data/lib/miga/json.rb +5 -4
  19. data/lib/miga/metadata.rb +5 -1
  20. data/lib/miga/parallel.rb +36 -0
  21. data/lib/miga/project.rb +8 -8
  22. data/lib/miga/project/base.rb +4 -4
  23. data/lib/miga/project/result.rb +2 -2
  24. data/lib/miga/sqlite.rb +10 -2
  25. data/lib/miga/version.rb +23 -9
  26. data/scripts/aai_distances.bash +16 -18
  27. data/scripts/ani_distances.bash +16 -17
  28. data/scripts/assembly.bash +31 -16
  29. data/scripts/haai_distances.bash +3 -27
  30. data/scripts/miga.bash +6 -4
  31. data/scripts/p.bash +1 -1
  32. data/scripts/read_quality.bash +9 -18
  33. data/scripts/trimmed_fasta.bash +14 -30
  34. data/scripts/trimmed_reads.bash +36 -36
  35. data/test/parallel_test.rb +31 -0
  36. data/test/project_test.rb +2 -1
  37. data/test/remote_dataset_test.rb +1 -1
  38. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +41964 -0
  39. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +32439 -0
  40. data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +62056 -0
  41. data/utils/FastAAI/FastAAI/FastAAI +1336 -0
  42. data/utils/FastAAI/README.md +84 -0
  43. data/utils/FastAAI/kAAI_v1.0_virus.py +1296 -0
  44. data/utils/distance/commands.rb +1 -0
  45. data/utils/distance/database.rb +0 -1
  46. data/utils/distance/runner.rb +2 -4
  47. data/utils/enveomics/Docs/recplot2.md +244 -0
  48. data/utils/enveomics/Examples/aai-matrix.bash +66 -0
  49. data/utils/enveomics/Examples/ani-matrix.bash +66 -0
  50. data/utils/enveomics/Examples/essential-phylogeny.bash +105 -0
  51. data/utils/enveomics/Examples/unus-genome-phylogeny.bash +100 -0
  52. data/utils/enveomics/LICENSE.txt +73 -0
  53. data/utils/enveomics/Makefile +52 -0
  54. data/utils/enveomics/Manifest/Tasks/aasubs.json +103 -0
  55. data/utils/enveomics/Manifest/Tasks/blasttab.json +786 -0
  56. data/utils/enveomics/Manifest/Tasks/distances.json +161 -0
  57. data/utils/enveomics/Manifest/Tasks/fasta.json +802 -0
  58. data/utils/enveomics/Manifest/Tasks/fastq.json +291 -0
  59. data/utils/enveomics/Manifest/Tasks/graphics.json +126 -0
  60. data/utils/enveomics/Manifest/Tasks/mapping.json +137 -0
  61. data/utils/enveomics/Manifest/Tasks/ogs.json +382 -0
  62. data/utils/enveomics/Manifest/Tasks/other.json +906 -0
  63. data/utils/enveomics/Manifest/Tasks/remote.json +355 -0
  64. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +638 -0
  65. data/utils/enveomics/Manifest/Tasks/tables.json +308 -0
  66. data/utils/enveomics/Manifest/Tasks/trees.json +68 -0
  67. data/utils/enveomics/Manifest/Tasks/variants.json +111 -0
  68. data/utils/enveomics/Manifest/categories.json +165 -0
  69. data/utils/enveomics/Manifest/examples.json +154 -0
  70. data/utils/enveomics/Manifest/tasks.json +4 -0
  71. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +69 -0
  72. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -0
  73. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -0
  74. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -0
  75. data/utils/enveomics/Pipelines/assembly.pbs/README.md +189 -0
  76. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +112 -0
  77. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +23 -0
  78. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +44 -0
  79. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +50 -0
  80. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +37 -0
  81. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +68 -0
  82. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +49 -0
  83. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +80 -0
  84. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +57 -0
  85. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +63 -0
  86. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +38 -0
  87. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +73 -0
  88. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +21 -0
  89. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +72 -0
  90. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +98 -0
  91. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -0
  92. data/utils/enveomics/Pipelines/blast.pbs/README.md +127 -0
  93. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +109 -0
  94. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +128 -0
  95. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +16 -0
  96. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +22 -0
  97. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +26 -0
  98. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +89 -0
  99. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +29 -0
  100. data/utils/enveomics/Pipelines/idba.pbs/README.md +49 -0
  101. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +95 -0
  102. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +56 -0
  103. data/utils/enveomics/Pipelines/trim.pbs/README.md +54 -0
  104. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +70 -0
  105. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +130 -0
  106. data/utils/enveomics/README.md +42 -0
  107. data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +171 -0
  108. data/utils/enveomics/Scripts/Aln.cat.rb +221 -0
  109. data/utils/enveomics/Scripts/Aln.convert.pl +35 -0
  110. data/utils/enveomics/Scripts/AlphaDiversity.pl +152 -0
  111. data/utils/enveomics/Scripts/BedGraph.tad.rb +93 -0
  112. data/utils/enveomics/Scripts/BedGraph.window.rb +71 -0
  113. data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +102 -0
  114. data/utils/enveomics/Scripts/BlastTab.addlen.rb +63 -0
  115. data/utils/enveomics/Scripts/BlastTab.advance.bash +48 -0
  116. data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +55 -0
  117. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +104 -0
  118. data/utils/enveomics/Scripts/BlastTab.cogCat.rb +76 -0
  119. data/utils/enveomics/Scripts/BlastTab.filter.pl +47 -0
  120. data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +194 -0
  121. data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +104 -0
  122. data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +157 -0
  123. data/utils/enveomics/Scripts/BlastTab.recplot2.R +48 -0
  124. data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +86 -0
  125. data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +119 -0
  126. data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +86 -0
  127. data/utils/enveomics/Scripts/BlastTab.subsample.pl +47 -0
  128. data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +114 -0
  129. data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +90 -0
  130. data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +101 -0
  131. data/utils/enveomics/Scripts/Chao1.pl +97 -0
  132. data/utils/enveomics/Scripts/CharTable.classify.rb +234 -0
  133. data/utils/enveomics/Scripts/EBIseq2tax.rb +83 -0
  134. data/utils/enveomics/Scripts/FastA.N50.pl +60 -0
  135. data/utils/enveomics/Scripts/FastA.extract.rb +152 -0
  136. data/utils/enveomics/Scripts/FastA.filter.pl +52 -0
  137. data/utils/enveomics/Scripts/FastA.filterLen.pl +28 -0
  138. data/utils/enveomics/Scripts/FastA.filterN.pl +60 -0
  139. data/utils/enveomics/Scripts/FastA.fragment.rb +100 -0
  140. data/utils/enveomics/Scripts/FastA.gc.pl +42 -0
  141. data/utils/enveomics/Scripts/FastA.interpose.pl +93 -0
  142. data/utils/enveomics/Scripts/FastA.length.pl +38 -0
  143. data/utils/enveomics/Scripts/FastA.mask.rb +89 -0
  144. data/utils/enveomics/Scripts/FastA.per_file.pl +36 -0
  145. data/utils/enveomics/Scripts/FastA.qlen.pl +57 -0
  146. data/utils/enveomics/Scripts/FastA.rename.pl +65 -0
  147. data/utils/enveomics/Scripts/FastA.revcom.pl +23 -0
  148. data/utils/enveomics/Scripts/FastA.sample.rb +98 -0
  149. data/utils/enveomics/Scripts/FastA.slider.pl +85 -0
  150. data/utils/enveomics/Scripts/FastA.split.pl +55 -0
  151. data/utils/enveomics/Scripts/FastA.split.rb +79 -0
  152. data/utils/enveomics/Scripts/FastA.subsample.pl +131 -0
  153. data/utils/enveomics/Scripts/FastA.tag.rb +65 -0
  154. data/utils/enveomics/Scripts/FastA.toFastQ.rb +69 -0
  155. data/utils/enveomics/Scripts/FastA.wrap.rb +48 -0
  156. data/utils/enveomics/Scripts/FastQ.filter.pl +54 -0
  157. data/utils/enveomics/Scripts/FastQ.interpose.pl +90 -0
  158. data/utils/enveomics/Scripts/FastQ.maskQual.rb +89 -0
  159. data/utils/enveomics/Scripts/FastQ.offset.pl +90 -0
  160. data/utils/enveomics/Scripts/FastQ.split.pl +53 -0
  161. data/utils/enveomics/Scripts/FastQ.tag.rb +70 -0
  162. data/utils/enveomics/Scripts/FastQ.test-error.rb +81 -0
  163. data/utils/enveomics/Scripts/FastQ.toFastA.awk +24 -0
  164. data/utils/enveomics/Scripts/GFF.catsbj.pl +127 -0
  165. data/utils/enveomics/Scripts/GenBank.add_fields.rb +84 -0
  166. data/utils/enveomics/Scripts/HMM.essential.rb +351 -0
  167. data/utils/enveomics/Scripts/HMM.haai.rb +168 -0
  168. data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +83 -0
  169. data/utils/enveomics/Scripts/JPlace.distances.rb +88 -0
  170. data/utils/enveomics/Scripts/JPlace.to_iToL.rb +320 -0
  171. data/utils/enveomics/Scripts/M5nr.getSequences.rb +81 -0
  172. data/utils/enveomics/Scripts/MeTaxa.distribution.pl +198 -0
  173. data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +35 -0
  174. data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +49 -0
  175. data/utils/enveomics/Scripts/NCBIacc2tax.rb +92 -0
  176. data/utils/enveomics/Scripts/Newick.autoprune.R +27 -0
  177. data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +228 -0
  178. data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +32 -0
  179. data/utils/enveomics/Scripts/RefSeq.download.bash +48 -0
  180. data/utils/enveomics/Scripts/SRA.download.bash +55 -0
  181. data/utils/enveomics/Scripts/TRIBS.plot-test.R +36 -0
  182. data/utils/enveomics/Scripts/TRIBS.test.R +39 -0
  183. data/utils/enveomics/Scripts/Table.barplot.R +31 -0
  184. data/utils/enveomics/Scripts/Table.df2dist.R +30 -0
  185. data/utils/enveomics/Scripts/Table.filter.pl +61 -0
  186. data/utils/enveomics/Scripts/Table.merge.pl +77 -0
  187. data/utils/enveomics/Scripts/Table.prefScore.R +60 -0
  188. data/utils/enveomics/Scripts/Table.replace.rb +69 -0
  189. data/utils/enveomics/Scripts/Table.round.rb +63 -0
  190. data/utils/enveomics/Scripts/Table.split.pl +57 -0
  191. data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +227 -0
  192. data/utils/enveomics/Scripts/VCF.KaKs.rb +147 -0
  193. data/utils/enveomics/Scripts/VCF.SNPs.rb +88 -0
  194. data/utils/enveomics/Scripts/aai.rb +419 -0
  195. data/utils/enveomics/Scripts/ani.rb +362 -0
  196. data/utils/enveomics/Scripts/anir.rb +137 -0
  197. data/utils/enveomics/Scripts/clust.rand.rb +102 -0
  198. data/utils/enveomics/Scripts/gi2tax.rb +103 -0
  199. data/utils/enveomics/Scripts/in_silico_GA_GI.pl +96 -0
  200. data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
  201. data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
  202. data/utils/enveomics/Scripts/lib/enveomics.R +1 -0
  203. data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +293 -0
  204. data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +175 -0
  205. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +24 -0
  206. data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +17 -0
  207. data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +30 -0
  208. data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +253 -0
  209. data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +63 -0
  210. data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +182 -0
  211. data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +49 -0
  212. data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +74 -0
  213. data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +237 -0
  214. data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +3 -0
  215. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +31 -0
  216. data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +152 -0
  217. data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +73 -0
  218. data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +135 -0
  219. data/utils/enveomics/Scripts/ogs.annotate.rb +88 -0
  220. data/utils/enveomics/Scripts/ogs.core-pan.rb +160 -0
  221. data/utils/enveomics/Scripts/ogs.extract.rb +125 -0
  222. data/utils/enveomics/Scripts/ogs.mcl.rb +186 -0
  223. data/utils/enveomics/Scripts/ogs.rb +104 -0
  224. data/utils/enveomics/Scripts/ogs.stats.rb +131 -0
  225. data/utils/enveomics/Scripts/rbm-legacy.rb +172 -0
  226. data/utils/enveomics/Scripts/rbm.rb +100 -0
  227. data/utils/enveomics/Scripts/sam.filter.rb +148 -0
  228. data/utils/enveomics/Tests/Makefile +10 -0
  229. data/utils/enveomics/Tests/Mgen_M2288.faa +3189 -0
  230. data/utils/enveomics/Tests/Mgen_M2288.fna +8282 -0
  231. data/utils/enveomics/Tests/Mgen_M2321.fna +8288 -0
  232. data/utils/enveomics/Tests/Nequ_Kin4M.faa +2970 -0
  233. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
  234. data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +7 -0
  235. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +17 -0
  236. data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +137 -0
  237. data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +123 -0
  238. data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +200 -0
  239. data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +55 -0
  240. data/utils/enveomics/Tests/alkB.nwk +1 -0
  241. data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +13 -0
  242. data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +17 -0
  243. data/utils/enveomics/Tests/hiv1.faa +59 -0
  244. data/utils/enveomics/Tests/hiv1.fna +134 -0
  245. data/utils/enveomics/Tests/hiv2.faa +70 -0
  246. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +233 -0
  247. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +1 -0
  248. data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +233 -0
  249. data/utils/enveomics/Tests/phyla_counts.tsv +10 -0
  250. data/utils/enveomics/Tests/primate_lentivirus.ogs +11 -0
  251. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +9 -0
  252. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +8 -0
  253. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +6 -0
  254. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +9 -0
  255. data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +6 -0
  256. data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +6 -0
  257. data/utils/enveomics/build_enveomics_r.bash +45 -0
  258. data/utils/enveomics/enveomics.R/DESCRIPTION +31 -0
  259. data/utils/enveomics/enveomics.R/NAMESPACE +39 -0
  260. data/utils/enveomics/enveomics.R/R/autoprune.R +155 -0
  261. data/utils/enveomics/enveomics.R/R/barplot.R +184 -0
  262. data/utils/enveomics/enveomics.R/R/cliopts.R +135 -0
  263. data/utils/enveomics/enveomics.R/R/df2dist.R +154 -0
  264. data/utils/enveomics/enveomics.R/R/growthcurve.R +331 -0
  265. data/utils/enveomics/enveomics.R/R/prefscore.R +79 -0
  266. data/utils/enveomics/enveomics.R/R/recplot.R +354 -0
  267. data/utils/enveomics/enveomics.R/R/recplot2.R +1631 -0
  268. data/utils/enveomics/enveomics.R/R/tribs.R +583 -0
  269. data/utils/enveomics/enveomics.R/R/utils.R +80 -0
  270. data/utils/enveomics/enveomics.R/README.md +81 -0
  271. data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
  272. data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
  273. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +16 -0
  274. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +16 -0
  275. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +16 -0
  276. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +25 -0
  277. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +46 -0
  278. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -0
  279. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +47 -0
  280. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -0
  281. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -0
  282. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +40 -0
  283. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +103 -0
  284. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +67 -0
  285. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -0
  286. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -0
  287. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +45 -0
  288. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +44 -0
  289. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +47 -0
  290. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +75 -0
  291. data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +50 -0
  292. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +44 -0
  293. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +139 -0
  294. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +45 -0
  295. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -0
  296. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +77 -0
  297. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -0
  298. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -0
  299. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -0
  300. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -0
  301. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +47 -0
  302. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -0
  303. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -0
  304. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +45 -0
  305. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -0
  306. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -0
  307. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -0
  308. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -0
  309. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +52 -0
  310. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -0
  311. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +51 -0
  312. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +43 -0
  313. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +82 -0
  314. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +59 -0
  315. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -0
  316. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +36 -0
  317. data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +23 -0
  318. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +68 -0
  319. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -0
  320. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -0
  321. data/utils/enveomics/enveomics.R/man/growth.curves.Rd +14 -0
  322. data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +13 -0
  323. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +78 -0
  324. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +46 -0
  325. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +45 -0
  326. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +125 -0
  327. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -0
  328. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -0
  329. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -0
  330. data/utils/enveomics/globals.mk +8 -0
  331. data/utils/enveomics/manifest.json +9 -0
  332. data/utils/multitrim/Multitrim How-To.pdf +0 -0
  333. data/utils/multitrim/README.md +67 -0
  334. data/utils/multitrim/multitrim.py +1555 -0
  335. data/utils/multitrim/multitrim.yml +13 -0
  336. data/utils/requirements.txt +4 -3
  337. metadata +304 -3
@@ -0,0 +1,93 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'optparse'
4
+
5
# Default options: a central range of 0.5 (inter-quartile range), one overall
# average rather than one per sequence, and no sequence-length column.
o = {range: 0.5, perseq: false, length: false}
# Running without arguments shows the help screen instead of failing.
ARGV << '-h' if ARGV.empty?
OptionParser.new do |opt|
  opt.banner = "
Estimates the truncated average sequencing depth (TAD) from a BedGraph file.

IMPORTANT: This script doesn't consider zero-coverage positions if missing
from the file. If you produce your BedGraph file with bedtools genomecov and
want to consider zero-coverage position, be sure to use -bga (not -bg).

Usage: #{$0} [options]"
  opt.separator ''
  opt.on('-i', '--input PATH',
    'Input BedGraph file (mandatory).'){ |v| o[:i]=v }
  opt.on('-r', '--range FLOAT',
    'Central range to consider, between 0 and 1.',
    "By default: #{o[:range]} (inter-quartile range)."
  ){ |v| o[:range]=v.to_f }
  opt.on('-s', '--per-seq',
    'Calculate averages per reference sequence, not total.',
    'Assumes a sorted BedGraph file.'
  ){ |v| o[:perseq] = v }
  opt.on('-l', '--length',
    'Add sequence length to the output.'){ |v| o[:length] = v }
  opt.on('-h', '--help', 'Display this screen.') do
    puts opt
    exit
  end
  opt.separator ''
end.parse!
abort '-i is mandatory.' if o[:i].nil?
# A range outside 0..1 would turn into a negative (or oversized) number of
# positions to trim and silently produce a meaningless average.
abort '-r must be between 0 and 1.' unless o[:range].between?(0.0, 1.0)
36
+
37
# Removes r positions from the depth histogram d, walking its bins in the
# order given by idx.
#
# d:   array indexed by depth; each entry is a position count or nil.
# idx: bin indices in the order they must be consumed.
# r:   number of positions to remove.
#
# Bins that are consumed completely with positions still owed are set to nil;
# the first bin able to absorb what is left keeps the difference and ends the
# walk. The histogram is modified in place and also returned.
def pad(d, idx, r)
  owed = r
  idx.each do |bin|
    count = d[bin]
    next if count.nil?
    left = count - owed
    if left < 0
      # Bin exhausted: carry the deficit over to the next bin
      owed = -left
      d[bin] = nil
    else
      d[bin] = left
      break
    end
  end
  d
end
47
+
48
# Prints the truncated average depth for one depth histogram.
#
# sq: sequence name; printed as the first column only when o[:perseq] is set.
# d:  histogram indexed by depth (d[depth] = number of positions, nil if
#     none). It is trimmed in place.
# ln: total number of positions in the histogram; appended as the last column
#     when o[:length] is set.
# o:  options hash; reads :range, :perseq and :length.
def report(sq, d, ln, o)
  # Number of positions to discard from each end of the depth distribution
  trim = ((1.0 - o[:range]) / 2.0 * ln).round

  # Trim the lowest depths first, then the highest
  pad(d, d.each_index.to_a, trim)
  pad(d, d.each_index.to_a.reverse, trim)

  # Mean depth over the positions that are left (0.0 when nothing is left)
  any = false
  positions = 0
  weighted = 0
  d.each_with_index do |count, depth|
    next if count.nil?
    any = true
    positions += count
    weighted += depth * count
  end
  fields = [any ? weighted.to_f / positions : 0.0]

  # One tab-delimited row: [sequence,] average [, length]
  fields.unshift(sq) if o[:perseq]
  fields << ln if o[:length]
  puts fields.join("\t")
end
69
+
70
# Read BedGraph
# d is a histogram indexed by depth (d[depth] = number of positions seen at
# that depth), ln the number of positions accumulated so far, and pre_sq the
# sequence name of the previous data row.
d = []
ln = 0
pre_sq = nil
File.open(o[:i], "r") do |ifh|
  ifh.each_line do |i|
    # Comments, blank lines and the optional track/browser header lines carry
    # no data columns; without this guard they fail on the arithmetic below.
    next if i =~ /^#/ || i =~ /^\s*$/ || i =~ /^(?:track|browser)(?: |$)/
    r = i.chomp.split("\t")
    sq = r.shift
    # In per-sequence mode, a change of sequence name closes the current one
    if o[:perseq] and !pre_sq.nil? and pre_sq!=sq
      report(pre_sq, d, ln, o)
      d = []
      ln = 0
    end
    # Remaining columns: start, end, depth
    r.map! { |j| j.to_i }
    l = r[1]-r[0]
    d[ r[2] ] ||= 0
    d[ r[2] ] += l
    ln += l
    pre_sq = sq
  end
end
# Last sequence (or the whole file when not in per-sequence mode)
report(pre_sq, d, ln, o)
93
+
@@ -0,0 +1,71 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'optparse'
4
+
5
# Default options: windows of 1000 base pairs.
o = {win: 1000}
# Running without arguments shows the help screen instead of failing.
ARGV << '-h' if ARGV.empty?
OptionParser.new do |opt|
  opt.banner = "
Estimates the sequencing depth per windows from a BedGraph file.

IMPORTANT: This script doesn't consider zero-coverage positions if missing
from the file. If you produce your BedGraph file with bedtools genomecov and
want to consider zero-coverage position, be sure to use -bga (not -bg).

Usage: #{$0} [options]"
  opt.separator ''
  opt.on('-i', '--input PATH',
    'Input BedGraph file (mandatory).'){ |v| o[:i]=v }
  opt.on('-w', '--win INT',
    'Window size, in base pairs.', "By default: #{o[:win]}."
  ){ |v| o[:win]=v.to_i }
  opt.on('-h', '--help', 'Display this screen.') do
    puts opt
    exit
  end
  opt.separator ''
end.parse!
abort '-i is mandatory.' if o[:i].nil?
# With a window of zero or less the window end never moves past the number of
# positions read, so the reporting loop would never terminate.
abort '-w must be a positive integer.' unless o[:win] > 0
29
+
30
# Prints one window as a tab-delimited row: start, end, mean depth, and the
# comma-separated names of the sequences in the window.
#
# d:    histogram indexed by depth (d[depth] = number of positions, nil if
#       none).
# a, b: first and last coordinate of the window.
# seqs: hash whose keys are the sequence names contributing to the window.
def report(d, a, b, seqs)
  # Mean depth over all counted positions (0.0 for an empty histogram)
  any = false
  positions = 0
  weighted = 0
  d.each_with_index do |count, depth|
    next if count.nil?
    any = true
    positions += count
    weighted += depth * count
  end
  mean = any ? weighted.to_f / positions : 0.0

  puts [a, b, mean, seqs.keys.join(",")].join("\t")
end
41
+
42
# Read BedGraph
# d is the depth histogram of the current window (d[depth] = number of
# positions), ln the cumulative number of positions read, a and b the first
# and last coordinate of the current window counted over the whole input, and
# seqs the set of sequence names seen in the current window.
d = []
ln = 0
a = 1
seqs = {}
b = o[:win]
File.open(o[:i], "r") do |ifh|
  ifh.each_line do |i|
    # Comments, blank lines and the optional track/browser header lines carry
    # no data columns; without this guard they fail on the arithmetic below.
    next if i =~ /^#/ || i =~ /^\s*$/ || i =~ /^(?:track|browser)(?: |$)/
    r = i.chomp.split("\t")
    sq = r.shift
    seqs[sq] = 1
    # Remaining columns: start, end, depth
    r.map!{ |j| j.to_i }
    l = r[1]-r[0]
    d[ r[2] ] ||= 0
    d[ r[2] ] += l
    ln += l
    # One interval can close several windows in a row
    while ln >= b
      # Keep in this window only the part of the interval that fits in it
      d[ r[2] ] -= (ln-b)
      report(d, a, b, seqs)
      # The overflow, if any, opens the next window with the same sequence
      seqs = {}
      seqs[ sq ] = 1 if ln > b
      d = []
      d[ r[2] ] = (ln-b)
      a = b + 1
      b = a + o[:win] - 1
    end
  end
end
# NOTE(review): positions after the last complete window are never reported;
# confirm that dropping the trailing partial window is intended.
71
+
@@ -0,0 +1,102 @@
1
+ #!/usr/bin/env perl
2
+ #
3
+ # @authors Konstantinos Konstantinidis (initial version)
4
+ # modified to work with the BLASTp 2.2.25+ m0 output by
5
+ # Despina Tsementzi & Luis M. Rodriguez-R
6
+ # @updated Dec-21-2015
7
+ #
8
+
9
+
10
use strict;

# Records are read up to the string "Lambda " instead of up to a newline, so
# each read returns one chunk of the pairwise report. The parsing below
# expects one query and its alignments per chunk.
$/ = "Lambda ";

# Alignment state shared by the lines of one alignment block
my @query;
my @subject;
my @similarity;
my $length = "0";

my($cigar_chr, $blast) = @ARGV;

($cigar_chr and $blast) or die "
.Description:
Counts the different AA substitutions in the best hit blast alignments, from
a BLASTP pairwise format output (-outfmt 0 in BLAST+, -m 0 in legacy BLAST).

.Usage: $0 cigar_char blast.m0.txt > aa-subs.list

cigar_char Use '+' for similar substitutions, use '_' for non similar
substitutions
blast.m0.txt Blast in 'text' format (-outfmt/-m 0).
aa-subs.list A tab-delimited raw file with one substitution per row and
columns:
(1) Name-of-query_Name-of-subject
(2) AA-in-subject
(3) AA-in-query
(4) Total-Align-Length

";

# For each blast result (i.e., for each query)
open my $blast_fh, "<", $blast or die "Cannot read file: $blast: $!\n";
while(my $data = <$blast_fh>) {
   $data =~ s/\r//g;
   my ($data_q, @array_matches) = split(/>/, $data);
   my ($name_query) = ($data_q =~ /Query\= (\S+?)(?:_GENE|\s)/);
   # Legacy BLAST reports "(N letters)", BLAST+ reports "Length=N"
   my ($length_query) = ($data_q =~ /\(([\d,]+) letters/ );
   ($length_query) = ($data_q =~ /Length=([\d,]+)/) unless $length_query;
   # Chunks without a query length cannot pass the filters below
   next unless defined $length_query;
   $length_query =~ tr/,//d;

   # For each alignment (i.e., for each HSP),
   # note the "last" at the end of the block,
   # so only the best match is considered
   foreach my $data_f (@array_matches) {
      # Capture statistics
      my ($length_match) = ($data_f =~ /Identities = \d+\/(\d+)/);
      my ($identity_match) = ($data_f =~ /Identities = \d+\/\d+ \((\d+)%/);
      my ($target_name) = ($data_f =~ /^\s?(\S+)/);

      # If the alignment meets minimum requirements
      if ($length_query >30 && ($length_match/$length_query > 0.7) && $identity_match > 60) {
         # Spaces become underscores, so a blank in the middle (cigar) line,
         # i.e. a non-similar column, is seen as '_' from here on
         $data_f =~ tr/ /_/;
         my @array = split ("\n", $data_f);
         my $blanks = 0;
         my $prefix_size = 0;

         # For each line in the alignment
         for my $data_fff (@array) {
            if ($data_fff =~ /(Query[:_]_+\d+_+)([^_]+)/){
               # Query lines
               $prefix_size = length($1);
               $length = length($2);
               @query = split (//, $2);
            }elsif ($data_fff =~ /^_{11}/){
               # Cigar lines, cut to the columns spanned by the query residues
               @similarity = split(//, substr($data_fff, $prefix_size, $length));
            }elsif ($data_fff =~ /Sbjct[:_]_+\d+_+([^_]+)/){
               # Subject lines
               @subject = split(//, $1);
               # For each alignment column (0 .. $length-1)
               for my $i (0 .. $length - 1){
                  next unless defined $similarity[$i];
                  if ($similarity[$i] eq $cigar_chr) {
                     print "$name_query\_$target_name\t$subject[$i]\t$query[$i]\t$length_match\n";
                  }
               }
               undef @query;
               undef @similarity;
               undef @subject;
            }

            # Remove secondary alignments: three blank lines in a row end the
            # first alignment of this hit
            if ($data_fff =~ /^$/){
               $blanks++;
               last if $blanks >= 3;
            }else{
               $blanks=0;
            }
         } # for my $data_fff (@array)
      } # if ($length_query >30 ...
      last; # <---- So it takes only the best match!
   } # foreach my $data_f (@array_matches)
} # while(my $data=<$blast_fh>)
close $blast_fh;
102
+
@@ -0,0 +1,63 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ #
4
+ # @author: Luis M. Rodriguez-R
5
+ # @license: artistic license 2.0
6
+ #
7
+
8
+ require 'optparse'
9
+
10
# Defaults: use the query column and report progress on STDERR.
o = { sbj: false, q: false }
# Running without arguments shows the help screen.
ARGV << '-h' if ARGV.empty?
parser = OptionParser.new do |opts|
  opts.banner = "
Appends an extra column to a BLAST with the length of the query or the subject
sequence. You can pipe two instances to add both:
cat input.blast | #{$0} -f queries.fa | #{$0} -f subjects.fa -s > output.blast

Usage: #{$0} [options] < input.blast > output.blast"
  opts.separator ''
  opts.separator 'Mandatory'
  opts.on('-f', '--fasta FILE', 'Path to the FastA file') do |v|
    o[:fasta] = v
  end
  opts.separator ''
  opts.separator 'Options'
  opts.on(
    '-s', '--subject',
    'Use the subject column of the BLAST, by default the query column is used'
  ) do
    o[:sbj] = true
  end
  opts.on('-q', '--quiet', 'Run quietly (no STDERR output)') do
    o[:q] = true
  end
  opts.on('-h', '--help', 'Display this screen') do
    puts opts
    exit
  end
  opts.separator ''
end
parser.parse!
abort '-f is mandatory' unless o[:fasta]
35
+
36
# Index the FastA file: len maps each sequence ID (first word of the defline)
# to the number of letters in its sequence lines.
len = {}
id = ''
$stderr.puts "Reading FastA file: #{o[:fasta]}" unless o[:q]
File.open(o[:fasta], 'r') do |fh|
  fh.each_line do |ln|
    if (header = ln.match(/^>(\S+)/))
      # New entry
      id = header[1]
      len[id] = 0
    else
      # Sequence line: only letters count towards the length. A sequence line
      # before any defline means the input is not FastA.
      letters = ln.gsub(/[^A-Za-z]/, '')
      abort 'Error: Unsupported format, expecting FastA' if len[id].nil?
      len[id] += letters.size
    end
  end
end
52
+
53
# Copy every BLAST row from the input and add the length of the sequence
# named in the chosen column (second column for subjects, first for queries).
column = o[:sbj] ? 1 : 0
$stderr.puts 'Appending %s length column' % (o[:sbj] ? 'subject' : 'query') unless o[:q]
ARGF.each_line do |ln|
  ln.chomp!
  id = ln.split(/\t/)[column]
  # Every ID in the BLAST must be present in the FastA file
  abort "Impossible to find sequence of #{id}" if len[id].nil?
  puts "#{ln}\t#{len[id]}"
end
63
+
@@ -0,0 +1,48 @@
1
+ #!/bin/bash
2
+
3
+ #
4
+ # @author Luis M. Rodriguez-R
5
+ # @update Mar-23-2016
6
+ # @license artistic license 2.0
7
+ #
8
+
9
# Show the help unless both positional arguments were given.
if [[ ! $2 ]] ; then
   echo "
.DESCRIPTION
Calculates the percentage of a partial BLAST result. The
value produced slightly subestimates the actual advance,
due to un-flushed output and trailing queries that could
be processed but generate no results.

.USAGE
$0 blast.txt qry.fasta

blast.txt Incomplete Tabular BLAST output.
qry.fasta FastA file with query sequences.
";
   exit 1;
fi

if [[ ! -r $1 ]]; then
   echo "Cannot open file: $1";
   exit 1;
fi

if [[ ! -r $2 ]]; then
   echo "Cannot open file: $2";
   exit 1;
fi

# Query ID (first column) of the second-to-last BLAST line; the very last line
# is not used. Paths are quoted so names containing spaces work.
LAST_Q=$(tail -n 2 "$1" | head -n 1 | awk '{print $1}')

# Line number of the first FastA header whose ID (first word after '>') is
# exactly LAST_Q. The ID is compared as a literal string, not as a regular
# expression, and only the first match is reported.
LAST_Q_NO=""
if [[ -n $LAST_Q ]]; then
   LAST_Q_NO=$(LAST_Q="$LAST_Q" awk '
      /^>/ {
         split(substr($0, 2), word, " ")
         if ((word[1] "") == (ENVIRON["LAST_Q"] "")) { print NR; exit }
      }' "$2")
fi
if [[ ! $LAST_Q_NO ]]; then
   echo "Cannot find sequence: $LAST_Q";
   echo "Make sure you are providing the right query file.";
   exit 1;
fi

# Total number of lines, counted the same way as the header line number above
# (a final line without a newline is counted too, so this is never zero here).
TOTAL_Q_NO=$(awk 'END { print NR }' "$2")
PERC=$(( 100 * LAST_Q_NO / TOTAL_Q_NO ))

echo "$PERC%: $LAST_Q_NO / $TOTAL_Q_NO"
exit 0;
48
+
@@ -0,0 +1,55 @@
1
+ #!/usr/bin/env perl
2
+
3
+ #
4
+ # @author: Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
5
+ # @license: artistic license 2.0
6
+ # @last_update: Mar-23-2015
7
+ #
8
+
9
+ use strict;
10
+ use warnings;
11
+
12
# Show the usage text and stop when the first argument is exactly one of the
# help flags (-h, --help, -?; case-insensitive, one or two leading dashes).
# The pattern is anchored at both ends so that an input file whose name merely
# starts with "-h" is not mistaken for a help request.
if (@ARGV and $ARGV[0] =~ /\A--?(?:h(?:elp)?|\?)\z/i) {
    die "
Usage:
sort blast.txt ... | $0 > blast.bh.txt
$0 blast_sorted.txt ... > blast.bh.txt
$0 -h|--help|-?

blast.txt ... One or more files in Tabular BLAST format.
blast_sorted.txt ... One or more files in Tabular BLAST format pre-sorted.
blast.bh.txt Output file in BLAST format containing best-hits only.
-h|--help|-? Any of these flags triggers this help message and exits.

NOTE: This script assumes that the BLAST is sorted. Because it can read
from the STDIN, calling this script without arguments causes it to wait until
killed or until an EOF (^D) is presented.

";
}
28
+
29
+ my $last_qry = '';
30
+ my @best_res;
31
+
32
# Choose the better of two tabular BLAST hits by bit score (field index 11).
#
# Arguments:
#   $r1  Array reference with the fields of the candidate hit.
#   $r2  Array reference with the fields of the current best hit; may be
#        undefined or empty when there is no best hit yet.
# Returns:
#   The fields of the winning hit as a flat list. The candidate wins only with
#   a strictly greater bit score, so ties keep the current best hit.
sub best_result {
    my ($r1, $r2) = @_;
    # Always hand back a list of fields, never the reference itself.
    return @$r1 unless $r2 and @$r2;
    return $r1->[11] > $r2->[11] ? @$r1 : @$r2;
}
37
+
38
# Walk through the BLAST table (files in @ARGV, or STDIN) keeping the best hit
# of the current query; whenever the query changes, the best hit of the
# previous query is printed. Relies on the input being sorted by query.
while (my $line = <>) {
    chomp $line;
    my @fields = split /\t/, $line;
    die "\nCannot parse BLAST line $.: $line\n" unless exists $fields[1];

    if ($fields[0] ne $last_qry) {
        # New query: flush the previous winner (if any) and start over.
        print join("\t", @best_res), "\n" if $#best_res > 0;
        @best_res = @fields;
        $last_qry = $fields[0];
        next;
    }

    # Same query as the previous line: keep whichever hit scores higher.
    @best_res = best_result(\@fields, \@best_res);
}

# Flush the winner of the last query.
print join("\t", @best_res), "\n" if @best_res;
53
+
54
+
55
+
@@ -0,0 +1,104 @@
1
+ #!/usr/bin/env perl
2
+
3
+ # @author: Luis M. Rodriguez-R
4
+ # @license: Artistic-2.0
5
+
6
+ use warnings;
7
+ use strict;
8
+ use List::Util qw/min max/;
9
+ use Getopt::Std;
10
+
11
# Abort the program with the usage text. The message ends in a newline, so
# die() does not append the script name and line number to it.
sub HELP_MESSAGE {
    my $help = <<"END_HELP";


Description:
Generates a list of hits from a BLAST result concatenating the subject
sequences. This can be used, e.g., to analyze BLAST results against
draft genomes.

Usage:
$0 [options] seq.fa map.bls

seq.fa Subject sequences (ref) in FastA format.
map.bls Mapping of the reads to the reference in BLAST Tabular
format.

Options:
-i <float> Minimum identity to report a result. By default: 70.
-l <int> Minimum alignment length to report a result. By default: 60.
-s The FastA provided is to be treated as a subset of the subject.
By default, it expects all the BLAST subjects to be present in
the FastA.
-q Run quietly.
-h Display this message and exit.

This script creates two files using <map.bls> as prefix with extensions
.rec (for the recruitment plot) and .lim (for the limits of the different
sequences in <seq.fa>).

END_HELP
    die $help;
}
39
+
40
# Command-line handling: parse the switches, then take the two positional
# arguments (reference FastA and tabular BLAST file).
my %o;
getopts('i:l:sqh', \%o);
my ($fa, $map) = @ARGV;

# Show the help when it was requested or when an input file is missing.
HELP_MESSAGE() if $o{h} or not($fa and $map);

# Apply the defaults only to options that were not given at all, so that an
# explicit "-i 0" or "-l 0" (i.e., no filtering) is respected.
$o{i} = 70 unless defined $o{i};
$o{l} = 60 unless defined $o{l};
47
+
48
# %seq maps each sequence ID to a running coordinate on the concatenated
# reference; @seq keeps the IDs in the order they appear in the FastA.
my %seq = ();
my @seq = ();
my $tot = 0;

SEQ: {
    print STDERR "== Reading reference sequences\n" unless $o{q};
    open my $fa_fh, '<', $fa or die "Cannot read the file: $fa: $!\n";
    my $current = '';
    while (my $line = <$fa_fh>) {
        chomp $line;
        if ($line =~ m/^>(\S+)/) {
            # New entry: it begins one position after the running end of the
            # previous entry, or at 1 when there is no previous entry.
            my $name = $1;
            my $begin = exists $seq{$current} ? $seq{$current} + 1 : 1;
            $seq{$name} = $begin;
            push @seq, $name;
            $current = $name;
            next;
        }
        # Sequence line: advance the running end by the number of letters.
        $line =~ s/[^A-Za-z]//g;
        $seq{$current} += length $line;
    }
    close $fa_fh;
    print STDERR " Found " . (scalar @seq) . " sequences.\n" unless $o{q};
}
71
+
72
# Write the limits file: one line per sequence with its ID, first coordinate
# and last coordinate on the concatenated reference. As a side effect, each
# $seq{ID} is replaced by the first coordinate of that sequence, which is the
# value the mapping step adds to the subject positions.
open my $lim_fh, '>', "$map.lim" or die "Cannot create the file: $map.lim: $!\n";
my $prev_end = 0;
for my $name (@seq) {
    my $first = $prev_end + 1;
    my $last  = $seq{$name};
    print {$lim_fh} "$name\t$first\t$last\n";
    $seq{$name} = $first;
    $prev_end   = $last;
}
close $lim_fh;
79
+
80
# Translate every BLAST hit that passes the identity (-i) and alignment length
# (-l) filters into coordinates on the concatenated reference, and write it to
# <map.bls>.rec as: start, end, identity, bit score, query ID and, when extra
# length columns are present, identity * alignment length / length.
MAP: {
    print STDERR "== Reading mapping\n" unless $o{q};
    open my $bls_fh, '<', $map or die "Cannot read the file: $map: $!\n";
    open my $rec_fh, '>', "$map.rec"
        or die "Cannot create the file: $map.rec: $!\n";
    RESULT: while (my $line = <$bls_fh>) {
        chomp $line;
        my @ln = split /\t/, $line;
        # Require the 12th column to be present; a bit score of 0 is valid.
        (defined $ln[11] and length $ln[11])
            or die "Cannot parse line $map:$.: $line\n";
        next RESULT unless $ln[3] >= $o{l};
        next RESULT unless $ln[2] >= $o{i};
        unless (exists $seq{$ln[1]}) {
            # Unknown subjects are tolerated only in subset mode (-s).
            die "Cannot find the subject sequence: $ln[1]\n" unless $o{s};
            next RESULT;
        }
        # $seq{subject} is the coordinate offset of that subject sequence;
        # subject start/end may be reversed, hence min/max.
        my $offset = $seq{$ln[1]};
        my $start  = $offset + min($ln[8], $ln[9]);
        my $end    = $offset + max($ln[8], $ln[9]);
        # Optional extra column, normalized by the smaller of columns 13 and
        # 14 when both exist, or by column 13 alone. The trailing tab is part
        # of the established output format.
        my $extra = '';
        if (exists $ln[13]) {
            $extra = "\t" . ($ln[2] * $ln[3] / min($ln[12], $ln[13])) . "\t";
        }
        elsif (exists $ln[12]) {
            $extra = "\t" . ($ln[2] * $ln[3] / $ln[12]) . "\t";
        }
        print {$rec_fh} "$start\t$end\t$ln[2]\t$ln[11]\t$ln[0]", $extra, "\n";
    }
    close $bls_fh;
    # Buffered write errors (e.g., a full disk) only surface at close.
    close $rec_fh or die "Cannot write the file: $map.rec: $!\n";
    print STDERR " done.\n" unless $o{q};
}
104
+