miga-base 0.7.26.0 → 1.0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/_data/aai-intax.blast.tsv.gz +0 -0
- data/lib/miga/_data/aai-intax.diamond.tsv.gz +0 -0
- data/lib/miga/_data/aai-novel.blast.tsv.gz +0 -0
- data/lib/miga/_data/aai-novel.diamond.tsv.gz +0 -0
- data/lib/miga/cli/action/classify_wf.rb +2 -2
- data/lib/miga/cli/action/derep_wf.rb +1 -1
- data/lib/miga/cli/action/doctor.rb +57 -14
- data/lib/miga/cli/action/doctor/base.rb +47 -23
- data/lib/miga/cli/action/init.rb +11 -7
- data/lib/miga/cli/action/init/files_helper.rb +1 -0
- data/lib/miga/cli/action/ncbi_get.rb +3 -3
- data/lib/miga/cli/action/tax_dist.rb +2 -2
- data/lib/miga/cli/action/wf.rb +5 -4
- data/lib/miga/common.rb +1 -0
- data/lib/miga/daemon.rb +11 -4
- data/lib/miga/dataset/result.rb +10 -6
- data/lib/miga/json.rb +5 -4
- data/lib/miga/metadata.rb +5 -1
- data/lib/miga/parallel.rb +36 -0
- data/lib/miga/project.rb +8 -8
- data/lib/miga/project/base.rb +4 -4
- data/lib/miga/project/result.rb +2 -2
- data/lib/miga/sqlite.rb +10 -2
- data/lib/miga/version.rb +23 -9
- data/scripts/aai_distances.bash +16 -18
- data/scripts/ani_distances.bash +16 -17
- data/scripts/assembly.bash +31 -16
- data/scripts/haai_distances.bash +3 -27
- data/scripts/miga.bash +6 -4
- data/scripts/p.bash +1 -1
- data/scripts/read_quality.bash +9 -18
- data/scripts/trimmed_fasta.bash +14 -30
- data/scripts/trimmed_reads.bash +36 -36
- data/test/parallel_test.rb +31 -0
- data/test/project_test.rb +2 -1
- data/test/remote_dataset_test.rb +1 -1
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +41964 -0
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +32439 -0
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +62056 -0
- data/utils/FastAAI/FastAAI/FastAAI +1336 -0
- data/utils/FastAAI/README.md +84 -0
- data/utils/FastAAI/kAAI_v1.0_virus.py +1296 -0
- data/utils/distance/commands.rb +1 -0
- data/utils/distance/database.rb +0 -1
- data/utils/distance/runner.rb +2 -4
- data/utils/enveomics/Docs/recplot2.md +244 -0
- data/utils/enveomics/Examples/aai-matrix.bash +66 -0
- data/utils/enveomics/Examples/ani-matrix.bash +66 -0
- data/utils/enveomics/Examples/essential-phylogeny.bash +105 -0
- data/utils/enveomics/Examples/unus-genome-phylogeny.bash +100 -0
- data/utils/enveomics/LICENSE.txt +73 -0
- data/utils/enveomics/Makefile +52 -0
- data/utils/enveomics/Manifest/Tasks/aasubs.json +103 -0
- data/utils/enveomics/Manifest/Tasks/blasttab.json +786 -0
- data/utils/enveomics/Manifest/Tasks/distances.json +161 -0
- data/utils/enveomics/Manifest/Tasks/fasta.json +802 -0
- data/utils/enveomics/Manifest/Tasks/fastq.json +291 -0
- data/utils/enveomics/Manifest/Tasks/graphics.json +126 -0
- data/utils/enveomics/Manifest/Tasks/mapping.json +137 -0
- data/utils/enveomics/Manifest/Tasks/ogs.json +382 -0
- data/utils/enveomics/Manifest/Tasks/other.json +906 -0
- data/utils/enveomics/Manifest/Tasks/remote.json +355 -0
- data/utils/enveomics/Manifest/Tasks/sequence-identity.json +638 -0
- data/utils/enveomics/Manifest/Tasks/tables.json +308 -0
- data/utils/enveomics/Manifest/Tasks/trees.json +68 -0
- data/utils/enveomics/Manifest/Tasks/variants.json +111 -0
- data/utils/enveomics/Manifest/categories.json +165 -0
- data/utils/enveomics/Manifest/examples.json +154 -0
- data/utils/enveomics/Manifest/tasks.json +4 -0
- data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +69 -0
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -0
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -0
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -0
- data/utils/enveomics/Pipelines/assembly.pbs/README.md +189 -0
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +112 -0
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +23 -0
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +44 -0
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +50 -0
- data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +37 -0
- data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +68 -0
- data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +49 -0
- data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +80 -0
- data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +57 -0
- data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +63 -0
- data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +38 -0
- data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +73 -0
- data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +21 -0
- data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +72 -0
- data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +98 -0
- data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -0
- data/utils/enveomics/Pipelines/blast.pbs/README.md +127 -0
- data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +109 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +128 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +16 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +22 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +26 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +89 -0
- data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +29 -0
- data/utils/enveomics/Pipelines/idba.pbs/README.md +49 -0
- data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +95 -0
- data/utils/enveomics/Pipelines/idba.pbs/run.pbs +56 -0
- data/utils/enveomics/Pipelines/trim.pbs/README.md +54 -0
- data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +70 -0
- data/utils/enveomics/Pipelines/trim.pbs/run.pbs +130 -0
- data/utils/enveomics/README.md +42 -0
- data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +171 -0
- data/utils/enveomics/Scripts/Aln.cat.rb +221 -0
- data/utils/enveomics/Scripts/Aln.convert.pl +35 -0
- data/utils/enveomics/Scripts/AlphaDiversity.pl +152 -0
- data/utils/enveomics/Scripts/BedGraph.tad.rb +93 -0
- data/utils/enveomics/Scripts/BedGraph.window.rb +71 -0
- data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +102 -0
- data/utils/enveomics/Scripts/BlastTab.addlen.rb +63 -0
- data/utils/enveomics/Scripts/BlastTab.advance.bash +48 -0
- data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +55 -0
- data/utils/enveomics/Scripts/BlastTab.catsbj.pl +104 -0
- data/utils/enveomics/Scripts/BlastTab.cogCat.rb +76 -0
- data/utils/enveomics/Scripts/BlastTab.filter.pl +47 -0
- data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +194 -0
- data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +104 -0
- data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +157 -0
- data/utils/enveomics/Scripts/BlastTab.recplot2.R +48 -0
- data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +86 -0
- data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +119 -0
- data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +86 -0
- data/utils/enveomics/Scripts/BlastTab.subsample.pl +47 -0
- data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +114 -0
- data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +90 -0
- data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +101 -0
- data/utils/enveomics/Scripts/Chao1.pl +97 -0
- data/utils/enveomics/Scripts/CharTable.classify.rb +234 -0
- data/utils/enveomics/Scripts/EBIseq2tax.rb +83 -0
- data/utils/enveomics/Scripts/FastA.N50.pl +60 -0
- data/utils/enveomics/Scripts/FastA.extract.rb +152 -0
- data/utils/enveomics/Scripts/FastA.filter.pl +52 -0
- data/utils/enveomics/Scripts/FastA.filterLen.pl +28 -0
- data/utils/enveomics/Scripts/FastA.filterN.pl +60 -0
- data/utils/enveomics/Scripts/FastA.fragment.rb +100 -0
- data/utils/enveomics/Scripts/FastA.gc.pl +42 -0
- data/utils/enveomics/Scripts/FastA.interpose.pl +93 -0
- data/utils/enveomics/Scripts/FastA.length.pl +38 -0
- data/utils/enveomics/Scripts/FastA.mask.rb +89 -0
- data/utils/enveomics/Scripts/FastA.per_file.pl +36 -0
- data/utils/enveomics/Scripts/FastA.qlen.pl +57 -0
- data/utils/enveomics/Scripts/FastA.rename.pl +65 -0
- data/utils/enveomics/Scripts/FastA.revcom.pl +23 -0
- data/utils/enveomics/Scripts/FastA.sample.rb +98 -0
- data/utils/enveomics/Scripts/FastA.slider.pl +85 -0
- data/utils/enveomics/Scripts/FastA.split.pl +55 -0
- data/utils/enveomics/Scripts/FastA.split.rb +79 -0
- data/utils/enveomics/Scripts/FastA.subsample.pl +131 -0
- data/utils/enveomics/Scripts/FastA.tag.rb +65 -0
- data/utils/enveomics/Scripts/FastA.toFastQ.rb +69 -0
- data/utils/enveomics/Scripts/FastA.wrap.rb +48 -0
- data/utils/enveomics/Scripts/FastQ.filter.pl +54 -0
- data/utils/enveomics/Scripts/FastQ.interpose.pl +90 -0
- data/utils/enveomics/Scripts/FastQ.maskQual.rb +89 -0
- data/utils/enveomics/Scripts/FastQ.offset.pl +90 -0
- data/utils/enveomics/Scripts/FastQ.split.pl +53 -0
- data/utils/enveomics/Scripts/FastQ.tag.rb +70 -0
- data/utils/enveomics/Scripts/FastQ.test-error.rb +81 -0
- data/utils/enveomics/Scripts/FastQ.toFastA.awk +24 -0
- data/utils/enveomics/Scripts/GFF.catsbj.pl +127 -0
- data/utils/enveomics/Scripts/GenBank.add_fields.rb +84 -0
- data/utils/enveomics/Scripts/HMM.essential.rb +351 -0
- data/utils/enveomics/Scripts/HMM.haai.rb +168 -0
- data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +83 -0
- data/utils/enveomics/Scripts/JPlace.distances.rb +88 -0
- data/utils/enveomics/Scripts/JPlace.to_iToL.rb +320 -0
- data/utils/enveomics/Scripts/M5nr.getSequences.rb +81 -0
- data/utils/enveomics/Scripts/MeTaxa.distribution.pl +198 -0
- data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +35 -0
- data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +49 -0
- data/utils/enveomics/Scripts/NCBIacc2tax.rb +92 -0
- data/utils/enveomics/Scripts/Newick.autoprune.R +27 -0
- data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +228 -0
- data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +32 -0
- data/utils/enveomics/Scripts/RefSeq.download.bash +48 -0
- data/utils/enveomics/Scripts/SRA.download.bash +55 -0
- data/utils/enveomics/Scripts/TRIBS.plot-test.R +36 -0
- data/utils/enveomics/Scripts/TRIBS.test.R +39 -0
- data/utils/enveomics/Scripts/Table.barplot.R +31 -0
- data/utils/enveomics/Scripts/Table.df2dist.R +30 -0
- data/utils/enveomics/Scripts/Table.filter.pl +61 -0
- data/utils/enveomics/Scripts/Table.merge.pl +77 -0
- data/utils/enveomics/Scripts/Table.prefScore.R +60 -0
- data/utils/enveomics/Scripts/Table.replace.rb +69 -0
- data/utils/enveomics/Scripts/Table.round.rb +63 -0
- data/utils/enveomics/Scripts/Table.split.pl +57 -0
- data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +227 -0
- data/utils/enveomics/Scripts/VCF.KaKs.rb +147 -0
- data/utils/enveomics/Scripts/VCF.SNPs.rb +88 -0
- data/utils/enveomics/Scripts/aai.rb +419 -0
- data/utils/enveomics/Scripts/ani.rb +362 -0
- data/utils/enveomics/Scripts/anir.rb +137 -0
- data/utils/enveomics/Scripts/clust.rand.rb +102 -0
- data/utils/enveomics/Scripts/gi2tax.rb +103 -0
- data/utils/enveomics/Scripts/in_silico_GA_GI.pl +96 -0
- data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
- data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
- data/utils/enveomics/Scripts/lib/enveomics.R +1 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +293 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +175 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +24 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +17 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +30 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +253 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +63 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +182 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +49 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +74 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +237 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +3 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +31 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +152 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +73 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +135 -0
- data/utils/enveomics/Scripts/ogs.annotate.rb +88 -0
- data/utils/enveomics/Scripts/ogs.core-pan.rb +160 -0
- data/utils/enveomics/Scripts/ogs.extract.rb +125 -0
- data/utils/enveomics/Scripts/ogs.mcl.rb +186 -0
- data/utils/enveomics/Scripts/ogs.rb +104 -0
- data/utils/enveomics/Scripts/ogs.stats.rb +131 -0
- data/utils/enveomics/Scripts/rbm-legacy.rb +172 -0
- data/utils/enveomics/Scripts/rbm.rb +100 -0
- data/utils/enveomics/Scripts/sam.filter.rb +148 -0
- data/utils/enveomics/Tests/Makefile +10 -0
- data/utils/enveomics/Tests/Mgen_M2288.faa +3189 -0
- data/utils/enveomics/Tests/Mgen_M2288.fna +8282 -0
- data/utils/enveomics/Tests/Mgen_M2321.fna +8288 -0
- data/utils/enveomics/Tests/Nequ_Kin4M.faa +2970 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +7 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +17 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +137 -0
- data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +123 -0
- data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +200 -0
- data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +55 -0
- data/utils/enveomics/Tests/alkB.nwk +1 -0
- data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +13 -0
- data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +17 -0
- data/utils/enveomics/Tests/hiv1.faa +59 -0
- data/utils/enveomics/Tests/hiv1.fna +134 -0
- data/utils/enveomics/Tests/hiv2.faa +70 -0
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +233 -0
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +1 -0
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +233 -0
- data/utils/enveomics/Tests/phyla_counts.tsv +10 -0
- data/utils/enveomics/Tests/primate_lentivirus.ogs +11 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +9 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +8 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +6 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +9 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +6 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +6 -0
- data/utils/enveomics/build_enveomics_r.bash +45 -0
- data/utils/enveomics/enveomics.R/DESCRIPTION +31 -0
- data/utils/enveomics/enveomics.R/NAMESPACE +39 -0
- data/utils/enveomics/enveomics.R/R/autoprune.R +155 -0
- data/utils/enveomics/enveomics.R/R/barplot.R +184 -0
- data/utils/enveomics/enveomics.R/R/cliopts.R +135 -0
- data/utils/enveomics/enveomics.R/R/df2dist.R +154 -0
- data/utils/enveomics/enveomics.R/R/growthcurve.R +331 -0
- data/utils/enveomics/enveomics.R/R/prefscore.R +79 -0
- data/utils/enveomics/enveomics.R/R/recplot.R +354 -0
- data/utils/enveomics/enveomics.R/R/recplot2.R +1631 -0
- data/utils/enveomics/enveomics.R/R/tribs.R +583 -0
- data/utils/enveomics/enveomics.R/R/utils.R +80 -0
- data/utils/enveomics/enveomics.R/README.md +81 -0
- data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
- data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +16 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +16 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +16 -0
- data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +25 -0
- data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +46 -0
- data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +47 -0
- data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +40 -0
- data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +103 -0
- data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +67 -0
- data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -0
- data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +45 -0
- data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +44 -0
- data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +47 -0
- data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +75 -0
- data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +50 -0
- data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +44 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +139 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +45 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +77 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +47 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +45 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +52 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +51 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +43 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +82 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +59 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +36 -0
- data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +68 -0
- data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -0
- data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -0
- data/utils/enveomics/enveomics.R/man/growth.curves.Rd +14 -0
- data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +13 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +78 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +46 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +45 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +125 -0
- data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -0
- data/utils/enveomics/globals.mk +8 -0
- data/utils/enveomics/manifest.json +9 -0
- data/utils/multitrim/Multitrim How-To.pdf +0 -0
- data/utils/multitrim/README.md +67 -0
- data/utils/multitrim/multitrim.py +1555 -0
- data/utils/multitrim/multitrim.yml +13 -0
- data/utils/requirements.txt +4 -3
- metadata +304 -3
@@ -0,0 +1,65 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# @author Luis M. Rodriguez-R
|
4
|
+
# @license artistic license 2.0
|
5
|
+
|
6
|
+
require 'optparse'
|
7
|
+
|
8
|
+
o = {q: false, p: '', s: '', d: false}
|
9
|
+
ARGV << '-h' if ARGV.size==0
|
10
|
+
OptionParser.new do |opts|
|
11
|
+
opts.banner = "
|
12
|
+
Generates easy-to-parse tagged reads from FastA files.
|
13
|
+
|
14
|
+
Usage: #{$0} [options]"
|
15
|
+
opts.separator ''
|
16
|
+
opts.separator 'Mandatory'
|
17
|
+
opts.on('-i', '--in FILE',
|
18
|
+
'Path to the FastA file containing the sequences.'){ |v| o[:in] = v }
|
19
|
+
opts.on('-o', '--out FILE',
|
20
|
+
'Path to the FastA to create.'){ |v| o[:out] = v }
|
21
|
+
opts.separator ''
|
22
|
+
opts.separator 'ID options'
|
23
|
+
opts.on('-p', '--prefix STR', 'Prefix to use in all IDs.'){ |v| o[:p] = v }
|
24
|
+
opts.on('-s', '--suffix STR', 'Suffix to use in all IDs.'){ |v| o[:s] = v }
|
25
|
+
opts.on('-d', '--defline',
|
26
|
+
'Keep the original defline after a space.'){ o[:d] = true }
|
27
|
+
opts.on('-l', '--list FILE',
|
28
|
+
'Reads a list of IDS.'){ |v| o[:l] = v }
|
29
|
+
opts.separator ''
|
30
|
+
opts.separator 'Other Options'
|
31
|
+
opts.on('-q', '--quiet', 'Run quietly (no STDERR output)'){ o[:q] = true }
|
32
|
+
opts.on('-h', '--help', 'Display this screen') do
|
33
|
+
puts opts
|
34
|
+
exit
|
35
|
+
end
|
36
|
+
opts.separator ''
|
37
|
+
end.parse!
|
38
|
+
abort '-i is mandatory' if o[:in].nil?
|
39
|
+
abort '-o is mandatory' if o[:out].nil?
|
40
|
+
|
41
|
+
# Copies the input FastA to the output, replacing the ID of every defline.
# New IDs are either consecutive integers (1, 2, ...) or, when -l was given,
# the entries of that list in order. Each ID is wrapped in the -p prefix and
# -s suffix, and the original defline is appended after a space when -d is set.
# Lines starting with ';' (FastA comments) are dropped; all other lines are
# copied unchanged.
begin
  # IDs supplied with -l: one per line, an optional leading '>' is removed
  list = o[:l].nil? ? nil :
    File.readlines(o[:l]).map { |id| id.chomp.gsub(/^>/, '') }
  i = 0
  # Block forms guarantee that both handles are closed, even when an error
  # interrupts the copy
  File.open(o[:out], 'w') do |ofh|
    File.open(o[:in], 'r') do |ifh|
      ifh.each do |ln|
        ln.chomp!
        next if ln =~ /^;/

        if ln.start_with?('>')
          i += 1
          new_id = list.nil? ? i : list.shift
          # An exhausted list would otherwise produce empty, duplicated IDs
          if new_id.nil?
            raise "The list #{o[:l]} has fewer IDs than sequences in " \
                  "#{o[:in]} (no ID for sequence #{i})"
          end
          defline = o[:d] ? " #{ln[1, ln.size - 1]}" : ''
          ofh.puts ">#{o[:p]}#{new_id}#{o[:s]}#{defline}"
        else
          ofh.puts ln
        end
      end
    end
  end
rescue => err
  $stderr.puts "Exception: #{err}\n\n"
  err.backtrace.each { |l| $stderr.puts l + "\n" }
  # Report the failure to the caller instead of finishing with status 0
  exit 1
end
|
65
|
+
|
@@ -0,0 +1,69 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
require 'zlib'
|
5
|
+
|
6
|
+
o = { qual: 31, encoding: 33 }
|
7
|
+
ARGV << '-h' if ARGV.empty?
|
8
|
+
OptionParser.new do |opts|
|
9
|
+
opts.banner = "
|
10
|
+
Creates a FastQ-compliant file from a FastA file.
|
11
|
+
|
12
|
+
Usage: #{$0} [options]"
|
13
|
+
opts.separator ''
|
14
|
+
opts.separator 'Options'
|
15
|
+
opts.on(
|
16
|
+
'-i', '--in FILE', 'Input FastA file (supports .gz compression)'
|
17
|
+
) { |v| o[:in] = v }
|
18
|
+
opts.on(
|
19
|
+
'-o', '--out FILE', 'Output FastQ file (supports .gz compression)'
|
20
|
+
) { |v| o[:out] = v }
|
21
|
+
opts.on(
|
22
|
+
'-q', '--quality INT', Integer,
|
23
|
+
'PHRED quality score to use (fixed), in the range [-5, 41]',
|
24
|
+
"By default: #{o[:qual]}"
|
25
|
+
) { |v| o[:qual] = v }
|
26
|
+
opts.on(
|
27
|
+
'--encoding INT', Integer,
|
28
|
+
"Base encoding (33 or 64). By default: #{o[:encoding]}"
|
29
|
+
) { |v| o[:encoding] = v }
|
30
|
+
opts.on('-h', '--help', 'Display this screen.') do
|
31
|
+
puts opts
|
32
|
+
exit
|
33
|
+
end
|
34
|
+
opts.separator ''
|
35
|
+
end.parse!
|
36
|
+
abort '-i is mandatory' if o[:in].nil?
|
37
|
+
abort '-o is mandatory' if o[:out].nil?
|
38
|
+
abort '-q must be in the range -5 .. 41' if o[:qual] < -5 || o[:qual] > 41
|
39
|
+
|
40
|
+
# Determine quality character
|
41
|
+
$qchar = (o[:qual] + o[:encoding]).chr
|
42
|
+
|
43
|
+
# Create file handlers
|
44
|
+
ifh = o[:in] =~ /\.gz$/ ?
|
45
|
+
Zlib::GzipReader.open(o[:in]) : File.open(o[:in], 'r')
|
46
|
+
ofh = o[:out] =~ /\.gz$/ ?
|
47
|
+
Zlib::GzipWriter.open(o[:out]) : File.open(o[:out], 'w')
|
48
|
+
|
49
|
+
# Writes one four-line FastQ record (header, sequence, '+', qualities) to
# +ofh+. The quality line repeats the global $qchar once per base of +seq+.
# Nothing is written when +seq+ is empty. Always returns nil.
def print_seq(ofh, id, seq)
  return if seq.empty?

  quality = $qchar * seq.length
  ofh.puts("@#{id}", seq, '+', quality)
end
|
52
|
+
|
53
|
+
# Generate FastQ
|
54
|
+
# Streams the FastA input and emits one FastQ record per sequence. The record
# of the current sequence is flushed when the next defline is found, and once
# more after the last line. Lines starting with ';' are skipped, and sequence
# lines are upcased and stripped of anything other than A-Z.
id = ''
seq = ''
begin
  ifh.each_line do |ln|
    next if ln =~ /^;/

    # Remove the line terminator before matching, so CRLF input does not
    # leave a carriage return at the end of the FastQ header
    ln = ln.chomp
    if ln =~ /^>(.*)/
      print_seq(ofh, id, seq)
      seq = ''
      id = $1
    else
      # Append in place: `seq += ...` would copy the whole sequence again for
      # every input line
      seq << ln.upcase.gsub(/[^A-Z]/, '')
    end
  end
  print_seq(ofh, id, seq)
ensure
  # Close both handles even if the conversion fails; a gzip writer that is
  # never closed does not write its footer
  ofh.close
  ifh.close
end
|
69
|
+
|
@@ -0,0 +1,48 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "optparse"
|
4
|
+
o = {wrap:70}
|
5
|
+
ARGV << "-h" if ARGV.empty?
|
6
|
+
OptionParser.new do |opts|
|
7
|
+
opts.banner = "
|
8
|
+
Wraps sequences in a FastA to a given line length.
|
9
|
+
|
10
|
+
Usage: #{$0} [options]"
|
11
|
+
opts.separator ""
|
12
|
+
opts.separator "Options"
|
13
|
+
opts.on("-i", "--in FILE", "Input FastA file."){ |v| o[:in] = v }
|
14
|
+
opts.on("-o", "--out FILE", "Output FastA file."){ |v| o[:out] = v }
|
15
|
+
opts.on("-w", "--wrap INT",
|
16
|
+
"Line length to wrap sequences. Use 0 to generate 1-line sequences.",
|
17
|
+
"By default: #{o[:wrap]}."){ |v| o[:wrap] = v.to_i }
|
18
|
+
opts.on("-h", "--help", "Display this screen.") do
|
19
|
+
puts opts
|
20
|
+
exit
|
21
|
+
end
|
22
|
+
opts.separator ""
|
23
|
+
end.parse!
|
24
|
+
abort "-i is mandatory" if o[:in].nil?
|
25
|
+
abort "-o is mandatory" if o[:out].nil?
|
26
|
+
|
27
|
+
# Breaks +txt+ into newline-terminated chunks of at most +len+ characters.
# An empty +txt+ yields an empty string, and a +len+ of 0 disables wrapping,
# returning +txt+ followed by a single newline.
def wrap_width(txt, len)
  if txt.empty?
    ""
  elsif len == 0
    txt + "\n"
  else
    txt.gsub(/.{1,#{len}}/) { |chunk| "#{chunk}\n" }
  end
end
|
32
|
+
|
33
|
+
# Re-wraps the input FastA: sequence lines are accumulated until the next
# defline (or the end of the file), then written at the requested width.
# Deflines are copied as they are.
ofh = File.open(o[:out], "w")
File.open(o[:in], "r") do |ifh|
  pending = ""
  ifh.each_line do |line|
    unless line.start_with?(">")
      # Sequence line: keep collecting, without its line terminator
      pending << line.chomp
      next
    end

    # Defline: flush the previous sequence before starting a new entry
    ofh.print wrap_width(pending, o[:wrap])
    ofh.puts line
    pending = ""
  end
  # Flush the last sequence of the file
  ofh.print wrap_width(pending, o[:wrap])
end
ofh.close
|
@@ -0,0 +1,54 @@
|
|
1
|
+
#!/usr/bin/env perl
|
2
|
+
#
|
3
|
+
# @author: Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
|
4
|
+
# @update: Mar-23-2015
|
5
|
+
# @license: artistic license 2.0
|
6
|
+
#
|
7
|
+
|
8
|
+
use warnings;
|
9
|
+
use strict;
|
10
|
+
use Getopt::Std;
|
11
|
+
|
12
|
+
sub HELP_MESSAGE { die "
|
13
|
+
.Description:
|
14
|
+
Extracts a subset of sequences from a FastQ file.
|
15
|
+
|
16
|
+
.Usage: $0 [options] list.txt seqs.fq > subset.fq
|
17
|
+
|
18
|
+
[options]
|
19
|
+
-r Reverse list. Extracts sequences NOT present in the list.
|
20
|
+
-q Runs quietly.
|
21
|
+
-h Prints this message and exits.
|
22
|
+
|
23
|
+
[mandatory]
|
24
|
+
list.txt List of sequences to extract.
|
25
|
+
seqs.fq FastQ file containing the superset of sequences.
|
26
|
+
subset.fq FastQ file to be created.
|
27
|
+
|
28
|
+
" }
|
29
|
+
|
30
|
+
my %o=();
|
31
|
+
getopts('rhq', \%o);
|
32
|
+
my($list, $fq) = @ARGV;
|
33
|
+
($list and $fq) or &HELP_MESSAGE;
|
34
|
+
$o{h} and &HELP_MESSAGE;
|
35
|
+
|
36
|
+
# Load the list of wanted IDs: one entry per line, stored as hash keys.
print STDERR "Reading list.\n" unless $o{q};
open my $list_fh, '<', $list or die "Cannot read file: $list: $!\n";
my %li;
while (my $entry = <$list_fh>) {
    chomp $entry;
    $li{$entry} = 1;
}
close $list_fh;

# Walk the FastQ file four lines at a time and print the records selected by
# the list (or, with -r, the records NOT selected by it).
print STDERR "Filtering FastQ.\n" unless $o{q};
open my $fq_fh, '<', $fq or die "Cannot read file: $fq: $!\n";
while (my $ln = <$fq_fh>) {
    # Remaining lines of the record. A truncated final record simply has
    # fewer of them, instead of undefined entries in the output.
    my @rest;
    for (1 .. 3) {
        my $next = <$fq_fh>;
        last unless defined $next;
        push @rest, $next;
    }
    chomp $ln;

    my $good;
    if ($ln =~ m/^@((\S+).*)/) {
        # A list entry may be the full defline (with or without a leading
        # '@' or '>') or only the ID up to the first whitespace.
        my ($defline, $id) = ($1, $2);
        $good = (  exists $li{$defline}
                or exists $li{">$defline"}
                or exists $li{$id}
                or exists $li{$ln});
    }
    elsif ($ln =~ m/^>/) {
        $good = 0;
        print STDERR "Warning: Non-cannonical defline, line $.: $ln\n";
    }
    else {
        # Setting $good to the -r flag keeps this record out of the output
        # in both normal and reverse mode.
        $good = $o{r};
        print STDERR "Warning: Non-cannonical defline, line $.: $ln\n";
    }

    print join('', "$ln\n", @rest)
        if (($good and not $o{r}) or ($o{r} and not $good));
}
close $fq_fh;
|
54
|
+
|
@@ -0,0 +1,90 @@
|
|
1
|
+
#!/usr/bin/env perl
|
2
|
+
|
3
|
+
# @author Luis M. Rodriguez-R
|
4
|
+
# @license artistic license 2.0
|
5
|
+
|
6
|
+
use strict;
|
7
|
+
use warnings;
|
8
|
+
use Symbol;
|
9
|
+
|
10
|
+
# Usage text, shown when the output file or at least two input files are
# missing. Interpolates $0 so the usage line reflects how the script was
# invoked.
my $HELP = <<"HELP";

Description:
   Interposes sequences in FastQ format from two files into one output file.
   If more than two files are provided, the script will interpose all the input
   files.
   Note that this script will check for the consistency of the names (assuming
   a pair of related reads contains the same name varying only in a trailing
   slash (/) followed by a digit). If you want to turn this feature off just
   set the -T option to zero. If you want to increase the sampling period (to
   speed the script up) or decrease it (to make it more sensitive to errors)
   just change the -T option accordingly.

Usage:
   $0 [-T <int>] <output_fastq> <input_fastq_1> <input_fastq_2> [additional input files...]

Where,
   -T <int>      : Optional. Integer indicating the sampling period for
                   names evaluation (see Description above).
                   By default: 1000.
   output_fastq  : Output file
   input_fastq_1 : First FastQ file
   input_fastq_2 : Second FastQ file
   ...           : Any additional FastQ files (or none)

HELP
|
37
|
+
my $eval_T = 1000;
|
38
|
+
if(exists $ARGV[0] and exists $ARGV[1] and $ARGV[0] eq '-T'){
|
39
|
+
$eval_T = $ARGV[1]+0;
|
40
|
+
shift @ARGV;
|
41
|
+
shift @ARGV;
|
42
|
+
}
|
43
|
+
my $out = shift @ARGV;
|
44
|
+
my @in = @ARGV;
|
45
|
+
|
46
|
+
|
47
|
+
die $HELP unless $out and $#in >= 1;
|
48
|
+
# Interleaves the records of all input FastQ files into the output file: entry
# 1 of every input in order, then entry 2 of every input, and so on, until the
# first input is exhausted. Any other input that ends early is a fatal error;
# inputs that still have data afterwards are reported with an ALERT.
open my $out_fh, '>', $out or die "Unable to write on $out: $!\n";
print "Output file: $out\n";

my @in_fh = ();

for my $k (0 .. $#in) {
    open my $fh, '<', $in[$k] or die "Unable to read $in[$k]: $!\n";
    $in_fh[$k] = $fh;
    print "Input file: $in[$k]\n";
}

my $i = 0;
my $frl;    # length of the first read, reported in the summary
LINE: while (1) {
    # Base name shared by the mates of the current entry. Kept undefined
    # until the first mate is seen, so that a base name of "0" is not
    # mistaken for "not set yet".
    my $name;
    print STDERR "\rEntry: $i " unless $i % 1000;
    FILE: for my $k (0 .. $#in_fh) {
        my @ln = ();
        for my $l (0 .. 3) {
            $ln[$l] = readline($in_fh[$k]);
            # Regular end of the run: the first file has no more records
            last LINE if $k == 0 and $l == 0 and (not defined $ln[$l]);
            defined $ln[$l] or die "Impossible to read next entry (line $.) from $in[$k]: $!\n";
            chomp $ln[$l];
        }
        # Name consistency is only sampled once every $eval_T entries
        if ($eval_T and not $i % $eval_T) {
            $ln[0] =~ m/^\@(.*?)\/\d+\s*$/ or die "Impossible to evaluate names!\n offending entry:\n$ln[0]\n";
            my $base = $1;
            $name = $base unless defined $name;
            die "Inconsistent name!\n base name is $name\n offending entry is:\n$ln[0]\n" unless $base eq $name;
        }
        # The format is checked once, on the very first record read
        unless (defined $frl) {
            $ln[0] =~ /^@/ or die "Unexpected format! (missing \@)\n offending entry: $ln[0].\n";
            $ln[2] =~ /^\+/ or die "Unexpected format! (missing +)\n offending entry: $ln[0].\n";
            $frl = length $ln[1];
        }
        print {$out_fh} join("\n", @ln, "");
    }
    $i++;
}
# $frl stays undefined when the first input has no records at all
my $first_length = defined $frl ? $frl : 'NA';
print "\rNumber of entries: $i \nFirst read length: $first_length\n";
# Buffered write errors (e.g. a full disk) only surface when closing
close $out_fh or die "Unable to finish writing $out: $!\n";

for my $k (0 .. $#in_fh) {
    print "ALERT: The file $in[$k] contains trailing entries\n" if defined readline($in_fh[$k]);
    close $in_fh[$k];
}
|
90
|
+
|
@@ -0,0 +1,89 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Masks low-quality bases in a FastQ file.
#
# Reads FastQ records (groups of four lines) from the input, replaces with
# 'N' every base whose quality score (character code minus --offset) is
# below --qual, and writes the records to the output either as FastQ or,
# with --fasta, as FastA (defline and sequence only).
#
# The helpers `say`, `reader`, `writer`, `Enveomics.opt_banner` and the
# Enveomics error classes come from enveomics_rb/enveomics.

$VERSION = 1.2
$:.push File.expand_path('../lib', __FILE__)
require 'enveomics_rb/enveomics'

o = { q: false, offset: 33, qual: 15, fasta: false }
OptionParser.new do |opts|
  opts.version = $VERSION
  Enveomics.opt_banner(
    opts, 'Masks low-quality bases in a FastQ file',
    "#{File.basename($0)} -i in.fastq -o out.fastq [options]"
  )

  opts.separator 'Mandatory'
  opts.on(
    '-i', '--input FILE',
    'Path to the FastQ file containing the sequences',
    'Supports compression with .gz extension, use - for STDIN'
  ) { |v| o[:in] = v }
  opts.on(
    '-o', '--out FILE',
    'Path to the output FastQ file',
    'Supports compression with .gz extension, use - for STDOUT'
  ) { |v| o[:out] = v }

  opts.separator ''
  opts.separator 'Quality Options'
  # No short alias here: '-q' is registered for --quiet below, and
  # OptionParser keeps only the last registration of a short switch, so a
  # '-q' alias on --qual was never reachable and only made the help wrong.
  opts.on(
    '--qual INT', Integer,
    "Minimum quality score to allow a base, by default: #{o[:qual]}"
  ) { |v| o[:qual] = v }
  opts.on(
    '--offset INT', Integer,
    "Q-score offset, by default: #{o[:offset]}"
  ) { |v| o[:offset] = v }

  opts.separator ''
  opts.separator 'Other Options'
  opts.on(
    '-a', '--fasta', 'Output sequences in FastA format'
  ) { |v| o[:fasta] = v }
  opts.on('-q', '--quiet', 'Run quietly (no STDERR output)') { o[:q] = true }
  opts.on('-h', '--help', 'Display this screen') do
    puts opts
    exit
  end
  opts.separator ''
end.parse!

raise Enveomics::OptionError.new('-i is mandatory') if o[:in].nil?
raise Enveomics::OptionError.new('-o is mandatory') if o[:out].nil?
# NOTE(review): presumably read by `say` to silence progress output; confirm
# against enveomics_rb/enveomics.
$QUIET = o[:q]

# Open in/out files
say 'Reading FastQ file'
ifh = reader(o[:in])
ofh = writer(o[:out])

# Parse and mask
entry = [] # Lines of the record being read: defline, sequence, '+', quality
lno = 0
ifh.each_line do |ln|
  lno += 1 # <- Gzip doesn't support $.
  position = lno % 4

  # The first line of every record must be a defline starting with '@'
  if position == 1 && ln !~ /^@(\S+)/
    raise Enveomics::ParseError.new("Unexpected defline format: #{ln}")
  end

  entry << ln
  next unless position.zero? # Record is complete only on its fourth line

  # Mask in place, but never past the last base: a quality line longer than
  # the sequence would otherwise overwrite the line terminator or raise
  # IndexError
  seq_len = entry[1].chomp.length
  entry[3].chomp.each_char.with_index do |chr, k|
    break if k >= seq_len

    entry[1][k] = 'N' if chr.ord - o[:offset] < o[:qual]
  end

  ofh.puts(o[:fasta] ? [entry[0].gsub(/^@/, '>'), entry[1]] : entry)
  entry = []
end

# Finalize
say " Lines: #{lno}"
unless entry.empty?
  # A partial record remains: the line count is not a multiple of four
  raise Enveomics::ParseError.new('Unexpected trailing lines in FastQ')
end
say " Sequences: #{lno / 4}"
ifh.close
ofh.close
|
89
|
+
|
@@ -0,0 +1,90 @@
|
|
1
|
+
#!/usr/bin/env perl
#
# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
# @update Mar-23-2015
# @license artistic license 2.0
#
# Detects the quality offset of a FastQ file (33 or 64) and re-encodes the
# quality lines with the requested output offset, printing to STDOUT.
# Arguments (positional): input file, output offset (default 33; 0 means
# "detect and exit"), and an optional flag turning errors into warnings.
#

use warnings;
use strict;

my ($in, $off, $force) = @ARGV;
# Test for a non-empty value rather than truth, so a file named "0" works.
(defined $in and length $in) or die "
.Description:
There are several FastQ formats (see http://en.wikipedia.org/wiki/FASTQ_format).
This script takes a FastQ in any of them, identifies the type of FastQ (this is,
the offset), and generates a FastQ with the given offset. Note that Solexa+64
FastQ can cause problematic values when using the offset 33, since there is no
equivalent in Phred+33 for negative values (the range of Solexa+64 is -5 to 40).

.Usage:
$0 in.fastq[ offset[ force]] > out.fastq

in.fastq Input file in FastQ format (range is automatically detected).
offset (optional) Offset to use for the output. Use 0 (zero) to detect
the input format and exit. By default: 33.
force (optional) If true, turns errors into warnings and continues.
Out-of-range values are set to the closest range limit.
out.fastq Output file in FastQ format with the specified offset.

";

$off = 33 unless defined $off;

# First pass: guess the input offset from the first quality character that
# is unambiguous (code below 55 => offset 33, code above 80 => offset 64).
my $in_off = 0;
open my $guess_fh, "<", $in or die "Cannot read file: $in: $!\n";
GUESS_FORMAT: while (my $line = <$guess_fh>) {
    next GUESS_FORMAT if $. % 4;    # only every fourth line holds qualities

    # Strip LF and CRLF alike: a leftover "\r" (code 13) would otherwise be
    # read as a quality character and force the guess to offset 33.
    $line =~ s/\r?\n\z//;
    for my $chr (split //, $line) {
        my $code = ord $chr;
        if ($code < 55) {
            $in_off = 33;
            last GUESS_FORMAT;
        }
        elsif ($code > 80) {
            $in_off = 64;
            last GUESS_FORMAT;
        }
    }
}
close $guess_fh;    # also resets $. for the second pass
print STDERR "Detected input offset: Phred+$in_off\n";
exit unless $off;   # offset 0: detection only

# Second pass: re-encode the quality lines.
my $Solexa64 = 0;   # set once a negative score is seen with input offset 64
die "Couldn't guess input format.\n" unless $in_off;
open my $in_fh, "<", $in or die "Cannot read file: $in: $!\n";
while (my $line = <$in_fh>) {
    # Non-quality lines, and everything when no conversion is needed, are
    # copied verbatim.
    if ($in_off == $off or $. % 4) {
        print $line;
        next;
    }

    # Remove the terminator and remember it, so CRLF input stays CRLF; a
    # final line without a terminator gets "\n".
    my $eol = $line =~ s/(\r?\n)\z// ? $1 : "\n";

    for my $chr (split //, $line) {
        my $score = ord($chr) - $in_off;
        # Built before clamping so the message reports the original score.
        my $range_msg = "Out-of-range value $chr ($score) in line $..\n";
        my $err = '';

        if ($score < -5) {
            $err = $range_msg;
            $score = $off == 64 ? -5 : 0;
        }
        elsif (!$Solexa64 and $score < 0) {
            if ($in_off == 64) {
                # Negative scores with offset 64 mark the Solexa variant.
                # From here on they are passed through unchanged, so with
                # output offset 33 they map below "!" (the limitation noted
                # in the usage text).
                print STDERR "Format variant: Solexa+64\n";
                $Solexa64 = 1;
            }
            else {
                $err = $range_msg;
                $score = 0;
            }
        }
        elsif ($score > 41) {
            $err = $range_msg;
            $score = 41;
        }

        if ($err) {
            if ($force) { warn $err } else { die $err }
        }
        print chr($score + $off);
    }
    print $eol;
}
close $in_fh;
|
90
|
+
|