miga-base 0.7.26.0 → 1.0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/_data/aai-intax.blast.tsv.gz +0 -0
- data/lib/miga/_data/aai-intax.diamond.tsv.gz +0 -0
- data/lib/miga/_data/aai-novel.blast.tsv.gz +0 -0
- data/lib/miga/_data/aai-novel.diamond.tsv.gz +0 -0
- data/lib/miga/cli/action/classify_wf.rb +2 -2
- data/lib/miga/cli/action/derep_wf.rb +1 -1
- data/lib/miga/cli/action/doctor.rb +57 -14
- data/lib/miga/cli/action/doctor/base.rb +47 -23
- data/lib/miga/cli/action/init.rb +11 -7
- data/lib/miga/cli/action/init/files_helper.rb +1 -0
- data/lib/miga/cli/action/ncbi_get.rb +3 -3
- data/lib/miga/cli/action/tax_dist.rb +2 -2
- data/lib/miga/cli/action/wf.rb +5 -4
- data/lib/miga/common.rb +1 -0
- data/lib/miga/daemon.rb +11 -4
- data/lib/miga/dataset/result.rb +10 -6
- data/lib/miga/json.rb +5 -4
- data/lib/miga/metadata.rb +5 -1
- data/lib/miga/parallel.rb +36 -0
- data/lib/miga/project.rb +8 -8
- data/lib/miga/project/base.rb +4 -4
- data/lib/miga/project/result.rb +2 -2
- data/lib/miga/sqlite.rb +10 -2
- data/lib/miga/version.rb +23 -9
- data/scripts/aai_distances.bash +16 -18
- data/scripts/ani_distances.bash +16 -17
- data/scripts/assembly.bash +31 -16
- data/scripts/haai_distances.bash +3 -27
- data/scripts/miga.bash +6 -4
- data/scripts/p.bash +1 -1
- data/scripts/read_quality.bash +9 -18
- data/scripts/trimmed_fasta.bash +14 -30
- data/scripts/trimmed_reads.bash +36 -36
- data/test/parallel_test.rb +31 -0
- data/test/project_test.rb +2 -1
- data/test/remote_dataset_test.rb +1 -1
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +41964 -0
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +32439 -0
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +62056 -0
- data/utils/FastAAI/FastAAI/FastAAI +1336 -0
- data/utils/FastAAI/README.md +84 -0
- data/utils/FastAAI/kAAI_v1.0_virus.py +1296 -0
- data/utils/distance/commands.rb +1 -0
- data/utils/distance/database.rb +0 -1
- data/utils/distance/runner.rb +2 -4
- data/utils/enveomics/Docs/recplot2.md +244 -0
- data/utils/enveomics/Examples/aai-matrix.bash +66 -0
- data/utils/enveomics/Examples/ani-matrix.bash +66 -0
- data/utils/enveomics/Examples/essential-phylogeny.bash +105 -0
- data/utils/enveomics/Examples/unus-genome-phylogeny.bash +100 -0
- data/utils/enveomics/LICENSE.txt +73 -0
- data/utils/enveomics/Makefile +52 -0
- data/utils/enveomics/Manifest/Tasks/aasubs.json +103 -0
- data/utils/enveomics/Manifest/Tasks/blasttab.json +786 -0
- data/utils/enveomics/Manifest/Tasks/distances.json +161 -0
- data/utils/enveomics/Manifest/Tasks/fasta.json +802 -0
- data/utils/enveomics/Manifest/Tasks/fastq.json +291 -0
- data/utils/enveomics/Manifest/Tasks/graphics.json +126 -0
- data/utils/enveomics/Manifest/Tasks/mapping.json +137 -0
- data/utils/enveomics/Manifest/Tasks/ogs.json +382 -0
- data/utils/enveomics/Manifest/Tasks/other.json +906 -0
- data/utils/enveomics/Manifest/Tasks/remote.json +355 -0
- data/utils/enveomics/Manifest/Tasks/sequence-identity.json +638 -0
- data/utils/enveomics/Manifest/Tasks/tables.json +308 -0
- data/utils/enveomics/Manifest/Tasks/trees.json +68 -0
- data/utils/enveomics/Manifest/Tasks/variants.json +111 -0
- data/utils/enveomics/Manifest/categories.json +165 -0
- data/utils/enveomics/Manifest/examples.json +154 -0
- data/utils/enveomics/Manifest/tasks.json +4 -0
- data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +69 -0
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -0
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -0
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -0
- data/utils/enveomics/Pipelines/assembly.pbs/README.md +189 -0
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +112 -0
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +23 -0
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +44 -0
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +50 -0
- data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +37 -0
- data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +68 -0
- data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +49 -0
- data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +80 -0
- data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +57 -0
- data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +63 -0
- data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +38 -0
- data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +73 -0
- data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +21 -0
- data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +72 -0
- data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +98 -0
- data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -0
- data/utils/enveomics/Pipelines/blast.pbs/README.md +127 -0
- data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +109 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +128 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +16 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +22 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +26 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +89 -0
- data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +29 -0
- data/utils/enveomics/Pipelines/idba.pbs/README.md +49 -0
- data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +95 -0
- data/utils/enveomics/Pipelines/idba.pbs/run.pbs +56 -0
- data/utils/enveomics/Pipelines/trim.pbs/README.md +54 -0
- data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +70 -0
- data/utils/enveomics/Pipelines/trim.pbs/run.pbs +130 -0
- data/utils/enveomics/README.md +42 -0
- data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +171 -0
- data/utils/enveomics/Scripts/Aln.cat.rb +221 -0
- data/utils/enveomics/Scripts/Aln.convert.pl +35 -0
- data/utils/enveomics/Scripts/AlphaDiversity.pl +152 -0
- data/utils/enveomics/Scripts/BedGraph.tad.rb +93 -0
- data/utils/enveomics/Scripts/BedGraph.window.rb +71 -0
- data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +102 -0
- data/utils/enveomics/Scripts/BlastTab.addlen.rb +63 -0
- data/utils/enveomics/Scripts/BlastTab.advance.bash +48 -0
- data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +55 -0
- data/utils/enveomics/Scripts/BlastTab.catsbj.pl +104 -0
- data/utils/enveomics/Scripts/BlastTab.cogCat.rb +76 -0
- data/utils/enveomics/Scripts/BlastTab.filter.pl +47 -0
- data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +194 -0
- data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +104 -0
- data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +157 -0
- data/utils/enveomics/Scripts/BlastTab.recplot2.R +48 -0
- data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +86 -0
- data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +119 -0
- data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +86 -0
- data/utils/enveomics/Scripts/BlastTab.subsample.pl +47 -0
- data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +114 -0
- data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +90 -0
- data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +101 -0
- data/utils/enveomics/Scripts/Chao1.pl +97 -0
- data/utils/enveomics/Scripts/CharTable.classify.rb +234 -0
- data/utils/enveomics/Scripts/EBIseq2tax.rb +83 -0
- data/utils/enveomics/Scripts/FastA.N50.pl +60 -0
- data/utils/enveomics/Scripts/FastA.extract.rb +152 -0
- data/utils/enveomics/Scripts/FastA.filter.pl +52 -0
- data/utils/enveomics/Scripts/FastA.filterLen.pl +28 -0
- data/utils/enveomics/Scripts/FastA.filterN.pl +60 -0
- data/utils/enveomics/Scripts/FastA.fragment.rb +100 -0
- data/utils/enveomics/Scripts/FastA.gc.pl +42 -0
- data/utils/enveomics/Scripts/FastA.interpose.pl +93 -0
- data/utils/enveomics/Scripts/FastA.length.pl +38 -0
- data/utils/enveomics/Scripts/FastA.mask.rb +89 -0
- data/utils/enveomics/Scripts/FastA.per_file.pl +36 -0
- data/utils/enveomics/Scripts/FastA.qlen.pl +57 -0
- data/utils/enveomics/Scripts/FastA.rename.pl +65 -0
- data/utils/enveomics/Scripts/FastA.revcom.pl +23 -0
- data/utils/enveomics/Scripts/FastA.sample.rb +98 -0
- data/utils/enveomics/Scripts/FastA.slider.pl +85 -0
- data/utils/enveomics/Scripts/FastA.split.pl +55 -0
- data/utils/enveomics/Scripts/FastA.split.rb +79 -0
- data/utils/enveomics/Scripts/FastA.subsample.pl +131 -0
- data/utils/enveomics/Scripts/FastA.tag.rb +65 -0
- data/utils/enveomics/Scripts/FastA.toFastQ.rb +69 -0
- data/utils/enveomics/Scripts/FastA.wrap.rb +48 -0
- data/utils/enveomics/Scripts/FastQ.filter.pl +54 -0
- data/utils/enveomics/Scripts/FastQ.interpose.pl +90 -0
- data/utils/enveomics/Scripts/FastQ.maskQual.rb +89 -0
- data/utils/enveomics/Scripts/FastQ.offset.pl +90 -0
- data/utils/enveomics/Scripts/FastQ.split.pl +53 -0
- data/utils/enveomics/Scripts/FastQ.tag.rb +70 -0
- data/utils/enveomics/Scripts/FastQ.test-error.rb +81 -0
- data/utils/enveomics/Scripts/FastQ.toFastA.awk +24 -0
- data/utils/enveomics/Scripts/GFF.catsbj.pl +127 -0
- data/utils/enveomics/Scripts/GenBank.add_fields.rb +84 -0
- data/utils/enveomics/Scripts/HMM.essential.rb +351 -0
- data/utils/enveomics/Scripts/HMM.haai.rb +168 -0
- data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +83 -0
- data/utils/enveomics/Scripts/JPlace.distances.rb +88 -0
- data/utils/enveomics/Scripts/JPlace.to_iToL.rb +320 -0
- data/utils/enveomics/Scripts/M5nr.getSequences.rb +81 -0
- data/utils/enveomics/Scripts/MeTaxa.distribution.pl +198 -0
- data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +35 -0
- data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +49 -0
- data/utils/enveomics/Scripts/NCBIacc2tax.rb +92 -0
- data/utils/enveomics/Scripts/Newick.autoprune.R +27 -0
- data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +228 -0
- data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +32 -0
- data/utils/enveomics/Scripts/RefSeq.download.bash +48 -0
- data/utils/enveomics/Scripts/SRA.download.bash +55 -0
- data/utils/enveomics/Scripts/TRIBS.plot-test.R +36 -0
- data/utils/enveomics/Scripts/TRIBS.test.R +39 -0
- data/utils/enveomics/Scripts/Table.barplot.R +31 -0
- data/utils/enveomics/Scripts/Table.df2dist.R +30 -0
- data/utils/enveomics/Scripts/Table.filter.pl +61 -0
- data/utils/enveomics/Scripts/Table.merge.pl +77 -0
- data/utils/enveomics/Scripts/Table.prefScore.R +60 -0
- data/utils/enveomics/Scripts/Table.replace.rb +69 -0
- data/utils/enveomics/Scripts/Table.round.rb +63 -0
- data/utils/enveomics/Scripts/Table.split.pl +57 -0
- data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +227 -0
- data/utils/enveomics/Scripts/VCF.KaKs.rb +147 -0
- data/utils/enveomics/Scripts/VCF.SNPs.rb +88 -0
- data/utils/enveomics/Scripts/aai.rb +419 -0
- data/utils/enveomics/Scripts/ani.rb +362 -0
- data/utils/enveomics/Scripts/anir.rb +137 -0
- data/utils/enveomics/Scripts/clust.rand.rb +102 -0
- data/utils/enveomics/Scripts/gi2tax.rb +103 -0
- data/utils/enveomics/Scripts/in_silico_GA_GI.pl +96 -0
- data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
- data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
- data/utils/enveomics/Scripts/lib/enveomics.R +1 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +293 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +175 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +24 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +17 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +30 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +253 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +63 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +182 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +49 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +74 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +237 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +3 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +31 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +152 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +73 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +135 -0
- data/utils/enveomics/Scripts/ogs.annotate.rb +88 -0
- data/utils/enveomics/Scripts/ogs.core-pan.rb +160 -0
- data/utils/enveomics/Scripts/ogs.extract.rb +125 -0
- data/utils/enveomics/Scripts/ogs.mcl.rb +186 -0
- data/utils/enveomics/Scripts/ogs.rb +104 -0
- data/utils/enveomics/Scripts/ogs.stats.rb +131 -0
- data/utils/enveomics/Scripts/rbm-legacy.rb +172 -0
- data/utils/enveomics/Scripts/rbm.rb +100 -0
- data/utils/enveomics/Scripts/sam.filter.rb +148 -0
- data/utils/enveomics/Tests/Makefile +10 -0
- data/utils/enveomics/Tests/Mgen_M2288.faa +3189 -0
- data/utils/enveomics/Tests/Mgen_M2288.fna +8282 -0
- data/utils/enveomics/Tests/Mgen_M2321.fna +8288 -0
- data/utils/enveomics/Tests/Nequ_Kin4M.faa +2970 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +7 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +17 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +137 -0
- data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +123 -0
- data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +200 -0
- data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +55 -0
- data/utils/enveomics/Tests/alkB.nwk +1 -0
- data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +13 -0
- data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +17 -0
- data/utils/enveomics/Tests/hiv1.faa +59 -0
- data/utils/enveomics/Tests/hiv1.fna +134 -0
- data/utils/enveomics/Tests/hiv2.faa +70 -0
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +233 -0
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +1 -0
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +233 -0
- data/utils/enveomics/Tests/phyla_counts.tsv +10 -0
- data/utils/enveomics/Tests/primate_lentivirus.ogs +11 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +9 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +8 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +6 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +9 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +6 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +6 -0
- data/utils/enveomics/build_enveomics_r.bash +45 -0
- data/utils/enveomics/enveomics.R/DESCRIPTION +31 -0
- data/utils/enveomics/enveomics.R/NAMESPACE +39 -0
- data/utils/enveomics/enveomics.R/R/autoprune.R +155 -0
- data/utils/enveomics/enveomics.R/R/barplot.R +184 -0
- data/utils/enveomics/enveomics.R/R/cliopts.R +135 -0
- data/utils/enveomics/enveomics.R/R/df2dist.R +154 -0
- data/utils/enveomics/enveomics.R/R/growthcurve.R +331 -0
- data/utils/enveomics/enveomics.R/R/prefscore.R +79 -0
- data/utils/enveomics/enveomics.R/R/recplot.R +354 -0
- data/utils/enveomics/enveomics.R/R/recplot2.R +1631 -0
- data/utils/enveomics/enveomics.R/R/tribs.R +583 -0
- data/utils/enveomics/enveomics.R/R/utils.R +80 -0
- data/utils/enveomics/enveomics.R/README.md +81 -0
- data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
- data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +16 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +16 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +16 -0
- data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +25 -0
- data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +46 -0
- data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +47 -0
- data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +40 -0
- data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +103 -0
- data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +67 -0
- data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -0
- data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +45 -0
- data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +44 -0
- data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +47 -0
- data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +75 -0
- data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +50 -0
- data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +44 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +139 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +45 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +77 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +47 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +45 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +52 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +51 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +43 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +82 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +59 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +36 -0
- data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +68 -0
- data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -0
- data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -0
- data/utils/enveomics/enveomics.R/man/growth.curves.Rd +14 -0
- data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +13 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +78 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +46 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +45 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +125 -0
- data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -0
- data/utils/enveomics/globals.mk +8 -0
- data/utils/enveomics/manifest.json +9 -0
- data/utils/multitrim/Multitrim How-To.pdf +0 -0
- data/utils/multitrim/README.md +67 -0
- data/utils/multitrim/multitrim.py +1555 -0
- data/utils/multitrim/multitrim.yml +13 -0
- data/utils/requirements.txt +4 -3
- metadata +304 -3
@@ -0,0 +1,137 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
$:.push File.expand_path('../lib', __FILE__)
|
6
|
+
require 'enveomics_rb/enveomics'
|
7
|
+
require 'enveomics_rb/anir'
|
8
|
+
$VERSION = 1.0
|
9
|
+
|
10
|
+
# Default values for every option; the parser below overrides them in place.
o = {
  q: false, threads: 2,
  r_format: :fastq, g_format: :fasta, m_format: :sam, r_type: :single,
  identity: 95.0, algorithm: :auto, bimodality: 0.5, bin_size: 1.0,
  coefficient: :sarle
}

# Command-line interface. Each switch stores its value in `o`; format and
# algorithm names are normalized to lowercase symbols.
OptionParser.new do |opt|
  cmd = File.basename($0)
  opt.banner = <<~BANNER

    [Enveomics Collection: #{cmd} v#{$VERSION}]

    Estimates ANIr: the Average Nucleotide Identity of reads against a genome

    Usage
    # [ Input/output modes ]
    # Run mapping and (optionally) save it as SAM
    # Requires bowtie2
    #{cmd} -r reads.fastq -g genome.fasta -m out_map.sam [options]

    # Read mapping from BAM file
    # Requires samtools
    #{cmd} -m map.bam --m-format bam [options]

    # Read mapping from other formats: SAM or Tabular BLAST
    #{cmd} -m map.blast --m-format tab [options]

    # Read a list of identities as percentage (contig filtering off)
    #{cmd} -m identities.txt --m-format list [options]

    # [ Identity threshold modes ]
    #{cmd} -i 95 -a fix [options] # Set fixed identity threshold
    #{cmd} -a gmm [options] # Find valley by EM of GMM
    #{cmd} -a auto [options] # Pick method by bimodality (default)

  BANNER

  opt.separator 'Input/Output'
  opt.on('-r', '--reads PATH', 'Metagenomic reads') { |v| o[:r] = v }
  opt.on('-g', '--genome PATH', 'Genome assembly') { |v| o[:g] = v }
  opt.on('-m', '--mapping PATH', 'Mapping file') { |v| o[:m] = v }
  opt.on('-L', '--list PATH', 'Output file with identities') { |v| o[:L] = v }
  opt.on('-H', '--hist PATH', 'Output file with histogram') { |v| o[:H] = v }
  opt.on(
    '-T', '--tab PATH', 'Output file with results in tabular format'
  ) { |v| o[:T] = v }
  opt.separator ''

  opt.separator 'Formats'
  opt.on(
    '--r-format STRING',
    'Metagenomic reads format: fastq (default) or fasta',
    'Both options support compression with .gz file extension'
  ) { |v| o[:r_format] = v.downcase.to_sym }
  # NOTE(review): reads are passed with -r, so the comma-delimited pair is
  # documented against -r rather than the mapping option -m -- confirm
  # against Enveomics::ANIr
  opt.on(
    '--r-type STRING', 'Type of metagenomic reads:',
    '~ single (default): Single reads',
    '~ coupled: Coupled reads in separate files (-r must be comma-delimited)',
    '~ interleaved: Coupled reads in a single interposed file'
  ) { |v| o[:r_type] = v.downcase.to_sym }
  opt.on(
    '--g-format STRING',
    'Genome assembly format: fasta (default) or list',
    'Both options support compression with .gz file extension',
    'If passed in mapping-read mode, filters only matches to these contigs'
  ) { |v| o[:g_format] = v.downcase.to_sym }
  opt.on(
    '--m-format STRING',
    'Mapping file format: sam (default), bam, tab, or list',
    'sam, tab, and list options support compression with .gz file extension'
  ) { |v| o[:m_format] = v.downcase.to_sym }
  opt.separator ''

  opt.separator 'Identity threshold'
  opt.on(
    '-i', '--identity FLOAT', Float,
    "Set a fixed threshold of percent identity (default: #{o[:identity]})"
  ) { |v| o[:identity] = v }
  opt.on(
    '-a', '--algorithm STRING',
    'Set an algorithm to automatically detect identity threshold:',
    '~ gmm: Valley detection by E-M of Gaussian Mixture Model',
    '~ fix: Fixed threshold, see -i',
    '~ auto (default): Pick gmm or fix depending on bimodality, see -b'
  ) { |v| o[:algorithm] = v.downcase.to_sym }
  # The coefficient is selectable (default: sarle), so the help text points
  # to --coefficient instead of naming a single index
  opt.on(
    '-b', '--bimodality FLOAT', Float,
    'Threshold of bimodality below which the algorithm is set to fix',
    'The coefficient used is selected with --coefficient',
    "By default: #{o[:bimodality]}"
  ) { |v| o[:bimodality] = v }
  opt.on(
    '--coefficient STRING',
    'Coefficient of bimodality for -a auto:',
    '~ sarle (default): Sarle\'s bimodality coefficient b',
    '~ dma: de Michele and Accatino (2014 PLoS ONE) B index, use with -b 0.1'
  ) { |v| o[:coefficient] = v.downcase.to_sym }
  opt.on(
    '--bin-size FLOAT', Float,
    "Width of histogram bins (in percent identity). By default: #{o[:bin_size]}"
  ) { |v| o[:bin_size] = v }
  opt.separator ''

  opt.separator 'General'
  opt.on(
    '-t', '--threads INT', Integer, 'Threads to use'
  ) { |v| o[:threads] = v }
  opt.on('-l', '--log PATH', 'Log file to save output') { |v| o[:log] = v }
  opt.on('-q', '--quiet', 'Run quietly') { |v| o[:q] = v }
  opt.on('-h', '--help', 'Display this screen') do
    puts opt
    exit
  end
  opt.separator ''
end.parse!
|
126
|
+
|
127
|
+
# Build the estimator from the parsed options and run it
anir = Enveomics::ANIr.new(o)
anir.go!

# When -T was given, save a two-line table: a header and a single row with
# the sample mean, its standard deviation, the sample size, and the identity
# threshold stored in the estimator's options
if (tab_path = o[:T])
  File.open(tab_path, 'w') do |out|
    out.puts %w[anir sd reads id_threshold].join("\t")
    row = [
      anir.sample.mean, anir.sample.sd, anir.sample.n, anir.opts[:identity]
    ]
    out.puts row.join("\t")
  end
end
|
137
|
+
|
@@ -0,0 +1,102 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
#
|
4
|
+
# @author: Luis M. Rodriguez-R
|
5
|
+
# @license: artistic license 2.0
|
6
|
+
#
|
7
|
+
|
8
|
+
require "optparse"
|
9
|
+
|
10
|
+
# Defaults: report progress on STDERR and print six decimal places.
o = { q: false, prec: 6 }
# Invoked without any argument: behave as if -h had been passed.
ARGV << "-h" if ARGV.empty?
OptionParser.new do |opts|
  opts.banner = "
Calculates the Rand Index and the Adjusted Rand Index between two clusterings.

The clustering format is a raw text file with one cluster per line, each
defined as comma-delimited members, and a header line (ignored). Note that this
is equivalent to the OGs format for 1 genome.

Usage: #{$0} [options]"
  opts.separator ""
  opts.separator "Mandatory"
  opts.on("-1", "--clust1 FILE", "First input file."){ |v| o[:clust1] = v }
  opts.on("-2", "--clust2 FILE", "Second input file."){ |v| o[:clust2] = v }
  opts.separator ""
  opts.separator "Other options"
  # Integer coercion makes OptionParser reject non-numeric values instead of
  # silently turning them into 0, as String#to_i would
  opts.on("-p", "--prec INT", Integer,
    "Precision to report. By default: #{o[:prec]}"){ |v| o[:prec] = v }
  opts.on("-q", "--quiet", "Run quietly (no STDERR output)."){ o[:q] = true }
  opts.on("-h", "--help", "Display this screen.") do
    puts opts
    exit
  end
  opts.separator ""
end.parse!
# Both clusterings are required to compare them
abort "-1 is mandatory" if o[:clust1].nil?
abort "-2 is mandatory" if o[:clust2].nil?
|
38
|
+
|
39
|
+
##
# Reads a clustering from the text file at +file+.
#
# The first line is treated as a header and ignored. Every following line
# becomes one cluster: an Array with the comma-delimited members of that line
# (an empty line yields an empty cluster).
#
# Progress messages are written to STDERR unless +q+ is true.
#
# Returns an Array of clusters in file order.
def load_clust(file, q)
  $stderr.puts "Reading clusters in '#{file}'." unless q
  # drop(1) skips the header without relying on the global line counter `$.`
  out = File.foreach(file).drop(1).map { |ln| ln.chomp.split(",") }
  $stderr.puts " Loaded clusters: #{out.size}." unless q
  out
end
|
51
|
+
|
52
|
+
##
# Number of unordered pairs that can be drawn from +n+ items ("n choose 2").
# Returns 0 when +n+ is smaller than 2.
def choose_2(n)
  n < 2 ? 0 : n * (n - 1) / 2
end
|
56
|
+
|
57
|
+
##### MAIN:
|
58
|
+
begin
  # Read both clusterings (one cluster per line, header skipped)
  clust1 = load_clust(o[:clust1], o[:q])
  clust2 = load_clust(o[:clust2], o[:q])

  # Contingency table: cont[i][j] is the number of members shared by
  # cluster i of the first clustering and cluster j of the second
  $stderr.puts "Estimating the contingency table." unless o[:q]
  cont = clust1.map { |x_i| clust2.map { |y_j| (x_i & y_j).size } }
  # Row and column totals; the explicit 0 seed keeps the sums numeric even
  # when a clustering is empty (inject(:+) alone would return nil)
  a_sums = cont.map { |row| row.inject(0, :+) }
  b_sums = clust2.each_index.map do |j|
    cont.inject(0) { |sum, row| sum + row[j] }
  end

  # Calculate variables
  # - see http://i11www.iti.kit.edu/extra/publications/ww-cco-06.pdf
  $stderr.puts "Estimating indexes." unless o[:q]
  n = clust1.inject(0) { |sum, x_i| sum + x_i.size }
  # Both indexes divide by the number of pairs, which is zero below two
  # members; stop with a clear message instead of reporting NaN
  abort "At least two clustered members are required" if n < 2
  pairs = choose_2(n)
  n11 = cont.flatten.inject(0) { |sum, c_ij| sum + choose_2(c_ij) }.to_f
  t1 = a_sums.inject(0) { |sum, a_i| sum + choose_2(a_i) }.to_f
  t2 = b_sums.inject(0) { |sum, b_j| sum + choose_2(b_j) }.to_f
  t3 = 2*t1*t2/(n*(n-1))
  n00 = pairs + n11 - t1 - t2
  r_index = (n11 + n00)/pairs
  r_adjusted = (n11 - t3)/((t1+t2)/2 - t3)

  # Report
  puts "Rand Index = %.#{o[:prec]}f" % r_index
  puts "Adjusted Rand Index = %.#{o[:prec]}f" % r_adjusted
rescue => err
  $stderr.puts "Exception: #{err}\n\n"
  err.backtrace.each { |l| $stderr.puts l + "\n" }
  # Report the failure to the calling shell instead of exiting with status 0
  exit 1
end
|
102
|
+
|
@@ -0,0 +1,103 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
#
|
4
|
+
# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
|
5
|
+
# @license artistic license 2.0
|
6
|
+
#
|
7
|
+
|
8
|
+
$:.push File.expand_path(File.dirname(__FILE__) + "/lib")
|
9
|
+
require "enveomics_rb/remote_data"
|
10
|
+
use "nokogiri"
|
11
|
+
|
12
|
+
#================================[ Options parsing ]
|
13
|
+
# Default value of every command-line option
o = {
  q: false,
  gis: [],
  dbfrom: "nuccore",
  header: true,
  exact_gi: false,
  no_nil: false,
  ret: "ScientificName",
  ranks: %w[superkingdom phylum class order family genus species]
}

# Command-line interface; each switch updates `o` in place
OptionParser.new do |opt|
  # Leading spaces are stripped from every banner line
  opt.banner = "
Maps a list of NCBI GIs to their corresponding taxonomy using the NCBI
EUtilities. Avoid using this script on millions of entries at a time, since
each entry elicits two requests to NCBI's servers.

*IMPORTANT NOTE*: NCBI is phasing out support for GIs. Please use acc.ver
instead with NCBIacc2tax.rb.

Usage: #{$0} [options]".gsub(/^ +/,"")
  opt.separator ""
  opt.on(
    "-g", "--gis GI1,GI2,...", Array,
    "Comma-separated list of GIs. Required unless -i is passed."
  ) do |v|
    o[:gis] = v
  end
  opt.on(
    "-i", "--infile FILE",
    "Raw text file containing the list of GIs, one per line.",
    "Required unless -g is passed."
  ) do |v|
    o[:infile] = v
  end
  opt.on(
    "-p", "--protein",
    "Use if the GIs are proteins. Otherwise, GIs are assumed to be from " +
    "the Nuccore Database."
  ) do
    o[:dbfrom] = "protein"
  end
  opt.on(
    "-r", "--ranks RANK1,RANK2,...", Array,
    "Taxonomic ranks to report. By default: #{o[:ranks].join(",")}."
  ) do |v|
    o[:ranks] = v
  end
  opt.on("-n", "--noheader", "Do not include a header in the output.") do
    o[:header] = false
  end
  opt.on(
    "-t", "--taxids", "Return Taxonomy IDs instead of scientific names."
  ) do
    o[:ret] = "TaxId"
  end
  opt.on(
    "--exact-gi",
    "Returns only taxonomy associated with the exact GI passed.",
    "By default, it attempts to update accession versions if possible."
  ) do |v|
    o[:exact_gi] = v
  end
  opt.on(
    "--ignore-missing",
    "Does not report missing GIs in the output file.",
    "By default, it reports GI and empty values for all other columns."
  ) do |v|
    o[:no_nil] = v
  end
  opt.on("-q", "--quiet", "Run quietly.") do
    o[:q] = true
  end
  opt.on("-h", "--help","Display this screen") do
    puts opt
    exit
  end
  opt.separator ""
end.parse!
|
59
|
+
|
60
|
+
#================================[ Functions ]
|
61
|
+
# Queries the eLink service (through RemoteData.elink) for the taxonomy entry
# linked to +gi+ in the source database +db+.
#
# Returns the first `Id` node found under LinkSet/LinkSetDb/Link in the XML
# response, or nil when the response contains no such node.
def gi2taxid(db, gi)
  query = {:dbfrom=>db, :db=>"taxonomy", :id=>gi}
  response = RemoteData.elink(query)
  Nokogiri::XML(response).at_xpath("/eLinkResult/LinkSet/LinkSetDb/Link/Id")
end
|
66
|
+
#================================[ Main ]
|
67
|
+
begin
  # GIs listed in the input file (one per line) are appended to those given
  # on the command line.
  unless o[:infile].nil?
    o[:gis] += File.readlines(o[:infile]).map { |line| line.chomp }
  end

  # Ranks are compared in lowercase and shown capitalized in the header.
  o[:ranks].map! { |rank| rank.downcase }
  if o[:header]
    header = ["GI", "TaxId"] + o[:ranks].map { |rank| rank.capitalize }
    puts header.join("\t")
  end

  o[:gis].each do |gi|
    taxid = gi2taxid(o[:dbfrom], gi)
    status = ""

    # No direct link: unless --exact-gi was requested, ask RemoteData for an
    # updated GI and retry the lookup with it.
    if taxid.nil? && !o[:exact_gi]
      new_gi, status = RemoteData.update_gi(o[:dbfrom], gi)
      taxid = gi2taxid(o[:dbfrom], new_gi) unless new_gi.nil?
    end

    # Still nothing: warn, optionally emit a row with empty columns, move on.
    if taxid.nil?
      warn "Cannot find link to taxonomy: #{gi} #{status}"
      unless o[:no_nil]
        empty_row = [gi, ""] + o[:ranks].map { "" }
        puts empty_row.join("\t")
      end
      next
    end

    # Fetch the taxon record and index the requested field (o[:ret]) by rank,
    # first for the taxon itself and then for every entry of its lineage.
    doc = Nokogiri::XML(
      RemoteData.efetch({:db=>"taxonomy", :id=>taxid.content})
    )
    taxonomy = {}
    taxonomy[ doc.at_xpath("/TaxaSet/Taxon/Rank").content ] =
      doc.at_xpath("/TaxaSet/Taxon/#{o[:ret]}").content
    doc.xpath("/TaxaSet/Taxon/LineageEx/Taxon").each do |taxon|
      taxonomy[ taxon.at_xpath("./Rank").content ] =
        taxon.at_xpath("./#{o[:ret]}").content
    end

    # One output row per GI; ranks absent from the lineage are left empty.
    row = [gi, taxid.content] + o[:ranks].map { |rank| taxonomy[rank] || "" }
    puts row.join("\t")
  end
rescue => err
  # Report the failure and its backtrace on STDERR.
  $stderr.puts "Exception: #{err}\n\n"
  err.backtrace.each do |frame|
    $stderr.puts frame + "\n"
  end
  err
end
|
@@ -0,0 +1,96 @@
|
|
1
|
+
# usage perl in_silico_GA.pl [options]
#
# Simulates paired reads from a FASTA file. Every sequence line of the input
# is concatenated into one template, which is treated as circular: a fragment
# that runs past the end is completed with bases from the start.
#
# Options:
#   --in FILE        input FASTA file (required)
#   --out FILE       output FASTA file with the simulated reads (required)
#   --coverage X     desired coverage (required)
#   --read_len N     simulated read length (required, must be > 0)
#   --seq_error P    per-base probability of drawing a replacement base
#                    (default: 0)
#   --ins_len N      insert length between the two reads (default: 0)
#   --ins_var X      passed to random_normal() as the spread of the fragment
#                    length; when omitted, Math::Random's own default applies
#                    (presumably 1 -- confirm against the Math::Random docs)
#
# Progress information is printed to STDOUT; reads are written to --out as
# ">ID#0/1" / ">ID#0/2" FASTA record pairs.

use strict;
use warnings;
use Getopt::Long;
use Math::Random qw(:all);

my $usage = "usage: perl in_silico_GA.pl --in FILE --out FILE --coverage X "
          . "--read_len N [--seq_error P] [--ins_len N] [--ins_var X]\n";

my ($infile, $outfile, $cov, $seq_error, $read_len, $ins_len, $ins_var);
GetOptions(
    'in=s'        => \$infile,       # input fasta chr file
    'out=s'       => \$outfile,      # output file name
    'coverage=s'  => \$cov,          # desired output
    'seq_error=s' => \$seq_error,    # sequencing error
    'read_len=s'  => \$read_len,     # simulated read length
    'ins_len=s'   => \$ins_len,      # insertion length
    'ins_var=s'   => \$ins_var,
) or die $usage;

# Fail early with a clear message instead of producing an empty output file
# or dying later with "Illegal division by zero".
die "Missing required option --in\n$usage"       unless defined $infile;
die "Missing required option --out\n$usage"      unless defined $outfile;
die "Missing required option --coverage\n$usage" unless defined $cov;
die "Missing required option --read_len\n$usage" unless defined $read_len;
die "--read_len must be greater than zero\n"     unless $read_len > 0;

# Undefined values used to behave as 0 in the arithmetic below; make that
# explicit so the script stays warning-free.
$seq_error = 0 unless defined $seq_error;
$ins_len   = 0 unless defined $ins_len;

# Replacement bases, indexed by int(rand(4)).
my @bases = qw(C A T G);

#---------------------------------------------------------------[ Input ]
open(my $in_fh, '<', $infile)
    or die "Cannot open input file '$infile': $!\n";

my $chr = '';    # concatenated template sequence
my $gi  = '';    # identifier taken from the last header line seen
while (my $line = <$in_fh>) {
    chomp $line;
    if ($line !~ /^\>/) {
        $chr .= $line;
        next;
    }
    # Header line: keep the GI when the header has the ">gi|N|db|acc" shape,
    # otherwise fall back to the whole header line. The original code wrote
    # "$gi= ~/.../", an assignment of a bitwise complement that is always
    # true, so non-matching headers ended up with an undefined identifier.
    $gi = $line;
    if ($gi =~ /^\>gi\|(\S+)\|\S+\|\S+/) {
        $gi = $1;
    }
}
close($in_fh);

open(my $out_fh, '>', $outfile)
    or die "Cannot open output file '$outfile': $!\n";

my $chr_size = length $chr;
print "chromosome size: $chr_size\n";
my $seg_size     = 2 * $read_len + $ins_len;
my $reads_number = int($cov * $chr_size / ($read_len * 2));
print "generated reads $reads_number x 2\n";

#----------------------------------------------------------[ Simulation ]
for my $index (1 .. $reads_number) {
    # Zero-padded (8 digits) read counter followed by the sequence id.
    my $id = sprintf('read%08d_%s', $index, $gi);

    # make start site
    my $start_site = int(rand($chr_size));
    # make short seg length
    my $seg_length = int(random_normal(1, $seg_size, $ins_var));
    # A draw shorter than one read (or negative) would make substr() cut
    # from the wrong end and yield malformed reads; clamp it.
    $seg_length = $read_len if $seg_length < $read_len;

    # extract the segment, wrapping around the end of the template
    my $seg = substr($chr, $start_site, $seg_length);
    my $gap = $seg_length - length($seg);
    $seg .= substr($chr, 0, $gap) if $gap > 0;

    $id .= '.start' . $start_site . '.seg_len' . $seg_length;

    # get the reads: first and last $read_len bases of the fragment
    # (the second read is NOT reverse-complemented)
    my @seq1 = split(//, substr($seg, 0, $read_len));
    my @seq2 = split(//, substr($seg, -$read_len));

    # sequencing error introducing: one Bernoulli draw per position. The
    # replacement base is drawn from all four bases, so it can coincide
    # with the original one.
    my @mut1 = random_binomial($read_len, 1, $seq_error);
    my @mut2 = random_binomial($read_len, 1, $seq_error);
    for my $i (0 .. $#mut1) {
        $seq1[$i] = $bases[ int(rand(4)) ] if $mut1[$i] == 1;
        $seq2[$i] = $bases[ int(rand(4)) ] if $mut2[$i] == 1;
    }

    print {$out_fh} '>', $id, "#0/1\n", join('', @seq1), "\n",
                    '>', $id, "#0/2\n", join('', @seq2), "\n";
}

# Buffered write errors only surface on close.
close($out_fh) or die "Error writing output file '$outfile': $!\n";
|
95
|
+
|
96
|
+
|
Binary file
|
Binary file
|
@@ -0,0 +1 @@
|
|
1
|
+
../../enveomics.R
|