miga-base 0.7.26.0 → 0.7.26.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/miga/version.rb +1 -1
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +41964 -0
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +32439 -0
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +62056 -0
- data/utils/FastAAI/FastAAI/FastAAI +1336 -0
- data/utils/FastAAI/README.md +84 -0
- data/utils/FastAAI/kAAI_v1.0_virus.py +1296 -0
- data/utils/enveomics/Docs/recplot2.md +244 -0
- data/utils/enveomics/Examples/aai-matrix.bash +66 -0
- data/utils/enveomics/Examples/ani-matrix.bash +66 -0
- data/utils/enveomics/Examples/essential-phylogeny.bash +105 -0
- data/utils/enveomics/Examples/unus-genome-phylogeny.bash +100 -0
- data/utils/enveomics/LICENSE.txt +73 -0
- data/utils/enveomics/Makefile +52 -0
- data/utils/enveomics/Manifest/Tasks/aasubs.json +103 -0
- data/utils/enveomics/Manifest/Tasks/blasttab.json +786 -0
- data/utils/enveomics/Manifest/Tasks/distances.json +161 -0
- data/utils/enveomics/Manifest/Tasks/fasta.json +766 -0
- data/utils/enveomics/Manifest/Tasks/fastq.json +243 -0
- data/utils/enveomics/Manifest/Tasks/graphics.json +126 -0
- data/utils/enveomics/Manifest/Tasks/mapping.json +67 -0
- data/utils/enveomics/Manifest/Tasks/ogs.json +382 -0
- data/utils/enveomics/Manifest/Tasks/other.json +829 -0
- data/utils/enveomics/Manifest/Tasks/remote.json +355 -0
- data/utils/enveomics/Manifest/Tasks/sequence-identity.json +501 -0
- data/utils/enveomics/Manifest/Tasks/tables.json +308 -0
- data/utils/enveomics/Manifest/Tasks/trees.json +68 -0
- data/utils/enveomics/Manifest/Tasks/variants.json +111 -0
- data/utils/enveomics/Manifest/categories.json +156 -0
- data/utils/enveomics/Manifest/examples.json +154 -0
- data/utils/enveomics/Manifest/tasks.json +4 -0
- data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +69 -0
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -0
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -0
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -0
- data/utils/enveomics/Pipelines/assembly.pbs/README.md +189 -0
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +112 -0
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +23 -0
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +44 -0
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +50 -0
- data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +37 -0
- data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +68 -0
- data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +49 -0
- data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +80 -0
- data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +57 -0
- data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +63 -0
- data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +38 -0
- data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +73 -0
- data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +21 -0
- data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +72 -0
- data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +98 -0
- data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -0
- data/utils/enveomics/Pipelines/blast.pbs/README.md +127 -0
- data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +109 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +128 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +16 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +22 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +26 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +89 -0
- data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +29 -0
- data/utils/enveomics/Pipelines/idba.pbs/README.md +49 -0
- data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +95 -0
- data/utils/enveomics/Pipelines/idba.pbs/run.pbs +56 -0
- data/utils/enveomics/Pipelines/trim.pbs/README.md +54 -0
- data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +70 -0
- data/utils/enveomics/Pipelines/trim.pbs/run.pbs +130 -0
- data/utils/enveomics/README.md +42 -0
- data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +171 -0
- data/utils/enveomics/Scripts/Aln.cat.rb +163 -0
- data/utils/enveomics/Scripts/Aln.convert.pl +35 -0
- data/utils/enveomics/Scripts/AlphaDiversity.pl +152 -0
- data/utils/enveomics/Scripts/BedGraph.tad.rb +93 -0
- data/utils/enveomics/Scripts/BedGraph.window.rb +71 -0
- data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +102 -0
- data/utils/enveomics/Scripts/BlastTab.addlen.rb +63 -0
- data/utils/enveomics/Scripts/BlastTab.advance.bash +48 -0
- data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +55 -0
- data/utils/enveomics/Scripts/BlastTab.catsbj.pl +104 -0
- data/utils/enveomics/Scripts/BlastTab.cogCat.rb +76 -0
- data/utils/enveomics/Scripts/BlastTab.filter.pl +47 -0
- data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +194 -0
- data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +104 -0
- data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +157 -0
- data/utils/enveomics/Scripts/BlastTab.recplot2.R +48 -0
- data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +86 -0
- data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +119 -0
- data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +86 -0
- data/utils/enveomics/Scripts/BlastTab.subsample.pl +47 -0
- data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +114 -0
- data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +90 -0
- data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +101 -0
- data/utils/enveomics/Scripts/Chao1.pl +97 -0
- data/utils/enveomics/Scripts/CharTable.classify.rb +234 -0
- data/utils/enveomics/Scripts/EBIseq2tax.rb +83 -0
- data/utils/enveomics/Scripts/FastA.N50.pl +56 -0
- data/utils/enveomics/Scripts/FastA.extract.rb +152 -0
- data/utils/enveomics/Scripts/FastA.filter.pl +52 -0
- data/utils/enveomics/Scripts/FastA.filterLen.pl +28 -0
- data/utils/enveomics/Scripts/FastA.filterN.pl +60 -0
- data/utils/enveomics/Scripts/FastA.fragment.rb +92 -0
- data/utils/enveomics/Scripts/FastA.gc.pl +42 -0
- data/utils/enveomics/Scripts/FastA.interpose.pl +93 -0
- data/utils/enveomics/Scripts/FastA.length.pl +38 -0
- data/utils/enveomics/Scripts/FastA.mask.rb +89 -0
- data/utils/enveomics/Scripts/FastA.per_file.pl +36 -0
- data/utils/enveomics/Scripts/FastA.qlen.pl +57 -0
- data/utils/enveomics/Scripts/FastA.rename.pl +65 -0
- data/utils/enveomics/Scripts/FastA.revcom.pl +23 -0
- data/utils/enveomics/Scripts/FastA.sample.rb +83 -0
- data/utils/enveomics/Scripts/FastA.slider.pl +85 -0
- data/utils/enveomics/Scripts/FastA.split.pl +55 -0
- data/utils/enveomics/Scripts/FastA.split.rb +79 -0
- data/utils/enveomics/Scripts/FastA.subsample.pl +131 -0
- data/utils/enveomics/Scripts/FastA.tag.rb +65 -0
- data/utils/enveomics/Scripts/FastA.wrap.rb +48 -0
- data/utils/enveomics/Scripts/FastQ.filter.pl +54 -0
- data/utils/enveomics/Scripts/FastQ.interpose.pl +90 -0
- data/utils/enveomics/Scripts/FastQ.offset.pl +90 -0
- data/utils/enveomics/Scripts/FastQ.split.pl +53 -0
- data/utils/enveomics/Scripts/FastQ.tag.rb +63 -0
- data/utils/enveomics/Scripts/FastQ.test-error.rb +81 -0
- data/utils/enveomics/Scripts/FastQ.toFastA.awk +24 -0
- data/utils/enveomics/Scripts/GFF.catsbj.pl +127 -0
- data/utils/enveomics/Scripts/GenBank.add_fields.rb +84 -0
- data/utils/enveomics/Scripts/HMM.essential.rb +351 -0
- data/utils/enveomics/Scripts/HMM.haai.rb +168 -0
- data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +83 -0
- data/utils/enveomics/Scripts/JPlace.distances.rb +88 -0
- data/utils/enveomics/Scripts/JPlace.to_iToL.rb +320 -0
- data/utils/enveomics/Scripts/M5nr.getSequences.rb +81 -0
- data/utils/enveomics/Scripts/MeTaxa.distribution.pl +198 -0
- data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +35 -0
- data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +49 -0
- data/utils/enveomics/Scripts/NCBIacc2tax.rb +92 -0
- data/utils/enveomics/Scripts/Newick.autoprune.R +27 -0
- data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +228 -0
- data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +32 -0
- data/utils/enveomics/Scripts/RefSeq.download.bash +48 -0
- data/utils/enveomics/Scripts/SRA.download.bash +57 -0
- data/utils/enveomics/Scripts/TRIBS.plot-test.R +36 -0
- data/utils/enveomics/Scripts/TRIBS.test.R +39 -0
- data/utils/enveomics/Scripts/Table.barplot.R +31 -0
- data/utils/enveomics/Scripts/Table.df2dist.R +30 -0
- data/utils/enveomics/Scripts/Table.filter.pl +61 -0
- data/utils/enveomics/Scripts/Table.merge.pl +77 -0
- data/utils/enveomics/Scripts/Table.replace.rb +69 -0
- data/utils/enveomics/Scripts/Table.round.rb +63 -0
- data/utils/enveomics/Scripts/Table.split.pl +57 -0
- data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +227 -0
- data/utils/enveomics/Scripts/VCF.KaKs.rb +147 -0
- data/utils/enveomics/Scripts/VCF.SNPs.rb +88 -0
- data/utils/enveomics/Scripts/aai.rb +418 -0
- data/utils/enveomics/Scripts/ani.rb +362 -0
- data/utils/enveomics/Scripts/clust.rand.rb +102 -0
- data/utils/enveomics/Scripts/gi2tax.rb +103 -0
- data/utils/enveomics/Scripts/in_silico_GA_GI.pl +96 -0
- data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
- data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
- data/utils/enveomics/Scripts/lib/enveomics.R +1 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +24 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +253 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +182 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +74 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +237 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stat.rb +30 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +135 -0
- data/utils/enveomics/Scripts/ogs.annotate.rb +88 -0
- data/utils/enveomics/Scripts/ogs.core-pan.rb +160 -0
- data/utils/enveomics/Scripts/ogs.extract.rb +125 -0
- data/utils/enveomics/Scripts/ogs.mcl.rb +186 -0
- data/utils/enveomics/Scripts/ogs.rb +104 -0
- data/utils/enveomics/Scripts/ogs.stats.rb +131 -0
- data/utils/enveomics/Scripts/rbm.rb +146 -0
- data/utils/enveomics/Tests/Makefile +10 -0
- data/utils/enveomics/Tests/Mgen_M2288.faa +3189 -0
- data/utils/enveomics/Tests/Mgen_M2288.fna +8282 -0
- data/utils/enveomics/Tests/Mgen_M2321.fna +8288 -0
- data/utils/enveomics/Tests/Nequ_Kin4M.faa +2970 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +7 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +17 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +137 -0
- data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +123 -0
- data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +200 -0
- data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +55 -0
- data/utils/enveomics/Tests/alkB.nwk +1 -0
- data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +13 -0
- data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +17 -0
- data/utils/enveomics/Tests/hiv1.faa +59 -0
- data/utils/enveomics/Tests/hiv1.fna +134 -0
- data/utils/enveomics/Tests/hiv2.faa +70 -0
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +233 -0
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +1 -0
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +233 -0
- data/utils/enveomics/Tests/phyla_counts.tsv +10 -0
- data/utils/enveomics/Tests/primate_lentivirus.ogs +11 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +9 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +8 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +6 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +9 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +6 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +6 -0
- data/utils/enveomics/build_enveomics_r.bash +45 -0
- data/utils/enveomics/enveomics.R/DESCRIPTION +31 -0
- data/utils/enveomics/enveomics.R/NAMESPACE +39 -0
- data/utils/enveomics/enveomics.R/R/autoprune.R +155 -0
- data/utils/enveomics/enveomics.R/R/barplot.R +184 -0
- data/utils/enveomics/enveomics.R/R/cliopts.R +135 -0
- data/utils/enveomics/enveomics.R/R/df2dist.R +154 -0
- data/utils/enveomics/enveomics.R/R/growthcurve.R +331 -0
- data/utils/enveomics/enveomics.R/R/recplot.R +354 -0
- data/utils/enveomics/enveomics.R/R/recplot2.R +1631 -0
- data/utils/enveomics/enveomics.R/R/tribs.R +583 -0
- data/utils/enveomics/enveomics.R/R/utils.R +50 -0
- data/utils/enveomics/enveomics.R/README.md +80 -0
- data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
- data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +25 -0
- data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +46 -0
- data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +47 -0
- data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +32 -0
- data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +91 -0
- data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +57 -0
- data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -0
- data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +39 -0
- data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +38 -0
- data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +40 -0
- data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +67 -0
- data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +37 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +122 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +45 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +68 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +41 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +40 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +41 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +43 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +37 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +74 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +59 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +32 -0
- data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +59 -0
- data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -0
- data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -0
- data/utils/enveomics/enveomics.R/man/growth.curves.Rd +14 -0
- data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +13 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +63 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +38 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +38 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +111 -0
- data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -0
- data/utils/enveomics/globals.mk +8 -0
- data/utils/enveomics/manifest.json +9 -0
- metadata +277 -4
@@ -0,0 +1,30 @@
|
|
1
|
+
|
2
|
+
module Enve
  ##
  # Small collection of random-variate generators.
  class Stat
    # Generates a random number from the +dist+ distribution with +params+
    # parameters. This is simply a wrapper to the r_* functions below: the
    # call is dispatched to the class method named <tt>r_<dist></tt>.
    #
    # Called without arguments it draws from the uniform distribution with
    # the defaults of +r_unif+. Note that, inside this class, a bare +rand+
    # resolves to this method (not to Kernel#rand).
    def self.rand(dist = :unif, *params)
      send("r_#{dist}", *params)
    end

    # Generates a random number from the uniform distribution between +min+ and
    # +max+. By default generates random numbers between 0.0 and 1.0.
    def self.r_unif(min = 0.0, max = 1.0)
      min + (max - min) * Random.rand
    end

    # Generates a random number from the geometric distribution with support
    # {0, 1, 2, ...} and probability of success +p+.
    #
    # Uses inverse-transform sampling: floor(ln(1 - U) / ln(1 - p)) with U
    # drawn by +rand+. The floor form (instead of ceil(x - 1)) keeps the
    # result inside the support in the boundary cases where the ratio is
    # exactly zero, i.e. when +p+ is 1.0 (ln(0) is -Infinity) or when the
    # uniform draw is exactly 0.0; the ceil form returned -1 there.
    def self.r_geom(p)
      (Math.log(1.0 - rand) / Math.log(1.0 - p)).floor
    end

    # Generates a random number from the shifted geometric distribution with
    # support {1, 2, 3, ...} and probability of success +p+.
    #
    # Defined as one plus a draw of +r_geom+, so it shares its boundary
    # handling and never returns 0.
    def self.r_sgeom(p)
      r_geom(p) + 1
    end
  end
end
|
30
|
+
|
@@ -0,0 +1,135 @@
|
|
1
|
+
# @author Luis M. Rodriguez-R
|
2
|
+
# @license Artistic-2.0
|
3
|
+
|
4
|
+
##
# Enveomics representation of a Variant Call Format (VCF) file.
class VCF
  ##
  # File-handler: the object lines are read from (see #initialize).
  attr_reader :fh

  ##
  # Initialize from +file+. A String is taken as a path and opened for
  # reading; any other object is stored as-is and must respond to +rewind+
  # and +each_line+ (e.g., an open File or a StringIO).
  def initialize(file)
    @fh = file.is_a?(String) ? File.open(file, "r") : file
  end

  ##
  # Iterate through each variant (i.e., each non-comment line), passing a
  # VCF::Variant object to +blk+. The handler is rewound first, so every
  # call starts from the top of the file.
  #
  # Blank lines are skipped (they carry no columns to parse). Without a
  # block, an Enumerator is returned instead.
  def each_variant(&blk)
    return enum_for(:each_variant) if blk.nil?

    fh.rewind
    fh.each_line do |ln|
      next if ln.start_with?("#") || ln.strip.empty?

      blk.call VCF::Variant.new(ln)
    end
  end

  ##
  # Iterate through each header (i.e., each comment line), passing a String to
  # +blk+. The handler is rewound first. Without a block, an Enumerator is
  # returned instead.
  def each_header(&blk)
    return enum_for(:each_header) if blk.nil?

    fh.rewind
    fh.each_line do |ln|
      blk.call ln if ln.start_with?("#")
    end
  end
end
|
37
|
+
|
38
|
+
class VCF
  ##
  # A single variant: one non-comment line of a VCF file.
  class Variant
    ##
    # Column definitions in VCF, in file order. A reader method is defined
    # below for each of them.
    COLUMNS = [
      :chrom, :pos, :id, :ref, :alt, :qual, :filter, :info, :format, :bam
    ].freeze

    ##
    # An Array containing each of the VCF entry's columns. All columns are
    # String, except QUAL (index 5, a Float) and INFO (index 7, a Hash).
    attr_reader :data

    ##
    # Initialize VCF::Variant from String +line+, a non-comment line in the VCF.
    #
    # QUAL is converted to Float and INFO is parsed into a Hash with Symbol
    # keys; entries without "=" (flags) are stored with value +true+. If a
    # sample column is present, its FORMAT fields are merged into the same
    # INFO Hash. An :INDEL flag is set when REF and the first ALT allele
    # differ in length.
    #
    # Raises ArgumentError if the line has fewer than the 8 columns read here.
    def initialize(line)
      @data = line.chomp.split("\t")
      if @data.size < 8
        raise ArgumentError,
              "Malformed VCF line (#{@data.size} columns, expected 8+): " \
              "#{line.chomp}"
      end
      # Qual as float
      @data[5] = data[5].to_f
      # Split info
      @data[7] = data[7].split(";").each_with_object({}) do |entry, hash|
        key, value = entry =~ /=/ ? entry.split("=", 2) : [entry, true]
        hash[key.to_sym] = value
      end
      # Read formatted data: pair each FORMAT key with the sample's value
      unless data[9].nil? or data[9].empty?
        values = bam.split(":")
        format.split(":").each_with_index do |key, i|
          @data[7][key.to_sym] = values[i]
        end
      end
      # An empty ALT column yields no alleles; treat it as length zero
      first_alt = alt.split(",").first.to_s
      @data[7][:INDEL] = true if ref.size != first_alt.size
    end

    ##
    # Named functions for each column. +pos+ is returned as Integer, all
    # other columns are returned as stored in #data.
    COLUMNS.each_with_index do |column, i|
      define_method(column) { column == :pos ? data[i].to_i : data[i] }
    end

    ##
    # Sequencing depth (DP entry), or +nil+ if absent.
    def dp
      return nil if info[:DP].nil?

      info[:DP].to_i
    end

    ##
    # Sequencing depth of FWD-REF, REV-REF, FWD-ALT, and REV-ALT (DP4 entry)
    # as an Array of Integer, or +nil+ if absent.
    def dp4
      return nil if info[:DP4].nil?

      @dp4 ||= info[:DP4].split(",").map { |i| i.to_i }
    end

    ##
    # Sequencing depth of REF and ALT (AD entry) as an Array of Integer, or
    # +nil+ if absent.
    def ad
      return nil if info[:AD].nil?

      @ad ||= info[:AD].split(",").map { |i| i.to_i }
    end

    ##
    # Sequencing depth of the REF allele, from DP4 if available and from AD
    # otherwise. Returns +nil+ if neither is present.
    def ref_dp
      return dp4[0] + dp4[1] unless dp4.nil?
      return ad[0] unless ad.nil?

      nil
    end

    ##
    # Sequencing depth of the ALT allele, from DP4 if available and from AD
    # otherwise. Returns +nil+ if neither is present.
    def alt_dp
      return dp4[2] + dp4[3] unless dp4.nil?
      return ad[1] unless ad.nil?

      nil
    end

    ##
    # Information content of the variant in bits (from 0 to 1), computed as
    # the Shannon entropy of the REF/ALT depth fractions.
    #
    # An allele with zero depth contributes nothing (the limit of p*log2(p)
    # as p approaches zero), so a site supported by a single allele returns
    # 0.0 rather than NaN. Returns +nil+ if no allele depths are available.
    def shannon
      return @shannon unless @shannon.nil?

      a = ref_dp
      b = alt_dp
      return nil if a.nil? || b.nil?

      total = (a + b).to_f
      @shannon = [a, b].inject(0.0) do |bits, depth|
        next bits if depth.zero?

        frac = depth / total
        bits - frac * Math.log(frac, 2)
      end
    end

    ##
    # Is it an indel? Returns the stored INDEL value, or +false+ if unset.
    def indel?
      info[:INDEL] || false
    end

    ##
    # Return as String: tab-delimited columns with a trailing newline.
    # QUAL is written as the Float stored in #data.
    def to_s
      (data[0..6] + [info_to_s] + data[8..-1].to_a).join("\t") + "\n"
    end

    ##
    # Returns the INFO entry as String, with every entry as KEY=VALUE.
    # Flags are stored as +true+ and therefore written as "KEY=true", and
    # sample FORMAT fields merged at initialization are written here too.
    def info_to_s
      data[7].map { |pair| pair.join("=") }.join(";")
    end
  end
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
#!/usr/bin/env ruby

#
# @author: Luis M. Rodriguez-R
# @update: Mar-23-2015
# @license: artistic license 2.0
#

$:.push File.expand_path(File.dirname(__FILE__) + '/lib')
require 'enveomics_rb/og'
require 'optparse'

# Default options. The literals +true+/+false+ are used because the TRUE and
# FALSE constants were deprecated in Ruby 2.4 and removed in later releases.
o = {:q=>false, :f=>"(\\S+)\\.txt", :consolidate=>true, :pre=>[]}
ARGV << '-h' if ARGV.size==0
OptionParser.new do |opts|
  opts.banner = "
Annotates Orthology Groups (OGs) using one or more reference genomes.

Usage: #{$0} [options]"
  opts.separator ""
  opts.separator "Mandatory"
  opts.on("-i", "--in FILE", "Input file containing the OGs (as generated by ogs.rb)."){ |v| o[:in]=v }
  opts.on("-o", "--out FILE", "Output file containing the annotated OGs."){ |v| o[:out]=v }
  opts.on("-a FILE1,FILE2,...", Array, "Input file(s) containing the annotations.One or more tab-delimited files",
    "with the gene names in the first column and the annotation in the second."){ |v| o[:annotations]=v }
  opts.separator ""
  opts.separator "Other Options"
  opts.on("-f","--format STRING", "Format of the filenames for the annotation files, using regex syntax.",
    "By default: '#{o[:f]}'."){ |v| o[:f]=v }
  opts.on("-q", "--quiet", "Run quietly (no STDERR output)."){ o[:q] = true }
  opts.on("-h", "--help", "Display this screen.") do
    puts opts
    exit
  end
  opts.separator ""
end.parse!
abort "-i is mandatory" if o[:in].nil?
abort "-o is mandatory" if o[:out].nil?
abort "-a is mandatory" if o[:annotations].nil?

##### MAIN:
begin
  # Read the pre-computed OGs: the first line is the header, every other
  # line is one OG
  collection = OGCollection.new
  $stderr.puts "Reading pre-computed OGs in '#{o[:in]}'." unless o[:q]
  File.open(o[:in], 'r') do |f|
    h = f.gets.chomp.split(/\t/)
    while ln = f.gets
      collection << OG.new(h, ln.chomp.split(/\t/))
    end
  end
  $stderr.puts " Loaded OGs: #{collection.ogs.length}." unless o[:q]

  # Read annotations
  o[:annotations].each do |annot|
    # The first capture group of the filename pattern is used as the name
    # of the genome the annotations belong to
    m = /#{o[:f]}/.match(annot)
    if m.nil? or m[1].nil?
      warn "Cannot parse filename: #{annot} (doesn't match /#{o[:f]}/)."
      next
    end
    no_og = 0
    # Block form, so the handle is closed once the file has been read
    File.open(annot, 'r') do |f|
      collection.add_note_src m[1]+' annotation'
      while ln=f.gets
        r = ln.chomp.split(/\t/)
        g = Gene.new m[1], r[0]
        og = collection.get_og g
        if og.nil?
          no_og += 1
        else
          # +to_s+ tolerates rows without an annotation column; the note is
          # filed under the source registered just above (the last one)
          og.add_note g.id + ': ' + r[1].to_s, collection.note_srcs.length-1
        end
      end
    end
    warn "Warning: Cannot find #{no_og} genes from #{m[1]} in OG collection." if no_og>0
  end
  # Save the output matrix
  $stderr.puts "Saving annotated OGs into '#{o[:out]}'." unless o[:q]
  File.open(o[:out], "w") do |f|
    f.puts collection.to_s
  end
  $stderr.puts "Done.\n" unless o[:q]
rescue => err
  # NOTE(review): the error is reported on STDERR but not re-raised, so the
  # script still finishes with a zero exit status.
  $stderr.puts "Exception: #{err}\n\n"
  err.backtrace.each { |l| $stderr.puts l + "\n" }
  err
end
|
87
|
+
|
88
|
+
|
@@ -0,0 +1,160 @@
|
|
1
|
+
#!/usr/bin/env ruby

#
# @author: Luis M. Rodriguez-R
# @license: artistic-2.0
#

$:.push File.expand_path("../lib", __FILE__)
require "optparse"
require "json"
require "tmpdir"

# Defaults: not quiet, 100 replicates, 2 concurrent child processes
o = {q:false, n:100, thr:2}
ARGV << "-h" if ARGV.size==0
OptionParser.new do |opts|
  opts.banner = "
Subsamples the genomes in a set of Orthology Groups (OGs) and estimates the
trend of core genome and pangenome sizes.

Usage: #{$0} [options]"
  opts.separator ""
  opts.separator "Mandatory"
  opts.on("-o", "--ogs FILE",
    "Input file containing the precomputed OGs."){ |v| o[:ogs]=v }
  opts.separator ""
  opts.separator "Output Options"
  opts.on("-s", "--summary FILE",
    "Output file in tabular format with summary statistics."){ |v| o[:summ]=v }
  opts.on("-t", "--tab FILE","Output file in tabular format."){ |v| o[:tab]=v }
  opts.on("-j", "--json FILE", "Output file in JSON format."){ |v| o[:json]=v }
  opts.separator ""
  opts.separator "Other Options"
  opts.on("-n", "--replicates INT",
    "Number of replicates to estimate. By default: #{o[:n]}."
    ){ |v| o[:n]=v.to_i }
  opts.on("--threads INT",
    "Children threads to spawn. By default: #{o[:thr]}."){ |v| o[:thr]=v.to_i}
  opts.on("-q", "--quiet", "Run quietly (no STDERR output)."){ o[:q] = true }
  opts.on("-h", "--help", "Display this screen.") do
    puts opts
    exit
  end
  opts.separator ""
end.parse!
abort "-o is mandatory" if o[:ogs].nil?

##### MAIN:
begin
  # Read the pre-computed OGs: one column per genome (counted from the
  # header), one row per OG, with "-" marking an absent genome
  $stderr.puts "Reading pre-computed OGs in '#{o[:ogs]}'." unless o[:q]
  bool_a = []
  genomes_n = nil
  File.open(o[:ogs], "r") do |f|
    header = f.gets
    raise "Empty input file: #{o[:ogs]}" if header.nil?
    genomes_n = header.chomp.split("\t").size
    while ln = f.gets
      bool_a << ln.chomp.split("\t").map{ |g| g!="-" }
    end
  end
  $stderr.puts " Loaded OGs: #{bool_a.size}." unless o[:q]
  # Encode each OG as an Integer bitmask of genome presence
  bool_a_b = bool_a.map{ |og| og.map{ |g| g ? "1" : "0" }.join("").to_i(2) }

  # Generate subsamples
  size = {core:[], pan:[]}
  Dir.mktmpdir do |dir|
    children = 0
    (0 .. o[:n]-1).each do |i|
      fork do
        # Generate trajectory: start with all the genomes and remove one at
        # a time (in random order), counting core and pan OGs at each step.
        # Counts are prepended, so index k ends up holding k+1 genomes.
        genomes = (0 .. genomes_n-1).to_a.shuffle
        genomes_b = (2 ** genomes_n) - 1
        core = []
        pan = []
        while not genomes.empty?
          core.unshift 0
          pan.unshift 0
          bool_a_b.map! do |og|
            r_og = og & genomes_b
            if r_og>0
              core[0] += 1 if r_og==genomes_b
              pan[0] += 1
              og
            else
              # Absent from all remaining genomes: drop it from later steps
              nil
            end
          end
          bool_a_b.compact!
          genomes_b ^= 2 ** genomes.pop
        end
        abort "UNEXPECTED ERROR: Final genomes_b=#{genomes_b}." if genomes_b>0
        # Store trajectory
        File.open("#{dir}/#{i}", "w") do |tfh|
          tfh.puts JSON.generate({core:core, pan:pan})
        end
      end # fork
      children += 1
      # Cap the number of concurrent children at --threads
      if children >= o[:thr]
        Process.wait
        children -= 1
      end
    end
    Process.waitall
    # Recover trajectories
    (0 .. o[:n]-1).each do |i|
      s = JSON.parse(File.read("#{dir}/#{i}"), {:symbolize_names=>true})
      size[:core][i] = s[:core]
      size[:pan][i] = s[:pan]
    end
  end # Dir.mktmpdir

  # Show result
  $stderr.puts "Generating reports." unless o[:q]

  # Save results in JSON
  unless o[:json].nil?
    File.open(o[:json], "w") do |ofh|
      ofh.puts JSON.pretty_generate(size)
    end
  end

  # Save results in tab
  unless o[:tab].nil?
    File.open(o[:tab], "w") do |ofh|
      ofh.puts((%w{replicate metric}+(1 .. genomes_n).to_a).join("\t"))
      (0 .. o[:n]-1).each do |i|
        ofh.puts(([i+1,"core"] + size[:core][i]).join("\t"))
        ofh.puts(([i+1,"pan"] + size[:pan][i]).join("\t"))
      end
    end
  end

  # Save summary results in tab
  unless o[:summ].nil?
    File.open(o[:summ], "w") do |ofh|
      ofh.puts %w{genomes core_avg core_sd core_q1 core_q2 core_q3
        pan_avg pan_sd pan_q1 pan_q2 pan_q3}.join("\t")
      (0 .. genomes_n-1).each do |i|
        res = [ i+1 ]
        [:core, :pan].each do |met|
          a = size[met].map{ |r| r[i] }.sort
          avg = a.inject(0,:+).to_f / a.size
          var = a.map{ |v| v**2 }.inject(0,:+).to_f/a.size - avg**2
          # Rounding can leave a tiny negative variance, which Math.sqrt
          # rejects; clamp it to zero
          var = 0.0 if var < 0.0
          sd = Math.sqrt(var)
          q1 = a[ a.size*1/4 ]
          q2 = a[ a.size*2/4 ]
          q3 = a[ a.size*3/4 ]
          res += [avg,sd,q1,q2,q3]
        end
        ofh.puts res.join("\t")
      end
    end
  end

  $stderr.puts "Done.\n" unless o[:q]
rescue => err
  # NOTE(review): the error is reported on STDERR but not re-raised, so the
  # script still finishes with a zero exit status.
  $stderr.puts "Exception: #{err}\n\n"
  err.backtrace.each { |l| $stderr.puts l + "\n" }
  err
end
|
160
|
+
|
@@ -0,0 +1,125 @@
|
|
1
|
+
#!/usr/bin/env ruby
#
# @author Luis M. Rodriguez-R
# @license artistic license 2.0
#

$:.push File.expand_path("lib", File.dirname(__FILE__))
# Required explicitly because OptionParser is used below
require "optparse"
require "enveomics_rb/enveomics"
require "enveomics_rb/og"

o = {q:false, pergenome:false, prefix:false, first:false, rand:false,
  core:0.0, dups:0}
OptionParser.new do |opts|
  opts.banner = "
Extracts sequences of Orthology Groups (OGs) from genomes (proteomes).

Usage: #{$0} [options]"
  opts.separator ""
  opts.separator "Mandatory"
  opts.on("-i", "--in FILE",
    "Input file containing the OGs (as generated by ogs.rb)."){ |v| o[:in]=v }
  opts.on("-o", "--out FILE",
    "Output directory where to place extracted sequences."){ |v| o[:out]=v }
  opts.on("-s", "--seqs STRING",
    "Path to the proteomes in FastA format, using '%s' to denote the genome.",
    "For example: /path/to/seqs/%s.faa."){ |v| o[:seqs]=v }
  opts.separator ""
  opts.separator "Other Options"
  opts.on("-c", "--core FLOAT",
    "Use only OGs present in at least this fraction of the genomes.",
    "To use only the strict core genome*, use -c 1."){ |v| o[:core]=v.to_f }
  opts.on("-d", "--duplicates INT",
    "Use only OGs with less than this number of in-paralogs in a genome.",
    "To use only genes without in-paralogs*, use -d 1."
    ){ |v| o[:dups]=v.to_i }
  opts.on("-g", "--per-genome",
    "If set, the output is generated per genome.",
    "By default, the output is per OG."){ |v| o[:pergenome]=v }
  opts.on("-p", "--prefix",
    "If set, each sequence is prefixed with the genome name",
    "(or OG number, if --per-genome) and a dash."){ |v| o[:prefix]=v }
  opts.on("-r", "--rand",
    "Get only one gene per genome per OG (random) regardless of in-paralogs.",
    "By default all genes are extracted."){ |v| o[:rand]=v }
  opts.on("-f", "--first",
    "Get only one gene per genome per OG (first) regardless of in-paralogs.",
    "By default all genes are extracted. Takes precendece over --rand."
    ){ |v| o[:first]=v }
  # Literal +true+: the TRUE constant was deprecated in Ruby 2.4 and removed
  # in later releases
  opts.on("-q", "--quiet", "Run quietly (no STDERR output)."){ o[:q] = true }
  opts.on("-h", "--help", "Display this screen.") do
    puts opts
    exit
  end
  opts.separator ""
  opts.separator " * To use only the unus genome (OGs with exactly one " +
    "gene per genome), use: -c 1 -d 1."
  opts.separator ""
end.parse!
abort "-i is mandatory" if o[:in].nil?
abort "-o is mandatory" if o[:out].nil?
abort "-s is mandatory" if o[:seqs].nil?

##### MAIN:
ofhs = nil
begin
  # Read the pre-computed OGs: the first line is the header, every other
  # line is one OG
  collection = OGCollection.new
  $stderr.puts "Reading pre-computed OGs in '#{o[:in]}'." unless o[:q]
  File.open(o[:in], "r") do |f|
    h = f.gets.chomp.split(/\t/)
    while ln = f.gets
      collection << OG.new(h, ln.chomp.split(/\t/))
    end
  end
  $stderr.puts " Loaded OGs: #{collection.ogs.size}." unless o[:q]
  $stderr.puts " Reported Genomes: #{Gene.genomes.size}." unless o[:q]

  # Filter core/in-paralog genes
  collection.filter_core! o[:core] unless o[:core]==0.0
  collection.remove_inparalogs! o[:dups] unless o[:dups]==0
  # Report the filtered count whenever either of the filters was applied
  $stderr.puts " Filtered OGs: #{collection.ogs.size}." unless
    o[:q] or (o[:core]==0.0 and o[:dups]==0)

  # Open outputs: one file per genome, or one file per OG (OG1.fa, OG2.fa,
  # ...) in the order of the collection
  $stderr.puts "Initializing output files." unless o[:q]
  Dir.mkdir(o[:out]) unless Dir.exist? o[:out]
  ofhs = o[:pergenome] ?
    Gene.genomes.map{|g| File.open("#{o[:out]}/#{g}.fa", "w")} :
    ( (1 .. collection.ogs.size).map do |og|
      File.open("#{o[:out]}/OG#{og}.fa", "w")
    end )
  $stderr.puts " Created files: #{ofhs.size}." unless o[:q]

  # Read genomes
  $stderr.puts "Filtering genes." unless o[:q]
  genome_i = -1
  Gene.genomes.each do |genome|
    genome_i = Gene.genomes.index(genome)
    $stderr.print " Genome #{genome_i+1}. \r" unless o[:q]
    # Genes to extract from this genome, one entry per OG
    genes = ( collection.get_genome_genes(genome).map do |og|
      o[:first] ? [og.first] : (o[:rand] ? [og.sample] : og)
    end )
    # Index of the output handle for the sequence being read; +nil+ while
    # reading a sequence that is not to be extracted
    hand = nil
    # Block form, so the proteome handle is closed after each genome
    File.open(sprintf(o[:seqs], genome), "r") do |sfh|
      sfh.each do |ln|
        if ln =~ /^>(\S+)/
          og = genes.index{|g| g.include? $1}
          hand = og.nil? ? nil : ( o[:pergenome] ? genome_i : og )
          # NOTE(review): +og+ is a zero-based index here, while the per-OG
          # files above are numbered from one (OG1.fa is index 0); confirm
          # which numbering the prefix is expected to follow.
          ln.sub!(/^>/, ">#{o[:pergenome] ? "OG#{og}" : genome}-") if
            o[:prefix] and not hand.nil?
        end
        ofhs[hand].puts(ln) unless hand.nil?
      end
    end
  end
  $stderr.puts " #{genome_i+1} genomes processed." unless o[:q]

  # Close outputs
  $stderr.puts "Closing output files." unless o[:q]
  ofhs.each{|h| h.close}
  $stderr.puts "Done.\n" unless o[:q]
rescue => err
  # NOTE(review): the error is reported on STDERR but not re-raised, so the
  # script still finishes with a zero exit status.
  $stderr.puts "Exception: #{err}\n\n"
  err.backtrace.each { |l| $stderr.puts l + "\n" }
  err
ensure
  # Make sure no output handle is left open if the run was interrupted
  ofhs.each{ |h| h.close unless h.closed? } unless ofhs.nil?
end
|
124
|
+
|
125
|
+
|