miga-base 0.7.25.2 → 0.7.25.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/miga/version.rb +2 -2
- data/utils/distance/runner.rb +2 -1
- metadata +5 -278
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +0 -41964
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +0 -32439
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +0 -62056
- data/utils/FastAAI/FastAAI/FastAAI +0 -1336
- data/utils/FastAAI/README.md +0 -84
- data/utils/FastAAI/kAAI_v1.0_virus.py +0 -1296
- data/utils/enveomics/Docs/recplot2.md +0 -244
- data/utils/enveomics/Examples/aai-matrix.bash +0 -66
- data/utils/enveomics/Examples/ani-matrix.bash +0 -66
- data/utils/enveomics/Examples/essential-phylogeny.bash +0 -105
- data/utils/enveomics/Examples/unus-genome-phylogeny.bash +0 -100
- data/utils/enveomics/LICENSE.txt +0 -73
- data/utils/enveomics/Makefile +0 -52
- data/utils/enveomics/Manifest/Tasks/aasubs.json +0 -103
- data/utils/enveomics/Manifest/Tasks/blasttab.json +0 -786
- data/utils/enveomics/Manifest/Tasks/distances.json +0 -161
- data/utils/enveomics/Manifest/Tasks/fasta.json +0 -766
- data/utils/enveomics/Manifest/Tasks/fastq.json +0 -243
- data/utils/enveomics/Manifest/Tasks/graphics.json +0 -126
- data/utils/enveomics/Manifest/Tasks/mapping.json +0 -67
- data/utils/enveomics/Manifest/Tasks/ogs.json +0 -382
- data/utils/enveomics/Manifest/Tasks/other.json +0 -829
- data/utils/enveomics/Manifest/Tasks/remote.json +0 -355
- data/utils/enveomics/Manifest/Tasks/sequence-identity.json +0 -501
- data/utils/enveomics/Manifest/Tasks/tables.json +0 -308
- data/utils/enveomics/Manifest/Tasks/trees.json +0 -68
- data/utils/enveomics/Manifest/Tasks/variants.json +0 -111
- data/utils/enveomics/Manifest/categories.json +0 -156
- data/utils/enveomics/Manifest/examples.json +0 -154
- data/utils/enveomics/Manifest/tasks.json +0 -4
- data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +0 -69
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +0 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +0 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +0 -1
- data/utils/enveomics/Pipelines/assembly.pbs/README.md +0 -189
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +0 -112
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +0 -23
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +0 -44
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +0 -50
- data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +0 -37
- data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +0 -68
- data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +0 -49
- data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +0 -80
- data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +0 -57
- data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +0 -63
- data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +0 -38
- data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +0 -73
- data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +0 -21
- data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +0 -72
- data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +0 -98
- data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +0 -1
- data/utils/enveomics/Pipelines/blast.pbs/README.md +0 -127
- data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +0 -109
- data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +0 -128
- data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +0 -16
- data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +0 -22
- data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +0 -26
- data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +0 -89
- data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +0 -29
- data/utils/enveomics/Pipelines/idba.pbs/README.md +0 -49
- data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +0 -95
- data/utils/enveomics/Pipelines/idba.pbs/run.pbs +0 -56
- data/utils/enveomics/Pipelines/trim.pbs/README.md +0 -54
- data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +0 -70
- data/utils/enveomics/Pipelines/trim.pbs/run.pbs +0 -130
- data/utils/enveomics/README.md +0 -42
- data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +0 -171
- data/utils/enveomics/Scripts/Aln.cat.rb +0 -163
- data/utils/enveomics/Scripts/Aln.convert.pl +0 -35
- data/utils/enveomics/Scripts/AlphaDiversity.pl +0 -152
- data/utils/enveomics/Scripts/BedGraph.tad.rb +0 -93
- data/utils/enveomics/Scripts/BedGraph.window.rb +0 -71
- data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +0 -102
- data/utils/enveomics/Scripts/BlastTab.addlen.rb +0 -63
- data/utils/enveomics/Scripts/BlastTab.advance.bash +0 -48
- data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +0 -55
- data/utils/enveomics/Scripts/BlastTab.catsbj.pl +0 -104
- data/utils/enveomics/Scripts/BlastTab.cogCat.rb +0 -76
- data/utils/enveomics/Scripts/BlastTab.filter.pl +0 -47
- data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +0 -194
- data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +0 -104
- data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +0 -157
- data/utils/enveomics/Scripts/BlastTab.recplot2.R +0 -48
- data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +0 -86
- data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +0 -119
- data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +0 -86
- data/utils/enveomics/Scripts/BlastTab.subsample.pl +0 -47
- data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +0 -114
- data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +0 -90
- data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +0 -101
- data/utils/enveomics/Scripts/Chao1.pl +0 -97
- data/utils/enveomics/Scripts/CharTable.classify.rb +0 -234
- data/utils/enveomics/Scripts/EBIseq2tax.rb +0 -83
- data/utils/enveomics/Scripts/FastA.N50.pl +0 -56
- data/utils/enveomics/Scripts/FastA.extract.rb +0 -152
- data/utils/enveomics/Scripts/FastA.filter.pl +0 -52
- data/utils/enveomics/Scripts/FastA.filterLen.pl +0 -28
- data/utils/enveomics/Scripts/FastA.filterN.pl +0 -60
- data/utils/enveomics/Scripts/FastA.fragment.rb +0 -92
- data/utils/enveomics/Scripts/FastA.gc.pl +0 -42
- data/utils/enveomics/Scripts/FastA.interpose.pl +0 -93
- data/utils/enveomics/Scripts/FastA.length.pl +0 -38
- data/utils/enveomics/Scripts/FastA.mask.rb +0 -89
- data/utils/enveomics/Scripts/FastA.per_file.pl +0 -36
- data/utils/enveomics/Scripts/FastA.qlen.pl +0 -57
- data/utils/enveomics/Scripts/FastA.rename.pl +0 -65
- data/utils/enveomics/Scripts/FastA.revcom.pl +0 -23
- data/utils/enveomics/Scripts/FastA.sample.rb +0 -83
- data/utils/enveomics/Scripts/FastA.slider.pl +0 -85
- data/utils/enveomics/Scripts/FastA.split.pl +0 -55
- data/utils/enveomics/Scripts/FastA.split.rb +0 -79
- data/utils/enveomics/Scripts/FastA.subsample.pl +0 -131
- data/utils/enveomics/Scripts/FastA.tag.rb +0 -65
- data/utils/enveomics/Scripts/FastA.wrap.rb +0 -48
- data/utils/enveomics/Scripts/FastQ.filter.pl +0 -54
- data/utils/enveomics/Scripts/FastQ.interpose.pl +0 -90
- data/utils/enveomics/Scripts/FastQ.offset.pl +0 -90
- data/utils/enveomics/Scripts/FastQ.split.pl +0 -53
- data/utils/enveomics/Scripts/FastQ.tag.rb +0 -63
- data/utils/enveomics/Scripts/FastQ.test-error.rb +0 -81
- data/utils/enveomics/Scripts/FastQ.toFastA.awk +0 -24
- data/utils/enveomics/Scripts/GFF.catsbj.pl +0 -127
- data/utils/enveomics/Scripts/GenBank.add_fields.rb +0 -84
- data/utils/enveomics/Scripts/HMM.essential.rb +0 -351
- data/utils/enveomics/Scripts/HMM.haai.rb +0 -168
- data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +0 -83
- data/utils/enveomics/Scripts/JPlace.distances.rb +0 -88
- data/utils/enveomics/Scripts/JPlace.to_iToL.rb +0 -320
- data/utils/enveomics/Scripts/M5nr.getSequences.rb +0 -81
- data/utils/enveomics/Scripts/MeTaxa.distribution.pl +0 -198
- data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +0 -35
- data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +0 -49
- data/utils/enveomics/Scripts/NCBIacc2tax.rb +0 -92
- data/utils/enveomics/Scripts/Newick.autoprune.R +0 -27
- data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +0 -228
- data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +0 -32
- data/utils/enveomics/Scripts/RefSeq.download.bash +0 -48
- data/utils/enveomics/Scripts/SRA.download.bash +0 -57
- data/utils/enveomics/Scripts/TRIBS.plot-test.R +0 -36
- data/utils/enveomics/Scripts/TRIBS.test.R +0 -39
- data/utils/enveomics/Scripts/Table.barplot.R +0 -31
- data/utils/enveomics/Scripts/Table.df2dist.R +0 -30
- data/utils/enveomics/Scripts/Table.filter.pl +0 -61
- data/utils/enveomics/Scripts/Table.merge.pl +0 -77
- data/utils/enveomics/Scripts/Table.replace.rb +0 -69
- data/utils/enveomics/Scripts/Table.round.rb +0 -63
- data/utils/enveomics/Scripts/Table.split.pl +0 -57
- data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +0 -227
- data/utils/enveomics/Scripts/VCF.KaKs.rb +0 -147
- data/utils/enveomics/Scripts/VCF.SNPs.rb +0 -88
- data/utils/enveomics/Scripts/aai.rb +0 -418
- data/utils/enveomics/Scripts/ani.rb +0 -362
- data/utils/enveomics/Scripts/clust.rand.rb +0 -102
- data/utils/enveomics/Scripts/gi2tax.rb +0 -103
- data/utils/enveomics/Scripts/in_silico_GA_GI.pl +0 -96
- data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
- data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
- data/utils/enveomics/Scripts/lib/enveomics.R +0 -1
- data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +0 -24
- data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +0 -253
- data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +0 -182
- data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +0 -74
- data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +0 -237
- data/utils/enveomics/Scripts/lib/enveomics_rb/stat.rb +0 -30
- data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +0 -135
- data/utils/enveomics/Scripts/ogs.annotate.rb +0 -88
- data/utils/enveomics/Scripts/ogs.core-pan.rb +0 -160
- data/utils/enveomics/Scripts/ogs.extract.rb +0 -125
- data/utils/enveomics/Scripts/ogs.mcl.rb +0 -186
- data/utils/enveomics/Scripts/ogs.rb +0 -104
- data/utils/enveomics/Scripts/ogs.stats.rb +0 -131
- data/utils/enveomics/Scripts/rbm.rb +0 -146
- data/utils/enveomics/Tests/Makefile +0 -10
- data/utils/enveomics/Tests/Mgen_M2288.faa +0 -3189
- data/utils/enveomics/Tests/Mgen_M2288.fna +0 -8282
- data/utils/enveomics/Tests/Mgen_M2321.fna +0 -8288
- data/utils/enveomics/Tests/Nequ_Kin4M.faa +0 -2970
- data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +0 -7
- data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +0 -17
- data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +0 -137
- data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +0 -123
- data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +0 -200
- data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +0 -55
- data/utils/enveomics/Tests/alkB.nwk +0 -1
- data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +0 -13
- data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +0 -17
- data/utils/enveomics/Tests/hiv1.faa +0 -59
- data/utils/enveomics/Tests/hiv1.fna +0 -134
- data/utils/enveomics/Tests/hiv2.faa +0 -70
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +0 -233
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +0 -1
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +0 -233
- data/utils/enveomics/Tests/phyla_counts.tsv +0 -10
- data/utils/enveomics/Tests/primate_lentivirus.ogs +0 -11
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +0 -9
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +0 -8
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +0 -6
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +0 -9
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +0 -6
- data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +0 -6
- data/utils/enveomics/build_enveomics_r.bash +0 -45
- data/utils/enveomics/enveomics.R/DESCRIPTION +0 -31
- data/utils/enveomics/enveomics.R/NAMESPACE +0 -39
- data/utils/enveomics/enveomics.R/R/autoprune.R +0 -155
- data/utils/enveomics/enveomics.R/R/barplot.R +0 -184
- data/utils/enveomics/enveomics.R/R/cliopts.R +0 -135
- data/utils/enveomics/enveomics.R/R/df2dist.R +0 -154
- data/utils/enveomics/enveomics.R/R/growthcurve.R +0 -331
- data/utils/enveomics/enveomics.R/R/recplot.R +0 -354
- data/utils/enveomics/enveomics.R/R/recplot2.R +0 -1631
- data/utils/enveomics/enveomics.R/R/tribs.R +0 -583
- data/utils/enveomics/enveomics.R/R/utils.R +0 -50
- data/utils/enveomics/enveomics.R/README.md +0 -80
- data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
- data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -17
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -17
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -17
- data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +0 -25
- data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +0 -46
- data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +0 -23
- data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +0 -47
- data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +0 -23
- data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +0 -23
- data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +0 -32
- data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +0 -91
- data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +0 -57
- data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +0 -24
- data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +0 -39
- data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +0 -38
- data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +0 -40
- data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +0 -67
- data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +0 -37
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +0 -122
- data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +0 -45
- data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +0 -24
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +0 -68
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +0 -25
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +0 -21
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +0 -41
- data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +0 -29
- data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +0 -18
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +0 -40
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +0 -36
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +0 -27
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +0 -41
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +0 -17
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +0 -43
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +0 -37
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +0 -74
- data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +0 -59
- data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +0 -27
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +0 -32
- data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +0 -59
- data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +0 -28
- data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +0 -27
- data/utils/enveomics/enveomics.R/man/growth.curves.Rd +0 -14
- data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +0 -13
- data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +0 -63
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +0 -38
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +0 -38
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +0 -111
- data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +0 -19
- data/utils/enveomics/globals.mk +0 -8
- data/utils/enveomics/manifest.json +0 -9
@@ -1,30 +0,0 @@
|
|
1
|
-
|
2
|
-
module Enve
  # Small collection of random-number generators exposed as class methods.
  class Stat
    # Generates a random number from the +dist+ distribution with +params+
    # parameters. This is simply a wrapper to the r_* functions below.
    #
    # @param dist [Symbol, String] suffix of the r_* method to call
    #   (+:unif+, +:geom+ or +:sgeom+); defaults to +:unif+
    # @param params arguments forwarded unchanged to the r_* method
    # @return [Numeric] the generated value
    def self.rand(dist = :unif, *params)
      send("r_#{dist}", *params)
    end

    # Generates a random number from the uniform distribution between +min+ and
    # +max+. By default generates random numbers between 0.0 and 1.0.
    #
    # @return [Float] value in [+min+, +max+)
    def self.r_unif(min = 0.0, max = 1.0)
      min + (max - min) * Random.rand
    end

    # Generates a random number from the geometric distribution with support
    # {0, 1, 2, ...} and probability of success +p+.
    #
    # @param p [Numeric] probability of success, in (0, 1]
    # @return [Integer] number of failures before the first success
    # @raise [ArgumentError] if +p+ is not in (0, 1]
    def self.r_geom(p)
      unless p > 0 && p <= 1
        raise ArgumentError, "probability of success must be in (0, 1]: #{p}"
      end

      # Inversion method. r_unif is in [0, 1), so the numerator is a finite
      # non-positive number and the ratio is never negative. Taking the floor
      # (rather than the ceiling minus one) keeps the result inside the
      # support even when the ratio is exactly zero, e.g. when p is 1.
      (Math.log(1.0 - r_unif) / Math.log(1.0 - p)).floor
    end

    # Generates a random number from the shifted geometric distribution with
    # support {1, 2, 3, ...} and probability of success +p+.
    #
    # @param p [Numeric] probability of success, in (0, 1]
    # @return [Integer] number of trials up to and including the first success
    # @raise [ArgumentError] if +p+ is not in (0, 1]
    def self.r_sgeom(p)
      r_geom(p) + 1
    end
  end
end
|
30
|
-
|
@@ -1,135 +0,0 @@
|
|
1
|
-
# @author Luis M. Rodriguez-R
|
2
|
-
# @license Artistic-2.0
|
3
|
-
|
4
|
-
##
|
5
|
-
# Enveomics representation of a Variant Call Format (VCF) file.
|
6
|
-
class VCF
  ##
  # File-handler, a File object.
  attr_reader :fh

  ##
  # Wraps +file+. A String is taken as a path and opened for reading; any
  # other object is stored as-is and used as the handle.
  def initialize(file)
    @fh = file.is_a?(String) ? File.open(file, 'r') : file
  end

  ##
  # Iterate through each variant (i.e., each non-comment line), passing a
  # VCF::Variant object to the block. The handle is rewound first. Returns an
  # Enumerator when no block is given.
  def each_variant
    return enum_for(:each_variant) unless block_given?

    fh.rewind
    fh.each_line do |ln|
      next if ln.start_with?('#')

      yield VCF::Variant.new(ln)
    end
  end

  ##
  # Iterate through each header (i.e., each comment line), passing a String to
  # the block. The handle is rewound first. Returns an Enumerator when no
  # block is given.
  def each_header
    return enum_for(:each_header) unless block_given?

    fh.rewind
    fh.each_line do |ln|
      next unless ln.start_with?('#')

      yield ln
    end
  end
end

##
# A single entry (non-comment line) of a VCF file.
class VCF::Variant
  ##
  # Column definitions in VCF.
  @@COLUMNS = [:chrom, :pos, :id, :ref, :alt, :qual, :filter, :info, :format, :bam]

  ##
  # An Array containing each of the VCF entry's columns. Column 5 (QUAL) is
  # stored as Float and column 7 (INFO) as a Hash with Symbol keys; all other
  # columns are kept as String.
  attr_reader :data

  ##
  # Initialize VCF::Variant from String +line+, a non-comment line in the VCF.
  def initialize(line)
    @data = line.chomp.split("\t")
    # Qual as float
    @data[5] = data[5].to_f
    # Split info: "KEY=VALUE" entries keep the value as String, bare "KEY"
    # entries (flags) are stored as +true+. A missing INFO column is treated
    # as empty instead of raising.
    @data[7] = data[7].to_s.split(';').each_with_object({}) do |entry, info|
      key, value = entry.include?('=') ? entry.split('=', 2) : [entry, true]
      info[key.to_sym] = value
    end
    # Read formatted data: FORMAT keys paired with the sample column values
    # are merged into the INFO hash.
    unless data[9].nil? || data[9].empty?
      keys = format.split(':')
      values = bam.split(':')
      keys.each_with_index { |key, i| @data[7][key.to_sym] = values[i] }
    end
    # Only the first ALT allele is compared against REF.
    @data[7][:INDEL] = true if ref.size != alt.split(',').first.size
  end

  ##
  # Named functions for each column. +pos+ is returned as Integer, everything
  # else as stored in #data.
  @@COLUMNS.each_with_index do |name, i|
    define_method(name) { name == :pos ? data[i].to_i : data[i] }
  end

  ##
  # Sequencing depth (DP), or +nil+ if not present.
  def dp
    return nil if info[:DP].nil?

    info[:DP].to_i
  end

  ##
  # Sequencing depth of FWD-REF, REV-REF, FWD-ALT, and REV-ALT (DP4) as an
  # Array of Integer, or +nil+ if not present.
  def dp4
    return nil if info[:DP4].nil?

    @dp4 ||= info[:DP4].split(',').map(&:to_i)
  end

  ##
  # Sequencing depth of REF and ALT (AD) as an Array of Integer, or +nil+ if
  # not present.
  def ad
    return nil if info[:AD].nil?

    @ad ||= info[:AD].split(',').map(&:to_i)
  end

  ##
  # Sequencing depth of the REF allele, from DP4 if available and from AD
  # otherwise. Returns +nil+ if neither is present.
  def ref_dp
    return dp4[0] + dp4[1] unless dp4.nil?
    return ad[0] unless ad.nil?

    nil
  end

  ##
  # Sequencing depth of the ALT allele, from DP4 if available and from AD
  # otherwise. Returns +nil+ if neither is present.
  def alt_dp
    return dp4[2] + dp4[3] unless dp4.nil?
    return ad[1] unless ad.nil?

    nil
  end

  ##
  # Information content of the variant in bits (from 0 to 1), computed from
  # the REF and ALT depths. Returns +nil+ when the depths are not available.
  # An allele with zero depth contributes zero bits (instead of turning the
  # result into NaN through 0 * log(0)).
  def shannon
    return @shannon unless @shannon.nil?

    a = ref_dp
    b = alt_dp
    return nil if a.nil? || b.nil?

    total = (a + b).to_f
    @shannon = [a, b].inject(0.0) do |bits, depth|
      frac = depth / total
      frac > 0 ? bits - frac * Math.log2(frac) : bits
    end
  end

  ##
  # Is it an indel?
  def indel?
    !info[:INDEL].nil? && info[:INDEL]
  end

  ##
  # Return as String: the tab-delimited columns followed by a newline, with
  # the INFO column rebuilt by #info_to_s.
  def to_s
    (data[0..6] + [info_to_s] + data[8..-1].to_a).join("\t") + "\n"
  end

  ##
  # Returns the INFO entry as String. Flags (entries stored as +true+) are
  # written as the bare key so that they are parsed back as flags.
  def info_to_s
    data[7].map do |key, value|
      value == true ? key.to_s : "#{key}=#{value}"
    end.join(';')
  end
end
|
@@ -1,88 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby

#
# @author: Luis M. Rodriguez-R
# @update: Mar-23-2015
# @license: artistic license 2.0
#

$:.push File.expand_path(File.dirname(__FILE__) + '/lib')
require 'enveomics_rb/og'
require 'optparse'

# Default options. The lowercase literals replace the TRUE/FALSE constants,
# which no longer exist in Ruby >= 3.2.
o = {:q=>false, :f=>"(\\S+)\\.txt", :consolidate=>true, :pre=>[]}
ARGV << '-h' if ARGV.size==0
OptionParser.new do |opts|
  opts.banner = "
Annotates Orthology Groups (OGs) using one or more reference genomes.

Usage: #{$0} [options]"
  opts.separator ""
  opts.separator "Mandatory"
  opts.on("-i", "--in FILE", "Input file containing the OGs (as generated by ogs.rb)."){ |v| o[:in]=v }
  opts.on("-o", "--out FILE", "Output file containing the annotated OGs."){ |v| o[:out]=v }
  opts.on("-a FILE1,FILE2,...", Array, "Input file(s) containing the annotations.One or more tab-delimited files",
    "with the gene names in the first column and the annotation in the second."){ |v| o[:annotations]=v }
  opts.separator ""
  opts.separator "Other Options"
  opts.on("-f","--format STRING", "Format of the filenames for the annotation files, using regex syntax.",
    "By default: '#{o[:f]}'."){ |v| o[:f]=v }
  opts.on("-q", "--quiet", "Run quietly (no STDERR output)."){ o[:q] = true }
  opts.on("-h", "--help", "Display this screen.") do
    puts opts
    exit
  end
  opts.separator ""
end.parse!
abort "-i is mandatory" if o[:in].nil?
abort "-o is mandatory" if o[:out].nil?
abort "-a is mandatory" if o[:annotations].nil?

##### MAIN:
begin
  # Read the pre-computed OGs: the first line is the header, every other
  # line becomes one OG.
  collection = OGCollection.new
  $stderr.puts "Reading pre-computed OGs in '#{o[:in]}'." unless o[:q]
  File.open(o[:in], 'r') do |f|
    h = f.gets.chomp.split(/\t/)
    while ln = f.gets
      collection << OG.new(h, ln.chomp.split(/\t/))
    end
  end
  $stderr.puts " Loaded OGs: #{collection.ogs.length}." unless o[:q]

  # Read annotations
  o[:annotations].each do |annot|
    # The first capture group of the filename format is used as the genome
    # name; files that do not match are reported and skipped.
    m = /#{o[:f]}/.match(annot)
    if m.nil? or m[1].nil?
      warn "Cannot parse filename: #{annot} (doesn't match /#{o[:f]}/)."
      next
    end
    File.open(annot, 'r') do |f|
      no_og = 0
      collection.add_note_src m[1]+' annotation'
      while ln=f.gets
        r = ln.chomp.split(/\t/)
        g = Gene.new m[1], r[0]
        og = collection.get_og g
        if og.nil?
          no_og += 1
        else
          # r[1] is nil when the line has no second column; to_s keeps such
          # a line from aborting the whole run.
          og.add_note g.id + ': ' + r[1].to_s, collection.note_srcs.length-1
        end
      end
      warn "Warning: Cannot find #{no_og} genes from #{m[1]} in OG collection." if no_og>0
    end
  end
  # Save the output matrix
  $stderr.puts "Saving annotated OGs into '#{o[:out]}'." unless o[:q]
  File.open(o[:out], "w") { |f| f.puts collection.to_s }
  $stderr.puts "Done.\n" unless o[:q]
rescue => err
  # NOTE(review): the exception is only reported; the script still finishes
  # with exit status 0 — confirm whether callers rely on that.
  $stderr.puts "Exception: #{err}\n\n"
  err.backtrace.each { |l| $stderr.puts l + "\n" }
  err
end
|
87
|
-
|
88
|
-
|
@@ -1,160 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby

#
# @author: Luis M. Rodriguez-R
# @license: artistic-2.0
#

$:.push File.expand_path("../lib", __FILE__)
require "optparse"
require "json"
require "tmpdir"

# Default options: verbose, 100 replicates, 2 concurrent children.
o = {q:false, n:100, thr:2}
ARGV << "-h" if ARGV.size==0
OptionParser.new do |opts|
  opts.banner = "
Subsamples the genomes in a set of Orthology Groups (OGs) and estimates the
trend of core genome and pangenome sizes.

Usage: #{$0} [options]"
  opts.separator ""
  opts.separator "Mandatory"
  opts.on("-o", "--ogs FILE",
    "Input file containing the precomputed OGs."){ |v| o[:ogs]=v }
  opts.separator ""
  opts.separator "Output Options"
  opts.on("-s", "--summary FILE",
    "Output file in tabular format with summary statistics."){ |v| o[:summ]=v }
  opts.on("-t", "--tab FILE","Output file in tabular format."){ |v| o[:tab]=v }
  opts.on("-j", "--json FILE", "Output file in JSON format."){ |v| o[:json]=v }
  opts.separator ""
  opts.separator "Other Options"
  opts.on("-n", "--replicates INT",
    "Number of replicates to estimate. By default: #{o[:n]}."
    ){ |v| o[:n]=v.to_i }
  opts.on("--threads INT",
    "Children threads to spawn. By default: #{o[:thr]}."){ |v| o[:thr]=v.to_i}
  opts.on("-q", "--quiet", "Run quietly (no STDERR output)."){ o[:q] = true }
  opts.on("-h", "--help", "Display this screen.") do
    puts opts
    exit
  end
  opts.separator ""
end.parse!
abort "-o is mandatory" if o[:ogs].nil?

##### MAIN:
begin
  # Read the pre-computed OGs. Each row becomes an Array of booleans (true
  # unless the cell is "-"), and the header width gives the genome count.
  $stderr.puts "Reading pre-computed OGs in '#{o[:ogs]}'." unless o[:q]
  bool_a = []
  genomes_n = nil
  File.open(o[:ogs], "r") do |f|
    h = f.gets.chomp.split "\t"
    genomes_n = h.size
    while ln = f.gets
      bool_a << ln.chomp.split("\t").map{ |g| g!="-" }
    end
  end
  $stderr.puts " Loaded OGs: #{bool_a.size}." unless o[:q]
  # Encode each OG as an Integer bitmask, one bit per genome.
  bool_a_b = bool_a.map{ |og| og.map{ |g| g ? "1" : "0" }.join("").to_i(2) }

  # Generate subsamples
  size = {core:[], pan:[]}
  Dir.mktmpdir do |dir|
    children = 0
    (0 .. o[:n]-1).each do |i|
      # Each replicate runs in a forked child and reports back through a
      # JSON file named after the replicate index.
      fork do
        # Generate trajectory
        genomes = (0 .. genomes_n-1).to_a.shuffle
        # Bitmask of the genomes still in the subsample (all of them at first).
        genomes_b = (2 ** genomes_n) - 1
        core = []
        pan = []
        while not genomes.empty?
          # Counts are prepended, so index 0 ends up holding the smallest
          # subsample and the last index the complete set.
          core.unshift 0
          pan.unshift 0
          bool_a_b.map! do |og|
            r_og = og & genomes_b
            if r_og>0
              core[0] += 1 if r_og==genomes_b
              pan[0] += 1
              og
            else
              nil
            end
          end
          # OGs absent from every remaining genome are dropped for good.
          bool_a_b.compact!
          # Remove one randomly chosen genome from the subsample.
          genomes_b ^= 2 ** genomes.pop
        end
        abort "UNEXPECTED ERROR: Final genomes_b=#{genomes_b}." if genomes_b>0
        # Store trajectory
        File.open("#{dir}/#{i}", "w") do |tfh|
          tfh.puts JSON.generate({core:core, pan:pan})
        end
      end # fork
      children += 1
      # Keep at most o[:thr] children alive at any time.
      if children >= o[:thr]
        Process.wait
        children -= 1
      end
    end
    Process.waitall
    # Recover trajectories
    (0 .. o[:n]-1).each do |i|
      s = JSON.parse(File.read("#{dir}/#{i}"), {:symbolize_names=>true})
      size[:core][i] = s[:core]
      size[:pan][i] = s[:pan]
    end
  end # Dir.mktmpdir

  # Show result
  $stderr.puts "Generating reports." unless o[:q]

  # Save results in JSON
  unless o[:json].nil?
    File.open(o[:json], "w") do |ofh|
      ofh.puts JSON.pretty_generate(size)
    end
  end

  # Save results in tab
  unless o[:tab].nil?
    File.open(o[:tab], "w") do |ofh|
      ofh.puts((%w{replicate metric}+(1 .. genomes_n).to_a).join("\t"))
      (0 .. o[:n]-1).each do |i|
        ofh.puts(([i+1,"core"] + size[:core][i]).join("\t"))
        ofh.puts(([i+1,"pan"] + size[:pan][i]).join("\t"))
      end
    end
  end

  # Save summary results in tab
  unless o[:summ].nil?
    File.open(o[:summ], "w") do |ofh|
      ofh.puts %w{genomes core_avg core_sd core_q1 core_q2 core_q3
        pan_avg pan_sd pan_q1 pan_q2 pan_q3}.join("\t")
      (0 .. genomes_n-1).each do |i|
        res = [ i+1 ]
        [:core, :pan].each do |met|
          a = size[met].map{ |r| r[i] }.sort
          avg = a.inject(0,:+).to_f / a.size
          var = a.map{ |v| v**2 }.inject(0,:+).to_f/a.size - avg**2
          # E[x^2] - E[x]^2 can round to a tiny negative number; treat that
          # as zero instead of letting Math.sqrt raise Math::DomainError.
          sd = var < 0 ? 0.0 : Math.sqrt(var)
          q1 = a[ a.size*1/4 ]
          q2 = a[ a.size*2/4 ]
          q3 = a[ a.size*3/4 ]
          res += [avg,sd,q1,q2,q3]
        end
        ofh.puts res.join("\t")
      end
    end
  end

  $stderr.puts "Done.\n" unless o[:q]
rescue => err
  $stderr.puts "Exception: #{err}\n\n"
  err.backtrace.each { |l| $stderr.puts l + "\n" }
  err
end
|
160
|
-
|
@@ -1,125 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
#
# @author Luis M. Rodriguez-R
# @license artistic license 2.0
#
# Extracts the sequences of Orthology Groups (OGs) from a set of proteomes in
# FastA format. Reads a tab-delimited OG table (-i), optionally filters the
# OGs (-c, -d), and writes the matching FastA records into the output
# directory (-o): one file per OG by default, or one file per genome with
# --per-genome.
#

$:.push File.expand_path("lib", File.dirname(__FILE__))
# OptionParser is used below; load it explicitly instead of relying on the
# project libraries to have loaded it.
require "optparse"
require "enveomics_rb/enveomics"
require "enveomics_rb/og"

o = {q:false, pergenome:false, prefix:false, first:false, rand:false,
  core:0.0, dups:0}
OptionParser.new do |opts|
  opts.banner = "
Extracts sequences of Orthology Groups (OGs) from genomes (proteomes).

Usage: #{$0} [options]"
  opts.separator ""
  opts.separator "Mandatory"
  opts.on("-i", "--in FILE",
    "Input file containing the OGs (as generated by ogs.rb)."){ |v| o[:in]=v }
  opts.on("-o", "--out FILE",
    "Output directory where to place extracted sequences."){ |v| o[:out]=v }
  opts.on("-s", "--seqs STRING",
    "Path to the proteomes in FastA format, using '%s' to denote the genome.",
    "For example: /path/to/seqs/%s.faa."){ |v| o[:seqs]=v }
  opts.separator ""
  opts.separator "Other Options"
  opts.on("-c", "--core FLOAT",
    "Use only OGs present in at least this fraction of the genomes.",
    "To use only the strict core genome*, use -c 1."){ |v| o[:core]=v.to_f }
  opts.on("-d", "--duplicates INT",
    "Use only OGs with less than this number of in-paralogs in a genome.",
    "To use only genes without in-paralogs*, use -d 1."
    ){ |v| o[:dups]=v.to_i }
  opts.on("-g", "--per-genome",
    "If set, the output is generated per genome.",
    "By default, the output is per OG."){ |v| o[:pergenome]=v }
  opts.on("-p", "--prefix",
    "If set, each sequence is prefixed with the genome name",
    "(or OG number, if --per-genome) and a dash."){ |v| o[:prefix]=v }
  opts.on("-r", "--rand",
    "Get only one gene per genome per OG (random) regardless of in-paralogs.",
    "By default all genes are extracted."){ |v| o[:rand]=v }
  opts.on("-f", "--first",
    "Get only one gene per genome per OG (first) regardless of in-paralogs.",
    "By default all genes are extracted. Takes precendece over --rand."
    ){ |v| o[:first]=v }
  # The TRUE constant no longer exists in Ruby >= 3.2; use the literal.
  opts.on("-q", "--quiet", "Run quietly (no STDERR output)."){ o[:q] = true }
  opts.on("-h", "--help", "Display this screen.") do
    puts opts
    exit
  end
  opts.separator ""
  opts.separator " * To use only the unus genome (OGs with exactly one " +
    "gene per genome), use: -c 1 -d 1."
  opts.separator ""
end.parse!
abort "-i is mandatory" if o[:in].nil?
abort "-o is mandatory" if o[:out].nil?
abort "-s is mandatory" if o[:seqs].nil?

##### MAIN:
# Output handles live outside the begin block so that the ensure clause can
# close whatever was opened, even if the run fails half-way.
ofhs = []
begin
  # Read the pre-computed OGs
  collection = OGCollection.new
  $stderr.puts "Reading pre-computed OGs in '#{o[:in]}'." unless o[:q]
  File.open(o[:in], "r") do |f|
    header = f.gets
    # An empty table would otherwise fail with an opaque NoMethodError.
    raise "Empty input file: #{o[:in]}" if header.nil?
    h = header.chomp.split(/\t/)
    f.each_line { |ln| collection << OG.new(h, ln.chomp.split(/\t/)) }
  end
  $stderr.puts " Loaded OGs: #{collection.ogs.size}." unless o[:q]
  $stderr.puts " Reported Genomes: #{Gene.genomes.size}." unless o[:q]

  # Filter core/in-paralog genes
  collection.filter_core! o[:core] unless o[:core]==0.0
  collection.remove_inparalogs! o[:dups] unless o[:dups]==0
  $stderr.puts " Filtered OGs: #{collection.ogs.size}." unless
    o[:q] or o[:core]==0.0

  # Open outputs: one file per genome, or one file per OG (named OG1..OGn)
  $stderr.puts "Initializing output files." unless o[:q]
  Dir.mkdir(o[:out]) unless Dir.exist? o[:out]
  out_paths =
    if o[:pergenome]
      Gene.genomes.map { |g| "#{o[:out]}/#{g}.fa" }
    else
      (1 .. collection.ogs.size).map { |og| "#{o[:out]}/OG#{og}.fa" }
    end
  # Register each handle as soon as it is opened so none is leaked if a
  # later open fails.
  out_paths.each { |path| ofhs << File.open(path, "w") }
  $stderr.puts " Created files: #{ofhs.size}." unless o[:q]

  # Read genomes
  $stderr.puts "Filtering genes." unless o[:q]
  genome_i = -1 # stays -1 (reported as 0 genomes) when there are no genomes
  Gene.genomes.each_with_index do |genome, idx|
    genome_i = idx
    $stderr.print " Genome #{genome_i+1}. \r" unless o[:q]
    # Gene lists of this genome, reduced to a single gene each if
    # --first/--rand was requested.
    genes = collection.get_genome_genes(genome).map do |og|
      o[:first] ? [og.first] : (o[:rand] ? [og.sample] : og)
    end
    # Index of the output handle for the current FastA record; nil means
    # that the record is skipped.
    hand = nil
    # File.foreach closes the proteome file once it has been read.
    File.foreach(sprintf(o[:seqs], genome)) do |ln|
      if ln =~ /^>(\S+)/
        seq_id = $1
        og = genes.index { |g| g.include? seq_id }
        hand = og.nil? ? nil : ( o[:pergenome] ? genome_i : og )
        # NOTE(review): this prefix uses the 0-based index, whereas the
        # per-OG file names are 1-based -- confirm that this is intended.
        ln.sub!(/^>/, ">#{o[:pergenome] ? "OG#{og}" : genome}-") if
          o[:prefix] and not hand.nil?
      end
      # Sequence lines follow the decision taken for their defline
      ofhs[hand].puts(ln) unless hand.nil?
    end
  end
  $stderr.puts " #{genome_i+1} genomes processed." unless o[:q]

  # Close outputs
  $stderr.puts "Closing output files." unless o[:q]
  ofhs.each { |fh| fh.close }
  $stderr.puts "Done.\n" unless o[:q]
rescue => err
  # Report the failure; the exit status is left as in the original script.
  $stderr.puts "Exception: #{err}\n\n"
  err.backtrace.each { |l| $stderr.puts l + "\n" }
  err
ensure
  # Release any output handle still open after a failure.
  ofhs.each { |fh| fh.close unless fh.closed? }
end
|
124
|
-
|
125
|
-
|