miga-base 1.2.17.0 → 1.2.17.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/version.rb +1 -1
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +41964 -0
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +32439 -0
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +62056 -0
- data/utils/FastAAI/FastAAI +3659 -0
- data/utils/FastAAI/FastAAI-legacy/FastAAI +1336 -0
- data/utils/FastAAI/FastAAI-legacy/kAAI_v1.0_virus.py +1296 -0
- data/utils/FastAAI/README.md +84 -0
- data/utils/enveomics/Docs/recplot2.md +244 -0
- data/utils/enveomics/Examples/aai-matrix.bash +66 -0
- data/utils/enveomics/Examples/ani-matrix.bash +66 -0
- data/utils/enveomics/Examples/essential-phylogeny.bash +105 -0
- data/utils/enveomics/Examples/unus-genome-phylogeny.bash +100 -0
- data/utils/enveomics/LICENSE.txt +73 -0
- data/utils/enveomics/Makefile +52 -0
- data/utils/enveomics/Manifest/Tasks/aasubs.json +103 -0
- data/utils/enveomics/Manifest/Tasks/blasttab.json +790 -0
- data/utils/enveomics/Manifest/Tasks/distances.json +161 -0
- data/utils/enveomics/Manifest/Tasks/fasta.json +802 -0
- data/utils/enveomics/Manifest/Tasks/fastq.json +291 -0
- data/utils/enveomics/Manifest/Tasks/graphics.json +126 -0
- data/utils/enveomics/Manifest/Tasks/mapping.json +137 -0
- data/utils/enveomics/Manifest/Tasks/ogs.json +382 -0
- data/utils/enveomics/Manifest/Tasks/other.json +906 -0
- data/utils/enveomics/Manifest/Tasks/remote.json +355 -0
- data/utils/enveomics/Manifest/Tasks/sequence-identity.json +650 -0
- data/utils/enveomics/Manifest/Tasks/tables.json +308 -0
- data/utils/enveomics/Manifest/Tasks/trees.json +68 -0
- data/utils/enveomics/Manifest/Tasks/variants.json +111 -0
- data/utils/enveomics/Manifest/categories.json +165 -0
- data/utils/enveomics/Manifest/examples.json +162 -0
- data/utils/enveomics/Manifest/tasks.json +4 -0
- data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +69 -0
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -0
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -0
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -0
- data/utils/enveomics/Pipelines/assembly.pbs/README.md +189 -0
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +112 -0
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +23 -0
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +44 -0
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +50 -0
- data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +37 -0
- data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +68 -0
- data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +49 -0
- data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +80 -0
- data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +57 -0
- data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +63 -0
- data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +38 -0
- data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +73 -0
- data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +21 -0
- data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +72 -0
- data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +98 -0
- data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -0
- data/utils/enveomics/Pipelines/blast.pbs/README.md +127 -0
- data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +109 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +128 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +16 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +22 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +26 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +89 -0
- data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +29 -0
- data/utils/enveomics/Pipelines/idba.pbs/README.md +49 -0
- data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +95 -0
- data/utils/enveomics/Pipelines/idba.pbs/run.pbs +56 -0
- data/utils/enveomics/Pipelines/trim.pbs/README.md +54 -0
- data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +70 -0
- data/utils/enveomics/Pipelines/trim.pbs/run.pbs +130 -0
- data/utils/enveomics/README.md +42 -0
- data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +171 -0
- data/utils/enveomics/Scripts/Aln.cat.rb +221 -0
- data/utils/enveomics/Scripts/Aln.convert.pl +35 -0
- data/utils/enveomics/Scripts/AlphaDiversity.pl +152 -0
- data/utils/enveomics/Scripts/BedGraph.tad.rb +93 -0
- data/utils/enveomics/Scripts/BedGraph.window.rb +71 -0
- data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +102 -0
- data/utils/enveomics/Scripts/BlastTab.addlen.rb +63 -0
- data/utils/enveomics/Scripts/BlastTab.advance.bash +48 -0
- data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +55 -0
- data/utils/enveomics/Scripts/BlastTab.catsbj.pl +104 -0
- data/utils/enveomics/Scripts/BlastTab.cogCat.rb +76 -0
- data/utils/enveomics/Scripts/BlastTab.filter.pl +47 -0
- data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +194 -0
- data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +104 -0
- data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +157 -0
- data/utils/enveomics/Scripts/BlastTab.recplot2.R +48 -0
- data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +86 -0
- data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +119 -0
- data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +86 -0
- data/utils/enveomics/Scripts/BlastTab.subsample.pl +47 -0
- data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +114 -0
- data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +90 -0
- data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +123 -0
- data/utils/enveomics/Scripts/Chao1.pl +97 -0
- data/utils/enveomics/Scripts/CharTable.classify.rb +234 -0
- data/utils/enveomics/Scripts/EBIseq2tax.rb +83 -0
- data/utils/enveomics/Scripts/FastA.N50.pl +60 -0
- data/utils/enveomics/Scripts/FastA.extract.rb +152 -0
- data/utils/enveomics/Scripts/FastA.filter.pl +52 -0
- data/utils/enveomics/Scripts/FastA.filterLen.pl +28 -0
- data/utils/enveomics/Scripts/FastA.filterN.pl +60 -0
- data/utils/enveomics/Scripts/FastA.fragment.rb +100 -0
- data/utils/enveomics/Scripts/FastA.gc.pl +42 -0
- data/utils/enveomics/Scripts/FastA.interpose.pl +93 -0
- data/utils/enveomics/Scripts/FastA.length.pl +38 -0
- data/utils/enveomics/Scripts/FastA.mask.rb +89 -0
- data/utils/enveomics/Scripts/FastA.per_file.pl +36 -0
- data/utils/enveomics/Scripts/FastA.qlen.pl +57 -0
- data/utils/enveomics/Scripts/FastA.rename.pl +65 -0
- data/utils/enveomics/Scripts/FastA.revcom.pl +23 -0
- data/utils/enveomics/Scripts/FastA.sample.rb +98 -0
- data/utils/enveomics/Scripts/FastA.slider.pl +85 -0
- data/utils/enveomics/Scripts/FastA.split.pl +55 -0
- data/utils/enveomics/Scripts/FastA.split.rb +79 -0
- data/utils/enveomics/Scripts/FastA.subsample.pl +131 -0
- data/utils/enveomics/Scripts/FastA.tag.rb +65 -0
- data/utils/enveomics/Scripts/FastA.toFastQ.rb +69 -0
- data/utils/enveomics/Scripts/FastA.wrap.rb +48 -0
- data/utils/enveomics/Scripts/FastQ.filter.pl +54 -0
- data/utils/enveomics/Scripts/FastQ.interpose.pl +90 -0
- data/utils/enveomics/Scripts/FastQ.maskQual.rb +89 -0
- data/utils/enveomics/Scripts/FastQ.offset.pl +90 -0
- data/utils/enveomics/Scripts/FastQ.split.pl +53 -0
- data/utils/enveomics/Scripts/FastQ.tag.rb +70 -0
- data/utils/enveomics/Scripts/FastQ.test-error.rb +81 -0
- data/utils/enveomics/Scripts/FastQ.toFastA.awk +24 -0
- data/utils/enveomics/Scripts/GFF.catsbj.pl +127 -0
- data/utils/enveomics/Scripts/GenBank.add_fields.rb +84 -0
- data/utils/enveomics/Scripts/HMM.essential.rb +351 -0
- data/utils/enveomics/Scripts/HMM.haai.rb +168 -0
- data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +83 -0
- data/utils/enveomics/Scripts/JPlace.distances.rb +88 -0
- data/utils/enveomics/Scripts/JPlace.to_iToL.rb +320 -0
- data/utils/enveomics/Scripts/M5nr.getSequences.rb +81 -0
- data/utils/enveomics/Scripts/MeTaxa.distribution.pl +198 -0
- data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +35 -0
- data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +49 -0
- data/utils/enveomics/Scripts/NCBIacc2tax.rb +92 -0
- data/utils/enveomics/Scripts/Newick.autoprune.R +27 -0
- data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +228 -0
- data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +32 -0
- data/utils/enveomics/Scripts/RefSeq.download.bash +48 -0
- data/utils/enveomics/Scripts/SRA.download.bash +55 -0
- data/utils/enveomics/Scripts/TRIBS.plot-test.R +36 -0
- data/utils/enveomics/Scripts/TRIBS.test.R +39 -0
- data/utils/enveomics/Scripts/Table.barplot.R +31 -0
- data/utils/enveomics/Scripts/Table.df2dist.R +30 -0
- data/utils/enveomics/Scripts/Table.filter.pl +61 -0
- data/utils/enveomics/Scripts/Table.merge.pl +77 -0
- data/utils/enveomics/Scripts/Table.prefScore.R +60 -0
- data/utils/enveomics/Scripts/Table.replace.rb +69 -0
- data/utils/enveomics/Scripts/Table.round.rb +63 -0
- data/utils/enveomics/Scripts/Table.split.pl +57 -0
- data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +227 -0
- data/utils/enveomics/Scripts/VCF.KaKs.rb +147 -0
- data/utils/enveomics/Scripts/VCF.SNPs.rb +88 -0
- data/utils/enveomics/Scripts/aai.rb +421 -0
- data/utils/enveomics/Scripts/ani.rb +362 -0
- data/utils/enveomics/Scripts/anir.rb +137 -0
- data/utils/enveomics/Scripts/clust.rand.rb +102 -0
- data/utils/enveomics/Scripts/gi2tax.rb +103 -0
- data/utils/enveomics/Scripts/in_silico_GA_GI.pl +96 -0
- data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
- data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
- data/utils/enveomics/Scripts/lib/enveomics.R +1 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +293 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +175 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +24 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +17 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +30 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +253 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +88 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +182 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +49 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +74 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +237 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +31 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +152 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +3 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +74 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +135 -0
- data/utils/enveomics/Scripts/ogs.annotate.rb +88 -0
- data/utils/enveomics/Scripts/ogs.core-pan.rb +160 -0
- data/utils/enveomics/Scripts/ogs.extract.rb +125 -0
- data/utils/enveomics/Scripts/ogs.mcl.rb +186 -0
- data/utils/enveomics/Scripts/ogs.rb +104 -0
- data/utils/enveomics/Scripts/ogs.stats.rb +131 -0
- data/utils/enveomics/Scripts/rbm-legacy.rb +172 -0
- data/utils/enveomics/Scripts/rbm.rb +108 -0
- data/utils/enveomics/Scripts/sam.filter.rb +148 -0
- data/utils/enveomics/Tests/Makefile +10 -0
- data/utils/enveomics/Tests/Mgen_M2288.faa +3189 -0
- data/utils/enveomics/Tests/Mgen_M2288.fna +8282 -0
- data/utils/enveomics/Tests/Mgen_M2321.fna +8288 -0
- data/utils/enveomics/Tests/Nequ_Kin4M.faa +2970 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +7 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +17 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +137 -0
- data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +123 -0
- data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +200 -0
- data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +55 -0
- data/utils/enveomics/Tests/alkB.nwk +1 -0
- data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +13 -0
- data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +17 -0
- data/utils/enveomics/Tests/hiv1.faa +59 -0
- data/utils/enveomics/Tests/hiv1.fna +134 -0
- data/utils/enveomics/Tests/hiv2.faa +70 -0
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +233 -0
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +1 -0
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +233 -0
- data/utils/enveomics/Tests/phyla_counts.tsv +10 -0
- data/utils/enveomics/Tests/primate_lentivirus.ogs +11 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +9 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +8 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +6 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +9 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +6 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +6 -0
- data/utils/enveomics/build_enveomics_r.bash +45 -0
- data/utils/enveomics/enveomics.R/DESCRIPTION +31 -0
- data/utils/enveomics/enveomics.R/NAMESPACE +39 -0
- data/utils/enveomics/enveomics.R/R/autoprune.R +155 -0
- data/utils/enveomics/enveomics.R/R/barplot.R +184 -0
- data/utils/enveomics/enveomics.R/R/cliopts.R +135 -0
- data/utils/enveomics/enveomics.R/R/df2dist.R +154 -0
- data/utils/enveomics/enveomics.R/R/growthcurve.R +331 -0
- data/utils/enveomics/enveomics.R/R/prefscore.R +79 -0
- data/utils/enveomics/enveomics.R/R/recplot.R +354 -0
- data/utils/enveomics/enveomics.R/R/recplot2.R +1631 -0
- data/utils/enveomics/enveomics.R/R/tribs.R +583 -0
- data/utils/enveomics/enveomics.R/R/utils.R +80 -0
- data/utils/enveomics/enveomics.R/README.md +81 -0
- data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
- data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +16 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +16 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +16 -0
- data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +25 -0
- data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +46 -0
- data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +47 -0
- data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +40 -0
- data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +103 -0
- data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +67 -0
- data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -0
- data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +45 -0
- data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +44 -0
- data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +47 -0
- data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +75 -0
- data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +50 -0
- data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +44 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +139 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +45 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +77 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +47 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +45 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +52 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +51 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +43 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +82 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +59 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +36 -0
- data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +68 -0
- data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -0
- data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -0
- data/utils/enveomics/enveomics.R/man/growth.curves.Rd +14 -0
- data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +13 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +78 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +46 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +45 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +125 -0
- data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -0
- data/utils/enveomics/globals.mk +8 -0
- data/utils/enveomics/manifest.json +9 -0
- data/utils/multitrim/Multitrim How-To.pdf +0 -0
- data/utils/multitrim/README.md +67 -0
- data/utils/multitrim/multitrim.py +1555 -0
- data/utils/multitrim/multitrim.yml +13 -0
- metadata +301 -5
@@ -0,0 +1,30 @@
|
|
1
|
+
|
2
|
+
require 'enveomics_rb/stats/sample'
|
3
|
+
|
4
|
+
module Enveomics
  # Calculate Gaussian Mixture Models by Expectation Maximization.
  #
  # Only the constructor is defined in this class body: it stores the sample,
  # the requested number of components, and the options hash.
  class GmmEm
    # Sample object wrapping the input values
    attr :sample
    # Number of gaussian components requested at initialization
    attr :components
    # Options hash (see #initialize for the supported keys)
    attr :opts

    # Initialize a GmmEm object from numeric array +x+, +components+
    # gaussian components (an Integer), and options hash +opts+ with supported
    # Symbol keys:
    # - ll_delta_convergence: Maximum change in LL to consider convergence
    #   (by default: 1e-15)
    # - max_iter: Maximum number of EM iterations (by default: 1_000)
    # - init_mu: Initial components means as numeric array
    # - init_sigma: Initial components standard deviation as numeric array
    # - init_alpha: Initial components fractions as numeric array adding up to 1
    #
    # The +opts+ hash is stored as passed (not copied), and the two defaults
    # above are written into it when the key is missing, nil, or false.
    #
    # NOTE(review): the init_* keys are documented but not read anywhere in
    # this class body; confirm where they are consumed.
    def initialize(x, components = 2, opts = {})
      # NOTE(review): this resolves the constant +Enve+, whereas the enclosing
      # module is +Enveomics+. Presumably an alias is defined elsewhere;
      # confirm against enveomics_rb/stats/sample.
      @sample = Enve::Stats::Sample.new(x)
      # Keep the requested number of components so the reader is meaningful
      @components = components
      @opts = opts
      @opts[:ll_delta_convergence] ||= 1e-15
      @opts[:max_iter] ||= 1_000
    end
  end
end
|
30
|
+
|
@@ -0,0 +1,253 @@
|
|
1
|
+
|
2
|
+
#
|
3
|
+
# @author: Luis M. Rodriguez-R
|
4
|
+
# @update: Jul-14-2015
|
5
|
+
# @license: artistic license 2.0
|
6
|
+
#
|
7
|
+
|
8
|
+
module JPlace
|
9
|
+
##### CLASSES:
|
10
|
+
# Placement.new(placement[, fields]): Initializes a new read placement.
|
11
|
+
# placement: A hash containing the placement.
|
12
|
+
# fields: If passed, sets the field order for all subsequent placements.
|
13
|
+
class Placement
  attr_writer :flag # This attribute is used by JPlace.distances.rb as a placeholder
  attr_reader :p, :n, :m, :flag

  # Field order shared by every Placement (class-level state)
  @@fields = nil

  # Sets the field order used to interpret the entries of 'p'.
  def self.fields=(fields)
    @@fields=fields
  end

  # Returns the field order currently in use (nil if undefined).
  def self.fields
    @@fields
  end

  # Initializes a new read placement.
  # placement: A hash containing the placement, with a 'p' key and one of
  #            'n' (EPA-style names) or 'nm' (pplacer-style [name,
  #            multiplicity] pairs).
  # fields:    If passed and no field order has been set yet, sets the field
  #            order for all subsequent placements.
  # Aborts if 'p' is missing or empty, or if neither 'n' nor 'nm' is present.
  def initialize(placement, fields=nil)
    @@fields = fields if @@fields.nil? and not fields.nil?
    # Save only the best (first) placement:
    abort "Placements must contain a 'p' field.\n" if placement["p"].nil?
    abort "Placements must contain a 'p' field with at least one entry.\n" if placement["p"][0].nil?
    @p = [placement["p"][0]]
    # Find name-only placements (EPA-style); each name gets multiplicity 1:
    unless placement["n"].nil?
      @n = placement["n"]
      @m = @n.map{ |_name| 1 }
    end
    # Find multiplicity placements (pplacer-style); overrides 'n' if both exist:
    unless placement["nm"].nil?
      @n = placement["nm"].map{ |nm| nm[0] }
      @m = placement["nm"].map{ |nm| nm[1].to_i }
    end
    abort "Placements must contain one of 'n' or 'nm' fields.\n" if @n.nil? or @m.nil?
  end

  # Returns names and multiplicities as an array of {:n=>, :m=>} hashes.
  def nm
    (0 .. (self.n.length-1)).map{ |i| {:n=>self.n[i], :m=>self.m[i]} }
  end

  # Returns the raw value of +field+ in the saved placement.
  # Aborts if the field order is undefined or +field+ is not part of it.
  def get_field_value(field)
    abort "Impossible to read placement with undefined fields." if @@fields.nil?
    f = @@fields.find_index(field)
    abort "Undefined field #{field}." if f.nil?
    self.p[0][f]
  end

  # Sets the value of +field+ in the saved placement to +value+.
  # Aborts if the field order is undefined or +field+ is not part of it.
  def set_field_value(field, value)
    # Same guard as get_field_value: without it, an undefined field order
    # fails with NoMethodError on nil rather than a readable message.
    abort "Impossible to modify placement with undefined fields." if @@fields.nil?
    f = @@fields.find_index(field)
    abort "Undefined field #{field}." if f.nil?
    self.p[0][f] = value
  end

  # Value of the 'edge_num' field as Integer.
  def edge_num
    self.get_field_value('edge_num').to_i
  end

  # Value of the 'likelihood' field as Float.
  def likelihood
    self.get_field_value('likelihood').to_f
  end

  # Value of the 'like_weight_ratio' field as Float.
  def like_weight_ratio
    self.get_field_value('like_weight_ratio').to_f
  end

  # Value of the 'distal_length' field as Float (0.0 if the value is nil).
  def distal_length
    (self.get_field_value('distal_length') || 0).to_f
  end

  # Value of the 'pendant_length' field as Float (0.0 if the value is nil).
  def pendant_length
    (self.get_field_value('pendant_length') || 0).to_f
  end

  # Human-readable representation including names and the saved placement.
  def to_s
    "#<Placement of #{self.n}: #{self.p}>"
  end
end
|
74
|
+
|
75
|
+
# Ancillary class Tree
|
76
|
+
class Tree
  # Tri-state cache: nil until the availability of iconv has been probed
  @@HAS_ICONV = nil

  # Returns true if the 'iconv' library can be loaded, false otherwise.
  # The probe (a require) is attempted only once and the result is cached.
  def self.has_iconv?
    if @@HAS_ICONV.nil?
      @@HAS_ICONV = true
      begin
        require 'rubygems'
        require 'iconv'
      rescue LoadError
        @@HAS_ICONV = false
      end
    end
    @@HAS_ICONV
  end

  # Builds the tree described by the Newick string +nwk+ and returns its
  # root Node. Invalid UTF-8 byte sequences are dropped before parsing:
  # with Iconv when it is available, otherwise with String#scrub.
  def self.from_nwk(nwk)
    if Tree.has_iconv?
      ic = Iconv.new('UTF-8//IGNORE','UTF-8')
      # A trailing space is appended and then removed around the conversion
      nwk = ic.iconv(nwk + ' ')[0..-2]
    elsif nwk.respond_to?(:scrub) && !nwk.valid_encoding?
      # Fallback without iconv: invalid bytes would otherwise make the
      # regular expressions used by Node fail
      nwk = nwk.scrub('')
    end
    Node.new(nwk)
  end
end
|
98
|
+
|
99
|
+
# Node.new(nwk[, parent]): Initializes a new Node.
|
100
|
+
# nwk: Node's description in Newick format.
|
101
|
+
# parent: Node's parent, or nil if root node.
|
102
|
+
class Node
  # Class

  # Class-level registry of nodes, indexed by edge number. It is shared by
  # every Node created in the process.
  @@edges = []

  # Returns the registry of nodes indexed by edge number.
  def self.edges
    @@edges
  end

  # Registers +node+ under its edge index (no-op for unindexed nodes).
  def self.register(node)
    @@edges[node.index] = node unless node.index.nil?
  end

  # Class-level functions related to JPlace

  # Attaches +placement+ to the node registered under its edge number.
  # Aborts if no node is registered for that edge.
  def self.link_placement(placement)
    abort "Trying to link placement in undefined edge #{placement.edge_num}: #{placement.to_s}" if @@edges[placement.edge_num].nil?
    @@edges[placement.edge_num].add_placement!(placement)
  end

  # Detaches +placement+ from the node registered under its edge number and
  # returns the removed placement (nil if it was not attached).
  def self.unlink_placement(placement)
    @@edges[placement.edge_num].delete_placement!(placement)
  end

  # Instance
  attr_reader :children, :length, :name, :label, :index, :nwk, :parent, :placements, :collapsed

  # Initializes a new Node, recursively creating its children.
  # nwk:    Node's description in Newick format, with edge indexes given as
  #         {N} after each node. The passed string is not modified.
  # parent: Node's parent, or nil if root node.
  # Aborts on empty input, unindexed edges (other than an unindexed root
  # ending in ';'), unparseable metadata, or unbalanced parentheses.
  def initialize(nwk, parent=nil)
    abort "Empty newick.\n" if nwk.nil? or nwk==''
    # Any ';' followed by another character is rewritten as '--'. A copy is
    # used so the caller's string is left untouched and frozen strings work.
    nwk = nwk.gsub(/;(.)/, '--\1')
    @nwk = nwk
    @parent = parent
    @placements = []
    @collapsed = false
    # Find index
    index_m = /^(?<pre>.*)\{(?<idx>[0-9]+)\}(?<post>[^\(\),;]*);?$/.match(nwk)
    if index_m.nil? and parent.nil? and nwk[nwk.length-1]==';'
      # Only a root node terminated by ';' is allowed to be unindexed
      @index = nil
    else
      abort "Unindexed edge found:\n#{@nwk}\n" if index_m.nil?
      nwk = index_m[:pre]+index_m[:post]
      @index = index_m[:idx].to_i
    end
    # Find name, label, and length
    meta_m = /^(\((?<cont>.+)\))?(?<name>[^:\(\);]*)(:(?<length>[0-9\.Ee+-]*)(?<label>\[[^\[\]\(\);]+\])?)?;?$/.match(nwk) or
      abort "Cannot parse node metadata (index #{@index}):\n#{@nwk}\n"
    nwk = meta_m[:cont]
    @name = meta_m[:name]
    @length = meta_m[:length]
    @label = meta_m[:label]
    # Find children
    @children = []
    nwk ||= ''
    quote = nil
    while nwk != ''
      i = 0 # Parenthesis depth; set to nil once a top-level comma is found
      j = 0 # Number of characters belonging to the next child
      nwk.each_char do |chr|
        if quote.nil?
          if chr=='"' or chr=="'"
            quote = chr
          else
            i += 1 if chr=='('
            i -= 1 if chr==')'
            if i==0 and chr==','
              i=nil
              break
            end
          end
        else
          # Inside quotes everything is skipped until the closing quote
          quote = nil if chr==quote
        end
        j += 1
      end
      abort "Unbalanced node at edge {#{@index}}, with leftness #{i}:\n#{@nwk}\n" unless i.nil? or i==0
      @children << Node.new(nwk[0 .. j-1],self)
      # Drop the consumed child and the separating comma
      nwk = nwk.length==j ? '' : nwk[j+1 .. -1]
    end
    Node.register(self)
  end

  # Accessors/Setters

  # Sets the node name, replacing whitespace and the characters ( ) , ; :
  # with underscores.
  def name=(new_name)
    @name = new_name.gsub(/[\s\(\),;:]/, '_')
  end

  # Tree algorithms

  # Calls the block with every node in the subtree, children before self.
  def post_order(&blk)
    self.children.each { |n| n.post_order(&blk) }
    blk[self]
  end

  # Calls the block with every node in the subtree: first child, self,
  # second child. Aborts if any visited node has a number of children
  # other than 0 or 2.
  def in_order(&blk)
    abort "Tree must be dycotomic to traverse in_order, node #{self.cannonical_name} "+
      "has #{self.children.length} children." unless [0,2].include? self.children.length
    self.children[0].in_order(&blk) unless self.children[0].nil?
    blk[self]
    self.children[1].in_order(&blk) unless self.children[1].nil?
  end

  # Calls the block with every node in the subtree, self before children.
  def pre_order(&blk)
    blk[self]
    self.children.each { |n| n.pre_order(&blk) }
  end

  # Returns the nodes from self up to the root (both included). Memoized.
  def path_to_root
    if @path_to_root.nil?
      @path_to_root = [self]
      @path_to_root += self.parent.path_to_root unless self.parent.nil?
    end
    @path_to_root
  end

  # Returns the sum of the lengths along the path to the root as Float,
  # counting missing lengths as 0.0. Memoized.
  def distance_to_root
    if @distance_to_root.nil?
      @distance_to_root = path_to_root.map{ |n| n.length.nil? ? 0.0 : n.length.to_f }.reduce(0.0, :+)
    end
    @distance_to_root
  end

  # Returns the first node in the path from self to the root that is also
  # in the path from +node+ to the root (nil if there is none).
  def lca(node)
    p1 = self.path_to_root
    p2 = node.path_to_root
    p1.find{ |n| p2.include? n }
  end

  # Returns the distance between self and +node+, computed from their
  # distances to the root and that of their common ancestor (see #lca).
  def distance(node)
    self.distance_to_root + node.distance_to_root - (2.0 * self.lca(node).distance_to_root)
  end

  # Two nodes are equal if they have the same edge index. Any object that
  # is not a Node is never equal.
  def ==(node)
    node.is_a?(Node) && self.index == node.index
  end

  # Tree representation

  # Returns the first non-empty value among name, label, and "{index}", or
  # an empty string if none is available.
  def cannonical_name
    return(self.name) unless self.name.nil? or self.name == ""
    return(self.label) unless self.label.nil? or self.label == ""
    return("{#{self.index.to_s}}") unless self.index.nil?
    ""
  end

  # Returns the subtree as text: children in parentheses, the canonical
  # name, and ":" followed by length and label when either is present.
  def to_s
    o = ""
    o += "(" + self.children.map{ |c| c.to_s }.join(",") + ")" if self.children.length > 0
    o += self.cannonical_name
    u = "#{self.length.nil? ? "" : self.length}#{self.label.nil? ? "" : self.label}"
    o += ":#{u}" unless u==""
    o
  end

  # Instance-level functions related to JPlace

  # Moves every placement attached to descendant nodes onto self, by
  # rewriting their 'edge_num' field and re-linking them through the
  # registry, then marks the node as collapsed.
  def collapse!
    self.pre_order do |n|
      if n!=self
        while n.placements.length > 0
          p = Node.unlink_placement(n.placements[0])
          p.set_field_value('edge_num', self.index)
          Node.link_placement(p)
        end
      end
    end
    @collapsed = true
  end

  # Attaches +placement+ to this node.
  def add_placement!(placement)
    @placements << placement
  end

  # Detaches +placement+ from this node.
  def delete_placement!(placement)
    @placements.delete(placement)
  end
end
|
251
|
+
|
252
|
+
end # module JPlace
|
253
|
+
|
@@ -0,0 +1,88 @@
|
|
1
|
+
|
2
|
+
module Enveomics
  ##
  # A simple object representing a sequence match from a search engine
  # supporting tabular BLAST output
  class Match
    class << self
      ##
      # Hash of canonical column names (Symbol) to their value classes, in
      # the order in which the columns are expected in each row
      def column_types
        {
          qseqid: String, sseqid: String, pident: Float,
          length: Integer, mismatch: Integer, gapopen: Integer,
          q_start: Integer, q_end: Integer, s_start: Integer,
          s_end: Integer, evalue: Float, bitscore: Float,
          # Non-standard (but frequently used in Enveomics Collection):
          qry_len: Integer, sbj_len: Integer
        }
      end

      ##
      # Class of the values in column +sym+ (canonical name or synonym)
      def column_type(sym)
        column_types[colname(sym)]
      end

      ##
      # Coerce +value+ to the class of column +sym+ using to_s, to_f, or
      # to_i. Returns nil if the column is unknown
      def to_column_type(sym, value)
        # Compared by class name: `case` on the class itself would test
        # whether the class is an instance of String/Float/Integer
        case column_type(sym).to_s
        when 'String' ; value.to_s
        when 'Float'  ; value.to_f
        when 'Integer'; value.to_i
        end
      end

      ##
      # Canonical column names as an Array of Symbol, in row order
      def columns
        column_types.keys
      end

      ##
      # Zero-based index of column +sym+ (canonical name or synonym)
      def column(sym)
        columns.index(colname(sym))
      end

      ##
      # Hash of supported synonyms to canonical column names
      def colsynonyms
        {
          qry: :qseqid, sbj: :sseqid,
          id: :pident, len: :length, score: :bitscore
        }
      end

      ##
      # All supported names: canonical column names followed by synonyms
      def colnames
        columns + colsynonyms.keys
      end

      ##
      # Canonical column name for +sym+, which can be a canonical name or a
      # synonym. Returns nil if +sym+ is neither
      def colname(sym)
        s = sym.to_sym
        column_types[s] ? s : colsynonyms[s]
      end
    end

    ####--- Instance Level ---###

    # Array of String with the tab-separated values of the match
    attr :row

    ##
    # Initialize Enveomics::Match object from a tabular blast line String +ln+
    def initialize(ln)
      @row = ln.chomp.split("\t")
    end

    # One reader per supported name (canonical or synonym), returning the
    # corresponding value of the row coerced to the column class. Columns
    # missing from the row are coerced from nil ('', 0.0, or 0)
    colnames.each do |sym|
      define_method sym do
        self.class.to_column_type(sym, row[self.class.column(sym)])
      end
    end

    ##
    # Alignment length as a fraction of the query length, or 0.0 if the
    # query length is zero (including rows without the qry_len column)
    def qry_fract
      return 0.0 if qry_len.zero?

      # Memoized separately from sbj_fract so that the two don't collide
      @qry_fract ||= len.to_f / qry_len
    end

    alias fract qry_fract

    ##
    # Alignment length as a fraction of the subject length, or 0.0 if the
    # subject length is zero (including rows without the sbj_len column)
    def sbj_fract
      return 0.0 if sbj_len.zero?

      @sbj_fract ||= len.to_f / sbj_len
    end

    ##
    # Tab-separated representation of the row
    def to_s
      row.join("\t")
    end
  end
end
|
@@ -0,0 +1,182 @@
|
|
1
|
+
|
2
|
+
##### CLASSES:
|
3
|
+
# Gene.new(genome, id): Initializes a new Gene.
|
4
|
+
# genome: A string uniquely identifying the parent genome.
|
5
|
+
# id: A string uniquely identifying the gene within the genome. It can be
|
6
|
+
# non-unique across genomes.
|
7
|
+
class Gene
  # genome_id: Integer index of the parent genome in Gene.genomes
  # id:        Identifier of the gene within the genome
  attr_reader :genome_id, :id

  # Class-level list of all genome names seen so far (shared by all genes)
  @@genomes = []

  # Get all genomes in the run as an array of strings.
  def self.genomes
    @@genomes
  end

  # Initializes a new Gene.
  # genome: Either the name of the parent genome (registered in
  #         Gene.genomes if new), or the Integer index of a genome that is
  #         already registered. Aborts if an Integer points to no genome.
  # id:     Identifier of the gene within the genome.
  def initialize(genome, id)
    if genome.is_a? Integer
      abort "Internal error: Genome #{genome} does not exist yet." if
        @@genomes[genome].nil?
      @genome_id = genome
    else
      @@genomes << genome unless @@genomes.include? genome
      @genome_id = @@genomes.index(genome)
    end
    @id = id
  end

  # Compare if two Gene objects refer to the same gene (same genome index
  # and same identifier). Any object that is not a Gene is never equal.
  def ==(b)
    b.is_a?(Gene) && self.genome_id==b.genome_id && self.id==b.id
  end

  # Get the name of the parent genome of this gene.
  def genome
    @@genomes[self.genome_id]
  end

  # String representation in the form "genome:id".
  def to_s
    "#{self.genome}:#{self.id}"
  end
end
|
36
|
+
|
37
|
+
# OG.new(): Initializes an empty OG.
# OG.new(genomes, genes): Initializes a pre-computed OG.
# genomes: List of genomes as an array of strings (as in Gene.genomes).
# genes: List of genes as an array of strings, with '-' indicating no genes and
#   multiple genes separated by ','.
class OG
  attr_reader :genes, :notes

  def initialize(genomes=nil, genes=nil)
    @genes = []
    @notes = []
    return if genomes.nil? or genes.nil?

    genes.each_with_index do |cell, column|
      next if cell == "-"

      cell.split(/,/).each do |gene_id|
        self << Gene.new(genomes[column], gene_id)
      end
    end
  end

  # Add genes or combine another OG into the loaded OG (self).
  def <<(obj)
    case obj
    when Gene
      # Slots are created lazily, one array of gene IDs per genome index
      slot = (@genes[obj.genome_id] ||= [])
      slot << obj.id unless include?(obj)
    when OG
      obj.genes_obj.each { |gene| self << gene }
    else
      abort "Unsupported class for #{obj}"
    end
  end

  # Get the list of genes as objects (internally saved as strings to save RAM).
  def genes_obj
    Gene.genomes.each_with_index.flat_map do |genome, genome_id|
      (genes[genome_id] || []).map { |gene_id| Gene.new(genome, gene_id) }
    end
  end

  # Evaluates if the OG contains the passed gene.
  def include?(gene)
    ids = genes[gene.genome_id]
    !ids.nil? && ids.include?(gene.id)
  end

  # Get the list of genomes (as indexes of Gene.genomes) containing genes in
  # this OG.
  def genomes
    Gene.genomes.each_index.select do |gno|
      ids = genes[gno]
      !ids.nil? && !ids.empty?
    end
  end

  # Adds a note that will be printed after the last column. If note_idx is
  # passed, the note is stored in that position, appended to any note already
  # there using ' || ' as separator.
  def add_note(note, note_idx=nil)
    return @notes << note if note_idx.nil?

    previous = @notes[note_idx]
    prefix = previous.nil? ? '' : previous + ' || '
    @notes[note_idx] = prefix + note
  end

  # Tab-delimited row: one column per genome in Gene.genomes ('-' if the slot
  # was never created), optionally followed by '#' and the notes.
  def to_s
    columns = Gene.genomes.each_index.map do |genome_id|
      ids = genes[genome_id]
      ids.nil? ? "-" : ids.join(",")
    end
    line = columns.join("\t")
    line += "\t#\t" + notes.join("\t") unless notes.empty?
    line
  end

  # Array of booleans, one per genome in Gene.genomes, indicating if the OG
  # has a slot for that genome.
  def to_bool_a
    Gene.genomes.each_index.map { |genome_id| !genes[genome_id].nil? }
  end
end
|
107
|
+
|
108
|
+
# OGCollection.new(): Initializes an empty collection of OGs.
class OGCollection
  attr_reader :ogs, :note_srcs

  def initialize
    @ogs = []
    @note_srcs = []
  end

  # Add an OG to the collection
  def <<(og)
    @ogs << og
  end

  # Compare OGs all-vs-all to identify groups that should be merged.
  # Each OG is merged into the first already-kept OG that shares a gene with
  # it, or kept as a new OG if none does.
  # NOTE(review): an OG bridging two already-kept OGs is merged only into the
  # first one; the two kept OGs are not merged with each other.
  def consolidate!
    old_ogs = self.ogs
    @ogs = []
    old_ogs.each do |og|
      match = nil
      og.genes_obj.each do |gene|
        match = self.get_og gene
        break unless match.nil?
      end
      if match.nil?
        self << og
      else
        match << og
      end
    end
  end

  # Removes OGs present in less than 'fraction' of the genomes
  def filter_core!(fraction=1.0)
    min_genomes = (fraction * Gene.genomes.size).ceil
    @ogs.select! { |og| og.genomes.size >= min_genomes }
  end

  # Removes OGs present more than 'dups' number of times in any genome.
  # Genomes without genes in an OG count as zero copies, so OGs that are
  # absent from some genomes (or have no genes at all) are handled.
  def remove_inparalogs!(dups=1)
    @ogs.select! do |og|
      # The per-genome list is sparse: slots of genomes without genes are nil
      max_copies = og.genes.map do |pergenome|
        pergenome.nil? ? 0 : pergenome.size
      end.max
      (max_copies || 0) <= dups
    end
  end

  # Add a pair of RBM genes into the corresponding OG, or create a new OG.
  def add_rbm(a, b)
    og = self.get_og(a)
    og = self.get_og(b) if og.nil?
    if og.nil?
      og = OG.new
      @ogs << og
    end
    og << a
    og << b
  end

  # Get the OG containing the gene (returns the first, if multiple), or nil
  # if no OG contains it.
  def get_og(gene)
    self.ogs.find { |og| og.include? gene }
  end

  # Get the genes from a given genome (returns an array of arrays), with one
  # (possibly empty) array of gene IDs per OG.
  def get_genome_genes(genome)
    genome_id = Gene.genomes.index(genome)
    self.ogs.map { |og| og.genes[genome_id] || [] }
  end

  # Add annotation sources
  def add_note_src src
    @note_srcs << src
  end

  # Tab-delimited table: a header with the genome names (optionally followed
  # by '#' and the annotation sources) and one line per OG.
  def to_s
    header = Gene.genomes.join("\t")
    header += "\t#\t" + self.note_srcs.join("\t") if self.note_srcs.length > 0
    header + "\n" + self.ogs.map { |og| og.to_s }.join("\n")
  end

  # Array with the boolean representation of each OG (see OG#to_bool_a).
  def to_bool_a ; ogs.map{ |og| og.to_bool_a } ; end
end
|
182
|
+
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'enveomics_rb/bm_set'
|
2
|
+
|
3
|
+
module Enveomics
  ##
  # Reciprocal best matches between two sequence sets, built from the two
  # directional Enveomics::BMset objects (+seq1+ vs +seq2+ and back)
  class RBM
    attr_reader :seq1, :seq2, :bms1, :bms2

    ##
    # Initialize RBM object with sequence paths +seq1+ and +seq2+, and
    # Enveomics::BMset options Hash +bm_opts+
    def initialize(seq1, seq2, bm_opts = {})
      @seq1, @seq2 = seq1, seq2
      @bms1 = Enveomics::BMset.new(seq1, seq2, bm_opts)
      @bms2 = Enveomics::BMset.new(seq2, seq1, bm_opts)
      @set = nil
    end

    ##
    # Array of Reciprocal Best Enveomics::Match objects (computed on first
    # use and cached afterwards)
    def set
      return @set if @set

      @set = reciprocate!
    end

    ##
    # Number of reciprocal best matches found
    def count
      set.count
    end

    ##
    # Find reciprocal best matches and return the subset of +bms1+ that
    # is reciprocal with +bms2+
    def reciprocate!
      bms1.each.select do |forward|
        # The match is reciprocal if the best match of its subject in the
        # opposite direction points back to its query
        backward = bms2[forward.sbj]
        backward && forward.qry == backward.sbj
      end
    end

    ##
    # Enumerate RBMs and yield +blk+, or return an Enumerator if no block
    # is given
    def each(&blk)
      return to_enum(:each) unless block_given?

      set.each { |bm| blk.call(bm) }
    end
  end
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
|
2
|
+
#
|
3
|
+
# @author: Luis M. Rodriguez-R
|
4
|
+
# @license: artistic license 2.0
|
5
|
+
#
|
6
|
+
|
7
|
+
require "enveomics_rb/enveomics"
|
8
|
+
use "restclient"
|
9
|
+
use "json"
|
10
|
+
|
11
|
+
# Client for remote sequence databases: NCBI E-utilities and EBI dbfetch.
# All the functionality is exposed as class-level methods.
class RemoteData
  # Class-level variables
  @@EUTILS = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
  # NOTE(review): this endpoint uses plain HTTP -- confirm whether it can be
  # switched to HTTPS.
  @@EBIREST = "http://www.ebi.ac.uk/Tools"

  # Class-level methods

  # Run the NCBI E-utilities +script+ (e.g., "efetch.fcgi") with the query
  # parameters in the Hash +params+, retrying up to 10 times.
  #
  # Returns the response body as String, and also saves it in +outfile+ if
  # passed. Aborts the program if no attempt ends with HTTP code 200.
  def self.eutils(script, params={}, outfile=nil)
    response = nil
    last_code = nil
    10.times do
      begin
        response = RestClient.get "#{@@EUTILS}/#{script}", {:params=>params}
        last_code = response.code
      rescue => err
        # Forget any earlier response: only the last attempt counts below.
        # The HTTP code, if any, is carried by the exception.
        response = nil
        last_code = err.respond_to?(:http_code) ? err.http_code : nil
        warn "Request failed #{last_code.nil? ? "without error code" :
          "with error code #{last_code}"}."
        next
      end
      break if response.code == 200
    end
    if response.nil? or response.code != 200
      abort "Unable to reach NCBI EUtils, " +
        (last_code.nil? ? "no response received" :
          "error code #{last_code}") + "."
    end
    unless outfile.nil?
      # Block form, so the file is closed even if writing fails
      File.open(outfile, "w") { |ohf| ohf.print response.to_s }
    end
    response.to_s
  end

  # Run efetch.fcgi, see RemoteData.eutils for the arguments.
  def self.efetch(*etc)
    eutils "efetch.fcgi", *etc
  end

  # Run elink.fcgi, see RemoteData.eutils for the arguments.
  def self.elink(*etc)
    eutils "elink.fcgi", *etc
  end

  # Run esummary.fcgi, see RemoteData.eutils for the arguments.
  def self.esummary(*etc)
    eutils "esummary.fcgi", *etc
  end

  # Look for the entry replacing +old_gi+ in the database +db+.
  #
  # Returns two values: the result of efetch (with rettype "gi") for the
  # replacing accession (or nil if there is none) and the status reported
  # for +old_gi+. Both are nil if the summary has no entry for +old_gi+.
  def self.update_gi(db, old_gi)
    summ = JSON.parse RemoteData.esummary({:db=>db, :id=>old_gi,
      :retmode=>"json"})
    entry = summ["result"].nil? ? nil : summ["result"][old_gi.to_s]
    return nil,nil if entry.nil?
    new_acc = entry["replacedby"]
    new_gi = (new_acc.nil? ? nil :
      RemoteData.efetch({:db=>db, :id=>new_acc, :rettype=>"gi"}))
    return new_gi,entry["status"]
  end

  # Fetch the entry +id+ from the database +db+ in +format+ using EBI
  # dbfetch, with a timeout of 600 seconds.
  #
  # Returns the response body as String, and also saves it in +outfile+ if
  # passed. Raises a RuntimeError if the response code is not 200.
  def self.ebiFetch(db, id, format, outfile=nil)
    url = "#{@@EBIREST}/dbfetch/dbfetch/#{db}/#{id}/#{format}"
    response = RestClient::Request.execute(:method=>:get,
      :url=>url, :timeout=>600)
    raise "Unable to reach EBI REST client, error code " +
      response.code.to_s + "." unless response.code == 200
    unless outfile.nil?
      File.open(outfile, "w") { |ohf| ohf.print response.to_s }
    end
    response.to_s
  end

  # Get the NCBI Taxonomy ID of the sequence +id+ in the EBI database +db+
  # from its "annot" record. Returns the ID as a String of digits, or nil if
  # it cannot be found.
  def self.ebiseq2taxid(id,db)
    doc = RemoteData.ebiFetch(db, id, "annot").split(/[\n\r]/)
    # Prefer the feature table cross-reference, fall back to the OX line
    ln = doc.grep(/^FT\s+\/db_xref="taxon:/).first
    ln = doc.grep(/^OX\s+NCBI_TaxID=/).first if ln.nil?
    return nil if ln.nil?
    # If the pattern doesn't match, the line is kept as is and rejected below
    taxid = ln.sub(/.*(?:"taxon:|NCBI_TaxID=)(\d+)["; ].*/, "\\1")
    return nil unless taxid =~ /^\d+$/
    taxid
  end
end
|
74
|
+
|