miga-base 0.7.26.0 → 0.7.26.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/version.rb +1 -1
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +41964 -0
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +32439 -0
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +62056 -0
- data/utils/FastAAI/FastAAI/FastAAI +1336 -0
- data/utils/FastAAI/README.md +84 -0
- data/utils/FastAAI/kAAI_v1.0_virus.py +1296 -0
- data/utils/enveomics/Docs/recplot2.md +244 -0
- data/utils/enveomics/Examples/aai-matrix.bash +66 -0
- data/utils/enveomics/Examples/ani-matrix.bash +66 -0
- data/utils/enveomics/Examples/essential-phylogeny.bash +105 -0
- data/utils/enveomics/Examples/unus-genome-phylogeny.bash +100 -0
- data/utils/enveomics/LICENSE.txt +73 -0
- data/utils/enveomics/Makefile +52 -0
- data/utils/enveomics/Manifest/Tasks/aasubs.json +103 -0
- data/utils/enveomics/Manifest/Tasks/blasttab.json +786 -0
- data/utils/enveomics/Manifest/Tasks/distances.json +161 -0
- data/utils/enveomics/Manifest/Tasks/fasta.json +766 -0
- data/utils/enveomics/Manifest/Tasks/fastq.json +243 -0
- data/utils/enveomics/Manifest/Tasks/graphics.json +126 -0
- data/utils/enveomics/Manifest/Tasks/mapping.json +67 -0
- data/utils/enveomics/Manifest/Tasks/ogs.json +382 -0
- data/utils/enveomics/Manifest/Tasks/other.json +829 -0
- data/utils/enveomics/Manifest/Tasks/remote.json +355 -0
- data/utils/enveomics/Manifest/Tasks/sequence-identity.json +501 -0
- data/utils/enveomics/Manifest/Tasks/tables.json +308 -0
- data/utils/enveomics/Manifest/Tasks/trees.json +68 -0
- data/utils/enveomics/Manifest/Tasks/variants.json +111 -0
- data/utils/enveomics/Manifest/categories.json +156 -0
- data/utils/enveomics/Manifest/examples.json +154 -0
- data/utils/enveomics/Manifest/tasks.json +4 -0
- data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +69 -0
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -0
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -0
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -0
- data/utils/enveomics/Pipelines/assembly.pbs/README.md +189 -0
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +112 -0
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +23 -0
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +44 -0
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +50 -0
- data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +37 -0
- data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +68 -0
- data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +49 -0
- data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +80 -0
- data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +57 -0
- data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +63 -0
- data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +38 -0
- data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +73 -0
- data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +21 -0
- data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +72 -0
- data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +98 -0
- data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -0
- data/utils/enveomics/Pipelines/blast.pbs/README.md +127 -0
- data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +109 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +128 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +16 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +22 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +26 -0
- data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +89 -0
- data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +29 -0
- data/utils/enveomics/Pipelines/idba.pbs/README.md +49 -0
- data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +95 -0
- data/utils/enveomics/Pipelines/idba.pbs/run.pbs +56 -0
- data/utils/enveomics/Pipelines/trim.pbs/README.md +54 -0
- data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +70 -0
- data/utils/enveomics/Pipelines/trim.pbs/run.pbs +130 -0
- data/utils/enveomics/README.md +42 -0
- data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +171 -0
- data/utils/enveomics/Scripts/Aln.cat.rb +163 -0
- data/utils/enveomics/Scripts/Aln.convert.pl +35 -0
- data/utils/enveomics/Scripts/AlphaDiversity.pl +152 -0
- data/utils/enveomics/Scripts/BedGraph.tad.rb +93 -0
- data/utils/enveomics/Scripts/BedGraph.window.rb +71 -0
- data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +102 -0
- data/utils/enveomics/Scripts/BlastTab.addlen.rb +63 -0
- data/utils/enveomics/Scripts/BlastTab.advance.bash +48 -0
- data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +55 -0
- data/utils/enveomics/Scripts/BlastTab.catsbj.pl +104 -0
- data/utils/enveomics/Scripts/BlastTab.cogCat.rb +76 -0
- data/utils/enveomics/Scripts/BlastTab.filter.pl +47 -0
- data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +194 -0
- data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +104 -0
- data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +157 -0
- data/utils/enveomics/Scripts/BlastTab.recplot2.R +48 -0
- data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +86 -0
- data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +119 -0
- data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +86 -0
- data/utils/enveomics/Scripts/BlastTab.subsample.pl +47 -0
- data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +114 -0
- data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +90 -0
- data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +101 -0
- data/utils/enveomics/Scripts/Chao1.pl +97 -0
- data/utils/enveomics/Scripts/CharTable.classify.rb +234 -0
- data/utils/enveomics/Scripts/EBIseq2tax.rb +83 -0
- data/utils/enveomics/Scripts/FastA.N50.pl +56 -0
- data/utils/enveomics/Scripts/FastA.extract.rb +152 -0
- data/utils/enveomics/Scripts/FastA.filter.pl +52 -0
- data/utils/enveomics/Scripts/FastA.filterLen.pl +28 -0
- data/utils/enveomics/Scripts/FastA.filterN.pl +60 -0
- data/utils/enveomics/Scripts/FastA.fragment.rb +92 -0
- data/utils/enveomics/Scripts/FastA.gc.pl +42 -0
- data/utils/enveomics/Scripts/FastA.interpose.pl +93 -0
- data/utils/enveomics/Scripts/FastA.length.pl +38 -0
- data/utils/enveomics/Scripts/FastA.mask.rb +89 -0
- data/utils/enveomics/Scripts/FastA.per_file.pl +36 -0
- data/utils/enveomics/Scripts/FastA.qlen.pl +57 -0
- data/utils/enveomics/Scripts/FastA.rename.pl +65 -0
- data/utils/enveomics/Scripts/FastA.revcom.pl +23 -0
- data/utils/enveomics/Scripts/FastA.sample.rb +83 -0
- data/utils/enveomics/Scripts/FastA.slider.pl +85 -0
- data/utils/enveomics/Scripts/FastA.split.pl +55 -0
- data/utils/enveomics/Scripts/FastA.split.rb +79 -0
- data/utils/enveomics/Scripts/FastA.subsample.pl +131 -0
- data/utils/enveomics/Scripts/FastA.tag.rb +65 -0
- data/utils/enveomics/Scripts/FastA.wrap.rb +48 -0
- data/utils/enveomics/Scripts/FastQ.filter.pl +54 -0
- data/utils/enveomics/Scripts/FastQ.interpose.pl +90 -0
- data/utils/enveomics/Scripts/FastQ.offset.pl +90 -0
- data/utils/enveomics/Scripts/FastQ.split.pl +53 -0
- data/utils/enveomics/Scripts/FastQ.tag.rb +63 -0
- data/utils/enveomics/Scripts/FastQ.test-error.rb +81 -0
- data/utils/enveomics/Scripts/FastQ.toFastA.awk +24 -0
- data/utils/enveomics/Scripts/GFF.catsbj.pl +127 -0
- data/utils/enveomics/Scripts/GenBank.add_fields.rb +84 -0
- data/utils/enveomics/Scripts/HMM.essential.rb +351 -0
- data/utils/enveomics/Scripts/HMM.haai.rb +168 -0
- data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +83 -0
- data/utils/enveomics/Scripts/JPlace.distances.rb +88 -0
- data/utils/enveomics/Scripts/JPlace.to_iToL.rb +320 -0
- data/utils/enveomics/Scripts/M5nr.getSequences.rb +81 -0
- data/utils/enveomics/Scripts/MeTaxa.distribution.pl +198 -0
- data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +35 -0
- data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +49 -0
- data/utils/enveomics/Scripts/NCBIacc2tax.rb +92 -0
- data/utils/enveomics/Scripts/Newick.autoprune.R +27 -0
- data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +228 -0
- data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +32 -0
- data/utils/enveomics/Scripts/RefSeq.download.bash +48 -0
- data/utils/enveomics/Scripts/SRA.download.bash +57 -0
- data/utils/enveomics/Scripts/TRIBS.plot-test.R +36 -0
- data/utils/enveomics/Scripts/TRIBS.test.R +39 -0
- data/utils/enveomics/Scripts/Table.barplot.R +31 -0
- data/utils/enveomics/Scripts/Table.df2dist.R +30 -0
- data/utils/enveomics/Scripts/Table.filter.pl +61 -0
- data/utils/enveomics/Scripts/Table.merge.pl +77 -0
- data/utils/enveomics/Scripts/Table.replace.rb +69 -0
- data/utils/enveomics/Scripts/Table.round.rb +63 -0
- data/utils/enveomics/Scripts/Table.split.pl +57 -0
- data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +227 -0
- data/utils/enveomics/Scripts/VCF.KaKs.rb +147 -0
- data/utils/enveomics/Scripts/VCF.SNPs.rb +88 -0
- data/utils/enveomics/Scripts/aai.rb +418 -0
- data/utils/enveomics/Scripts/ani.rb +362 -0
- data/utils/enveomics/Scripts/clust.rand.rb +102 -0
- data/utils/enveomics/Scripts/gi2tax.rb +103 -0
- data/utils/enveomics/Scripts/in_silico_GA_GI.pl +96 -0
- data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
- data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
- data/utils/enveomics/Scripts/lib/enveomics.R +1 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +24 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +253 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +182 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +74 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +237 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/stat.rb +30 -0
- data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +135 -0
- data/utils/enveomics/Scripts/ogs.annotate.rb +88 -0
- data/utils/enveomics/Scripts/ogs.core-pan.rb +160 -0
- data/utils/enveomics/Scripts/ogs.extract.rb +125 -0
- data/utils/enveomics/Scripts/ogs.mcl.rb +186 -0
- data/utils/enveomics/Scripts/ogs.rb +104 -0
- data/utils/enveomics/Scripts/ogs.stats.rb +131 -0
- data/utils/enveomics/Scripts/rbm.rb +146 -0
- data/utils/enveomics/Tests/Makefile +10 -0
- data/utils/enveomics/Tests/Mgen_M2288.faa +3189 -0
- data/utils/enveomics/Tests/Mgen_M2288.fna +8282 -0
- data/utils/enveomics/Tests/Mgen_M2321.fna +8288 -0
- data/utils/enveomics/Tests/Nequ_Kin4M.faa +2970 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +7 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +17 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +137 -0
- data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +123 -0
- data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +200 -0
- data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +55 -0
- data/utils/enveomics/Tests/alkB.nwk +1 -0
- data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +13 -0
- data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +17 -0
- data/utils/enveomics/Tests/hiv1.faa +59 -0
- data/utils/enveomics/Tests/hiv1.fna +134 -0
- data/utils/enveomics/Tests/hiv2.faa +70 -0
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +233 -0
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +1 -0
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +233 -0
- data/utils/enveomics/Tests/phyla_counts.tsv +10 -0
- data/utils/enveomics/Tests/primate_lentivirus.ogs +11 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +9 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +8 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +6 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +9 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +6 -0
- data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +6 -0
- data/utils/enveomics/build_enveomics_r.bash +45 -0
- data/utils/enveomics/enveomics.R/DESCRIPTION +31 -0
- data/utils/enveomics/enveomics.R/NAMESPACE +39 -0
- data/utils/enveomics/enveomics.R/R/autoprune.R +155 -0
- data/utils/enveomics/enveomics.R/R/barplot.R +184 -0
- data/utils/enveomics/enveomics.R/R/cliopts.R +135 -0
- data/utils/enveomics/enveomics.R/R/df2dist.R +154 -0
- data/utils/enveomics/enveomics.R/R/growthcurve.R +331 -0
- data/utils/enveomics/enveomics.R/R/recplot.R +354 -0
- data/utils/enveomics/enveomics.R/R/recplot2.R +1631 -0
- data/utils/enveomics/enveomics.R/R/tribs.R +583 -0
- data/utils/enveomics/enveomics.R/R/utils.R +50 -0
- data/utils/enveomics/enveomics.R/README.md +80 -0
- data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
- data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +25 -0
- data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +46 -0
- data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +47 -0
- data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -0
- data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +32 -0
- data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +91 -0
- data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +57 -0
- data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -0
- data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +39 -0
- data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +38 -0
- data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +40 -0
- data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +67 -0
- data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +37 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +122 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +45 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +68 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +41 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +40 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +41 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +43 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +37 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +74 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +59 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +32 -0
- data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +59 -0
- data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -0
- data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -0
- data/utils/enveomics/enveomics.R/man/growth.curves.Rd +14 -0
- data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +13 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +63 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +38 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +38 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +111 -0
- data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -0
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -0
- data/utils/enveomics/globals.mk +8 -0
- data/utils/enveomics/manifest.json +9 -0
- metadata +277 -4
@@ -0,0 +1,182 @@
|
|
1
|
+
|
2
|
+
##### CLASSES:
|
3
|
+
# Gene.new(genome, id): Initializes a new Gene.
|
4
|
+
# genome: A string uniquely identifying the parent genome.
|
5
|
+
# id: A string uniquely identifying the gene within the genome. It can be
|
6
|
+
# non-unique across genomes.
|
7
|
+
class Gene
  # genome_id: Integer position of the parent genome in Gene.genomes.
  # id: Identifier of the gene within its genome.
  attr_reader :genome_id, :id

  # Registry of genome names shared by every Gene. The position of a name in
  # this array is the genome_id used by all instances.
  @@genomes = []

  # Get all genomes in the run as an array of strings.
  def self.genomes
    @@genomes
  end

  # genome: Either the genome name (registered on first use) or the Integer
  #    index of a genome that is already registered.
  # id: Identifier of the gene within the genome.
  # Aborts if an Integer index does not point to a registered genome.
  def initialize(genome, id)
    if genome.is_a? Integer
      # Negative indexes would silently wrap around the array, so they are
      # rejected together with indexes past the end.
      abort "Internal error: Genome #{genome} does not exist yet." if
        genome < 0 || @@genomes[genome].nil?
      @genome_id = genome
    else
      idx = @@genomes.index(genome)
      if idx.nil?
        @@genomes << genome
        idx = @@genomes.size - 1
      end
      @genome_id = idx
    end
    @id = id
  end

  # Compare if two Gene objects refer to the same gene. Objects that don't
  # expose both genome_id and id (including nil) are never equal to a Gene.
  def ==(b)
    return false unless b.respond_to?(:genome_id) && b.respond_to?(:id)
    genome_id == b.genome_id && id == b.id
  end

  # Get the name of the parent genome, as registered in Gene.genomes.
  def genome
    @@genomes[genome_id]
  end

  # String representation in the form "genome:id".
  def to_s
    "#{genome}:#{id}"
  end
end
|
36
|
+
|
37
|
+
# OG.new(): Initializes an empty OG.
|
38
|
+
# OG.new(genomes, genes): Initializes a pre-computed OG.
|
39
|
+
# genomes: List of genomes as an array of strings (as in Gene.genomes).
|
40
|
+
# genes: List of genes as an array of strings, with '-' indicating no genes and
|
41
|
+
# multiple genes separated by ','.
|
42
|
+
class OG
  # genes: Array indexed by genome_id (see Gene.genomes); each slot is nil or
  #    an array of gene IDs (strings) from that genome.
  # notes: Array of free-text notes printed after the gene columns.
  attr_reader :genes, :notes

  # genomes: List of genome names, in the same order as `genes`.
  # genes: One string per genome: '-' for no genes, or gene IDs separated by
  #    ','. If either argument is nil, the OG starts empty.
  def initialize(genomes=nil, genes=nil)
    @genes = []
    @notes = []
    return if genomes.nil? || genes.nil?
    genes.each_with_index do |gene_list, genome_i|
      next if gene_list == "-"
      gene_list.split(/,/).each do |gene_id|
        self << Gene.new(genomes[genome_i], gene_id)
      end
    end
  end

  # Add genes or combine another OG into the loaded OG (self). Genes already
  # present are not duplicated. Aborts for any other class.
  def <<(obj)
    if obj.is_a? Gene
      @genes[obj.genome_id] = [] if @genes[obj.genome_id].nil?
      @genes[obj.genome_id] << obj.id unless include? obj
    elsif obj.is_a? OG
      obj.genes_obj.each { |gene| self << gene }
    else
      abort "Unsupported class for #{obj}"
    end
  end

  # Get the list of genes as objects (internally saved as strings to save RAM).
  # The genome is passed by index, which avoids one name lookup per gene.
  def genes_obj
    (0 ... Gene.genomes.length).flat_map do |genome_id|
      (genes[genome_id] || []).map { |gene_id| Gene.new(genome_id, gene_id) }
    end
  end

  # Evaluates if the OG contains the passed gene.
  def include?(gene)
    return false if genes[gene.genome_id].nil?
    genes[gene.genome_id].include? gene.id
  end

  # Get the list of genomes (as indexes of Gene.genomes) containing genes in
  # this OG.
  def genomes
    (0 ... Gene.genomes.length).select { |gno| genome_present? gno }
  end

  # Adds a note that will be printed after the last column. Without note_idx
  # the note is appended; with note_idx it is stored in that position, joined
  # by ' || ' to any note already there.
  def add_note note, note_idx=nil
    if note_idx.nil?
      @notes << note
    else
      previous = @notes[note_idx]
      @notes[note_idx] = (previous.nil? ? '' : (previous + ' || ')) + note
    end
  end

  # Tab-delimited row: one column per genome in Gene.genomes ('-' if the OG
  # has no genes there, otherwise comma-separated IDs), followed by a '#'
  # column and the notes, if any.
  def to_s
    columns = (0 ... Gene.genomes.length).map do |genome_id|
      genome_present?(genome_id) ? genes[genome_id].join(",") : "-"
    end
    row = columns.join("\t")
    row += "\t#\t" + notes.join("\t") unless notes.size == 0
    row
  end

  # Presence/absence of the OG in each genome of Gene.genomes, as booleans.
  def to_bool_a
    (0 ... Gene.genomes.length).map { |genome_id| genome_present? genome_id }
  end

  private

  # True if at least one gene is stored for the genome index. An emptied list
  # counts as absent, so #genomes, #to_s and #to_bool_a always agree.
  def genome_present?(genome_id)
    !(genes[genome_id].nil? || genes[genome_id].empty?)
  end
end
|
107
|
+
|
108
|
+
# OGCollection.new(): Initializes an empty collection of OGs.
|
109
|
+
class OGCollection
  # ogs: Array of OG objects in the collection.
  # note_srcs: Array of annotation source names, printed in the header.
  attr_reader :ogs, :note_srcs

  def initialize
    @ogs = []
    @note_srcs = []
  end

  # Add an OG to the collection
  def <<(og)
    @ogs << og
  end

  # Rebuild the collection merging OGs that share genes. Each OG is merged
  # into the first already-kept OG that contains any of its genes, or kept as
  # a new OG if none does.
  def consolidate!
    old_ogs = ogs
    @ogs = []
    old_ogs.each do |og|
      existing = nil
      og.genes_obj.each do |gene|
        existing = get_og gene
        break unless existing.nil?
      end
      if existing.nil?
        self << og
      else
        existing << og
      end
    end
  end

  # Removes OGs present in less than 'fraction' of the genomes
  def filter_core!(fraction=1.0)
    min_genomes = (fraction * Gene.genomes.size).ceil
    @ogs.select! { |og| og.genomes.size >= min_genomes }
  end

  # Removes OGs present more than 'dups' number of times in any genome.
  # Genomes without genes in an OG are stored as nil and are skipped; an OG
  # without any genes counts as having zero copies.
  def remove_inparalogs!(dups=1)
    @ogs.select! do |og|
      og.genes.compact.map { |pergenome| pergenome.size }.max.to_i <= dups
    end
  end

  # Add a pair of RBM genes into the corresponding OG, or create a new OG.
  def add_rbm(a, b)
    og = get_og(a)
    og = get_og(b) if og.nil?
    if og.nil?
      og = OG.new
      @ogs << og
    end
    og << a
    og << b
  end

  # Get the OG containing the gene (returns the first, if multiple), or nil.
  def get_og(gene)
    ogs.find { |og| og.include? gene }
  end

  # Get the genes from a given genome (returns an array of arrays), with one
  # entry per OG. A genome that is not registered in Gene.genomes has no genes
  # in any OG.
  def get_genome_genes(genome)
    genome_id = Gene.genomes.index(genome)
    return ogs.map { [] } if genome_id.nil?
    ogs.map { |og| og.genes[genome_id] || [] }
  end

  # Add annotation sources
  def add_note_src src
    @note_srcs << src
  end

  # Tab-delimited table: a header with the genome names (plus '#' and the
  # annotation sources, if any) followed by one row per OG.
  def to_s
    header = Gene.genomes.join("\t")
    header += "\t#\t" + note_srcs.join("\t") if note_srcs.length > 0
    header + "\n" + ogs.map { |og| og.to_s }.join("\n")
  end

  # Presence/absence matrix: one array of booleans per OG.
  def to_bool_a
    ogs.map { |og| og.to_bool_a }
  end
end
|
182
|
+
|
@@ -0,0 +1,74 @@
|
|
1
|
+
|
2
|
+
#
|
3
|
+
# @author: Luis M. Rodriguez-R
|
4
|
+
# @license: artistic license 2.0
|
5
|
+
#
|
6
|
+
|
7
|
+
require "enveomics_rb/enveomics"
|
8
|
+
use "restclient"
|
9
|
+
use "json"
|
10
|
+
|
11
|
+
class RemoteData
  # Class-level variables
  @@EUTILS = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
  @@EBIREST = "http://www.ebi.ac.uk/Tools"

  # Class-level methods

  # Query an NCBI E-utilities script, retrying up to 10 times.
  # script: Name of the script under the E-utilities base URL.
  # params: Hash of query parameters.
  # outfile: If not nil, path where the response body is also saved.
  # Returns the response body as a string. Aborts if no attempt ends with
  # HTTP code 200.
  def self.eutils(script, params={}, outfile=nil)
    response = nil
    last_code = nil
    10.times do
      begin
        response = RestClient.get "#{@@EUTILS}/#{script}", {:params=>params}
      rescue => err
        # The failed request produced no response object, so any code must
        # come from the exception itself (when it exposes one).
        response = nil
        last_code = err.respond_to?(:http_code) ? err.http_code : nil
        warn "Request failed #{last_code.nil? ? "without error code" :
          "with error code #{last_code}"}."
        next
      end
      last_code = response.code
      break if response.code == 200
    end
    abort "Unable to reach NCBI EUtils, error code " +
      "#{last_code.nil? ? "unknown" : last_code}." unless
      !response.nil? && response.code == 200
    save_response(response.to_s, outfile)
    response.to_s
  end

  # Shortcut for eutils with the efetch.fcgi script.
  def self.efetch(*etc)
    eutils "efetch.fcgi", *etc
  end

  # Shortcut for eutils with the elink.fcgi script.
  def self.elink(*etc)
    eutils "elink.fcgi", *etc
  end

  # Shortcut for eutils with the esummary.fcgi script.
  def self.esummary(*etc)
    eutils "esummary.fcgi", *etc
  end

  # Look up the esummary record of old_gi in database db.
  # Returns two values: the efetch result (rettype "gi") for the record's
  # "replacedby" entry, or nil if there is none, and the record's "status".
  # Returns nil, nil if the summary has no entry for old_gi.
  def self.update_gi(db, old_gi)
    summ = JSON.parse RemoteData.esummary({:db=>db, :id=>old_gi,
      :retmode=>"json"})
    return nil,nil if summ["result"].nil? or summ["result"][old_gi.to_s].nil?
    new_acc = summ["result"][old_gi.to_s]["replacedby"]
    new_gi = (new_acc.nil? ? nil :
      RemoteData.efetch({:db=>db, :id=>new_acc, :rettype=>"gi"}))
    return new_gi,summ["result"][old_gi.to_s]["status"]
  end

  # Fetch an entry through the EBI dbfetch service.
  # db, id, format: Database, entry identifier and format placed in the URL.
  # outfile: If not nil, path where the response body is also saved.
  # Returns the response body as a string. Raises if the HTTP code isn't 200.
  def self.ebiFetch(db, id, format, outfile=nil)
    url = "#{@@EBIREST}/dbfetch/dbfetch/#{db}/#{id}/#{format}"
    response = RestClient::Request.execute(:method=>:get,
      :url=>url, :timeout=>600)
    raise "Unable to reach EBI REST client, error code " +
      response.code.to_s + "." unless response.code == 200
    save_response(response.to_s, outfile)
    response.to_s
  end

  # Get the NCBI taxonomy ID (as a string of digits) annotated in the "annot"
  # format of an EBI entry, or nil if none is found. The FT db_xref "taxon:"
  # line is preferred over the OX "NCBI_TaxID=" line.
  def self.ebiseq2taxid(id,db)
    doc = RemoteData.ebiFetch(db, id, "annot").split(/[\n\r]/)
    ln = doc.grep(/^FT\s+\/db_xref="taxon:/).first
    ln = doc.grep(/^OX\s+NCBI_TaxID=/).first if ln.nil?
    return nil if ln.nil?
    taxid = ln.sub(/.*(?:"taxon:|NCBI_TaxID=)(\d+)["; ].*/, "\\1")
    taxid =~ /^\d+$/ ? taxid : nil
  end

  # Write text to outfile unless outfile is nil. The block form of File.open
  # closes the handle even if writing fails.
  def self.save_response(text, outfile)
    return if outfile.nil?
    File.open(outfile, "w") { |ohf| ohf.print text }
  end
  private_class_method :save_response
end
|
74
|
+
|
@@ -0,0 +1,237 @@
|
|
1
|
+
|
2
|
+
##### CLASSES:
|
3
|
+
# SeqRange.parse(str): Initializes a new SeqRange from a string. A SeqRange is a
|
4
|
+
# representation of any collection of coordinates in a given sequence.
|
5
|
+
# Coordinates here are 1-based and base-located. Admittedly, the
|
6
|
+
# 0-based/interbase-located system is much more convenient for range
|
7
|
+
# operations, but GenBank (together with most common Software) is built on
|
8
|
+
# the 1-based/base-located system.
|
9
|
+
# str: A string describing the sequence range as in GenBank records.
|
10
|
+
# Note that "ID:location" notation is NOT supported by this implementation,
|
11
|
+
# although it is permitted by GenBank. Some examples of valid `str`:
|
12
|
+
# "<1..123"
|
13
|
+
# "complement(3..6)"
|
14
|
+
# "join(complement(join(13..43,complement(45..46),complement(1..12),
|
15
|
+
# <1..12)),12..15,13..22)"
|
16
|
+
# The last one is valid, but once parsed it's internally simplified as:
|
17
|
+
# "join(complement(<1..12),1..12,45..46,complement(13..43),12..15,13..22)"
|
18
|
+
# Which is exactly equivalent. The common (but non-GenBank-compliant)
|
19
|
+
# practice of inverting coordinates instead of using the `complement()`
|
20
|
+
# operator is also supported. For example:
|
21
|
+
# "123..3"
|
22
|
+
# Is interpreted as:
|
23
|
+
# "complement(3..123)"
|
24
|
+
# See also http://www.insdc.org/files/feature_table.html
|
25
|
+
#
|
26
|
+
# SeqRange.new(c): Initializes a new SeqRange from an object.
|
27
|
+
# c: Any object supported by the `<<` operator, or `nil` to create an empty
|
28
|
+
# SeqRange.
|
29
|
+
#
|
30
|
+
# See also ContigSeqRange.parse.
|
31
|
+
class SeqRange
  # Class-level

  # Build a SeqRange from a GenBank-style location string. Supports nested
  # `join()` and `complement()` operators; anything else is delegated to
  # ContigSeqRange.parse. Characters other than letters, digits and
  # `.()<>,` are ignored. The passed string is never modified.
  def self.parse(str)
    clean = str.gsub(/[^A-Za-z0-9\.\(\)<>,]/, "")
    if clean =~ /^join\((.+)\)$/i
      inner = $1
      piece = ""
      depth = 0
      sr = SeqRange.new
      # Split on commas that are not nested inside parentheses.
      inner.each_char do |chr|
        if chr == "," && depth == 0
          sr += SeqRange.parse(piece)
          piece = ""
          next
        elsif chr == "("
          depth += 1
        elsif chr == ")"
          depth -= 1
          raise "Unbalanced parenthesis in '#{inner}'." if depth < 0
        end
        piece += chr
      end
      sr += SeqRange.parse(piece) unless piece.empty?
      sr
    elsif clean =~ /^complement\((.+)\)$/i
      SeqRange.parse($1).reverse!
    else
      SeqRange.new(ContigSeqRange.parse(clean))
    end
  end

  # Instance-level

  # contig: Array of ContigSeqRange objects, in order.
  attr_reader :contig

  # c: Any object supported by `<<`, or nil for an empty SeqRange.
  def initialize(c=nil)
    @contig = []
    self << c unless c.nil?
  end

  # Smallest left coordinate (nil if empty).
  def leftmost; contig.map{ |c| c.left }.min; end

  # Largest right coordinate (nil if empty).
  def rightmost; contig.map{ |c| c.right }.max; end

  # Sum of the sizes of all the contiguous ranges.
  def size; contig.map{ |c| c.size }.inject(0,:+); end

  # Concatenation with another SeqRange or a ContigSeqRange, as a new object.
  def +(sr)
    return(self + SeqRange.new(sr)) if sr.is_a? ContigSeqRange
    raise "Unsupported operation '+' with class #{sr.class.to_s}." unless
      sr.is_a? SeqRange
    out = SeqRange.new(self)
    out << sr
    out
  end

  # Fraction of `sr` covered by this range: the size of the sorted and
  # compacted range divided by the size of `sr`.
  # sr: A ContigSeqRange, or a SeqRange that compacts into exactly one
  #    contiguous range. It must span this whole range.
  def /(sr)
    if sr.is_a? SeqRange
      sr2 = sr.sort.compact
      # SeqRange#size is a length in bases; the number of contiguous blocks
      # is what tells whether the denominator is contiguous.
      raise "Denominator is not a contiguous domain." unless
        sr2.contig.size == 1
      return(self / sr2.contig.first)
    end
    raise "Unsupported operation '/' with class #{sr.class.to_s}" unless
      sr.is_a? ContigSeqRange
    return 0.0 if contig.empty?
    raise "Denominator doesn't span the whole domain of numerator." unless
      sr.left <= leftmost && sr.right >= rightmost
    saved = ContigSeqRange.IGNORE_STRAND
    begin
      ContigSeqRange.IGNORE_STRAND = false
      range = sort.compact.size
    ensure
      # Restore the global pragma even if compacting fails.
      ContigSeqRange.IGNORE_STRAND = saved
    end
    range.to_f / sr.size
  end

  # Append a ContigSeqRange, all the ranges of another SeqRange, or an Array
  # of ContigSeqRange objects.
  def <<(c)
    if c.is_a? ContigSeqRange
      @contig << c
    elsif c.is_a? SeqRange
      @contig += c.contig
    elsif c.is_a? Array
      raise "Array must contain only objects of class ContigSeqRange." unless
        c.all? { |cc| cc.is_a? ContigSeqRange }
      @contig += c
    else
      raise "Unsupported operation '<<' with class #{c.class.to_s}."
    end
  end

  # Non-destructive variants. `reverse` copies each range first because
  # `reverse!` flips the strand of the range objects themselves.
  def reverse ; SeqRange.new(contig.map{ |c| c.dup }).reverse! ; end
  def sort ; SeqRange.new(self).sort! ; end
  def compact ; SeqRange.new(self).compact! ; end

  # Flip the strand of every range and invert their order.
  def reverse!
    @contig.each{ |c| c.reverse! }
    @contig.reverse!
    self
  end

  # Sort the ranges by left coordinate.
  def sort!
    @contig.sort!{ |x,y| x.left <=> y.left }
    self
  end

  # Merge neighboring ranges (in the current order) that are in the same
  # strand and overlap or are adjacent, until no such pair remains.
  def compact!
    return self if contig.size < 2
    clean = false
    until clean
      clean = true
      (1 ... contig.size).each do |i|
        prev, cur = contig[i-1], contig[i]
        next unless prev.reverse? == cur.reverse?
        next unless prev.contig? cur
        @contig[i-1] = prev + cur
        @contig[i] = nil
        clean = false
        break
      end
      @contig.compact!
    end
    self
  end

  # GenBank-style string, wrapped in `join()` if there is more than one range.
  def to_s
    o = contig.map{ |c| c.to_s }.join(",")
    o = "join(#{o})" if contig.size > 1
    o
  end
end
|
146
|
+
|
147
|
+
|
148
|
+
# ContigSeqRange.parse(str): Initializes a new ContigSeqRange from a string. A
|
149
|
+
# ContigSeqRange is a primitive of `SeqRange` that doesn't support the
|
150
|
+
# `join()` operator. Other than that, syntax is identical to `SeqRange`.
|
151
|
+
# str: A string describing the sequence range as in GenBank records (except
|
152
|
+
# `join()`).
|
153
|
+
#
|
154
|
+
# ContigSeqRange.new(a,b): Initializes a new ContigSeqRange from the
|
155
|
+
# coordinates as integers.
|
156
|
+
# a: Start of the range.
|
157
|
+
# b: End of the range. If a>b, the `complement()` operator is assumed.
|
158
|
+
#
|
159
|
+
# ContigSeqRange.IGNORE_STRAND = true: Use this pragma to ignore strandness.
|
160
|
+
# If set, it globally affects the behavior of the class. Note that
|
161
|
+
# `SeqRange` instances contain a collection of `ContigSeqRange` objects, so
|
162
|
+
# that class is also affected.
|
163
|
+
class ContigSeqRange
  # Class-level

  # Global pragma: when true, `reverse!` leaves the strand untouched.
  @@IGNORE_STRAND = false
  def self.IGNORE_STRAND=(v); @@IGNORE_STRAND = !!v ; end
  def self.IGNORE_STRAND; @@IGNORE_STRAND ; end

  # Build a ContigSeqRange from a string such as "3..6", "<1..>12", "7",
  # "9..2" (read as the complement of 2..9) or "complement(3..6)". Parsing is
  # case-insensitive and the passed string is never modified.
  # Raises if the string cannot be parsed.
  def self.parse(str)
    str = str.downcase
    m = %r{^
      (?<c>complement\()?    # Reverse
      (?<lt><?)              # Open-ended to the left
      (?<left>\d+)           # Left coordinate
      (
        \.\.\.?              # 2 or 3 dots
        (?<gt1>>?)           # Open-ended to the right
        (?<right>\d+)        # Right coordinate
      )?
      (?<gt2>>?)             # Open-ended to the right
      \)?                    # If reverse
    $}x.match(str)
    raise "Cannot parse range: #{str}." if m.nil?
    left = m[:left].to_i
    # A single coordinate describes a one-base range, so it is also the end.
    right = m[:right].nil? ? left : m[:right].to_i
    c = ContigSeqRange.new(left, right)
    c.open_left = true if m[:lt]=="<"
    c.open_right = true if m[:gt1]==">" or m[:gt2]==">"
    c.reverse! if m[:c]=="complement("
    c
  end

  # Instance-level

  # open_left, open_right: Whether the range is open-ended on each side.
  attr_accessor :open_left, :open_right
  # coords: Two-element array with the left and right coordinates.
  attr_reader :coords

  # a: Start of the range.
  # b: End of the range. If a>b, the range is in the reverse strand.
  def initialize(a,b)
    @coords = [[a,b].min, [a,b].max]
    @open_left = false
    @open_right = false
    @reverse = (a > b)
  end

  # Strand-aware start and end: swapped with respect to left/right when the
  # range is in the reverse strand.
  def from; coords[ reverse? ? 1 : 0 ] ; end
  def to; coords[ reverse? ? 0 : 1 ] ; end

  # Strand-independent coordinates and length in bases.
  def left; coords[0] ; end
  def right; coords[1] ; end
  def size; right-left+1 ; end

  def reverse?; @reverse ; end

  # Flip the strand, unless ContigSeqRange.IGNORE_STRAND is set.
  def reverse!
    @reverse = ! reverse? unless @@IGNORE_STRAND
    self
  end

  # Do the two ranges share at least one position? (strand is not considered)
  def overlap?(sr) !(right < sr.left or left > sr.right) ; end

  # Do the two ranges overlap or lie next to each other? (strand is not
  # considered)
  def contig?(sr) !(right+1 < sr.left or left-1 > sr.right) ; end

  # Union of two overlapping or adjacent ranges in the same strand, as a new
  # object. An end of the result is open if any operand reaching that end is
  # open there.
  def +(sr)
    raise "Unsupported operation '+' with class #{sr.class.to_s}" unless
      sr.is_a? ContigSeqRange
    raise "Non-contiguous ranges cannot be added." unless contig? sr
    raise "Ranges in different strands cannot be added." unless
      reverse? == sr.reverse?
    out = ContigSeqRange.new([left,sr.left].min, [right,sr.right].max)
    out.reverse! if reverse?
    out.open_left = true if
      [self, sr].any? { |r| r.left == out.left && r.open_left }
    out.open_right = true if
      [self, sr].any? { |r| r.right == out.right && r.open_right }
    out
  end

  # GenBank-style string; one-base ranges are printed as a single coordinate.
  def to_s
    o = ""
    o += "<" if open_left
    o += left.to_s
    if left == right
      o += ">" if open_right
    else
      o += ".."
      o += ">" if open_right
      o += right.to_s
    end
    o = "complement(#{o})" if reverse?
    o
  end
end
|
237
|
+
|