miga-base 1.2.15.2 → 1.2.15.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/miga/cli/action/download/gtdb.rb +4 -1
- data/lib/miga/cli/action/gtdb_get.rb +4 -0
- data/lib/miga/daemon.rb +4 -1
- data/lib/miga/lair.rb +6 -4
- data/lib/miga/remote_dataset/download.rb +3 -2
- data/lib/miga/remote_dataset.rb +25 -7
- data/lib/miga/taxonomy.rb +6 -0
- data/lib/miga/version.rb +2 -2
- metadata +6 -302
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +0 -41964
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +0 -32439
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +0 -62056
- data/utils/FastAAI/FastAAI +0 -3659
- data/utils/FastAAI/FastAAI-legacy/FastAAI +0 -1336
- data/utils/FastAAI/FastAAI-legacy/kAAI_v1.0_virus.py +0 -1296
- data/utils/FastAAI/README.md +0 -84
- data/utils/enveomics/Docs/recplot2.md +0 -244
- data/utils/enveomics/Examples/aai-matrix.bash +0 -66
- data/utils/enveomics/Examples/ani-matrix.bash +0 -66
- data/utils/enveomics/Examples/essential-phylogeny.bash +0 -105
- data/utils/enveomics/Examples/unus-genome-phylogeny.bash +0 -100
- data/utils/enveomics/LICENSE.txt +0 -73
- data/utils/enveomics/Makefile +0 -52
- data/utils/enveomics/Manifest/Tasks/aasubs.json +0 -103
- data/utils/enveomics/Manifest/Tasks/blasttab.json +0 -790
- data/utils/enveomics/Manifest/Tasks/distances.json +0 -161
- data/utils/enveomics/Manifest/Tasks/fasta.json +0 -802
- data/utils/enveomics/Manifest/Tasks/fastq.json +0 -291
- data/utils/enveomics/Manifest/Tasks/graphics.json +0 -126
- data/utils/enveomics/Manifest/Tasks/mapping.json +0 -137
- data/utils/enveomics/Manifest/Tasks/ogs.json +0 -382
- data/utils/enveomics/Manifest/Tasks/other.json +0 -906
- data/utils/enveomics/Manifest/Tasks/remote.json +0 -355
- data/utils/enveomics/Manifest/Tasks/sequence-identity.json +0 -650
- data/utils/enveomics/Manifest/Tasks/tables.json +0 -308
- data/utils/enveomics/Manifest/Tasks/trees.json +0 -68
- data/utils/enveomics/Manifest/Tasks/variants.json +0 -111
- data/utils/enveomics/Manifest/categories.json +0 -165
- data/utils/enveomics/Manifest/examples.json +0 -162
- data/utils/enveomics/Manifest/tasks.json +0 -4
- data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +0 -69
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +0 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +0 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +0 -1
- data/utils/enveomics/Pipelines/assembly.pbs/README.md +0 -189
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +0 -112
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +0 -23
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +0 -44
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +0 -50
- data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +0 -37
- data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +0 -68
- data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +0 -49
- data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +0 -80
- data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +0 -57
- data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +0 -63
- data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +0 -38
- data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +0 -73
- data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +0 -21
- data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +0 -72
- data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +0 -98
- data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +0 -1
- data/utils/enveomics/Pipelines/blast.pbs/README.md +0 -127
- data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +0 -109
- data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +0 -128
- data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +0 -16
- data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +0 -22
- data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +0 -26
- data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +0 -89
- data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +0 -29
- data/utils/enveomics/Pipelines/idba.pbs/README.md +0 -49
- data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +0 -95
- data/utils/enveomics/Pipelines/idba.pbs/run.pbs +0 -56
- data/utils/enveomics/Pipelines/trim.pbs/README.md +0 -54
- data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +0 -70
- data/utils/enveomics/Pipelines/trim.pbs/run.pbs +0 -130
- data/utils/enveomics/README.md +0 -42
- data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +0 -171
- data/utils/enveomics/Scripts/Aln.cat.rb +0 -221
- data/utils/enveomics/Scripts/Aln.convert.pl +0 -35
- data/utils/enveomics/Scripts/AlphaDiversity.pl +0 -152
- data/utils/enveomics/Scripts/BedGraph.tad.rb +0 -93
- data/utils/enveomics/Scripts/BedGraph.window.rb +0 -71
- data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +0 -102
- data/utils/enveomics/Scripts/BlastTab.addlen.rb +0 -63
- data/utils/enveomics/Scripts/BlastTab.advance.bash +0 -48
- data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +0 -55
- data/utils/enveomics/Scripts/BlastTab.catsbj.pl +0 -104
- data/utils/enveomics/Scripts/BlastTab.cogCat.rb +0 -76
- data/utils/enveomics/Scripts/BlastTab.filter.pl +0 -47
- data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +0 -194
- data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +0 -104
- data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +0 -157
- data/utils/enveomics/Scripts/BlastTab.recplot2.R +0 -48
- data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +0 -86
- data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +0 -119
- data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +0 -86
- data/utils/enveomics/Scripts/BlastTab.subsample.pl +0 -47
- data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +0 -114
- data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +0 -90
- data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +0 -123
- data/utils/enveomics/Scripts/Chao1.pl +0 -97
- data/utils/enveomics/Scripts/CharTable.classify.rb +0 -234
- data/utils/enveomics/Scripts/EBIseq2tax.rb +0 -83
- data/utils/enveomics/Scripts/FastA.N50.pl +0 -60
- data/utils/enveomics/Scripts/FastA.extract.rb +0 -152
- data/utils/enveomics/Scripts/FastA.filter.pl +0 -52
- data/utils/enveomics/Scripts/FastA.filterLen.pl +0 -28
- data/utils/enveomics/Scripts/FastA.filterN.pl +0 -60
- data/utils/enveomics/Scripts/FastA.fragment.rb +0 -100
- data/utils/enveomics/Scripts/FastA.gc.pl +0 -42
- data/utils/enveomics/Scripts/FastA.interpose.pl +0 -93
- data/utils/enveomics/Scripts/FastA.length.pl +0 -38
- data/utils/enveomics/Scripts/FastA.mask.rb +0 -89
- data/utils/enveomics/Scripts/FastA.per_file.pl +0 -36
- data/utils/enveomics/Scripts/FastA.qlen.pl +0 -57
- data/utils/enveomics/Scripts/FastA.rename.pl +0 -65
- data/utils/enveomics/Scripts/FastA.revcom.pl +0 -23
- data/utils/enveomics/Scripts/FastA.sample.rb +0 -98
- data/utils/enveomics/Scripts/FastA.slider.pl +0 -85
- data/utils/enveomics/Scripts/FastA.split.pl +0 -55
- data/utils/enveomics/Scripts/FastA.split.rb +0 -79
- data/utils/enveomics/Scripts/FastA.subsample.pl +0 -131
- data/utils/enveomics/Scripts/FastA.tag.rb +0 -65
- data/utils/enveomics/Scripts/FastA.toFastQ.rb +0 -69
- data/utils/enveomics/Scripts/FastA.wrap.rb +0 -48
- data/utils/enveomics/Scripts/FastQ.filter.pl +0 -54
- data/utils/enveomics/Scripts/FastQ.interpose.pl +0 -90
- data/utils/enveomics/Scripts/FastQ.maskQual.rb +0 -89
- data/utils/enveomics/Scripts/FastQ.offset.pl +0 -90
- data/utils/enveomics/Scripts/FastQ.split.pl +0 -53
- data/utils/enveomics/Scripts/FastQ.tag.rb +0 -70
- data/utils/enveomics/Scripts/FastQ.test-error.rb +0 -81
- data/utils/enveomics/Scripts/FastQ.toFastA.awk +0 -24
- data/utils/enveomics/Scripts/GFF.catsbj.pl +0 -127
- data/utils/enveomics/Scripts/GenBank.add_fields.rb +0 -84
- data/utils/enveomics/Scripts/HMM.essential.rb +0 -351
- data/utils/enveomics/Scripts/HMM.haai.rb +0 -168
- data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +0 -83
- data/utils/enveomics/Scripts/JPlace.distances.rb +0 -88
- data/utils/enveomics/Scripts/JPlace.to_iToL.rb +0 -320
- data/utils/enveomics/Scripts/M5nr.getSequences.rb +0 -81
- data/utils/enveomics/Scripts/MeTaxa.distribution.pl +0 -198
- data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +0 -35
- data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +0 -49
- data/utils/enveomics/Scripts/NCBIacc2tax.rb +0 -92
- data/utils/enveomics/Scripts/Newick.autoprune.R +0 -27
- data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +0 -228
- data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +0 -32
- data/utils/enveomics/Scripts/RefSeq.download.bash +0 -48
- data/utils/enveomics/Scripts/SRA.download.bash +0 -55
- data/utils/enveomics/Scripts/TRIBS.plot-test.R +0 -36
- data/utils/enveomics/Scripts/TRIBS.test.R +0 -39
- data/utils/enveomics/Scripts/Table.barplot.R +0 -31
- data/utils/enveomics/Scripts/Table.df2dist.R +0 -30
- data/utils/enveomics/Scripts/Table.filter.pl +0 -61
- data/utils/enveomics/Scripts/Table.merge.pl +0 -77
- data/utils/enveomics/Scripts/Table.prefScore.R +0 -60
- data/utils/enveomics/Scripts/Table.replace.rb +0 -69
- data/utils/enveomics/Scripts/Table.round.rb +0 -63
- data/utils/enveomics/Scripts/Table.split.pl +0 -57
- data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +0 -227
- data/utils/enveomics/Scripts/VCF.KaKs.rb +0 -147
- data/utils/enveomics/Scripts/VCF.SNPs.rb +0 -88
- data/utils/enveomics/Scripts/aai.rb +0 -421
- data/utils/enveomics/Scripts/ani.rb +0 -362
- data/utils/enveomics/Scripts/anir.rb +0 -137
- data/utils/enveomics/Scripts/clust.rand.rb +0 -102
- data/utils/enveomics/Scripts/gi2tax.rb +0 -103
- data/utils/enveomics/Scripts/in_silico_GA_GI.pl +0 -96
- data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
- data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
- data/utils/enveomics/Scripts/lib/enveomics.R +0 -1
- data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +0 -293
- data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +0 -175
- data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +0 -24
- data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +0 -17
- data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +0 -30
- data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +0 -253
- data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +0 -88
- data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +0 -182
- data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +0 -49
- data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +0 -74
- data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +0 -237
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +0 -31
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +0 -152
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +0 -3
- data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +0 -74
- data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +0 -135
- data/utils/enveomics/Scripts/ogs.annotate.rb +0 -88
- data/utils/enveomics/Scripts/ogs.core-pan.rb +0 -160
- data/utils/enveomics/Scripts/ogs.extract.rb +0 -125
- data/utils/enveomics/Scripts/ogs.mcl.rb +0 -186
- data/utils/enveomics/Scripts/ogs.rb +0 -104
- data/utils/enveomics/Scripts/ogs.stats.rb +0 -131
- data/utils/enveomics/Scripts/rbm-legacy.rb +0 -172
- data/utils/enveomics/Scripts/rbm.rb +0 -108
- data/utils/enveomics/Scripts/sam.filter.rb +0 -148
- data/utils/enveomics/Tests/Makefile +0 -10
- data/utils/enveomics/Tests/Mgen_M2288.faa +0 -3189
- data/utils/enveomics/Tests/Mgen_M2288.fna +0 -8282
- data/utils/enveomics/Tests/Mgen_M2321.fna +0 -8288
- data/utils/enveomics/Tests/Nequ_Kin4M.faa +0 -2970
- data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +0 -7
- data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +0 -17
- data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +0 -137
- data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +0 -123
- data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +0 -200
- data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +0 -55
- data/utils/enveomics/Tests/alkB.nwk +0 -1
- data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +0 -13
- data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +0 -17
- data/utils/enveomics/Tests/hiv1.faa +0 -59
- data/utils/enveomics/Tests/hiv1.fna +0 -134
- data/utils/enveomics/Tests/hiv2.faa +0 -70
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +0 -233
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +0 -1
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +0 -233
- data/utils/enveomics/Tests/phyla_counts.tsv +0 -10
- data/utils/enveomics/Tests/primate_lentivirus.ogs +0 -11
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +0 -9
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +0 -8
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +0 -6
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +0 -9
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +0 -6
- data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +0 -6
- data/utils/enveomics/build_enveomics_r.bash +0 -45
- data/utils/enveomics/enveomics.R/DESCRIPTION +0 -31
- data/utils/enveomics/enveomics.R/NAMESPACE +0 -39
- data/utils/enveomics/enveomics.R/R/autoprune.R +0 -155
- data/utils/enveomics/enveomics.R/R/barplot.R +0 -184
- data/utils/enveomics/enveomics.R/R/cliopts.R +0 -135
- data/utils/enveomics/enveomics.R/R/df2dist.R +0 -154
- data/utils/enveomics/enveomics.R/R/growthcurve.R +0 -331
- data/utils/enveomics/enveomics.R/R/prefscore.R +0 -79
- data/utils/enveomics/enveomics.R/R/recplot.R +0 -354
- data/utils/enveomics/enveomics.R/R/recplot2.R +0 -1631
- data/utils/enveomics/enveomics.R/R/tribs.R +0 -583
- data/utils/enveomics/enveomics.R/R/utils.R +0 -80
- data/utils/enveomics/enveomics.R/README.md +0 -81
- data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
- data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -16
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -16
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -16
- data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +0 -25
- data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +0 -46
- data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +0 -23
- data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +0 -47
- data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +0 -23
- data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +0 -23
- data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +0 -40
- data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +0 -103
- data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +0 -67
- data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +0 -24
- data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +0 -45
- data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +0 -44
- data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +0 -47
- data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +0 -75
- data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +0 -50
- data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +0 -44
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +0 -139
- data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +0 -45
- data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +0 -24
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +0 -77
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +0 -25
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +0 -21
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +0 -47
- data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +0 -29
- data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +0 -18
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +0 -45
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +0 -36
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +0 -27
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +0 -52
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +0 -17
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +0 -51
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +0 -43
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +0 -82
- data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +0 -59
- data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +0 -27
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +0 -36
- data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +0 -23
- data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +0 -68
- data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +0 -28
- data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +0 -27
- data/utils/enveomics/enveomics.R/man/growth.curves.Rd +0 -14
- data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +0 -13
- data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +0 -78
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +0 -46
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +0 -45
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +0 -125
- data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +0 -19
- data/utils/enveomics/globals.mk +0 -8
- data/utils/enveomics/manifest.json +0 -9
- data/utils/multitrim/Multitrim How-To.pdf +0 -0
- data/utils/multitrim/README.md +0 -67
- data/utils/multitrim/multitrim.py +0 -1555
- data/utils/multitrim/multitrim.yml +0 -13
@@ -1,30 +0,0 @@
|
|
1
|
-
|
2
|
-
require 'enveomics_rb/stats/sample'
|
3
|
-
|
4
|
-
module Enveomics
  # Calculate Gaussian Mixture Models by Expectation Maximization
  class GmmEm
    # Sample object built from the input numeric array
    attr_reader :sample
    # Number of gaussian components requested
    attr_reader :components
    # Options hash (a copy of the one passed in, with defaults filled in)
    attr_reader :opts

    # Initialize GmmEm object from numeric array +x+, +components+
    # gaussian components (an Integer), and options hash +opts+ with supported
    # Symbol keys:
    # - ll_delta_convergence: Maximum change in LL to consider convergence
    #   (by default: 1e-15)
    # - max_iter: Maximum number of EM iterations (by default: 1_000)
    # - init_mu: Initial components means as numeric array
    # - init_sigma: Initial components standard deviation as numeric array
    # - init_alpha: Initial components fractions as numeric array adding up to 1
    #
    # NOTE(review): the sample class is referenced as Enve::Stats::Sample while
    # this module is named Enveomics -- confirm that +Enve+ resolves correctly
    # in the loaded library.
    def initialize(x, components = 2, opts = {})
      @sample = Enve::Stats::Sample.new(x)
      # Previously never stored, so the +components+ reader always returned nil
      @components = components
      # Work on a copy so the defaults below do not leak into the caller's hash
      @opts = opts.dup
      @opts[:ll_delta_convergence] ||= 1e-15
      @opts[:max_iter] ||= 1_000
    end
  end
end
|
30
|
-
|
@@ -1,253 +0,0 @@
|
|
1
|
-
|
2
|
-
#
|
3
|
-
# @author: Luis M. Rodriguez-R
|
4
|
-
# @update: Jul-14-2015
|
5
|
-
# @license: artistic license 2.0
|
6
|
-
#
|
7
|
-
|
8
|
-
module JPlace
|
9
|
-
##### CLASSES:
|
10
|
-
# Placement.new(placement[, fields]): Initializes a new read placement.
|
11
|
-
# placement: A hash containing the placement.
|
12
|
-
# fields: If passed, sets the field order for all subsequent placements.
|
13
|
-
class Placement
  # +flag+ is used by JPlace.distances.rb as a placeholder
  attr_accessor :flag
  # p: Array holding only the best (first) placement row
  # n: Array of read names
  # m: Array of multiplicities, parallel to +n+
  attr_reader :p, :n, :m

  # Field order shared by every placement (class-level)
  @@fields = nil

  # Set the class-level field order to +fields+
  def self.fields=(fields)
    @@fields = fields
  end

  # Get the class-level field order
  def self.fields
    @@fields
  end

  # Initialize a placement from the hash +placement+. If +fields+ is passed
  # and no field order has been set yet, it becomes the class-level field
  # order. Aborts if the hash lacks a non-empty 'p' entry or both 'n' and
  # 'nm' entries.
  def initialize(placement, fields = nil)
    @@fields = fields if @@fields.nil? && !fields.nil?
    # Save only the best (first) placement:
    abort "Placements must contain a 'p' field.\n" if placement["p"].nil?
    abort "Placements must contain a 'p' field with at least one entry.\n" if placement["p"][0].nil?
    @p = [placement["p"][0]]
    # Find name-only placements (EPA-style):
    unless placement["n"].nil?
      @n = placement["n"]
      @m = @n.map { 1 }
    end
    # Find multiplicity placements (pplacer-style); overrides 'n' if both
    # are present:
    unless placement["nm"].nil?
      @n = placement["nm"].map { |nm| nm[0] }
      @m = placement["nm"].map { |nm| nm[1].to_i }
    end
    abort "Placements must contain one of 'n' or 'nm' fields.\n" if @n.nil? || @m.nil?
  end

  # Names and multiplicities as an Array of Hashes with keys :n and :m
  def nm
    n.zip(m).map { |name, mult| { :n => name, :m => mult } }
  end

  # Value of +field+ in the saved placement row. Aborts if no field order
  # has been defined or if +field+ is not part of it.
  def get_field_value(field)
    abort "Impossible to read placement with undefined fields." if @@fields.nil?
    f = @@fields.find_index(field)
    abort "Undefined field #{field}." if f.nil?
    p[0][f]
  end

  # Set the value of +field+ in the saved placement row to +value+. Aborts
  # if no field order has been defined or if +field+ is not part of it.
  def set_field_value(field, value)
    # Same guard as get_field_value: without it a nil field order surfaced
    # as a NoMethodError instead of a readable abort
    abort "Impossible to write placement with undefined fields." if @@fields.nil?
    f = @@fields.find_index(field)
    abort "Undefined field #{field}." if f.nil?
    p[0][f] = value
  end

  # Field 'edge_num' as Integer
  def edge_num
    get_field_value('edge_num').to_i
  end

  # Field 'likelihood' as Float
  def likelihood
    get_field_value('likelihood').to_f
  end

  # Field 'like_weight_ratio' as Float
  def like_weight_ratio
    get_field_value('like_weight_ratio').to_f
  end

  # Field 'distal_length' as Float (0.0 if the stored value is nil)
  def distal_length
    (get_field_value('distal_length') || 0).to_f
  end

  # Field 'pendant_length' as Float (0.0 if the stored value is nil)
  def pendant_length
    (get_field_value('pendant_length') || 0).to_f
  end

  def to_s
    "#<Placement of #{n}: #{p}>"
  end
end
|
74
|
-
|
75
|
-
# Ancillary class Tree
|
76
|
-
class Tree
  # Tri-state cache: nil until probed, then true/false
  @@HAS_ICONV = nil

  # Is the 'iconv' library loadable? The probe runs once and the answer is
  # cached at class level.
  def self.has_iconv?
    return @@HAS_ICONV unless @@HAS_ICONV.nil?

    @@HAS_ICONV = true
    begin
      %w[rubygems iconv].each { |lib| require lib }
    rescue LoadError
      @@HAS_ICONV = false
    end
    @@HAS_ICONV
  end

  # Build the root Node from the Newick string +nwk+. When iconv is
  # available the text is first passed through a UTF-8//IGNORE converter;
  # a padding space is appended before conversion and removed afterwards.
  def self.from_nwk(nwk)
    return Node.new(nwk) unless Tree.has_iconv?

    converter = Iconv.new('UTF-8//IGNORE','UTF-8')
    padded = converter.iconv(nwk + ' ')
    Node.new(padded[0..-2])
  end
end
|
98
|
-
|
99
|
-
# Node.new(nwk[, parent]): Initializes a new Node.
|
100
|
-
# nwk: Node's description in Newick format.
|
101
|
-
# parent: Node's parent, or nil if root node.
|
102
|
-
class Node
  # --- Class level ---

  # Registry of nodes by edge index, shared by every Node instance
  @@edges = []

  # Array of registered nodes, positioned by edge index
  def self.edges
    @@edges
  end

  # Store +node+ in the registry under its index (no-op if it has none)
  def self.register(node)
    @@edges[node.index] = node unless node.index.nil?
  end

  # Class-level functions related to JPlace

  # Attach +placement+ to the node registered at its edge_num. Aborts if
  # no node is registered for that edge.
  def self.link_placement(placement)
    abort "Trying to link placement in undefined edge #{placement.edge_num}: #{placement.to_s}" if @@edges[placement.edge_num].nil?
    @@edges[placement.edge_num].add_placement!(placement)
  end

  # Detach +placement+ from the node registered at its edge_num. Returns
  # whatever the node's delete_placement! returns.
  def self.unlink_placement(placement)
    @@edges[placement.edge_num].delete_placement!(placement)
  end

  # --- Instance level ---

  attr_reader :children, :length, :name, :label, :index, :nwk, :parent, :placements, :collapsed

  # Parse the Newick description +nwk+ into a node and, recursively, its
  # children. +parent+ is the parent Node, or nil for the root. Aborts on
  # empty input, on edges lacking a {index} tag, on unparsable metadata,
  # and on unbalanced parentheses.
  def initialize(nwk, parent = nil)
    abort "Empty newick.\n" if nwk.nil? || nwk == ''
    # Replace any ';' that is followed by another character. A copy is used
    # so the caller's string is left untouched.
    nwk = nwk.gsub(/;(.)/, '--\1')
    @nwk = nwk
    @parent = parent
    @placements = []
    @collapsed = false

    # Find index
    index_m = /^(?<pre>.*){(?<idx>[0-9]+)}(?<post>[^\(\),;]*);?$/.match(nwk)
    if index_m.nil? && parent.nil? && nwk[-1] == ';'
      # Only a ';'-terminated root is allowed to be unindexed
      @index = nil
    else
      abort "Unindexed edge found:\n#{@nwk}\n" if index_m.nil?
      nwk = index_m[:pre] + index_m[:post]
      @index = index_m[:idx].to_i
    end

    # Find name, label, and length
    meta_m = /^(\((?<cont>.+)\))?(?<name>[^:\(\);]*)(:(?<length>[0-9\.Ee+-]*)(?<label>\[[^\[\]\(\);]+\])?)?;?$/.match(nwk)
    abort "Cannot parse node metadata (index #{@index}):\n#{@nwk}\n" if meta_m.nil?
    @name = meta_m[:name]
    @length = meta_m[:length]
    @label = meta_m[:label]

    # Find children: split the parenthesized content at top-level commas,
    # ignoring anything inside single or double quotes
    @children = []
    rest = meta_m[:cont] || ''
    quote = nil
    until rest == ''
      depth = 0 # parenthesis nesting; nil once a top-level comma is hit
      cut = 0   # number of characters belonging to the next child
      rest.each_char do |chr|
        if quote.nil?
          if chr == '"' || chr == "'"
            quote = chr
          else
            depth += 1 if chr == '('
            depth -= 1 if chr == ')'
            if depth == 0 && chr == ','
              depth = nil
              break
            end
          end
        else
          quote = nil if chr == quote
        end
        cut += 1
      end
      abort "Unbalanced node at edge {#{@index}}, with leftness #{depth}:\n#{@nwk}\n" unless depth.nil? || depth == 0
      @children << Node.new(rest[0..(cut - 1)], self)
      rest = rest.length == cut ? '' : rest[(cut + 1)..-1]
    end
    Node.register(self)
  end

  # --- Accessors/Setters ---

  # Set the node name, replacing whitespace and the characters ( ) , ; :
  # with '_'
  def name=(new_name)
    @name = new_name.gsub(/[\s\(\),;:]/, '_')
  end

  # --- Tree algorithms ---

  # Call +blk+ with every node of the subtree, children before parents
  def post_order(&blk)
    children.each { |n| n.post_order(&blk) }
    blk[self]
  end

  # Call +blk+ with every node of the subtree: first child, self, second
  # child. Aborts unless every visited node has exactly 0 or 2 children.
  def in_order(&blk)
    unless [0, 2].include? children.length
      # The count used to be read through a misspelled method ("lenght"),
      # which raised NoMethodError instead of producing this message
      abort "Tree must be dycotomic to traverse in_order, node #{cannonical_name} has #{children.length} children."
    end
    children[0].in_order(&blk) unless children[0].nil?
    blk[self]
    children[1].in_order(&blk) unless children[1].nil?
  end

  # Call +blk+ with every node of the subtree, parents before children
  def pre_order(&blk)
    blk[self]
    children.each { |n| n.pre_order(&blk) }
  end

  # Array of nodes from self up to the root (memoized)
  def path_to_root
    if @path_to_root.nil?
      @path_to_root = [self]
      @path_to_root += parent.path_to_root unless parent.nil?
    end
    @path_to_root
  end

  # Sum of the lengths along path_to_root, counting missing lengths as 0.0
  # (memoized)
  def distance_to_root
    if @distance_to_root.nil?
      @distance_to_root = path_to_root.map { |n| n.length.nil? ? 0.0 : n.length.to_f }.reduce(0.0, :+)
    end
    @distance_to_root
  end

  # First node on the path from self to the root that is also on the path
  # from +node+ to the root
  def lca(node)
    theirs = node.path_to_root
    path_to_root.find { |n| theirs.include? n }
  end

  # Distance between self and +node+, computed through their lca
  def distance(node)
    distance_to_root + node.distance_to_root - (2.0 * lca(node).distance_to_root)
  end

  # Nodes are equal when their indexes are equal; objects that do not
  # expose an index are never equal (instead of raising NoMethodError)
  def ==(node)
    node.respond_to?(:index) && index == node.index
  end

  # --- Tree representation ---

  # First non-empty of: name, label, "{index}"; otherwise an empty String
  def cannonical_name
    return(name) unless name.nil? || name == ""
    return(label) unless label.nil? || label == ""
    return("{#{index.to_s}}") unless index.nil?
    ""
  end

  # Newick-like text of the subtree (without trailing ';')
  def to_s
    o = ""
    o += "(" + children.map { |c| c.to_s }.join(",") + ")" if children.length > 0
    o += cannonical_name
    u = "#{length}#{label}"
    o += ":#{u}" unless u == ""
    o
  end

  # --- Instance-level functions related to JPlace ---

  # Move every placement held by the other nodes visited by pre_order onto
  # this node (rewriting their 'edge_num' field) and mark it as collapsed
  def collapse!
    pre_order do |n|
      if n != self
        while n.placements.length > 0
          p = Node.unlink_placement(n.placements[0])
          p.set_field_value('edge_num', index)
          Node.link_placement(p)
        end
      end
    end
    @collapsed = true
  end

  # Append +placement+ to this node's placements
  def add_placement!(placement)
    @placements << placement
  end

  # Remove +placement+ from this node's placements
  def delete_placement!(placement)
    @placements.delete(placement)
  end
end
|
251
|
-
|
252
|
-
end # module JPlace
|
253
|
-
|
@@ -1,88 +0,0 @@
|
|
1
|
-
|
2
|
-
module Enveomics
  ##
  # A simple object representing a sequence match from a search engine
  # supporting tabular BLAST output
  class Match
    class << self
      ##
      # Hash of column name (Symbol) => class used to cast the column value,
      # in the order the columns appear in a row
      def column_types
        {
          qseqid: String, sseqid: String, pident: Float,
          length: Integer, mismatch: Integer, gapopen: Integer,
          q_start: Integer, q_end: Integer, s_start: Integer,
          s_end: Integer, evalue: Float, bitscore: Float,
          # Non-standard (but frequently used in Enveomics Collection):
          qry_len: Integer, sbj_len: Integer
        }
      end

      ##
      # Class of the column named (or aliased) +sym+, nil if unknown
      def column_type(sym)
        column_types[colname(sym)]
      end

      ##
      # Cast +value+ to the type of the column named (or aliased) +sym+.
      # Returns nil if the column is unknown.
      def to_column_type(sym, value)
        case column_type(sym).to_s
        when 'String'  then value.to_s
        when 'Float'   then value.to_f
        when 'Integer' then value.to_i
        end
      end

      ##
      # Canonical column names, in row order
      def columns
        column_types.keys
      end

      ##
      # Zero-based position in the row of the column named (or aliased) +sym+
      def column(sym)
        columns.index(colname(sym))
      end

      ##
      # Hash of alias => canonical column name
      def colsynonyms
        {
          qry: :qseqid, sbj: :sseqid,
          id: :pident, len: :length, score: :bitscore
        }
      end

      ##
      # All accepted names: canonical columns followed by aliases
      def colnames
        columns + colsynonyms.keys
      end

      ##
      # Canonical column name for +sym+ (a canonical name or an alias),
      # nil if unknown
      def colname(sym)
        s = sym.to_sym
        column_types[s] ? s : colsynonyms[s]
      end
    end

    ####--- Instance Level ---###

    # Array of raw (String) cell values
    attr_reader :row

    ##
    # Initialize Enveomics::Match object from a tabular blast line String +ln+
    def initialize(ln)
      @row = ln.chomp.split("\t")
    end

    # One reader per canonical column name and alias, returning the cell
    # cast to the column type
    colnames.each do |sym|
      define_method sym do
        self.class.to_column_type(sym, row[self.class.column(sym)])
      end
    end

    ##
    # Alignment length as a fraction of the query length, or 0.0 when the
    # query length is zero (a missing cell is cast to zero)
    def qry_fract
      # The guard used to be inverted, returning 0.0 for every usable length
      return 0.0 if qry_len.zero?

      # Own memo: sharing one variable with sbj_fract mixed up the values
      @qry_fract ||= len.to_f / qry_len
    end

    alias fract qry_fract

    ##
    # Alignment length as a fraction of the subject length, or 0.0 when the
    # subject length is zero (a missing cell is cast to zero)
    def sbj_fract
      return 0.0 if sbj_len.zero?

      @sbj_fract ||= len.to_f / sbj_len
    end

    ##
    # Tab-delimited row
    def to_s
      row.join("\t")
    end
  end
end
|
@@ -1,182 +0,0 @@
|
|
1
|
-
|
2
|
-
##### CLASSES:
|
3
|
-
# Gene.new(genome, id): Initializes a new Gene.
|
4
|
-
# genome: A string uniquely identifying the parent genome.
|
5
|
-
# id: A string uniquely identifying the gene within the genome. It can be
|
6
|
-
# non-unique across genomes.
|
7
|
-
class Gene
  # genome_id: Integer position of the parent genome in Gene.genomes
  # id: Identifier of the gene within its genome
  attr_reader :genome_id, :id

  # Names of all genomes seen so far, shared by every Gene
  @@genomes = []

  # Get all genomes in the run as an array of strings.
  def self.genomes
    @@genomes
  end

  # Initialize a gene with identifier +id+ in +genome+. An Integer +genome+
  # is taken as the index of an already registered genome (aborts if there
  # is none at that index); any other value is taken as a genome name and
  # registered on first use.
  def initialize(genome, id)
    if genome.is_a? Integer
      abort "Internal error: Genome #{genome} does not exist yet." if
        @@genomes[genome].nil?
      @genome_id = genome
    else
      @@genomes << genome unless @@genomes.include? genome
      @genome_id = @@genomes.index(genome)
    end
    @id = id
  end

  # Compare if two Gene objects refer to the same gene. Anything that is
  # not a Gene (including nil) is never equal, instead of raising
  # NoMethodError.
  def ==(b)
    b.is_a?(Gene) && genome_id == b.genome_id && id == b.id
  end

  # Get the name of the genome this gene belongs to.
  def genome
    @@genomes[genome_id]
  end

  def to_s
    "#{genome}:#{id}"
  end
end
|
36
|
-
|
37
|
-
# OG.new(): Initializes an empty OG.
# OG.new(genomes, genes): Initializes a pre-computed OG.
# genomes: List of genomes as an array of strings (as in Gene.genomes).
# genes: List of genes as an array of strings, with '-' indicating no genes and
#   multiple genes separated by ','.
#
# Internally +genes+ is an array indexed by genome index (see Gene.genomes);
# each slot is either nil (no gene from that genome) or an array of gene ids.
class OG
  attr_reader :genes, :notes

  def initialize(genomes=nil, genes=nil)
    @genes = []
    @notes = []
    return if genomes.nil? || genes.nil?

    genes.each_with_index do |gene_list, genome_i|
      next if gene_list == "-"

      gene_list.split(/,/).each do |gene_id|
        self << Gene.new(genomes[genome_i], gene_id)
      end
    end
  end

  # Add genes or combine another OG into the loaded OG (self). Genes already
  # present are not duplicated. Aborts on any other kind of object.
  def <<(obj)
    if obj.is_a? Gene
      slot = (@genes[obj.genome_id] ||= [])
      slot << obj.id unless slot.include? obj.id
    elsif obj.is_a? OG
      obj.genes_obj.each { |gene| self << gene }
    else
      abort "Unsupported class for #{obj}"
    end
  end

  # Get the list of genes as objects (internally saved as strings to save RAM).
  # Genes are returned grouped by genome, in Gene.genomes order.
  def genes_obj
    Gene.genomes.each_with_index.flat_map do |genome, genome_id|
      (genes[genome_id] || []).map { |gene_id| Gene.new(genome, gene_id) }
    end
  end

  # Evaluates if the OG contains the passed gene.
  def include?(gene)
    ids = genes[gene.genome_id]
    !ids.nil? && ids.include?(gene.id)
  end

  # Get the list of genomes containing genes in this OG, as indices into
  # Gene.genomes.
  def genomes
    Gene.genomes.each_index.select do |gno|
      !(genes[gno].nil? || genes[gno].empty?)
    end
  end

  # Adds a note that will be printed after the last column. Without
  # +note_idx+ the note is appended; with it, the note is stored at that
  # position, joined to any note already there with ' || '.
  def add_note(note, note_idx=nil)
    if note_idx.nil?
      @notes << note
    else
      previous = @notes[note_idx]
      prefix = previous.nil? ? '' : previous + ' || '
      @notes[note_idx] = prefix + note
    end
  end

  # One tab-separated line: a column per genome in Gene.genomes ('-' when the
  # genome has no genes, gene ids joined by ',' otherwise), followed by
  # "\t#\t" and the tab-separated notes when there are any.
  def to_s
    columns = Gene.genomes.each_index.map do |genome_id|
      genes[genome_id].nil? ? "-" : genes[genome_id].join(",")
    end
    line = columns.join("\t")
    notes.size == 0 ? line : line + "\t#\t" + notes.join("\t")
  end

  # Presence/absence vector: one boolean per genome in Gene.genomes.
  def to_bool_a
    Gene.genomes.each_index.map { |genome_id| !genes[genome_id].nil? }
  end
end
|
107
|
-
|
108
|
-
# OGCollection.new(): Initializes an empty collection of OGs.
class OGCollection
  attr_reader :ogs, :note_srcs

  def initialize
    @ogs = []
    @note_srcs = []
  end

  # Add an OG to the collection
  def <<(og)
    @ogs << og
  end

  # Rebuilds the collection, merging each OG into the first already-kept OG
  # that contains one of its genes; OGs sharing no gene with a kept OG are
  # kept as new entries.
  def consolidate!
    old_ogs = ogs
    @ogs = []
    old_ogs.each do |og|
      host = nil
      og.genes_obj.each do |gene|
        host = get_og(gene)
        break unless host.nil?
      end
      if host.nil?
        self << og
      else
        host << og
      end
    end
  end

  # Removes OGs present in less than 'fraction' of the genomes
  def filter_core!(fraction=1.0)
    min_genomes = (fraction * Gene.genomes.size).ceil
    @ogs.select! { |og| og.genomes.size >= min_genomes }
  end

  # Removes OGs present more than 'dups' number of times in any genome
  def remove_inparalogs!(dups=1)
    @ogs.select! do |og|
      # The per-genome list is sparse: genomes without genes are nil and
      # must be skipped rather than measured. An OG with no genes at all
      # trivially satisfies the limit.
      og.genes.all? { |pergenome| pergenome.nil? || pergenome.size <= dups }
    end
  end

  # Add a pair of RBM genes into the corresponding OG, or create a new OG.
  # The OG containing +a+ is preferred; the one containing +b+ is used only
  # when +a+ is not in any OG yet.
  def add_rbm(a, b)
    og = get_og(a) || get_og(b)
    if og.nil?
      og = OG.new
      @ogs << og
    end
    og << a
    og << b
  end

  # Get the OG containing the gene (returns the first, if multiple), or nil
  # when no OG contains it.
  def get_og(gene)
    ogs.find { |og| og.include? gene }
  end

  # Get the genes from a given genome (returns an array of arrays), one entry
  # per OG, empty when the OG has no genes from that genome.
  def get_genome_genes(genome)
    genome_id = Gene.genomes.index(genome)
    ogs.map { |og| og.genes[genome_id] || [] }
  end

  # Add annotation sources
  def add_note_src(src)
    @note_srcs << src
  end

  # Tab-separated table: a header line with the genome names (plus "\t#\t"
  # and the note sources, if any) followed by one line per OG.
  def to_s
    header = Gene.genomes.join("\t")
    header += "\t#\t" + note_srcs.join("\t") if note_srcs.length > 0
    header + "\n" + ogs.map { |og| og.to_s }.join("\n")
  end

  # Presence/absence matrix: one boolean row per OG.
  def to_bool_a
    ogs.map { |og| og.to_bool_a }
  end
end
|
182
|
-
|
@@ -1,49 +0,0 @@
|
|
1
|
-
require 'enveomics_rb/bm_set'
|
2
|
-
|
3
|
-
module Enveomics
  class RBM
    attr_reader :seq1, :seq2, :bms1, :bms2

    ##
    # Builds the two directional best-match sets for the sequence paths
    # +seq1+ and +seq2+ (+seq1+ against +seq2+, and the reverse), handing the
    # options Hash +bm_opts+ to each Enveomics::BMset
    def initialize(seq1, seq2, bm_opts = {})
      @seq1, @seq2 = seq1, seq2
      @bms1 = Enveomics::BMset.new(seq1, seq2, bm_opts)
      @bms2 = Enveomics::BMset.new(seq2, seq1, bm_opts)
      @set = nil
    end

    ##
    # Reciprocal best matches as an Array of Enveomics::Match objects,
    # computed on first use and cached afterwards
    def set
      @set ||= reciprocate!
    end

    ##
    # How many reciprocal best matches there are
    def count
      set.count
    end

    ##
    # Selects the matches of +bms1+ whose subject has, in +bms2+, a best
    # match pointing back to the same query
    def reciprocate!
      bms1.each.select do |bm|
        reverse = bms2[bm.sbj]
        reverse && bm.qry == reverse.sbj
      end
    end

    ##
    # Passes every reciprocal best match to +blk+; returns an Enumerator when
    # called without a block
    def each(&blk)
      return to_enum(:each) unless block_given?

      set.each(&blk)
    end
  end
end
|
@@ -1,74 +0,0 @@
|
|
1
|
-
|
2
|
-
#
|
3
|
-
# @author: Luis M. Rodriguez-R
|
4
|
-
# @license: artistic license 2.0
|
5
|
-
#
|
6
|
-
|
7
|
-
require "enveomics_rb/enveomics"
|
8
|
-
use "restclient"
|
9
|
-
use "json"
|
10
|
-
|
11
|
-
# Helpers to query the NCBI E-utilities and the EBI dbfetch REST services
# through RestClient. Every method is a class method.
class RemoteData
  # Class-level variables
  @@EUTILS = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
  @@EBIREST = "http://www.ebi.ac.uk/Tools"

  # Class-level methods

  # Calls the E-utilities +script+ (e.g. "efetch.fcgi") with the query
  # parameters +params+, trying up to 10 times until a response with code 200
  # is obtained. Failed attempts are reported with +warn+. Aborts the process
  # if no attempt succeeds. When +outfile+ is given, the response body is also
  # written to that path. Returns the response body as a String.
  def self.eutils(script, params={}, outfile=nil)
    response = nil
    10.times do
      begin
        response = RestClient.get "#{@@EUTILS}/#{script}", {:params=>params}
      rescue => err
        # Forget any earlier reply so a stale status is never reported, and
        # take the status from the error itself when it carries one.
        response = nil
        code = err.respond_to?(:http_code) ? err.http_code : nil
        detail = code.nil? ? "without error code" : "with error code #{code}"
        warn "Request failed #{detail}."
        next
      end
      break if response.code == 200
    end
    # Every attempt may have raised, leaving no response to inspect.
    abort "Unable to reach NCBI EUtils, no response received." if response.nil?
    abort "Unable to reach NCBI EUtils, error code #{response.code}." unless
      response.code == 200
    File.write(outfile, response.to_s) unless outfile.nil?
    response.to_s
  end

  # Shortcut for RemoteData.eutils with the "efetch.fcgi" script.
  def self.efetch(*etc)
    eutils "efetch.fcgi", *etc
  end

  # Shortcut for RemoteData.eutils with the "elink.fcgi" script.
  def self.elink(*etc)
    eutils "elink.fcgi", *etc
  end

  # Shortcut for RemoteData.eutils with the "esummary.fcgi" script.
  def self.esummary(*etc)
    eutils "esummary.fcgi", *etc
  end

  # Looks up +old_gi+ in database +db+ through esummary and returns the pair
  # [new_gi, status]: +new_gi+ is the efetch output (rettype "gi") for the
  # record's "replacedby" accession, or nil when there is none, and +status+
  # is the record's "status" field. Returns [nil, nil] when the summary has
  # no entry for +old_gi+.
  def self.update_gi(db, old_gi)
    summ = JSON.parse RemoteData.esummary({:db=>db, :id=>old_gi,
      :retmode=>"json"})
    result = summ["result"]
    entry = result.nil? ? nil : result[old_gi.to_s]
    return nil, nil if entry.nil?

    new_acc = entry["replacedby"]
    new_gi = (new_acc.nil? ? nil :
      RemoteData.efetch({:db=>db, :id=>new_acc, :rettype=>"gi"}))
    return new_gi, entry["status"]
  end

  # Fetches entry +id+ from database +db+ in the given +format+ through EBI
  # dbfetch (timeout: 600). Raises a RuntimeError unless the response code is
  # 200. When +outfile+ is given, the response body is also written to that
  # path, as RemoteData.eutils does. Returns the response body as a String.
  def self.ebiFetch(db, id, format, outfile=nil)
    url = "#{@@EBIREST}/dbfetch/dbfetch/#{db}/#{id}/#{format}"
    response = RestClient::Request.execute(:method=>:get,
      :url=>url, :timeout=>600)
    raise "Unable to reach EBI REST client, error code " +
      response.code.to_s + "." unless response.code == 200
    File.write(outfile, response.to_s) unless outfile.nil?
    response.to_s
  end

  # Extracts the NCBI taxonomy ID of entry +id+ in database +db+ from its
  # "annot" record: the first 'FT ... /db_xref="taxon:N"' line is preferred,
  # falling back to the first 'OX NCBI_TaxID=N' line. Returns the ID as a
  # String of digits, or nil if neither line yields one.
  def self.ebiseq2taxid(id,db)
    doc = RemoteData.ebiFetch(db, id, "annot").split(/[\n\r]/)
    ln = doc.grep(/^FT\s+\/db_xref="taxon:/).first
    ln = doc.grep(/^OX\s+NCBI_TaxID=/).first if ln.nil?
    return nil if ln.nil?

    # Work on a copy so the fetched document line is left untouched.
    taxid = ln.sub(/.*(?:"taxon:|NCBI_TaxID=)(\d+)["; ].*/, "\\1")
    taxid =~ /\A\d+\z/ ? taxid : nil
  end
end
|
74
|
-
|