miga-base 1.2.15.2 → 1.2.15.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/cli/action/download/gtdb.rb +4 -1
- data/lib/miga/cli/action/gtdb_get.rb +4 -0
- data/lib/miga/daemon.rb +4 -1
- data/lib/miga/lair.rb +6 -4
- data/lib/miga/remote_dataset/download.rb +3 -2
- data/lib/miga/remote_dataset.rb +25 -7
- data/lib/miga/taxonomy.rb +6 -0
- data/lib/miga/version.rb +2 -2
- metadata +6 -302
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +0 -41964
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +0 -32439
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +0 -62056
- data/utils/FastAAI/FastAAI +0 -3659
- data/utils/FastAAI/FastAAI-legacy/FastAAI +0 -1336
- data/utils/FastAAI/FastAAI-legacy/kAAI_v1.0_virus.py +0 -1296
- data/utils/FastAAI/README.md +0 -84
- data/utils/enveomics/Docs/recplot2.md +0 -244
- data/utils/enveomics/Examples/aai-matrix.bash +0 -66
- data/utils/enveomics/Examples/ani-matrix.bash +0 -66
- data/utils/enveomics/Examples/essential-phylogeny.bash +0 -105
- data/utils/enveomics/Examples/unus-genome-phylogeny.bash +0 -100
- data/utils/enveomics/LICENSE.txt +0 -73
- data/utils/enveomics/Makefile +0 -52
- data/utils/enveomics/Manifest/Tasks/aasubs.json +0 -103
- data/utils/enveomics/Manifest/Tasks/blasttab.json +0 -790
- data/utils/enveomics/Manifest/Tasks/distances.json +0 -161
- data/utils/enveomics/Manifest/Tasks/fasta.json +0 -802
- data/utils/enveomics/Manifest/Tasks/fastq.json +0 -291
- data/utils/enveomics/Manifest/Tasks/graphics.json +0 -126
- data/utils/enveomics/Manifest/Tasks/mapping.json +0 -137
- data/utils/enveomics/Manifest/Tasks/ogs.json +0 -382
- data/utils/enveomics/Manifest/Tasks/other.json +0 -906
- data/utils/enveomics/Manifest/Tasks/remote.json +0 -355
- data/utils/enveomics/Manifest/Tasks/sequence-identity.json +0 -650
- data/utils/enveomics/Manifest/Tasks/tables.json +0 -308
- data/utils/enveomics/Manifest/Tasks/trees.json +0 -68
- data/utils/enveomics/Manifest/Tasks/variants.json +0 -111
- data/utils/enveomics/Manifest/categories.json +0 -165
- data/utils/enveomics/Manifest/examples.json +0 -162
- data/utils/enveomics/Manifest/tasks.json +0 -4
- data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +0 -69
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +0 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +0 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +0 -1
- data/utils/enveomics/Pipelines/assembly.pbs/README.md +0 -189
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +0 -112
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +0 -23
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +0 -44
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +0 -50
- data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +0 -37
- data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +0 -68
- data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +0 -49
- data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +0 -80
- data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +0 -57
- data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +0 -63
- data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +0 -38
- data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +0 -73
- data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +0 -21
- data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +0 -72
- data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +0 -98
- data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +0 -1
- data/utils/enveomics/Pipelines/blast.pbs/README.md +0 -127
- data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +0 -109
- data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +0 -128
- data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +0 -16
- data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +0 -22
- data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +0 -26
- data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +0 -89
- data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +0 -29
- data/utils/enveomics/Pipelines/idba.pbs/README.md +0 -49
- data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +0 -95
- data/utils/enveomics/Pipelines/idba.pbs/run.pbs +0 -56
- data/utils/enveomics/Pipelines/trim.pbs/README.md +0 -54
- data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +0 -70
- data/utils/enveomics/Pipelines/trim.pbs/run.pbs +0 -130
- data/utils/enveomics/README.md +0 -42
- data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +0 -171
- data/utils/enveomics/Scripts/Aln.cat.rb +0 -221
- data/utils/enveomics/Scripts/Aln.convert.pl +0 -35
- data/utils/enveomics/Scripts/AlphaDiversity.pl +0 -152
- data/utils/enveomics/Scripts/BedGraph.tad.rb +0 -93
- data/utils/enveomics/Scripts/BedGraph.window.rb +0 -71
- data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +0 -102
- data/utils/enveomics/Scripts/BlastTab.addlen.rb +0 -63
- data/utils/enveomics/Scripts/BlastTab.advance.bash +0 -48
- data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +0 -55
- data/utils/enveomics/Scripts/BlastTab.catsbj.pl +0 -104
- data/utils/enveomics/Scripts/BlastTab.cogCat.rb +0 -76
- data/utils/enveomics/Scripts/BlastTab.filter.pl +0 -47
- data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +0 -194
- data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +0 -104
- data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +0 -157
- data/utils/enveomics/Scripts/BlastTab.recplot2.R +0 -48
- data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +0 -86
- data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +0 -119
- data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +0 -86
- data/utils/enveomics/Scripts/BlastTab.subsample.pl +0 -47
- data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +0 -114
- data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +0 -90
- data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +0 -123
- data/utils/enveomics/Scripts/Chao1.pl +0 -97
- data/utils/enveomics/Scripts/CharTable.classify.rb +0 -234
- data/utils/enveomics/Scripts/EBIseq2tax.rb +0 -83
- data/utils/enveomics/Scripts/FastA.N50.pl +0 -60
- data/utils/enveomics/Scripts/FastA.extract.rb +0 -152
- data/utils/enveomics/Scripts/FastA.filter.pl +0 -52
- data/utils/enveomics/Scripts/FastA.filterLen.pl +0 -28
- data/utils/enveomics/Scripts/FastA.filterN.pl +0 -60
- data/utils/enveomics/Scripts/FastA.fragment.rb +0 -100
- data/utils/enveomics/Scripts/FastA.gc.pl +0 -42
- data/utils/enveomics/Scripts/FastA.interpose.pl +0 -93
- data/utils/enveomics/Scripts/FastA.length.pl +0 -38
- data/utils/enveomics/Scripts/FastA.mask.rb +0 -89
- data/utils/enveomics/Scripts/FastA.per_file.pl +0 -36
- data/utils/enveomics/Scripts/FastA.qlen.pl +0 -57
- data/utils/enveomics/Scripts/FastA.rename.pl +0 -65
- data/utils/enveomics/Scripts/FastA.revcom.pl +0 -23
- data/utils/enveomics/Scripts/FastA.sample.rb +0 -98
- data/utils/enveomics/Scripts/FastA.slider.pl +0 -85
- data/utils/enveomics/Scripts/FastA.split.pl +0 -55
- data/utils/enveomics/Scripts/FastA.split.rb +0 -79
- data/utils/enveomics/Scripts/FastA.subsample.pl +0 -131
- data/utils/enveomics/Scripts/FastA.tag.rb +0 -65
- data/utils/enveomics/Scripts/FastA.toFastQ.rb +0 -69
- data/utils/enveomics/Scripts/FastA.wrap.rb +0 -48
- data/utils/enveomics/Scripts/FastQ.filter.pl +0 -54
- data/utils/enveomics/Scripts/FastQ.interpose.pl +0 -90
- data/utils/enveomics/Scripts/FastQ.maskQual.rb +0 -89
- data/utils/enveomics/Scripts/FastQ.offset.pl +0 -90
- data/utils/enveomics/Scripts/FastQ.split.pl +0 -53
- data/utils/enveomics/Scripts/FastQ.tag.rb +0 -70
- data/utils/enveomics/Scripts/FastQ.test-error.rb +0 -81
- data/utils/enveomics/Scripts/FastQ.toFastA.awk +0 -24
- data/utils/enveomics/Scripts/GFF.catsbj.pl +0 -127
- data/utils/enveomics/Scripts/GenBank.add_fields.rb +0 -84
- data/utils/enveomics/Scripts/HMM.essential.rb +0 -351
- data/utils/enveomics/Scripts/HMM.haai.rb +0 -168
- data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +0 -83
- data/utils/enveomics/Scripts/JPlace.distances.rb +0 -88
- data/utils/enveomics/Scripts/JPlace.to_iToL.rb +0 -320
- data/utils/enveomics/Scripts/M5nr.getSequences.rb +0 -81
- data/utils/enveomics/Scripts/MeTaxa.distribution.pl +0 -198
- data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +0 -35
- data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +0 -49
- data/utils/enveomics/Scripts/NCBIacc2tax.rb +0 -92
- data/utils/enveomics/Scripts/Newick.autoprune.R +0 -27
- data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +0 -228
- data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +0 -32
- data/utils/enveomics/Scripts/RefSeq.download.bash +0 -48
- data/utils/enveomics/Scripts/SRA.download.bash +0 -55
- data/utils/enveomics/Scripts/TRIBS.plot-test.R +0 -36
- data/utils/enveomics/Scripts/TRIBS.test.R +0 -39
- data/utils/enveomics/Scripts/Table.barplot.R +0 -31
- data/utils/enveomics/Scripts/Table.df2dist.R +0 -30
- data/utils/enveomics/Scripts/Table.filter.pl +0 -61
- data/utils/enveomics/Scripts/Table.merge.pl +0 -77
- data/utils/enveomics/Scripts/Table.prefScore.R +0 -60
- data/utils/enveomics/Scripts/Table.replace.rb +0 -69
- data/utils/enveomics/Scripts/Table.round.rb +0 -63
- data/utils/enveomics/Scripts/Table.split.pl +0 -57
- data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +0 -227
- data/utils/enveomics/Scripts/VCF.KaKs.rb +0 -147
- data/utils/enveomics/Scripts/VCF.SNPs.rb +0 -88
- data/utils/enveomics/Scripts/aai.rb +0 -421
- data/utils/enveomics/Scripts/ani.rb +0 -362
- data/utils/enveomics/Scripts/anir.rb +0 -137
- data/utils/enveomics/Scripts/clust.rand.rb +0 -102
- data/utils/enveomics/Scripts/gi2tax.rb +0 -103
- data/utils/enveomics/Scripts/in_silico_GA_GI.pl +0 -96
- data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
- data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
- data/utils/enveomics/Scripts/lib/enveomics.R +0 -1
- data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +0 -293
- data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +0 -175
- data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +0 -24
- data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +0 -17
- data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +0 -30
- data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +0 -253
- data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +0 -88
- data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +0 -182
- data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +0 -49
- data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +0 -74
- data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +0 -237
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +0 -31
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +0 -152
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +0 -3
- data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +0 -74
- data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +0 -135
- data/utils/enveomics/Scripts/ogs.annotate.rb +0 -88
- data/utils/enveomics/Scripts/ogs.core-pan.rb +0 -160
- data/utils/enveomics/Scripts/ogs.extract.rb +0 -125
- data/utils/enveomics/Scripts/ogs.mcl.rb +0 -186
- data/utils/enveomics/Scripts/ogs.rb +0 -104
- data/utils/enveomics/Scripts/ogs.stats.rb +0 -131
- data/utils/enveomics/Scripts/rbm-legacy.rb +0 -172
- data/utils/enveomics/Scripts/rbm.rb +0 -108
- data/utils/enveomics/Scripts/sam.filter.rb +0 -148
- data/utils/enveomics/Tests/Makefile +0 -10
- data/utils/enveomics/Tests/Mgen_M2288.faa +0 -3189
- data/utils/enveomics/Tests/Mgen_M2288.fna +0 -8282
- data/utils/enveomics/Tests/Mgen_M2321.fna +0 -8288
- data/utils/enveomics/Tests/Nequ_Kin4M.faa +0 -2970
- data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +0 -7
- data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +0 -17
- data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +0 -137
- data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +0 -123
- data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +0 -200
- data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +0 -55
- data/utils/enveomics/Tests/alkB.nwk +0 -1
- data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +0 -13
- data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +0 -17
- data/utils/enveomics/Tests/hiv1.faa +0 -59
- data/utils/enveomics/Tests/hiv1.fna +0 -134
- data/utils/enveomics/Tests/hiv2.faa +0 -70
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +0 -233
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +0 -1
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +0 -233
- data/utils/enveomics/Tests/phyla_counts.tsv +0 -10
- data/utils/enveomics/Tests/primate_lentivirus.ogs +0 -11
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +0 -9
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +0 -8
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +0 -6
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +0 -9
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +0 -6
- data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +0 -6
- data/utils/enveomics/build_enveomics_r.bash +0 -45
- data/utils/enveomics/enveomics.R/DESCRIPTION +0 -31
- data/utils/enveomics/enveomics.R/NAMESPACE +0 -39
- data/utils/enveomics/enveomics.R/R/autoprune.R +0 -155
- data/utils/enveomics/enveomics.R/R/barplot.R +0 -184
- data/utils/enveomics/enveomics.R/R/cliopts.R +0 -135
- data/utils/enveomics/enveomics.R/R/df2dist.R +0 -154
- data/utils/enveomics/enveomics.R/R/growthcurve.R +0 -331
- data/utils/enveomics/enveomics.R/R/prefscore.R +0 -79
- data/utils/enveomics/enveomics.R/R/recplot.R +0 -354
- data/utils/enveomics/enveomics.R/R/recplot2.R +0 -1631
- data/utils/enveomics/enveomics.R/R/tribs.R +0 -583
- data/utils/enveomics/enveomics.R/R/utils.R +0 -80
- data/utils/enveomics/enveomics.R/README.md +0 -81
- data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
- data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -16
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -16
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -16
- data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +0 -25
- data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +0 -46
- data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +0 -23
- data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +0 -47
- data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +0 -23
- data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +0 -23
- data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +0 -40
- data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +0 -103
- data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +0 -67
- data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +0 -24
- data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +0 -45
- data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +0 -44
- data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +0 -47
- data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +0 -75
- data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +0 -50
- data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +0 -44
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +0 -139
- data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +0 -45
- data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +0 -24
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +0 -77
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +0 -25
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +0 -21
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +0 -47
- data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +0 -29
- data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +0 -18
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +0 -45
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +0 -36
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +0 -27
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +0 -52
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +0 -17
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +0 -51
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +0 -43
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +0 -82
- data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +0 -59
- data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +0 -27
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +0 -36
- data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +0 -23
- data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +0 -68
- data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +0 -28
- data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +0 -27
- data/utils/enveomics/enveomics.R/man/growth.curves.Rd +0 -14
- data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +0 -13
- data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +0 -78
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +0 -46
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +0 -45
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +0 -125
- data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +0 -19
- data/utils/enveomics/globals.mk +0 -8
- data/utils/enveomics/manifest.json +0 -9
- data/utils/multitrim/Multitrim How-To.pdf +0 -0
- data/utils/multitrim/README.md +0 -67
- data/utils/multitrim/multitrim.py +0 -1555
- data/utils/multitrim/multitrim.yml +0 -13
|
@@ -1,237 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
##### CLASSES:
|
|
3
|
-
# SeqRange.parse(str): Initializes a new SeqRange from a string. A SeqRange is a
|
|
4
|
-
# representation of any collection of coordinates in a given sequence.
|
|
5
|
-
# Coordinates here are 1-based and base-located. Admittedly, the
|
|
6
|
-
# 0-based/interbase-located system is much more convenient for range
|
|
7
|
-
# operations, but GenBank (together with most common Software) is built on
|
|
8
|
-
# the 1-based/base-located system.
|
|
9
|
-
# str: A string describing the sequence range as in GenBank records.
|
|
10
|
-
# Note that "ID:location" notation is NOT supported by this implementation,
|
|
11
|
-
# although it is permitted by GenBank. Some examples of valid `str`:
|
|
12
|
-
# "<1..123"
|
|
13
|
-
# "complement(3..6)"
|
|
14
|
-
# "join(complement(join(13..43,complement(45..46),complement(1..12),
|
|
15
|
-
# <1..12)),12..15,13..22)"
|
|
16
|
-
# The last one is valid, but once parsed it's internally simplified as:
|
|
17
|
-
# "join(complement(<1..12),1..12,45..46,complement(13..43),12..15,13..22)"
|
|
18
|
-
# Which is exactly equivalent. The common (but non-GenBank-compliant)
|
|
19
|
-
# practice of inverting coordinates instead of using the `complement()`
|
|
20
|
-
# operator is also supported. For example:
|
|
21
|
-
# "123..3"
|
|
22
|
-
# Is interpreted as:
|
|
23
|
-
# "complement(3..123)"
|
|
24
|
-
# See also http://www.insdc.org/files/feature_table.html
|
|
25
|
-
#
|
|
26
|
-
# SeqRange.new(c): Initializes a new SeqRange from an object.
|
|
27
|
-
# c: Any object supported by the `<<` operator, or `nil` to create an empty
|
|
28
|
-
# SeqRange.
|
|
29
|
-
#
|
|
30
|
-
# See also ContigSeqRange.parse.
|
|
31
|
-
# Collection of ContigSeqRange objects describing any set of coordinates in a
# sequence (1-based, base-located, GenBank-style).
class SeqRange
  # Class-level

  # Parses a GenBank-style location string (+join()+, +complement()+ or a
  # plain range) into a new SeqRange. The input string is never modified.
  # Raises RuntimeError on unbalanced parentheses or unparseable ranges.
  def self.parse(str)
    # Work on a cleaned copy: the caller's string may be frozen or reused.
    clean = str.gsub(/[^A-Za-z0-9.()<>,]/, '')
    if (m = /\Ajoin\((.+)\)\z/i.match(clean))
      parse_join(m[1])
    elsif (m = /\Acomplement\((.+)\)\z/i.match(clean))
      SeqRange.parse(m[1]).reverse!
    else
      SeqRange.new(ContigSeqRange.parse(clean))
    end
  end

  # Parses the comma-separated content of a +join()+ operator, splitting only
  # on commas that are not nested inside parentheses.
  def self.parse_join(list)
    out = SeqRange.new
    piece = String.new
    depth = 0
    list.each_char do |chr|
      if chr == ',' && depth.zero?
        out << SeqRange.parse(piece)
        piece = String.new
        next
      end
      depth += 1 if chr == '('
      if chr == ')'
        depth -= 1
        raise "Unbalanced parenthesis in '#{list}'." if depth < 0
      end
      piece << chr
    end
    # A parenthesis left open is as malformed as one closed too early
    raise "Unbalanced parenthesis in '#{list}'." unless depth.zero?

    out << SeqRange.parse(piece) unless piece.empty?
    out
  end
  private_class_method :parse_join

  # Instance-level
  attr_reader :contig

  # Creates a SeqRange from any object supported by #<<, or an empty one if
  # +c+ is nil.
  def initialize(c = nil)
    @contig = []
    self << c unless c.nil?
  end

  # Smallest left coordinate among all ranges (nil if empty).
  def leftmost
    contig.map(&:left).min
  end

  # Largest right coordinate among all ranges (nil if empty).
  def rightmost
    contig.map(&:right).max
  end

  # Total number of positions, adding up the size of every range.
  def size
    contig.sum(&:size)
  end

  # Returns a new SeqRange with the ranges of +self+ followed by those of +sr+
  # (a SeqRange or a ContigSeqRange).
  def +(sr)
    return self + SeqRange.new(sr) if sr.is_a? ContigSeqRange
    raise "Unsupported operation '+' with class #{sr.class}." unless sr.is_a? SeqRange

    out = SeqRange.new(self)
    out << sr
    out
  end

  # Fraction of the contiguous domain +sr+ (a ContigSeqRange, or a SeqRange
  # that compacts into a single range) covered by +self+.
  def /(sr)
    if sr.is_a? SeqRange
      domain = sr.sort.compact
      # Count ranges, not bases: #size is the number of positions covered.
      raise 'Denominator is not a contiguous domain.' unless domain.contig.size == 1

      return self / domain.contig.first
    end
    raise "Unsupported operation '/' with class #{sr.class}" unless sr.is_a? ContigSeqRange
    raise "Denominator doesn't span the whole domain of numerator." unless
      sr.left <= leftmost && sr.right >= rightmost

    previous = ContigSeqRange.IGNORE_STRAND
    begin
      ContigSeqRange.IGNORE_STRAND = false
      covered = sort.compact.size
    ensure
      # Restore the global pragma even if sorting/compacting raises
      ContigSeqRange.IGNORE_STRAND = previous
    end
    covered.to_f / sr.size
  end

  # Appends +c+: a ContigSeqRange, a SeqRange, or an Array of ContigSeqRange.
  def <<(c)
    if c.is_a? ContigSeqRange
      @contig << c
    elsif c.is_a? SeqRange
      @contig += c.contig
    elsif c.is_a? Array
      raise 'Array must contain only objects of class ContigSeqRange.' unless
        c.all? { |cc| cc.is_a? ContigSeqRange }

      @contig += c
    else
      raise "Unsupported operation '<<' with class #{c.class}."
    end
  end

  # Non-destructive version of #reverse!. Ranges are duplicated first because
  # #reverse! flips the strand of each range in place.
  def reverse
    SeqRange.new(contig.map(&:dup)).reverse!
  end

  # Non-destructive version of #sort!.
  def sort
    SeqRange.new(self).sort!
  end

  # Non-destructive version of #compact!.
  def compact
    SeqRange.new(self).compact!
  end

  # Flips the strand of every range and inverts their order.
  def reverse!
    @contig.each(&:reverse!)
    @contig.reverse!
    self
  end

  # Sorts the ranges by left coordinate.
  def sort!
    @contig.sort! { |a, b| a.left <=> b.left }
    self
  end

  # Merges neighboring ranges that are in the same strand and contiguous (or
  # overlapping), repeating until no adjacent pair can be merged. Only
  # adjacent entries are compared, so call #sort! first for a full merge.
  def compact!
    loop do
      idx = (0...contig.size - 1).find do |i|
        contig[i].reverse? == contig[i + 1].reverse? &&
          contig[i].contig?(contig[i + 1])
      end
      break if idx.nil?

      @contig[idx, 2] = [contig[idx] + contig[idx + 1]]
    end
    self
  end

  # GenBank-style representation, wrapped in +join()+ if there is more than
  # one range.
  def to_s
    o = contig.map(&:to_s).join(',')
    o = "join(#{o})" if contig.size > 1
    o
  end
end
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
# ContigSeqRange.parse(str): Initializes a new ContigSeqRange from a string. A
|
|
149
|
-
# ContigSeqRange is a primitive of `SeqRange` that doesn't support the
|
|
150
|
-
# `join()` operator. Other than that, syntax is identical to `SeqRange`.
|
|
151
|
-
# str: A string describing the sequence range as in GenBank records (except
|
|
152
|
-
# `join()`).
|
|
153
|
-
#
|
|
154
|
-
# ContigSeqRange.new(a,b): Initializes a new ContigSeqRange from the
|
|
155
|
-
# coordinates as integers.
|
|
156
|
-
# a: Start of the range.
|
|
157
|
-
# b: End of the range. If a>b, the `complement()` operator is assumed.
|
|
158
|
-
#
|
|
159
|
-
# ContigSeqRange.IGNORE_STRAND = true: Use this pragma to ignore strandness.
|
|
160
|
-
# If set, it globally affects the behavior of the class. Note that
|
|
161
|
-
# `SeqRange` instances contain a collection of `ContigSeqRange` objects, so
|
|
162
|
-
# that class is also affected.
|
|
163
|
-
# A single contiguous range of 1-based coordinates with strand and optional
# open ends; the primitive that SeqRange collects.
class ContigSeqRange
  # Class-level
  @@IGNORE_STRAND = false

  # Grammar of a single GenBank-style range, optionally wrapped in
  # +complement()+. Matched against a stripped, downcased string.
  LOCATION_PATTERN = %r{\A
    (?<c>complement\()?  # Reverse
    (?<lt><?)            # Open-ended to the left
    (?<left>\d+)         # Left coordinate
    (?:
      \.\.\.?            # 2 or 3 dots
      (?<gt1>>?)         # Open-ended to the right
      (?<right>\d+)      # Right coordinate
    )?
    (?<gt2>>?)           # Open-ended to the right
    \)?                  # If reverse
  \z}x.freeze
  private_constant :LOCATION_PATTERN

  # Sets the global pragma: if true, #reverse! becomes a no-op.
  def self.IGNORE_STRAND=(v)
    @@IGNORE_STRAND = !!v
  end

  # Current value of the global strand pragma.
  def self.IGNORE_STRAND
    @@IGNORE_STRAND
  end

  # Parses +str+ (e.g. "<1..123", "complement(3..6)", "42" or "123..3") into
  # a new ContigSeqRange. The input string is never modified. Raises
  # RuntimeError if the string cannot be parsed.
  def self.parse(str)
    text = str.strip.downcase
    m = LOCATION_PATTERN.match(text)
    raise "Cannot parse range: #{text}." if m.nil?

    left = m[:left].to_i
    # A single position has no right coordinate: it spans only itself
    right = m[:right] ? m[:right].to_i : left
    range = ContigSeqRange.new(left, right)
    range.open_left = true if m[:lt] == '<'
    range.open_right = true if m[:gt1] == '>' || m[:gt2] == '>'
    range.reverse! if m[:c]
    range
  end

  # Instance-level
  attr_accessor :open_left, :open_right
  attr_reader :coords

  # Creates a range between +a+ and +b+. If a > b the coordinates are swapped
  # and the range is marked as reverse (complement).
  def initialize(a, b)
    @coords = [a, b].minmax
    @open_left = false
    @open_right = false
    @reverse = (a > b)
  end

  # Coordinate where the range starts, in reading direction.
  def from
    coords[reverse? ? 1 : 0]
  end

  # Coordinate where the range ends, in reading direction.
  def to
    coords[reverse? ? 0 : 1]
  end

  # Smallest coordinate.
  def left
    coords[0]
  end

  # Largest coordinate.
  def right
    coords[1]
  end

  # Number of positions in the range.
  def size
    right - left + 1
  end

  # Is the range in the reverse (complement) strand?
  def reverse?
    @reverse
  end

  # Flips the strand, unless ContigSeqRange.IGNORE_STRAND is set.
  def reverse!
    @reverse = !reverse? unless @@IGNORE_STRAND
    self
  end

  # Do the ranges share at least one position?
  def overlap?(sr)
    !(right < sr.left || left > sr.right)
  end

  # Are the ranges overlapping or directly adjacent?
  def contig?(sr)
    !(right + 1 < sr.left || left - 1 > sr.right)
  end

  # Returns the union of two contiguous ranges in the same strand. Raises
  # RuntimeError otherwise.
  def +(sr)
    raise "Unsupported operation '+' with class #{sr.class}" unless sr.is_a? ContigSeqRange
    raise 'Non-contiguous ranges cannot be added.' unless contig? sr
    raise 'Ranges in different strands cannot be added.' unless reverse? == sr.reverse?

    out = ContigSeqRange.new([left, sr.left].min, [right, sr.right].max)
    out.reverse! if reverse?
    # An end is open if any operand reaching that end is open there
    # (including the case where both operands share the boundary).
    out.open_left = true if [self, sr].any? { |r| r.left == out.left && r.open_left }
    out.open_right = true if [self, sr].any? { |r| r.right == out.right && r.open_right }
    out
  end

  # GenBank-style representation of the range.
  def to_s
    o = "#{'<' if open_left}#{left}"
    if left == right
      o += '>' if open_right
    else
      o += "..#{'>' if open_right}#{right}"
    end
    reverse? ? "complement(#{o})" : o
  end
end
|
|
237
|
-
|
|
@@ -1,31 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
module Enveomics
  module Stats
    class << self
      # Generates a random number from the +dist+ distribution with +params+
      # parameters. This is simply a wrapper to the public r_* functions
      # below (e.g., +rand(:geom, 0.5)+ calls +r_geom(0.5)+).
      def rand(dist = :unif, *params)
        public_send("r_#{dist}", *params)
      end

      # Generates a random number from the uniform distribution between +min+
      # and +max+. By default generates random numbers between 0.0 and 1.0.
      def r_unif(min = 0.0, max = 1.0)
        min + (max - min) * Random.rand
      end

      # Generates a random number from the geometric distribution with support
      # {0, 1, 2, ...} and probability of success +p+.
      def r_geom(p)
        # 1 - u lies in (0, 1], so the ratio of logarithms is never negative
        # and flooring it stays within the support, even for u = 0 or p = 1.
        (Math.log(1.0 - rand) / Math.log(1.0 - p)).floor
      end

      # Generates a random number from the shifted geometric distribution with
      # support {1, 2, 3, ...} and probability of success +p+.
      def r_sgeom(p)
        r_geom(p) + 1
      end
    end
  end
end
|
|
31
|
-
|
|
@@ -1,152 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
module Enveomics
|
|
3
|
-
module Stats
|
|
4
|
-
# Descriptive statistics for a given sample
|
|
5
|
-
class Sample
|
|
6
|
-
attr :x
|
|
7
|
-
attr :opts
|
|
8
|
-
|
|
9
|
-
# Initialize Enveomics::Stats::Sample with numeric vector +x+ and options
|
|
10
|
-
# Hash +opts+ supporting the keys:
|
|
11
|
-
# - +effective_range+: Range where values fall (by default: range of +x+)
|
|
12
|
-
# - +histo_bin_size+: Width of histogram bins
|
|
13
|
-
# (by default: 1/50th of +effective_range+)
|
|
14
|
-
# Builds the sample from the numeric vector +x+ (each entry coerced with
# +to_f+) and the options Hash +opts+, which is stored as given.
# Raises RuntimeError if +x+ is empty.
def initialize(x, opts = {})
  raise 'Cannot initialize an empty sample' if x.empty?

  @x = x.map { |value| value.to_f }
  @opts = opts
end
|
|
19
|
-
|
|
20
|
-
# Size of the sample
|
|
21
|
-
# Number of observations in the sample
def n
  x.length
end
|
|
24
|
-
|
|
25
|
-
# Estimates the sample mean
|
|
26
|
-
# Sample mean (arithmetic average of +x+), cached after the first call
def mean
  return @mean if @mean

  @mean = x.reduce(:+) / n
end
|
|
29
|
-
|
|
30
|
-
# Estimates the mean of the square of the sample
|
|
31
|
-
# Mean of the squared observations, cached after the first call
def square_mean
  return @square_mean if @square_mean

  squares = x.map { |value| value**2 }
  @square_mean = squares.reduce(:+) / n
end
|
|
34
|
-
|
|
35
|
-
# Estimates the unbiased sample variance
|
|
36
|
-
# Estimates the unbiased sample variance, cached after the first call.
#
# The estimate uses E[x^2] - E[x]^2, which can come out slightly negative
# for (nearly) constant samples due to floating-point cancellation. Such
# values are clamped to 0.0 so that the variance is never negative and #sd
# never becomes a Complex number. For a single observation the result is
# still NaN (0.0 / 0).
def var
  @var ||= begin
    raw = (square_mean - mean**2) * n / (n - 1)
    raw < 0 ? 0.0 : raw
  end
end
|
|
39
|
-
|
|
40
|
-
# Estimates the unbiased sample standard deviation
|
|
41
|
-
# Sample standard deviation: square root of #var, cached after the first call
def sd
  return @sd if @sd

  @sd = var**0.5
end
|
|
44
|
-
|
|
45
|
-
# --- Higher moments ---
|
|
46
|
-
|
|
47
|
-
# Estimate sample skewness
|
|
48
|
-
# Sample skewness: sum of cubed deviations from the mean divided by
# (n - 1) times the cubed standard deviation. Returns 0.0 for a single
# observation.
def skewness
  return 0.0 if n == 1

  cubes = x.map { |value| (value - mean)**3 }
  cubes.inject(0.0, :+) / ((n - 1) * (sd**3))
end
|
|
53
|
-
|
|
54
|
-
# Estimate sample excess kurtosis
|
|
55
|
-
# Estimate sample excess kurtosis: the fourth standardized moment minus 3,
# so that a normal distribution scores about 0. Returns 0.0 for a single
# observation.
#
# NOTE(review): the value is 3 lower than what this method used to return,
# which was the plain (non-excess) kurtosis despite the documented contract.
def kurtosis
  return 0.0 if n == 1

  quart_dev = x.inject(0.0) { |sum, i| sum + (i - mean)**4 }
  quart_dev / ((n - 1) * (sd**4)) - 3.0
end
|
|
60
|
-
|
|
61
|
-
# --- Ranges ---
|
|
62
|
-
|
|
63
|
-
# Range effectively considered
|
|
64
|
-
# Two-entry array +[min, max]+ with the range effectively considered. Any
# bound missing from the +effective_range+ option is filled in (and stored
# back in the options) from the minimum or maximum of +x+.
def effective_range
  bounds = (@opts[:effective_range] ||= [nil, nil])
  bounds[0] ||= x.min
  bounds[1] ||= x.max
  bounds
end
|
|
70
|
-
|
|
71
|
-
# Size of the effective range
|
|
72
|
-
# Width of the effective range (upper bound minus lower bound)
def effective_range_size
  upper = effective_range[1]
  lower = effective_range[0]
  upper - lower
end
|
|
75
|
-
|
|
76
|
-
# --- Histograms ---
|
|
77
|
-
|
|
78
|
-
# Size of each histogram bin
|
|
79
|
-
# Width of each histogram bin. Unless given in the options, it is set (and
# stored in the options) to 1/50th of the effective range size.
def histo_bin_size
  return @opts[:histo_bin_size] if @opts[:histo_bin_size]

  @opts[:histo_bin_size] = effective_range_size / 50.0
end
|
|
82
|
-
|
|
83
|
-
# Calculate histogram ranges without checking for cached value
#
# Use #histo_ranges instead
#
# Starting from the maximum of the effective range, bins of width
# #histo_bin_size are stacked downwards until the lower end of the last
# bin is no longer above the minimum of the effective range. Returns an
# Array of +[upper, lower]+ pairs sorted from maximum to minimum.
#
# Raises ArgumentError if subtracting the bin size does not lower the bin
# boundary (zero, negative, or negligibly small bin size) while bins are
# still needed, since the ranges could never be completed
def calculate_histo_ranges
  low, high = effective_range
  width = histo_bin_size
  ranges = [[high, high - width]]
  while ranges.last[1] > low
    upper = ranges.last[1]
    lower = upper - width
    # Without progress the boundary can never reach the minimum
    unless lower < upper
      raise ArgumentError,
            "Histogram bin size (#{width}) cannot cover the effective range"
    end
    ranges << [upper, lower]
  end
  ranges
end
|
|
93
|
-
|
|
94
|
-
# Histogram ranges as an Array of two-entry Arrays, where the first entry
# is the maximum value of the range (inclusive) and the second entry is
# the minimum value of the range (non-inclusive). The Array is sorted
# from maximum to minimum
#
# Something like: +[[100.0, 99.0], [99.0, 98.0], ...]+, representing the
# ranges: {(99, 100], (98, 99], ...}
#
# The bin width is determined by #histo_bin_size. The result of
# #calculate_histo_ranges is memoized in @histo_ranges
def histo_ranges
  return @histo_ranges if @histo_ranges

  @histo_ranges = calculate_histo_ranges
end
|
|
106
|
-
|
|
107
|
-
# Mid-points of the histogram ranges from #histo_ranges: for each range,
# the sum of its two ends divided by 2. Returns an Array with one entry
# per range, memoized in @histo_mids
def histo_mids
  return @histo_mids if @histo_mids

  @histo_mids = histo_ranges.map { |upper, lower| (upper + lower) / 2 }
end
|
|
112
|
-
|
|
113
|
-
# Calculate the histogram counts without checking cached value
#
# Use #histo_counts instead
#
# Works on a copy of the observations: for each range (visited in the
# order given by #histo_ranges) the observations strictly above the lower
# end of the range are removed from the copy, and the number removed is
# the count for that range. Returns an Array with one count per range
def calculate_histo_counts
  remaining = x.dup
  histo_ranges.map do |_upper, lower|
    before = remaining.size
    remaining.delete_if { |value| value > lower }
    before - remaining.size
  end
end
|
|
124
|
-
|
|
125
|
-
# Histogram counts in the ranges determined by #histo_ranges. The result
# of #calculate_histo_counts is memoized in @histo_counts
def histo_counts
  return @histo_counts if @histo_counts

  @histo_counts = calculate_histo_counts
end
|
|
129
|
-
|
|
130
|
-
# --- Bimodality coefficients ---
|
|
131
|
-
|
|
132
|
-
# Sarle's sample bimodality coefficient b:
#
#   (skewness^2 + 1) / (kurtosis + 3 (n - 1)^2 / ((n - 2) (n - 3)))
#
# The finite-sample correction term is evaluated in floating point; with
# Integer operands it would be truncated by integer division.
#
# Raises ZeroDivisionError for samples of size 2 or 3, where the
# correction term is undefined
def sarle_bimodality
  correction_den = (n - 2) * (n - 3)
  if correction_den.zero?
    raise ZeroDivisionError,
          "Sarle's bimodality coefficient is undefined for n = #{n}"
  end

  correction = 3.0 * ((n - 1)**2) / correction_den
  (skewness**2 + 1) / (kurtosis + correction)
end
|
|
137
|
-
|
|
138
|
-
# de Michele & Accantino (2014) B index: the absolute difference between
# the sample mean and the µ_M index (see #dma_mu_M)
# DOI: 10.1371/journal.pone.0091195
def dma_bimodality
  gap = mean - dma_mu_M
  gap.abs
end
|
|
143
|
-
|
|
144
|
-
# µ_M index proposed by de Michele & Accantino (2014): the sum of each
# histogram count multiplied by the mid-point of its range, divided by
# the sample size
# DOI: 10.1371/journal.pone.0091195
def dma_mu_M
  weighted = histo_counts.zip(histo_mids).map { |count, mid| count * mid }
  weighted.reduce(:+) / n
end
|
|
149
|
-
end
|
|
150
|
-
end
|
|
151
|
-
end
|
|
152
|
-
|
|
@@ -1,74 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
require 'enveomics_rb/errors'
require 'shellwords'
require 'zlib'
|
|
4
|
-
|
|
5
|
-
##
# Require one or more gems, given as a String or an Array of String in
# +gems+.
#
# Returns true if all of them could be loaded. Otherwise, if +mandatory+
# is true (default) the process is aborted with a message listing the
# gems that failed to load; if +mandatory+ is false, returns false.
#
# Every gem is attempted, so the message names exactly the gems that are
# missing, and the Array passed by the caller is never modified
def use(gems, mandatory = true)
  gems = [gems] unless gems.is_a? Array
  require 'rubygems'
  missing = gems.reject do |name|
    begin
      require name
      true
    rescue LoadError
      false
    end
  end
  return true if missing.empty?

  if mandatory
    abort "\nUnmet requirements, please install required gems:" +
          missing.map { |name| "\n gem install #{name}" }.join + "\n\n"
  end
  false
end
|
|
19
|
-
|
|
20
|
-
##
# Print all the pieces of +msg+ (concatenated) to the standard error as a
# line prefixed with the current time, unless the global $QUIET is set.
# $QUIET is initialized to false if it has no value yet
def say(*msg)
  $QUIET ||= false
  return if $QUIET

  text = msg.join('')
  $stderr.puts('[%s] %s' % [Time.now, text])
end
|
|
23
|
-
|
|
24
|
-
##
# Same as #say, but the message is printed to the standard error without
# a trailing line break. Nothing is printed if the global $QUIET is set;
# $QUIET is initialized to false if it has no value yet
def say_inline(*msg)
  $QUIET ||= false
  return if $QUIET

  text = msg.join('')
  $stderr.print('[%s] %s' % [Time.now, text])
end
|
|
27
|
-
|
|
28
|
-
##
# Returns an open handler to read from +file+: the standard input if
# +file+ is '-', a Zlib::GzipReader if the name matches the .gz
# extension, or a regular File opened in read mode otherwise
def reader(file)
  return $stdin if file == '-'
  return Zlib::GzipReader.open(file) if file =~ /\.gz$/

  File.open(file, 'r')
end
|
|
36
|
-
|
|
37
|
-
##
# Returns an open handler to write to +file+: the standard output if
# +file+ is '-', a Zlib::GzipWriter if the name matches the .gz
# extension, or a regular File opened in write mode otherwise
def writer(file)
  return $stdout if file == '-'
  return Zlib::GzipWriter.open(file) if file =~ /\.gz$/

  File.open(file, 'w')
end
|
|
45
|
-
|
|
46
|
-
##
# Run a command +cmd+ that can be a ready-to-go String or an Array to
# escape (joined with Shellwords)
#
# Supported symbol key options in Hash +opts+:
# - wait: Boolean, should I wait for the command to complete? Default: true
# - stdout: Path to redirect the standard output
# - stderr: Path to redirect the standard error
# - mergeout: Send stderr to stdout
#
# Return the process ID. If wait is true (default), check for the exit
# status and throw an Enveomics::CommandError if non-zero
#
# Neither +cmd+ nor +opts+ are modified
def run_cmd(cmd, opts = {})
  # Resolve the default locally instead of writing it into the caller's Hash
  wait = opts[:wait].nil? ? true : opts[:wait]

  parts = [cmd.is_a?(Array) ? cmd.shelljoin : cmd]
  parts << "> #{opts[:stdout].shellescape}" if opts[:stdout]
  parts << "2> #{opts[:stderr].shellescape}" if opts[:stderr]
  parts << '2>&1' if opts[:mergeout]
  cmd = parts.join(' ')

  pid = spawn(cmd)
  return pid unless wait

  # wait2 returns the status of this very process, rather than relying on $?
  _, status = Process.wait2(pid)
  unless status.success?
    raise Enveomics::CommandError.new(
      "Command failed with status #{status.exitstatus}:\n#{cmd}"
    )
  end
  pid
end
|
|
74
|
-
|
|
@@ -1,135 +0,0 @@
|
|
|
1
|
-
# @author Luis M. Rodriguez-R
|
|
2
|
-
# @license Artistic-2.0
|
|
3
|
-
|
|
4
|
-
##
# Enveomics representation of a Variant Call Format (VCF) file.
class VCF
  ##
  # File-handler the lines are read from.
  attr_reader :fh

  ##
  # Create a VCF from +file+. A String is taken as a path and opened for
  # reading; any other object is used directly as the file-handler.
  def initialize(file)
    @fh = file.is_a?(String) ? File.open(file, 'r') : file
  end

  ##
  # Rewind the file-handler and iterate through each variant (i.e., each
  # line not starting with '#'), passing a VCF::Variant object to +blk+.
  def each_variant(&blk)
    fh.rewind
    fh.each_line { |line| blk.call(VCF::Variant.new(line)) unless line =~ /^#/ }
  end

  ##
  # Rewind the file-handler and iterate through each header (i.e., each
  # line starting with '#'), passing the line as String to +blk+.
  def each_header(&blk)
    fh.rewind
    fh.each_line { |line| blk.call(line) if line =~ /^#/ }
  end
end
|
|
37
|
-
|
|
38
|
-
##
# A single variant of a VCF file, parsed from one non-comment line.
class VCF
  class Variant
    ##
    # Column definitions in VCF.
    @@COLUMNS = [:chrom, :pos, :id, :ref, :alt, :qual, :filter, :info, :format, :bam]

    ##
    # An Array containing each of the VCF entry's columns. The QUAL column
    # (index 5) is stored as Float and the INFO column (index 7) as a Hash
    # with Symbol keys; all other columns are String.
    attr_reader :data

    ##
    # Initialize VCF::Variant from String +line+, a non-comment line in the VCF.
    def initialize(line)
      @data = line.chomp.split("\t")
      # Qual as float
      @data[5] = data[5].to_f
      # Split info; entries without '=' are flags, stored with value true
      @data[7] = data[7].split(';').map do |entry|
        key, value = entry =~ /=/ ? entry.split('=', 2) : [entry, true]
        [key.to_sym, value]
      end.to_h
      # Read formatted data: each FORMAT key is paired with the value in the
      # same position of the tenth column, and merged into the INFO Hash
      unless data[9].nil? || data[9].empty?
        keys = format.split(':')
        values = bam.split(':')
        keys.each_with_index { |key, i| @data[7][key.to_sym] = values[i] }
      end
      # Flag as indel when REF and the first ALT allele differ in length
      @data[7][:INDEL] = true if ref.size != alt.split(',').first.size
    end

    ##
    # Named functions for each column. All return the stored column value,
    # except +pos+, which is returned as Integer.
    @@COLUMNS.each_with_index do |column, i|
      define_method(column) { column == :pos ? data[i].to_i : data[i] }
    end

    ##
    # Sequencing depth (INFO key DP) as Integer, or nil if not available.
    def dp
      return nil if info[:DP].nil?

      info[:DP].to_i
    end

    ##
    # Sequencing depth of FWD-REF, REV-REF, FWD-ALT, and REV-ALT (INFO key
    # DP4) as an Array of Integer, or nil if not available.
    def dp4
      return nil if info[:DP4].nil?

      @dp4 ||= info[:DP4].split(',').map(&:to_i)
    end

    ##
    # Sequencing depth of REF and ALT (key AD) as an Array of Integer, or nil
    # if not available.
    def ad
      return nil if info[:AD].nil?

      @ad ||= info[:AD].split(',').map(&:to_i)
    end

    ##
    # Sequencing depth of the REF allele, from DP4 if available, otherwise
    # from AD. Returns nil if neither is available.
    def ref_dp
      return dp4[0] + dp4[1] unless dp4.nil?
      return ad[0] unless ad.nil?

      nil
    end

    ##
    # Sequencing depth of the ALT allele, from DP4 if available, otherwise
    # from AD. Returns nil if neither is available.
    def alt_dp
      return dp4[2] + dp4[3] unless dp4.nil?
      return ad[1] unless ad.nil?

      nil
    end

    ##
    # Information content of the variant in bits (from 0 to 1), i.e., the
    # Shannon entropy of the REF/ALT depth proportions.
    #
    # Returns nil if the allele depths are not available. An allele with zero
    # depth contributes zero to the entropy (the limit of p*log(p) as p goes
    # to 0) instead of turning the result into NaN. The result is still NaN
    # when both depths are zero.
    def shannon
      return @shannon unless @shannon.nil?

      a = ref_dp
      b = alt_dp
      return nil if a.nil? || b.nil?

      total = a + b
      terms = [a, b].map do |depth|
        p = depth.to_f / total
        p.zero? ? 0.0 : p * Math.log(p, 2)
      end
      # Subtract from 0.0 (rather than negate) to avoid returning -0.0
      @shannon = 0.0 - (terms[0] + terms[1])
    end

    ##
    # Is it an indel?
    def indel?
      !info[:INDEL].nil? && info[:INDEL]
    end

    ##
    # Return as String: the tab-delimited columns followed by a line break,
    # with the INFO column as returned by #info_to_s.
    def to_s
      (data[0..6] + [info_to_s] + data[8..-1].to_a).join("\t") + "\n"
    end

    ##
    # Returns the INFO entry as String: semicolon-delimited +KEY=value+ pairs.
    # Flags (entries stored with value true) are written as the bare key.
    def info_to_s
      data[7].map do |key, value|
        value.equal?(true) ? key.to_s : "#{key}=#{value}"
      end.join(';')
    end
  end
end
|