miga-base 1.2.15.2 → 1.2.15.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/cli/action/download/gtdb.rb +4 -1
- data/lib/miga/cli/action/gtdb_get.rb +4 -0
- data/lib/miga/daemon.rb +4 -1
- data/lib/miga/lair.rb +6 -4
- data/lib/miga/remote_dataset/download.rb +3 -2
- data/lib/miga/remote_dataset.rb +25 -7
- data/lib/miga/taxonomy.rb +6 -0
- data/lib/miga/version.rb +2 -2
- metadata +6 -302
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +0 -41964
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +0 -32439
- data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +0 -62056
- data/utils/FastAAI/FastAAI +0 -3659
- data/utils/FastAAI/FastAAI-legacy/FastAAI +0 -1336
- data/utils/FastAAI/FastAAI-legacy/kAAI_v1.0_virus.py +0 -1296
- data/utils/FastAAI/README.md +0 -84
- data/utils/enveomics/Docs/recplot2.md +0 -244
- data/utils/enveomics/Examples/aai-matrix.bash +0 -66
- data/utils/enveomics/Examples/ani-matrix.bash +0 -66
- data/utils/enveomics/Examples/essential-phylogeny.bash +0 -105
- data/utils/enveomics/Examples/unus-genome-phylogeny.bash +0 -100
- data/utils/enveomics/LICENSE.txt +0 -73
- data/utils/enveomics/Makefile +0 -52
- data/utils/enveomics/Manifest/Tasks/aasubs.json +0 -103
- data/utils/enveomics/Manifest/Tasks/blasttab.json +0 -790
- data/utils/enveomics/Manifest/Tasks/distances.json +0 -161
- data/utils/enveomics/Manifest/Tasks/fasta.json +0 -802
- data/utils/enveomics/Manifest/Tasks/fastq.json +0 -291
- data/utils/enveomics/Manifest/Tasks/graphics.json +0 -126
- data/utils/enveomics/Manifest/Tasks/mapping.json +0 -137
- data/utils/enveomics/Manifest/Tasks/ogs.json +0 -382
- data/utils/enveomics/Manifest/Tasks/other.json +0 -906
- data/utils/enveomics/Manifest/Tasks/remote.json +0 -355
- data/utils/enveomics/Manifest/Tasks/sequence-identity.json +0 -650
- data/utils/enveomics/Manifest/Tasks/tables.json +0 -308
- data/utils/enveomics/Manifest/Tasks/trees.json +0 -68
- data/utils/enveomics/Manifest/Tasks/variants.json +0 -111
- data/utils/enveomics/Manifest/categories.json +0 -165
- data/utils/enveomics/Manifest/examples.json +0 -162
- data/utils/enveomics/Manifest/tasks.json +0 -4
- data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +0 -69
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +0 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +0 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +0 -1
- data/utils/enveomics/Pipelines/assembly.pbs/README.md +0 -189
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +0 -112
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +0 -23
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +0 -44
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +0 -50
- data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +0 -37
- data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +0 -68
- data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +0 -49
- data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +0 -80
- data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +0 -57
- data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +0 -63
- data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +0 -38
- data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +0 -73
- data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +0 -21
- data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +0 -72
- data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +0 -98
- data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +0 -1
- data/utils/enveomics/Pipelines/blast.pbs/README.md +0 -127
- data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +0 -109
- data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +0 -128
- data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +0 -16
- data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +0 -22
- data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +0 -26
- data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +0 -89
- data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +0 -29
- data/utils/enveomics/Pipelines/idba.pbs/README.md +0 -49
- data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +0 -95
- data/utils/enveomics/Pipelines/idba.pbs/run.pbs +0 -56
- data/utils/enveomics/Pipelines/trim.pbs/README.md +0 -54
- data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +0 -70
- data/utils/enveomics/Pipelines/trim.pbs/run.pbs +0 -130
- data/utils/enveomics/README.md +0 -42
- data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +0 -171
- data/utils/enveomics/Scripts/Aln.cat.rb +0 -221
- data/utils/enveomics/Scripts/Aln.convert.pl +0 -35
- data/utils/enveomics/Scripts/AlphaDiversity.pl +0 -152
- data/utils/enveomics/Scripts/BedGraph.tad.rb +0 -93
- data/utils/enveomics/Scripts/BedGraph.window.rb +0 -71
- data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +0 -102
- data/utils/enveomics/Scripts/BlastTab.addlen.rb +0 -63
- data/utils/enveomics/Scripts/BlastTab.advance.bash +0 -48
- data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +0 -55
- data/utils/enveomics/Scripts/BlastTab.catsbj.pl +0 -104
- data/utils/enveomics/Scripts/BlastTab.cogCat.rb +0 -76
- data/utils/enveomics/Scripts/BlastTab.filter.pl +0 -47
- data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +0 -194
- data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +0 -104
- data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +0 -157
- data/utils/enveomics/Scripts/BlastTab.recplot2.R +0 -48
- data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +0 -86
- data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +0 -119
- data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +0 -86
- data/utils/enveomics/Scripts/BlastTab.subsample.pl +0 -47
- data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +0 -114
- data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +0 -90
- data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +0 -123
- data/utils/enveomics/Scripts/Chao1.pl +0 -97
- data/utils/enveomics/Scripts/CharTable.classify.rb +0 -234
- data/utils/enveomics/Scripts/EBIseq2tax.rb +0 -83
- data/utils/enveomics/Scripts/FastA.N50.pl +0 -60
- data/utils/enveomics/Scripts/FastA.extract.rb +0 -152
- data/utils/enveomics/Scripts/FastA.filter.pl +0 -52
- data/utils/enveomics/Scripts/FastA.filterLen.pl +0 -28
- data/utils/enveomics/Scripts/FastA.filterN.pl +0 -60
- data/utils/enveomics/Scripts/FastA.fragment.rb +0 -100
- data/utils/enveomics/Scripts/FastA.gc.pl +0 -42
- data/utils/enveomics/Scripts/FastA.interpose.pl +0 -93
- data/utils/enveomics/Scripts/FastA.length.pl +0 -38
- data/utils/enveomics/Scripts/FastA.mask.rb +0 -89
- data/utils/enveomics/Scripts/FastA.per_file.pl +0 -36
- data/utils/enveomics/Scripts/FastA.qlen.pl +0 -57
- data/utils/enveomics/Scripts/FastA.rename.pl +0 -65
- data/utils/enveomics/Scripts/FastA.revcom.pl +0 -23
- data/utils/enveomics/Scripts/FastA.sample.rb +0 -98
- data/utils/enveomics/Scripts/FastA.slider.pl +0 -85
- data/utils/enveomics/Scripts/FastA.split.pl +0 -55
- data/utils/enveomics/Scripts/FastA.split.rb +0 -79
- data/utils/enveomics/Scripts/FastA.subsample.pl +0 -131
- data/utils/enveomics/Scripts/FastA.tag.rb +0 -65
- data/utils/enveomics/Scripts/FastA.toFastQ.rb +0 -69
- data/utils/enveomics/Scripts/FastA.wrap.rb +0 -48
- data/utils/enveomics/Scripts/FastQ.filter.pl +0 -54
- data/utils/enveomics/Scripts/FastQ.interpose.pl +0 -90
- data/utils/enveomics/Scripts/FastQ.maskQual.rb +0 -89
- data/utils/enveomics/Scripts/FastQ.offset.pl +0 -90
- data/utils/enveomics/Scripts/FastQ.split.pl +0 -53
- data/utils/enveomics/Scripts/FastQ.tag.rb +0 -70
- data/utils/enveomics/Scripts/FastQ.test-error.rb +0 -81
- data/utils/enveomics/Scripts/FastQ.toFastA.awk +0 -24
- data/utils/enveomics/Scripts/GFF.catsbj.pl +0 -127
- data/utils/enveomics/Scripts/GenBank.add_fields.rb +0 -84
- data/utils/enveomics/Scripts/HMM.essential.rb +0 -351
- data/utils/enveomics/Scripts/HMM.haai.rb +0 -168
- data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +0 -83
- data/utils/enveomics/Scripts/JPlace.distances.rb +0 -88
- data/utils/enveomics/Scripts/JPlace.to_iToL.rb +0 -320
- data/utils/enveomics/Scripts/M5nr.getSequences.rb +0 -81
- data/utils/enveomics/Scripts/MeTaxa.distribution.pl +0 -198
- data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +0 -35
- data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +0 -49
- data/utils/enveomics/Scripts/NCBIacc2tax.rb +0 -92
- data/utils/enveomics/Scripts/Newick.autoprune.R +0 -27
- data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +0 -228
- data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +0 -32
- data/utils/enveomics/Scripts/RefSeq.download.bash +0 -48
- data/utils/enveomics/Scripts/SRA.download.bash +0 -55
- data/utils/enveomics/Scripts/TRIBS.plot-test.R +0 -36
- data/utils/enveomics/Scripts/TRIBS.test.R +0 -39
- data/utils/enveomics/Scripts/Table.barplot.R +0 -31
- data/utils/enveomics/Scripts/Table.df2dist.R +0 -30
- data/utils/enveomics/Scripts/Table.filter.pl +0 -61
- data/utils/enveomics/Scripts/Table.merge.pl +0 -77
- data/utils/enveomics/Scripts/Table.prefScore.R +0 -60
- data/utils/enveomics/Scripts/Table.replace.rb +0 -69
- data/utils/enveomics/Scripts/Table.round.rb +0 -63
- data/utils/enveomics/Scripts/Table.split.pl +0 -57
- data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +0 -227
- data/utils/enveomics/Scripts/VCF.KaKs.rb +0 -147
- data/utils/enveomics/Scripts/VCF.SNPs.rb +0 -88
- data/utils/enveomics/Scripts/aai.rb +0 -421
- data/utils/enveomics/Scripts/ani.rb +0 -362
- data/utils/enveomics/Scripts/anir.rb +0 -137
- data/utils/enveomics/Scripts/clust.rand.rb +0 -102
- data/utils/enveomics/Scripts/gi2tax.rb +0 -103
- data/utils/enveomics/Scripts/in_silico_GA_GI.pl +0 -96
- data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
- data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
- data/utils/enveomics/Scripts/lib/enveomics.R +0 -1
- data/utils/enveomics/Scripts/lib/enveomics_rb/anir.rb +0 -293
- data/utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb +0 -175
- data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +0 -24
- data/utils/enveomics/Scripts/lib/enveomics_rb/errors.rb +0 -17
- data/utils/enveomics/Scripts/lib/enveomics_rb/gmm_em.rb +0 -30
- data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +0 -253
- data/utils/enveomics/Scripts/lib/enveomics_rb/match.rb +0 -88
- data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +0 -182
- data/utils/enveomics/Scripts/lib/enveomics_rb/rbm.rb +0 -49
- data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +0 -74
- data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +0 -237
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/rand.rb +0 -31
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats/sample.rb +0 -152
- data/utils/enveomics/Scripts/lib/enveomics_rb/stats.rb +0 -3
- data/utils/enveomics/Scripts/lib/enveomics_rb/utils.rb +0 -74
- data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +0 -135
- data/utils/enveomics/Scripts/ogs.annotate.rb +0 -88
- data/utils/enveomics/Scripts/ogs.core-pan.rb +0 -160
- data/utils/enveomics/Scripts/ogs.extract.rb +0 -125
- data/utils/enveomics/Scripts/ogs.mcl.rb +0 -186
- data/utils/enveomics/Scripts/ogs.rb +0 -104
- data/utils/enveomics/Scripts/ogs.stats.rb +0 -131
- data/utils/enveomics/Scripts/rbm-legacy.rb +0 -172
- data/utils/enveomics/Scripts/rbm.rb +0 -108
- data/utils/enveomics/Scripts/sam.filter.rb +0 -148
- data/utils/enveomics/Tests/Makefile +0 -10
- data/utils/enveomics/Tests/Mgen_M2288.faa +0 -3189
- data/utils/enveomics/Tests/Mgen_M2288.fna +0 -8282
- data/utils/enveomics/Tests/Mgen_M2321.fna +0 -8288
- data/utils/enveomics/Tests/Nequ_Kin4M.faa +0 -2970
- data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
- data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +0 -7
- data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +0 -17
- data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +0 -137
- data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +0 -123
- data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +0 -200
- data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +0 -55
- data/utils/enveomics/Tests/alkB.nwk +0 -1
- data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +0 -13
- data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +0 -17
- data/utils/enveomics/Tests/hiv1.faa +0 -59
- data/utils/enveomics/Tests/hiv1.fna +0 -134
- data/utils/enveomics/Tests/hiv2.faa +0 -70
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +0 -233
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +0 -1
- data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +0 -233
- data/utils/enveomics/Tests/phyla_counts.tsv +0 -10
- data/utils/enveomics/Tests/primate_lentivirus.ogs +0 -11
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +0 -9
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +0 -8
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +0 -6
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +0 -9
- data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +0 -6
- data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +0 -6
- data/utils/enveomics/build_enveomics_r.bash +0 -45
- data/utils/enveomics/enveomics.R/DESCRIPTION +0 -31
- data/utils/enveomics/enveomics.R/NAMESPACE +0 -39
- data/utils/enveomics/enveomics.R/R/autoprune.R +0 -155
- data/utils/enveomics/enveomics.R/R/barplot.R +0 -184
- data/utils/enveomics/enveomics.R/R/cliopts.R +0 -135
- data/utils/enveomics/enveomics.R/R/df2dist.R +0 -154
- data/utils/enveomics/enveomics.R/R/growthcurve.R +0 -331
- data/utils/enveomics/enveomics.R/R/prefscore.R +0 -79
- data/utils/enveomics/enveomics.R/R/recplot.R +0 -354
- data/utils/enveomics/enveomics.R/R/recplot2.R +0 -1631
- data/utils/enveomics/enveomics.R/R/tribs.R +0 -583
- data/utils/enveomics/enveomics.R/R/utils.R +0 -80
- data/utils/enveomics/enveomics.R/README.md +0 -81
- data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
- data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
- data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -16
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -16
- data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -16
- data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +0 -25
- data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +0 -46
- data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +0 -23
- data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +0 -47
- data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +0 -23
- data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +0 -23
- data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +0 -40
- data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +0 -103
- data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +0 -67
- data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +0 -24
- data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +0 -45
- data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +0 -44
- data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +0 -47
- data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +0 -75
- data/utils/enveomics/enveomics.R/man/enve.prefscore.Rd +0 -50
- data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +0 -44
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +0 -139
- data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +0 -45
- data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +0 -24
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +0 -77
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +0 -25
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +0 -21
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +0 -47
- data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +0 -29
- data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +0 -18
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +0 -45
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +0 -36
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +0 -27
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +0 -52
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +0 -17
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +0 -51
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +0 -43
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +0 -82
- data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +0 -59
- data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +0 -27
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +0 -36
- data/utils/enveomics/enveomics.R/man/enve.selvector.Rd +0 -23
- data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +0 -68
- data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +0 -28
- data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +0 -27
- data/utils/enveomics/enveomics.R/man/growth.curves.Rd +0 -14
- data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +0 -13
- data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +0 -78
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +0 -46
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +0 -45
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +0 -125
- data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +0 -19
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +0 -19
- data/utils/enveomics/globals.mk +0 -8
- data/utils/enveomics/manifest.json +0 -9
- data/utils/multitrim/Multitrim How-To.pdf +0 -0
- data/utils/multitrim/README.md +0 -67
- data/utils/multitrim/multitrim.py +0 -1555
- data/utils/multitrim/multitrim.yml +0 -13
|
@@ -1,60 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env Rscript
|
|
2
|
-
|
|
3
|
-
#= Load stuff
|
|
4
|
-
# Locate the enveomics.R sources relative to this script: Rscript passes the
# script path as a `--file=` argument.
cli_args <- commandArgs(trailingOnly = FALSE)
script_arg <- grep('^--file=', cli_args, value = TRUE)
enveomics_R <- file.path(dirname(sub('^--file=', '', script_arg)), 'lib', 'enveomics.R')
for (src in c('cliopts.R', 'utils.R', 'prefscore.R')) {
  source(file.path(enveomics_R, 'R', src))
}

#= Generate interface
# Build the command-line interface from the signature and Rd documentation of
# enve.prefscore; 1 to 4 positional arguments are accepted.
opt <- enve.cliopts(
  enve.prefscore,
  file.path(enveomics_R, 'man', 'enve.prefscore.Rd'),
  positional_arguments = c(1, 4),
  usage = 'usage: %prog [options] output.tsv [output.pdf [width height]]',
  mandatory = c('x', 'set'),
  number = c('signif.thr'),
  ignore = c('plot'),
  o_desc = list(
    x = 'A tab-delimited table of presence/absence (1/0) with species as rows and samples as columns.',
    set = 'A list of sample names that constitute the test set, one per line',
    ignore = 'A list of species to exclude from the analysis, one per line'
  )
)

#= Set output files
# Reads the first column of a headerless tab-delimited file as a plain vector.
read_first_column <- function(path) {
  read.table(path, header = FALSE, sep = '\t', as.is = TRUE)[, 1]
}

# Replace the file paths received as options by the data they contain.
opt$options[['x']] <- read.table(
  opt$options[['x']], header = TRUE, row.names = 1, sep = '\t'
)
opt$options[['set']] <- read_first_column(opt$options[['set']])
if (!is.null(opt$options[['ignore']])) {
  opt$options[['ignore']] <- read_first_column(opt$options[['ignore']])
}

# Any positional argument after the first describes the PDF device:
# file name, then optional width and height (converted to numbers).
with_pdf <- length(opt$args) > 1
if (with_pdf) {
  pdf_args <- as.list(opt$args[-1])
  dims <- intersect(2:3, seq_along(pdf_args))
  pdf_args[dims] <- lapply(pdf_args[dims], as.numeric)
  do.call('pdf', pdf_args)
} else {
  opt$options[['plot']] <- FALSE
}

#= Run it!
y <- do.call('enve.prefscore', opt$options)
write.table(y, opt$args[1], quote = FALSE, sep = '\t', col.names = FALSE)
if (with_pdf) invisible(dev.off())
|
|
@@ -1,69 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env ruby
|
|
2
|
-
|
|
3
|
-
#
|
|
4
|
-
# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
|
|
5
|
-
# @update Feb 01 2016
|
|
6
|
-
# @license artistic license 2.0
|
|
7
|
-
#
|
|
8
|
-
|
|
9
|
-
require "optparse"
|
|
10
|
-
|
|
11
|
-
# Command-line options and their defaults: tab-delimited tables, replace the
# first column, and use an empty string for keys missing from the map.
o = { delimiter: "\t", key: 1, default: "" }

# Show the help screen when the script is called without arguments.
ARGV << "-h" if ARGV.empty?
OptionParser.new do |opts|
  opts.banner = "\nReplaces a field in a table using a mapping file."
  opts.on("-m", "--map FILE",
          "Mapping file with two columns (key and replacement).") { |v| o[:map] = v }
  opts.on("-i", "--in FILE", "Input table.") { |v| o[:in] = v }
  opts.on("-o", "--out FILE", "Output table.") { |v| o[:out] = v }
  # DecimalInteger makes OptionParser reject non-numeric values instead of
  # silently turning them into 0, which would address the last column.
  opts.on("-k", "--key INT", OptionParser::DecimalInteger,
          "Column to replace in --in. By default: 1.") { |v| o[:key] = v }
  opts.on("-u", "--unknown STR",
          "String to use whenever the key is not found in --map.") { |v| o[:default] = v }
  opts.on("-d", "--delimiter STR",
          "String delimiting columns. By default, tabulation.") { |v| o[:delimiter] = v }
  opts.on("-h", "--help", "Display this screen") do
    puts opts
    exit
  end
  opts.separator ""
end.parse!
abort "-m is mandatory" if o[:map].nil?
abort "-i is mandatory" if o[:in].nil?
abort "-o is mandatory" if o[:out].nil?
# Columns are 1-based; zero or negative values would index from the row's end.
abort "-k must be a positive integer" if o[:key] < 1
|
|
36
|
-
|
|
37
|
-
class String
  # Tells whether the string can be parsed as a number by Kernel#Float.
  #
  # Only the errors raised by a failed conversion are rescued, so unrelated
  # exceptions are no longer hidden by a catch-all inline rescue.
  #
  # @return [Boolean] true if Float(self) succeeds, false otherwise
  def is_number?
    Float(self)
    true
  rescue ArgumentError, TypeError
    false
  end
end
|
|
42
|
-
|
|
43
|
-
begin
  # Read mapping file: the first column is the key, the second one its
  # replacement. Keys without a second column map to nil and therefore fall
  # back to the --unknown string below.
  map = {}
  File.foreach(o[:map]) do |ln|
    key, replacement = ln.chomp.split(o[:delimiter])
    map[key] = replacement
  end

  # Process table. Block-form File.open closes both handles even when an
  # exception interrupts the loop; the input is opened first so a missing
  # input file does not create or truncate the output.
  column = o[:key] - 1
  File.open(o[:in], "r") do |ifh|
    File.open(o[:out], "w") do |ofh|
      ifh.each_line do |ln|
        # A negative limit keeps trailing empty columns, so rows are written
        # back with the same number of fields they were read with.
        row = ln.chomp.split(o[:delimiter], -1)
        replacement = map[row[column]]
        row[column] = replacement.nil? ? o[:default] : replacement
        ofh.puts(row.join(o[:delimiter]))
      end
    end
  end
rescue => err
  $stderr.puts "Exception: #{err}\n\n"
  err.backtrace.each { |l| $stderr.puts " - " + l + "\n" }
  # Report the failure to the caller instead of finishing with status 0.
  exit 1
end
|
|
@@ -1,63 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env ruby
|
|
2
|
-
|
|
3
|
-
#
|
|
4
|
-
# @author: Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
|
|
5
|
-
# @update: Feb 04 2015
|
|
6
|
-
# @license: artistic license 2.0
|
|
7
|
-
#
|
|
8
|
-
|
|
9
|
-
require 'optparse'
|
|
10
|
-
|
|
11
|
-
# Command-line options and their defaults: round to zero decimal digits in a
# tab-delimited table.
o = { ndigits: 0, action: :round, delimiter: "\t" }

# Show the help screen when the script is called without arguments.
ARGV << '-h' if ARGV.empty?
OptionParser.new do |opts|
  opts.banner = "\nRounds numbers in a table."
  opts.on("-i", "--in FILE", "Input table.") { |v| o[:in] = v }
  opts.on("-o", "--out FILE", "Output table.") { |v| o[:out] = v }
  # DecimalInteger makes OptionParser reject non-numeric values instead of
  # silently turning them into 0.
  opts.on("-n", "--ndigits INT", OptionParser::DecimalInteger,
          "Number of decimal digits. By default: #{o[:ndigits]}") { |v| o[:ndigits] = v }
  opts.on("-f", "--floor",
          "Floors the values instead of rounding them. Ignores -n.") { o[:action] = :floor }
  opts.on("-c", "--ceil",
          "Ceils the values instead of rounding them. Ignores -n.") { o[:action] = :ceil }
  opts.on("-d", "--delimiter STR",
          "String delimiting columns. By default, tabulation.") { |v| o[:delimiter] = v }
  opts.on("-h", "--help", "Display this screen") do
    puts opts
    exit
  end
  opts.separator ""
end.parse!
abort "-i is mandatory" if o[:in].nil?
abort "-o is mandatory" if o[:out].nil?
|
|
29
|
-
|
|
30
|
-
class String
  # Tells whether the string can be parsed as a number by Kernel#Float.
  #
  # Only the errors raised by a failed conversion are rescued, so unrelated
  # exceptions are no longer hidden by a catch-all inline rescue.
  #
  # @return [Boolean] true if Float(self) succeeds, false otherwise
  def is_number?
    Float(self)
    true
  rescue ArgumentError, TypeError
    false
  end
end
|
|
35
|
-
|
|
36
|
-
begin
  # Block-form File.open closes both handles even when an exception
  # interrupts the loop; the input is opened first so a missing input file
  # does not create or truncate the output.
  File.open(o[:in], "r") do |ifh|
    File.open(o[:out], "w") do |ofh|
      ifh.each_line do |ln|
        # A negative limit keeps trailing empty columns in the output.
        row = ln.chomp.split(o[:delimiter], -1).map do |value|
          # Non-numeric cells (headers, labels, empty cells) pass through.
          next value unless value.is_number?

          # Convert with Float(), the same parser used by is_number?, so
          # accepted forms such as "0x1A" are not read as 0.0 by to_f.
          number = Float(value)
          rounded =
            case o[:action]
            when :round then number.round(o[:ndigits])
            when :floor then number.floor
            when :ceil  then number.ceil
            else value
            end
          rounded.to_s
        end
        ofh.puts(row.join(o[:delimiter]))
      end
    end
  end
rescue => err
  $stderr.puts "Exception: #{err}\n\n"
  err.backtrace.each { |l| $stderr.puts " - " + l + "\n" }
  # Report the failure to the caller instead of finishing with status 0.
  exit 1
end
|
|
@@ -1,57 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env perl
|
|
2
|
-
#
|
|
3
|
-
# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
|
|
4
|
-
# @update Feb-01-2016
|
|
5
|
-
# @license artistic license 2.0
|
|
6
|
-
#
|
|
7
|
-
|
|
8
|
-
use warnings;
|
|
9
|
-
use strict;
|
|
10
|
-
use Getopt::Std;
|
|
11
|
-
use Symbol;
|
|
12
|
-
|
|
13
|
-
# Parse options. `-e` is accepted for backward compatibility but is not used
# anywhere in this script.
my %o;
getopts('i:o:d:e:h', \%o);
my $file = shift @ARGV;

($file and not $o{h}) or die "
.Description:
Split a file with multiple columns into multiple two-columns lists.

.Usage:
$0 [options] file

Options:
-i <str> Input field-delimiter. By default: tabulation (\"\\t\").
-o <str> Prefix of the output files. By default: no prefix (\"\").
-d <str> Output directory. By default: current directory (\"\").

";
$o{i} ||= "\t";
$o{o} ||= "";
$o{o} = $o{d}."/".$o{o} if $o{d};

my $open = 0;   # Set once the header line has been consumed
my @fhs  = ();  # One output handle per data column, in header order
open my $in, "<", $file or die "Cannot read file: $file: $!\n";
while(my $ln = <$in>){
  chomp $ln;
  my @row = split $o{i}, $ln;
  my $h = shift @row;  # First column: row name, repeated in every output
  if($open){
    for my $i (0 .. $#row){
      # Rows wider than the header have no output file for the extra
      # columns; ignore them instead of failing on an undefined handle.
      last if $i > $#fhs;
      # Empty (or otherwise false) cells are not written
      print { $fhs[$i] } $h.$o{i}.$row[$i]."\n" if $row[$i];
    }
  }else{
    # Header line: create one output file per column, named after the
    # column label with '.', '/' and ':' replaced by '_'.
    $open++;
    for my $l (@row){
      $l =~ s/[\.\/:]/_/g;
      open(my $out, '>', $o{o}.$l.".txt") or die "Cannot create file: $o{o}$l.txt: $!\n";
      push @fhs, $out;
    }
  }
}
close $in;
# Buffered write errors only surface when closing, so check every handle
for my $fh (@fhs){
  close $fh or die "Cannot close output file: $!\n";
}
|
|
57
|
-
|
|
@@ -1,227 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env ruby
|
|
2
|
-
|
|
3
|
-
#
|
|
4
|
-
# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
|
|
5
|
-
# @update: Feb-06-2015
|
|
6
|
-
# @license artistic license 2.0
|
|
7
|
-
#
|
|
8
|
-
|
|
9
|
-
require 'optparse'
|
|
10
|
-
|
|
11
|
-
# Global options, also read by Taxonomy#node to decide whether to print
# warnings.
$opts = { warns: false }

# Show the help screen when the script is called without arguments.
ARGV << '-h' if ARGV.empty?
OptionParser.new do |opt|
  opt.separator "Re-formats Silva taxonomy into NCBI-like taxonomy dump files."
  opt.separator ""
  opt.separator "Mandatory arguments"
  opt.on("-k", "--silvaranks FILE",
         "Input Silva ranks file (e.g., tax_ranks_ssu_115.txt).") { |v| $opts[:silvaranks] = v }
  opt.on("-f", "--silvaref FILE",
         "Input Silva ref alignment file (e.g., SSURef_NR99_115_tax_silva_full_align_trunc.fasta).") { |v| $opts[:silvaref] = v }
  opt.separator ""
  opt.separator "Additional options"
  opt.on("-p", "--patch FILE",
         "If passed, it replaces the paths specified in the patch.") { |v| $opts[:patch] = v }
  opt.on("-s", "--seqinfo FILE",
         "If passed, it creates a CSV seq-info file compatible with taxtastic.") { |v| $opts[:seqinfo] = v }
  opt.on("-t", "--taxfile FILE",
         "If passed, it creates a simple TSV taxonomy file.") { |v| $opts[:taxfile] = v }
  opt.on("-n", "--ncbi FILE",
         "If passed, output folder for the NCBI dump files (e.g., taxdmp).") { |v| $opts[:ncbi] = v }
  opt.on("-w", "--warns", "Verbosely display warnings.") { $opts[:warns] = true }
  opt.on("-h", "--help", "Display this screen") do
    puts opt
    exit
  end
  opt.separator ""
end.parse!

# File.exist? replaces File.exists?, which was removed in Ruby 3.2.
abort "-k/--silvaranks is mandatory." if $opts[:silvaranks].nil?
abort "-k/--silvaranks must exist." unless File.exist? $opts[:silvaranks]
abort "-f/--silvaref is mandatory." if $opts[:silvaref].nil?
abort "-f/--silvaref must exist." unless File.exist? $opts[:silvaref]
|
|
36
|
-
|
|
37
|
-
# A node of the taxonomy tree. Each node knows its name, its rank, its parent
# and its children; +leaf+ flags nodes to be treated as leaves by #each_desc.
class Node
  attr_accessor :id, :tax, :leaf, :name_type
  attr_reader :name, :rank, :parent, :children

  # Creates a detached node called +name+. A nil +rank+ is stored as
  # "no rank". New nodes are non-leaves with name type "scientific name".
  def initialize(name, rank = nil)
    @name = name
    @rank = rank.nil? ? "no rank" : rank
    @parent = nil
    @children = []
    @leaf = false
    @name_type = "scientific name"
  end

  # Attaches this node under +node+ and registers it among its children.
  # When the node already hangs from a different parent it is removed from
  # that parent's children first, so it is never listed under two parents.
  # Passing nil detaches the node.
  def parent=(node)
    @parent.children.delete(self) if @parent && !@parent.equal?(node)
    @parent = node
    node.add_child(self) unless node.nil?
  end

  # Appends +node+ to the children of this node (does not set its parent).
  def add_child(node)
    @children << node
  end

  # Rank name translated to the NCBI vocabulary: "domain" becomes
  # "superkingdom", while "superkingdom" and "major_clade" become "no rank".
  # Any other rank is returned unchanged.
  def ncbirank
    case rank
    when "superkingdom", "major_clade" then "no rank"
    when "domain" then "superkingdom"
    else rank
    end
  end

  # Semicolon-delimited names from the topmost ancestor down to this node.
  def path
    parent.nil? ? name : "#{parent.path};#{name}"
  end

  # Depth-first pre-order traversal of this node and all its descendants.
  # Leaves are passed to the block only if +leaves+ is true, and non-leaf
  # nodes only if +internals+ is true.
  def each_desc(internals, leaves, &blk)
    blk.call(self) if leaf ? leaves : internals
    children.each { |child| child.each_desc(internals, leaves, &blk) }
  end

  # Human-readable label: name followed by the rank in parentheses.
  def to_s
    "#{name} (#{rank})"
  end
end
|
|
76
|
-
|
|
77
|
-
# Tree of Node objects hanging from a single 'root' node. It hands out
# consecutive numeric IDs: the root takes 1 and registered nodes start at 2.
class Taxonomy
  attr_reader :root, :next_id

  # Creates a taxonomy containing only the root node (ID 1).
  def initialize
    @root = Node.new('root')
    @root.id = 1
    @next_id = 2
  end

  # Assigns the next available ID to +node+ and hangs it from the root if it
  # has no parent yet. Returns the ID that the following node will receive.
  def register(node)
    node.id = next_id
    node.parent = root if node.parent.nil?
    @next_id += 1
  end

  # Walks +path+ (an array of names, from the top level down) starting at the
  # root and returns the node at its end. Missing levels are created as
  # "no rank" nodes and registered; a warning is printed for each one when
  # $opts[:warns] is set. An empty path returns the root.
  def node(path)
    path.inject(root) do |current, level|
      # Decide on the search result itself, not on the current node's name:
      # a level named like its parent (e.g. "A;A") must still be created.
      found = current.children.find { |child| child.name == level }
      next found if found

      if $opts && $opts[:warns]
        $stderr.puts "Warning: Impossible to find #{level} at #{current.to_s}, making it up."
      end
      child = Node.new(level)
      child.parent = current
      register(child)
      child
    end
  end

  # Yields every node of the tree, leaves and internal nodes alike.
  def each_node(&blk)
    root.each_desc(true, true, &blk)
  end

  # Yields only the nodes flagged as leaves.
  def each_leaf(&blk)
    root.each_desc(false, true, &blk)
  end

  # Yields only the nodes not flagged as leaves.
  def each_internal(&blk)
    root.each_desc(true, false, &blk)
  end
end
|
|
118
|
-
|
|
119
|
-
# Main body of the script: builds the taxonomy from the Silva ranks and
# reference alignment named in $opts, then writes whichever outputs were
# requested (NCBI-like dumps, seq-info CSV, plain taxonomy table). Any
# StandardError is reported on STDERR together with its backtrace.
begin
  taxo = Taxonomy.new

  ## Read patch
  # Tab-delimited file: column 1 is a path as found in the inputs, column 2
  # is the path that should be used in its place.
  patch = {}
  unless $opts[:patch].nil?
    $stderr.puts "Reading patch: #{$opts[:patch]}"
    # Block form, so the handle is closed (the original never closed it).
    File.open($opts[:patch], "r") do |fh|
      fh.each_line do |ln|
        m = ln.chomp.split(/\t/)
        patch[m[0]] = m[1]
      end
    end
  end

  ## Read the Silva ranks
  # Columns used: 0 = path, 1 = node name, 2 = second argument of Node.new
  # (presumably the rank -- Node#initialize is not visible here), 3 = flag.
  $stderr.puts "Reading Silva ranks: #{$opts[:silvaranks]}"
  File.open($opts[:silvaranks], "r") do |fh|
    fh.gets # header
    fh.each_line do |ln|
      m = ln.chomp.split(/\t/)
      m[0] = patch[m[0]] unless patch[m[0]].nil?
      p = m[0].split(/;/)
      # The last element of the path must be the node itself; popping it
      # leaves the path of the parent in +p+.
      raise "Inconsistent path and node name at line #{fh.lineno}: #{ln}." unless m[1] == p.pop
      next if m[3] == "w" # entries flagged "w" are not added to the tree

      node = Node.new(m[1], m[2])
      node.name_type = "common name" if m[3] == "a"
      node.parent = taxo.node(p)
      taxo.register(node)
    end
  end

  $stderr.puts " Top taxa:"
  taxo.root.children.each do |top|
    $stderr.puts " o #{top.to_s} has #{top.children.length} children."
  end

  ## Read the Silva ref alignment
  # Only FastA header lines (">ID path;to;the;taxon;name") are used.
  $stderr.puts "Reading Silva ref alignment: #{$opts[:silvaref]}"
  i = 0
  File.open($opts[:silvaref], "r") do |fh|
    fh.each_line do |ln|
      m = />([^\s]+)\s(.*)/.match(ln)
      next unless m

      # Patch: everything before the last ';' is looked up in the patch table
      pm = /(.+);([^;]+)/.match(m[2])
      raise "Cannot find a taxonomy path in the header of #{m[1]}: #{ln}" if pm.nil?
      path = "#{patch[pm[1]] || pm[1]};#{pm[2]}".split(/;/)
      # Register
      node = taxo.node(path)
      # NOTE(review): taxo.node already registers the nodes it creates, so
      # this call gives a fresh id to a node that already has one. It is kept
      # as-is because it determines the ids written to the output files.
      taxo.register(node)
      refseq = Node.new(m[1], 'refseq')
      refseq.parent = node
      refseq.leaf = true
      taxo.register(refseq)
      i += 1
    end
  end
  $stderr.puts " Saved #{i} leaves."

  ### NCBI
  unless $opts[:ncbi].nil?
    ## Create taxonomy .dmp files
    $stderr.puts "Creating NCBI-like files: #{$opts[:ncbi]}"
    # Dir.exists? was removed in Ruby 3.2; Dir.exist? works everywhere.
    Dir.mkdir($opts[:ncbi]) unless Dir.exist?($opts[:ncbi])
    # merged.dmp (created empty)
    $stderr.puts " o Creating merged.dmp"
    File.open(File.join($opts[:ncbi], 'merged.dmp'), 'w') {}
    # names.dmp
    $stderr.puts " o Creating names.dmp"
    File.open(File.join($opts[:ncbi], 'names.dmp'), 'w') do |fh|
      taxo.each_internal do |n|
        fh.puts [n.id, n.name, "", n.name_type].join("\t|\t") + "\t|"
      end
    end
    # nodes.dmp: id, parent id (own id for the root), rank, an empty field,
    # eight zeroes and a final empty field
    $stderr.puts " o Creating nodes.dmp"
    File.open(File.join($opts[:ncbi], 'nodes.dmp'), 'w') do |fh|
      taxo.each_internal do |n|
        parent_id = n.parent.nil? ? n.id : n.parent.id
        fh.puts [n.id, parent_id, n.ncbirank, "", *Array.new(8, 0), ""].join("\t|\t") + "\t|"
      end
    end
  end

  ## Taxtastic
  unless $opts[:seqinfo].nil?
    $stderr.puts "Creating seq-info file: #{$opts[:seqinfo]}"
    File.open($opts[:seqinfo], 'w') do |fh|
      fh.puts "\"seqname\",\"tax_id\",\"group_name\""
      taxo.each_leaf { |n| fh.puts "\"#{n.name}\",\"#{n.parent.id}\",\"#{n.parent.name}\"" }
    end
  end

  ## Misc
  unless $opts[:taxfile].nil?
    $stderr.puts "Creating taxonomy file: #{$opts[:taxfile]}"
    File.open($opts[:taxfile], 'w') do |fh|
      fh.puts "tax_id\tparent_id\trank\ttax_name"
      taxo.each_internal do |n|
        fh.puts [n.id, n.parent.nil? ? n.id : n.parent.id, n.rank, n.name].join("\t")
      end
    end
  end
rescue => err
  $stderr.puts "Exception: #{err}\n\n"
  err.backtrace.each { |l| $stderr.puts l + "\n" }
  err
end
|
|
226
|
-
|
|
227
|
-
|
|
@@ -1,147 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env ruby
|
|
2
|
-
|
|
3
|
-
# @author Luis M. Rodriguez-R
|
|
4
|
-
# @license Artistic-2.0
|
|
5
|
-
|
|
6
|
-
$:.push File.expand_path("../lib", __FILE__)
|
|
7
|
-
require "enveomics_rb/enveomics"
|
|
8
|
-
require "enveomics_rb/vcf"
|
|
9
|
-
|
|
10
|
-
# Command-line options, collected in +o+:
#   :file       - input VCF (mandatory)
#   :seqs       - gene sequences in FastA (mandatory)
#   :syn_frx    - optional fraction used to estimate the number of sites
#   :codon_file - optional output file with the codons of each variant
#
# The value of --syn-frx is applied downstream to the NonSynSites column, and
# the preset 0.760417 equals 438/576, the share of single-nucleotide codon
# changes that are NOT synonymous. The help text therefore describes it as
# the non-synonymous fraction; the option names are kept for compatibility.
o = {}
OptionParser.new do |opt|
  opt.banner = "
Estimates the Ka/Ks ratio from the SNPs in a VCF file. Ka and Ks are corrected
using pseudo-counts, but no corrections for multiple substitutions are
applied.

Usage: #{$0} [options]".gsub(/^ +/,"")
  opt.separator ""
  opt.separator "Mandatory"
  opt.on("-i", "--input FILE",
    "Input file in Variant Call Format (VCF)."){ |v| o[:file] = v}
  opt.on("-s", "--seqs FILE",
    "Input gene sequences (nucleotides) in FastA format."){ |v| o[:seqs] = v}
  opt.separator ""
  opt.separator "Parameters"
  opt.on("-f", "--syn-frx FLOAT",
    "Fraction of non-synonymous substitutions (note: not the synonymous",
    "fraction, despite the option name). If passed, the number of sites are",
    "estimated (not counted per gene), speeding up the computation ~10X."
    ){ |v| o[:syn_frx] = v.to_f }
  opt.on("-b", "--syn-bacterial-code",
    "Sets --syn-frx to 0.760417, approximately the proportion of",
    "non-synonymous substitutions in the bacterial code."
    ){ o[:syn_frx] = 0.760417 }
  opt.separator ""
  opt.separator "Miscellaneous"
  opt.on("-c", "--codon-file FILE",
    "Output file including the codons of substitution variants."
    ){ |v| o[:codon_file] = v }
  opt.on("-h", "--help", "Display this screen.") do
    puts opt
    exit
  end
  opt.separator ""
end.parse!

abort "--input is mandatory" if o[:file].nil?
abort "--seqs is mandatory" if o[:seqs].nil?
|
|
47
|
-
|
|
48
|
-
# Codon table (11. The Bacterial, Archaeal and Plant Plastid Code)
# https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi#SG11
#
# The five strings are read column-wise: column i gives the three bases of
# a codon, the amino acid it encodes and its start-codon flag.
code_table = {
  AAs:    "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
  Starts: "---M------**--*----M------------MMMM---------------M------------",
  Base1:  "TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG",
  Base2:  "TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG",
  Base3:  "TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG"
}
# Codon (e.g. "ATG") => amino acid letter, and codon => start flag.
$codon_aa = {}
$codon_st = {}
base_columns = code_table.values_at(:Base1, :Base2, :Base3).map(&:chars)
base_columns.transpose.each_with_index do |triplet, idx|
  codon = triplet.join
  $codon_aa[codon] = code_table[:AAs][idx]
  $codon_st[codon] = code_table[:Starts][idx]
end
|
|
64
|
-
|
|
65
|
-
##
# Is the change from +cod+ to +cod_alt+ synonymous? +start_codon+ indicates
# if the codon is the first in the gene, in which case the start-codon flags
# are compared instead of the encoded amino acids.
#
# Codons are compared case-insensitively: the lookup tables are keyed by
# uppercase codons, so lowercase input used to miss on both sides and every
# change was reported as synonymous. Codons still absent from the tables
# (e.g. containing N, or incomplete) look up as nil and therefore compare
# equal to each other, as before.
def syn?(cod, cod_alt, start_codon=false)
  table = start_codon ? $codon_st : $codon_aa
  table[cod.to_s.upcase] == table[cod_alt.to_s.upcase]
end
|
|
73
|
-
|
|
74
|
-
##
# Fraction of the substitutions at position +pos+ (1-based) of the sequence
# +seq+ that are synonymous, considering each nucleotide in +alts+ as the
# replacement. Codons within the first three positions are evaluated as
# start codons. If $codon_fh is set, one line is written to it with the
# synonymous count, the reference codon and the alternative codons.
def syn_fraction(seq, pos, alts)
  offset = (pos - 1) % 3          # index of the base within its codon
  codon_start = (pos - 1) - offset
  cod = seq[codon_start .. (codon_start + 2)]
  cod_alts = alts.map do |alt|
    mutated = cod.to_s.dup
    mutated[offset] = alt
    mutated
  end
  in_start_codon = pos <= 3
  syn = cod_alts.count { |variant| syn?(cod, variant, in_start_codon) }
  $codon_fh.puts [syn, cod, cod_alts.join(",")].join("\t") unless $codon_fh.nil?
  syn.to_f / alts.size
end
|
|
92
|
-
|
|
93
|
-
# Read sequences
# Builds +seqs+, mapping each FastA identifier (the first whitespace-delimited
# word after ">") to its sequence with every non-letter character removed.
seqs = {}
File.open(o[:seqs], "r") do |fh|
  id = nil
  fh.each_line do |ln|
    if ln =~ /^>(\S+)/
      id = $1
      seqs[id] = +""
    else
      residues = ln.chomp.gsub(/[^A-Za-z]/, "")
      next if residues.empty?
      # Lines before the first header used to fail with NoMethodError on nil
      raise "Sequence data found before the first FastA header in #{o[:seqs]}" if id.nil?
      # Append in place instead of reallocating the sequence on every line
      seqs[id] << residues
    end
  end
end
|
|
106
|
-
|
|
107
|
-
# Process variants
# Builds +gen+, mapping each sequence ID with at least one non-indel variant
# to [non-synonymous substitutions, synonymous substitutions]. Each variant
# contributes its synonymous fraction (over all its ALT alleles) to the second
# value and the remainder to the first. If --codon-file was given, the codons
# involved are logged through $codon_fh.
$codon_fh = nil
unless o[:codon_file].nil?
  $codon_fh = File.open(o[:codon_file], "w")
  $codon_fh.puts "#" + %w[Syn Ref Alt].join("\t")
end
gen = {}
begin
  vcf = VCF.new(o[:file])
  vcf.each_variant do |v|
    next if v.indel?
    seq = seqs[v.chrom]
    # A contig missing from --seqs used to fail with NoMethodError on nil
    raise "Sequence not found in --seqs: #{v.chrom}" if seq.nil?
    raise "REF doesn't match VCF:\n#{v}" unless seq[v.pos-1] == v.ref
    gen[v.chrom] ||= [0.0, 0.0]
    alts = v.alt.split(",")
    syn = syn_fraction(seq, v.pos, alts)
    gen[v.chrom][0] += 1.0-syn
    gen[v.chrom][1] += syn
  end
ensure
  # Close the codon file even if a variant raised
  $codon_fh.close unless $codon_fh.nil?
  $codon_fh = nil
end
|
|
126
|
-
|
|
127
|
-
# Ka/Ks
# One output row per sequence in +gen+. On entry v = [NonSynSubs, SynSubs];
# this loop appends v[2] = NonSynSites and v[3] = SynSites, matching the
# column order of the header.
puts "#" +
  "SeqID KaKs Ka Ks NonSynSubs SynSubs NonSynSites SynSites".tr(" ","\t")
gen.each do |k,v|
  if o[:syn_frx].nil?
    # Count sites: for each position, the fraction of the three possible
    # substitutions that is synonymous goes to SynSites, the rest to
    # NonSynSites.
    v[2] = 0.0
    v[3] = 0.0
    # Work on an uppercase copy: with a lowercase base nothing was removed
    # from the A/C/T/G list, so the identity "substitution" was counted too.
    seq = seqs[k].upcase
    (1 .. seq.size).each do |pos|
      alts = %w(A C T G) - [seq[pos-1]]
      syn = syn_fraction(seq, pos, alts)
      v[2] += 1.0-syn
      v[3] += syn
    end
  else
    # Estimate sites from the sequence length. Note that o[:syn_frx] scales
    # the NonSynSites column (v[2]) and its complement scales SynSites (v[3]).
    v[2] = seqs[k].size.to_f*o[:syn_frx]
    v[3] = seqs[k].size.to_f*(1.0-o[:syn_frx])
  end
  # Pseudo-counts: +1 substitution and +2 sites on each ratio
  ka = (v[0] + 1) / (v[2] + 2)
  ks = (v[1] + 1) / (v[3] + 2)
  puts(([k, ka/ks, ka, ks] + v).join("\t"))
end
|
|
147
|
-
|