RubyGems - miga-base - Versions diffs - 0.7.25.2 → 0.7.25.3 - Mend

miga-base 0.7.25.2 → 0.7.25.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (277) hide show

checksums.yaml +4 -4
data/lib/miga/version.rb +2 -2
data/utils/distance/runner.rb +2 -1
metadata +5 -278
data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Archaea_SCG.hmm +0 -41964
data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Bacteria_SCG.hmm +0 -32439
data/utils/FastAAI/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm +0 -62056
data/utils/FastAAI/FastAAI/FastAAI +0 -1336
data/utils/FastAAI/README.md +0 -84
data/utils/FastAAI/kAAI_v1.0_virus.py +0 -1296
data/utils/enveomics/Docs/recplot2.md +0 -244
data/utils/enveomics/Examples/aai-matrix.bash +0 -66
data/utils/enveomics/Examples/ani-matrix.bash +0 -66
data/utils/enveomics/Examples/essential-phylogeny.bash +0 -105
data/utils/enveomics/Examples/unus-genome-phylogeny.bash +0 -100
data/utils/enveomics/LICENSE.txt +0 -73
data/utils/enveomics/Makefile +0 -52
data/utils/enveomics/Manifest/Tasks/aasubs.json +0 -103
data/utils/enveomics/Manifest/Tasks/blasttab.json +0 -786
data/utils/enveomics/Manifest/Tasks/distances.json +0 -161
data/utils/enveomics/Manifest/Tasks/fasta.json +0 -766
data/utils/enveomics/Manifest/Tasks/fastq.json +0 -243
data/utils/enveomics/Manifest/Tasks/graphics.json +0 -126
data/utils/enveomics/Manifest/Tasks/mapping.json +0 -67
data/utils/enveomics/Manifest/Tasks/ogs.json +0 -382
data/utils/enveomics/Manifest/Tasks/other.json +0 -829
data/utils/enveomics/Manifest/Tasks/remote.json +0 -355
data/utils/enveomics/Manifest/Tasks/sequence-identity.json +0 -501
data/utils/enveomics/Manifest/Tasks/tables.json +0 -308
data/utils/enveomics/Manifest/Tasks/trees.json +0 -68
data/utils/enveomics/Manifest/Tasks/variants.json +0 -111
data/utils/enveomics/Manifest/categories.json +0 -156
data/utils/enveomics/Manifest/examples.json +0 -154
data/utils/enveomics/Manifest/tasks.json +0 -4
data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +0 -69
data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +0 -1
data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +0 -1
data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +0 -1
data/utils/enveomics/Pipelines/assembly.pbs/README.md +0 -189
data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +0 -112
data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +0 -23
data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +0 -44
data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +0 -50
data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +0 -37
data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +0 -68
data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +0 -49
data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +0 -80
data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +0 -57
data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +0 -63
data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +0 -38
data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +0 -73
data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +0 -21
data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +0 -72
data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +0 -98
data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +0 -1
data/utils/enveomics/Pipelines/blast.pbs/README.md +0 -127
data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +0 -109
data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +0 -128
data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +0 -16
data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +0 -22
data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +0 -26
data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +0 -89
data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +0 -29
data/utils/enveomics/Pipelines/idba.pbs/README.md +0 -49
data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +0 -95
data/utils/enveomics/Pipelines/idba.pbs/run.pbs +0 -56
data/utils/enveomics/Pipelines/trim.pbs/README.md +0 -54
data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +0 -70
data/utils/enveomics/Pipelines/trim.pbs/run.pbs +0 -130
data/utils/enveomics/README.md +0 -42
data/utils/enveomics/Scripts/AAsubs.log2ratio.rb +0 -171
data/utils/enveomics/Scripts/Aln.cat.rb +0 -163
data/utils/enveomics/Scripts/Aln.convert.pl +0 -35
data/utils/enveomics/Scripts/AlphaDiversity.pl +0 -152
data/utils/enveomics/Scripts/BedGraph.tad.rb +0 -93
data/utils/enveomics/Scripts/BedGraph.window.rb +0 -71
data/utils/enveomics/Scripts/BlastPairwise.AAsubs.pl +0 -102
data/utils/enveomics/Scripts/BlastTab.addlen.rb +0 -63
data/utils/enveomics/Scripts/BlastTab.advance.bash +0 -48
data/utils/enveomics/Scripts/BlastTab.best_hit_sorted.pl +0 -55
data/utils/enveomics/Scripts/BlastTab.catsbj.pl +0 -104
data/utils/enveomics/Scripts/BlastTab.cogCat.rb +0 -76
data/utils/enveomics/Scripts/BlastTab.filter.pl +0 -47
data/utils/enveomics/Scripts/BlastTab.kegg_pep2path_rest.pl +0 -194
data/utils/enveomics/Scripts/BlastTab.metaxaPrep.pl +0 -104
data/utils/enveomics/Scripts/BlastTab.pairedHits.rb +0 -157
data/utils/enveomics/Scripts/BlastTab.recplot2.R +0 -48
data/utils/enveomics/Scripts/BlastTab.seqdepth.pl +0 -86
data/utils/enveomics/Scripts/BlastTab.seqdepth_ZIP.pl +0 -119
data/utils/enveomics/Scripts/BlastTab.seqdepth_nomedian.pl +0 -86
data/utils/enveomics/Scripts/BlastTab.subsample.pl +0 -47
data/utils/enveomics/Scripts/BlastTab.sumPerHit.pl +0 -114
data/utils/enveomics/Scripts/BlastTab.taxid2taxrank.pl +0 -90
data/utils/enveomics/Scripts/BlastTab.topHits_sorted.rb +0 -101
data/utils/enveomics/Scripts/Chao1.pl +0 -97
data/utils/enveomics/Scripts/CharTable.classify.rb +0 -234
data/utils/enveomics/Scripts/EBIseq2tax.rb +0 -83
data/utils/enveomics/Scripts/FastA.N50.pl +0 -56
data/utils/enveomics/Scripts/FastA.extract.rb +0 -152
data/utils/enveomics/Scripts/FastA.filter.pl +0 -52
data/utils/enveomics/Scripts/FastA.filterLen.pl +0 -28
data/utils/enveomics/Scripts/FastA.filterN.pl +0 -60
data/utils/enveomics/Scripts/FastA.fragment.rb +0 -92
data/utils/enveomics/Scripts/FastA.gc.pl +0 -42
data/utils/enveomics/Scripts/FastA.interpose.pl +0 -93
data/utils/enveomics/Scripts/FastA.length.pl +0 -38
data/utils/enveomics/Scripts/FastA.mask.rb +0 -89
data/utils/enveomics/Scripts/FastA.per_file.pl +0 -36
data/utils/enveomics/Scripts/FastA.qlen.pl +0 -57
data/utils/enveomics/Scripts/FastA.rename.pl +0 -65
data/utils/enveomics/Scripts/FastA.revcom.pl +0 -23
data/utils/enveomics/Scripts/FastA.sample.rb +0 -83
data/utils/enveomics/Scripts/FastA.slider.pl +0 -85
data/utils/enveomics/Scripts/FastA.split.pl +0 -55
data/utils/enveomics/Scripts/FastA.split.rb +0 -79
data/utils/enveomics/Scripts/FastA.subsample.pl +0 -131
data/utils/enveomics/Scripts/FastA.tag.rb +0 -65
data/utils/enveomics/Scripts/FastA.wrap.rb +0 -48
data/utils/enveomics/Scripts/FastQ.filter.pl +0 -54
data/utils/enveomics/Scripts/FastQ.interpose.pl +0 -90
data/utils/enveomics/Scripts/FastQ.offset.pl +0 -90
data/utils/enveomics/Scripts/FastQ.split.pl +0 -53
data/utils/enveomics/Scripts/FastQ.tag.rb +0 -63
data/utils/enveomics/Scripts/FastQ.test-error.rb +0 -81
data/utils/enveomics/Scripts/FastQ.toFastA.awk +0 -24
data/utils/enveomics/Scripts/GFF.catsbj.pl +0 -127
data/utils/enveomics/Scripts/GenBank.add_fields.rb +0 -84
data/utils/enveomics/Scripts/HMM.essential.rb +0 -351
data/utils/enveomics/Scripts/HMM.haai.rb +0 -168
data/utils/enveomics/Scripts/HMMsearch.extractIds.rb +0 -83
data/utils/enveomics/Scripts/JPlace.distances.rb +0 -88
data/utils/enveomics/Scripts/JPlace.to_iToL.rb +0 -320
data/utils/enveomics/Scripts/M5nr.getSequences.rb +0 -81
data/utils/enveomics/Scripts/MeTaxa.distribution.pl +0 -198
data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl +0 -35
data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb +0 -49
data/utils/enveomics/Scripts/NCBIacc2tax.rb +0 -92
data/utils/enveomics/Scripts/Newick.autoprune.R +0 -27
data/utils/enveomics/Scripts/RAxML-EPA.to_iToL.pl +0 -228
data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +0 -32
data/utils/enveomics/Scripts/RefSeq.download.bash +0 -48
data/utils/enveomics/Scripts/SRA.download.bash +0 -57
data/utils/enveomics/Scripts/TRIBS.plot-test.R +0 -36
data/utils/enveomics/Scripts/TRIBS.test.R +0 -39
data/utils/enveomics/Scripts/Table.barplot.R +0 -31
data/utils/enveomics/Scripts/Table.df2dist.R +0 -30
data/utils/enveomics/Scripts/Table.filter.pl +0 -61
data/utils/enveomics/Scripts/Table.merge.pl +0 -77
data/utils/enveomics/Scripts/Table.replace.rb +0 -69
data/utils/enveomics/Scripts/Table.round.rb +0 -63
data/utils/enveomics/Scripts/Table.split.pl +0 -57
data/utils/enveomics/Scripts/Taxonomy.silva2ncbi.rb +0 -227
data/utils/enveomics/Scripts/VCF.KaKs.rb +0 -147
data/utils/enveomics/Scripts/VCF.SNPs.rb +0 -88
data/utils/enveomics/Scripts/aai.rb +0 -418
data/utils/enveomics/Scripts/ani.rb +0 -362
data/utils/enveomics/Scripts/clust.rand.rb +0 -102
data/utils/enveomics/Scripts/gi2tax.rb +0 -103
data/utils/enveomics/Scripts/in_silico_GA_GI.pl +0 -96
data/utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz +0 -0
data/utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz +0 -0
data/utils/enveomics/Scripts/lib/enveomics.R +0 -1
data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +0 -24
data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +0 -253
data/utils/enveomics/Scripts/lib/enveomics_rb/og.rb +0 -182
data/utils/enveomics/Scripts/lib/enveomics_rb/remote_data.rb +0 -74
data/utils/enveomics/Scripts/lib/enveomics_rb/seq_range.rb +0 -237
data/utils/enveomics/Scripts/lib/enveomics_rb/stat.rb +0 -30
data/utils/enveomics/Scripts/lib/enveomics_rb/vcf.rb +0 -135
data/utils/enveomics/Scripts/ogs.annotate.rb +0 -88
data/utils/enveomics/Scripts/ogs.core-pan.rb +0 -160
data/utils/enveomics/Scripts/ogs.extract.rb +0 -125
data/utils/enveomics/Scripts/ogs.mcl.rb +0 -186
data/utils/enveomics/Scripts/ogs.rb +0 -104
data/utils/enveomics/Scripts/ogs.stats.rb +0 -131
data/utils/enveomics/Scripts/rbm.rb +0 -146
data/utils/enveomics/Tests/Makefile +0 -10
data/utils/enveomics/Tests/Mgen_M2288.faa +0 -3189
data/utils/enveomics/Tests/Mgen_M2288.fna +0 -8282
data/utils/enveomics/Tests/Mgen_M2321.fna +0 -8288
data/utils/enveomics/Tests/Nequ_Kin4M.faa +0 -2970
data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.tribs.Rdata +0 -0
data/utils/enveomics/Tests/Xanthomonas_oryzae-PilA.txt +0 -7
data/utils/enveomics/Tests/Xanthomonas_oryzae.aai-mat.tsv +0 -17
data/utils/enveomics/Tests/Xanthomonas_oryzae.aai.tsv +0 -137
data/utils/enveomics/Tests/a_mg.cds-go.blast.tsv +0 -123
data/utils/enveomics/Tests/a_mg.reads-cds.blast.tsv +0 -200
data/utils/enveomics/Tests/a_mg.reads-cds.counts.tsv +0 -55
data/utils/enveomics/Tests/alkB.nwk +0 -1
data/utils/enveomics/Tests/anthrax-cansnp-data.tsv +0 -13
data/utils/enveomics/Tests/anthrax-cansnp-key.tsv +0 -17
data/utils/enveomics/Tests/hiv1.faa +0 -59
data/utils/enveomics/Tests/hiv1.fna +0 -134
data/utils/enveomics/Tests/hiv2.faa +0 -70
data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv +0 -233
data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.lim +0 -1
data/utils/enveomics/Tests/hiv_mix-hiv1.blast.tsv.rec +0 -233
data/utils/enveomics/Tests/phyla_counts.tsv +0 -10
data/utils/enveomics/Tests/primate_lentivirus.ogs +0 -11
data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv1.rbm +0 -9
data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-hiv2.rbm +0 -8
data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv1-siv.rbm +0 -6
data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-hiv2.rbm +0 -9
data/utils/enveomics/Tests/primate_lentivirus.rbm/hiv2-siv.rbm +0 -6
data/utils/enveomics/Tests/primate_lentivirus.rbm/siv-siv.rbm +0 -6
data/utils/enveomics/build_enveomics_r.bash +0 -45
data/utils/enveomics/enveomics.R/DESCRIPTION +0 -31
data/utils/enveomics/enveomics.R/NAMESPACE +0 -39
data/utils/enveomics/enveomics.R/R/autoprune.R +0 -155
data/utils/enveomics/enveomics.R/R/barplot.R +0 -184
data/utils/enveomics/enveomics.R/R/cliopts.R +0 -135
data/utils/enveomics/enveomics.R/R/df2dist.R +0 -154
data/utils/enveomics/enveomics.R/R/growthcurve.R +0 -331
data/utils/enveomics/enveomics.R/R/recplot.R +0 -354
data/utils/enveomics/enveomics.R/R/recplot2.R +0 -1631
data/utils/enveomics/enveomics.R/R/tribs.R +0 -583
data/utils/enveomics/enveomics.R/R/utils.R +0 -50
data/utils/enveomics/enveomics.R/README.md +0 -80
data/utils/enveomics/enveomics.R/data/growth.curves.rda +0 -0
data/utils/enveomics/enveomics.R/data/phyla.counts.rda +0 -0
data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +0 -17
data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +0 -17
data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +0 -17
data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +0 -25
data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +0 -46
data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +0 -23
data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +0 -47
data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +0 -23
data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +0 -23
data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +0 -32
data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +0 -91
data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +0 -57
data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +0 -24
data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +0 -19
data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +0 -39
data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +0 -38
data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +0 -40
data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +0 -67
data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +0 -37
data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +0 -122
data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +0 -45
data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +0 -24
data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +0 -68
data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +0 -25
data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +0 -21
data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +0 -19
data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +0 -19
data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +0 -41
data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +0 -29
data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +0 -18
data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +0 -40
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +0 -36
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +0 -19
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +0 -19
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +0 -27
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +0 -41
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +0 -17
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +0 -43
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +0 -37
data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +0 -74
data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +0 -59
data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +0 -27
data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +0 -32
data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +0 -59
data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +0 -28
data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +0 -27
data/utils/enveomics/enveomics.R/man/growth.curves.Rd +0 -14
data/utils/enveomics/enveomics.R/man/phyla.counts.Rd +0 -13
data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +0 -63
data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +0 -38
data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +0 -38
data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +0 -111
data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +0 -19
data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +0 -19
data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +0 -19
data/utils/enveomics/globals.mk +0 -8
data/utils/enveomics/manifest.json +0 -9

data/utils/enveomics/Scripts/M5nr.getSequences.rb DELETED Viewed

@@ -1,81 +0,0 @@
-#!/usr/bin/env ruby
-#
-# @author: Luis M. Rodriguez-R
-# @update: Feb-06-2015
-# @license: artistic license 2.0
-#
-require 'optparse'
-require 'rubygems'
-require 'restclient'
-require 'open-uri'
-require 'JSON'
-o = {:q=>FALSE, :url=>'http://api.metagenomics.anl.gov/m5nr', :max=>0, :recover=>FALSE}
-ARGV << '-h' if ARGV.size==0
-OptionParser.new do |opts|
-   opts.banner = "
-Downloads a set of sequences from M5nr with a given functional annotation.
-Usage: #{$0} [options]"
-   opts.separator ""
-   opts.separator "Mandatory"
-   opts.on("-f", "--function STR", "Functional annotation."){ |v| o[:function] = v }
-   opts.separator ""
-   opts.separator "Options"
-   opts.on("-m", "--max INT", "Maximum number of sequences to download.  By default: all (0)."){ |v| o[:max] = v.to_i }
-   opts.on("-r", "--recover", "If set, tries to recover a previous FastA."){ o[:recover]=TRUE }
-   opts.on("-n", "--url STR", "URL for M5nr API.  By default: #{o[:url]}."){ |v| o[:url] = v }
-   opts.on("-o", "--out FILE", "File containing the sequences.  By default: value of -f appended with .fa."){ |v| o[:out] = v }
-   opts.on("-q", "--quiet", "Run quietly (no STDERR output)"){ o[:q] = TRUE }
-   opts.on("-h", "--help", "Display this screen") do
-      puts opts
-      exit
-   end
-   opts.separator ""
-end.parse!
-abort "-f is mandatory" if o[:function].nil?
-o[:out] = "#{o[:function].gsub(/ /,'_')}.fa" if o[:out].nil?
-uri_fun = URI::encode(o[:function])
-ignore = {}
-if o[:recover] and File.exists? o[:out]
-   ih = File.open(o[:out], "r")
-   ih.each_line do |ln|
-      id = /^>(\S+)\s/.match(ln)
-      unless id.nil?
-         ignore[id[1]] = 1
-      end
-   end
-   ih.close
-   of = File.open(o[:out], "a+")
-else
-   of = File.open(o[:out], "w")
-end
-next_url = "#{o[:url]}/function/#{uri_fun}"
-i = 0
-loop do
-   $stderr.print "Downloading sequence #{i+1}.   \r" unless o[:q]
-   res_fun = RestClient.get next_url
-   abort "Unable to reach MG-RAST M5nr API, error code #{res_fun.code}." unless res_fun.code == 200
-   fun = JSON.parse(res_fun.to_str)
-   fun["data"].each do |datum|
-      if ignore["#{datum["source"]}:#{datum["accession"]}"].nil?
-	 res_seq = RestClient.get "#{o[:url]}/md5/#{datum["md5"]}", {:params=>{:sequence=>1}}
-	 abort "Unable to reach MG-RAST M5nr API, error code #{res_seq.code}." unless res_seq.code == 200
-	 seq = JSON.parse(res_seq.to_str)
-	 of.puts ">#{datum["source"]}:#{datum["accession"]} #{datum["function"]} [#{datum["organism"]} taxid:#{datum["ncbi_tax_id"]}]"
-	 of.puts seq["data"]["sequence"].scan(/.{80}|.+/).map{ |x| x.strip }.join($/)
-      end
-      i += 1
-      break if o[:max]>0 and i >= o[:max]
-   end # |datum|
-   next_url = fun["next"]
-   break if next_url.nil? or (o[:max] > 0 and i >= o[:max])
-end
-of.close
-$stderr.puts "Downloaded #{i} sequences." unless o[:q]

data/utils/enveomics/Scripts/MeTaxa.distribution.pl DELETED Viewed

@@ -1,198 +0,0 @@
-#!/usr/bin/env perl
-#
-# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
-# @license artistic license 2.0
-# @update Mar-23-2015
-# Tallies MyTaxa classifications per taxonomic rank and writes one "<prefix>.<Rank>.txt" report per rank.
-use warnings;
-use strict;
-use Symbol;
-use Getopt::Std;
-use List::Util qw/max/;
-sub HELP_MESSAGE { die "
-Usage:
-   $0 [args]
-Mandatory:
-   -m <str>	MyTaxa output.
-Optional:
-   -g <str>	Genes predicted in the format defined by -f.  If not passed, abundance is assumed to be based
-   		on contigs.
-   -f <str>	Format of the predicted genes.  One of:
-   		o gff2: GFF v2 as produced by MetaGeneMark.hmm (default).
-		o gff3: GFF v3, including the field id in the last column (with the Gene ID).
-		o tab: A tab-delimited file with the gene ID (col #1), the length of the gene in bp (col #2),
-		  and the ID of the corresponding contig (col #3). The length of the gene (col #2) isn't used
-		  (and it can be empty),  but the column must exist (i.e., 2 tabs per line) for compatibility
-		  with BlastTab.metaxaPrep.pl
-   -c <str>	Counts file: Sequence IDs (genes if -g is provided, contigs otherwise) and reads per sequence
-   		in a tab-delimited file.  If not provided, each sequence counts as 1.
-   -O <str>	Prefix of the output files to be generated.  By default, the value of -m.
-   -I <str>	File containing the complete classification of all the contigs identified as Innominate taxa.
-		By default, this file is not created.
-   -G <str>	File containing the classification of each gene.  By default, this file is not created.  This
-   		requires -g to be set.  Note: This option requires extra RAM.
-   -K <str>	File containing a krona input file.  By default, this file is not created.
-   -k <str>	List of ranks to include in the Krona file, delimited by comma.   It MUST be decreasing rank.
-   		By default: 'superkingdom,phylum,class,family,genus,species'.  This is ignored unless -K also
-		is passed.
-   -R <str>	List of taxonomic ranks for which individual reports should be generated, delimited by comma.
-   		It MUST be decreasing rank.  By default: 'phylum,genus,species'.
-   -r		If set, reports raw counts.  Otherwise, reports permil of the rank.
-   -u		Report Unknown taxa.
-   -q		Run quietly.
-   -h		Display this help message and exits.
-" } # dies with the usage text above; invoked for -h or when -m is missing
-my %o; # command-line options, keyed by flag letter
-getopts('g:f:c:m:O:I:G:K:k:R:ruqh', \%o); # flags followed by ':' take a value
-$o{h} and &HELP_MESSAGE;
-$o{m} or  &HELP_MESSAGE; # -m is mandatory
-$o{O} ||= $o{m}; # output prefix defaults to the MyTaxa file path
-$o{f} ||= "gff2";
-$o{k} ||= "superkingdom,phylum,class,family,genus,species";
-my @K = split /,/, lc $o{k}; # lowercased ranks for the Krona file (-k)
-$o{R} ||= "phylum,genus,species";
-my @R = split /,/, lc $o{R}; # lowercased ranks that get a per-rank report (-R)
-($o{G} and not $o{g}) and die "-G requires -g to be set.\n";
-my %gene; # gene ID => contig ID; only filled when -c is given
-my %count; # sequence ID => abundance (genes per contig, or reads when -c is given)
-my %ctg=(); # contig ID => list of its gene IDs; only filled when -G is given
-if($o{g}){ # read the gene table (-g) in the format selected by -f
-   print STDERR "Reading genes collection.\n" unless $o{q};
-   open GFF, "<", $o{g} or die "Cannot read file: $o{g}: $!\n";
-   while(<GFF>){
-      next if /^#/; # skip comment lines
-      next if /^\s*$/; # skip blank lines
-      chomp;
-      my($id,$ctg);
-      my @ln = split /\t/;
-      if($o{f} eq 'gff2'){
-	 exists $ln[8] or die "Cannot parse line $., expecting 9 columns: $_\n";
-	 $id = $ln[8]; # gene ID is the whole 9th column
-	 $id =~ s/gene_id /gene_id_/; # replace the space after 'gene_id' with an underscore
-	 $ctg=$ln[0];
-      }elsif($o{f} eq 'gff3'){
-	 exists $ln[8] or die "Cannot parse line $., expecting 9 columns: $_\n";
-	 $ln[8] =~ /id=([^;]+)/ or die "Cannot parse line $.: $_\n"; # gene ID comes from the id= attribute
-	 $id = $1;
-	 $ctg = $ln[0];
-      }elsif($o{f} eq 'tab'){
-         exists $ln[2] or die "Cannot parse line $., expecting 3 columns: $_\n";
-	 $id = $ln[0];
-	 $ctg = $ln[2];
-      }else{
-         die "Unsupported format: ".$o{f}.".\n";
-      }
-      $ctg =~ s/ .*//; # keep only the part of the contig name before the first space
-      if($o{c}){
-	 $gene{$id} = $ctg; # remembered so read counts per gene can be assigned to the contig
-      }else{
-         $count{$ctg}++; # without -c, each gene adds 1 to its contig
-      }
-      push( @{$ctg{$ctg}||=[]}, $id ) if $o{G};
-   }
-   close GFF;
-   print STDERR " Found ".(scalar(keys %gene))." genes.\n" unless $o{q}; # NOTE(review): %gene is only filled with -c, so this reports 0 otherwise
-}
-my $Nreads = 0; # total of the second column of the counts file
-if($o{c}){ # read the tab-delimited counts file (-c): ID in column 1, count in column 2
-   print STDERR "Reading read-counts.\n" unless $o{q};
-   open COUNT, "<", $o{c} or die "Cannot read file: $o{c}: $!\n";
-   while(<COUNT>){
-      chomp;
-      my @l = split /\t/;
-      if($o{g}){
-	 exists $gene{$l[0]} or die "Cannot find gene's contig: $l[0].\n";
-	 $count{ $gene{$l[0]} } += $l[1]; # the gene's count is added to its contig
-	 delete $gene{$l[0]}; # what remains in %gene afterwards are genes without a count
-      }else{
-	 $count{ $l[0] } += $l[1];
-      }
-      $Nreads += $l[1];
-   }
-   close COUNT;
-   print STDERR " Found ".scalar(keys %gene)." genes without reads.\n" if scalar(keys %gene) and not $o{q};
-   $count{$_}+=0 for values %gene; # make sure contigs of uncounted genes exist in %count (with 0 if new)
-   print STDERR " Found ".scalar(keys %count)." sequences and $Nreads reads.\n" unless $o{q};
-}
-print STDERR "Reading Metaxa results.\n"; # NOTE(review): printed even when -q is set
-open METAXA, "<", $o{m} or die "Cannot read file: $o{m}: $!\n";
-my $ctg;
-my $rank;
-my @ofh = ();
-my @n   = (0,0,0); # abundance totals: [0] overall, [r] for the r-th rank of -R
-my @out = ({},{},{}); # per rank index: taxon label => summed abundance
-my @rank_name = map { ucfirst } ('unknown', @R); # index 0 is 'Unknown', then the -R ranks capitalized
-my %rank = map { ($rank_name[$_]=>$_) } 0 .. $#rank_name; # capitalized rank name => its index
-my @rank_tag  = ("NA", map { "<$_>" } @R); # tag searched in the classification line for each rank index
-$o{I} and (open OUT_I, ">", $o{I} or die "Cannot create file: $o{I}: $!\n");
-$o{K} and (open OUT_K, ">", $o{K} or die "Cannot create file: $o{K}: $!\n");
-$o{G} and (open OUT_G, ">", $o{G} or die "Cannot create file: $o{G}: $!\n");
-my $Nreads_class = 0; # abundance accumulated over the entries processed below
-my $Nno_read_ctg = 0; # entries whose ID is absent from %count
-while(not eof(METAXA)){ # entries are consumed two lines at a time
-   my @h=split /\t/, <METAXA>; # first line of the pair, split on tabs
-   my $t=<METAXA>; chomp $t; # second line of the pair: the classification string
-   exists $h[3] or die "Cannot parse MyTaxa file, line $.: $_\n"; # NOTE(review): $_ is not assigned by the reads above - confirm what this prints
-   my $count_h;
-   if($o{c} or $o{g}){
-      unless(exists $count{$h[0]}){
-         $Nno_read_ctg++;
-	 next;
-      }
-      $count_h = $count{$h[0]};
-   }else{
-      $count_h = 1; # without -c or -g every entry weighs 1
-   }
-   if($o{G}){ print OUT_G "$_\t$t\n" for @{$ctg{$h[0]}} } # one line per gene of this contig
-   next unless $count_h; # entries with zero abundance add nothing
-   my $last = 'organism'; # label of the most recent rank found, used to name missing ranks
-   $n[0] += $count_h;
-   for my $r (1 .. max(values %rank)){
-      if($rank{$h[1]} >= $r){ # NOTE(review): $rank{$h[1]} is undef when column 2 is not one of the -R ranks - confirm intended
-	 if($t =~ m/$rank_tag[$r]([^;]*)/){
-	    $last = $1 if $1;
-	 }else{
-	    $last = $last=~/^Innominate / ? $last : "Innominate $last"; # add the prefix only once
-	    $o{I} and print OUT_I "$h[0]\t$rank_name[$r]\t$last\t$t\n";
-	 }
-	 $out[$r]->{$last} += $count_h;
-	 $n[$r] += $count_h;
-      }else{
-         $out[$r]->{"Unknown $last"} += $count_h if $o{u}; # only reported with -u; not added to $n[$r]
-      }
-   }
-   if($o{K}){
-      my $ln = $count_h; # Krona line: abundance followed by one column per -k rank
-      for my $r (@K){ $ln.= "\t".($t=~m/<$r>([^;]+)/?$1:'') }
-      print OUT_K "$ln\n";
-   }
-   $Nreads_class+= $count_h;
-}
-print OUT_K "".($Nreads-$Nreads_class)."\n" if $o{K} and $Nreads>$Nreads_class; # remainder not covered by any processed entry
-close METAXA;
-$o{I} and close OUT_I;
-$o{K} and close OUT_K;
-$o{G} and close OUT_G;
-print " Found $n[0] reads.\n" unless $o{q}; # goes to STDOUT, unlike the progress messages
-print " Couldn't find counts for $Nno_read_ctg contigs.\n" if $Nno_read_ctg; # NOTE(review): not silenced by -q
-unless($o{q}){ print " Found $n[$_] classified reads at ".$rank_name[$_]." level.\n" for (1 .. max(values %rank)) }
-print STDERR "Generating output.\n" unless $o{q};
-for my $rank (1 .. max(values %rank)){ # one "<prefix>.<Rank>.txt" file per -R rank
-   open OUT, ">", "$o{O}.".$rank_name[$rank].".txt" or die "Cannot create file: $o{O}.".$rank_name[$rank].".txt: $!\n";
-   for my $class (keys %{$out[$rank]}){
-      printf OUT "%s\t%.20f\n", $class, ($out[$rank]->{$class}*($o{r}?1:1000/$n[$rank])); # raw count with -r, permil of the rank total otherwise; NOTE(review): $n[$rank] may be 0 when only -u entries exist
-   }
-   close OUT;
-}

data/utils/enveomics/Scripts/MyTaxa.fragsByTax.pl DELETED Viewed

@@ -1,35 +0,0 @@
-#!/usr/bin/env perl
-#
-# @author: Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
-# @update: Mar-23-2015
-# @license: artistic license 2.0
-#
-# Scans a MyTaxa output and, for every line carrying "<rank>taxon" (followed
-# by ';' or the end of the line), prints the first tab-delimited field of the
-# line right before it.
-#
-# Arguments: file.txt taxon [rank]. Both taxon and rank are interpolated into
-# the search pattern as regular expressions; the default rank '.*' is what
-# makes "any rank" match.
-#
-use warnings;
-use strict;
-my($file,$tax,$rank) = @ARGV;
-($file and $tax) or die "
-Usage:
-   $0 file.txt taxon[ rank]
-   file.txt	MyTaxa output.
-   taxon	Taxon to look for.
-   rank		Rank of taxon (optional). By default: any rank.
-";
-$rank ||= ".*";
-$rank = lc $rank;
-# Lexical handle instead of a bareword global one.
-open my $mt, "<", $file or die "Cannot read file: $file: $!\n";
-my $last = '';
-while(my $ln=<$mt>){
-   chomp $ln;
-   if($ln =~ /<$rank>$tax(;|$)/){
-      # $last holds the previous line; only its first column is reported.
-      $last =~ s/\t.*//;
-      print $last, "\n";
-   }
-   $last = $ln;
-}
-close $mt;

data/utils/enveomics/Scripts/MyTaxa.seq-taxrank.rb DELETED Viewed

@@ -1,49 +0,0 @@
-#!/usr/bin/env ruby
-#
-# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
-# @update: Feb-06-2015
-# @license artistic license 2.0
-#
-require 'optparse'
-opts = {:rank=>'genus', :quiet=>FALSE}
-ARGV << '-h' if ARGV.size==0
-OptionParser.new do |opt|
-   opt.separator "Generates a simple tabular file with the classification of each sequence at a given taxonomic rank from a MyTaxa output."
-   opt.separator ""
-   opt.on("-i", "--mytaxa FILE", "Input MyTaxa file."){ |v| opts[:mytaxa]=v }
-   opt.on("-r", "--rank STR", "Taxonomic rank.  By default: #{opts[:rank]}."){ |v| opts[:rank] = v.downcase }
-   opt.on("-q","--quiet","Run quietly.") { opts[:quiet]=TRUE }
-   opt.on("-h","--help","Display this screen.") do
-      puts opt
-      exit
-   end
-   opt.separator ""
-end.parse!
-abort "-i/--mytaxa is mandatory." if opts[:mytaxa].nil?
-abort "-i/--mytaxa must exist." unless File.exists? opts[:mytaxa]
-begin
-   f = File.open(opts[:mytaxa], "r")
-   ctg = nil;
-   while(ln = f.gets)
-      m = /^(.+)(\t.+){3}/.match(ln)
-      if m
-	 raise "Couldn't find classification for contig #{ctg}" unless ctg.nil?
-	 ctg = m[1]
-      else
-	 raise "Couldn't find the contig name at line #{$.}" if ctg.nil?
-	 m = /<#{opts[:rank]}>([^;]+)/.match(ln)
-	 puts "#{ctg}\t#{m ? m[1] : "Unclassified"}"
-	 ctg = nil
-      end
-   end
-   f.close
-rescue => err
-   $stderr.puts "Exception: #{err}\n\n"
-   err.backtrace.each { |l| $stderr.puts l + "\n" }
-   err
-end

data/utils/enveomics/Scripts/NCBIacc2tax.rb DELETED Viewed

@@ -1,92 +0,0 @@
-#!/usr/bin/env ruby
-#
-# @author  Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
-# @license artistic license 2.0
-#
-# Prints a tab-delimited table with the taxonomy of each NCBI accession:
-# accession, taxonomy ID, and one column per requested rank. Each accession
-# takes one elink request (accession to taxonomy ID) and one efetch request
-# (taxonomy ID to lineage).
-#
-$:.push File.expand_path(File.dirname(__FILE__) + "/lib")
-require "enveomics_rb/remote_data"
-# NOTE(review): `use` and OptionParser are presumably made available by the
-# library loaded above - confirm.
-use "nokogiri"
-#================================[ Options parsing ]
-o = {
-  :q=>false, :accs=>[], :dbfrom=>"nuccore", :header=>true,
-  :no_nil=>false, :ret=>"ScientificName",
-  :ranks=>%w(superkingdom phylum class order family genus species)}
-OptionParser.new do |opt|
-  opt.banner = "
-  Maps a list of NCBI accessions to their corresponding taxonomy using the NCBI
-  EUtilities. Avoid using this script on millions of entries at a time, since
-  each entry elicits two requests to NCBI's servers.
-  Usage: #{$0} [options]".gsub(/^ +/,"")
-  opt.separator ""
-  opt.on("-a", "--acc acc1,acc2.ver,...", Array,
-    "Comma-separated list of accessions. Required unless -i is passed."
-    ){ |v| o[:accs]=v }
-  opt.on("-i", "--infile FILE",
-    "Raw text file containing the list of accessions, one per line.",
-    "Required unless -a is passed."){ |v| o[:infile]=v }
-  opt.on("-p", "--protein",
-    "Use if the accessions are proteins. Otherwise, accessions are assumed " +
-    "to be from the Nuccore Database."){ o[:dbfrom]="protein" }
-  opt.on("-r", "--ranks RANK1,RANK2,...", Array,
-    "Taxonomic ranks to report. By default: #{o[:ranks].join(",")}."
-    ){ |v| o[:ranks]=v }
-  opt.on("-n", "--noheader",
-    "Do not include a header in the output."){ o[:header]=false }
-  opt.on("-t", "--taxids",
-    "Return Taxonomy IDs instead of scientific names."){ o[:ret]="TaxId" }
-  opt.on("--ignore-missing",
-    "Does not report missing accessions in the output file.",
-    "By default, it reports accessions and empty values for all other columns."
-    ){ |v| o[:no_nil]=v }
-  opt.on("-q", "--quiet", "Run quietly."){ |v| o[:q]=true }
-  opt.on("-h", "--help","Display this screen") do
-    puts opt
-    exit
-  end
-  opt.separator ""
-end.parse!
-#================================[ Functions ]
-# Looks up the taxonomy entry linked to +acc+ in database +db+ through elink.
-# Returns the XML node holding the taxonomy ID (read it with #content), or
-# nil when the response has no such link.
-def acc2taxid(db, acc)
-  doc = Nokogiri::XML( RemoteData.elink({:dbfrom=>db,
-    :db=>"taxonomy", :id=>acc, :idtype=>"acc"}) )
-  doc.at_xpath("/eLinkResult/LinkSet/LinkSetDb/Link/Id")
-end
-#================================[ Main ]
-begin
-  unless o[:infile].nil?
-    # Blank lines would otherwise be queried as empty accessions.
-    o[:accs] += File.readlines(o[:infile]).map{ |l| l.chomp }.
-      reject{ |l| l.empty? }
-  end
-  o[:ranks].map!{ |r| r.downcase }
-  if o[:header]
-    puts((["Acc", "TaxId"] + o[:ranks].map{ |r| r.capitalize }).join("\t"))
-  end
-  o[:accs].each do |acc|
-    taxid = acc2taxid(o[:dbfrom], acc)
-    if taxid.nil?
-      # -q was parsed but never honoured before.
-      warn "Cannot find link to taxonomy: #{acc}" unless o[:q]
-      puts(([acc, ""] + o[:ranks].map{ "" }).join("\t")) unless o[:no_nil]
-      next
-    end
-    doc = Nokogiri::XML( RemoteData.efetch({:db=>"taxonomy",
-      :id=>taxid.content}) )
-    # The taxon's own rank first, then every ancestor listed in its lineage.
-    taxonomy = {}
-    taxonomy[ doc.at_xpath("/TaxaSet/Taxon/Rank").content ] =
-      doc.at_xpath("/TaxaSet/Taxon/#{o[:ret]}").content
-    doc.xpath("/TaxaSet/Taxon/LineageEx/Taxon").each do |taxon|
-      taxonomy[ taxon.at_xpath("./Rank").content ] =
-        taxon.at_xpath("./#{o[:ret]}").content
-    end
-    # Ranks missing from the lineage are reported as empty columns.
-    puts(([acc, taxid.content] +
-      o[:ranks].map{ |rank| taxonomy.fetch(rank, "") }).join("\t"))
-  end
-rescue => err
-  $stderr.puts "Exception: #{err}\n\n"
-  err.backtrace.each { |l| $stderr.puts l + "\n" }
-end