RubyGems - lederhosen - Versions diffs - 2.0.8 → 3.1.0 - Mend

lederhosen 2.0.8 → 3.1.0

Files changed (15) hide show

data/.travis.yml +3 -0
data/lederhosen.gemspec +5 -3
data/lib/lederhosen/cli.rb +1 -1
data/lib/lederhosen/no_tasks.rb +18 -0
data/lib/lederhosen/tasks/cluster.rb +18 -15
data/lib/lederhosen/tasks/count_taxonomies.rb +1 -40
data/lib/lederhosen/tasks/make_udb.rb +3 -1
data/lib/lederhosen/tasks/otu_filter.rb +1 -1
data/lib/lederhosen/version.rb +7 -5
data/readme.md +37 -60
data/scripts/count_taxonomies.go +68 -0
data/spec/cli_spec.rb +10 -30
data/spec/no_tasks_spec.rb +24 -5
data/spec/spec_helper.rb +9 -0
metadata +6 -4

data/.travis.yml ADDED Viewed

@@ -0,0 +1,3 @@
+rvm:
+  - '1.9.7'
+script: bundle exec rspec

data/lederhosen.gemspec CHANGED Viewed

@@ -5,11 +5,11 @@
 Gem::Specification.new do |s|
   s.name = "lederhosen"
-  s.version = "2.0.8"
+  s.version = "3.1.0"
   s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
   s.authors = ["Austin G. Davis-Richardson"]
-  s.date = "2013-03-14"
+  s.date = "2013-07-03"
   s.description = "Various tools for OTU clustering"
   s.email = "harekrishna@gmail.com"
   s.executables = ["lederhosen"]
@@ -18,6 +18,7 @@ Gem::Specification.new do |s|
   ]
   s.files = [
     ".rspec",
+    ".travis.yml",
     "Gemfile",
     "LICENSE.txt",
     "Rakefile",
@@ -40,6 +41,7 @@ Gem::Specification.new do |s|
     "lib/lederhosen/version.rb",
     "logo.png",
     "readme.md",
+    "scripts/count_taxonomies.go",
     "scripts/illumina_pipeline/.gitignore",
     "scripts/illumina_pipeline/Makefile",
     "scripts/illumina_pipeline/pipeline.sh",
@@ -56,7 +58,7 @@ Gem::Specification.new do |s|
   s.homepage = "http://audy.github.com/lederhosen"
   s.licenses = ["MIT"]
   s.require_paths = ["lib"]
-  s.rubygems_version = "1.8.24"
+  s.rubygems_version = "1.8.25"
   s.summary = "OTU Clustering"
   if s.respond_to? :specification_version then

data/lib/lederhosen/cli.rb CHANGED Viewed

@@ -33,4 +33,4 @@ module Lederhosen
 end # module
-Dir.glob(File.join(File.dirname(__FILE__), 'tasks', '*.rb')).each { |f| require f }
+Dir.glob(File.join(File.dirname(__FILE__), 'tasks', '*.rb')).each { |f| require f }

data/lib/lederhosen/no_tasks.rb CHANGED Viewed

@@ -36,6 +36,8 @@ module Lederhosen
         # taxcollector taxonomy starts with an open square bracket
         if taxonomy =~ /^\[/
           :taxcollector
+        elsif taxonomy =~ /s__/
+          :greengenes_135
         elsif taxonomy =~ /^\d/
           :greengenes
         elsif taxonomy.nil?
@@ -51,6 +53,8 @@ module Lederhosen
         case @taxonomy_format
         when :greengenes
           parse_taxonomy_greengenes(taxonomy)
+        when :greengenes_135
+          parse_taxonomy_greengenes_135(taxonomy)
         when :taxcollector
           parse_taxonomy_taxcollector(taxonomy)
         when :qiime
@@ -62,6 +66,7 @@ module Lederhosen
       RE_TAXCOLLECTOR = /^\[0\](.*);\[1\](.*);\[2\](.*);\[3\](.*);\[4\](.*);\[5\](.*);\[6\](.*);\[7\](.*);\[8\](.*)/
       RE_GREENGENES = /k__(.*); ?p__(.*); ?c__(.*); ?o__(.*); ?f__(.*); ?g__(.*); ?(.*);/
+      RE_GREENGENES_135 = /k__(.*); ?p__(.*); ?c__(.*); ?o__(.*); ?f__(.*); ?g__(.*); ?s__(.*)/
       RE_QIIME = /k__(.*);p__(.*);c__(.*);o__(.*);f__(.*);g__(.*);s__(.*)/
       def parse_taxonomy_qiime(taxonomy)
@@ -90,6 +95,19 @@ module Lederhosen
         names
       end
+      def parse_taxonomy_greengenes_135(taxonomy)
+        levels = %w{domain phylum class order family genus species}
+        match_data = taxonomy.match(RE_GREENGENES_135)
+        match_data = match_data[1..-1]
+        names = Hash.new
+        # Hash[*levels.zip(match_data)] doesn't work: the splat hands Hash[] each [level, name] pair as a separate argument
+        levels.zip(match_data).each { |l, n| names[l] = n }
+        names['original'] = taxonomy
+        names
+      end
       # parse a taxonomic description using the
       # taxcollector format returning name at each level (genus, etc...)
       #

data/lib/lederhosen/tasks/cluster.rb CHANGED Viewed

@@ -4,22 +4,24 @@ module Lederhosen
     desc 'cluster', 'reference-based clustering using usearch'
-    method_option :input,    :type => :string,  :required => true
-    method_option :database, :type => :string,  :required => true
-    method_option :threads,  :type => :numeric, :default  => false
-    method_option :identity, :type => :numeric, :required => true
-    method_option :output,   :type => :string,  :required => true
-    method_option :strand,   :type => :string,  :default => 'plus'
-    method_option :dry_run,  :type => :boolean, :default => false
+    method_option :input,    :type  => :string,  :required => true
+    method_option :database, :type  => :string,  :required => true
+    method_option :threads,  :type  => :numeric, :default  => false
+    method_option :identity, :type  => :numeric, :required => true
+    method_option :output,   :type  => :string,  :required => true
+    method_option :strand,   :type  => :string,  :default  => 'plus'
+    method_option :dry_run,  :type  => :boolean, :default  => false
+    method_option :query_cov, :type => :numeric, :required => false, :default => 0.95
     def cluster
-      input    = File.expand_path(options[:input])
-      database = File.expand_path(options[:database])
-      threads  = options[:threads]
-      identity = options[:identity]
-      output   = File.expand_path(options[:output])
-      strand   = options[:strand]
-      dry_run  = options[:dry_run]
+      input     = File.expand_path(options[:input])
+      database  = File.expand_path(options[:database])
+      threads   = options[:threads]
+      identity  = options[:identity]
+      output    = File.expand_path(options[:output])
+      strand    = options[:strand]
+      dry_run   = options[:dry_run]
+      query_cov = options[:query_cov]
       ohai "#{'(dry run)' if dry_run} clustering #{input} to #{database} and saving to #{output}"
@@ -32,7 +34,8 @@ module Lederhosen
         "--id #{identity}",
         "--uc #{output}",
         "--db #{database}",
-        "--strand #{strand}"
+        "--strand #{strand}",
+        "--query_cov #{query_cov}"
       ]
       # threads = False : use all threads (default)

data/lib/lederhosen/tasks/count_taxonomies.rb CHANGED Viewed

@@ -5,27 +5,16 @@ module Lederhosen
     method_option :input, :type => :string, :required => true
     method_option :output, :type => :string, :required => true
-    method_option :strict, :type => :string, :default => false,
-                  :banner => '<level> only count reads where both taxonomies are in agreement at <level>'
     def count_taxonomies
       input  = options[:input]
       output = options[:output]
-      strict = options[:strict]
       ohai "generating #{output} from #{input}"
       handle = File.open(input)
       uc = UCParser.new(handle)
-      taxonomy_count =
-        if not strict
-          get_taxonomy_count(uc)
-        elsif strict
-          get_strict_taxonomy_count(uc, strict)
-        end
+      taxonomy_count = get_taxonomy_count(uc)
       handle.close
       out = File.open(output, 'w')
@@ -51,34 +40,6 @@ module Lederhosen
         taxonomy_count
       end
-      # returns Hash of taxonomy => number_of_reads
-      # if a pair of reads do not agree at a taxonomic level,
-      # or if at least one is unclassified, bot reads are counted
-      # as unclassified_reads
-      def get_strict_taxonomy_count(uc, level)
-        taxonomy_count = Hash.new { |h, k| h[k] = 0 }
-        # TODO: I'm making a block for results because I don't know how to
-        # make results return an Enumerator when not given a block
-        uc.each_slice(2) do |left, right|
-          if left.miss? or right.miss? # at least one is a miss
-            taxonomy_count['unclassified_reads'] += 2
-          # both are hits, check taxonomies
-          else
-            ta = parse_taxonomy(left.target)
-            tb = parse_taxonomy(right.target)
-            # they match up, count both separately
-            if ta[level] == tb[level]
-              taxonomy_count[left.target] += 1
-              taxonomy_count[right.target] += 1
-            # they don't match up, count as unclassified
-            else
-              taxonomy_count['unclassified_reads'] += 2
-            end
-          end
-        end # results.each_slice
-        taxonomy_count
-      end
     end
   end
 end

data/lib/lederhosen/tasks/make_udb.rb CHANGED Viewed

@@ -10,12 +10,14 @@ module Lederhosen
       input       = options[:input]
       output      = options[:output]
       word_length = options[:word_length]
+      db_step     = options[:db_step]
       ohai "making udb w/ #{input}, saving as #{output}."
       cmd = ['usearch',
              "-makeudb_usearch #{input}",
-             "-output #{output}"]
+             "-output #{output}",
+            ]
       cmd = cmd.join(' ')

data/lib/lederhosen/tasks/otu_filter.rb CHANGED Viewed

@@ -70,7 +70,7 @@ module Lederhosen
           kept_counts = counts.zip(mask).map { |c, m| c if m }.compact
           noise = counts.zip(mask).map { |c, m| c unless m }.compact.inject(:+)
-          filtered_reads += noise
+          filtered_reads += noise || 0
           output.puts "#{sample_name},#{kept_counts.join(',')},#{noise}"

data/lib/lederhosen/version.rb CHANGED Viewed

@@ -1,10 +1,12 @@
 module Lederhosen
   module Version
-    MAJOR = 2
-    MINOR = 0
-    CODENAME = 'Schnittlauchbrot' # changes for minor versions
-    PATCH = 8
+    MAJOR = 3
+    MINOR = 1
+    CODENAME = 'Hauptbahnhof' # changes for minor versions
+    PATCH = 0
-    STRING = [MAJOR, MINOR, PATCH].join('.')
+    string = [MAJOR, MINOR, PATCH].join('.')
+    STRING = string
   end
 end

data/readme.md CHANGED Viewed

@@ -1,52 +1,38 @@
 <img src="https://raw.github.com/audy/lederhosen/master/logo.png" align="right">
-# Lederhosen
-Lederhosen is a set of tools for OTU clustering rRNA amplicons using Robert Edgar's USEARCH.
-It's used to run USEARCH and create and filter tables. Unlike most of the software in Bioinformatics,
-It is meant to be UNIX-y: do one thing and do it well.
-Do you want to run Lederhosen on a cluster? Use `--dry-run` and feed it to your cluster's queue management system.
+[![Build
+Status](https://travis-ci.org/audy/lederhosen.png)](https://travis-ci.org/audy/lederhosen)
-Lederhosen is not a pipeline but rather a set of tools broken up into tasks. Tasks are invoked by running `lederhosen TASK ...`.
-Lederhosen is designed with the following "pipeline" in mind:
-1. Clustering sequences to reference sequences (read: database) and/or _de novo_ OTU clustering.
-  - `lederhosen cluster ...`
-2. Generating tables from USEARCH output.
-  - `lederhosen count_taxonomies ...`
-  - `lederhosen otu_table ...`
-3. Filtering tables to remove small or insignificant OTUs.
-  - `lederhosen otu_filter ...`
+# Lederhosen
+Lederhosen is a set of tools for OTU clustering rRNA amplicons using
+Robert Edgar's USEARCH and is simple, robust, and fast.
+Lederhosen was designed from the beginning to handle lots of data from
+lots of samples, specifically from data generated by multiplexed
+Illumina Hi/Mi-Seq sequencing.
-### About
+No assumptions are made about the design of your experiment.
+Therefore, there are no tools for read pre-processing and data analysis
+or statistics. Insert reads, receive data.
-- Lederhosen is a project born out of the Triplett Lab at the University of Florida.
-- Lederhosen is designed to be a fast and **simple** (~700 SLOC) tool to aid in clustering 16S rRNA amplicons sequenced
-using paired and non-paired end short reads such as those produced by Illumina (GAIIx, HiSeq and MiSeq), Ion Torrent, or Roche-454.
-- Lederhosen uses [Semantic Versioning](http://semver.org/), is free and open source under the
-[MIT open source license](http://opensource.org/licenses/mit-license.php/).
-- Except for USEARCH which requires a license, Lederhosen is available for commercial use.
+Lederhosen is free and open source under the MIT license. Except for
+the USEARCH license, Lederhosen is free for commercial use.
 ### Features
-- Closed/Open/Mixed OTU clustering to TaxCollector or GreenGenes via USEARCH.
-- Parallel support (pipe commands into [parallel](http://savannah.gnu.org/projects/parallel/), or use your cluster's queue).
-- Support for RDP, TaxCollector or GreenGenes 16S rRNA databases.
+- Reference-based OTU clustering via USEARCH.
+- Multiple Database Support (RDP, GreenGenes, TaxCollector, Silva).
+- Parallel support (USEARCH, MapReduce or Compute Cluster).
 - Generation and filtering of OTU abundancy matrices.
--. Support for paired end reads (considers taxonomic assignment for both reads in a pair).
 ### Installation
-0. Obtain & Install [USEARCH](http://www.drive5.com/) (32bit is fine for non-commercial use)
-2. Get a database:
+0. Obtain & Install [USEARCH](http://www.drive5.com/).
+1. Get a database:
   - [TaxCollector](http://github.com/audy/taxcollector)
   - [GreenGenes](http://greengenes.lbl.gov) 16S database
   - File an [issue report](https://github.com/audy/lederhosen/issues) or pull request ;) to request support for a different database.
-3. Install Lederhosen by typing:
+2. Install Lederhosen by typing:
     `sudo gem install lederhosen`
 3. Check installation by typing `lederhosen`. You should see some help text.
@@ -61,11 +47,17 @@ Lederhosen is invoked by typing `lederhosen [TASK]`
 ### Trim Reads
-Trimming removed. I think you should use [Sickle](https://github.com/najoshi/sickle).
+Trimming removed. I think you should use
+[Sickle](https://github.com/najoshi/sickle), or
+[Trimmomatic](http://www.usadellab.org/cms/index.php?page=trimmomatic).
+You can use
+[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) to inspect read quality.
 ### Create Database
-Create UDB database required by usearch from TaxCollector
+The 16S database can optionally be in USEARCH database format (udb).
+This speeds things up if you are clustering sequences in multiple FASTA
+files.
 ```bash
 lederhosen make_udb \
@@ -73,22 +65,21 @@ lederhosen make_udb \
   --output=taxcollector.udb
 ```
-(not actually required but will make batch searching a lot faster)
 ### Cluster Reads using USEARCH
 Cluster reads using USEARCH. Output is a uc file.
 ```bash
 lederhosen cluster \
-  --input=trimmed/*.fasta \
+  --input=trimmed/sequences.fasta \
   --identity=0.95 \
   --output=clusters_95.uc \
   --database=taxcollector.udb
 ```
-The optional `--dry-run` parameter outputs the usearch command to standard out.
-This is useful if you want to run usearch on a cluster.
+The optional `--dry-run` parameter prints the USEARCH command to
+standard out instead of actually running the command. This is useful if
+you want to run jobs in parallel and/or on a cluster.
 ```bash
 for reads_file in reads/*.fasta;
@@ -108,7 +99,7 @@ cat jobs.sh | parallel -j 24 # run 24 parallel jobs
 ### Generate taxonomy counts tables
-Before generating OTU tables, you must generate taxonomy counts tables.
+Before generating OTU tables, you must generate taxonomy counts (`.tax`) tables.
 A taxonomy count table looks something like this
@@ -125,19 +116,6 @@ lederhosen count_taxonomies \
   --output=clusters_taxonomies.txt
 ```
-If you did paired-end sequencing, you can generate strict taxonomy tables that only count reads when *both pairs* have the *same*
-taxonomic description at a certain taxonomic level. This is useful for leveraging the increased length of having pairs and also
-acts as a sort of chimera filter. You will, however, end up using less of your reads as the level goes from domain to species.
-```bash
-lederhosen count_taxonomies \
-  --input=clusters.uc \
-  --strict=genus \
-  --output=clusters_taxonomies.strict.genus.txt
-```
-Reads that do not have the same phylogeny at `level` will become `unclassified_reads`
 ### Generate OTU tables
 Create an OTU abundance table where rows are samples and columns are clusters. The entries are the number of reads for that cluster in a sample.
@@ -152,8 +130,8 @@ lederhosen otu_table \
 This will create the file `my_poop_samples_genus_strict.95.txt` containing the clusters
 as columns and the samples as rows.
-You now will apply advanced data mining and statistical techniques to this table to make
-interesting biological inferences and cure diseases.
+If your database doesn't have taxonomic descriptions, use
+`--level=original`.
 ### Filter OTU tables
@@ -175,7 +153,6 @@ lederhosen otu_filter \
 This will remove any clusters that do not appear in at least 10 samples with at least 50 reads. The read counts
 for filtered clusters will be moved to the `noise` pseudocluster.
 ### Get representative sequences
 You can get the representative sequences for each cluster using the `get_reps` tasks.
@@ -219,9 +196,9 @@ lederhosen separate_unclassified \
 ## Acknowledgements
-- Lexi, Vinnie and Kevin for beta-testing and putting up with bugs
-- The QIIME project for inspiration
-- Sinbad Richardson for the Lederhosen Guy artwork
+- [Sinbad Richardson](http://viennapitts.com/) for the Lederhosen Guy artwork
+- Lexi and Kevin for beta-testing and putting up with bugs.
+- The QIIME project for inspiration.
 ## Please Cite

data/scripts/count_taxonomies.go ADDED Viewed

@@ -0,0 +1,68 @@
+package main
+//
+// count_taxonomies.go
+// a faster alternative to lederhosen count_taxonomies
+// (c2013) Austin G. Davis-Richardson
+// MIT v3 LICENSE
+//
+// COMPILATION:
+//
+// 1.) Install Go (http://golang.org)
+// 2.) go build count_taxonomies.go
+// 3.) At this point you're ready to go
+//
+// USAGE:
+// count_taxonomies input.uc > output.tax
+//
+import (
+  "encoding/csv"
+  "fmt"
+  "io"
+  "os"
+)
+func main() {
+  table := map[string]int64{}
+  infile := os.Args[1]
+  file, err := os.Open(infile)
+  if err != nil {
+    panic(err)
+  }
+  defer file.Close()
+  reader := csv.NewReader(file)
+  reader.Comma = '\t'
+  // count items
+  for {
+    record, err := reader.Read()
+    if err == io.EOF {
+      break
+    } else if err != nil {
+      panic(err)
+    }
+    // key is the name of the target sequence:
+    // the 10th column of the uc file (index 9
+    // when counting from 0)
+    key := record[9]
+    if _, present := table[key]; present {
+      table[key] = table[key] + 1
+    } else {
+      table[key] = 1
+    }
+  }
+  for k, _ := range table {
+    fmt.Printf("%v,%v\n", k, table[k])
+  }
+}

data/spec/cli_spec.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 require 'spec_helper'
-describe Lederhosen::CLI do
+describe Lederhosen::CLI, :requires_usearch => true do
   it 'should have an executable' do
     `./bin/lederhosen`
@@ -39,10 +39,10 @@ describe Lederhosen::CLI do
     unclassified_reads = File.readlines("#{$test_dir}/unclassified.fasta")\
                              .select { |x| x =~ /^>/ }\
                              .size
     unclassified_results.should == unclassified_reads
   end
   it 'can separate unclassified reads from usearch output using strict pairing' do
     `./bin/lederhosen separate_unclassified --strict=genus --uc-file=spec/data/test.uc --reads=spec/data/trimmed/ILT_L_9_B_001.fasta --output=#{$test_dir}/unclassified.strict_genus.fasta`
     $?.success?.should be_true
@@ -52,42 +52,22 @@ describe Lederhosen::CLI do
   end
   it 'can create taxonomy count tables' do
-    `./bin/lederhosen count_taxonomies --input=spec/data/test.uc --output=#{$test_dir}/taxonomy_count.txt`
+    `./bin/lederhosen count_taxonomies --input=spec/data/test.uc --output=#{$test_dir}/taxonomy_count.tax`
     $?.success?.should be_true
-    File.exists?(File.join($test_dir, 'taxonomy_count.txt')).should be_true
+    File.exists?(File.join($test_dir, 'taxonomy_count.tax')).should be_true
   end
   it 'generates taxonomy tables w/ comma-free taxonomic descriptions' do
-    File.readlines(File.join($test_dir, 'taxonomy_count.txt'))\
+    File.readlines(File.join($test_dir, 'taxonomy_count.tax'))\
       .map(&:strip)\
       .map { |x| x.count(',') }\
       .uniq\
       .should == [1]
   end
-  %w{domain phylum class order family genus species}.each do |level|
-    it "generates taxonomy tables only counting pairs that agree at level: #{level}" do
-      `./bin/lederhosen count_taxonomies --input=spec/data/test.uc --output=#{$test_dir}/taxonomy_count.strict.#{level}.txt --strict=#{level}`
-      $?.success?.should be_true
-      lines = File.readlines(File.join($test_dir, "taxonomy_count.strict.#{level}.txt"))
-      # make sure total number of reads is even
-      # requires that there should be an odd number if classification is not strict
-      lines.select { |x| !(x =~ /^#/) }\
-           .map(&:strip)\
-           .map { |x| x.split(',') }\
-           .map(&:last)\
-           .map(&:to_i)\
-           .inject(:+).should be_even
-    end
-  end
-  %w{domain phylum class order family genus species}.each do |level|
-    it "should create OTU abundance matrices from taxonomy count tables at level: #{level}" do
-      `./bin/lederhosen otu_table --files=#{$test_dir}/taxonomy_count.strict.*.txt --level=#{level} --output=#{$test_dir}/otus_genus.strict.csv`
-      $?.success?.should be_true
-    end
+  it 'can create OTU abundance matrices' do
+    `./bin/lederhosen otu_table --files=#{$test_dir}/taxonomy_count.tax --output=#{$test_dir}/otus.genus.csv --level=genus`
+    $?.success?.should be_true
   end
   it 'should filter OTU abundance matrices' do
@@ -95,7 +75,7 @@ describe Lederhosen::CLI do
     # filtering should move filtered reads to 'unclassified_reads' so that we maintain
     # our knowledge of depth of coverage throughout
     # this makes normalization better later.
-    `./bin/lederhosen otu_filter --input=#{$test_dir}/otus_genus.strict.csv --output=#{$test_dir}/otu_table.filtered.csv --reads 1 --samples 1`
+    `./bin/lederhosen otu_filter --input=#{$test_dir}/otus.genus.csv --output=#{$test_dir}/otus_genus.filtered.csv --reads 1 --samples 1`
     $?.success?.should be_true
   end

data/spec/no_tasks_spec.rb CHANGED Viewed

@@ -3,6 +3,7 @@ require 'spec_helper'
 describe 'no_tasks' do
   let(:greengenes_taxonomies) { ['124 U55236.1 Methanobrevibacter thaueri str. CW k__domain; p__phylum; c__class; o__order; f__family; g__genus; species; otu_127']}
+  let(:greengenes135_taxonomies) { ['k__domain; p__phylum; c__class; o__order; f__family; g__genus; s__species']}
   let(:qiime_taxonomies) { [ 'k__domain;p__phylum;c__class;o__order;f__family;g__genus;s__species' ]}
   let(:taxcollector_taxonomies) { ['[0]domain;[1]phylum;[2]class;[3]order;[4]family;[5]genus;[6]species;[7]strain;[8]Genus_species_strain_id'] }
   let(:lederhosen) { Lederhosen::CLI.new }
@@ -15,6 +16,12 @@ describe 'no_tasks' do
     end
   end
+  it '#detect_taxonomy_format should recognize GreenGenes v13.5' do
+    greengenes135_taxonomies.each do |greengenes_taxonomy|
+      lederhosen.detect_taxonomy_format(greengenes_taxonomy).should == :greengenes_135
+    end
+  end
   it '#detect_taxonomy_format should recognize TaxCollector' do
     taxcollector_taxonomies.each do |taxcollector_taxonomy|
       lederhosen.detect_taxonomy_format(taxcollector_taxonomy).should == :taxcollector
@@ -33,28 +40,34 @@ describe 'no_tasks' do
         taxonomy[level].should == level
       end
     end
     it "#parse_taxonomy_greengenes should parse greengenes taxonomy (#{level})" do
       greengenes_taxonomies.each do |greengenes_taxonomy|
         taxonomy = lederhosen.parse_taxonomy_greengenes(greengenes_taxonomy)
         taxonomy[level].should == level
       end
     end
+    it "#parse_taxonomy_greengenes_135 should parse greengenes v13.5 taxonomy (#{level})" do
+      greengenes135_taxonomies.each do |greengenes_taxonomy|
+        taxonomy = lederhosen.parse_taxonomy_greengenes_135(greengenes_taxonomy)
+        taxonomy[level].should == level
+      end
+    end
     it "#parse_taxonomy_greengenes should parse qiime taxonomy (#{level})" do
       qiime_taxonomies.each do |qiime_taxonomy|
         taxonomy = lederhosen.parse_taxonomy_qiime(qiime_taxonomy)
         taxonomy[level].should == level
       end
     end
   end
   it '#parse_taxonomy_taxcollector should return original taxonomy' do
     lederhosen.parse_taxonomy_taxcollector(taxcollector_taxonomies[0])['original'].should == taxcollector_taxonomies[0]
   end
   it '#parse_taxonomy should automatically detect and parse greengenes taxonomy' do
     greengenes_taxonomies.each do |greengenes_taxonomy|
       lederhosen.parse_taxonomy(greengenes_taxonomy).should_not be_nil
@@ -67,6 +80,12 @@ describe 'no_tasks' do
     end
   end
+  it '#parse_taxonomy should automatically detect and parse greengenes 13.5 taxonomy' do
+    greengenes135_taxonomies.each do |greengenes_taxonomy|
+      lederhosen.parse_taxonomy(greengenes_taxonomy).should_not be_nil
+    end
+  end
   it '#parse_taxonomy_taxcollector should replace unclassified species names with strain name' do
     t = '[0]Bacteria;[1]Actinobacteria;[2]Actinobacteria;[3]Actinomycetales;[4]test;[5]null;[6]Propionibacterineae_bacterium;[7]Propionibacterineae_bacterium_870BRRJ;[8]Propionibacterineae_bacterium_870BRRJ|genus'
     tax = lederhosen.parse_taxonomy(t)

data/spec/spec_helper.rb CHANGED Viewed

@@ -7,3 +7,12 @@ Bundler.require :test, :development
 $test_dir = ENV['TEST_DIR'] || "/tmp/lederhosen_test_#{(0...8).map{65.+(rand(25)).chr}.join}/"
 `mkdir -p #{$test_dir}`
 $stderr.puts "test dir: #{$test_dir}"
+RSpec.configure do |c|
+  # check if usearch is in $PATH
+  # if not, skip usearch tests.
+  usearch = `which usearch`
+  if usearch == ''
+    c.filter_run_excluding :requires_usearch => true
+  end
+end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: lederhosen
 version: !ruby/object:Gem::Version
-  version: 2.0.8
+  version: 3.1.0
   prerelease:
 platform: ruby
 authors:
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-03-14 00:00:00.000000000 Z
+date: 2013-07-03 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: dna
@@ -100,6 +100,7 @@ extra_rdoc_files:
 - LICENSE.txt
 files:
 - .rspec
+- .travis.yml
 - Gemfile
 - LICENSE.txt
 - Rakefile
@@ -122,6 +123,7 @@ files:
 - lib/lederhosen/version.rb
 - logo.png
 - readme.md
+- scripts/count_taxonomies.go
 - scripts/illumina_pipeline/.gitignore
 - scripts/illumina_pipeline/Makefile
 - scripts/illumina_pipeline/pipeline.sh
@@ -149,7 +151,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
       version: '0'
       segments:
       - 0
-      hash: -1050380685311720987
+      hash: -391146498945924903
 required_rubygems_version: !ruby/object:Gem::Requirement
   none: false
   requirements:
@@ -158,7 +160,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 1.8.24
+rubygems_version: 1.8.25
 signing_key:
 specification_version: 3
 summary: OTU Clustering