RubyGems - lederhosen - Versions diffs - 1.1.0 → 1.1.1 - Mend

lederhosen 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

data/lederhosen.gemspec +2 -1
data/lib/lederhosen/tasks/join_otu_tables.rb +59 -0
data/lib/lederhosen/tasks/otu_table.rb +8 -2
data/lib/lederhosen/version.rb +1 -1
data/readme.md +5 -3
data/spec/cli_spec.rb +5 -0
metadata +4 -3

data/lederhosen.gemspec CHANGED Viewed

@@ -5,7 +5,7 @@
 Gem::Specification.new do |s|
   s.name = "lederhosen"
-  s.version = "1.1.0"
+  s.version = "1.1.1"
   s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
   s.authors = ["Austin G. Davis-Richardson"]
@@ -27,6 +27,7 @@ Gem::Specification.new do |s|
     "lib/lederhosen.rb",
     "lib/lederhosen/cli.rb",
     "lib/lederhosen/tasks/cluster.rb",
+    "lib/lederhosen/tasks/join_otu_tables.rb",
     "lib/lederhosen/tasks/make_udb.rb",
     "lib/lederhosen/tasks/otu_filter.rb",
     "lib/lederhosen/tasks/otu_table.rb",

data/lib/lederhosen/tasks/join_otu_tables.rb ADDED Viewed

@@ -0,0 +1,59 @@
+require 'set'
+module Lederhosen
+  class CLI
+    desc 'join_otu_tables', 'combine multiple otu tables'
+    method_option :input,  :type => :string, :required => true
+    method_option :output, :type => :string, :required => true
+    def join_otu_tables
+      input = Dir[options[:input]]
+      output = options[:output]
+      ohai "combining #{input.size} file(s) and saving to #{output}"
+      all_otu_names = Set.new
+      all_samples = Set.new
+      sample_name_count = Hash.new { |h, k| h[k] = Hash.new { |h, k| h[k] = 0 } }
+      # read all of the csv files
+      input.each do |input_file|
+        File.open(input_file) do |handle|
+          otu_names = handle.gets.strip.split(',')[1..-1]
+          all_otu_names += otu_names.to_set
+          handle.each do |line|
+            line = line.strip.split(',')
+            sample = File.basename(input_file)
+            all_samples << sample
+            read_counts = line[1..-1]
+            otu_names.zip(read_counts) do |name, count|
+              sample_name_count[sample][name] = count
+            end
+          end
+        end
+      end
+      # save to csv
+      File.open(output, 'w') do |handle|
+        header = all_otu_names.to_a.sort
+        handle.puts "-,#{header.join(',')}"
+        all_samples.to_a.sort.each do |sample|
+          handle.print "#{sample}"
+          header.each do |name|
+            handle.print ",#{sample_name_count[sample][name]}"
+          end
+          handle.print "\n"
+        end
+      end
+    end
+  end
+end

data/lib/lederhosen/tasks/otu_table.rb CHANGED Viewed

@@ -33,13 +33,19 @@ module Lederhosen
       level_sample_cluster_count = Hash.new { |h, k| h[k] = Hash.new { |h, k| h[k] = Hash.new { |h, k| h[k] = 0 } } }
       all_names = Hash.new { |h, k| h[k] = Set.new }
-      pbar = ProgressBar.new "loading", input.size
+      # create a progress bar with the total number of bytes of
+      # the files we're slurping up
+      pbar = ProgressBar.new "loading", input.map{ |x| File.size(x) }.reduce(&:+)
       # Load cluster table
       input.each do |input_file|
-        pbar.inc
         File.open(input_file) do |handle|
           handle.each do |line|
+            # increase progressbar by the number of bytes in each line
+            pbar.inc line.unpack('*C').size
             dat = parse_usearch_line(line.strip)
             next if dat.nil?

data/lib/lederhosen/version.rb CHANGED Viewed

@@ -3,7 +3,7 @@ module Lederhosen
     MAJOR = 1
     MINOR = 1
     CODENAME = 'Apfelstrudel' # changes for minor versions
-    PATCH = 0
+    PATCH = 1
     STRING = [MAJOR, MINOR, PATCH].join('.')
   end

data/readme.md CHANGED Viewed

@@ -54,10 +54,12 @@ Cluster reads using USEARCH. Output is a uc file.
     lederhosen cluster --input=trimmed/*.fasta --identity=0.95 --output=clusters_95.uc --database=taxcollector.udb
-### Generate OTU tables
+### Generate OTU table(s)
 Create an OTU abundance table where rows are samples and columns are clusters. The entries are the number of reads for that cluster in a sample.
-    lederhosen otu_table --clusters=clusters_95.uc --output=genus.csv --level=genus
+    lederhosen otu_table --clusters=clusters_95.uc --prefix=otu_table --level=domain phylum class order family genus species
-Level can be Kingdom, Domain, Phylum, Class, Order, Family or Genus.
+This will create the files:
+    otu_table.domain.csv, ..., otu_table.species.csv

data/spec/cli_spec.rb CHANGED Viewed

@@ -37,6 +37,11 @@ describe Lederhosen::CLI do
     $?.success?.should be_true
   end
+  it 'should combine OTU abundance matrices' do
+    `./bin/lederhosen join_otu_tables --input=#{$test_dir}/otu_table*.csv --output=#{$test_dir}/merged.csv`
+    $?.success?.should be_true
+  end
   it 'should split a fasta file into smaller fasta files (optionally gzipped)' do
     `./bin/lederhosen split_fasta --input=#{$test_dir}/trimmed/ILT_L_9_B_001.fasta --out-dir=#{$test_dir}/split/ --gzip true -n 100`
     $?.success?.should be_true

metadata CHANGED Viewed

@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: lederhosen
 version: !ruby/object:Gem::Version
-  hash: 19
+  hash: 17
   prerelease:
   segments:
   - 1
   - 1
-  - 0
-  version: 1.1.0
+  - 1
+  version: 1.1.1
 platform: ruby
 authors:
 - Austin G. Davis-Richardson
@@ -123,6 +123,7 @@ files:
 - lib/lederhosen.rb
 - lib/lederhosen/cli.rb
 - lib/lederhosen/tasks/cluster.rb
+- lib/lederhosen/tasks/join_otu_tables.rb
 - lib/lederhosen/tasks/make_udb.rb
 - lib/lederhosen/tasks/otu_filter.rb
 - lib/lederhosen/tasks/otu_table.rb