RubyGems - lederhosen - Versions diffs - 0.5.2 → 0.5.4 - Mend

lederhosen 0.5.2 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

data/lederhosen.gemspec +2 -2
data/lib/lederhosen/helpers.rb +1 -1
data/lib/lederhosen/tasks/k_filter.rb +1 -1
data/lib/lederhosen/tasks/split_fasta.rb +27 -2
data/lib/lederhosen/tasks/uc_filter.rb +1 -1
data/lib/lederhosen/version.rb +1 -1
data/spec/cli_spec.rb +8 -0
metadata +4 -4

data/lederhosen.gemspec CHANGED Viewed

@@ -5,11 +5,11 @@
 Gem::Specification.new do |s|
   s.name = "lederhosen"
-  s.version = "0.5.2"
+  s.version = "0.5.4"
   s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
   s.authors = ["Austin G. Davis-Richardson"]
-  s.date = "2012-08-27"
+  s.date = "2012-08-28"
   s.description = "Various tools for OTU clustering"
   s.email = "harekrishna@gmail.com"
   s.executables = ["lederhosen"]

data/lib/lederhosen/helpers.rb CHANGED Viewed

@@ -99,7 +99,7 @@ module Lederhosen
       pbar = ProgressBar.new 'loading uc file', bytes
       File.open(input) do |handle|
         handle.each do |line|
-          pbar.inc handle.pos
+          pbar.set handle.pos
           next if line =~ /^#/ # skip comments
           line = line.strip.split

data/lib/lederhosen/tasks/k_filter.rb CHANGED Viewed

@@ -28,7 +28,7 @@ module Lederhosen
         pbar = ProgressBar.new 'counting', File.size(input)
         records = Dna.new handle
         records.each do |r|
-          pbar.inc(handle.pos)
+          pbar.set handle.pos
           total_reads += 1
           kmers = r.sequence.to_kmers(k_len)
           kmers.each { |x| counting_table[x] += 1 }

data/lib/lederhosen/tasks/split_fasta.rb CHANGED Viewed

@@ -2,6 +2,8 @@
 # Split a fasta file into many fasta files with n reads
 #
+require 'zlib'
 module Lederhosen
   class CLI
@@ -11,26 +13,49 @@ module Lederhosen
     method_option :input,   :type => :string,  :required => true
     method_option :out_dir, :type => :string,  :required => true
     method_option :n,       :type => :numeric, :required => true
+    method_option :gzip,    :type => :boolean, :default  => false
     def split_fasta
       input   = options[:input]
       out_dir = options[:out_dir]
       n       = options[:n].to_i
+      gzip    = options[:gzip]
       ohai "splitting #{input} into files with #{n} reads stored in #{out_dir}"
+      ohai "using gzip" if gzip
       `mkdir -p #{out_dir}`
       File.open input do |handle|
         pbar = ProgressBar.new 'splitting', File.size(handle)
         Dna.new(handle).each_with_index do |record, i|
-          pbar.inc handle.pos
-          @out = File.open(File.join(out_dir, "split_#{i/n}.fasta"), 'w') if i%n == 0
+          pbar.set handle.pos
+          # I have to use an instance variable here because
+          # if I don't, the variable gets set to nil
+          # after each iteration.
+          @out =
+            if i%n == 0 # start a new file
+              # GzipWriter must be closed explicitly
+              # this raises an exception the first time
+              @out.close rescue nil
+              # create an IO object depending on whether or
+              # not the user wants to use gzip
+              if gzip
+                Zlib::GzipWriter.open(File.join(out_dir, "split_#{i/n}.fasta.gz"))
+              else
+                File.open(File.join(out_dir, "split_#{i/n}.fasta"), 'w')
+              end
+            else # keep using current handle
+              @out
+            end
           @out.puts record
         end
         pbar.finish
+        @out.close
       end
+      ohai "created #{Dir[File.join(out_dir, '*')].size} files"
     end
   end
 end

data/lib/lederhosen/tasks/uc_filter.rb CHANGED Viewed

@@ -53,7 +53,7 @@ module Lederhosen
         pbar = ProgressBar.new 'saving', File.size(input)
         handle.each do |line|
-          pbar.inc handle.pos
+          pbar.set handle.pos
           if line =~ /^#/
             out.print line
             next

data/lib/lederhosen/version.rb CHANGED Viewed

@@ -2,7 +2,7 @@ module Lederhosen
   module Version
     MAJOR = 0
     MINOR = 5
-    PATCH = 2
+    PATCH = 4
     STRING = [MAJOR, MINOR, PATCH].join('.')
   end

data/spec/cli_spec.rb CHANGED Viewed

@@ -9,6 +9,7 @@ describe Lederhosen::CLI do
   it 'should have a version command' do
     `./bin/lederhosen version `.strip.should == "lederhosen-#{Lederhosen::Version::STRING}"
+    $?.success?.should be_true
   end
   it 'should trim reads' do
@@ -43,10 +44,17 @@ describe Lederhosen::CLI do
   it 'should filter OTU abundance matrices' do
     `./bin/lederhosen otu_filter --input=#{$test_dir}/otu_table.csv --output=#{$test_dir}/otu_table.filtered.csv --reads 1 --samples 1`
+    $?.success?.should be_true
+  end
+  it 'should split a fasta file into smaller fasta files (optionally gzipped)' do
+    `./bin/lederhosen split_fasta --input=#{$test_dir}/joined.fasta --out-dir=#{$test_dir}/split/ --gzip true -n 100`
+    $?.success?.should be_true
   end
   it 'should split joined.fasta into reads for each cluster' do
     `./bin/lederhosen split --reads=#{$test_dir}/joined.fasta --clusters=#{$test_dir}/clusters.uc --out-dir=#{$test_dir}/split --min-clst-size=1`
+    $?.success?.should be_true
   end
   it 'should create a fasta file containing representative reads for each cluster' do

metadata CHANGED Viewed

@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: lederhosen
 version: !ruby/object:Gem::Version
-  hash: 15
+  hash: 3
   prerelease:
   segments:
   - 0
   - 5
-  - 2
-  version: 0.5.2
+  - 4
+  version: 0.5.4
 platform: ruby
 authors:
 - Austin G. Davis-Richardson
@@ -15,7 +15,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-08-27 00:00:00 Z
+date: 2012-08-28 00:00:00 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   type: :runtime