RubyGems - lederhosen - Versions diffs - 0.0.10 → 0.0.11 - Mend

lederhosen 0.0.10 → 0.0.11

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Files changed (9)

data/Gemfile +3 -1
data/lib/lederhosen/tasks/join.rb +1 -1
data/lib/lederhosen/tasks/{filter.rb → k_filter.rb} +5 -5
data/lib/lederhosen/tasks/otu_table.rb +0 -28
data/lib/lederhosen/tasks/uc_filter.rb +58 -0
data/lib/lederhosen.rb +3 -4
data/lib/version.rb +1 -1
data/pipeline.sh +37 -0
metadata +7 -5

data/Gemfile CHANGED Viewed

@@ -1,5 +1,7 @@
 source :rubygems
 gem 'thor'
 gem 'rspec'
 gem 'dna'
-gem 'progressbar'
+gem 'progressbar'
+gem 'awesome_print'

data/lib/lederhosen/tasks/join.rb CHANGED Viewed

@@ -31,7 +31,7 @@ module Lederhosen
           next
         end
-        records.each_slice(2) do |r, l|
+        records.each_slice(2) do |l, r|
           output.puts ">#{r.name}:#{File.basename(fasta_file, '.fasta')}\n#{r.sequence.reverse+l.sequence}"
         end
       end

data/lib/lederhosen/tasks/{filter.rb → k_filter.rb} RENAMED Viewed

@@ -5,7 +5,7 @@
 module Lederhosen
   class CLI
-    desc "filter fasta file",
+    desc "k_filter khmer filtering",
          "--input=joined.fasta --output=filtered.fasta --k=10 --cutoff=50"
     method_option :input,    :type => :string,  :required => true
@@ -13,7 +13,7 @@ module Lederhosen
     method_option :k,        :type => :numeric, :required => true
     method_option :cutoff,   :type => :numeric, :required => true
-    def filter
+    def k_filter
       input  = options[:input]
       output = options[:output]
       k_len  = options[:k].to_i
@@ -39,7 +39,7 @@ module Lederhosen
       kept = 0
       total_reads = total_reads.to_f
       pbar = ProgressBar.new "saving", total_reads.to_i
       output = File.open(output, 'w')
       File.open(input) do |handle|
@@ -60,7 +60,7 @@ module Lederhosen
               break
             end
           end
           if keep
             kept += 1
             output.puts r
@@ -70,7 +70,7 @@ module Lederhosen
       end
       pbar.finish
       ohai "survivors = #{kept} (#{kept/total_reads.to_f})"
       output.close
     end

data/lib/lederhosen/tasks/otu_table.rb CHANGED Viewed

@@ -45,34 +45,6 @@ module Lederhosen
         end
       end
-      # # Get representative sequences!
-      # reads_total = 0
-      # representatives = {}
-      # clusters[:count_data].each{ |k, x| representatives[x[:seed]] = k }
-      #
-      # out_handle = File.open("#{output}.fasta", 'w')
-      #
-      # File.open(joined_reads) do |handle|
-      #   records = Dna.new handle
-      #   records.each do |dna|
-      #     reads_total += 1
-      #     if !representatives[dna.name].nil?
-      #       dna.name = "#{dna.name}:cluster_#{representatives[dna.name]}"
-      #       out_handle.puts dna
-      #     end
-      #   end
-      # end
-      #
-      # out_handle.close
-      #
-      # # Print some statistics
-      # ohai "reads in clusters:  #{clusters_total}"
-      # ohai "number of reads:    #{reads_total}"
-      # ohai "unique clusters:    #{clusters.keys.length}"
     end
   end

data/lib/lederhosen/tasks/uc_filter.rb ADDED Viewed

@@ -0,0 +1,58 @@
+##
+# FILTER UC FILE BY MIN SAMPLES
+#
+module Lederhosen
+  class CLI
+    desc "uc_filter filter uc file by min samples",
+         "--input=clusters.uc --output=clusters.uc.filtered --reads=50 --samples=10"
+    method_option :input,    :type => :string,  :required => true
+    method_option :output,   :type => :string,  :required => true
+    method_option :reads,    :type => :numeric, :required => true
+    method_option :samples,  :type => :numeric, :required => true
+    def uc_filter
+      input   = options[:input]
+      output  = options[:output]
+      reads   = options[:reads].to_i
+      samples = options[:samples].to_i
+      # load UC file
+      clstr_info   = Helpers.load_uc_file input
+      clstr_counts = clstr_info[:clstr_counts] # clstr_counts[:clstr][sample.to_i] = reads
+      # filter
+      survivors = clstr_counts.reject do |a, b|
+        b.reject{ |i, j| j < reads }.length < samples
+      end
+      surviving_clusters = survivors.keys
+      # print filtered uc file
+      out = File.open(output, 'w')
+      kept, total = 0, 0
+      File.open(input) do |handle|
+        handle.each do |line|
+          if line =~ /^#/
+            out.print line
+            next
+          end
+          total += 1
+          if surviving_clusters.include? line.split[1].to_i
+            out.print line
+            kept += 1
+          end
+        end
+      end
+      out.close
+      ohai "Survivors"
+      ohai "clusters: #{surviving_clusters.length}/#{clstr_counts.keys.length} = #{100*surviving_clusters.length/clstr_counts.keys.length.to_f}%"
+      ohai "reads:    #{kept}/#{total} = #{100*kept/total.to_f}%"
+    end
+  end
+end

data/lib/lederhosen.rb CHANGED Viewed

@@ -1,8 +1,7 @@
 require 'rubygems'
-require 'thor'
-require 'dna'
-require 'set'
-require 'progressbar'
+require 'bundler'
+Bundler.require
 Dir.glob(File.join(File.dirname(__FILE__), 'lederhosen', '*.rb')).each { |f| require f }

data/lib/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Lederhosen
-  VERSION = '0.0.10'
+  VERSION = '0.0.11'
 end

data/pipeline.sh ADDED Viewed

@@ -0,0 +1,37 @@
+#!/bin/bash
+set +e
+# An example OTU clustering pipeline
+# Austin G. Davis-Richardson
+# <harekrishna at gmail dot com>
+raw_reads='raw_reads/*.txt'
+identities='0.975'
+out_dir='pipeline'
+# trim reads
+bin/lederhosen trim --reads-dir=$raw_reads --out-dir=$out_dir/trimmed
+# join reads
+bin/lederhosen join --trimmed=$out_dir/trimmed/*.fasta --output=$out_dir/joined.fasta
+# filter reads
+bin/lederhosen filter --input=$out_dir/joined.fasta --output=$out_dir/filtered.fasta -k=10 --cutoff=50
+# sort
+bin/lederhosen sort --input=$out_dir/filtered.fasta --output=$out_dir/sorted.fasta
+# cluster
+for i in $identities
+do
+    bin/lederhosen cluster --input=$out_dir/sorted.fasta --output=$out_dir/clusters_"$i"_.uc --identity=$i
+done
+# generate otu tables
+for i in $identities
+do
+    bin/lederhosen otu_table --clusters=$out_dir/clusters_"$i"_.uc --output=$out_dir/otus_"$i"
+done
+echo "complete!"

metadata CHANGED Viewed

@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: lederhosen
 version: !ruby/object:Gem::Version
-  hash: 11
+  hash: 9
   prerelease:
   segments:
   - 0
   - 0
-  - 10
-  version: 0.0.10
+  - 11
+  version: 0.0.11
 platform: ruby
 authors:
 - Austin G. Davis-Richardson
@@ -15,7 +15,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-05-14 00:00:00 Z
+date: 2012-05-22 00:00:00 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: dna
@@ -121,15 +121,17 @@ files:
 - lib/lederhosen/cli.rb
 - lib/lederhosen/helpers.rb
 - lib/lederhosen/tasks/cluster.rb
-- lib/lederhosen/tasks/filter.rb
 - lib/lederhosen/tasks/join.rb
+- lib/lederhosen/tasks/k_filter.rb
 - lib/lederhosen/tasks/name.rb
 - lib/lederhosen/tasks/otu_table.rb
 - lib/lederhosen/tasks/rep_reads.rb
 - lib/lederhosen/tasks/sort.rb
 - lib/lederhosen/tasks/split.rb
 - lib/lederhosen/tasks/trim.rb
+- lib/lederhosen/tasks/uc_filter.rb
 - lib/version.rb
+- pipeline.sh
 - readme.md
 - spec/data/ILT_L_9_B_001_1.txt
 - spec/data/ILT_L_9_B_001_3.txt