lederhosen 0.3.5 → 0.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lederhosen.gemspec CHANGED
@@ -5,7 +5,7 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "lederhosen"
8
- s.version = "0.3.5"
8
+ s.version = "0.3.6"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Austin G. Davis-Richardson"]
@@ -1,55 +1,46 @@
1
+ require 'set'
2
+
1
3
  module Lederhosen
2
4
  class CLI
3
5
 
4
6
  desc 'otu_filter', 'works like uc_filter but uses an OTU table as input'
5
7
 
6
- method_option :input, :type => :string, :required => true
7
- method_option :output, :type => :string, :required => true
8
- method_option :reads, :type => :numeric, :required => true
8
+ method_option :input, :type => :string, :required => true
9
+ method_option :output, :type => :string, :required => true
10
+ method_option :reads, :type => :numeric, :required => true
9
11
  method_option :samples, :type => :numeric, :required => true
10
12
 
11
13
  def otu_filter
12
- input = options[:input]
13
- output = options[:output]
14
- reads = options[:reads]
14
+ input = options[:input]
15
+ output = options[:output]
16
+ reads = options[:reads]
15
17
  min_samples = options[:samples]
16
18
 
17
19
  ohai "filtering otu file #{input} (reads = #{reads}, samples = #{min_samples}), saving to #{output}"
18
20
 
19
- ##
20
- # Iterate over otu table line by line.
21
- # Only print if cluster meets criteria
22
- #
23
- kept_clusters = 0
24
- total_reads = 0
25
- kept_reads = 0
26
-
27
- out = File.open(output, 'w')
28
-
29
- File.open(input) do |handle|
30
- header = handle.gets.strip
31
- header = header.split(',')
32
- samples = header[1..-1]
33
-
34
- out.puts header.join(',')
21
+ cluster_sample_count = Hash.new { |h, k| h[k] = Hash.new }
35
22
 
23
+ # slurp up CSV file
24
+ File.open input do |handle|
25
+ header = handle.gets.strip.split(',')
26
+ cluster_ids = header[1..-1]
36
27
  handle.each do |line|
37
- line = line.strip.split(',')
38
- cluster_no = line[0]
39
- counts = line[1..-1].collect { |x| x.to_i }
40
-
41
- # should be the same as uc_filter
42
- if counts.reject { |x| x < reads }.length > min_samples
43
- out.puts line.join(',')
44
- kept_clusters += 1
45
- kept_reads += counts.inject(:+)
28
+ line = line.strip.split(',')
29
+ sample_id = line[0].to_sym
30
+ counts = line[1..-1].map(&:to_i)
31
+ cluster_ids.zip(counts).each do |cluster, count|
32
+ cluster_sample_count[cluster][sample_id] = count
46
33
  end
47
- total_reads += counts.inject(:+)
48
34
  end
49
35
  end
50
36
 
51
- ohai "kept #{kept_reads} reads (#{kept_reads/total_reads.to_f})."
52
- ohai "kept #{kept_clusters} clusters."
37
+ # filter cluster_sample_count: keep clusters with at least `min_samples` samples whose count is >= `reads`
38
+ filtered = cluster_sample_count.reject { |k, v| v.reject { |k, v| v < reads }.size < min_samples }
39
+
40
+ ohai "kept #{filtered.keys.size} clusters (#{filtered.keys.size/cluster_sample_count.size.to_f})."
41
+ kept_reads = filtered.values.map { |x| x.values.inject(:+) }.inject(:+)
42
+ total_reads = cluster_sample_count.values.map { |x| x.values.inject(:+) }.inject(:+)
43
+ ohai "kept #{kept_reads}/#{total_reads} reads (#{kept_reads/total_reads.to_f})."
53
44
  end
54
45
 
55
46
  end
@@ -2,7 +2,7 @@ module Lederhosen
2
2
  module Version
3
3
  MAJOR = 0
4
4
  MINOR = 3
5
- PATCH = 5
5
+ PATCH = 6
6
6
 
7
7
  STRING = [MAJOR, MINOR, PATCH].join('.')
8
8
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lederhosen
3
3
  version: !ruby/object:Gem::Version
4
- hash: 25
4
+ hash: 31
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 3
9
- - 5
10
- version: 0.3.5
9
+ - 6
10
+ version: 0.3.6
11
11
  platform: ruby
12
12
  authors:
13
13
  - Austin G. Davis-Richardson