lederhosen 0.3.5 → 0.3.6

Sign up to get free protection for your applications and to get access to all the features.
data/lederhosen.gemspec CHANGED
@@ -5,7 +5,7 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "lederhosen"
8
- s.version = "0.3.5"
8
+ s.version = "0.3.6"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Austin G. Davis-Richardson"]
@@ -1,55 +1,46 @@
1
+ require 'set'
2
+
1
3
  module Lederhosen
2
4
  class CLI
3
5
 
4
6
  desc 'otu_filter', 'works like uc_filter but uses an OTU table as input'
5
7
 
6
- method_option :input, :type => :string, :required => true
7
- method_option :output, :type => :string, :required => true
8
- method_option :reads, :type => :numeric, :required => true
8
+ method_option :input, :type => :string, :required => true
9
+ method_option :output, :type => :string, :required => true
10
+ method_option :reads, :type => :numeric, :required => true
9
11
  method_option :samples, :type => :numeric, :required => true
10
12
 
11
13
  def otu_filter
12
- input = options[:input]
13
- output = options[:output]
14
- reads = options[:reads]
14
+ input = options[:input]
15
+ output = options[:output]
16
+ reads = options[:reads]
15
17
  min_samples = options[:samples]
16
18
 
17
19
  ohai "filtering otu file #{input} (reads = #{reads}, samples = #{min_samples}), saving to #{output}"
18
20
 
19
- ##
20
- # Iterate over otu table line by line.
21
- # Only print if cluster meets criteria
22
- #
23
- kept_clusters = 0
24
- total_reads = 0
25
- kept_reads = 0
26
-
27
- out = File.open(output, 'w')
28
-
29
- File.open(input) do |handle|
30
- header = handle.gets.strip
31
- header = header.split(',')
32
- samples = header[1..-1]
33
-
34
- out.puts header.join(',')
21
+ cluster_sample_count = Hash.new { |h, k| h[k] = Hash.new }
35
22
 
23
+ # slurp up CSV file
24
+ File.open input do |handle|
25
+ header = handle.gets.strip.split(',')
26
+ cluster_ids = header[1..-1]
36
27
  handle.each do |line|
37
- line = line.strip.split(',')
38
- cluster_no = line[0]
39
- counts = line[1..-1].collect { |x| x.to_i }
40
-
41
- # should be the same as uc_filter
42
- if counts.reject { |x| x < reads }.length > min_samples
43
- out.puts line.join(',')
44
- kept_clusters += 1
45
- kept_reads += counts.inject(:+)
28
+ line = line.strip.split(',')
29
+ sample_id = line[0].to_sym
30
+ counts = line[1..-1].map(&:to_i)
31
+ cluster_ids.zip(counts).each do |cluster, count|
32
+ cluster_sample_count[cluster][sample_id] = count
46
33
  end
47
- total_reads += counts.inject(:+)
48
34
  end
49
35
  end
50
36
 
51
- ohai "kept #{kept_reads} reads (#{kept_reads/total_reads.to_f})."
52
- ohai "kept #{kept_clusters} clusters."
37
+ # filter sample_cluster_count
38
+ filtered = cluster_sample_count.reject { |k, v| v.reject { |k, v| v < reads }.size < min_samples }
39
+
40
+ ohai "kept #{filtered.keys.size} clusters (#{filtered.keys.size/cluster_sample_count.size.to_f})."
41
+ kept_reads = filtered.values.map { |x| x.values.inject(:+) }.inject(:+)
42
+ total_reads = cluster_sample_count.values.map { |x| x.values.inject(:+) }.inject(:+)
43
+ ohai "kept #{kept_reads}/#{total_reads} reads (#{kept_reads/total_reads.to_f})."
53
44
  end
54
45
 
55
46
  end
@@ -2,7 +2,7 @@ module Lederhosen
2
2
  module Version
3
3
  MAJOR = 0
4
4
  MINOR = 3
5
- PATCH = 5
5
+ PATCH = 6
6
6
 
7
7
  STRING = [MAJOR, MINOR, PATCH].join('.')
8
8
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lederhosen
3
3
  version: !ruby/object:Gem::Version
4
- hash: 25
4
+ hash: 31
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 3
9
- - 5
10
- version: 0.3.5
9
+ - 6
10
+ version: 0.3.6
11
11
  platform: ruby
12
12
  authors:
13
13
  - Austin G. Davis-Richardson