lederhosen 0.3.5 → 0.3.6
Sign up to get free protection for your applications and to get access to all the features.
- data/lederhosen.gemspec +1 -1
- data/lib/lederhosen/tasks/otu_filter.rb +25 -34
- data/lib/lederhosen/version.rb +1 -1
- metadata +3 -3
data/lederhosen.gemspec
CHANGED
data/lib/lederhosen/tasks/otu_filter.rb
CHANGED
@@ -1,55 +1,46 @@
|
|
1
|
+
require 'set'
|
2
|
+
|
1
3
|
module Lederhosen
|
2
4
|
class CLI
|
3
5
|
|
4
6
|
desc 'otu_filter', 'works like uc_filter but uses an OTU table as input'
|
5
7
|
|
6
|
-
method_option :input,
|
7
|
-
method_option :output,
|
8
|
-
method_option :reads,
|
8
|
+
method_option :input, :type => :string, :required => true
|
9
|
+
method_option :output, :type => :string, :required => true
|
10
|
+
method_option :reads, :type => :numeric, :required => true
|
9
11
|
method_option :samples, :type => :numeric, :required => true
|
10
12
|
|
11
13
|
def otu_filter
|
12
|
-
input
|
13
|
-
output
|
14
|
-
reads
|
14
|
+
input = options[:input]
|
15
|
+
output = options[:output]
|
16
|
+
reads = options[:reads]
|
15
17
|
min_samples = options[:samples]
|
16
18
|
|
17
19
|
ohai "filtering otu file #{input} (reads = #{reads}, samples = #{min_samples}), saving to #{output}"
|
18
20
|
|
19
|
-
|
20
|
-
# Iterate over otu table line by line.
|
21
|
-
# Only print if cluster meets criteria
|
22
|
-
#
|
23
|
-
kept_clusters = 0
|
24
|
-
total_reads = 0
|
25
|
-
kept_reads = 0
|
26
|
-
|
27
|
-
out = File.open(output, 'w')
|
28
|
-
|
29
|
-
File.open(input) do |handle|
|
30
|
-
header = handle.gets.strip
|
31
|
-
header = header.split(',')
|
32
|
-
samples = header[1..-1]
|
33
|
-
|
34
|
-
out.puts header.join(',')
|
21
|
+
cluster_sample_count = Hash.new { |h, k| h[k] = Hash.new }
|
35
22
|
|
23
|
+
# slurp up CSV file
|
24
|
+
File.open input do |handle|
|
25
|
+
header = handle.gets.strip.split(',')
|
26
|
+
cluster_ids = header[1..-1]
|
36
27
|
handle.each do |line|
|
37
|
-
line
|
38
|
-
|
39
|
-
counts
|
40
|
-
|
41
|
-
|
42
|
-
if counts.reject { |x| x < reads }.length > min_samples
|
43
|
-
out.puts line.join(',')
|
44
|
-
kept_clusters += 1
|
45
|
-
kept_reads += counts.inject(:+)
|
28
|
+
line = line.strip.split(',')
|
29
|
+
sample_id = line[0].to_sym
|
30
|
+
counts = line[1..-1].map(&:to_i)
|
31
|
+
cluster_ids.zip(counts).each do |cluster, count|
|
32
|
+
cluster_sample_count[cluster][sample_id] = count
|
46
33
|
end
|
47
|
-
total_reads += counts.inject(:+)
|
48
34
|
end
|
49
35
|
end
|
50
36
|
|
51
|
-
|
52
|
-
|
37
|
+
# filter sample_cluster_count
|
38
|
+
filtered = cluster_sample_count.reject { |k, v| v.reject { |k, v| v < reads }.size < min_samples }
|
39
|
+
|
40
|
+
ohai "kept #{filtered.keys.size} clusters (#{filtered.keys.size/cluster_sample_count.size.to_f})."
|
41
|
+
kept_reads = filtered.values.map { |x| x.values.inject(:+) }.inject(:+)
|
42
|
+
total_reads = cluster_sample_count.values.map { |x| x.values.inject(:+) }.inject(:+)
|
43
|
+
ohai "kept #{kept_reads}/#{total_reads} reads (#{kept_reads/total_reads.to_f})."
|
53
44
|
end
|
54
45
|
|
55
46
|
end
|
data/lib/lederhosen/version.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lederhosen
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 31
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 3
|
9
|
-
- 5
|
10
|
-
version: 0.3.5
|
9
|
+
- 6
|
10
|
+
version: 0.3.6
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Austin G. Davis-Richardson
|