lederhosen 2.0.6 → 2.0.7
Sign up to get free protection for your applications and to get access to all the features.
- data/lederhosen.gemspec +2 -2
- data/lib/lederhosen/tasks/otu_filter.rb +48 -34
- data/lib/lederhosen/version.rb +1 -1
- metadata +3 -3
data/lederhosen.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "lederhosen"
|
8
|
-
s.version = "2.0.6"
|
8
|
+
s.version = "2.0.7"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Austin G. Davis-Richardson"]
|
12
|
-
s.date = "2013-
|
12
|
+
s.date = "2013-03-11"
|
13
13
|
s.description = "Various tools for OTU clustering"
|
14
14
|
s.email = "harekrishna@gmail.com"
|
15
15
|
s.executables = ["lederhosen"]
|
@@ -18,57 +18,71 @@ module Lederhosen
|
|
18
18
|
|
19
19
|
ohai "filtering otu file #{input} (reads = #{reads}, samples = #{min_samples})"
|
20
20
|
|
21
|
-
|
21
|
+
# make one pass finding which OTUs to keep
|
22
|
+
# create mask that maps which columns correspond to good OTUs
|
23
|
+
# pass over table again printing only those columns
|
22
24
|
|
23
|
-
|
25
|
+
seen = Hash.new { |h, k| h[k] = 0 }
|
24
26
|
|
25
|
-
|
26
|
-
|
27
|
+
otu_order = []
|
28
|
+
|
29
|
+
pbar = ProgressBar.new 'counting', File.size(input)
|
30
|
+
total_reads = 0
|
31
|
+
|
32
|
+
File.open(input) do |handle|
|
27
33
|
header = handle.gets.strip.split(',')
|
28
|
-
|
34
|
+
header.each { |x| otu_order << x }
|
35
|
+
|
29
36
|
handle.each do |line|
|
37
|
+
pbar.set handle.pos
|
30
38
|
line = line.strip.split(',')
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
39
|
+
sample_name = line[0]
|
40
|
+
abunds = line[1..-1].map &:to_i
|
41
|
+
otu_order.zip(abunds) do |o, a|
|
42
|
+
total_reads += a
|
43
|
+
seen[o] += 1 if a >= reads
|
35
44
|
end
|
36
45
|
end
|
37
46
|
end
|
38
47
|
|
39
|
-
|
48
|
+
pbar.finish
|
40
49
|
|
41
|
-
|
42
|
-
# todo: move filtered reads to 'unclassified_reads' classification
|
43
|
-
filtered = cluster_sample_count.reject { |k, v| v.reject { |k, v| v < reads }.size < min_samples }
|
50
|
+
mask = otu_order.map { |x| seen[x] >= min_samples }
|
44
51
|
|
45
|
-
|
46
|
-
# it will make your better they said
|
47
|
-
noise = cluster_sample_count.keys - filtered.keys
|
52
|
+
ohai "found #{otu_order.size} otus, keeping #{mask.count(true)}"
|
48
53
|
|
49
|
-
|
54
|
+
output = File.open(output, 'w')
|
50
55
|
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
+
pbar = ProgressBar.new 'writing', File.size(input)
|
57
|
+
filtered_reads = 0
|
58
|
+
File.open(input) do |handle|
|
59
|
+
header = handle.gets.strip.split(',')
|
60
|
+
header = header.zip(mask).map { |k, m| k if m }.compact
|
61
|
+
output.print header.join(',')
|
62
|
+
output.print ",noise\n" # need a "noise" column
|
63
|
+
|
64
|
+
handle.each do |line|
|
65
|
+
pbar.set handle.pos
|
66
|
+
line = line.strip.split(',')
|
67
|
+
|
68
|
+
sample_name = line[0]
|
69
|
+
counts = line[1..-1].map &:to_i
|
70
|
+
|
71
|
+
kept_counts = counts.zip(mask).map { |c, m| c if m }.compact
|
72
|
+
noise = counts.zip(mask).map { |c, m| c unless m }.compact.inject(:+)
|
73
|
+
filtered_reads += noise
|
74
|
+
|
75
|
+
output.puts "#{sample_name},#{kept_counts.join(',')},#{noise}"
|
56
76
|
|
57
|
-
samples.each do |sample|
|
58
|
-
out.print "#{sample}"
|
59
|
-
clusters.each do |cluster|
|
60
|
-
out.print ",#{filtered[cluster][sample]}"
|
61
77
|
end
|
62
|
-
noise_sum = noise.map { |n| cluster_sample_count[n][sample] }.inject(:+)
|
63
|
-
out.print ",#{noise_sum || 0}"
|
64
|
-
out.print "\n"
|
65
78
|
end
|
66
|
-
out.close
|
67
79
|
|
68
|
-
|
69
|
-
|
70
|
-
total_reads
|
71
|
-
|
80
|
+
pbar.finish
|
81
|
+
|
82
|
+
ohai "kept #{total_reads - filtered_reads}/#{total_reads} reads (#{100*(total_reads - filtered_reads)/total_reads.to_f}%)"
|
83
|
+
|
84
|
+
output.close
|
85
|
+
|
72
86
|
end
|
73
87
|
|
74
88
|
end
|
data/lib/lederhosen/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lederhosen
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.6
|
4
|
+
version: 2.0.7
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-03-11 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: dna
|
@@ -149,7 +149,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
149
149
|
version: '0'
|
150
150
|
segments:
|
151
151
|
- 0
|
152
|
-
hash: -
|
152
|
+
hash: -4462113867490056177
|
153
153
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
154
154
|
none: false
|
155
155
|
requirements:
|