lederhosen 2.0.6 → 2.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lederhosen.gemspec +2 -2
- data/lib/lederhosen/tasks/otu_filter.rb +48 -34
- data/lib/lederhosen/version.rb +1 -1
- metadata +3 -3
data/lederhosen.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "lederhosen"
|
8
|
-
s.version = "2.0.6"
|
8
|
+
s.version = "2.0.7"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Austin G. Davis-Richardson"]
|
12
|
-
s.date = "2013-
|
12
|
+
s.date = "2013-03-11"
|
13
13
|
s.description = "Various tools for OTU clustering"
|
14
14
|
s.email = "harekrishna@gmail.com"
|
15
15
|
s.executables = ["lederhosen"]
|
@@ -18,57 +18,71 @@ module Lederhosen
|
|
18
18
|
|
19
19
|
ohai "filtering otu file #{input} (reads = #{reads}, samples = #{min_samples})"
|
20
20
|
|
21
|
-
|
21
|
+
# make one pass finding which OTUs to keep
|
22
|
+
# create mask that maps which columns correspond to good OTUs
|
23
|
+
# pass over table again printing only those columns
|
22
24
|
|
23
|
-
|
25
|
+
seen = Hash.new { |h, k| h[k] = 0 }
|
24
26
|
|
25
|
-
|
26
|
-
|
27
|
+
otu_order = []
|
28
|
+
|
29
|
+
pbar = ProgressBar.new 'counting', File.size(input)
|
30
|
+
total_reads = 0
|
31
|
+
|
32
|
+
File.open(input) do |handle|
|
27
33
|
header = handle.gets.strip.split(',')
|
28
|
-
|
34
|
+
header.each { |x| otu_order << x }
|
35
|
+
|
29
36
|
handle.each do |line|
|
37
|
+
pbar.set handle.pos
|
30
38
|
line = line.strip.split(',')
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
39
|
+
sample_name = line[0]
|
40
|
+
abunds = line[1..-1].map &:to_i
|
41
|
+
otu_order.zip(abunds) do |o, a|
|
42
|
+
total_reads += a
|
43
|
+
seen[o] += 1 if a >= reads
|
35
44
|
end
|
36
45
|
end
|
37
46
|
end
|
38
47
|
|
39
|
-
|
48
|
+
pbar.finish
|
40
49
|
|
41
|
-
|
42
|
-
# todo: move filtered reads to 'unclassified_reads' classification
|
43
|
-
filtered = cluster_sample_count.reject { |k, v| v.reject { |k, v| v < reads }.size < min_samples }
|
50
|
+
mask = otu_order.map { |x| seen[x] >= min_samples }
|
44
51
|
|
45
|
-
|
46
|
-
# it will make your better they said
|
47
|
-
noise = cluster_sample_count.keys - filtered.keys
|
52
|
+
ohai "found #{otu_order.size} otus, keeping #{mask.count(true)}"
|
48
53
|
|
49
|
-
|
54
|
+
output = File.open(output, 'w')
|
50
55
|
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
+
pbar = ProgressBar.new 'writing', File.size(input)
|
57
|
+
filtered_reads = 0
|
58
|
+
File.open(input) do |handle|
|
59
|
+
header = handle.gets.strip.split(',')
|
60
|
+
header = header.zip(mask).map { |k, m| k if m }.compact
|
61
|
+
output.print header.join(',')
|
62
|
+
output.print ",noise\n" # need a "noise" column
|
63
|
+
|
64
|
+
handle.each do |line|
|
65
|
+
pbar.set handle.pos
|
66
|
+
line = line.strip.split(',')
|
67
|
+
|
68
|
+
sample_name = line[0]
|
69
|
+
counts = line[1..-1].map &:to_i
|
70
|
+
|
71
|
+
kept_counts = counts.zip(mask).map { |c, m| c if m }.compact
|
72
|
+
noise = counts.zip(mask).map { |c, m| c unless m }.compact.inject(:+)
|
73
|
+
filtered_reads += noise
|
74
|
+
|
75
|
+
output.puts "#{sample_name},#{kept_counts.join(',')},#{noise}"
|
56
76
|
|
57
|
-
samples.each do |sample|
|
58
|
-
out.print "#{sample}"
|
59
|
-
clusters.each do |cluster|
|
60
|
-
out.print ",#{filtered[cluster][sample]}"
|
61
77
|
end
|
62
|
-
noise_sum = noise.map { |n| cluster_sample_count[n][sample] }.inject(:+)
|
63
|
-
out.print ",#{noise_sum || 0}"
|
64
|
-
out.print "\n"
|
65
78
|
end
|
66
|
-
out.close
|
67
79
|
|
68
|
-
|
69
|
-
|
70
|
-
total_reads
|
71
|
-
|
80
|
+
pbar.finish
|
81
|
+
|
82
|
+
ohai "kept #{total_reads - filtered_reads}/#{total_reads} reads (#{100*(total_reads - filtered_reads)/total_reads.to_f}%)"
|
83
|
+
|
84
|
+
output.close
|
85
|
+
|
72
86
|
end
|
73
87
|
|
74
88
|
end
|
data/lib/lederhosen/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lederhosen
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.6
|
4
|
+
version: 2.0.7
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-03-11 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: dna
|
@@ -149,7 +149,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
149
149
|
version: '0'
|
150
150
|
segments:
|
151
151
|
- 0
|
152
|
-
hash: -
|
152
|
+
hash: -4462113867490056177
|
153
153
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
154
154
|
none: false
|
155
155
|
requirements:
|