lederhosen 0.0.9 → 0.0.10
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/lederhosen/tasks/filter.rb +79 -0
- data/lib/lederhosen.rb +8 -0
- data/lib/version.rb +1 -1
- data/spec/misc_spec.rb +11 -0
- metadata +7 -4
@@ -0,0 +1,79 @@
|
|
1
|
+
##
|
2
|
+
# FILTER READS WITH LOW ABUNDANCE KMERS
|
3
|
+
#
|
4
|
+
|
5
|
+
module Lederhosen
  class CLI

    desc "filter fasta file",
         "--input=joined.fasta --output=filtered.fasta --k=10 --cutoff=50"

    method_option :input,  :type => :string,  :required => true
    method_option :output, :type => :string,  :required => true
    method_option :k,      :type => :numeric, :required => true
    method_option :cutoff, :type => :numeric, :required => true

    # Filter out reads that contain low-abundance k-mers.
    #
    # Makes two passes over the --input file:
    #
    # 1. counts how often every k-mer of length --k occurs across all reads;
    # 2. writes to --output only those reads whose k-mers all occur at least
    #    --cutoff times.
    #
    # Reads that yield no k-mers (e.g. shorter than --k) have nothing to
    # fail the check on and are therefore kept.
    #
    # Relies on names defined outside this file: `Dna` (record parser whose
    # records respond to #sequence), `String#to_kmers`, `ProgressBar` and
    # the `ohai` logging helper.
    #
    # Raises ArgumentError if --k is not a positive integer.
    def filter
      input       = options[:input]
      output_path = options[:output]
      k_len       = options[:k].to_i
      cutoff      = options[:cutoff]

      # k <= 0 produces no usable k-mers, which would silently keep every read
      raise ArgumentError, "--k must be a positive integer" unless k_len > 0

      counting_table = Hash.new(0)
      total_reads    = 0

      ohai "counting kmers"
      File.open(input) do |handle|
        Dna.new(handle).each do |r|
          total_reads += 1
          r.sequence.to_kmers(k_len).each { |kmer| counting_table[kmer] += 1 }
        end
      end

      # explicit seed so an empty input reports 0 rather than nil
      sum_of_kmers = counting_table.values.inject(0, :+)

      ohai "total reads = #{total_reads}"
      ohai "sum of kmers = #{sum_of_kmers}"

      kept = 0
      pbar = ProgressBar.new "saving", total_reads

      # block forms guarantee both handles are closed even if a record
      # fails to parse or a write raises part-way through
      File.open(output_path, 'w') do |out|
        File.open(input) do |handle|
          Dna.new(handle).each do |r|
            kmers = r.sequence.to_kmers(k_len)

            # if any of the kmers are rare, don't print the read
            # (all? stops at the first rare kmer)
            if kmers.all? { |kmer| counting_table[kmer] >= cutoff }
              kept += 1
              out.puts r
            end

            pbar.inc
          end
        end
      end

      pbar.finish

      # avoid 0/0.0 => NaN when the input contained no reads
      fraction = total_reads.zero? ? 0.0 : kept / total_reads.to_f
      ohai "survivors = #{kept} (#{fraction})"
    end
  end

end
|
data/lib/lederhosen.rb
CHANGED
@@ -5,3 +5,11 @@ require 'set'
|
|
5
5
|
require 'progressbar'
|
6
6
|
|
7
7
|
Dir.glob(File.join(File.dirname(__FILE__), 'lederhosen', '*.rb')).each { |f| require f }
|
8
|
+
|
9
|
+
class String
  # Split the string into every overlapping substring of length k
  # ("k-mers"), in left-to-right order.
  #
  #   'test'.to_kmers(2) # => ["te", "es", "st"]
  #
  # k - Integer length of each substring.
  #
  # Returns an Array of Strings. The Array is empty when k is zero or
  # negative, or when k is larger than the string itself.
  def to_kmers(k)
    # a negative k used to slip past the zero check and yield bogus slices
    return [] if k <= 0

    # start offsets run 0..length-k; the range is empty when k > length
    (0..(length - k)).map { |i| self[i, k] }
  end
end
|
data/lib/version.rb
CHANGED
data/spec/misc_spec.rb
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe String do
  # Covers String#to_kmers: overlapping substrings for k within the string
  # length, and an empty result when k is zero or exceeds the length.
  it 'to_kmers should generate kmers for a string' do
    'test'.to_kmers(2).should == ['te', 'es', 'st']
    'test'.to_kmers(3).should == ['tes', 'est']
    'test'.to_kmers(4).should == ['test']
    'test'.to_kmers(5).should == []
    'test'.to_kmers(0).should == []
  end
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lederhosen
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 11
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
- 9
|
10
|
-
version: 0.0.9
|
9
|
+
- 10
|
10
|
+
version: 0.0.10
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Austin G. Davis-Richardson
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-05-
|
18
|
+
date: 2012-05-14 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: dna
|
@@ -121,6 +121,7 @@ files:
|
|
121
121
|
- lib/lederhosen/cli.rb
|
122
122
|
- lib/lederhosen/helpers.rb
|
123
123
|
- lib/lederhosen/tasks/cluster.rb
|
124
|
+
- lib/lederhosen/tasks/filter.rb
|
124
125
|
- lib/lederhosen/tasks/join.rb
|
125
126
|
- lib/lederhosen/tasks/name.rb
|
126
127
|
- lib/lederhosen/tasks/otu_table.rb
|
@@ -135,6 +136,7 @@ files:
|
|
135
136
|
- spec/data/ILT_L_9_B_002_1.txt
|
136
137
|
- spec/data/ILT_L_9_B_002_3.txt
|
137
138
|
- spec/helpers_spec.rb
|
139
|
+
- spec/misc_spec.rb
|
138
140
|
- spec/pipeline_spec.rb
|
139
141
|
- spec/spec_helper.rb
|
140
142
|
homepage: http://github.com/audy/lederhosen
|
@@ -176,5 +178,6 @@ test_files:
|
|
176
178
|
- spec/data/ILT_L_9_B_002_1.txt
|
177
179
|
- spec/data/ILT_L_9_B_002_3.txt
|
178
180
|
- spec/helpers_spec.rb
|
181
|
+
- spec/misc_spec.rb
|
179
182
|
- spec/pipeline_spec.rb
|
180
183
|
- spec/spec_helper.rb
|