lederhosen 0.0.9 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/lederhosen/tasks/filter.rb +79 -0
- data/lib/lederhosen.rb +8 -0
- data/lib/version.rb +1 -1
- data/spec/misc_spec.rb +11 -0
- metadata +7 -4
@@ -0,0 +1,79 @@
|
|
1
|
+
##
|
2
|
+
# FILTER READS WITH LOW ABUNDANCE KMERS
|
3
|
+
#
|
4
|
+
|
5
|
+
module Lederhosen
  class CLI

    desc "filter fasta file",
         "--input=joined.fasta --output=filtered.fasta --k=10 --cutoff=50"

    method_option :input,  :type => :string,  :required => true
    method_option :output, :type => :string,  :required => true
    method_option :k,      :type => :numeric, :required => true
    method_option :cutoff, :type => :numeric, :required => true

    # Drops reads that contain at least one low-abundance k-mer.
    #
    # Works in two passes over the --input file:
    #
    # 1. Count how often every k-mer of length --k occurs across all reads.
    # 2. Re-read the input and write to --output only the reads whose k-mers
    #    all occur at least --cutoff times.
    #
    # A read that yields no k-mers at all (String#to_kmers returns an empty
    # array, e.g. when the sequence is shorter than --k) is kept, because
    # there is no rare k-mer to reject it.
    #
    # Records come from Dna.new(handle); each is expected to respond to
    # #sequence and to render itself when passed to IO#puts.
    # NOTE(review): record format is defined by the dna gem - confirm there.
    #
    # Progress and summary lines are reported through ohai.
    def filter
      input  = options[:input]
      output = options[:output]
      k_len  = options[:k].to_i
      cutoff = options[:cutoff]

      counting_table = Hash.new(0)
      total_reads    = 0

      # Pass 1: build the k-mer abundance table.
      ohai "counting kmers"
      File.open(input) do |handle|
        Dna.new(handle).each do |r|
          total_reads += 1
          r.sequence.to_kmers(k_len).each { |kmer| counting_table[kmer] += 1 }
        end
      end

      # Seed with 0 so an input without any k-mers reports 0 instead of nil.
      sum_of_kmers = counting_table.values.inject(0) { |sum, count| sum + count }

      ohai "total reads = #{total_reads}"
      ohai "sum of kmers = #{sum_of_kmers}"

      kept = 0
      pbar = ProgressBar.new "saving", total_reads

      # Pass 2: block-form File.open closes both handles even if reading or
      # writing raises part-way through.
      File.open(output, 'w') do |out|
        File.open(input) do |handle|
          Dna.new(handle).each do |r|
            # all? stops at the first rare k-mer, like the former manual break.
            abundant = r.sequence.to_kmers(k_len).all? do |kmer|
              counting_table[kmer] >= cutoff
            end

            if abundant
              kept += 1
              out.puts r
            end
            pbar.inc
          end
        end
      end

      pbar.finish

      # Avoid 0 / 0.0 (NaN) when the input held no reads.
      fraction = total_reads.zero? ? 0.0 : kept / total_reads.to_f
      ohai "survivors = #{kept} (#{fraction})"
    end
  end
end
|
data/lib/lederhosen.rb
CHANGED
@@ -5,3 +5,11 @@ require 'set'
|
|
5
5
|
require 'progressbar'
|
6
6
|
|
7
7
|
Dir.glob(File.join(File.dirname(__FILE__), 'lederhosen', '*.rb')).each { |f| require f }
|
8
|
+
|
9
|
+
class String
  # Splits the string into every overlapping substring ("k-mer") of length k,
  # in left-to-right order.
  #
  #   'test'.to_kmers(2)  # => ["te", "es", "st"]
  #
  # k - Integer length of each substring.
  #
  # Returns an Array of Strings. The array is empty when k is zero, negative,
  # or greater than the length of the string.
  def to_kmers(k)
    # Non-positive k has no meaningful k-mers; previously a negative k fell
    # through and produced ragged slices, empty strings and nil.
    return [] if k <= 0

    # When k exceeds the length the range is empty, so the result is [].
    (0..(length - k)).map { |i| self[i, k] }
  end
end
|
data/lib/version.rb
CHANGED
data/spec/misc_spec.rb
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe String do
  # Checks String#to_kmers against 'test' for k below, equal to and above the
  # string length, plus the k == 0 case. Pairs are [k, expected k-mers] and
  # are evaluated in the order listed.
  it 'generate_kmers should generate kmers for a string' do
    word = 'test'
    cases = [
      [2, ['te', 'es', 'st']],
      [3, ['tes', 'est']],
      [4, ['test']],
      [5, []],
      [0, []]
    ]

    cases.each do |k, expected|
      word.to_kmers(k).should == expected
    end
  end
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lederhosen
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 11
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
- 9
|
10
|
-
version: 0.0.9
|
9
|
+
- 10
|
10
|
+
version: 0.0.10
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Austin G. Davis-Richardson
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-05-
|
18
|
+
date: 2012-05-14 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: dna
|
@@ -121,6 +121,7 @@ files:
|
|
121
121
|
- lib/lederhosen/cli.rb
|
122
122
|
- lib/lederhosen/helpers.rb
|
123
123
|
- lib/lederhosen/tasks/cluster.rb
|
124
|
+
- lib/lederhosen/tasks/filter.rb
|
124
125
|
- lib/lederhosen/tasks/join.rb
|
125
126
|
- lib/lederhosen/tasks/name.rb
|
126
127
|
- lib/lederhosen/tasks/otu_table.rb
|
@@ -135,6 +136,7 @@ files:
|
|
135
136
|
- spec/data/ILT_L_9_B_002_1.txt
|
136
137
|
- spec/data/ILT_L_9_B_002_3.txt
|
137
138
|
- spec/helpers_spec.rb
|
139
|
+
- spec/misc_spec.rb
|
138
140
|
- spec/pipeline_spec.rb
|
139
141
|
- spec/spec_helper.rb
|
140
142
|
homepage: http://github.com/audy/lederhosen
|
@@ -176,5 +178,6 @@ test_files:
|
|
176
178
|
- spec/data/ILT_L_9_B_002_1.txt
|
177
179
|
- spec/data/ILT_L_9_B_002_3.txt
|
178
180
|
- spec/helpers_spec.rb
|
181
|
+
- spec/misc_spec.rb
|
179
182
|
- spec/pipeline_spec.rb
|
180
183
|
- spec/spec_helper.rb
|