lederhosen 0.0.9 → 0.0.10

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,79 @@
1
+ ##
2
+ # FILTER READS WITH LOW ABUNDANCE KMERS
3
+ #
4
+
5
module Lederhosen
  class CLI

    desc "filter fasta file",
         "--input=joined.fasta --output=filtered.fasta --k=10 --cutoff=50"

    method_option :input,  :type => :string,  :required => true
    method_option :output, :type => :string,  :required => true
    method_option :k,      :type => :numeric, :required => true
    method_option :cutoff, :type => :numeric, :required => true

    # Drop every read that contains at least one low-abundance k-mer.
    #
    # The input file is read twice: the first pass counts how often each
    # k-mer of length --k occurs across all reads, the second pass writes
    # to --output only those reads whose k-mers all occur at least
    # --cutoff times.
    #
    # Options (read from +options+):
    #   :input  - path of the file to read records from
    #   :output - path of the file the surviving records are written to
    #   :k      - k-mer length (converted with +to_i+)
    #   :cutoff - minimum count a k-mer needs for its read to be kept
    #
    # NOTE(review): +Dna+, +ProgressBar+ and +ohai+ are defined outside
    # this file; records are assumed to respond to +sequence+ and to
    # print themselves via +puts+ -- confirm against the dna gem.
    def filter
      input  = options[:input]
      output = options[:output]
      k_len  = options[:k].to_i
      cutoff = options[:cutoff]

      # Integer default: unseen k-mers read as 0 without being inserted.
      counting_table = Hash.new(0)
      total_reads    = 0

      ohai "counting kmers"
      File.open(input) do |handle|
        Dna.new(handle).each do |r|
          total_reads += 1
          r.sequence.to_kmers(k_len).each { |kmer| counting_table[kmer] += 1 }
        end
      end

      # Seed with 0 so an empty table reports 0 instead of nil.
      sum_of_kmers = counting_table.values.inject(0, :+)

      ohai "total reads = #{total_reads}"
      ohai "sum of kmers = #{sum_of_kmers}"

      kept = 0
      pbar = ProgressBar.new "saving", total_reads

      # Block form guarantees the output file is closed even if reading
      # or parsing the input raises part-way through.
      File.open(output, 'w') do |out|
        File.open(input) do |handle|
          Dna.new(handle).each do |r|
            kmers = r.sequence.to_kmers(k_len)

            # all? stops at the first rare k-mer, so a read is rejected
            # as soon as one k-mer falls below the cutoff.
            if kmers.all? { |kmer| counting_table[kmer] >= cutoff }
              kept += 1
              out.puts r
            end
            pbar.inc
          end
        end
      end

      pbar.finish

      # Avoid 0 / 0.0 (NaN) when the input contained no reads.
      fraction = total_reads.zero? ? 0.0 : kept / total_reads.to_f
      ohai "survivors = #{kept} (#{fraction})"
    end
  end

end
data/lib/lederhosen.rb CHANGED
@@ -5,3 +5,11 @@ require 'set'
5
5
  require 'progressbar'
6
6
 
7
7
  Dir.glob(File.join(File.dirname(__FILE__), 'lederhosen', '*.rb')).each { |f| require f }
8
+
9
class String
  # Return every overlapping substring of length +k+ (k-mer), in order
  # of starting position.
  #
  #   'test'.to_kmers(2) # => ["te", "es", "st"]
  #
  # k - the k-mer length.
  #
  # Returns an Array of Strings. The array is empty when +k+ is zero,
  # negative, or larger than the string itself.
  def to_kmers(k)
    # A non-positive length has no meaningful k-mers; without this guard
    # a negative k would produce nonsense slices and nils.
    return [] if k <= 0

    # When k > length the range is empty and map yields [].
    (0..(length - k)).map { |i| self[i, k] }
  end
end
data/lib/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Lederhosen
2
- VERSION = '0.0.9'
2
+ VERSION = '0.0.10'
3
3
  end
data/spec/misc_spec.rb ADDED
@@ -0,0 +1,11 @@
1
+ require 'spec_helper'
2
+
3
describe String do
  # Specs for the String#to_kmers core extension.
  describe '#to_kmers' do
    it 'returns every overlapping substring of length k' do
      'test'.to_kmers(2).should == ['te', 'es', 'st']
      'test'.to_kmers(3).should == ['tes', 'est']
    end

    it 'returns the whole string when k equals its length' do
      'test'.to_kmers(4).should == ['test']
    end

    it 'returns an empty array when k exceeds the string length' do
      'test'.to_kmers(5).should == []
    end

    it 'returns an empty array when k is zero' do
      'test'.to_kmers(0).should == []
    end
  end
end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lederhosen
3
3
  version: !ruby/object:Gem::Version
4
- hash: 13
4
+ hash: 11
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 9
10
- version: 0.0.9
9
+ - 10
10
+ version: 0.0.10
11
11
  platform: ruby
12
12
  authors:
13
13
  - Austin G. Davis-Richardson
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-05-01 00:00:00 Z
18
+ date: 2012-05-14 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  name: dna
@@ -121,6 +121,7 @@ files:
121
121
  - lib/lederhosen/cli.rb
122
122
  - lib/lederhosen/helpers.rb
123
123
  - lib/lederhosen/tasks/cluster.rb
124
+ - lib/lederhosen/tasks/filter.rb
124
125
  - lib/lederhosen/tasks/join.rb
125
126
  - lib/lederhosen/tasks/name.rb
126
127
  - lib/lederhosen/tasks/otu_table.rb
@@ -135,6 +136,7 @@ files:
135
136
  - spec/data/ILT_L_9_B_002_1.txt
136
137
  - spec/data/ILT_L_9_B_002_3.txt
137
138
  - spec/helpers_spec.rb
139
+ - spec/misc_spec.rb
138
140
  - spec/pipeline_spec.rb
139
141
  - spec/spec_helper.rb
140
142
  homepage: http://github.com/audy/lederhosen
@@ -176,5 +178,6 @@ test_files:
176
178
  - spec/data/ILT_L_9_B_002_1.txt
177
179
  - spec/data/ILT_L_9_B_002_3.txt
178
180
  - spec/helpers_spec.rb
181
+ - spec/misc_spec.rb
179
182
  - spec/pipeline_spec.rb
180
183
  - spec/spec_helper.rb