lederhosen 0.0.9 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,79 @@
1
+ ##
2
+ # FILTER READS WITH LOW ABUNDANCE KMERS
3
+ #
4
+
5
module Lederhosen
  class CLI

    desc "filter fasta file",
         "--input=joined.fasta --output=filtered.fasta --k=10 --cutoff=50"

    method_option :input,  :type => :string,  :required => true
    method_option :output, :type => :string,  :required => true
    method_option :k,      :type => :numeric, :required => true
    method_option :cutoff, :type => :numeric, :required => true

    ##
    # Copy reads from --input to --output, dropping every read that contains
    # at least one k-mer seen fewer than --cutoff times across the whole input.
    #
    # The input file is read twice: the first pass counts every k-mer, the
    # second pass re-reads the file and writes the surviving records.
    #
    # Options (read from +options+):
    #   :input  - path of the file to read
    #   :output - path of the file to write (opened with mode 'w')
    #   :k      - k-mer length, converted with to_i
    #   :cutoff - minimum count that every k-mer of a read must reach
    #
    # A read that yields no k-mers at all has nothing below the cutoff and is
    # therefore kept.
    #
    # NOTE(review): +ohai+, +Dna+ and +ProgressBar+ are defined outside this
    # file; Dna is assumed to yield records responding to #sequence - confirm.
    def filter
      input    = options[:input]
      out_path = options[:output]
      k_len    = options[:k].to_i
      cutoff   = options[:cutoff]

      # Missing keys read as 0, so counting and lookups need no initialisation.
      kmer_counts = Hash.new(0)
      total_reads = 0

      ohai "counting kmers"
      File.open(input) do |handle|
        Dna.new(handle).each do |record|
          total_reads += 1
          record.sequence.to_kmers(k_len).each { |kmer| kmer_counts[kmer] += 1 }
        end
      end

      # Seed with 0 so an input without any k-mers reports 0 instead of nil.
      sum_of_kmers = kmer_counts.values.inject(0, :+)

      ohai "total reads = #{total_reads}"
      ohai "sum of kmers = #{sum_of_kmers}"

      kept = 0
      pbar = ProgressBar.new "saving", total_reads

      # Block forms of File.open close both handles even when a record raises
      # part-way through the second pass.
      File.open(out_path, 'w') do |out|
        File.open(input) do |handle|
          Dna.new(handle).each do |record|
            kmers = record.sequence.to_kmers(k_len)

            # all? stops at the first rare k-mer, like an early break would.
            if kmers.all? { |kmer| kmer_counts[kmer] >= cutoff }
              kept += 1
              out.puts record
            end
            pbar.inc
          end
        end
      end

      pbar.finish

      ohai "survivors = #{kept} (#{kept / total_reads.to_f})"
    end
  end

end
data/lib/lederhosen.rb CHANGED
@@ -5,3 +5,11 @@ require 'set'
5
5
  require 'progressbar'
6
6
 
7
7
  Dir.glob(File.join(File.dirname(__FILE__), 'lederhosen', '*.rb')).each { |f| require f }
8
+
9
class String
  # Split the string into every overlapping substring ("k-mer") of length +k+,
  # in left-to-right order.
  #
  #   'test'.to_kmers(2)  # => ['te', 'es', 'st']
  #   'test'.to_kmers(5)  # => []
  #
  # @param k [Integer] length of each k-mer
  # @return [Array<String>] the k-mers; empty when +k+ is not positive or is
  #   longer than the string
  def to_kmers(k)
    # Only k == 0 used to be guarded; a negative k then fell through to
    # negative range indexing and produced meaningless substrings.
    return [] if k <= 0

    # Last index at which a full-length k-mer still fits; negative when the
    # string is shorter than k, which makes the range below empty.
    last_start = length - k
    (0..last_start).map { |i| self[i, k] }
  end
end
data/lib/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Lederhosen
2
- VERSION = '0.0.9'
2
+ VERSION = '0.0.10'
3
3
  end
data/spec/misc_spec.rb ADDED
@@ -0,0 +1,11 @@
1
+ require 'spec_helper'
2
+
3
describe String do
  # Exercises String#to_kmers with k shorter than, equal to, and longer than
  # the string, plus the k == 0 guard.
  it 'to_kmers should generate kmers for a string' do
    'test'.to_kmers(2).should == ['te', 'es', 'st']
    'test'.to_kmers(3).should == ['tes', 'est']
    'test'.to_kmers(4).should == ['test']
    'test'.to_kmers(5).should == []
    'test'.to_kmers(0).should == []
  end
end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lederhosen
3
3
  version: !ruby/object:Gem::Version
4
- hash: 13
4
+ hash: 11
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 9
10
- version: 0.0.9
9
+ - 10
10
+ version: 0.0.10
11
11
  platform: ruby
12
12
  authors:
13
13
  - Austin G. Davis-Richardson
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-05-01 00:00:00 Z
18
+ date: 2012-05-14 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  name: dna
@@ -121,6 +121,7 @@ files:
121
121
  - lib/lederhosen/cli.rb
122
122
  - lib/lederhosen/helpers.rb
123
123
  - lib/lederhosen/tasks/cluster.rb
124
+ - lib/lederhosen/tasks/filter.rb
124
125
  - lib/lederhosen/tasks/join.rb
125
126
  - lib/lederhosen/tasks/name.rb
126
127
  - lib/lederhosen/tasks/otu_table.rb
@@ -135,6 +136,7 @@ files:
135
136
  - spec/data/ILT_L_9_B_002_1.txt
136
137
  - spec/data/ILT_L_9_B_002_3.txt
137
138
  - spec/helpers_spec.rb
139
+ - spec/misc_spec.rb
138
140
  - spec/pipeline_spec.rb
139
141
  - spec/spec_helper.rb
140
142
  homepage: http://github.com/audy/lederhosen
@@ -176,5 +178,6 @@ test_files:
176
178
  - spec/data/ILT_L_9_B_002_1.txt
177
179
  - spec/data/ILT_L_9_B_002_3.txt
178
180
  - spec/helpers_spec.rb
181
+ - spec/misc_spec.rb
179
182
  - spec/pipeline_spec.rb
180
183
  - spec/spec_helper.rb