lederhosen 0.2.2 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,49 @@
1
+ ##
2
+ # uniquify - uniquify a fasta file, also output table with md5 -> number of reads
3
+ #
4
+
5
+ require 'digest/md5'
6
+
7
module Lederhosen
  class CLI
    desc 'uniquify',
         'uniquify a fasta file and generate a table with md5 -> abundance'

    method_option :input,     :type => :string, :required => true
    method_option :output,    :type => :string, :required => true
    method_option :table_out, :type => :string, :required => true

    ##
    # Collapse records that share an identical sequence.
    #
    # Reads records from the file named by options[:input] and writes the
    # first record seen for each distinct sequence to options[:output].
    # Then writes one "md5,count" line per distinct sequence to
    # options[:table_out], where md5 is the hex MD5 digest of the sequence
    # and count is the number of input records carrying that sequence.
    # Finally reports how many records were kept via +ohai+.
    #
    # NOTE(review): +Dna+ and +ohai+ are defined outside this file; Dna is
    # presumably the sequence-file parser from the dna gem -- confirm.
    #
    # Everything stays inside this one method on purpose: the class uses a
    # Thor-style DSL (desc / method_option), where additional public methods
    # may be exposed as commands.
    def uniquify
      input     = options[:input]
      output    = options[:output]
      table_out = options[:table_out]

      # sequence => number of records with exactly that sequence
      sequence_counts = Hash.new(0)

      # Block form guarantees the output handle is closed even if reading or
      # parsing the input raises part-way through.
      File.open(output, 'w') do |fasta_out|
        File.open(input) do |handle|
          Dna.new(handle).each do |record|
            # Only the first occurrence of a sequence is written out.
            fasta_out.puts record unless sequence_counts.key?(record.sequence)
            sequence_counts[record.sequence] += 1
          end
        end
      end

      # write table
      File.open(table_out, 'w') do |table|
        sequence_counts.each_pair do |sequence, count|
          table.puts "#{Digest::MD5.hexdigest(sequence)},#{count}"
        end
      end

      kept  = sequence_counts.size
      # Seed with 0 so an empty input yields 0 instead of nil.
      total = sequence_counts.values.inject(0, :+)
      # Avoid 0 / 0.0 (NaN) when the input contained no records.
      percent = total.zero? ? 0.0 : 100 * kept / total.to_f
      ohai "kept #{kept} out of #{total} reads (#{percent})"
    end
  end
end
data/lib/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Lederhosen
2
- VERSION = '0.2.2'
2
+ VERSION = '0.2.3'
3
3
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lederhosen
3
3
  version: !ruby/object:Gem::Version
4
- hash: 19
4
+ hash: 17
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 2
9
- - 2
10
- version: 0.2.2
9
+ - 3
10
+ version: 0.2.3
11
11
  platform: ruby
12
12
  authors:
13
13
  - Austin G. Davis-Richardson
@@ -95,6 +95,7 @@ files:
95
95
  - lib/lederhosen/tasks/trim.rb
96
96
  - lib/lederhosen/tasks/uc_filter.rb
97
97
  - lib/lederhosen/tasks/uc_stats.rb
98
+ - lib/lederhosen/tasks/uniquify.rb
98
99
  - lib/version.rb
99
100
  - readme.md
100
101
  - spec/data/ILT_L_9_B_001_1.txt.gz