lederhosen 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,49 @@
1
+ ##
2
+ # uniquify - uniquify a fasta file, also output table with md5 -> number of reads
3
+ #
4
+
5
+ require 'digest/md5'
6
+
7
+ module Lederhosen
8
+ class CLI
9
+ desc 'uniquify',
10
+ 'uniquify a fasta file and generate a table with md5 -> abundance'
11
+
12
+ method_option :input, :type => :string, :required => true
13
+ method_option :output, :type => :string, :required => true
14
+ method_option :table_out, :type => :string, :required => true
15
+
16
# Remove duplicate sequences from a fasta file and tabulate their abundance.
#
# Reads every record that Dna yields from options[:input]. The first record
# seen for each distinct sequence is written to options[:output]; later
# records with the same sequence are only counted. Afterwards one
# "md5,count" line per distinct sequence is written to options[:table_out],
# where md5 is the hex MD5 digest of the sequence and count is the number
# of records that carried it. A summary line is reported through ohai.
def uniquify
  input     = options[:input]
  output    = options[:output]
  table_out = options[:table_out]

  # sequence => number of records seen with that sequence
  sequence_counts = Hash.new(0)

  # Block form so the output handle is closed even if reading or parsing
  # the input raises part-way through.
  File.open(output, 'w') do |out|
    File.open(input) do |handle|
      Dna.new(handle).each do |record|
        # Only the first occurrence of a sequence is emitted.
        out.puts record unless sequence_counts.key?(record.sequence)
        sequence_counts[record.sequence] += 1
      end
    end
  end

  # write table
  File.open(table_out, 'w') do |table|
    sequence_counts.each_pair do |sequence, count|
      table.puts "#{Digest::MD5.hexdigest(sequence)},#{count}"
    end
  end

  kept  = sequence_counts.size
  # Seed with 0 so an empty input gives 0 rather than nil.
  total = sequence_counts.values.inject(0, :+)
  # Avoid 0 / 0.0 (NaN) when the input contained no records.
  percent = total.zero? ? 0.0 : 100 * kept / total.to_f
  ohai "kept #{kept} out of #{total} reads (#{percent})"
end
48
+ end
49
+ end
data/lib/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Lederhosen
2
- VERSION = '0.2.2'
2
+ VERSION = '0.2.3'
3
3
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lederhosen
3
3
  version: !ruby/object:Gem::Version
4
- hash: 19
4
+ hash: 17
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 2
9
- - 2
10
- version: 0.2.2
9
+ - 3
10
+ version: 0.2.3
11
11
  platform: ruby
12
12
  authors:
13
13
  - Austin G. Davis-Richardson
@@ -95,6 +95,7 @@ files:
95
95
  - lib/lederhosen/tasks/trim.rb
96
96
  - lib/lederhosen/tasks/uc_filter.rb
97
97
  - lib/lederhosen/tasks/uc_stats.rb
98
+ - lib/lederhosen/tasks/uniquify.rb
98
99
  - lib/version.rb
99
100
  - readme.md
100
101
  - spec/data/ILT_L_9_B_001_1.txt.gz