lederhosen 0.2.2 → 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/lederhosen/tasks/uniquify.rb +49 -0
- data/lib/version.rb +1 -1
- metadata +4 -3
@@ -0,0 +1,49 @@
|
|
1
|
+
##
|
2
|
+
# uniquify - uniquify a fasta file, also output table with md5 -> number of reads
|
3
|
+
#
|
4
|
+
|
5
|
+
require 'digest/md5'
|
6
|
+
|
7
|
+
module Lederhosen
  class CLI
    desc 'uniquify',
         'uniquify a fasta file and generate a table with md5 -> abundance'

    method_option :input,     :type => :string, :required => true
    method_option :output,    :type => :string, :required => true
    method_option :table_out, :type => :string, :required => true

    ##
    # Write the first record seen for every distinct sequence in --input to
    # --output, then write a "md5,count" line per distinct sequence to
    # --table_out, and finally report how many reads were kept.
    #
    # Options (read from +options+):
    #   :input     - path of the file to read records from
    #   :output    - path that receives one record per distinct sequence
    #   :table_out - path that receives "<md5 of sequence>,<count>" lines
    #
    # NOTE(review): Dna and ohai are defined outside this file; this code only
    # relies on Dna.new(handle).each yielding records that respond to
    # #sequence, and on ohai accepting a message string.
    def uniquify
      input     = options[:input]
      output    = options[:output]
      table_out = options[:table_out]

      # sequence => number of records carrying that sequence
      sequence_counts = Hash.new(0)

      # Block forms close both handles even when reading or parsing raises.
      # The output file is opened first, as before, so it is created or
      # truncated before the input is touched.
      File.open(output, 'w') do |out|
        File.open(input) do |handle|
          Dna.new(handle).each do |record|
            # only the first occurrence of a sequence is written out
            out.puts record unless sequence_counts.key?(record.sequence)
            sequence_counts[record.sequence] += 1
          end
        end
      end

      # write table
      File.open(table_out, 'w') do |table|
        sequence_counts.each_pair do |sequence, count|
          digest = Digest::MD5.hexdigest(sequence)
          table.puts "#{digest},#{count}"
        end
      end

      kept  = sequence_counts.size
      # Seed with 0 so an empty input gives a total of 0 rather than nil.
      total = sequence_counts.values.inject(0, :+)
      # Avoid 0/0.0 (NaN) in the summary when there were no reads at all.
      percent = total.zero? ? 0.0 : 100 * kept / total.to_f
      ohai "kept #{kept} out of #{total} reads (#{percent})"
    end
  end
end
|
data/lib/version.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lederhosen
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 17
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 2
|
9
|
-
- 2
|
10
|
-
version: 0.2.2
|
9
|
+
- 3
|
10
|
+
version: 0.2.3
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Austin G. Davis-Richardson
|
@@ -95,6 +95,7 @@ files:
|
|
95
95
|
- lib/lederhosen/tasks/trim.rb
|
96
96
|
- lib/lederhosen/tasks/uc_filter.rb
|
97
97
|
- lib/lederhosen/tasks/uc_stats.rb
|
98
|
+
- lib/lederhosen/tasks/uniquify.rb
|
98
99
|
- lib/version.rb
|
99
100
|
- readme.md
|
100
101
|
- spec/data/ILT_L_9_B_001_1.txt.gz
|