lederhosen 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/lederhosen/tasks/uniquify.rb +49 -0
- data/lib/version.rb +1 -1
- metadata +4 -3
@@ -0,0 +1,49 @@
|
|
1
|
+
##
|
2
|
+
# uniquify - uniquify a fasta file, also output table with md5 -> number of reads
|
3
|
+
#
|
4
|
+
|
5
|
+
require 'digest/md5'
|
6
|
+
|
7
|
+
module Lederhosen
  class CLI
    desc 'uniquify',
         'uniquify a fasta file and generate a table with md5 -> abundance'

    method_option :input,     :type => :string, :required => true
    method_option :output,    :type => :string, :required => true
    method_option :table_out, :type => :string, :required => true

    ##
    # Remove duplicate sequences from a fasta file.
    #
    # Reads every record from options[:input]; the first record seen for each
    # distinct sequence is written to options[:output]. Afterwards a table of
    # "md5-of-sequence,count" lines (one per distinct sequence) is written to
    # options[:table_out], and a summary is reported through +ohai+.
    #
    def uniquify
      input     = options[:input]
      output    = options[:output]
      table_out = options[:table_out]

      # sequence -> number of records carrying that exact sequence
      sequence_counts = Hash.new(0)

      # Block form of File.open guarantees the output handle is closed even
      # when reading or parsing the input raises part-way through.
      File.open(output, 'w') do |fasta_out|
        File.open(input) do |handle|
          Dna.new(handle).each do |record|
            sequence = record.sequence
            # only the first occurrence of a sequence is kept
            fasta_out.puts record unless sequence_counts.key?(sequence)
            sequence_counts[sequence] += 1
          end
        end
      end

      # write table
      File.open(table_out, 'w') do |table|
        sequence_counts.each_pair do |sequence, count|
          table.puts "#{Digest::MD5.hexdigest(sequence)},#{count}"
        end
      end

      kept  = sequence_counts.size
      # seed with 0 so an empty input yields 0 instead of nil
      total = sequence_counts.values.inject(0, :+)
      # avoid reporting NaN when there were no reads at all
      percent = total.zero? ? 0.0 : 100 * kept / total.to_f
      ohai "kept #{kept} out of #{total} reads (#{percent})"
    end
  end
end
|
data/lib/version.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lederhosen
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 17
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 2
|
9
|
-
- 2
|
10
|
-
version: 0.2.2
|
9
|
+
- 3
|
10
|
+
version: 0.2.3
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Austin G. Davis-Richardson
|
@@ -95,6 +95,7 @@ files:
|
|
95
95
|
- lib/lederhosen/tasks/trim.rb
|
96
96
|
- lib/lederhosen/tasks/uc_filter.rb
|
97
97
|
- lib/lederhosen/tasks/uc_stats.rb
|
98
|
+
- lib/lederhosen/tasks/uniquify.rb
|
98
99
|
- lib/version.rb
|
99
100
|
- readme.md
|
100
101
|
- spec/data/ILT_L_9_B_001_1.txt.gz
|