lederhosen 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lederhosen.gemspec +1 -2
- data/lib/lederhosen/version.rb +1 -1
- data/spec/cli_spec.rb +0 -5
- metadata +3 -4
- data/lib/lederhosen/tasks/uniquify.rb +0 -61
data/lederhosen.gemspec
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "lederhosen"
|
8
|
-
s.version = "0.4.0"
|
8
|
+
s.version = "0.5.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Austin G. Davis-Richardson"]
|
@@ -45,7 +45,6 @@ Gem::Specification.new do |s|
|
|
45
45
|
"lib/lederhosen/tasks/trim.rb",
|
46
46
|
"lib/lederhosen/tasks/uc_filter.rb",
|
47
47
|
"lib/lederhosen/tasks/uc_stats.rb",
|
48
|
-
"lib/lederhosen/tasks/uniquify.rb",
|
49
48
|
"lib/lederhosen/tasks/version.rb",
|
50
49
|
"lib/lederhosen/version.rb",
|
51
50
|
"readme.md",
|
data/lib/lederhosen/version.rb
CHANGED
data/spec/cli_spec.rb
CHANGED
@@ -45,11 +45,6 @@ describe Lederhosen::CLI do
|
|
45
45
|
`./bin/lederhosen otu_filter --input=#{$test_dir}/otu_table.csv --output=#{$test_dir}/otu_table.filtered.csv --reads 1 --samples 1`
|
46
46
|
end
|
47
47
|
|
48
|
-
it 'should uniquify reads' do
|
49
|
-
`./bin/lederhosen uniquify --input=#{$test_dir}/sorted.fasta --output=#{$test_dir}/uniqued.fasta --table-out=#{$test_dir}/uniquify.txt`
|
50
|
-
$?.success?.should be_true
|
51
|
-
end
|
52
|
-
|
53
48
|
it 'should split joined.fasta into reads for each cluster' do
|
54
49
|
`./bin/lederhosen split --reads=#{$test_dir}/joined.fasta --clusters=#{$test_dir}/clusters.uc --out-dir=#{$test_dir}/split --min-clst-size=1`
|
55
50
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lederhosen
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 11
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
- 4
|
8
|
+
- 5
|
9
9
|
- 0
|
10
|
-
version: 0.4.0
|
10
|
+
version: 0.5.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Austin G. Davis-Richardson
|
@@ -141,7 +141,6 @@ files:
|
|
141
141
|
- lib/lederhosen/tasks/trim.rb
|
142
142
|
- lib/lederhosen/tasks/uc_filter.rb
|
143
143
|
- lib/lederhosen/tasks/uc_stats.rb
|
144
|
-
- lib/lederhosen/tasks/uniquify.rb
|
145
144
|
- lib/lederhosen/tasks/version.rb
|
146
145
|
- lib/lederhosen/version.rb
|
147
146
|
- readme.md
|
@@ -1,61 +0,0 @@
|
|
1
|
-
##
|
2
|
-
# uniquify - uniquify a fasta file generating a fasta file of only unique sequences
|
3
|
-
# also output table with sequence_id -> number of reads
|
4
|
-
#
|
5
|
-
|
6
|
-
module Lederhosen
|
7
|
-
class CLI
|
8
|
-
desc 'uniquify',
|
9
|
-
'uniquify a fasta file generating a fasta file of only unique sequences.' +\
|
10
|
-
'also generate a table with sequence_id -> abundance'
|
11
|
-
|
12
|
-
method_option :input, :type => :string, :required => true
|
13
|
-
method_option :output, :type => :string, :required => true
|
14
|
-
method_option :table_out, :type => :string, :required => true
|
15
|
-
|
16
|
-
def uniquify
|
17
|
-
input = options[:input]
|
18
|
-
output = options[:output]
|
19
|
-
table_out = options[:table_out]
|
20
|
-
|
21
|
-
ohai "uniquifying #{input} to #{output} w/ table #{table_out}"
|
22
|
-
|
23
|
-
sequence_counts = Hash.new { |h, k| h[k] = 0 }
|
24
|
-
sequence_to_id = Hash.new
|
25
|
-
|
26
|
-
out = File.open(output, 'w')
|
27
|
-
|
28
|
-
File.open(input) do |handle|
|
29
|
-
pbar = ProgressBar.new 'loading', File.size(input)
|
30
|
-
Dna.new(handle).each do |record|
|
31
|
-
pbar.inc handle.pos
|
32
|
-
unless sequence_counts.has_key? record.sequence
|
33
|
-
# store the sequence and id so we can have ids in the
|
34
|
-
# table. If the file is sorted by length then this
|
35
|
-
# should also be a seed sequence.
|
36
|
-
sequence_to_id[record.sequence] = record.name
|
37
|
-
out.puts record
|
38
|
-
end
|
39
|
-
sequence_counts[record.sequence] += 1
|
40
|
-
end
|
41
|
-
pbar.finish
|
42
|
-
end
|
43
|
-
|
44
|
-
out.close
|
45
|
-
|
46
|
-
# write table
|
47
|
-
pbar = ProgressBar.new 'table', sequence_counts.size
|
48
|
-
File.open(table_out, 'w') do |out|
|
49
|
-
sequence_counts.each_pair do |sequence, count|
|
50
|
-
pbar.inc
|
51
|
-
id = sequence_to_id[sequence]
|
52
|
-
out.puts "#{id}\t#{count}"
|
53
|
-
end
|
54
|
-
end
|
55
|
-
pbar.finish
|
56
|
-
kept = sequence_counts.keys.size
|
57
|
-
total = sequence_counts.values.inject(:+)
|
58
|
-
ohai "kept #{kept} out of #{total} reads (#{100*kept/total.to_f})"
|
59
|
-
end
|
60
|
-
end
|
61
|
-
end
|