lederhosen 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lederhosen.gemspec CHANGED
@@ -5,7 +5,7 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "lederhosen"
8
- s.version = "0.4.0"
8
+ s.version = "0.5.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Austin G. Davis-Richardson"]
@@ -45,7 +45,6 @@ Gem::Specification.new do |s|
45
45
  "lib/lederhosen/tasks/trim.rb",
46
46
  "lib/lederhosen/tasks/uc_filter.rb",
47
47
  "lib/lederhosen/tasks/uc_stats.rb",
48
- "lib/lederhosen/tasks/uniquify.rb",
49
48
  "lib/lederhosen/tasks/version.rb",
50
49
  "lib/lederhosen/version.rb",
51
50
  "readme.md",
@@ -1,7 +1,7 @@
1
1
  module Lederhosen
2
2
  module Version
3
3
  MAJOR = 0
4
- MINOR = 4
4
+ MINOR = 5
5
5
  PATCH = 0
6
6
 
7
7
  STRING = [MAJOR, MINOR, PATCH].join('.')
data/spec/cli_spec.rb CHANGED
@@ -45,11 +45,6 @@ describe Lederhosen::CLI do
45
45
  `./bin/lederhosen otu_filter --input=#{$test_dir}/otu_table.csv --output=#{$test_dir}/otu_table.filtered.csv --reads 1 --samples 1`
46
46
  end
47
47
 
48
- it 'should uniquify reads' do
49
- `./bin/lederhosen uniquify --input=#{$test_dir}/sorted.fasta --output=#{$test_dir}/uniqued.fasta --table-out=#{$test_dir}/uniquify.txt`
50
- $?.success?.should be_true
51
- end
52
-
53
48
  it 'should split joined.fasta into reads for each cluster' do
54
49
  `./bin/lederhosen split --reads=#{$test_dir}/joined.fasta --clusters=#{$test_dir}/clusters.uc --out-dir=#{$test_dir}/split --min-clst-size=1`
55
50
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lederhosen
3
3
  version: !ruby/object:Gem::Version
4
- hash: 15
4
+ hash: 11
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
- - 4
8
+ - 5
9
9
  - 0
10
- version: 0.4.0
10
+ version: 0.5.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Austin G. Davis-Richardson
@@ -141,7 +141,6 @@ files:
141
141
  - lib/lederhosen/tasks/trim.rb
142
142
  - lib/lederhosen/tasks/uc_filter.rb
143
143
  - lib/lederhosen/tasks/uc_stats.rb
144
- - lib/lederhosen/tasks/uniquify.rb
145
144
  - lib/lederhosen/tasks/version.rb
146
145
  - lib/lederhosen/version.rb
147
146
  - readme.md
@@ -1,61 +0,0 @@
1
- ##
2
- # uniquify - uniquify a fasta file generating a fasta file of only unique sequences
3
- # also output table with sequence_id -> number of reads
4
- #
5
-
6
- module Lederhosen
7
- class CLI
8
- desc 'uniquify',
9
- 'uniquify a fasta file generating a fasta file of only unique sequences.' +\
10
- 'also generate a table with sequence_id -> abundance'
11
-
12
- method_option :input, :type => :string, :required => true
13
- method_option :output, :type => :string, :required => true
14
- method_option :table_out, :type => :string, :required => true
15
-
16
- def uniquify
17
- input = options[:input]
18
- output = options[:output]
19
- table_out = options[:table_out]
20
-
21
- ohai "uniquifying #{input} to #{output} w/ table #{table_out}"
22
-
23
- sequence_counts = Hash.new { |h, k| h[k] = 0 }
24
- sequence_to_id = Hash.new
25
-
26
- out = File.open(output, 'w')
27
-
28
- File.open(input) do |handle|
29
- pbar = ProgressBar.new 'loading', File.size(input)
30
- Dna.new(handle).each do |record|
31
- pbar.inc handle.pos
32
- unless sequence_counts.has_key? record.sequence
33
- # store the sequence and id so we can have ids in the
34
- # table. If the file is sorted by length then this
35
- # should also be a seed sequence.
36
- sequence_to_id[record.sequence] = record.name
37
- out.puts record
38
- end
39
- sequence_counts[record.sequence] += 1
40
- end
41
- pbar.finish
42
- end
43
-
44
- out.close
45
-
46
- # write table
47
- pbar = ProgressBar.new 'table', sequence_counts.size
48
- File.open(table_out, 'w') do |out|
49
- sequence_counts.each_pair do |sequence, count|
50
- pbar.inc
51
- id = sequence_to_id[sequence]
52
- out.puts "#{id}\t#{count}"
53
- end
54
- end
55
- pbar.finish
56
- kept = sequence_counts.keys.size
57
- total = sequence_counts.values.inject(:+)
58
- ohai "kept #{kept} out of #{total} reads (#{100*kept/total.to_f})"
59
- end
60
- end
61
- end