lederhosen 0.3.7 → 0.3.8

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
data/lederhosen.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "lederhosen"
8
- s.version = "0.3.7"
8
+ s.version = "0.3.8"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Austin G. Davis-Richardson"]
12
- s.date = "2012-08-14"
12
+ s.date = "2012-08-20"
13
13
  s.description = "Various tools for OTU clustering"
14
14
  s.email = "harekrishna@gmail.com"
15
15
  s.executables = ["lederhosen"]
@@ -1,16 +1,14 @@
1
1
  ##
2
- # uniquify - uniquify a fasta file, also output table with md5 -> number of reads
2
+ # uniquify - uniquify a fasta file, also output table with sequence_id -> number of reads
3
3
  #
4
4
 
5
- require 'digest/md5'
6
-
7
5
  module Lederhosen
8
6
  class CLI
9
7
  desc 'uniquify',
10
- 'uniquify a fasta file and generate a table with md5 -> abundance'
8
+ 'uniquify a fasta file and generate a table with sequence_id -> abundance'
11
9
 
12
- method_option :input, :type => :string, :required => true
13
- method_option :output, :type => :string, :required => true
10
+ method_option :input, :type => :string, :required => true
11
+ method_option :output, :type => :string, :required => true
14
12
  method_option :table_out, :type => :string, :required => true
15
13
 
16
14
  def uniquify
@@ -21,15 +19,21 @@ module Lederhosen
21
19
  ohai "uniquifying #{input} to #{output} w/ table #{table_out}"
22
20
 
23
21
  sequence_counts = Hash.new { |h, k| h[k] = 0 }
22
+ sequence_to_id = Hash.new
24
23
 
25
24
  out = File.open(output, 'w')
26
25
 
27
26
  no_records = `grep -c '^>' #{input}`.split.first.to_i
28
27
  pbar = ProgressBar.new 'loading', no_records
28
+
29
29
  File.open(input) do |handle|
30
30
  Dna.new(handle).each do |record|
31
31
  pbar.inc
32
32
  unless sequence_counts.has_key? record.sequence
33
+ # store the sequence and id so we can have ids in the
34
+ # table. If the file is sorted by length then this
35
+ # should also be a seed sequence.
36
+ sequence_to_id[record.sequence] = record.name
33
37
  out.puts record
34
38
  end
35
39
  sequence_counts[record.sequence] += 1
@@ -44,8 +48,8 @@ module Lederhosen
44
48
  File.open(table_out, 'w') do |out|
45
49
  sequence_counts.each_pair do |sequence, count|
46
50
  pbar.inc
47
- digest = Digest::MD5.hexdigest(sequence)
48
- out.puts "#{digest},#{count}"
51
+ id = sequence_to_id[sequence]
52
+ out.puts "#{id}\t#{count}"
49
53
  end
50
54
  end
51
55
  pbar.finish
@@ -2,7 +2,7 @@ module Lederhosen
2
2
  module Version
3
3
  MAJOR = 0
4
4
  MINOR = 3
5
- PATCH = 7
5
+ PATCH = 8
6
6
 
7
7
  STRING = [MAJOR, MINOR, PATCH].join('.')
8
8
  end
data/spec/cli_spec.rb CHANGED
@@ -8,49 +8,54 @@ describe Lederhosen::CLI do
8
8
  end
9
9
 
10
10
  it 'should have a version command' do
11
- `./bin/lederhosen version`.strip.should == "lederhosen-#{Lederhosen::Version::STRING}"
11
+ `./bin/lederhosen version 2>/dev/null`.strip.should == "lederhosen-#{Lederhosen::Version::STRING}"
12
12
  end
13
13
 
14
14
  it 'should trim reads' do
15
- `./bin/lederhosen trim --reads-dir=spec/data/IL*.txt.gz --out-dir=#{$test_dir}/trimmed`
15
+ `./bin/lederhosen trim --reads-dir=spec/data/IL*.txt.gz --out-dir=#{$test_dir}/trimmed 2>/dev/null`
16
16
  $?.success?.should be_true
17
17
  end
18
18
 
19
19
  it 'should join reads' do
20
- `./bin/lederhosen join --trimmed=#{$test_dir}/trimmed/*.fasta --output=#{$test_dir}/joined.fasta`
20
+ `./bin/lederhosen join --trimmed=#{$test_dir}/trimmed/*.fasta --output=#{$test_dir}/joined.fasta 2>/dev/null`
21
21
  $?.success?.should be_true
22
22
  end
23
23
 
24
24
  it 'should sort reads' do
25
- `./bin/lederhosen sort --input=#{$test_dir}/joined.fasta --output=#{$test_dir}/sorted.fasta`
25
+ `./bin/lederhosen sort --input=#{$test_dir}/joined.fasta --output=#{$test_dir}/sorted.fasta 2>/dev/null`
26
26
  $?.success?.should be_true
27
27
  end
28
28
 
29
29
  it 'should k_filter reads' do
30
- `./bin/lederhosen k_filter --input=#{$test_dir}/sorted.fasta --output=#{$test_dir}/filtered.fasta -k=15 --cutoff 1`
30
+ `./bin/lederhosen k_filter --input=#{$test_dir}/sorted.fasta --output=#{$test_dir}/filtered.fasta -k=15 --cutoff 1 2>/dev/null`
31
31
  $?.success?.should be_true
32
32
  end
33
33
 
34
34
  it 'should cluster reads' do
35
- `./bin/lederhosen cluster --identity=0.80 --input=#{$test_dir}/filtered.fasta --output=#{$test_dir}/clusters.uc`
35
+ `./bin/lederhosen cluster --identity=0.80 --input=#{$test_dir}/filtered.fasta --output=#{$test_dir}/clusters.uc 2>/dev/null`
36
36
  $?.success?.should be_true
37
37
  end
38
38
 
39
39
  it 'should build OTU abundance matrices' do
40
- `./bin/lederhosen otu_table --clusters=#{$test_dir}/clusters.uc --output=#{$test_dir}/otu_table.csv`
40
+ `./bin/lederhosen otu_table --clusters=#{$test_dir}/clusters.uc --output=#{$test_dir}/otu_table.csv 2>/dev/null`
41
41
  $?.success?.should be_true
42
42
  end
43
43
 
44
44
  it 'should filter OTU abundance matrices' do
45
- `./bin/lederhosen otu_filter --input=#{$test_dir}/otu_table.csv --output=#{$test_dir}/otu_table.filtered.csv --reads 1 --samples 1`
45
+ `./bin/lederhosen otu_filter --input=#{$test_dir}/otu_table.csv --output=#{$test_dir}/otu_table.filtered.csv --reads 1 --samples 1 2>/dev/null`
46
+ end
47
+
48
+ it 'should uniquify reads' do
49
+ `./bin/lederhosen uniquify --input=#{$test_dir}/sorted.fasta --output=#{$test_dir}/uniqued.fasta --table-out=#{$test_dir}/uniquify.txt 2>/dev/null`
50
+ $?.success?.should be_true
46
51
  end
47
52
 
48
53
  it 'should split joined.fasta into reads for each cluster' do
49
- `./bin/lederhosen split --reads=#{$test_dir}/joined.fasta --clusters=#{$test_dir}/clusters.uc --out-dir=#{$test_dir}/split --min-clst-size=1`
54
+ `./bin/lederhosen split --reads=#{$test_dir}/joined.fasta --clusters=#{$test_dir}/clusters.uc --out-dir=#{$test_dir}/split --min-clst-size=1 2>/dev/null`
50
55
  end
51
56
 
52
57
  it 'should create a fasta file containing representative reads for each cluster' do
53
- `./bin/lederhosen rep_reads --clusters=#{$test_dir}/clusters.uc --joined=#{$test_dir}/filtered.fasta --output=#{$test_dir}/representatives.fasta`
58
+ `./bin/lederhosen rep_reads --clusters=#{$test_dir}/clusters.uc --joined=#{$test_dir}/filtered.fasta --output=#{$test_dir}/representatives.fasta 2>/dev/null`
54
59
  $?.success?.should be_true
55
60
  end
56
61
 
@@ -59,18 +64,14 @@ describe Lederhosen::CLI do
59
64
 
60
65
  it 'should add names to otu abundance matrix given blat output' do
61
66
  levels = %w{kingdom domain phylum class order genus speces}
62
- level =
63
- begin
64
- levels.sample # 1.9 and up
65
- rescue
66
- levels.choice # 1.8 and lower
67
- end
68
- `./bin/lederhosen add_names --table=spec/data/otus.csv --blat=spec/data/blat.txt --level=#{level} --output=#{$test_dir}/named_otus.csv`
67
+ # Ruby 1.9 vs Ruby 1.8
68
+ level = levels.sample rescue levels.choice
69
+ `./bin/lederhosen add_names --table=spec/data/otus.csv --blat=spec/data/blat.txt --level=#{level} --output=#{$test_dir}/named_otus.csv 2>/dev/null`
69
70
  $?.success?.should be_true
70
71
  end
71
72
 
72
73
  it 'should squish otu abundance matrix by same name' do
73
- `./bin/lederhosen squish --csv-file=#{$test_dir}/named_otus.csv --output=#{$test_dir}/squished.csv`
74
+ `./bin/lederhosen squish --csv-file=#{$test_dir}/named_otus.csv --output=#{$test_dir}/squished.csv 2>/dev/null`
74
75
  $?.success?.should be_true
75
76
  end
76
77
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lederhosen
3
3
  version: !ruby/object:Gem::Version
4
- hash: 29
4
+ hash: 3
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 3
9
- - 7
10
- version: 0.3.7
9
+ - 8
10
+ version: 0.3.8
11
11
  platform: ruby
12
12
  authors:
13
13
  - Austin G. Davis-Richardson
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-08-14 00:00:00 Z
18
+ date: 2012-08-20 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  type: :runtime