lederhosen 0.3.7 → 0.3.8

Sign up to get free protection for your applications and to get access to all the features.
data/lederhosen.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "lederhosen"
8
- s.version = "0.3.7"
8
+ s.version = "0.3.8"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Austin G. Davis-Richardson"]
12
- s.date = "2012-08-14"
12
+ s.date = "2012-08-20"
13
13
  s.description = "Various tools for OTU clustering"
14
14
  s.email = "harekrishna@gmail.com"
15
15
  s.executables = ["lederhosen"]
@@ -1,16 +1,14 @@
1
1
  ##
2
- # uniquify - uniquify a fasta file, also output table with md5 -> number of reads
2
+ # uniquify - uniquify a fasta file, also output table with sequence_id -> number of reads
3
3
  #
4
4
 
5
- require 'digest/md5'
6
-
7
5
  module Lederhosen
8
6
  class CLI
9
7
  desc 'uniquify',
10
- 'uniquify a fasta file and generate a table with md5 -> abundance'
8
+ 'uniquify a fasta file and generate a table with sequence_id -> abundance'
11
9
 
12
- method_option :input, :type => :string, :required => true
13
- method_option :output, :type => :string, :required => true
10
+ method_option :input, :type => :string, :required => true
11
+ method_option :output, :type => :string, :required => true
14
12
  method_option :table_out, :type => :string, :required => true
15
13
 
16
14
  def uniquify
@@ -21,15 +19,21 @@ module Lederhosen
21
19
  ohai "uniquifying #{input} to #{output} w/ table #{table_out}"
22
20
 
23
21
  sequence_counts = Hash.new { |h, k| h[k] = 0 }
22
+ sequence_to_id = Hash.new
24
23
 
25
24
  out = File.open(output, 'w')
26
25
 
27
26
  no_records = `grep -c '^>' #{input}`.split.first.to_i
28
27
  pbar = ProgressBar.new 'loading', no_records
28
+
29
29
  File.open(input) do |handle|
30
30
  Dna.new(handle).each do |record|
31
31
  pbar.inc
32
32
  unless sequence_counts.has_key? record.sequence
33
+ # store the sequence and id so we can have ids in the
34
+ # table. If the file is sorted by length then this
35
+ # should also be a seed sequence.
36
+ sequence_to_id[record.sequence] = record.name
33
37
  out.puts record
34
38
  end
35
39
  sequence_counts[record.sequence] += 1
@@ -44,8 +48,8 @@ module Lederhosen
44
48
  File.open(table_out, 'w') do |out|
45
49
  sequence_counts.each_pair do |sequence, count|
46
50
  pbar.inc
47
- digest = Digest::MD5.hexdigest(sequence)
48
- out.puts "#{digest},#{count}"
51
+ id = sequence_to_id[sequence]
52
+ out.puts "#{id}\t#{count}"
49
53
  end
50
54
  end
51
55
  pbar.finish
@@ -2,7 +2,7 @@ module Lederhosen
2
2
  module Version
3
3
  MAJOR = 0
4
4
  MINOR = 3
5
- PATCH = 7
5
+ PATCH = 8
6
6
 
7
7
  STRING = [MAJOR, MINOR, PATCH].join('.')
8
8
  end
data/spec/cli_spec.rb CHANGED
@@ -8,49 +8,54 @@ describe Lederhosen::CLI do
8
8
  end
9
9
 
10
10
  it 'should have a version command' do
11
- `./bin/lederhosen version`.strip.should == "lederhosen-#{Lederhosen::Version::STRING}"
11
+ `./bin/lederhosen version 2>/dev/null`.strip.should == "lederhosen-#{Lederhosen::Version::STRING}"
12
12
  end
13
13
 
14
14
  it 'should trim reads' do
15
- `./bin/lederhosen trim --reads-dir=spec/data/IL*.txt.gz --out-dir=#{$test_dir}/trimmed`
15
+ `./bin/lederhosen trim --reads-dir=spec/data/IL*.txt.gz --out-dir=#{$test_dir}/trimmed 2>/dev/null`
16
16
  $?.success?.should be_true
17
17
  end
18
18
 
19
19
  it 'should join reads' do
20
- `./bin/lederhosen join --trimmed=#{$test_dir}/trimmed/*.fasta --output=#{$test_dir}/joined.fasta`
20
+ `./bin/lederhosen join --trimmed=#{$test_dir}/trimmed/*.fasta --output=#{$test_dir}/joined.fasta 2>/dev/null`
21
21
  $?.success?.should be_true
22
22
  end
23
23
 
24
24
  it 'should sort reads' do
25
- `./bin/lederhosen sort --input=#{$test_dir}/joined.fasta --output=#{$test_dir}/sorted.fasta`
25
+ `./bin/lederhosen sort --input=#{$test_dir}/joined.fasta --output=#{$test_dir}/sorted.fasta 2>/dev/null`
26
26
  $?.success?.should be_true
27
27
  end
28
28
 
29
29
  it 'should k_filter reads' do
30
- `./bin/lederhosen k_filter --input=#{$test_dir}/sorted.fasta --output=#{$test_dir}/filtered.fasta -k=15 --cutoff 1`
30
+ `./bin/lederhosen k_filter --input=#{$test_dir}/sorted.fasta --output=#{$test_dir}/filtered.fasta -k=15 --cutoff 1 2>/dev/null`
31
31
  $?.success?.should be_true
32
32
  end
33
33
 
34
34
  it 'should cluster reads' do
35
- `./bin/lederhosen cluster --identity=0.80 --input=#{$test_dir}/filtered.fasta --output=#{$test_dir}/clusters.uc`
35
+ `./bin/lederhosen cluster --identity=0.80 --input=#{$test_dir}/filtered.fasta --output=#{$test_dir}/clusters.uc 2>/dev/null`
36
36
  $?.success?.should be_true
37
37
  end
38
38
 
39
39
  it 'should build OTU abundance matrices' do
40
- `./bin/lederhosen otu_table --clusters=#{$test_dir}/clusters.uc --output=#{$test_dir}/otu_table.csv`
40
+ `./bin/lederhosen otu_table --clusters=#{$test_dir}/clusters.uc --output=#{$test_dir}/otu_table.csv 2>/dev/null`
41
41
  $?.success?.should be_true
42
42
  end
43
43
 
44
44
  it 'should filter OTU abundance matrices' do
45
- `./bin/lederhosen otu_filter --input=#{$test_dir}/otu_table.csv --output=#{$test_dir}/otu_table.filtered.csv --reads 1 --samples 1`
45
+ `./bin/lederhosen otu_filter --input=#{$test_dir}/otu_table.csv --output=#{$test_dir}/otu_table.filtered.csv --reads 1 --samples 1 2>/dev/null`
46
+ end
47
+
48
+ it 'should uniquify reads' do
49
+ `./bin/lederhosen uniquify --input=#{$test_dir}/sorted.fasta --output=#{$test_dir}/uniqued.fasta --table-out=#{$test_dir}/uniquify.txt 2>/dev/null`
50
+ $?.success?.should be_true
46
51
  end
47
52
 
48
53
  it 'should split joined.fasta into reads for each cluster' do
49
- `./bin/lederhosen split --reads=#{$test_dir}/joined.fasta --clusters=#{$test_dir}/clusters.uc --out-dir=#{$test_dir}/split --min-clst-size=1`
54
+ `./bin/lederhosen split --reads=#{$test_dir}/joined.fasta --clusters=#{$test_dir}/clusters.uc --out-dir=#{$test_dir}/split --min-clst-size=1 2>/dev/null`
50
55
  end
51
56
 
52
57
  it 'should create a fasta file containing representative reads for each cluster' do
53
- `./bin/lederhosen rep_reads --clusters=#{$test_dir}/clusters.uc --joined=#{$test_dir}/filtered.fasta --output=#{$test_dir}/representatives.fasta`
58
+ `./bin/lederhosen rep_reads --clusters=#{$test_dir}/clusters.uc --joined=#{$test_dir}/filtered.fasta --output=#{$test_dir}/representatives.fasta 2>/dev/null`
54
59
  $?.success?.should be_true
55
60
  end
56
61
 
@@ -59,18 +64,14 @@ describe Lederhosen::CLI do
59
64
 
60
65
  it 'should add names to otu abundance matrix given blat output' do
61
66
  levels = %w{kingdom domain phylum class order genus speces}
62
- level =
63
- begin
64
- levels.sample # 1.9 and up
65
- rescue
66
- levels.choice # 1.8 and lower
67
- end
68
- `./bin/lederhosen add_names --table=spec/data/otus.csv --blat=spec/data/blat.txt --level=#{level} --output=#{$test_dir}/named_otus.csv`
67
+ # Ruby 1.9 vs Ruby 1.8
68
+ level = levels.sample rescue levels.choice
69
+ `./bin/lederhosen add_names --table=spec/data/otus.csv --blat=spec/data/blat.txt --level=#{level} --output=#{$test_dir}/named_otus.csv 2>/dev/null`
69
70
  $?.success?.should be_true
70
71
  end
71
72
 
72
73
  it 'should squish otu abundance matrix by same name' do
73
- `./bin/lederhosen squish --csv-file=#{$test_dir}/named_otus.csv --output=#{$test_dir}/squished.csv`
74
+ `./bin/lederhosen squish --csv-file=#{$test_dir}/named_otus.csv --output=#{$test_dir}/squished.csv 2>/dev/null`
74
75
  $?.success?.should be_true
75
76
  end
76
77
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lederhosen
3
3
  version: !ruby/object:Gem::Version
4
- hash: 29
4
+ hash: 3
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 3
9
- - 7
10
- version: 0.3.7
9
+ - 8
10
+ version: 0.3.8
11
11
  platform: ruby
12
12
  authors:
13
13
  - Austin G. Davis-Richardson
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-08-14 00:00:00 Z
18
+ date: 2012-08-20 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  type: :runtime