lederhosen 1.3.7 → 1.3.8

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile CHANGED
@@ -6,10 +6,12 @@ gem 'thor'
6
6
 
7
7
  group :test do
8
8
  gem 'rspec'
9
+ gem 'rspec-prof'
9
10
  end
10
11
 
11
12
  group :development do
12
13
  gem 'rdoc', '~> 3.12'
13
14
  gem 'bundler'
14
15
  gem 'jeweler'
16
+ gem 'ruby-prof'
15
17
  end
data/lederhosen.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "lederhosen"
8
- s.version = "1.3.7"
8
+ s.version = "1.3.8"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Austin G. Davis-Richardson"]
12
- s.date = "2012-12-06"
12
+ s.date = "2012-12-07"
13
13
  s.description = "Various tools for OTU clustering"
14
14
  s.email = "harekrishna@gmail.com"
15
15
  s.executables = ["lederhosen"]
@@ -64,6 +64,7 @@ Gem::Specification.new do |s|
64
64
  s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
65
65
  s.add_development_dependency(%q<bundler>, [">= 0"])
66
66
  s.add_development_dependency(%q<jeweler>, [">= 0"])
67
+ s.add_development_dependency(%q<ruby-prof>, [">= 0"])
67
68
  else
68
69
  s.add_dependency(%q<dna>, ["= 0.0.12"])
69
70
  s.add_dependency(%q<progressbar>, [">= 0"])
@@ -71,6 +72,7 @@ Gem::Specification.new do |s|
71
72
  s.add_dependency(%q<rdoc>, ["~> 3.12"])
72
73
  s.add_dependency(%q<bundler>, [">= 0"])
73
74
  s.add_dependency(%q<jeweler>, [">= 0"])
75
+ s.add_dependency(%q<ruby-prof>, [">= 0"])
74
76
  end
75
77
  else
76
78
  s.add_dependency(%q<dna>, ["= 0.0.12"])
@@ -79,6 +81,7 @@ Gem::Specification.new do |s|
79
81
  s.add_dependency(%q<rdoc>, ["~> 3.12"])
80
82
  s.add_dependency(%q<bundler>, [">= 0"])
81
83
  s.add_dependency(%q<jeweler>, [">= 0"])
84
+ s.add_dependency(%q<ruby-prof>, [">= 0"])
82
85
  end
83
86
  end
84
87
 
@@ -1,5 +1,8 @@
1
1
  module Lederhosen
2
2
  class CLI
3
+
4
+ attr_accessor :taxonomy_format
5
+
3
6
  no_tasks do
4
7
 
5
8
  # parse a line of usearch prefix
@@ -18,7 +21,7 @@ module Lederhosen
18
21
  identity = str[3].to_f
19
22
 
20
23
  # parse taxonomic_description
21
- taxonomies = parse_taxonomy(taxonomic_description)
24
+ taxonomies = parse_taxonomy(taxonomic_description) rescue { 'original' => str[9] }
22
25
 
23
26
  { :identity => identity }.merge(taxonomies)
24
27
  end
@@ -28,49 +31,57 @@ module Lederhosen
28
31
  #
29
32
  # - :taxcollector
30
33
  # - :greengenes
34
+ # - :qiime (subset of greengenes)
31
35
  #
32
36
  def detect_taxonomy_format(taxonomy)
33
37
  # taxcollector taxonomy starts with an open square bracket
34
38
  if taxonomy =~ /^\[/
35
39
  :taxcollector
36
- else
40
+ elsif taxonomy =~ /^\d/
37
41
  :greengenes
42
+ else
43
+ :qiime
38
44
  end
39
45
  end
40
46
 
41
47
  def parse_taxonomy(taxonomy)
42
- format = detect_taxonomy_format(taxonomy)
48
+ @taxonomy_format ||= detect_taxonomy_format(taxonomy)
43
49
 
44
- case format
50
+ case @taxonomy_format
45
51
  when :greengenes
46
52
  parse_taxonomy_greengenes(taxonomy)
47
53
  when :taxcollector
48
54
  parse_taxonomy_taxcollector(taxonomy)
49
- else
50
- fail 'unknown format!'
55
+ when :qiime
56
+ parse_taxonomy_qiime(taxonomy)
57
+ else # return original string
58
+ { :original => taxonomy }
51
59
  end
52
60
  end
53
61
 
54
- def parse_taxonomy_greengenes(taxonomy)
55
-
56
- levels = { 'domain' => /k__(\w*)/,
57
- 'kingdom' => /k__(\w*)/,
58
- 'phylum' => /p__(\w*)/,
59
- 'class' => /c__(\w*)/,
60
- 'order' => /o__(\w*)/,
61
- 'family' => /f__(\w*)/,
62
- 'genus' => /g__(\w*)/,
63
- 'species' => /s__(\w*)/
64
- }
62
+ def parse_taxonomy_qiime(taxonomy)
63
+ levels = %w{kingdom phylum class order family genus species}
64
+ match_data = taxonomy.match(/k__(\w*);p__(\w*);c__(\w*);o__(\w*);f__(\w*);g__(\w*);s__(\w*)/)
65
+ match_data = match_data[1..-1]
65
66
 
66
67
  names = Hash.new
67
-
68
- levels.each_pair do |level, regexp|
69
- names[level] = taxonomy.match(regexp)[1] rescue nil
70
- end
68
+ # for some reason Hash[*levels.zip(match_data)] ain't working
69
+ levels.zip(match_data).each { |l, n| names[l] = n }
71
70
 
72
71
  names['original'] = taxonomy
72
+ names
73
+ end
74
+
75
+ def parse_taxonomy_greengenes(taxonomy)
76
+ levels = %w{kingdom phylum class order family genus species}
77
+ match_data = taxonomy.match(/k__(\w*); ?p__(\w*); ?c__(\w*); ?o__(\w*); ?f__(\w*); ?g__(\w*); ?(\w*);/)
78
+ match_data = match_data[1..-1]
79
+
80
+ names = Hash.new
81
+ # for some reason Hash[*levels.zip(match_data)] ain't working
82
+ levels.zip(match_data).each { |l, n| names[l] = n }
73
83
 
84
+ names['original'] = taxonomy
74
85
  names
75
86
  end
76
87
 
@@ -85,25 +96,21 @@ module Lederhosen
85
96
  #
86
97
  def parse_taxonomy_taxcollector(taxonomy)
87
98
 
88
- levels = { 'domain' => 0,
89
- 'kingdom' => 0,
90
- 'phylum' => 1,
91
- 'class' => 2,
92
- 'order' => 3,
93
- 'family' => 4,
94
- 'genus' => 5,
95
- 'species' => 6,
96
- 'strain' => 7 }
99
+ levels = %w{kingdom phylum class order family genus species strain}
97
100
 
98
- names = Hash.new
101
+ match_data =
102
+ begin
103
+ taxonomy.match(/\[0\](.*);\[1\](.*);\[2\](.*);\[3\](.*);\[4\](.*);\[5\](.*);\[6\](.*);\[7\](.*);\[8\](.*)/)[1..-1]
104
+ rescue
105
+ $stderr.puts taxonomy.inspect
106
+ return nil
107
+ end
99
108
 
100
- levels.each_pair do |level, num|
101
- name = taxonomy.match(/\[#{num}\](\w*)[;\[]/)[1] rescue nil
102
- names[level] = name
103
- end
109
+ names = Hash.new
110
+ # for some reason Hash[*levels.zip(match_data)] ain't working
111
+ levels.zip(match_data).each { |l, n| names[l] = n }
104
112
 
105
113
  # check if species name contains the word 'bacterium'
106
- # if so, replace it with the strain name
107
114
  if names['species'] =~ /_bacterium/
108
115
  names['species'] = names['strain']
109
116
  end
@@ -39,6 +39,7 @@ module Lederhosen
39
39
  pbar = ProgressBar.new "loading", input.size
40
40
 
41
41
  # Load cluster table
42
+
42
43
  input.each do |input_file|
43
44
  pbar.inc
44
45
  File.open(input_file) do |handle|
@@ -50,11 +51,9 @@ module Lederhosen
50
51
  if dat.nil?
51
52
  'unclassified_reads'
52
53
  else
53
- dat[level]
54
+ dat[level] || 'unparsed_name'
54
55
  end
55
56
 
56
- name = 'unparsed_name' if name.nil?
57
-
58
57
  level_sample_cluster_count[level][input_file][name] += 1
59
58
  all_names[level] << name
60
59
  end
@@ -3,7 +3,7 @@ module Lederhosen
3
3
  MAJOR = 1
4
4
  MINOR = 3
5
5
  CODENAME = 'Dirndl' # changes for minor versions
6
- PATCH = 7
6
+ PATCH = 8
7
7
 
8
8
  STRING = [MAJOR, MINOR, PATCH].join('.')
9
9
  end
data/lib/lederhosen.rb CHANGED
@@ -1,8 +1,6 @@
1
1
  require 'rubygems'
2
- require 'thor'
3
- require 'progressbar'
4
- require 'zlib'
5
- require 'dna'
2
+ require 'bundler'
3
+ Bundler.require :default
6
4
  require 'set'
7
5
 
8
6
  Dir.glob(File.join(File.dirname(__FILE__), 'lederhosen', '*.rb')).each { |f| require f }
@@ -1,12 +1,13 @@
1
1
  require 'spec_helper'
2
2
 
3
- lederhosen = Lederhosen::CLI.new
4
-
5
3
  describe 'no_tasks' do
6
4
 
7
- let(:greengenes_taxonomies) { ['124 U55236.1 Methanobrevibacter thaueri str. CW k__Archaea; p__Euryarchaeota; c__Methanobacteria; o__Methanobacteriales; f__Methanobacteriaceae; g__Methanobrevibacter; Unclassified; otu_127',
8
- 'k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Rahnella;s__' ]}
5
+ let(:greengenes_taxonomies) { ['124 U55236.1 Methanobrevibacter thaueri str. CW k__Archaea; p__Euryarchaeota; c__Methanobacteria; o__Methanobacteriales; f__Methanobacteriaceae; g__Methanobrevibacter; Unclassified; otu_127']}
6
+ let(:qiime_taxonomies) { [ 'k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Rahnella;s__' ]}
9
7
  let(:taxcollector_taxonomies) { ['[0]Bacteria;[1]Actinobacteria;[2]Actinobacteria;[3]null;[4]null;[5]null;[6]bacterium_TH3;[7]bacterium_TH3;[8]bacterium_TH3|M79434|8'] }
8
+ let(:lederhosen) { Lederhosen::CLI.new }
9
+
10
+ it '#parse_usearch_line should parse a line of usearch output'
10
11
 
11
12
  it '#detect_taxonomy_format should recognize GreenGenes' do
12
13
  greengenes_taxonomies.each do |greengenes_taxonomy|
@@ -48,6 +49,17 @@ describe 'no_tasks' do
48
49
  end
49
50
  end
50
51
 
52
+ it '#parse_taxonomy_greengenes should parse qiime taxonomy' do
53
+ qiime_taxonomies.each do |qiime_taxonomy|
54
+ taxonomy = lederhosen.parse_taxonomy_qiime(qiime_taxonomy)
55
+ levels = %w{domain phylum class order family genus species kingdom original}
56
+
57
+ taxonomy.keys.each do |v|
58
+ levels.should include v
59
+ end
60
+ end
61
+ end
62
+
51
63
  it '#parse_taxonomy should automatically detect and parse greengenes taxonomy' do
52
64
  greengenes_taxonomies.each do |greengenes_taxonomy|
53
65
  lederhosen.parse_taxonomy(greengenes_taxonomy).should_not be_nil
@@ -65,4 +77,5 @@ describe 'no_tasks' do
65
77
  tax = lederhosen.parse_taxonomy(t)
66
78
  tax['species'].should == tax['strain']
67
79
  end
80
+
68
81
  end
data/spec/spec_helper.rb CHANGED
@@ -1,6 +1,8 @@
1
1
  $:.unshift File.join(File.dirname(__FILE__), '..')
2
+
2
3
  require 'lederhosen'
3
- require 'rspec'
4
+
5
+ Bundler.require :test, :development
4
6
 
5
7
  $test_dir = ENV['TEST_DIR'] || "/tmp/lederhosen_test_#{(0...8).map{65.+(rand(25)).chr}.join}/"
6
8
  `mkdir -p #{$test_dir}`
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lederhosen
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.7
4
+ version: 1.3.8
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-12-06 00:00:00.000000000 Z
12
+ date: 2012-12-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: dna
@@ -107,6 +107,22 @@ dependencies:
107
107
  - - ! '>='
108
108
  - !ruby/object:Gem::Version
109
109
  version: '0'
110
+ - !ruby/object:Gem::Dependency
111
+ name: ruby-prof
112
+ requirement: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ! '>='
124
+ - !ruby/object:Gem::Version
125
+ version: '0'
110
126
  description: Various tools for OTU clustering
111
127
  email: harekrishna@gmail.com
112
128
  executables:
@@ -160,7 +176,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
160
176
  version: '0'
161
177
  segments:
162
178
  - 0
163
- hash: 3956169746555075526
179
+ hash: 1569227273029021963
164
180
  required_rubygems_version: !ruby/object:Gem::Requirement
165
181
  none: false
166
182
  requirements: