lederhosen 1.3.7 → 1.3.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile CHANGED
@@ -6,10 +6,12 @@ gem 'thor'
6
6
 
7
7
  group :test do
8
8
  gem 'rspec'
9
+ gem 'rspec-prof'
9
10
  end
10
11
 
11
12
  group :development do
12
13
  gem 'rdoc', '~> 3.12'
13
14
  gem 'bundler'
14
15
  gem 'jeweler'
16
+ gem 'ruby-prof'
15
17
  end
data/lederhosen.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "lederhosen"
8
- s.version = "1.3.7"
8
+ s.version = "1.3.8"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Austin G. Davis-Richardson"]
12
- s.date = "2012-12-06"
12
+ s.date = "2012-12-07"
13
13
  s.description = "Various tools for OTU clustering"
14
14
  s.email = "harekrishna@gmail.com"
15
15
  s.executables = ["lederhosen"]
@@ -64,6 +64,7 @@ Gem::Specification.new do |s|
64
64
  s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
65
65
  s.add_development_dependency(%q<bundler>, [">= 0"])
66
66
  s.add_development_dependency(%q<jeweler>, [">= 0"])
67
+ s.add_development_dependency(%q<ruby-prof>, [">= 0"])
67
68
  else
68
69
  s.add_dependency(%q<dna>, ["= 0.0.12"])
69
70
  s.add_dependency(%q<progressbar>, [">= 0"])
@@ -71,6 +72,7 @@ Gem::Specification.new do |s|
71
72
  s.add_dependency(%q<rdoc>, ["~> 3.12"])
72
73
  s.add_dependency(%q<bundler>, [">= 0"])
73
74
  s.add_dependency(%q<jeweler>, [">= 0"])
75
+ s.add_dependency(%q<ruby-prof>, [">= 0"])
74
76
  end
75
77
  else
76
78
  s.add_dependency(%q<dna>, ["= 0.0.12"])
@@ -79,6 +81,7 @@ Gem::Specification.new do |s|
79
81
  s.add_dependency(%q<rdoc>, ["~> 3.12"])
80
82
  s.add_dependency(%q<bundler>, [">= 0"])
81
83
  s.add_dependency(%q<jeweler>, [">= 0"])
84
+ s.add_dependency(%q<ruby-prof>, [">= 0"])
82
85
  end
83
86
  end
84
87
 
@@ -1,5 +1,8 @@
1
1
  module Lederhosen
2
2
  class CLI
3
+
4
+ attr_accessor :taxonomy_format
5
+
3
6
  no_tasks do
4
7
 
5
8
  # parse a line of usearch prefix
@@ -18,7 +21,7 @@ module Lederhosen
18
21
  identity = str[3].to_f
19
22
 
20
23
  # parse taxonomic_description
21
- taxonomies = parse_taxonomy(taxonomic_description)
24
+ taxonomies = parse_taxonomy(taxonomic_description) rescue { 'original' => str[9] }
22
25
 
23
26
  { :identity => identity }.merge(taxonomies)
24
27
  end
@@ -28,49 +31,57 @@ module Lederhosen
28
31
  #
29
32
  # - :taxcollector
30
33
  # - :greengenes
34
+ # - :qiime (subset of greengenes)
31
35
  #
32
36
  def detect_taxonomy_format(taxonomy)
33
37
  # taxcollector taxonomy starts with an open square bracket
34
38
  if taxonomy =~ /^\[/
35
39
  :taxcollector
36
- else
40
+ elsif taxonomy =~ /^\d/
37
41
  :greengenes
42
+ else
43
+ :qiime
38
44
  end
39
45
  end
40
46
 
41
47
  def parse_taxonomy(taxonomy)
42
- format = detect_taxonomy_format(taxonomy)
48
+ @taxonomy_format ||= detect_taxonomy_format(taxonomy)
43
49
 
44
- case format
50
+ case @taxonomy_format
45
51
  when :greengenes
46
52
  parse_taxonomy_greengenes(taxonomy)
47
53
  when :taxcollector
48
54
  parse_taxonomy_taxcollector(taxonomy)
49
- else
50
- fail 'unknown format!'
55
+ when :qiime
56
+ parse_taxonomy_qiime(taxonomy)
57
+ else # return original string
58
+ { :original => taxonomy }
51
59
  end
52
60
  end
53
61
 
54
- def parse_taxonomy_greengenes(taxonomy)
55
-
56
- levels = { 'domain' => /k__(\w*)/,
57
- 'kingdom' => /k__(\w*)/,
58
- 'phylum' => /p__(\w*)/,
59
- 'class' => /c__(\w*)/,
60
- 'order' => /o__(\w*)/,
61
- 'family' => /f__(\w*)/,
62
- 'genus' => /g__(\w*)/,
63
- 'species' => /s__(\w*)/
64
- }
62
+ def parse_taxonomy_qiime(taxonomy)
63
+ levels = %w{kingdom phylum class order family genus species}
64
+ match_data = taxonomy.match(/k__(\w*);p__(\w*);c__(\w*);o__(\w*);f__(\w*);g__(\w*);s__(\w*)/)
65
+ match_data = match_data[1..-1]
65
66
 
66
67
  names = Hash.new
67
-
68
- levels.each_pair do |level, regexp|
69
- names[level] = taxonomy.match(regexp)[1] rescue nil
70
- end
68
+ # for some reason Hash[*levels.zip(match_data)] ain't working
69
+ levels.zip(match_data).each { |l, n| names[l] = n }
71
70
 
72
71
  names['original'] = taxonomy
72
+ names
73
+ end
74
+
75
+ def parse_taxonomy_greengenes(taxonomy)
76
+ levels = %w{kingdom phylum class order family genus species}
77
+ match_data = taxonomy.match(/k__(\w*); ?p__(\w*); ?c__(\w*); ?o__(\w*); ?f__(\w*); ?g__(\w*); ?(\w*);/)
78
+ match_data = match_data[1..-1]
79
+
80
+ names = Hash.new
81
+ # for some reason Hash[*levels.zip(match_data)] ain't working
82
+ levels.zip(match_data).each { |l, n| names[l] = n }
73
83
 
84
+ names['original'] = taxonomy
74
85
  names
75
86
  end
76
87
 
@@ -85,25 +96,21 @@ module Lederhosen
85
96
  #
86
97
  def parse_taxonomy_taxcollector(taxonomy)
87
98
 
88
- levels = { 'domain' => 0,
89
- 'kingdom' => 0,
90
- 'phylum' => 1,
91
- 'class' => 2,
92
- 'order' => 3,
93
- 'family' => 4,
94
- 'genus' => 5,
95
- 'species' => 6,
96
- 'strain' => 7 }
99
+ levels = %w{kingdom phylum class order family genus species strain}
97
100
 
98
- names = Hash.new
101
+ match_data =
102
+ begin
103
+ taxonomy.match(/\[0\](.*);\[1\](.*);\[2\](.*);\[3\](.*);\[4\](.*);\[5\](.*);\[6\](.*);\[7\](.*);\[8\](.*)/)[1..-1]
104
+ rescue
105
+ $stderr.puts taxonomy.inspect
106
+ return nil
107
+ end
99
108
 
100
- levels.each_pair do |level, num|
101
- name = taxonomy.match(/\[#{num}\](\w*)[;\[]/)[1] rescue nil
102
- names[level] = name
103
- end
109
+ names = Hash.new
110
+ # for some reason Hash[*levels.zip(match_data)] ain't working
111
+ levels.zip(match_data).each { |l, n| names[l] = n }
104
112
 
105
113
  # check if species name contains the word 'bacterium'
106
- # if so, replace it with the strain name
107
114
  if names['species'] =~ /_bacterium/
108
115
  names['species'] = names['strain']
109
116
  end
@@ -39,6 +39,7 @@ module Lederhosen
39
39
  pbar = ProgressBar.new "loading", input.size
40
40
 
41
41
  # Load cluster table
42
+
42
43
  input.each do |input_file|
43
44
  pbar.inc
44
45
  File.open(input_file) do |handle|
@@ -50,11 +51,9 @@ module Lederhosen
50
51
  if dat.nil?
51
52
  'unclassified_reads'
52
53
  else
53
- dat[level]
54
+ dat[level] || 'unparsed_name'
54
55
  end
55
56
 
56
- name = 'unparsed_name' if name.nil?
57
-
58
57
  level_sample_cluster_count[level][input_file][name] += 1
59
58
  all_names[level] << name
60
59
  end
@@ -3,7 +3,7 @@ module Lederhosen
3
3
  MAJOR = 1
4
4
  MINOR = 3
5
5
  CODENAME = 'Dirndl' # changes for minor versions
6
- PATCH = 7
6
+ PATCH = 8
7
7
 
8
8
  STRING = [MAJOR, MINOR, PATCH].join('.')
9
9
  end
data/lib/lederhosen.rb CHANGED
@@ -1,8 +1,6 @@
1
1
  require 'rubygems'
2
- require 'thor'
3
- require 'progressbar'
4
- require 'zlib'
5
- require 'dna'
2
+ require 'bundler'
3
+ Bundler.require :default
6
4
  require 'set'
7
5
 
8
6
  Dir.glob(File.join(File.dirname(__FILE__), 'lederhosen', '*.rb')).each { |f| require f }
@@ -1,12 +1,13 @@
1
1
  require 'spec_helper'
2
2
 
3
- lederhosen = Lederhosen::CLI.new
4
-
5
3
  describe 'no_tasks' do
6
4
 
7
- let(:greengenes_taxonomies) { ['124 U55236.1 Methanobrevibacter thaueri str. CW k__Archaea; p__Euryarchaeota; c__Methanobacteria; o__Methanobacteriales; f__Methanobacteriaceae; g__Methanobrevibacter; Unclassified; otu_127',
8
- 'k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Rahnella;s__' ]}
5
+ let(:greengenes_taxonomies) { ['124 U55236.1 Methanobrevibacter thaueri str. CW k__Archaea; p__Euryarchaeota; c__Methanobacteria; o__Methanobacteriales; f__Methanobacteriaceae; g__Methanobrevibacter; Unclassified; otu_127']}
6
+ let(:qiime_taxonomies) { [ 'k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Rahnella;s__' ]}
9
7
  let(:taxcollector_taxonomies) { ['[0]Bacteria;[1]Actinobacteria;[2]Actinobacteria;[3]null;[4]null;[5]null;[6]bacterium_TH3;[7]bacterium_TH3;[8]bacterium_TH3|M79434|8'] }
8
+ let(:lederhosen) { Lederhosen::CLI.new }
9
+
10
+ it '#parse_usearch_line should parse a line of usearch output'
10
11
 
11
12
  it '#detect_taxonomy_format should recognize GreenGenes' do
12
13
  greengenes_taxonomies.each do |greengenes_taxonomy|
@@ -48,6 +49,17 @@ describe 'no_tasks' do
48
49
  end
49
50
  end
50
51
 
52
+ it '#parse_taxonomy_greengenes should parse qiime taxonomy' do
53
+ qiime_taxonomies.each do |qiime_taxonomy|
54
+ taxonomy = lederhosen.parse_taxonomy_qiime(qiime_taxonomy)
55
+ levels = %w{domain phylum class order family genus species kingdom original}
56
+
57
+ taxonomy.keys.each do |v|
58
+ levels.should include v
59
+ end
60
+ end
61
+ end
62
+
51
63
  it '#parse_taxonomy should automatically detect and parse greengenes taxonomy' do
52
64
  greengenes_taxonomies.each do |greengenes_taxonomy|
53
65
  lederhosen.parse_taxonomy(greengenes_taxonomy).should_not be_nil
@@ -65,4 +77,5 @@ describe 'no_tasks' do
65
77
  tax = lederhosen.parse_taxonomy(t)
66
78
  tax['species'].should == tax['strain']
67
79
  end
80
+
68
81
  end
data/spec/spec_helper.rb CHANGED
@@ -1,6 +1,8 @@
1
1
  $:.unshift File.join(File.dirname(__FILE__), '..')
2
+
2
3
  require 'lederhosen'
3
- require 'rspec'
4
+
5
+ Bundler.require :test, :development
4
6
 
5
7
  $test_dir = ENV['TEST_DIR'] || "/tmp/lederhosen_test_#{(0...8).map{65.+(rand(25)).chr}.join}/"
6
8
  `mkdir -p #{$test_dir}`
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lederhosen
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.7
4
+ version: 1.3.8
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-12-06 00:00:00.000000000 Z
12
+ date: 2012-12-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: dna
@@ -107,6 +107,22 @@ dependencies:
107
107
  - - ! '>='
108
108
  - !ruby/object:Gem::Version
109
109
  version: '0'
110
+ - !ruby/object:Gem::Dependency
111
+ name: ruby-prof
112
+ requirement: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ! '>='
124
+ - !ruby/object:Gem::Version
125
+ version: '0'
110
126
  description: Various tools for OTU clustering
111
127
  email: harekrishna@gmail.com
112
128
  executables:
@@ -160,7 +176,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
160
176
  version: '0'
161
177
  segments:
162
178
  - 0
163
- hash: 3956169746555075526
179
+ hash: 1569227273029021963
164
180
  required_rubygems_version: !ruby/object:Gem::Requirement
165
181
  none: false
166
182
  requirements: