lederhosen 1.3.7 → 1.3.8
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +2 -0
- data/lederhosen.gemspec +5 -2
- data/lib/lederhosen/no_tasks.rb +43 -36
- data/lib/lederhosen/tasks/otu_table.rb +2 -3
- data/lib/lederhosen/version.rb +1 -1
- data/lib/lederhosen.rb +2 -4
- data/spec/no_tasks_spec.rb +17 -4
- data/spec/spec_helper.rb +3 -1
- metadata +19 -3
data/Gemfile
CHANGED
data/lederhosen.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "lederhosen"
|
8
|
-
s.version = "1.3.7"
|
8
|
+
s.version = "1.3.8"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Austin G. Davis-Richardson"]
|
12
|
-
s.date = "2012-12-
|
12
|
+
s.date = "2012-12-07"
|
13
13
|
s.description = "Various tools for OTU clustering"
|
14
14
|
s.email = "harekrishna@gmail.com"
|
15
15
|
s.executables = ["lederhosen"]
|
@@ -64,6 +64,7 @@ Gem::Specification.new do |s|
|
|
64
64
|
s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
|
65
65
|
s.add_development_dependency(%q<bundler>, [">= 0"])
|
66
66
|
s.add_development_dependency(%q<jeweler>, [">= 0"])
|
67
|
+
s.add_development_dependency(%q<ruby-prof>, [">= 0"])
|
67
68
|
else
|
68
69
|
s.add_dependency(%q<dna>, ["= 0.0.12"])
|
69
70
|
s.add_dependency(%q<progressbar>, [">= 0"])
|
@@ -71,6 +72,7 @@ Gem::Specification.new do |s|
|
|
71
72
|
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
72
73
|
s.add_dependency(%q<bundler>, [">= 0"])
|
73
74
|
s.add_dependency(%q<jeweler>, [">= 0"])
|
75
|
+
s.add_dependency(%q<ruby-prof>, [">= 0"])
|
74
76
|
end
|
75
77
|
else
|
76
78
|
s.add_dependency(%q<dna>, ["= 0.0.12"])
|
@@ -79,6 +81,7 @@ Gem::Specification.new do |s|
|
|
79
81
|
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
80
82
|
s.add_dependency(%q<bundler>, [">= 0"])
|
81
83
|
s.add_dependency(%q<jeweler>, [">= 0"])
|
84
|
+
s.add_dependency(%q<ruby-prof>, [">= 0"])
|
82
85
|
end
|
83
86
|
end
|
84
87
|
|
data/lib/lederhosen/no_tasks.rb
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
module Lederhosen
|
2
2
|
class CLI
|
3
|
+
|
4
|
+
attr_accessor :taxonomy_format
|
5
|
+
|
3
6
|
no_tasks do
|
4
7
|
|
5
8
|
# parse a line of usearch prefix
|
@@ -18,7 +21,7 @@ module Lederhosen
|
|
18
21
|
identity = str[3].to_f
|
19
22
|
|
20
23
|
# parse taxonomic_description
|
21
|
-
taxonomies = parse_taxonomy(taxonomic_description)
|
24
|
+
taxonomies = parse_taxonomy(taxonomic_description) rescue { 'original' => str[9] }
|
22
25
|
|
23
26
|
{ :identity => identity }.merge(taxonomies)
|
24
27
|
end
|
@@ -28,49 +31,57 @@ module Lederhosen
|
|
28
31
|
#
|
29
32
|
# - :taxcollector
|
30
33
|
# - :greengenes
|
34
|
+
# - :qiime (subset of greengenes)
|
31
35
|
#
|
32
36
|
def detect_taxonomy_format(taxonomy)
|
33
37
|
# taxcollector taxonomy starts with an open square bracket
|
34
38
|
if taxonomy =~ /^\[/
|
35
39
|
:taxcollector
|
36
|
-
|
40
|
+
elsif taxonomy =~ /^\d/
|
37
41
|
:greengenes
|
42
|
+
else
|
43
|
+
:qiime
|
38
44
|
end
|
39
45
|
end
|
40
46
|
|
41
47
|
def parse_taxonomy(taxonomy)
|
42
|
-
|
48
|
+
@taxonomy_format ||= detect_taxonomy_format(taxonomy)
|
43
49
|
|
44
|
-
case
|
50
|
+
case @taxonomy_format
|
45
51
|
when :greengenes
|
46
52
|
parse_taxonomy_greengenes(taxonomy)
|
47
53
|
when :taxcollector
|
48
54
|
parse_taxonomy_taxcollector(taxonomy)
|
49
|
-
|
50
|
-
|
55
|
+
when :qiime
|
56
|
+
parse_taxonomy_qiime(taxonomy)
|
57
|
+
else # return original string
|
58
|
+
{ :original => taxonomy }
|
51
59
|
end
|
52
60
|
end
|
53
61
|
|
54
|
-
def
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
'phylum' => /p__(\w*)/,
|
59
|
-
'class' => /c__(\w*)/,
|
60
|
-
'order' => /o__(\w*)/,
|
61
|
-
'family' => /f__(\w*)/,
|
62
|
-
'genus' => /g__(\w*)/,
|
63
|
-
'species' => /s__(\w*)/
|
64
|
-
}
|
62
|
+
def parse_taxonomy_qiime(taxonomy)
|
63
|
+
levels = %w{kingdom phylum class order family genus species}
|
64
|
+
match_data = taxonomy.match(/k__(\w*);p__(\w*);c__(\w*);o__(\w*);f__(\w*);g__(\w*);s__(\w*)/)
|
65
|
+
match_data = match_data[1..-1]
|
65
66
|
|
66
67
|
names = Hash.new
|
67
|
-
|
68
|
-
levels.
|
69
|
-
names[level] = taxonomy.match(regexp)[1] rescue nil
|
70
|
-
end
|
68
|
+
# for some reason Hash[*levels.zip(match_data)] ain't working
|
69
|
+
levels.zip(match_data).each { |l, n| names[l] = n }
|
71
70
|
|
72
71
|
names['original'] = taxonomy
|
72
|
+
names
|
73
|
+
end
|
74
|
+
|
75
|
+
def parse_taxonomy_greengenes(taxonomy)
|
76
|
+
levels = %w{kingdom phylum class order family genus species}
|
77
|
+
match_data = taxonomy.match(/k__(\w*); ?p__(\w*); ?c__(\w*); ?o__(\w*); ?f__(\w*); ?g__(\w*); ?(\w*);/)
|
78
|
+
match_data = match_data[1..-1]
|
79
|
+
|
80
|
+
names = Hash.new
|
81
|
+
# for some reason Hash[*levels.zip(match_data)] ain't working
|
82
|
+
levels.zip(match_data).each { |l, n| names[l] = n }
|
73
83
|
|
84
|
+
names['original'] = taxonomy
|
74
85
|
names
|
75
86
|
end
|
76
87
|
|
@@ -85,25 +96,21 @@ module Lederhosen
|
|
85
96
|
#
|
86
97
|
def parse_taxonomy_taxcollector(taxonomy)
|
87
98
|
|
88
|
-
levels = {
|
89
|
-
'kingdom' => 0,
|
90
|
-
'phylum' => 1,
|
91
|
-
'class' => 2,
|
92
|
-
'order' => 3,
|
93
|
-
'family' => 4,
|
94
|
-
'genus' => 5,
|
95
|
-
'species' => 6,
|
96
|
-
'strain' => 7 }
|
99
|
+
levels = %w{kingdom phylum class order family genus species strain}
|
97
100
|
|
98
|
-
|
101
|
+
match_data =
|
102
|
+
begin
|
103
|
+
taxonomy.match(/\[0\](.*);\[1\](.*);\[2\](.*);\[3\](.*);\[4\](.*);\[5\](.*);\[6\](.*);\[7\](.*);\[8\](.*)/)[1..-1]
|
104
|
+
rescue
|
105
|
+
$stderr.puts taxonomy.inspect
|
106
|
+
return nil
|
107
|
+
end
|
99
108
|
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
end
|
109
|
+
names = Hash.new
|
110
|
+
# for some reason Hash[*levels.zip(match_data)] ain't working
|
111
|
+
levels.zip(match_data).each { |l, n| names[l] = n }
|
104
112
|
|
105
113
|
# check if species name contains the word 'bacterium'
|
106
|
-
# if so, replace it with the strain name
|
107
114
|
if names['species'] =~ /_bacterium/
|
108
115
|
names['species'] = names['strain']
|
109
116
|
end
|
@@ -39,6 +39,7 @@ module Lederhosen
|
|
39
39
|
pbar = ProgressBar.new "loading", input.size
|
40
40
|
|
41
41
|
# Load cluster table
|
42
|
+
|
42
43
|
input.each do |input_file|
|
43
44
|
pbar.inc
|
44
45
|
File.open(input_file) do |handle|
|
@@ -50,11 +51,9 @@ module Lederhosen
|
|
50
51
|
if dat.nil?
|
51
52
|
'unclassified_reads'
|
52
53
|
else
|
53
|
-
dat[level]
|
54
|
+
dat[level] || 'unparsed_name'
|
54
55
|
end
|
55
56
|
|
56
|
-
name = 'unparsed_name' if name.nil?
|
57
|
-
|
58
57
|
level_sample_cluster_count[level][input_file][name] += 1
|
59
58
|
all_names[level] << name
|
60
59
|
end
|
data/lib/lederhosen/version.rb
CHANGED
data/lib/lederhosen.rb
CHANGED
data/spec/no_tasks_spec.rb
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
|
-
lederhosen = Lederhosen::CLI.new
|
4
|
-
|
5
3
|
describe 'no_tasks' do
|
6
4
|
|
7
|
-
let(:greengenes_taxonomies) { ['124 U55236.1 Methanobrevibacter thaueri str. CW k__Archaea; p__Euryarchaeota; c__Methanobacteria; o__Methanobacteriales; f__Methanobacteriaceae; g__Methanobrevibacter; Unclassified; otu_127'
|
8
|
-
|
5
|
+
let(:greengenes_taxonomies) { ['124 U55236.1 Methanobrevibacter thaueri str. CW k__Archaea; p__Euryarchaeota; c__Methanobacteria; o__Methanobacteriales; f__Methanobacteriaceae; g__Methanobrevibacter; Unclassified; otu_127']}
|
6
|
+
let(:qiime_taxonomies) { [ 'k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Rahnella;s__' ]}
|
9
7
|
let(:taxcollector_taxonomies) { ['[0]Bacteria;[1]Actinobacteria;[2]Actinobacteria;[3]null;[4]null;[5]null;[6]bacterium_TH3;[7]bacterium_TH3;[8]bacterium_TH3|M79434|8'] }
|
8
|
+
let(:lederhosen) { Lederhosen::CLI.new }
|
9
|
+
|
10
|
+
it '#parse_usearch_line should parse a line of usearch output'
|
10
11
|
|
11
12
|
it '#detect_taxonomy_format should recognize GreenGenes' do
|
12
13
|
greengenes_taxonomies.each do |greengenes_taxonomy|
|
@@ -48,6 +49,17 @@ describe 'no_tasks' do
|
|
48
49
|
end
|
49
50
|
end
|
50
51
|
|
52
|
+
it '#parse_taxonomy_greengenes should parse qiime taxonomy' do
|
53
|
+
qiime_taxonomies.each do |qiime_taxonomy|
|
54
|
+
taxonomy = lederhosen.parse_taxonomy_qiime(qiime_taxonomy)
|
55
|
+
levels = %w{domain phylum class order family genus species kingdom original}
|
56
|
+
|
57
|
+
taxonomy.keys.each do |v|
|
58
|
+
levels.should include v
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
51
63
|
it '#parse_taxonomy should automatically detect and parse greengenes taxonomy' do
|
52
64
|
greengenes_taxonomies.each do |greengenes_taxonomy|
|
53
65
|
lederhosen.parse_taxonomy(greengenes_taxonomy).should_not be_nil
|
@@ -65,4 +77,5 @@ describe 'no_tasks' do
|
|
65
77
|
tax = lederhosen.parse_taxonomy(t)
|
66
78
|
tax['species'].should == tax['strain']
|
67
79
|
end
|
80
|
+
|
68
81
|
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lederhosen
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.7
|
4
|
+
version: 1.3.8
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-12-
|
12
|
+
date: 2012-12-07 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: dna
|
@@ -107,6 +107,22 @@ dependencies:
|
|
107
107
|
- - ! '>='
|
108
108
|
- !ruby/object:Gem::Version
|
109
109
|
version: '0'
|
110
|
+
- !ruby/object:Gem::Dependency
|
111
|
+
name: ruby-prof
|
112
|
+
requirement: !ruby/object:Gem::Requirement
|
113
|
+
none: false
|
114
|
+
requirements:
|
115
|
+
- - ! '>='
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
none: false
|
122
|
+
requirements:
|
123
|
+
- - ! '>='
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: '0'
|
110
126
|
description: Various tools for OTU clustering
|
111
127
|
email: harekrishna@gmail.com
|
112
128
|
executables:
|
@@ -160,7 +176,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
160
176
|
version: '0'
|
161
177
|
segments:
|
162
178
|
- 0
|
163
|
-
hash:
|
179
|
+
hash: 1569227273029021963
|
164
180
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
165
181
|
none: false
|
166
182
|
requirements:
|