lederhosen 1.3.7 → 1.3.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +2 -0
- data/lederhosen.gemspec +5 -2
- data/lib/lederhosen/no_tasks.rb +43 -36
- data/lib/lederhosen/tasks/otu_table.rb +2 -3
- data/lib/lederhosen/version.rb +1 -1
- data/lib/lederhosen.rb +2 -4
- data/spec/no_tasks_spec.rb +17 -4
- data/spec/spec_helper.rb +3 -1
- metadata +19 -3
data/Gemfile
CHANGED
data/lederhosen.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "lederhosen"
|
8
|
-
s.version = "1.3.
|
8
|
+
s.version = "1.3.8"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Austin G. Davis-Richardson"]
|
12
|
-
s.date = "2012-12-
|
12
|
+
s.date = "2012-12-07"
|
13
13
|
s.description = "Various tools for OTU clustering"
|
14
14
|
s.email = "harekrishna@gmail.com"
|
15
15
|
s.executables = ["lederhosen"]
|
@@ -64,6 +64,7 @@ Gem::Specification.new do |s|
|
|
64
64
|
s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
|
65
65
|
s.add_development_dependency(%q<bundler>, [">= 0"])
|
66
66
|
s.add_development_dependency(%q<jeweler>, [">= 0"])
|
67
|
+
s.add_development_dependency(%q<ruby-prof>, [">= 0"])
|
67
68
|
else
|
68
69
|
s.add_dependency(%q<dna>, ["= 0.0.12"])
|
69
70
|
s.add_dependency(%q<progressbar>, [">= 0"])
|
@@ -71,6 +72,7 @@ Gem::Specification.new do |s|
|
|
71
72
|
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
72
73
|
s.add_dependency(%q<bundler>, [">= 0"])
|
73
74
|
s.add_dependency(%q<jeweler>, [">= 0"])
|
75
|
+
s.add_dependency(%q<ruby-prof>, [">= 0"])
|
74
76
|
end
|
75
77
|
else
|
76
78
|
s.add_dependency(%q<dna>, ["= 0.0.12"])
|
@@ -79,6 +81,7 @@ Gem::Specification.new do |s|
|
|
79
81
|
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
80
82
|
s.add_dependency(%q<bundler>, [">= 0"])
|
81
83
|
s.add_dependency(%q<jeweler>, [">= 0"])
|
84
|
+
s.add_dependency(%q<ruby-prof>, [">= 0"])
|
82
85
|
end
|
83
86
|
end
|
84
87
|
|
data/lib/lederhosen/no_tasks.rb
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
module Lederhosen
|
2
2
|
class CLI
|
3
|
+
|
4
|
+
attr_accessor :taxonomy_format
|
5
|
+
|
3
6
|
no_tasks do
|
4
7
|
|
5
8
|
# parse a line of usearch prefix
|
@@ -18,7 +21,7 @@ module Lederhosen
|
|
18
21
|
identity = str[3].to_f
|
19
22
|
|
20
23
|
# parse taxonomic_description
|
21
|
-
taxonomies = parse_taxonomy(taxonomic_description)
|
24
|
+
taxonomies = parse_taxonomy(taxonomic_description) rescue { 'original' => str[9] }
|
22
25
|
|
23
26
|
{ :identity => identity }.merge(taxonomies)
|
24
27
|
end
|
@@ -28,49 +31,57 @@ module Lederhosen
|
|
28
31
|
#
|
29
32
|
# - :taxcollector
|
30
33
|
# - :greengenes
|
34
|
+
# - :qiime (subset of greengenes)
|
31
35
|
#
|
32
36
|
def detect_taxonomy_format(taxonomy)
|
33
37
|
# taxcollector taxonomy starts with an open square bracket
|
34
38
|
if taxonomy =~ /^\[/
|
35
39
|
:taxcollector
|
36
|
-
|
40
|
+
elsif taxonomy =~ /^\d/
|
37
41
|
:greengenes
|
42
|
+
else
|
43
|
+
:qiime
|
38
44
|
end
|
39
45
|
end
|
40
46
|
|
41
47
|
def parse_taxonomy(taxonomy)
|
42
|
-
|
48
|
+
@taxonomy_format ||= detect_taxonomy_format(taxonomy)
|
43
49
|
|
44
|
-
case
|
50
|
+
case @taxonomy_format
|
45
51
|
when :greengenes
|
46
52
|
parse_taxonomy_greengenes(taxonomy)
|
47
53
|
when :taxcollector
|
48
54
|
parse_taxonomy_taxcollector(taxonomy)
|
49
|
-
|
50
|
-
|
55
|
+
when :qiime
|
56
|
+
parse_taxonomy_qiime(taxonomy)
|
57
|
+
else # return original string
|
58
|
+
{ :original => taxonomy }
|
51
59
|
end
|
52
60
|
end
|
53
61
|
|
54
|
-
def
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
'phylum' => /p__(\w*)/,
|
59
|
-
'class' => /c__(\w*)/,
|
60
|
-
'order' => /o__(\w*)/,
|
61
|
-
'family' => /f__(\w*)/,
|
62
|
-
'genus' => /g__(\w*)/,
|
63
|
-
'species' => /s__(\w*)/
|
64
|
-
}
|
62
|
+
def parse_taxonomy_qiime(taxonomy)
|
63
|
+
levels = %w{kingdom phylum class order family genus species}
|
64
|
+
match_data = taxonomy.match(/k__(\w*);p__(\w*);c__(\w*);o__(\w*);f__(\w*);g__(\w*);s__(\w*)/)
|
65
|
+
match_data = match_data[1..-1]
|
65
66
|
|
66
67
|
names = Hash.new
|
67
|
-
|
68
|
-
levels.
|
69
|
-
names[level] = taxonomy.match(regexp)[1] rescue nil
|
70
|
-
end
|
68
|
+
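# Hash[*levels.zip(match_data)] does not work: the splat passes each [level, name] pair as a separate argument (Hash[levels.zip(match_data)] without the splat would)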
|
69
|
+
levels.zip(match_data).each { |l, n| names[l] = n }
|
71
70
|
|
72
71
|
names['original'] = taxonomy
|
72
|
+
names
|
73
|
+
end
|
74
|
+
|
75
|
+
def parse_taxonomy_greengenes(taxonomy)
|
76
|
+
levels = %w{kingdom phylum class order family genus species}
|
77
|
+
match_data = taxonomy.match(/k__(\w*); ?p__(\w*); ?c__(\w*); ?o__(\w*); ?f__(\w*); ?g__(\w*); ?(\w*);/)
|
78
|
+
match_data = match_data[1..-1]
|
79
|
+
|
80
|
+
names = Hash.new
|
81
|
+
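# Hash[*levels.zip(match_data)] does not work: the splat passes each [level, name] pair as a separate argument (Hash[levels.zip(match_data)] without the splat would)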
|
82
|
+
levels.zip(match_data).each { |l, n| names[l] = n }
|
73
83
|
|
84
|
+
names['original'] = taxonomy
|
74
85
|
names
|
75
86
|
end
|
76
87
|
|
@@ -85,25 +96,21 @@ module Lederhosen
|
|
85
96
|
#
|
86
97
|
def parse_taxonomy_taxcollector(taxonomy)
|
87
98
|
|
88
|
-
levels = {
|
89
|
-
'kingdom' => 0,
|
90
|
-
'phylum' => 1,
|
91
|
-
'class' => 2,
|
92
|
-
'order' => 3,
|
93
|
-
'family' => 4,
|
94
|
-
'genus' => 5,
|
95
|
-
'species' => 6,
|
96
|
-
'strain' => 7 }
|
99
|
+
levels = %w{kingdom phylum class order family genus species strain}
|
97
100
|
|
98
|
-
|
101
|
+
match_data =
|
102
|
+
begin
|
103
|
+
taxonomy.match(/\[0\](.*);\[1\](.*);\[2\](.*);\[3\](.*);\[4\](.*);\[5\](.*);\[6\](.*);\[7\](.*);\[8\](.*)/)[1..-1]
|
104
|
+
rescue
|
105
|
+
$stderr.puts taxonomy.inspect
|
106
|
+
return nil
|
107
|
+
end
|
99
108
|
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
end
|
109
|
+
names = Hash.new
|
110
|
+
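# Hash[*levels.zip(match_data)] does not work: the splat passes each [level, name] pair as a separate argument (Hash[levels.zip(match_data)] without the splat would)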
|
111
|
+
levels.zip(match_data).each { |l, n| names[l] = n }
|
104
112
|
|
105
113
|
# check if species name contains the word 'bacterium'
|
106
|
-
# if so, replace it with the strain name
|
107
114
|
if names['species'] =~ /_bacterium/
|
108
115
|
names['species'] = names['strain']
|
109
116
|
end
|
@@ -39,6 +39,7 @@ module Lederhosen
|
|
39
39
|
pbar = ProgressBar.new "loading", input.size
|
40
40
|
|
41
41
|
# Load cluster table
|
42
|
+
|
42
43
|
input.each do |input_file|
|
43
44
|
pbar.inc
|
44
45
|
File.open(input_file) do |handle|
|
@@ -50,11 +51,9 @@ module Lederhosen
|
|
50
51
|
if dat.nil?
|
51
52
|
'unclassified_reads'
|
52
53
|
else
|
53
|
-
dat[level]
|
54
|
+
dat[level] || 'unparsed_name'
|
54
55
|
end
|
55
56
|
|
56
|
-
name = 'unparsed_name' if name.nil?
|
57
|
-
|
58
57
|
level_sample_cluster_count[level][input_file][name] += 1
|
59
58
|
all_names[level] << name
|
60
59
|
end
|
data/lib/lederhosen/version.rb
CHANGED
data/lib/lederhosen.rb
CHANGED
data/spec/no_tasks_spec.rb
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
|
-
lederhosen = Lederhosen::CLI.new
|
4
|
-
|
5
3
|
describe 'no_tasks' do
|
6
4
|
|
7
|
-
let(:greengenes_taxonomies) { ['124 U55236.1 Methanobrevibacter thaueri str. CW k__Archaea; p__Euryarchaeota; c__Methanobacteria; o__Methanobacteriales; f__Methanobacteriaceae; g__Methanobrevibacter; Unclassified; otu_127'
|
8
|
-
|
5
|
+
let(:greengenes_taxonomies) { ['124 U55236.1 Methanobrevibacter thaueri str. CW k__Archaea; p__Euryarchaeota; c__Methanobacteria; o__Methanobacteriales; f__Methanobacteriaceae; g__Methanobrevibacter; Unclassified; otu_127']}
|
6
|
+
let(:qiime_taxonomies) { [ 'k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Rahnella;s__' ]}
|
9
7
|
let(:taxcollector_taxonomies) { ['[0]Bacteria;[1]Actinobacteria;[2]Actinobacteria;[3]null;[4]null;[5]null;[6]bacterium_TH3;[7]bacterium_TH3;[8]bacterium_TH3|M79434|8'] }
|
8
|
+
let(:lederhosen) { Lederhosen::CLI.new }
|
9
|
+
|
10
|
+
it '#parse_usearch_line should parse a line of usearch output'
|
10
11
|
|
11
12
|
it '#detect_taxonomy_format should recognize GreenGenes' do
|
12
13
|
greengenes_taxonomies.each do |greengenes_taxonomy|
|
@@ -48,6 +49,17 @@ describe 'no_tasks' do
|
|
48
49
|
end
|
49
50
|
end
|
50
51
|
|
52
|
+
it '#parse_taxonomy_greengenes should parse qiime taxonomy' do
|
53
|
+
qiime_taxonomies.each do |qiime_taxonomy|
|
54
|
+
taxonomy = lederhosen.parse_taxonomy_qiime(qiime_taxonomy)
|
55
|
+
levels = %w{domain phylum class order family genus species kingdom original}
|
56
|
+
|
57
|
+
taxonomy.keys.each do |v|
|
58
|
+
levels.should include v
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
51
63
|
it '#parse_taxonomy should automatically detect and parse greengenes taxonomy' do
|
52
64
|
greengenes_taxonomies.each do |greengenes_taxonomy|
|
53
65
|
lederhosen.parse_taxonomy(greengenes_taxonomy).should_not be_nil
|
@@ -65,4 +77,5 @@ describe 'no_tasks' do
|
|
65
77
|
tax = lederhosen.parse_taxonomy(t)
|
66
78
|
tax['species'].should == tax['strain']
|
67
79
|
end
|
80
|
+
|
68
81
|
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lederhosen
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.
|
4
|
+
version: 1.3.8
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-12-
|
12
|
+
date: 2012-12-07 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: dna
|
@@ -107,6 +107,22 @@ dependencies:
|
|
107
107
|
- - ! '>='
|
108
108
|
- !ruby/object:Gem::Version
|
109
109
|
version: '0'
|
110
|
+
- !ruby/object:Gem::Dependency
|
111
|
+
name: ruby-prof
|
112
|
+
requirement: !ruby/object:Gem::Requirement
|
113
|
+
none: false
|
114
|
+
requirements:
|
115
|
+
- - ! '>='
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
none: false
|
122
|
+
requirements:
|
123
|
+
- - ! '>='
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: '0'
|
110
126
|
description: Various tools for OTU clustering
|
111
127
|
email: harekrishna@gmail.com
|
112
128
|
executables:
|
@@ -160,7 +176,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
160
176
|
version: '0'
|
161
177
|
segments:
|
162
178
|
- 0
|
163
|
-
hash:
|
179
|
+
hash: 1569227273029021963
|
164
180
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
165
181
|
none: false
|
166
182
|
requirements:
|