lederhosen 2.0.0 → 2.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +1 -4
- data/lederhosen.gemspec +3 -6
- data/lib/lederhosen/uc_parser.rb +5 -5
- data/lib/lederhosen/version.rb +1 -1
- data/readme.md +7 -4
- data/spec/cli_spec.rb +15 -15
- metadata +96 -102
data/Gemfile
CHANGED
@@ -6,13 +6,10 @@ gem 'thor', '0.16.0'
|
|
6
6
|
|
7
7
|
group :test do
|
8
8
|
gem 'rspec', '2.12.0'
|
9
|
-
gem 'rspec-prof', '0.0.3'
|
10
9
|
gem 'pry'
|
11
|
-
# gem 'plymouth'
|
12
10
|
end
|
13
11
|
|
14
12
|
group :development do
|
15
13
|
gem 'rdoc', '~> 3.12'
|
16
14
|
gem 'jeweler', '1.8.4'
|
17
|
-
|
18
|
-
end
|
15
|
+
end
|
data/lederhosen.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "lederhosen"
|
8
|
-
s.version = "2.0.0"
|
8
|
+
s.version = "2.0.1"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Austin G. Davis-Richardson"]
|
12
|
-
s.date = "2013-01-
|
12
|
+
s.date = "2013-01-25"
|
13
13
|
s.description = "Various tools for OTU clustering"
|
14
14
|
s.email = "harekrishna@gmail.com"
|
15
15
|
s.executables = ["lederhosen"]
|
@@ -55,7 +55,7 @@ Gem::Specification.new do |s|
|
|
55
55
|
s.homepage = "http://audy.github.com/lederhosen"
|
56
56
|
s.licenses = ["MIT"]
|
57
57
|
s.require_paths = ["lib"]
|
58
|
-
s.rubygems_version = "1.8.
|
58
|
+
s.rubygems_version = "1.8.24"
|
59
59
|
s.summary = "OTU Clustering"
|
60
60
|
|
61
61
|
if s.respond_to? :specification_version then
|
@@ -67,14 +67,12 @@ Gem::Specification.new do |s|
|
|
67
67
|
s.add_runtime_dependency(%q<thor>, ["= 0.16.0"])
|
68
68
|
s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
|
69
69
|
s.add_development_dependency(%q<jeweler>, ["= 1.8.4"])
|
70
|
-
s.add_development_dependency(%q<ruby-prof>, ["= 0.11.2"])
|
71
70
|
else
|
72
71
|
s.add_dependency(%q<dna>, [">= 0"])
|
73
72
|
s.add_dependency(%q<progressbar>, ["= 0.12.0"])
|
74
73
|
s.add_dependency(%q<thor>, ["= 0.16.0"])
|
75
74
|
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
76
75
|
s.add_dependency(%q<jeweler>, ["= 1.8.4"])
|
77
|
-
s.add_dependency(%q<ruby-prof>, ["= 0.11.2"])
|
78
76
|
end
|
79
77
|
else
|
80
78
|
s.add_dependency(%q<dna>, [">= 0"])
|
@@ -82,7 +80,6 @@ Gem::Specification.new do |s|
|
|
82
80
|
s.add_dependency(%q<thor>, ["= 0.16.0"])
|
83
81
|
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
84
82
|
s.add_dependency(%q<jeweler>, ["= 1.8.4"])
|
85
|
-
s.add_dependency(%q<ruby-prof>, ["= 0.11.2"])
|
86
83
|
end
|
87
84
|
end
|
88
85
|
|
data/lib/lederhosen/uc_parser.rb
CHANGED
@@ -14,11 +14,11 @@ module Lederhosen
|
|
14
14
|
end
|
15
15
|
|
16
16
|
def hit?
|
17
|
-
@source.
|
17
|
+
@source.hit_type == 'H'
|
18
18
|
end
|
19
19
|
|
20
20
|
def miss?
|
21
|
-
@source.
|
21
|
+
@source.hit_type == 'N'
|
22
22
|
end
|
23
23
|
end
|
24
24
|
|
@@ -63,7 +63,7 @@ module Lederhosen
|
|
63
63
|
str = str.split("\t")
|
64
64
|
|
65
65
|
dat = {
|
66
|
-
:
|
66
|
+
:hit_type => str[0],
|
67
67
|
:cluster_no => str[1],
|
68
68
|
:alignment => str[7],
|
69
69
|
:query => str[8],
|
@@ -71,12 +71,12 @@ module Lederhosen
|
|
71
71
|
}
|
72
72
|
|
73
73
|
r =
|
74
|
-
if dat[:
|
74
|
+
if dat[:hit_type] =~ /[SNH]/ # hits
|
75
75
|
{ :length => str[2].to_i,
|
76
76
|
:identity => str[3],
|
77
77
|
:strand => str[4],
|
78
78
|
}
|
79
|
-
elsif dat[:
|
79
|
+
elsif dat[:hit_type] == 'C' # clusters
|
80
80
|
{ :cluster_size => str[2].to_i }
|
81
81
|
else
|
82
82
|
raise Exception, "Do not understand record type #{str[0]}!"
|
data/lib/lederhosen/version.rb
CHANGED
data/readme.md
CHANGED
@@ -13,10 +13,14 @@ Lederhosen is not a pipeline but rather a set of tools broken up into tasks. Tas
|
|
13
13
|
|
14
14
|
Lederhosen is designed with the following "pipeline" in mind:
|
15
15
|
|
16
|
-
1. Clustering sequences to
|
16
|
+
1. Clustering sequences to reference sequences (read: database) and/or _de novo_ OTU clustering.
|
17
|
+
- `lederhosen cluster ...`
|
17
18
|
2. Generating tables from USEARCH output.
|
19
|
+
- `lederhosen count_taxonomies ...`
|
20
|
+
- `lederhosen otu_table ...`
|
18
21
|
3. Filtering tables to remove small or insignificant OTUs.
|
19
|
-
|
22
|
+
- `lederhosen otu_filter ...`
|
23
|
+
|
20
24
|
|
21
25
|
### About
|
22
26
|
|
@@ -33,6 +37,7 @@ using paired and non-paired end short reads such as those produced by Illumina (
|
|
33
37
|
- Parallel support (pipe commands into [parallel](http://savannah.gnu.org/projects/parallel/), or use your cluster's queue).
|
34
38
|
- Support for RDP, TaxCollector or GreenGenes 16S rRNA databases.
|
35
39
|
- Generation and filtering of OTU abundancy matrices.
|
40
|
+
- Support for paired end reads (considers taxonomic assignment for both reads in a pair).
|
36
41
|
|
37
42
|
### Installation
|
38
43
|
|
@@ -169,8 +174,6 @@ for filtered clusters will be moved to the `noise` pseudocluster.
|
|
169
174
|
|
170
175
|
### Get representative sequences
|
171
176
|
|
172
|
-
(not yet implemented)
|
173
|
-
|
174
177
|
You can get the representative sequences for each cluster using the `get_reps` tasks.
|
175
178
|
This will extract the representative sequence from the __database__ you ran usearch with.
|
176
179
|
Make sure you use the same database that you used when running usearch.
|
data/spec/cli_spec.rb
CHANGED
@@ -33,11 +33,11 @@ describe Lederhosen::CLI do
|
|
33
33
|
it 'can separate unclassified reads from usearch output' do
|
34
34
|
`./bin/lederhosen separate_unclassified --uc-file=spec/data/test.uc --reads=spec/data/trimmed/ILT_L_9_B_001.fasta --output=#{$test_dir}/unclassified.fasta`
|
35
35
|
$?.success?.should be_true
|
36
|
-
unclassified_results = File.readlines("spec/data/test.uc")
|
37
|
-
.select { |x| x =~ /^N/ }
|
36
|
+
unclassified_results = File.readlines("spec/data/test.uc")\
|
37
|
+
.select { |x| x =~ /^N/ }\
|
38
38
|
.size
|
39
|
-
unclassified_reads = File.readlines("#{$test_dir}/unclassified.fasta")
|
40
|
-
.select { |x| x =~ /^>/ }
|
39
|
+
unclassified_reads = File.readlines("#{$test_dir}/unclassified.fasta")\
|
40
|
+
.select { |x| x =~ /^>/ }\
|
41
41
|
.size
|
42
42
|
|
43
43
|
unclassified_results.should == unclassified_reads
|
@@ -46,8 +46,8 @@ describe Lederhosen::CLI do
|
|
46
46
|
it 'can separate unclassified reads from usearch output using strict pairing' do
|
47
47
|
`./bin/lederhosen separate_unclassified --strict=genus --uc-file=spec/data/test.uc --reads=spec/data/trimmed/ILT_L_9_B_001.fasta --output=#{$test_dir}/unclassified.strict_genus.fasta`
|
48
48
|
$?.success?.should be_true
|
49
|
-
File.readlines("#{$test_dir}/unclassified.strict_genus.fasta")
|
50
|
-
.select { |x| x =~ /^>/ }
|
49
|
+
File.readlines("#{$test_dir}/unclassified.strict_genus.fasta")\
|
50
|
+
.select { |x| x =~ /^>/ }\
|
51
51
|
.size.should be_even
|
52
52
|
end
|
53
53
|
|
@@ -58,10 +58,10 @@ describe Lederhosen::CLI do
|
|
58
58
|
end
|
59
59
|
|
60
60
|
it 'generates taxonomy tables w/ comma-free taxonomic descriptions' do
|
61
|
-
File.readlines(File.join($test_dir, 'taxonomy_count.txt'))
|
62
|
-
.map(&:strip)
|
63
|
-
.map { |x| x.count(',') }
|
64
|
-
.uniq
|
61
|
+
File.readlines(File.join($test_dir, 'taxonomy_count.txt'))\
|
62
|
+
.map(&:strip)\
|
63
|
+
.map { |x| x.count(',') }\
|
64
|
+
.uniq\
|
65
65
|
.should == [1]
|
66
66
|
end
|
67
67
|
|
@@ -74,11 +74,11 @@ describe Lederhosen::CLI do
|
|
74
74
|
|
75
75
|
# make sure total number of reads is even
|
76
76
|
# requires that there should be an odd number if classification is not strict
|
77
|
-
lines.select { |x| !(x =~ /^#/) }
|
78
|
-
.map(&:strip)
|
79
|
-
.map { |x| x.split(',') }
|
80
|
-
.map(&:last)
|
81
|
-
.map(&:to_i)
|
77
|
+
lines.select { |x| !(x =~ /^#/) }\
|
78
|
+
.map(&:strip)\
|
79
|
+
.map { |x| x.split(',') }\
|
80
|
+
.map(&:last)\
|
81
|
+
.map(&:to_i)\
|
82
82
|
.inject(:+).should be_even
|
83
83
|
end
|
84
84
|
end
|
metadata
CHANGED
@@ -1,120 +1,108 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: lederhosen
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 13
|
5
5
|
prerelease:
|
6
|
+
segments:
|
7
|
+
- 2
|
8
|
+
- 0
|
9
|
+
- 1
|
10
|
+
version: 2.0.1
|
6
11
|
platform: ruby
|
7
|
-
authors:
|
12
|
+
authors:
|
8
13
|
- Austin G. Davis-Richardson
|
9
14
|
autorequire:
|
10
15
|
bindir: bin
|
11
16
|
cert_chain: []
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
|
-
requirements:
|
19
|
-
- - ! '>='
|
20
|
-
- !ruby/object:Gem::Version
|
21
|
-
version: '0'
|
22
|
-
type: :runtime
|
17
|
+
|
18
|
+
date: 2013-01-25 00:00:00 Z
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
23
21
|
prerelease: false
|
24
|
-
|
22
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
23
|
none: false
|
26
|
-
requirements:
|
27
|
-
- -
|
28
|
-
- !ruby/object:Gem::Version
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
none: false
|
34
|
-
requirements:
|
35
|
-
- - '='
|
36
|
-
- !ruby/object:Gem::Version
|
37
|
-
version: 0.12.0
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
hash: 3
|
28
|
+
segments:
|
29
|
+
- 0
|
30
|
+
version: "0"
|
38
31
|
type: :runtime
|
32
|
+
name: dna
|
33
|
+
version_requirements: *id001
|
34
|
+
- !ruby/object:Gem::Dependency
|
39
35
|
prerelease: false
|
40
|
-
|
36
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
41
37
|
none: false
|
42
|
-
requirements:
|
43
|
-
- -
|
44
|
-
- !ruby/object:Gem::Version
|
38
|
+
requirements:
|
39
|
+
- - "="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
hash: 47
|
42
|
+
segments:
|
43
|
+
- 0
|
44
|
+
- 12
|
45
|
+
- 0
|
45
46
|
version: 0.12.0
|
46
|
-
- !ruby/object:Gem::Dependency
|
47
|
-
name: thor
|
48
|
-
requirement: !ruby/object:Gem::Requirement
|
49
|
-
none: false
|
50
|
-
requirements:
|
51
|
-
- - '='
|
52
|
-
- !ruby/object:Gem::Version
|
53
|
-
version: 0.16.0
|
54
47
|
type: :runtime
|
48
|
+
name: progressbar
|
49
|
+
version_requirements: *id002
|
50
|
+
- !ruby/object:Gem::Dependency
|
55
51
|
prerelease: false
|
56
|
-
|
52
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
57
53
|
none: false
|
58
|
-
requirements:
|
59
|
-
- -
|
60
|
-
- !ruby/object:Gem::Version
|
54
|
+
requirements:
|
55
|
+
- - "="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
hash: 95
|
58
|
+
segments:
|
59
|
+
- 0
|
60
|
+
- 16
|
61
|
+
- 0
|
61
62
|
version: 0.16.0
|
62
|
-
|
63
|
-
name:
|
64
|
-
|
65
|
-
|
66
|
-
requirements:
|
67
|
-
- - ~>
|
68
|
-
- !ruby/object:Gem::Version
|
69
|
-
version: '3.12'
|
70
|
-
type: :development
|
63
|
+
type: :runtime
|
64
|
+
name: thor
|
65
|
+
version_requirements: *id003
|
66
|
+
- !ruby/object:Gem::Dependency
|
71
67
|
prerelease: false
|
72
|
-
|
68
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
73
69
|
none: false
|
74
|
-
requirements:
|
70
|
+
requirements:
|
75
71
|
- - ~>
|
76
|
-
- !ruby/object:Gem::Version
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
requirements:
|
83
|
-
- - '='
|
84
|
-
- !ruby/object:Gem::Version
|
85
|
-
version: 1.8.4
|
72
|
+
- !ruby/object:Gem::Version
|
73
|
+
hash: 31
|
74
|
+
segments:
|
75
|
+
- 3
|
76
|
+
- 12
|
77
|
+
version: "3.12"
|
86
78
|
type: :development
|
79
|
+
name: rdoc
|
80
|
+
version_requirements: *id004
|
81
|
+
- !ruby/object:Gem::Dependency
|
87
82
|
prerelease: false
|
88
|
-
|
83
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
89
84
|
none: false
|
90
|
-
requirements:
|
91
|
-
- -
|
92
|
-
- !ruby/object:Gem::Version
|
85
|
+
requirements:
|
86
|
+
- - "="
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
hash: 63
|
89
|
+
segments:
|
90
|
+
- 1
|
91
|
+
- 8
|
92
|
+
- 4
|
93
93
|
version: 1.8.4
|
94
|
-
- !ruby/object:Gem::Dependency
|
95
|
-
name: ruby-prof
|
96
|
-
requirement: !ruby/object:Gem::Requirement
|
97
|
-
none: false
|
98
|
-
requirements:
|
99
|
-
- - '='
|
100
|
-
- !ruby/object:Gem::Version
|
101
|
-
version: 0.11.2
|
102
94
|
type: :development
|
103
|
-
|
104
|
-
version_requirements:
|
105
|
-
none: false
|
106
|
-
requirements:
|
107
|
-
- - '='
|
108
|
-
- !ruby/object:Gem::Version
|
109
|
-
version: 0.11.2
|
95
|
+
name: jeweler
|
96
|
+
version_requirements: *id005
|
110
97
|
description: Various tools for OTU clustering
|
111
98
|
email: harekrishna@gmail.com
|
112
|
-
executables:
|
99
|
+
executables:
|
113
100
|
- lederhosen
|
114
101
|
extensions: []
|
115
|
-
|
102
|
+
|
103
|
+
extra_rdoc_files:
|
116
104
|
- LICENSE.txt
|
117
|
-
files:
|
105
|
+
files:
|
118
106
|
- .rspec
|
119
107
|
- Gemfile
|
120
108
|
- LICENSE.txt
|
@@ -150,31 +138,37 @@ files:
|
|
150
138
|
- spec/spec_helper.rb
|
151
139
|
- spec/uc_parser_spec.rb
|
152
140
|
homepage: http://audy.github.com/lederhosen
|
153
|
-
licenses:
|
141
|
+
licenses:
|
154
142
|
- MIT
|
155
143
|
post_install_message:
|
156
144
|
rdoc_options: []
|
157
|
-
|
145
|
+
|
146
|
+
require_paths:
|
158
147
|
- lib
|
159
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
148
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
160
149
|
none: false
|
161
|
-
requirements:
|
162
|
-
- -
|
163
|
-
- !ruby/object:Gem::Version
|
164
|
-
|
165
|
-
segments:
|
150
|
+
requirements:
|
151
|
+
- - ">="
|
152
|
+
- !ruby/object:Gem::Version
|
153
|
+
hash: 3
|
154
|
+
segments:
|
166
155
|
- 0
|
167
|
-
|
168
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
156
|
+
version: "0"
|
157
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
169
158
|
none: false
|
170
|
-
requirements:
|
171
|
-
- -
|
172
|
-
- !ruby/object:Gem::Version
|
173
|
-
|
159
|
+
requirements:
|
160
|
+
- - ">="
|
161
|
+
- !ruby/object:Gem::Version
|
162
|
+
hash: 3
|
163
|
+
segments:
|
164
|
+
- 0
|
165
|
+
version: "0"
|
174
166
|
requirements: []
|
167
|
+
|
175
168
|
rubyforge_project:
|
176
|
-
rubygems_version: 1.8.
|
169
|
+
rubygems_version: 1.8.24
|
177
170
|
signing_key:
|
178
171
|
specification_version: 3
|
179
172
|
summary: OTU Clustering
|
180
173
|
test_files: []
|
174
|
+
|