lederhosen 2.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +1 -4
- data/lederhosen.gemspec +3 -6
- data/lib/lederhosen/uc_parser.rb +5 -5
- data/lib/lederhosen/version.rb +1 -1
- data/readme.md +7 -4
- data/spec/cli_spec.rb +15 -15
- metadata +96 -102
data/Gemfile
CHANGED
@@ -6,13 +6,10 @@ gem 'thor', '0.16.0'
|
|
6
6
|
|
7
7
|
group :test do
|
8
8
|
gem 'rspec', '2.12.0'
|
9
|
-
gem 'rspec-prof', '0.0.3'
|
10
9
|
gem 'pry'
|
11
|
-
# gem 'plymouth'
|
12
10
|
end
|
13
11
|
|
14
12
|
group :development do
|
15
13
|
gem 'rdoc', '~> 3.12'
|
16
14
|
gem 'jeweler', '1.8.4'
|
17
|
-
|
18
|
-
end
|
15
|
+
end
|
data/lederhosen.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "lederhosen"
|
8
|
-
s.version = "2.0.
|
8
|
+
s.version = "2.0.1"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Austin G. Davis-Richardson"]
|
12
|
-
s.date = "2013-01-
|
12
|
+
s.date = "2013-01-25"
|
13
13
|
s.description = "Various tools for OTU clustering"
|
14
14
|
s.email = "harekrishna@gmail.com"
|
15
15
|
s.executables = ["lederhosen"]
|
@@ -55,7 +55,7 @@ Gem::Specification.new do |s|
|
|
55
55
|
s.homepage = "http://audy.github.com/lederhosen"
|
56
56
|
s.licenses = ["MIT"]
|
57
57
|
s.require_paths = ["lib"]
|
58
|
-
s.rubygems_version = "1.8.
|
58
|
+
s.rubygems_version = "1.8.24"
|
59
59
|
s.summary = "OTU Clustering"
|
60
60
|
|
61
61
|
if s.respond_to? :specification_version then
|
@@ -67,14 +67,12 @@ Gem::Specification.new do |s|
|
|
67
67
|
s.add_runtime_dependency(%q<thor>, ["= 0.16.0"])
|
68
68
|
s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
|
69
69
|
s.add_development_dependency(%q<jeweler>, ["= 1.8.4"])
|
70
|
-
s.add_development_dependency(%q<ruby-prof>, ["= 0.11.2"])
|
71
70
|
else
|
72
71
|
s.add_dependency(%q<dna>, [">= 0"])
|
73
72
|
s.add_dependency(%q<progressbar>, ["= 0.12.0"])
|
74
73
|
s.add_dependency(%q<thor>, ["= 0.16.0"])
|
75
74
|
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
76
75
|
s.add_dependency(%q<jeweler>, ["= 1.8.4"])
|
77
|
-
s.add_dependency(%q<ruby-prof>, ["= 0.11.2"])
|
78
76
|
end
|
79
77
|
else
|
80
78
|
s.add_dependency(%q<dna>, [">= 0"])
|
@@ -82,7 +80,6 @@ Gem::Specification.new do |s|
|
|
82
80
|
s.add_dependency(%q<thor>, ["= 0.16.0"])
|
83
81
|
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
84
82
|
s.add_dependency(%q<jeweler>, ["= 1.8.4"])
|
85
|
-
s.add_dependency(%q<ruby-prof>, ["= 0.11.2"])
|
86
83
|
end
|
87
84
|
end
|
88
85
|
|
data/lib/lederhosen/uc_parser.rb
CHANGED
@@ -14,11 +14,11 @@ module Lederhosen
|
|
14
14
|
end
|
15
15
|
|
16
16
|
def hit?
|
17
|
-
@source.
|
17
|
+
@source.hit_type == 'H'
|
18
18
|
end
|
19
19
|
|
20
20
|
def miss?
|
21
|
-
@source.
|
21
|
+
@source.hit_type == 'N'
|
22
22
|
end
|
23
23
|
end
|
24
24
|
|
@@ -63,7 +63,7 @@ module Lederhosen
|
|
63
63
|
str = str.split("\t")
|
64
64
|
|
65
65
|
dat = {
|
66
|
-
:
|
66
|
+
:hit_type => str[0],
|
67
67
|
:cluster_no => str[1],
|
68
68
|
:alignment => str[7],
|
69
69
|
:query => str[8],
|
@@ -71,12 +71,12 @@ module Lederhosen
|
|
71
71
|
}
|
72
72
|
|
73
73
|
r =
|
74
|
-
if dat[:
|
74
|
+
if dat[:hit_type] =~ /[SNH]/ # hits
|
75
75
|
{ :length => str[2].to_i,
|
76
76
|
:identity => str[3],
|
77
77
|
:strand => str[4],
|
78
78
|
}
|
79
|
-
elsif dat[:
|
79
|
+
elsif dat[:hit_type] == 'C' # clusters
|
80
80
|
{ :cluster_size => str[2].to_i }
|
81
81
|
else
|
82
82
|
raise Exception, "Do not understand record type #{str[0]}!"
|
data/lib/lederhosen/version.rb
CHANGED
data/readme.md
CHANGED
@@ -13,10 +13,14 @@ Lederhosen is not a pipeline but rather a set of tools broken up into tasks. Tas
|
|
13
13
|
|
14
14
|
Lederhosen is designed with the following "pipeline" in mind:
|
15
15
|
|
16
|
-
1. Clustering sequences to
|
16
|
+
1. Clustering sequences to reference sequences (read: database) and/or _de novo_ OTU clustering.
|
17
|
+
- `lederhosen cluster ...`
|
17
18
|
2. Generating tables from USEARCH output.
|
19
|
+
- `lederhosen count_taxonomies ...`
|
20
|
+
- `lederhosen otu_table ...`
|
18
21
|
3. Filtering tables to remove small or insignificant OTUs.
|
19
|
-
|
22
|
+
- `lederhosen otu_filter ...`
|
23
|
+
|
20
24
|
|
21
25
|
### About
|
22
26
|
|
@@ -33,6 +37,7 @@ using paired and non-paired end short reads such as those produced by Illumina (
|
|
33
37
|
- Parallel support (pipe commands into [parallel](http://savannah.gnu.org/projects/parallel/), or use your cluster's queue).
|
34
38
|
- Support for RDP, TaxCollector or GreenGenes 16S rRNA databases.
|
35
39
|
- Generation and filtering of OTU abundancy matrices.
|
40
|
+
- Support for paired-end reads (considers taxonomic assignment for both reads in a pair).
|
36
41
|
|
37
42
|
### Installation
|
38
43
|
|
@@ -169,8 +174,6 @@ for filtered clusters will be moved to the `noise` pseudocluster.
|
|
169
174
|
|
170
175
|
### Get representative sequences
|
171
176
|
|
172
|
-
(not yet implemented)
|
173
|
-
|
174
177
|
You can get the representative sequences for each cluster using the `get_reps` tasks.
|
175
178
|
This will extract the representative sequence from the __database__ you ran usearch with.
|
176
179
|
Make sure you use the same database that you used when running usearch.
|
data/spec/cli_spec.rb
CHANGED
@@ -33,11 +33,11 @@ describe Lederhosen::CLI do
|
|
33
33
|
it 'can separate unclassified reads from usearch output' do
|
34
34
|
`./bin/lederhosen separate_unclassified --uc-file=spec/data/test.uc --reads=spec/data/trimmed/ILT_L_9_B_001.fasta --output=#{$test_dir}/unclassified.fasta`
|
35
35
|
$?.success?.should be_true
|
36
|
-
unclassified_results = File.readlines("spec/data/test.uc")
|
37
|
-
.select { |x| x =~ /^N/ }
|
36
|
+
unclassified_results = File.readlines("spec/data/test.uc")\
|
37
|
+
.select { |x| x =~ /^N/ }\
|
38
38
|
.size
|
39
|
-
unclassified_reads = File.readlines("#{$test_dir}/unclassified.fasta")
|
40
|
-
.select { |x| x =~ /^>/ }
|
39
|
+
unclassified_reads = File.readlines("#{$test_dir}/unclassified.fasta")\
|
40
|
+
.select { |x| x =~ /^>/ }\
|
41
41
|
.size
|
42
42
|
|
43
43
|
unclassified_results.should == unclassified_reads
|
@@ -46,8 +46,8 @@ describe Lederhosen::CLI do
|
|
46
46
|
it 'can separate unclassified reads from usearch output using strict pairing' do
|
47
47
|
`./bin/lederhosen separate_unclassified --strict=genus --uc-file=spec/data/test.uc --reads=spec/data/trimmed/ILT_L_9_B_001.fasta --output=#{$test_dir}/unclassified.strict_genus.fasta`
|
48
48
|
$?.success?.should be_true
|
49
|
-
File.readlines("#{$test_dir}/unclassified.strict_genus.fasta")
|
50
|
-
.select { |x| x =~ /^>/ }
|
49
|
+
File.readlines("#{$test_dir}/unclassified.strict_genus.fasta")\
|
50
|
+
.select { |x| x =~ /^>/ }\
|
51
51
|
.size.should be_even
|
52
52
|
end
|
53
53
|
|
@@ -58,10 +58,10 @@ describe Lederhosen::CLI do
|
|
58
58
|
end
|
59
59
|
|
60
60
|
it 'generates taxonomy tables w/ comma-free taxonomic descriptions' do
|
61
|
-
File.readlines(File.join($test_dir, 'taxonomy_count.txt'))
|
62
|
-
.map(&:strip)
|
63
|
-
.map { |x| x.count(',') }
|
64
|
-
.uniq
|
61
|
+
File.readlines(File.join($test_dir, 'taxonomy_count.txt'))\
|
62
|
+
.map(&:strip)\
|
63
|
+
.map { |x| x.count(',') }\
|
64
|
+
.uniq\
|
65
65
|
.should == [1]
|
66
66
|
end
|
67
67
|
|
@@ -74,11 +74,11 @@ describe Lederhosen::CLI do
|
|
74
74
|
|
75
75
|
# make sure total number of reads is even
|
76
76
|
# requires that there should be an odd number if classification is not strict
|
77
|
-
lines.select { |x| !(x =~ /^#/) }
|
78
|
-
.map(&:strip)
|
79
|
-
.map { |x| x.split(',') }
|
80
|
-
.map(&:last)
|
81
|
-
.map(&:to_i)
|
77
|
+
lines.select { |x| !(x =~ /^#/) }\
|
78
|
+
.map(&:strip)\
|
79
|
+
.map { |x| x.split(',') }\
|
80
|
+
.map(&:last)\
|
81
|
+
.map(&:to_i)\
|
82
82
|
.inject(:+).should be_even
|
83
83
|
end
|
84
84
|
end
|
metadata
CHANGED
@@ -1,120 +1,108 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: lederhosen
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 13
|
5
5
|
prerelease:
|
6
|
+
segments:
|
7
|
+
- 2
|
8
|
+
- 0
|
9
|
+
- 1
|
10
|
+
version: 2.0.1
|
6
11
|
platform: ruby
|
7
|
-
authors:
|
12
|
+
authors:
|
8
13
|
- Austin G. Davis-Richardson
|
9
14
|
autorequire:
|
10
15
|
bindir: bin
|
11
16
|
cert_chain: []
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
|
-
requirements:
|
19
|
-
- - ! '>='
|
20
|
-
- !ruby/object:Gem::Version
|
21
|
-
version: '0'
|
22
|
-
type: :runtime
|
17
|
+
|
18
|
+
date: 2013-01-25 00:00:00 Z
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
23
21
|
prerelease: false
|
24
|
-
|
22
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
23
|
none: false
|
26
|
-
requirements:
|
27
|
-
- -
|
28
|
-
- !ruby/object:Gem::Version
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
none: false
|
34
|
-
requirements:
|
35
|
-
- - '='
|
36
|
-
- !ruby/object:Gem::Version
|
37
|
-
version: 0.12.0
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
hash: 3
|
28
|
+
segments:
|
29
|
+
- 0
|
30
|
+
version: "0"
|
38
31
|
type: :runtime
|
32
|
+
name: dna
|
33
|
+
version_requirements: *id001
|
34
|
+
- !ruby/object:Gem::Dependency
|
39
35
|
prerelease: false
|
40
|
-
|
36
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
41
37
|
none: false
|
42
|
-
requirements:
|
43
|
-
- -
|
44
|
-
- !ruby/object:Gem::Version
|
38
|
+
requirements:
|
39
|
+
- - "="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
hash: 47
|
42
|
+
segments:
|
43
|
+
- 0
|
44
|
+
- 12
|
45
|
+
- 0
|
45
46
|
version: 0.12.0
|
46
|
-
- !ruby/object:Gem::Dependency
|
47
|
-
name: thor
|
48
|
-
requirement: !ruby/object:Gem::Requirement
|
49
|
-
none: false
|
50
|
-
requirements:
|
51
|
-
- - '='
|
52
|
-
- !ruby/object:Gem::Version
|
53
|
-
version: 0.16.0
|
54
47
|
type: :runtime
|
48
|
+
name: progressbar
|
49
|
+
version_requirements: *id002
|
50
|
+
- !ruby/object:Gem::Dependency
|
55
51
|
prerelease: false
|
56
|
-
|
52
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
57
53
|
none: false
|
58
|
-
requirements:
|
59
|
-
- -
|
60
|
-
- !ruby/object:Gem::Version
|
54
|
+
requirements:
|
55
|
+
- - "="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
hash: 95
|
58
|
+
segments:
|
59
|
+
- 0
|
60
|
+
- 16
|
61
|
+
- 0
|
61
62
|
version: 0.16.0
|
62
|
-
|
63
|
-
name:
|
64
|
-
|
65
|
-
|
66
|
-
requirements:
|
67
|
-
- - ~>
|
68
|
-
- !ruby/object:Gem::Version
|
69
|
-
version: '3.12'
|
70
|
-
type: :development
|
63
|
+
type: :runtime
|
64
|
+
name: thor
|
65
|
+
version_requirements: *id003
|
66
|
+
- !ruby/object:Gem::Dependency
|
71
67
|
prerelease: false
|
72
|
-
|
68
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
73
69
|
none: false
|
74
|
-
requirements:
|
70
|
+
requirements:
|
75
71
|
- - ~>
|
76
|
-
- !ruby/object:Gem::Version
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
requirements:
|
83
|
-
- - '='
|
84
|
-
- !ruby/object:Gem::Version
|
85
|
-
version: 1.8.4
|
72
|
+
- !ruby/object:Gem::Version
|
73
|
+
hash: 31
|
74
|
+
segments:
|
75
|
+
- 3
|
76
|
+
- 12
|
77
|
+
version: "3.12"
|
86
78
|
type: :development
|
79
|
+
name: rdoc
|
80
|
+
version_requirements: *id004
|
81
|
+
- !ruby/object:Gem::Dependency
|
87
82
|
prerelease: false
|
88
|
-
|
83
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
89
84
|
none: false
|
90
|
-
requirements:
|
91
|
-
- -
|
92
|
-
- !ruby/object:Gem::Version
|
85
|
+
requirements:
|
86
|
+
- - "="
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
hash: 63
|
89
|
+
segments:
|
90
|
+
- 1
|
91
|
+
- 8
|
92
|
+
- 4
|
93
93
|
version: 1.8.4
|
94
|
-
- !ruby/object:Gem::Dependency
|
95
|
-
name: ruby-prof
|
96
|
-
requirement: !ruby/object:Gem::Requirement
|
97
|
-
none: false
|
98
|
-
requirements:
|
99
|
-
- - '='
|
100
|
-
- !ruby/object:Gem::Version
|
101
|
-
version: 0.11.2
|
102
94
|
type: :development
|
103
|
-
|
104
|
-
version_requirements:
|
105
|
-
none: false
|
106
|
-
requirements:
|
107
|
-
- - '='
|
108
|
-
- !ruby/object:Gem::Version
|
109
|
-
version: 0.11.2
|
95
|
+
name: jeweler
|
96
|
+
version_requirements: *id005
|
110
97
|
description: Various tools for OTU clustering
|
111
98
|
email: harekrishna@gmail.com
|
112
|
-
executables:
|
99
|
+
executables:
|
113
100
|
- lederhosen
|
114
101
|
extensions: []
|
115
|
-
|
102
|
+
|
103
|
+
extra_rdoc_files:
|
116
104
|
- LICENSE.txt
|
117
|
-
files:
|
105
|
+
files:
|
118
106
|
- .rspec
|
119
107
|
- Gemfile
|
120
108
|
- LICENSE.txt
|
@@ -150,31 +138,37 @@ files:
|
|
150
138
|
- spec/spec_helper.rb
|
151
139
|
- spec/uc_parser_spec.rb
|
152
140
|
homepage: http://audy.github.com/lederhosen
|
153
|
-
licenses:
|
141
|
+
licenses:
|
154
142
|
- MIT
|
155
143
|
post_install_message:
|
156
144
|
rdoc_options: []
|
157
|
-
|
145
|
+
|
146
|
+
require_paths:
|
158
147
|
- lib
|
159
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
148
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
160
149
|
none: false
|
161
|
-
requirements:
|
162
|
-
- -
|
163
|
-
- !ruby/object:Gem::Version
|
164
|
-
|
165
|
-
segments:
|
150
|
+
requirements:
|
151
|
+
- - ">="
|
152
|
+
- !ruby/object:Gem::Version
|
153
|
+
hash: 3
|
154
|
+
segments:
|
166
155
|
- 0
|
167
|
-
|
168
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
156
|
+
version: "0"
|
157
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
169
158
|
none: false
|
170
|
-
requirements:
|
171
|
-
- -
|
172
|
-
- !ruby/object:Gem::Version
|
173
|
-
|
159
|
+
requirements:
|
160
|
+
- - ">="
|
161
|
+
- !ruby/object:Gem::Version
|
162
|
+
hash: 3
|
163
|
+
segments:
|
164
|
+
- 0
|
165
|
+
version: "0"
|
174
166
|
requirements: []
|
167
|
+
|
175
168
|
rubyforge_project:
|
176
|
-
rubygems_version: 1.8.
|
169
|
+
rubygems_version: 1.8.24
|
177
170
|
signing_key:
|
178
171
|
specification_version: 3
|
179
172
|
summary: OTU Clustering
|
180
173
|
test_files: []
|
174
|
+
|