lederhosen 1.1.1 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lederhosen.gemspec +5 -3
- data/lib/lederhosen/no_tasks.rb +56 -0
- data/lib/lederhosen/tasks/get_reps.rb +47 -0
- data/lib/lederhosen/tasks/otu_table.rb +0 -46
- data/lib/lederhosen/version.rb +3 -3
- data/readme.md +20 -4
- data/spec/cli_spec.rb +6 -0
- metadata +104 -106
data/lederhosen.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "lederhosen"
|
8
|
-
s.version = "1.
|
8
|
+
s.version = "1.2.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Austin G. Davis-Richardson"]
|
12
|
-
s.date = "2012-
|
12
|
+
s.date = "2012-11-13"
|
13
13
|
s.description = "Various tools for OTU clustering"
|
14
14
|
s.email = "harekrishna@gmail.com"
|
15
15
|
s.executables = ["lederhosen"]
|
@@ -26,7 +26,9 @@ Gem::Specification.new do |s|
|
|
26
26
|
"lederhosen.gemspec",
|
27
27
|
"lib/lederhosen.rb",
|
28
28
|
"lib/lederhosen/cli.rb",
|
29
|
+
"lib/lederhosen/no_tasks.rb",
|
29
30
|
"lib/lederhosen/tasks/cluster.rb",
|
31
|
+
"lib/lederhosen/tasks/get_reps.rb",
|
30
32
|
"lib/lederhosen/tasks/join_otu_tables.rb",
|
31
33
|
"lib/lederhosen/tasks/make_udb.rb",
|
32
34
|
"lib/lederhosen/tasks/otu_filter.rb",
|
@@ -48,7 +50,7 @@ Gem::Specification.new do |s|
|
|
48
50
|
s.homepage = "http://audy.github.com/lederhosen"
|
49
51
|
s.licenses = ["MIT"]
|
50
52
|
s.require_paths = ["lib"]
|
51
|
-
s.rubygems_version = "1.8.
|
53
|
+
s.rubygems_version = "1.8.23"
|
52
54
|
s.summary = "OTU Clustering"
|
53
55
|
|
54
56
|
if s.respond_to? :specification_version then
|
@@ -0,0 +1,56 @@
|
|
1
|
+
module Lederhosen
|
2
|
+
class CLI
|
3
|
+
no_tasks do
|
4
|
+
|
5
|
+
# parse a line of usearch prefix
|
6
|
+
# return a hash in the form:
|
7
|
+
# { :taxonomy => '', :identity => 0.00, ... }
|
8
|
+
# unless the line is not a "hit" in which case
|
9
|
+
# the function returns nil
|
10
|
+
def parse_usearch_line(str)
|
11
|
+
|
12
|
+
# skip non hits
|
13
|
+
return nil unless str =~ /^H/
|
14
|
+
|
15
|
+
str = str.split
|
16
|
+
|
17
|
+
taxonomic_description = str[9]
|
18
|
+
identity = str[3].to_f
|
19
|
+
|
20
|
+
# parse taxonomic_description
|
21
|
+
taxonomies = parse_taxonomy(taxonomic_description)
|
22
|
+
|
23
|
+
{ :identity => identity }.merge(taxonomies)
|
24
|
+
end
|
25
|
+
|
26
|
+
# parse a taxonomic description using the
|
27
|
+
# taxcollector format returning name at each level (genus, etc...)
|
28
|
+
def parse_taxonomy(taxonomy)
|
29
|
+
|
30
|
+
levels = { 'domain' => 0,
|
31
|
+
'kingdom' => 0,
|
32
|
+
'phylum' => 1,
|
33
|
+
'class' => 2,
|
34
|
+
'order' => 3,
|
35
|
+
'family' => 4,
|
36
|
+
'genus' => 5,
|
37
|
+
'species' => 6 }
|
38
|
+
|
39
|
+
names = Hash.new
|
40
|
+
|
41
|
+
levels.each_pair do |level, num|
|
42
|
+
name = taxonomy.match(/\[#{num}\](\w*)[;\[]/)[1] rescue nil
|
43
|
+
names[level] = name
|
44
|
+
end
|
45
|
+
|
46
|
+
# keep original taxonomic description
|
47
|
+
names[:original] = taxonomy
|
48
|
+
|
49
|
+
names
|
50
|
+
end
|
51
|
+
|
52
|
+
end # no tasks
|
53
|
+
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'set'
|
2
|
+
|
3
|
+
module Lederhosen
|
4
|
+
class CLI
|
5
|
+
desc 'get_reps', 'get representative reads for a uc file'
|
6
|
+
|
7
|
+
method_option :input, :type => :string, :required => true
|
8
|
+
method_option :database, :type => :string, :required => true
|
9
|
+
method_option :output, :type => :string, :required => true
|
10
|
+
|
11
|
+
def get_reps
|
12
|
+
input = options[:input]
|
13
|
+
database = options[:database]
|
14
|
+
output = options[:output]
|
15
|
+
|
16
|
+
taxa = Set.new
|
17
|
+
|
18
|
+
ohai "getting representative database sequences from #{database} using #{input} clusters and saving to #{output}"
|
19
|
+
|
20
|
+
# parse uc file, get list of taxa we need to get
|
21
|
+
# full sequences for from the database
|
22
|
+
File.open(input).each do |line|
|
23
|
+
header = parse_usearch_line(line.strip)
|
24
|
+
taxa << header[:original] rescue nil
|
25
|
+
end
|
26
|
+
|
27
|
+
ohai "found #{taxa.size} representative sequences"
|
28
|
+
|
29
|
+
# print representative sequences from database
|
30
|
+
output = File.open(output, 'w')
|
31
|
+
kept = 0
|
32
|
+
File.open(database) do |handle|
|
33
|
+
Dna.new(handle).each do |record|
|
34
|
+
if taxa.include? record.name
|
35
|
+
output.puts record
|
36
|
+
kept += 1
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
output.close
|
42
|
+
|
43
|
+
ohai "saved #{kept} representatives"
|
44
|
+
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
@@ -81,52 +81,6 @@ module Lederhosen
|
|
81
81
|
end
|
82
82
|
end
|
83
83
|
|
84
|
-
no_tasks do
|
85
|
-
# parse a line of usearch prefix
|
86
|
-
# return a hash in the form:
|
87
|
-
# { :taxonomy => '', :identity => 0.00, ... }
|
88
|
-
# unless the line is not a "hit" in which case
|
89
|
-
# the function returns nil
|
90
|
-
def parse_usearch_line(str)
|
91
|
-
|
92
|
-
# skip non hits
|
93
|
-
return nil unless str =~ /^H/
|
94
|
-
|
95
|
-
str = str.split
|
96
|
-
|
97
|
-
taxonomic_description = str[9]
|
98
|
-
identity = str[3].to_f
|
99
|
-
|
100
|
-
# parse taxonomic_description
|
101
|
-
taxonomies = parse_taxonomy(taxonomic_description)
|
102
|
-
|
103
|
-
{ :identity => identity }.merge(taxonomies)
|
104
|
-
end
|
105
|
-
|
106
|
-
# parse a taxonomic description using the
|
107
|
-
# taxcollector format returning name at each level (genus, etc...)
|
108
|
-
def parse_taxonomy(taxonomy)
|
109
|
-
|
110
|
-
levels = { 'domain' => 0,
|
111
|
-
'kingdom' => 0,
|
112
|
-
'phylum' => 1,
|
113
|
-
'class' => 2,
|
114
|
-
'order' => 3,
|
115
|
-
'family' => 4,
|
116
|
-
'genus' => 5,
|
117
|
-
'species' => 6 }
|
118
|
-
|
119
|
-
names = Hash.new
|
120
|
-
|
121
|
-
levels.each_pair do |level, num|
|
122
|
-
name = taxonomy.match(/\[#{num}\](\w*)[;\[]/)[1] rescue nil
|
123
|
-
names[level] = name
|
124
|
-
end
|
125
|
-
|
126
|
-
names
|
127
|
-
end
|
128
|
-
|
129
|
-
end # no tasks
|
130
84
|
|
131
85
|
end # class CLI
|
132
86
|
end # module Lederhosen
|
data/lib/lederhosen/version.rb
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
module Lederhosen
|
2
2
|
module Version
|
3
3
|
MAJOR = 1
|
4
|
-
MINOR =
|
5
|
-
CODENAME = '
|
6
|
-
PATCH =
|
4
|
+
MINOR = 2
|
5
|
+
CODENAME = 'Regenmantel' # changes for minor versions
|
6
|
+
PATCH = 0
|
7
7
|
|
8
8
|
STRING = [MAJOR, MINOR, PATCH].join('.')
|
9
9
|
end
|
data/readme.md
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
<img src="http://d.pr/i/26Js+#.png" align="right">
|
2
|
+
|
1
3
|
# Lederhosen
|
2
4
|
|
3
5
|
Cluster raw Illumina 16S rRNA amplicon data to generate OTUs.
|
@@ -46,19 +48,33 @@ The trimming process will reverse complement the "right" pair so that both reads
|
|
46
48
|
|
47
49
|
Create UDB database required by usearch from TaxCollector
|
48
50
|
|
49
|
-
|
51
|
+
```bash
|
52
|
+
lederhosen make_udb \
|
53
|
+
--input=taxcollector.fa \
|
54
|
+
--output=taxcollector.udb
|
55
|
+
```
|
50
56
|
|
51
57
|
### Cluster Reads using USEARCH
|
52
58
|
|
53
59
|
Cluster reads using USEARCH. Output is a uc file.
|
54
60
|
|
55
|
-
|
56
|
-
|
61
|
+
```bash
|
62
|
+
lederhosen cluster \
|
63
|
+
--input=trimmed/*.fasta \
|
64
|
+
--identity=0.95 \
|
65
|
+
--output=clusters_95.uc \
|
66
|
+
--database=taxcollector.udb
|
67
|
+
```
|
57
68
|
### Generate OTU table(s)
|
58
69
|
|
59
70
|
Create an OTU abundance table where rows are samples and columns are clusters. The entries are the number of reads for that cluster in a sample.
|
60
71
|
|
61
|
-
|
72
|
+
```bash
|
73
|
+
lederhosen otu_table \
|
74
|
+
--clusters=clusters_95.uc \
|
75
|
+
--prefix=otu_table \
|
76
|
+
--level=domain phylum class order family genus species
|
77
|
+
```
|
62
78
|
|
63
79
|
This will create the files:
|
64
80
|
|
data/spec/cli_spec.rb
CHANGED
@@ -24,6 +24,8 @@ describe Lederhosen::CLI do
|
|
24
24
|
|
25
25
|
it 'can cluster reads using usearch' do
|
26
26
|
`./bin/lederhosen cluster --input #{$test_dir}/trimmed/ILT_L_9_B_001.fasta --database #{$test_dir}/test_db.udb --identity 0.95 --output #{$test_dir}/clusters.uc`
|
27
|
+
$?.success?.should be_true
|
28
|
+
File.exists?(File.join($test_dir, 'clusters.uc')).should be_true
|
27
29
|
end
|
28
30
|
|
29
31
|
it 'should build abundance matrices for each level' do
|
@@ -47,5 +49,9 @@ describe Lederhosen::CLI do
|
|
47
49
|
$?.success?.should be_true
|
48
50
|
end
|
49
51
|
|
52
|
+
it 'should print representative sequences from uc files' do
|
53
|
+
`./bin/lederhosen get_reps --input=#{$test_dir}/clusters.uc --database=#{$test_dir}/trimmed/ILT_L_9_B_001.fasta --output=#{$test_dir}/representatives.fasta`
|
54
|
+
end
|
55
|
+
|
50
56
|
it 'should create a fasta file containing representative reads for each cluster'
|
51
57
|
end
|
metadata
CHANGED
@@ -1,118 +1,120 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: lederhosen
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.2.0
|
5
5
|
prerelease:
|
6
|
-
segments:
|
7
|
-
- 1
|
8
|
-
- 1
|
9
|
-
- 1
|
10
|
-
version: 1.1.1
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- Austin G. Davis-Richardson
|
14
9
|
autorequire:
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
12
|
+
date: 2012-11-13 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: dna
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
23
17
|
none: false
|
24
|
-
requirements:
|
25
|
-
- -
|
26
|
-
- !ruby/object:Gem::Version
|
27
|
-
hash: 7
|
28
|
-
segments:
|
29
|
-
- 0
|
30
|
-
- 0
|
31
|
-
- 12
|
18
|
+
requirements:
|
19
|
+
- - '='
|
20
|
+
- !ruby/object:Gem::Version
|
32
21
|
version: 0.0.12
|
33
|
-
version_requirements: *id001
|
34
|
-
name: dna
|
35
|
-
prerelease: false
|
36
|
-
- !ruby/object:Gem::Dependency
|
37
22
|
type: :runtime
|
38
|
-
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
39
25
|
none: false
|
40
|
-
requirements:
|
41
|
-
- -
|
42
|
-
- !ruby/object:Gem::Version
|
43
|
-
|
44
|
-
|
45
|
-
- 0
|
46
|
-
version: "0"
|
47
|
-
version_requirements: *id002
|
26
|
+
requirements:
|
27
|
+
- - '='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 0.0.12
|
30
|
+
- !ruby/object:Gem::Dependency
|
48
31
|
name: progressbar
|
49
|
-
|
50
|
-
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
51
38
|
type: :runtime
|
52
|
-
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
53
41
|
none: false
|
54
|
-
requirements:
|
55
|
-
- -
|
56
|
-
- !ruby/object:Gem::Version
|
57
|
-
|
58
|
-
|
59
|
-
- 0
|
60
|
-
version: "0"
|
61
|
-
version_requirements: *id003
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
62
47
|
name: thor
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :runtime
|
63
55
|
prerelease: false
|
64
|
-
|
65
|
-
type: :development
|
66
|
-
requirement: &id004 !ruby/object:Gem::Requirement
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
67
57
|
none: false
|
68
|
-
requirements:
|
69
|
-
- -
|
70
|
-
- !ruby/object:Gem::Version
|
71
|
-
|
72
|
-
|
73
|
-
- 3
|
74
|
-
- 12
|
75
|
-
version: "3.12"
|
76
|
-
version_requirements: *id004
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
- !ruby/object:Gem::Dependency
|
77
63
|
name: rdoc
|
78
|
-
|
79
|
-
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ~>
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '3.12'
|
80
70
|
type: :development
|
81
|
-
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
82
73
|
none: false
|
83
|
-
requirements:
|
84
|
-
- -
|
85
|
-
- !ruby/object:Gem::Version
|
86
|
-
|
87
|
-
|
88
|
-
- 0
|
89
|
-
version: "0"
|
90
|
-
version_requirements: *id005
|
74
|
+
requirements:
|
75
|
+
- - ~>
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '3.12'
|
78
|
+
- !ruby/object:Gem::Dependency
|
91
79
|
name: bundler
|
92
|
-
|
93
|
-
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ! '>='
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '0'
|
94
86
|
type: :development
|
95
|
-
|
87
|
+
prerelease: false
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
96
89
|
none: false
|
97
|
-
requirements:
|
98
|
-
- -
|
99
|
-
- !ruby/object:Gem::Version
|
100
|
-
|
101
|
-
|
102
|
-
- 0
|
103
|
-
version: "0"
|
104
|
-
version_requirements: *id006
|
90
|
+
requirements:
|
91
|
+
- - ! '>='
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
94
|
+
- !ruby/object:Gem::Dependency
|
105
95
|
name: jeweler
|
96
|
+
requirement: !ruby/object:Gem::Requirement
|
97
|
+
none: false
|
98
|
+
requirements:
|
99
|
+
- - ! '>='
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '0'
|
102
|
+
type: :development
|
106
103
|
prerelease: false
|
104
|
+
version_requirements: !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ! '>='
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '0'
|
107
110
|
description: Various tools for OTU clustering
|
108
111
|
email: harekrishna@gmail.com
|
109
|
-
executables:
|
112
|
+
executables:
|
110
113
|
- lederhosen
|
111
114
|
extensions: []
|
112
|
-
|
113
|
-
extra_rdoc_files:
|
115
|
+
extra_rdoc_files:
|
114
116
|
- LICENSE.txt
|
115
|
-
files:
|
117
|
+
files:
|
116
118
|
- .rspec
|
117
119
|
- .rvmrc
|
118
120
|
- Gemfile
|
@@ -122,7 +124,9 @@ files:
|
|
122
124
|
- lederhosen.gemspec
|
123
125
|
- lib/lederhosen.rb
|
124
126
|
- lib/lederhosen/cli.rb
|
127
|
+
- lib/lederhosen/no_tasks.rb
|
125
128
|
- lib/lederhosen/tasks/cluster.rb
|
129
|
+
- lib/lederhosen/tasks/get_reps.rb
|
126
130
|
- lib/lederhosen/tasks/join_otu_tables.rb
|
127
131
|
- lib/lederhosen/tasks/make_udb.rb
|
128
132
|
- lib/lederhosen/tasks/otu_filter.rb
|
@@ -141,37 +145,31 @@ files:
|
|
141
145
|
- spec/misc_spec.rb
|
142
146
|
- spec/spec_helper.rb
|
143
147
|
homepage: http://audy.github.com/lederhosen
|
144
|
-
licenses:
|
148
|
+
licenses:
|
145
149
|
- MIT
|
146
150
|
post_install_message:
|
147
151
|
rdoc_options: []
|
148
|
-
|
149
|
-
require_paths:
|
152
|
+
require_paths:
|
150
153
|
- lib
|
151
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
154
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
152
155
|
none: false
|
153
|
-
requirements:
|
154
|
-
- -
|
155
|
-
- !ruby/object:Gem::Version
|
156
|
-
|
157
|
-
segments:
|
156
|
+
requirements:
|
157
|
+
- - ! '>='
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: '0'
|
160
|
+
segments:
|
158
161
|
- 0
|
159
|
-
|
160
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
162
|
+
hash: -2175708922821992201
|
163
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
161
164
|
none: false
|
162
|
-
requirements:
|
163
|
-
- -
|
164
|
-
- !ruby/object:Gem::Version
|
165
|
-
|
166
|
-
segments:
|
167
|
-
- 0
|
168
|
-
version: "0"
|
165
|
+
requirements:
|
166
|
+
- - ! '>='
|
167
|
+
- !ruby/object:Gem::Version
|
168
|
+
version: '0'
|
169
169
|
requirements: []
|
170
|
-
|
171
170
|
rubyforge_project:
|
172
|
-
rubygems_version: 1.8.
|
171
|
+
rubygems_version: 1.8.23
|
173
172
|
signing_key:
|
174
173
|
specification_version: 3
|
175
174
|
summary: OTU Clustering
|
176
175
|
test_files: []
|
177
|
-
|