lederhosen 1.1.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lederhosen.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "lederhosen"
8
- s.version = "1.1.1"
8
+ s.version = "1.2.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Austin G. Davis-Richardson"]
12
- s.date = "2012-10-30"
12
+ s.date = "2012-11-13"
13
13
  s.description = "Various tools for OTU clustering"
14
14
  s.email = "harekrishna@gmail.com"
15
15
  s.executables = ["lederhosen"]
@@ -26,7 +26,9 @@ Gem::Specification.new do |s|
26
26
  "lederhosen.gemspec",
27
27
  "lib/lederhosen.rb",
28
28
  "lib/lederhosen/cli.rb",
29
+ "lib/lederhosen/no_tasks.rb",
29
30
  "lib/lederhosen/tasks/cluster.rb",
31
+ "lib/lederhosen/tasks/get_reps.rb",
30
32
  "lib/lederhosen/tasks/join_otu_tables.rb",
31
33
  "lib/lederhosen/tasks/make_udb.rb",
32
34
  "lib/lederhosen/tasks/otu_filter.rb",
@@ -48,7 +50,7 @@ Gem::Specification.new do |s|
48
50
  s.homepage = "http://audy.github.com/lederhosen"
49
51
  s.licenses = ["MIT"]
50
52
  s.require_paths = ["lib"]
51
- s.rubygems_version = "1.8.24"
53
+ s.rubygems_version = "1.8.23"
52
54
  s.summary = "OTU Clustering"
53
55
 
54
56
  if s.respond_to? :specification_version then
@@ -0,0 +1,56 @@
1
+ module Lederhosen
2
+ class CLI
3
+ no_tasks do
4
+
5
+ # parse a line of usearch output (uc format)
6
+ # return a hash in the form:
7
+ # { :identity => 0.00, 'domain' => '', ..., 'species' => '', :original => '' }
8
+ # unless the line is not a "hit" in which case
9
+ # the function returns nil
10
+ def parse_usearch_line(str)
11
+
12
+ # skip non hits
13
+ return nil unless str =~ /^H/
14
+
15
+ str = str.split
16
+
17
+ taxonomic_description = str[9]
18
+ identity = str[3].to_f
19
+
20
+ # parse taxonomic_description
21
+ taxonomies = parse_taxonomy(taxonomic_description)
22
+
23
+ { :identity => identity }.merge(taxonomies)
24
+ end
25
+
26
+ # parse a taxonomic description using the
27
+ # taxcollector format returning name at each level (genus, etc...)
28
+ def parse_taxonomy(taxonomy)
29
+
30
+ levels = { 'domain' => 0,
31
+ 'kingdom' => 0,
32
+ 'phylum' => 1,
33
+ 'class' => 2,
34
+ 'order' => 3,
35
+ 'family' => 4,
36
+ 'genus' => 5,
37
+ 'species' => 6 }
38
+
39
+ names = Hash.new
40
+
41
+ levels.each_pair do |level, num|
42
+ name = taxonomy.match(/\[#{num}\](\w*)[;\[]/)[1] rescue nil
43
+ names[level] = name
44
+ end
45
+
46
+ # keep original taxonomic description
47
+ names[:original] = taxonomy
48
+
49
+ names
50
+ end
51
+
52
+ end # no tasks
53
+
54
+ end
55
+ end
56
+
@@ -0,0 +1,47 @@
1
+ require 'set'
2
+
3
+ module Lederhosen
4
+ class CLI
5
+ desc 'get_reps', 'get representative reads for a uc file'
6
+
7
+ method_option :input, :type => :string, :required => true
8
+ method_option :database, :type => :string, :required => true
9
+ method_option :output, :type => :string, :required => true
10
+
11
+ def get_reps
12
+ input = options[:input]
13
+ database = options[:database]
14
+ output = options[:output]
15
+
16
+ taxa = Set.new
17
+
18
+ ohai "getting representative database sequences from #{database} using #{input} clusters and saving to #{output}"
19
+
20
+ # parse the uc file and collect the taxa whose full
21
+ # sequences need to be fetched from the database
22
+ File.open(input).each do |line|
23
+ header = parse_usearch_line(line.strip)
24
+ taxa << header[:original] rescue nil
25
+ end
26
+
27
+ ohai "found #{taxa.size} representative sequences"
28
+
29
+ # print representative sequences from database
30
+ output = File.open(output, 'w')
31
+ kept = 0
32
+ File.open(database) do |handle|
33
+ Dna.new(handle).each do |record|
34
+ if taxa.include? record.name
35
+ output.puts record
36
+ kept += 1
37
+ end
38
+ end
39
+ end
40
+
41
+ output.close
42
+
43
+ ohai "saved #{kept} representatives"
44
+
45
+ end
46
+ end
47
+ end
@@ -81,52 +81,6 @@ module Lederhosen
81
81
  end
82
82
  end
83
83
 
84
- no_tasks do
85
- # parse a line of usearch prefix
86
- # return a hash in the form:
87
- # { :taxonomy => '', :identity => 0.00, ... }
88
- # unless the line is not a "hit" in which case
89
- # the function returns nil
90
- def parse_usearch_line(str)
91
-
92
- # skip non hits
93
- return nil unless str =~ /^H/
94
-
95
- str = str.split
96
-
97
- taxonomic_description = str[9]
98
- identity = str[3].to_f
99
-
100
- # parse taxonomic_description
101
- taxonomies = parse_taxonomy(taxonomic_description)
102
-
103
- { :identity => identity }.merge(taxonomies)
104
- end
105
-
106
- # parse a taxonomic description using the
107
- # taxcollector format returning name at each level (genus, etc...)
108
- def parse_taxonomy(taxonomy)
109
-
110
- levels = { 'domain' => 0,
111
- 'kingdom' => 0,
112
- 'phylum' => 1,
113
- 'class' => 2,
114
- 'order' => 3,
115
- 'family' => 4,
116
- 'genus' => 5,
117
- 'species' => 6 }
118
-
119
- names = Hash.new
120
-
121
- levels.each_pair do |level, num|
122
- name = taxonomy.match(/\[#{num}\](\w*)[;\[]/)[1] rescue nil
123
- names[level] = name
124
- end
125
-
126
- names
127
- end
128
-
129
- end # no tasks
130
84
 
131
85
  end # class CLI
132
86
  end # module Lederhosen
@@ -1,9 +1,9 @@
1
1
  module Lederhosen
2
2
  module Version
3
3
  MAJOR = 1
4
- MINOR = 1
5
- CODENAME = 'Apfelstrudel' # changes for minor versions
6
- PATCH = 1
4
+ MINOR = 2
5
+ CODENAME = 'Regenmantel' # changes for minor versions
6
+ PATCH = 0
7
7
 
8
8
  STRING = [MAJOR, MINOR, PATCH].join('.')
9
9
  end
data/readme.md CHANGED
@@ -1,3 +1,5 @@
1
+ <img src="http://d.pr/i/26Js+#.png" align="right">
2
+
1
3
  # Lederhosen
2
4
 
3
5
  Cluster raw Illumina 16S rRNA amplicon data to generate OTUs.
@@ -46,19 +48,33 @@ The trimming process will reverse complement the "right" pair so that both reads
46
48
 
47
49
  Create UDB database required by usearch from TaxCollector
48
50
 
49
- lederhosen make_udb --input=taxcollector.fa --output=taxcollector.udb
51
+ ```bash
52
+ lederhosen make_udb \
53
+ --input=taxcollector.fa \
54
+ --output=taxcollector.udb
55
+ ```
50
56
 
51
57
  ### Cluster Reads using USEARCH
52
58
 
53
59
  Cluster reads using USEARCH. Output is a uc file.
54
60
 
55
- lederhosen cluster --input=trimmed/*.fasta --identity=0.95 --output=clusters_95.uc --database=taxcollector.udb
56
-
61
+ ```bash
62
+ lederhosen cluster \
63
+ --input=trimmed/*.fasta \
64
+ --identity=0.95 \
65
+ --output=clusters_95.uc \
66
+ --database=taxcollector.udb
67
+ ```
57
68
  ### Generate OTU table(s)
58
69
 
59
70
  Create an OTU abundance table where rows are samples and columns are clusters. The entries are the number of reads for that cluster in a sample.
60
71
 
61
- lederhosen otu_table --clusters=clusters_95.uc --prefix=otu_table --level=domain phylum class order family genus species
72
+ ```bash
73
+ lederhosen otu_table \
74
+ --clusters=clusters_95.uc \
75
+ --prefix=otu_table \
76
+ --level=domain phylum class order family genus species
77
+ ```
62
78
 
63
79
  This will create the files:
64
80
 
data/spec/cli_spec.rb CHANGED
@@ -24,6 +24,8 @@ describe Lederhosen::CLI do
24
24
 
25
25
  it 'can cluster reads using usearch' do
26
26
  `./bin/lederhosen cluster --input #{$test_dir}/trimmed/ILT_L_9_B_001.fasta --database #{$test_dir}/test_db.udb --identity 0.95 --output #{$test_dir}/clusters.uc`
27
+ $?.success?.should be_true
28
+ File.exists?(File.join($test_dir, 'clusters.uc')).should be_true
27
29
  end
28
30
 
29
31
  it 'should build abundance matrices for each level' do
@@ -47,5 +49,9 @@ describe Lederhosen::CLI do
47
49
  $?.success?.should be_true
48
50
  end
49
51
 
52
+ it 'should print representative sequences from uc files' do
53
+ `./bin/lederhosen get_reps --input=#{$test_dir}/clusters.uc --database=#{$test_dir}/trimmed/ILT_L_9_B_001.fasta --output=#{$test_dir}/representatives.fasta`
54
+ end
55
+
50
56
  it 'should create a fasta file containing representative reads for each cluster'
51
57
  end
metadata CHANGED
@@ -1,118 +1,120 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: lederhosen
3
- version: !ruby/object:Gem::Version
4
- hash: 17
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.2.0
5
5
  prerelease:
6
- segments:
7
- - 1
8
- - 1
9
- - 1
10
- version: 1.1.1
11
6
  platform: ruby
12
- authors:
7
+ authors:
13
8
  - Austin G. Davis-Richardson
14
9
  autorequire:
15
10
  bindir: bin
16
11
  cert_chain: []
17
-
18
- date: 2012-10-30 00:00:00 Z
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
21
- type: :runtime
22
- requirement: &id001 !ruby/object:Gem::Requirement
12
+ date: 2012-11-13 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: dna
16
+ requirement: !ruby/object:Gem::Requirement
23
17
  none: false
24
- requirements:
25
- - - "="
26
- - !ruby/object:Gem::Version
27
- hash: 7
28
- segments:
29
- - 0
30
- - 0
31
- - 12
18
+ requirements:
19
+ - - '='
20
+ - !ruby/object:Gem::Version
32
21
  version: 0.0.12
33
- version_requirements: *id001
34
- name: dna
35
- prerelease: false
36
- - !ruby/object:Gem::Dependency
37
22
  type: :runtime
38
- requirement: &id002 !ruby/object:Gem::Requirement
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
39
25
  none: false
40
- requirements:
41
- - - ">="
42
- - !ruby/object:Gem::Version
43
- hash: 3
44
- segments:
45
- - 0
46
- version: "0"
47
- version_requirements: *id002
26
+ requirements:
27
+ - - '='
28
+ - !ruby/object:Gem::Version
29
+ version: 0.0.12
30
+ - !ruby/object:Gem::Dependency
48
31
  name: progressbar
49
- prerelease: false
50
- - !ruby/object:Gem::Dependency
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
51
38
  type: :runtime
52
- requirement: &id003 !ruby/object:Gem::Requirement
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
53
41
  none: false
54
- requirements:
55
- - - ">="
56
- - !ruby/object:Gem::Version
57
- hash: 3
58
- segments:
59
- - 0
60
- version: "0"
61
- version_requirements: *id003
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
62
47
  name: thor
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :runtime
63
55
  prerelease: false
64
- - !ruby/object:Gem::Dependency
65
- type: :development
66
- requirement: &id004 !ruby/object:Gem::Requirement
56
+ version_requirements: !ruby/object:Gem::Requirement
67
57
  none: false
68
- requirements:
69
- - - ~>
70
- - !ruby/object:Gem::Version
71
- hash: 31
72
- segments:
73
- - 3
74
- - 12
75
- version: "3.12"
76
- version_requirements: *id004
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
77
63
  name: rdoc
78
- prerelease: false
79
- - !ruby/object:Gem::Dependency
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ~>
68
+ - !ruby/object:Gem::Version
69
+ version: '3.12'
80
70
  type: :development
81
- requirement: &id005 !ruby/object:Gem::Requirement
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
82
73
  none: false
83
- requirements:
84
- - - ">="
85
- - !ruby/object:Gem::Version
86
- hash: 3
87
- segments:
88
- - 0
89
- version: "0"
90
- version_requirements: *id005
74
+ requirements:
75
+ - - ~>
76
+ - !ruby/object:Gem::Version
77
+ version: '3.12'
78
+ - !ruby/object:Gem::Dependency
91
79
  name: bundler
92
- prerelease: false
93
- - !ruby/object:Gem::Dependency
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
94
86
  type: :development
95
- requirement: &id006 !ruby/object:Gem::Requirement
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
96
89
  none: false
97
- requirements:
98
- - - ">="
99
- - !ruby/object:Gem::Version
100
- hash: 3
101
- segments:
102
- - 0
103
- version: "0"
104
- version_requirements: *id006
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ - !ruby/object:Gem::Dependency
105
95
  name: jeweler
96
+ requirement: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ type: :development
106
103
  prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
107
110
  description: Various tools for OTU clustering
108
111
  email: harekrishna@gmail.com
109
- executables:
112
+ executables:
110
113
  - lederhosen
111
114
  extensions: []
112
-
113
- extra_rdoc_files:
115
+ extra_rdoc_files:
114
116
  - LICENSE.txt
115
- files:
117
+ files:
116
118
  - .rspec
117
119
  - .rvmrc
118
120
  - Gemfile
@@ -122,7 +124,9 @@ files:
122
124
  - lederhosen.gemspec
123
125
  - lib/lederhosen.rb
124
126
  - lib/lederhosen/cli.rb
127
+ - lib/lederhosen/no_tasks.rb
125
128
  - lib/lederhosen/tasks/cluster.rb
129
+ - lib/lederhosen/tasks/get_reps.rb
126
130
  - lib/lederhosen/tasks/join_otu_tables.rb
127
131
  - lib/lederhosen/tasks/make_udb.rb
128
132
  - lib/lederhosen/tasks/otu_filter.rb
@@ -141,37 +145,31 @@ files:
141
145
  - spec/misc_spec.rb
142
146
  - spec/spec_helper.rb
143
147
  homepage: http://audy.github.com/lederhosen
144
- licenses:
148
+ licenses:
145
149
  - MIT
146
150
  post_install_message:
147
151
  rdoc_options: []
148
-
149
- require_paths:
152
+ require_paths:
150
153
  - lib
151
- required_ruby_version: !ruby/object:Gem::Requirement
154
+ required_ruby_version: !ruby/object:Gem::Requirement
152
155
  none: false
153
- requirements:
154
- - - ">="
155
- - !ruby/object:Gem::Version
156
- hash: 3
157
- segments:
156
+ requirements:
157
+ - - ! '>='
158
+ - !ruby/object:Gem::Version
159
+ version: '0'
160
+ segments:
158
161
  - 0
159
- version: "0"
160
- required_rubygems_version: !ruby/object:Gem::Requirement
162
+ hash: -2175708922821992201
163
+ required_rubygems_version: !ruby/object:Gem::Requirement
161
164
  none: false
162
- requirements:
163
- - - ">="
164
- - !ruby/object:Gem::Version
165
- hash: 3
166
- segments:
167
- - 0
168
- version: "0"
165
+ requirements:
166
+ - - ! '>='
167
+ - !ruby/object:Gem::Version
168
+ version: '0'
169
169
  requirements: []
170
-
171
170
  rubyforge_project:
172
- rubygems_version: 1.8.24
171
+ rubygems_version: 1.8.23
173
172
  signing_key:
174
173
  specification_version: 3
175
174
  summary: OTU Clustering
176
175
  test_files: []
177
-