genomer-plugin-summary 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.2
1
+ 0.0.3
@@ -0,0 +1,122 @@
1
+ Feature: Producing a summary of the scaffold
2
+ In order to have an overview of the scaffold
3
+ A user can use the "scaffold" command
4
+ to generate a tabular output of the scaffold
5
+
6
+ Scenario: A scaffold with a single sequence
7
+ Given I create a new genomer project
8
+ And I write to "assembly/scaffold.yml" with:
9
+ """
10
+ ---
11
+ -
12
+ sequence:
13
+ source: contig0001
14
+ """
15
+ And I write to "assembly/sequence.fna" with:
16
+ """
17
+ >contig0001
18
+ ATGC
19
+ """
20
+ When I run `genomer summary scaffold`
21
+ Then the exit status should be 0
22
+ And the output should contain:
23
+ """
24
+ +--------------+-----------+
25
+ | Scaffold |
26
+ +--------------+-----------+
27
+ | Contigs (#) | 1 |
28
+ | Gaps (#) | 0 |
29
+ +--------------+-----------+
30
+ | Size (bp) | 4 |
31
+ | Contigs (bp) | 4 |
32
+ | Gaps (bp) | 0 |
33
+ +--------------+-----------+
34
+ | G+C (%) | 50.00 |
35
+ | Contigs (%) | 100.00 |
36
+ | Gaps (%) | 0.00 |
37
+ +--------------+-----------+
38
+
39
+ """
40
+
41
+ Scenario: A scaffold with two sequences
42
+ Given I create a new genomer project
43
+ And I write to "assembly/scaffold.yml" with:
44
+ """
45
+ ---
46
+ -
47
+ sequence:
48
+ source: contig0001
49
+ -
50
+ sequence:
51
+ source: contig0002
52
+ """
53
+ And I write to "assembly/sequence.fna" with:
54
+ """
55
+ >contig0001
56
+ ATGC
57
+ >contig0002
58
+ GGGC
59
+ """
60
+ When I run `genomer summary scaffold`
61
+ Then the exit status should be 0
62
+ And the output should contain:
63
+ """
64
+ +--------------+-----------+
65
+ | Scaffold |
66
+ +--------------+-----------+
67
+ | Contigs (#) | 2 |
68
+ | Gaps (#) | 0 |
69
+ +--------------+-----------+
70
+ | Size (bp) | 8 |
71
+ | Contigs (bp) | 8 |
72
+ | Gaps (bp) | 0 |
73
+ +--------------+-----------+
74
+ | G+C (%) | 75.00 |
75
+ | Contigs (%) | 100.00 |
76
+ | Gaps (%) | 0.00 |
77
+ +--------------+-----------+
78
+
79
+ """
80
+
81
+ Scenario: A scaffold with two sequences and a gap
82
+ Given I create a new genomer project
83
+ And I write to "assembly/scaffold.yml" with:
84
+ """
85
+ ---
86
+ -
87
+ sequence:
88
+ source: contig0001
89
+ -
90
+ unresolved:
91
+ length: 5
92
+ -
93
+ sequence:
94
+ source: contig0002
95
+ """
96
+ And I write to "assembly/sequence.fna" with:
97
+ """
98
+ >contig0001
99
+ ATGC
100
+ >contig0002
101
+ GGGC
102
+ """
103
+ When I run `genomer summary scaffold`
104
+ Then the exit status should be 0
105
+ And the output should contain:
106
+ """
107
+ +--------------+-----------+
108
+ | Scaffold |
109
+ +--------------+-----------+
110
+ | Contigs (#) | 2 |
111
+ | Gaps (#) | 1 |
112
+ +--------------+-----------+
113
+ | Size (bp) | 13 |
114
+ | Contigs (bp) | 8 |
115
+ | Gaps (bp) | 5 |
116
+ +--------------+-----------+
117
+ | G+C (%) | 75.00 |
118
+ | Contigs (%) | 61.54 |
119
+ | Gaps (%) | 38.46 |
120
+ +--------------+-----------+
121
+
122
+ """
@@ -15,6 +15,7 @@ Gem::Specification.new do |gem|
15
15
  gem.version = File.read 'VERSION'
16
16
 
17
17
  gem.add_dependency "genomer", ">= 0.0.4"
18
+ gem.add_dependency "lazing", ">= 0.1.1"
18
19
  gem.add_dependency "terminal-table", "~> 1.4.5"
19
20
 
20
21
  gem.add_development_dependency 'rake', '~> 0.9.0'
@@ -0,0 +1,41 @@
1
+ require 'genomer'
2
+ require 'lazing'
3
+
4
+ module GenomerPluginSummary::Metrics
5
+
6
+ ALL = :all
7
+
8
+ def gc_content(type,scfd)
9
+ gc = enumerator_for(type,scfd).mapping{|i| gc(i)}.inject(:+) || 0.0
10
+ atgc = enumerator_for(type,scfd).mapping{|i| atgc(i)}.inject(:+) || 0.0
11
+ gc / atgc * 100
12
+ end
13
+
14
+ def count(type,scfd)
15
+ enumerator_for(type,scfd).count
16
+ end
17
+
18
+ def percent(type,scfd)
19
+ length(type,scfd) / length(ALL,scfd).to_f * 100
20
+ end
21
+
22
+ def length(type,scfd)
23
+ enumerator_for(type,scfd).
24
+ mapping(&:sequence).
25
+ mapping(&:length).
26
+ inject(:+) || 0
27
+ end
28
+
29
+ def gc(entry)
30
+ entry.sequence.gsub(/[^GCgc]/,'').length.to_f
31
+ end
32
+
33
+ def atgc(entry)
34
+ entry.sequence.gsub(/[^ATGCatgc]/,'').length.to_f
35
+ end
36
+
37
+ def enumerator_for(type,scaffold)
38
+ scaffold.selecting{|i| [ALL,i.entry_type].include? type }
39
+ end
40
+
41
+ end
@@ -0,0 +1,56 @@
1
+ require 'genomer'
2
+ require 'genomer-plugin-summary/metrics'
3
+ require 'terminal-table'
4
+
5
+ class GenomerPluginSummary::Scaffold < Genomer::Plugin
6
+ include GenomerPluginSummary::Metrics
7
+
8
+ LAYOUT = [
9
+ {:name => 'Contigs (#)', :entry_type => :sequence, :method => :count},
10
+ {:name => 'Gaps (#)', :entry_type => :unresolved, :method => :count},
11
+ :separator,
12
+ {:name => 'Size (bp)', :entry_type => :all, :method => :length},
13
+ {:name => 'Contigs (bp)', :entry_type => :sequence, :method => :length},
14
+ {:name => 'Gaps (bp)', :entry_type => :unresolved, :method => :length},
15
+ :separator,
16
+ {:name => 'G+C (%)', :entry_type => :all, :method => :gc_content},
17
+ {:name => 'Contigs (%)', :entry_type => :sequence, :method => :percent},
18
+ {:name => 'Gaps (%)', :entry_type => :unresolved, :method => :percent}
19
+ ]
20
+
21
+ def run
22
+ tabulate calculate_metrics(LAYOUT, scaffold)
23
+ end
24
+
25
+ def title
26
+ 'Scaffold'
27
+ end
28
+
29
+ def tabulate(data)
30
+ table = Terminal::Table.new(:title => title) do |t|
31
+ data.each do |(k,v)|
32
+ t << if k == :separator
33
+ :separator
34
+ else
35
+ v = sprintf('%#.2f',v) if v.class == Float
36
+ [k.ljust(12),v.to_s.rjust(9)]
37
+ end
38
+ end
39
+ end
40
+
41
+ table.align_column 0, :left
42
+ table.align_column 1, :right
43
+ table.to_s
44
+ end
45
+
46
+ def calculate_metrics(specs,scaffold)
47
+ specs.map do |spec|
48
+ if spec == :separator
49
+ spec
50
+ else
51
+ [spec[:name], send(spec[:method],spec[:entry_type],scaffold)]
52
+ end
53
+ end
54
+ end
55
+
56
+ end
@@ -1,7 +1,9 @@
1
1
  require 'genomer'
2
+ require 'genomer-plugin-summary/metrics'
2
3
  require 'terminal-table'
3
4
 
4
5
  class GenomerPluginSummary::Sequences < Genomer::Plugin
6
+ include GenomerPluginSummary::Metrics
5
7
 
6
8
  def run
7
9
  sequences = calculate(scaffold)
@@ -51,22 +53,22 @@ class GenomerPluginSummary::Sequences < Genomer::Plugin
51
53
  end
52
54
 
53
55
  def calculate(scaffold)
54
- total_length = scaffold.map(&:sequence).join.length.to_f
56
+ total_length = length(:all,scaffold).to_f
57
+ running_length = 0
55
58
 
56
- length = 0
57
59
  scaffold.map do |entry|
58
60
  i = nil
59
61
  if entry.entry_type != :unresolved
60
62
  entry_length = entry.sequence.length
61
63
  i = { :sequence => entry.source,
62
- :start => length + 1,
63
- :end => length + entry_length,
64
+ :start => running_length + 1,
65
+ :end => running_length + entry_length,
64
66
  :size => entry_length,
65
67
  :percent => entry_length / total_length * 100,
66
- :gc => gc_content(entry.sequence) }
68
+ :gc => gc(entry) / atgc(entry) * 100 }
67
69
  end
68
70
 
69
- length += entry.sequence.length
71
+ running_length += entry.sequence.length
70
72
  i
71
73
  end.compact
72
74
  end
@@ -87,9 +89,4 @@ class GenomerPluginSummary::Sequences < Genomer::Plugin
87
89
  totals
88
90
  end
89
91
 
90
- def gc_content(sequence)
91
- nucleotides = sequence.gsub(/[^ATGCatgc]/,'')
92
- nucleotides.gsub(/[^GCgc]/,'').length.to_f / nucleotides.length * 100
93
- end
94
-
95
92
  end
@@ -0,0 +1,255 @@
1
+ require 'spec_helper'
2
+ require 'genomer-plugin-summary/metrics'
3
+
4
+ describe GenomerPluginSummary::Metrics do
5
+
6
+ let(:metric) do
7
+ o = Object.new
8
+ o.extend described_class
9
+ o
10
+ end
11
+
12
+ describe "#gc_content" do
13
+
14
+ subject do
15
+ metric.gc_content entry_type, scaffold
16
+ end
17
+
18
+ context "an empty scaffold" do
19
+ let(:scaffold){ [] }
20
+
21
+ context "contigs" do
22
+ let(:entry_type){ :sequence }
23
+ it{ should be_nan }
24
+ end
25
+ context "gaps" do
26
+ let(:entry_type){ :unresolved }
27
+ it{ should == be_nan }
28
+ end
29
+ context "everything" do
30
+ let(:entry_type){ :all }
31
+ it{ should == be_nan }
32
+ end
33
+
34
+ end
35
+
36
+ context "a single contig scaffold" do
37
+ let(:scaffold){ [sequence('ATGC')] }
38
+
39
+ context "contigs" do
40
+ let(:entry_type){ :sequence }
41
+ it{ should == 50.0 }
42
+ end
43
+ context "gaps" do
44
+ let(:entry_type){ :unresolved }
45
+ it{ should be_nan }
46
+ end
47
+ context "everything" do
48
+ let(:entry_type){ :all }
49
+ it{ should == 50.0 }
50
+ end
51
+
52
+ end
53
+
54
+ context "a mixed scaffold" do
55
+ let(:scaffold){ [sequence('ATGC'),unresolved('NNNN'),sequence('ATGC')] }
56
+
57
+ context "contigs" do
58
+ let(:entry_type){ :sequence }
59
+ it{ should == 50.0 }
60
+ end
61
+ context "gaps" do
62
+ let(:entry_type){ :unresolved }
63
+ it{ should == be_nan }
64
+ end
65
+ context "everything" do
66
+ let(:entry_type){ :all }
67
+ it{ should == 50.0 }
68
+ end
69
+
70
+ end
71
+
72
+ end
73
+ describe "#count" do
74
+
75
+ subject do
76
+ metric.count entry_type, scaffold
77
+ end
78
+
79
+ context "an empty scaffold" do
80
+ let(:scaffold){ [] }
81
+
82
+ context "contigs" do
83
+ let(:entry_type){ :sequence }
84
+ it{ should == 0 }
85
+ end
86
+ context "gaps" do
87
+ let(:entry_type){ :unresolved }
88
+ it{ should == 0 }
89
+ end
90
+ context "everything" do
91
+ let(:entry_type){ :all }
92
+ it{ should == 0 }
93
+ end
94
+
95
+ end
96
+
97
+ context "a single contig scaffold" do
98
+ let(:scaffold){ [sequence('ATGC')] }
99
+
100
+ context "contigs" do
101
+ let(:entry_type){ :sequence }
102
+ it{ should == 1 }
103
+ end
104
+ context "gaps" do
105
+ let(:entry_type){ :unresolved }
106
+ it{ should == 0 }
107
+ end
108
+ context "everything" do
109
+ let(:entry_type){ :all }
110
+ it{ should == 1 }
111
+ end
112
+
113
+ end
114
+
115
+ context "a mixed scaffold" do
116
+ let(:scaffold){ [sequence('ATGC'),unresolved('NNNN'),sequence('ATGC')] }
117
+
118
+ context "contigs" do
119
+ let(:entry_type){ :sequence }
120
+ it{ should == 2 }
121
+ end
122
+ context "gaps" do
123
+ let(:entry_type){ :unresolved }
124
+ it{ should == 1 }
125
+ end
126
+ context "everything" do
127
+ let(:entry_type){ :all }
128
+ it{ should == 3 }
129
+ end
130
+
131
+ end
132
+
133
+ end
134
+
135
+ describe "#length" do
136
+
137
+ subject do
138
+ metric.length entry_type, scaffold
139
+ end
140
+
141
+ context "an empty scaffold" do
142
+ let(:scaffold){ [] }
143
+
144
+ context "contigs" do
145
+ let(:entry_type){ :sequence }
146
+ it{ should == 0 }
147
+ end
148
+ context "gaps" do
149
+ let(:entry_type){ :unresolved }
150
+ it{ should == 0 }
151
+ end
152
+ context "everything" do
153
+ let(:entry_type){ :all }
154
+ it{ should == 0 }
155
+ end
156
+
157
+ end
158
+
159
+ context "a single contig scaffold" do
160
+ let(:scaffold){ [sequence('ATGC')] }
161
+
162
+ context "contigs" do
163
+ let(:entry_type){ :sequence }
164
+ it{ should == 4 }
165
+ end
166
+ context "gaps" do
167
+ let(:entry_type){ :unresolved }
168
+ it{ should == 0 }
169
+ end
170
+ context "everything" do
171
+ let(:entry_type){ :all }
172
+ it{ should == 4 }
173
+ end
174
+ end
175
+
176
+ context "a mixed scaffold" do
177
+ let(:scaffold){ [sequence('ATGC'),unresolved('NNNN'),sequence('ATGC')] }
178
+
179
+ context "contigs" do
180
+ let(:entry_type){ :sequence }
181
+ it{ should == 8 }
182
+ end
183
+ context "gaps" do
184
+ let(:entry_type){ :unresolved }
185
+ it{ should == 4 }
186
+ end
187
+ context "everything" do
188
+ let(:entry_type){ :all }
189
+ it{ should == 12 }
190
+ end
191
+ end
192
+
193
+ end
194
+
195
+ describe "#percent" do
196
+
197
+ subject do
198
+ metric.percent entry_type, scaffold
199
+ end
200
+
201
+ context "an empty scaffold" do
202
+ let(:scaffold){ [] }
203
+
204
+ context "contigs" do
205
+ let(:entry_type){ :sequence }
206
+ it{ should be_nan }
207
+ end
208
+ context "gaps" do
209
+ let(:entry_type){ :unresolved }
210
+ it{ should == be_nan }
211
+ end
212
+ context "everything" do
213
+ let(:entry_type){ :all }
214
+ it{ should == be_nan }
215
+ end
216
+
217
+ end
218
+
219
+ context "a single contig scaffold" do
220
+ let(:scaffold){ [sequence('ATGC')] }
221
+
222
+ context "contigs" do
223
+ let(:entry_type){ :sequence }
224
+ it{ should == 100.0 }
225
+ end
226
+ context "gaps" do
227
+ let(:entry_type){ :unresolved }
228
+ it{ should == 0.0 }
229
+ end
230
+ context "everything" do
231
+ let(:entry_type){ :all }
232
+ it{ should == 100.0 }
233
+ end
234
+ end
235
+
236
+ context "a mixed scaffold" do
237
+ let(:scaffold){ [sequence('ATGC'),unresolved('NNNN'),sequence('ATGC')] }
238
+
239
+ context "contigs" do
240
+ let(:entry_type){ :sequence }
241
+ it{ should == 8 / 12.0 * 100}
242
+ end
243
+ context "gaps" do
244
+ let(:entry_type){ :unresolved }
245
+ it{ should == 4 / 12.0 * 100 }
246
+ end
247
+ context "everything" do
248
+ let(:entry_type){ :all }
249
+ it{ should == 100.0 }
250
+ end
251
+ end
252
+
253
+ end
254
+
255
+ end
@@ -0,0 +1,76 @@
1
+ require 'spec_helper'
2
+ require 'genomer-plugin-summary/scaffold'
3
+
4
+ describe GenomerPluginSummary::Scaffold do
5
+
6
+ describe "#tabulate" do
7
+
8
+ subject do
9
+ described_class.new([],{}).tabulate(data) + "\n"
10
+ end
11
+
12
+ context "passed table data" do
13
+
14
+ let(:data) do
15
+ [['Contigs (#)',1.0],
16
+ :separator,
17
+ ['Gaps (#)',0]]
18
+ end
19
+
20
+ it do
21
+ should ==<<-EOS.unindent!
22
+ +--------------+-----------+
23
+ | Scaffold |
24
+ +--------------+-----------+
25
+ | Contigs (#) | 1.00 |
26
+ +--------------+-----------+
27
+ | Gaps (#) | 0 |
28
+ +--------------+-----------+
29
+ EOS
30
+ end
31
+ end
32
+ end
33
+
34
+ describe "#calculate_metrics" do
35
+
36
+ subject do
37
+ described_class.new([],{}).calculate_metrics(specs,scaffold)
38
+ end
39
+
40
+ context "should calculate a single metrics for the scaffold" do
41
+
42
+ let(:scaffold) do
43
+ [sequence('ATGC')]
44
+ end
45
+
46
+ let(:specs) do
47
+ [{:name => 'Contigs (%)', :entry_type => :sequence, :method => :percent}]
48
+ end
49
+
50
+ it do
51
+ should == [['Contigs (%)',100.0]]
52
+ end
53
+ end
54
+
55
+ context "should calculate a single metrics with separators" do
56
+
57
+ let(:scaffold) do
58
+ [sequence('ATGC')]
59
+ end
60
+
61
+ let(:specs) do
62
+ [:separator,
63
+ {:name => 'Contigs (%)', :entry_type => :sequence, :method => :percent}]
64
+ end
65
+
66
+ it do
67
+ should == [
68
+ :separator,
69
+ ['Contigs (%)',100.0]
70
+ ]
71
+ end
72
+ end
73
+
74
+ end
75
+
76
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: genomer-plugin-summary
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-10-12 00:00:00.000000000 Z
12
+ date: 2012-10-19 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: genomer
@@ -27,6 +27,22 @@ dependencies:
27
27
  - - ! '>='
28
28
  - !ruby/object:Gem::Version
29
29
  version: 0.0.4
30
+ - !ruby/object:Gem::Dependency
31
+ name: lazing
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: 0.1.1
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: 0.1.1
30
46
  - !ruby/object:Gem::Dependency
31
47
  name: terminal-table
32
48
  requirement: !ruby/object:Gem::Requirement
@@ -153,17 +169,22 @@ files:
153
169
  - Rakefile
154
170
  - VERSION
155
171
  - features/gaps.feature
172
+ - features/scaffold.feature
156
173
  - features/sequences.feature
157
174
  - features/support/env.rb
158
175
  - features/support/genomer_steps.rb
159
176
  - genomer-plugin-summary.gemspec
160
177
  - lib/genomer-plugin-summary.rb
161
178
  - lib/genomer-plugin-summary/gaps.rb
179
+ - lib/genomer-plugin-summary/metrics.rb
180
+ - lib/genomer-plugin-summary/scaffold.rb
162
181
  - lib/genomer-plugin-summary/sequences.rb
163
182
  - man/genomer-summary-gaps.ronn
164
183
  - man/genomer-summary.ronn
165
184
  - spec/genomer-plugin-summary_spec.rb
166
185
  - spec/genomer-plugin-summary_spec/gaps_spec.rb
186
+ - spec/genomer-plugin-summary_spec/metrics_spec.rb
187
+ - spec/genomer-plugin-summary_spec/scaffold_spec.rb
167
188
  - spec/genomer-plugin-summary_spec/sequences_spec.rb
168
189
  - spec/spec_helper.rb
169
190
  homepage: ''
@@ -180,7 +201,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
180
201
  version: '0'
181
202
  segments:
182
203
  - 0
183
- hash: 1551182150525719363
204
+ hash: 4220695143332306969
184
205
  required_rubygems_version: !ruby/object:Gem::Requirement
185
206
  none: false
186
207
  requirements:
@@ -189,7 +210,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
189
210
  version: '0'
190
211
  segments:
191
212
  - 0
192
- hash: 1551182150525719363
213
+ hash: 4220695143332306969
193
214
  requirements: []
194
215
  rubyforge_project:
195
216
  rubygems_version: 1.8.23
@@ -198,10 +219,13 @@ specification_version: 3
198
219
  summary: Generates reports on the status of the genomer project
199
220
  test_files:
200
221
  - features/gaps.feature
222
+ - features/scaffold.feature
201
223
  - features/sequences.feature
202
224
  - features/support/env.rb
203
225
  - features/support/genomer_steps.rb
204
226
  - spec/genomer-plugin-summary_spec.rb
205
227
  - spec/genomer-plugin-summary_spec/gaps_spec.rb
228
+ - spec/genomer-plugin-summary_spec/metrics_spec.rb
229
+ - spec/genomer-plugin-summary_spec/scaffold_spec.rb
206
230
  - spec/genomer-plugin-summary_spec/sequences_spec.rb
207
231
  - spec/spec_helper.rb