genomer-plugin-summary 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.2
1
+ 0.0.3
@@ -0,0 +1,122 @@
1
+ Feature: Producing a summary of the scaffold
2
+ In order to have an overview of the scaffold
3
+ A user can use the "scaffold" command
4
+ to generate a tabular output of the scaffold
5
+
6
+ Scenario: A scaffold with a single sequence
7
+ Given I create a new genomer project
8
+ And I write to "assembly/scaffold.yml" with:
9
+ """
10
+ ---
11
+ -
12
+ sequence:
13
+ source: contig0001
14
+ """
15
+ And I write to "assembly/sequence.fna" with:
16
+ """
17
+ >contig0001
18
+ ATGC
19
+ """
20
+ When I run `genomer summary scaffold`
21
+ Then the exit status should be 0
22
+ And the output should contain:
23
+ """
24
+ +--------------+-----------+
25
+ | Scaffold |
26
+ +--------------+-----------+
27
+ | Contigs (#) | 1 |
28
+ | Gaps (#) | 0 |
29
+ +--------------+-----------+
30
+ | Size (bp) | 4 |
31
+ | Contigs (bp) | 4 |
32
+ | Gaps (bp) | 0 |
33
+ +--------------+-----------+
34
+ | G+C (%) | 50.00 |
35
+ | Contigs (%) | 100.00 |
36
+ | Gaps (%) | 0.00 |
37
+ +--------------+-----------+
38
+
39
+ """
40
+
41
+ Scenario: A scaffold with two sequences
42
+ Given I create a new genomer project
43
+ And I write to "assembly/scaffold.yml" with:
44
+ """
45
+ ---
46
+ -
47
+ sequence:
48
+ source: contig0001
49
+ -
50
+ sequence:
51
+ source: contig0002
52
+ """
53
+ And I write to "assembly/sequence.fna" with:
54
+ """
55
+ >contig0001
56
+ ATGC
57
+ >contig0002
58
+ GGGC
59
+ """
60
+ When I run `genomer summary scaffold`
61
+ Then the exit status should be 0
62
+ And the output should contain:
63
+ """
64
+ +--------------+-----------+
65
+ | Scaffold |
66
+ +--------------+-----------+
67
+ | Contigs (#) | 2 |
68
+ | Gaps (#) | 0 |
69
+ +--------------+-----------+
70
+ | Size (bp) | 8 |
71
+ | Contigs (bp) | 8 |
72
+ | Gaps (bp) | 0 |
73
+ +--------------+-----------+
74
+ | G+C (%) | 75.00 |
75
+ | Contigs (%) | 100.00 |
76
+ | Gaps (%) | 0.00 |
77
+ +--------------+-----------+
78
+
79
+ """
80
+
81
+ Scenario: A scaffold with two sequences and a gap
82
+ Given I create a new genomer project
83
+ And I write to "assembly/scaffold.yml" with:
84
+ """
85
+ ---
86
+ -
87
+ sequence:
88
+ source: contig0001
89
+ -
90
+ unresolved:
91
+ length: 5
92
+ -
93
+ sequence:
94
+ source: contig0002
95
+ """
96
+ And I write to "assembly/sequence.fna" with:
97
+ """
98
+ >contig0001
99
+ ATGC
100
+ >contig0002
101
+ GGGC
102
+ """
103
+ When I run `genomer summary scaffold`
104
+ Then the exit status should be 0
105
+ And the output should contain:
106
+ """
107
+ +--------------+-----------+
108
+ | Scaffold |
109
+ +--------------+-----------+
110
+ | Contigs (#) | 2 |
111
+ | Gaps (#) | 1 |
112
+ +--------------+-----------+
113
+ | Size (bp) | 13 |
114
+ | Contigs (bp) | 8 |
115
+ | Gaps (bp) | 5 |
116
+ +--------------+-----------+
117
+ | G+C (%) | 75.00 |
118
+ | Contigs (%) | 61.54 |
119
+ | Gaps (%) | 38.46 |
120
+ +--------------+-----------+
121
+
122
+ """
@@ -15,6 +15,7 @@ Gem::Specification.new do |gem|
15
15
  gem.version = File.read 'VERSION'
16
16
 
17
17
  gem.add_dependency "genomer", ">= 0.0.4"
18
+ gem.add_dependency "lazing", ">= 0.1.1"
18
19
  gem.add_dependency "terminal-table", "~> 1.4.5"
19
20
 
20
21
  gem.add_development_dependency 'rake', '~> 0.9.0'
@@ -0,0 +1,41 @@
1
+ require 'genomer'
2
+ require 'lazing'
3
+
4
+ module GenomerPluginSummary::Metrics
5
+
6
+ ALL = :all
7
+
8
+ def gc_content(type,scfd)
9
+ gc = enumerator_for(type,scfd).mapping{|i| gc(i)}.inject(:+) || 0.0
10
+ atgc = enumerator_for(type,scfd).mapping{|i| atgc(i)}.inject(:+) || 0.0
11
+ gc / atgc * 100
12
+ end
13
+
14
+ def count(type,scfd)
15
+ enumerator_for(type,scfd).count
16
+ end
17
+
18
+ def percent(type,scfd)
19
+ length(type,scfd) / length(ALL,scfd).to_f * 100
20
+ end
21
+
22
+ def length(type,scfd)
23
+ enumerator_for(type,scfd).
24
+ mapping(&:sequence).
25
+ mapping(&:length).
26
+ inject(:+) || 0
27
+ end
28
+
29
+ def gc(entry)
30
+ entry.sequence.gsub(/[^GCgc]/,'').length.to_f
31
+ end
32
+
33
+ def atgc(entry)
34
+ entry.sequence.gsub(/[^ATGCatgc]/,'').length.to_f
35
+ end
36
+
37
+ def enumerator_for(type,scaffold)
38
+ scaffold.selecting{|i| [ALL,i.entry_type].include? type }
39
+ end
40
+
41
+ end
@@ -0,0 +1,56 @@
1
+ require 'genomer'
2
+ require 'genomer-plugin-summary/metrics'
3
+ require 'terminal-table'
4
+
5
+ class GenomerPluginSummary::Scaffold < Genomer::Plugin
6
+ include GenomerPluginSummary::Metrics
7
+
8
+ LAYOUT = [
9
+ {:name => 'Contigs (#)', :entry_type => :sequence, :method => :count},
10
+ {:name => 'Gaps (#)', :entry_type => :unresolved, :method => :count},
11
+ :separator,
12
+ {:name => 'Size (bp)', :entry_type => :all, :method => :length},
13
+ {:name => 'Contigs (bp)', :entry_type => :sequence, :method => :length},
14
+ {:name => 'Gaps (bp)', :entry_type => :unresolved, :method => :length},
15
+ :separator,
16
+ {:name => 'G+C (%)', :entry_type => :all, :method => :gc_content},
17
+ {:name => 'Contigs (%)', :entry_type => :sequence, :method => :percent},
18
+ {:name => 'Gaps (%)', :entry_type => :unresolved, :method => :percent}
19
+ ]
20
+
21
+ def run
22
+ tabulate calculate_metrics(LAYOUT, scaffold)
23
+ end
24
+
25
+ def title
26
+ 'Scaffold'
27
+ end
28
+
29
+ def tabulate(data)
30
+ table = Terminal::Table.new(:title => title) do |t|
31
+ data.each do |(k,v)|
32
+ t << if k == :separator
33
+ :separator
34
+ else
35
+ v = sprintf('%#.2f',v) if v.class == Float
36
+ [k.ljust(12),v.to_s.rjust(9)]
37
+ end
38
+ end
39
+ end
40
+
41
+ table.align_column 0, :left
42
+ table.align_column 1, :right
43
+ table.to_s
44
+ end
45
+
46
+ def calculate_metrics(specs,scaffold)
47
+ specs.map do |spec|
48
+ if spec == :separator
49
+ spec
50
+ else
51
+ [spec[:name], send(spec[:method],spec[:entry_type],scaffold)]
52
+ end
53
+ end
54
+ end
55
+
56
+ end
@@ -1,7 +1,9 @@
1
1
  require 'genomer'
2
+ require 'genomer-plugin-summary/metrics'
2
3
  require 'terminal-table'
3
4
 
4
5
  class GenomerPluginSummary::Sequences < Genomer::Plugin
6
+ include GenomerPluginSummary::Metrics
5
7
 
6
8
  def run
7
9
  sequences = calculate(scaffold)
@@ -51,22 +53,22 @@ class GenomerPluginSummary::Sequences < Genomer::Plugin
51
53
  end
52
54
 
53
55
  def calculate(scaffold)
54
- total_length = scaffold.map(&:sequence).join.length.to_f
56
+ total_length = length(:all,scaffold).to_f
57
+ running_length = 0
55
58
 
56
- length = 0
57
59
  scaffold.map do |entry|
58
60
  i = nil
59
61
  if entry.entry_type != :unresolved
60
62
  entry_length = entry.sequence.length
61
63
  i = { :sequence => entry.source,
62
- :start => length + 1,
63
- :end => length + entry_length,
64
+ :start => running_length + 1,
65
+ :end => running_length + entry_length,
64
66
  :size => entry_length,
65
67
  :percent => entry_length / total_length * 100,
66
- :gc => gc_content(entry.sequence) }
68
+ :gc => gc(entry) / atgc(entry) * 100 }
67
69
  end
68
70
 
69
- length += entry.sequence.length
71
+ running_length += entry.sequence.length
70
72
  i
71
73
  end.compact
72
74
  end
@@ -87,9 +89,4 @@ class GenomerPluginSummary::Sequences < Genomer::Plugin
87
89
  totals
88
90
  end
89
91
 
90
- def gc_content(sequence)
91
- nucleotides = sequence.gsub(/[^ATGCatgc]/,'')
92
- nucleotides.gsub(/[^GCgc]/,'').length.to_f / nucleotides.length * 100
93
- end
94
-
95
92
  end
@@ -0,0 +1,255 @@
1
+ require 'spec_helper'
2
+ require 'genomer-plugin-summary/metrics'
3
+
4
+ describe GenomerPluginSummary::Metrics do
5
+
6
+ let(:metric) do
7
+ o = Object.new
8
+ o.extend described_class
9
+ o
10
+ end
11
+
12
+ describe "#gc_content" do
13
+
14
+ subject do
15
+ metric.gc_content entry_type, scaffold
16
+ end
17
+
18
+ context "an empty scaffold" do
19
+ let(:scaffold){ [] }
20
+
21
+ context "contigs" do
22
+ let(:entry_type){ :sequence }
23
+ it{ should be_nan }
24
+ end
25
+ context "gaps" do
26
+ let(:entry_type){ :unresolved }
27
+ it{ should == be_nan }
28
+ end
29
+ context "everything" do
30
+ let(:entry_type){ :all }
31
+ it{ should == be_nan }
32
+ end
33
+
34
+ end
35
+
36
+ context "a single contig scaffold" do
37
+ let(:scaffold){ [sequence('ATGC')] }
38
+
39
+ context "contigs" do
40
+ let(:entry_type){ :sequence }
41
+ it{ should == 50.0 }
42
+ end
43
+ context "gaps" do
44
+ let(:entry_type){ :unresolved }
45
+ it{ should be_nan }
46
+ end
47
+ context "everything" do
48
+ let(:entry_type){ :all }
49
+ it{ should == 50.0 }
50
+ end
51
+
52
+ end
53
+
54
+ context "a mixed scaffold" do
55
+ let(:scaffold){ [sequence('ATGC'),unresolved('NNNN'),sequence('ATGC')] }
56
+
57
+ context "contigs" do
58
+ let(:entry_type){ :sequence }
59
+ it{ should == 50.0 }
60
+ end
61
+ context "gaps" do
62
+ let(:entry_type){ :unresolved }
63
+ it{ should == be_nan }
64
+ end
65
+ context "everything" do
66
+ let(:entry_type){ :all }
67
+ it{ should == 50.0 }
68
+ end
69
+
70
+ end
71
+
72
+ end
73
+ describe "#count" do
74
+
75
+ subject do
76
+ metric.count entry_type, scaffold
77
+ end
78
+
79
+ context "an empty scaffold" do
80
+ let(:scaffold){ [] }
81
+
82
+ context "contigs" do
83
+ let(:entry_type){ :sequence }
84
+ it{ should == 0 }
85
+ end
86
+ context "gaps" do
87
+ let(:entry_type){ :unresolved }
88
+ it{ should == 0 }
89
+ end
90
+ context "everything" do
91
+ let(:entry_type){ :all }
92
+ it{ should == 0 }
93
+ end
94
+
95
+ end
96
+
97
+ context "a single contig scaffold" do
98
+ let(:scaffold){ [sequence('ATGC')] }
99
+
100
+ context "contigs" do
101
+ let(:entry_type){ :sequence }
102
+ it{ should == 1 }
103
+ end
104
+ context "gaps" do
105
+ let(:entry_type){ :unresolved }
106
+ it{ should == 0 }
107
+ end
108
+ context "everything" do
109
+ let(:entry_type){ :all }
110
+ it{ should == 1 }
111
+ end
112
+
113
+ end
114
+
115
+ context "a mixed scaffold" do
116
+ let(:scaffold){ [sequence('ATGC'),unresolved('NNNN'),sequence('ATGC')] }
117
+
118
+ context "contigs" do
119
+ let(:entry_type){ :sequence }
120
+ it{ should == 2 }
121
+ end
122
+ context "gaps" do
123
+ let(:entry_type){ :unresolved }
124
+ it{ should == 1 }
125
+ end
126
+ context "everything" do
127
+ let(:entry_type){ :all }
128
+ it{ should == 3 }
129
+ end
130
+
131
+ end
132
+
133
+ end
134
+
135
+ describe "#length" do
136
+
137
+ subject do
138
+ metric.length entry_type, scaffold
139
+ end
140
+
141
+ context "an empty scaffold" do
142
+ let(:scaffold){ [] }
143
+
144
+ context "contigs" do
145
+ let(:entry_type){ :sequence }
146
+ it{ should == 0 }
147
+ end
148
+ context "gaps" do
149
+ let(:entry_type){ :unresolved }
150
+ it{ should == 0 }
151
+ end
152
+ context "everything" do
153
+ let(:entry_type){ :all }
154
+ it{ should == 0 }
155
+ end
156
+
157
+ end
158
+
159
+ context "a single contig scaffold" do
160
+ let(:scaffold){ [sequence('ATGC')] }
161
+
162
+ context "contigs" do
163
+ let(:entry_type){ :sequence }
164
+ it{ should == 4 }
165
+ end
166
+ context "gaps" do
167
+ let(:entry_type){ :unresolved }
168
+ it{ should == 0 }
169
+ end
170
+ context "everything" do
171
+ let(:entry_type){ :all }
172
+ it{ should == 4 }
173
+ end
174
+ end
175
+
176
+ context "a mixed scaffold" do
177
+ let(:scaffold){ [sequence('ATGC'),unresolved('NNNN'),sequence('ATGC')] }
178
+
179
+ context "contigs" do
180
+ let(:entry_type){ :sequence }
181
+ it{ should == 8 }
182
+ end
183
+ context "gaps" do
184
+ let(:entry_type){ :unresolved }
185
+ it{ should == 4 }
186
+ end
187
+ context "everything" do
188
+ let(:entry_type){ :all }
189
+ it{ should == 12 }
190
+ end
191
+ end
192
+
193
+ end
194
+
195
+ describe "#percent" do
196
+
197
+ subject do
198
+ metric.percent entry_type, scaffold
199
+ end
200
+
201
+ context "an empty scaffold" do
202
+ let(:scaffold){ [] }
203
+
204
+ context "contigs" do
205
+ let(:entry_type){ :sequence }
206
+ it{ should be_nan }
207
+ end
208
+ context "gaps" do
209
+ let(:entry_type){ :unresolved }
210
+ it{ should == be_nan }
211
+ end
212
+ context "everything" do
213
+ let(:entry_type){ :all }
214
+ it{ should == be_nan }
215
+ end
216
+
217
+ end
218
+
219
+ context "a single contig scaffold" do
220
+ let(:scaffold){ [sequence('ATGC')] }
221
+
222
+ context "contigs" do
223
+ let(:entry_type){ :sequence }
224
+ it{ should == 100.0 }
225
+ end
226
+ context "gaps" do
227
+ let(:entry_type){ :unresolved }
228
+ it{ should == 0.0 }
229
+ end
230
+ context "everything" do
231
+ let(:entry_type){ :all }
232
+ it{ should == 100.0 }
233
+ end
234
+ end
235
+
236
+ context "a mixed scaffold" do
237
+ let(:scaffold){ [sequence('ATGC'),unresolved('NNNN'),sequence('ATGC')] }
238
+
239
+ context "contigs" do
240
+ let(:entry_type){ :sequence }
241
+ it{ should == 8 / 12.0 * 100}
242
+ end
243
+ context "gaps" do
244
+ let(:entry_type){ :unresolved }
245
+ it{ should == 4 / 12.0 * 100 }
246
+ end
247
+ context "everything" do
248
+ let(:entry_type){ :all }
249
+ it{ should == 100.0 }
250
+ end
251
+ end
252
+
253
+ end
254
+
255
+ end
@@ -0,0 +1,76 @@
1
+ require 'spec_helper'
2
+ require 'genomer-plugin-summary/scaffold'
3
+
4
+ describe GenomerPluginSummary::Scaffold do
5
+
6
+ describe "#tabulate" do
7
+
8
+ subject do
9
+ described_class.new([],{}).tabulate(data) + "\n"
10
+ end
11
+
12
+ context "passed table data" do
13
+
14
+ let(:data) do
15
+ [['Contigs (#)',1.0],
16
+ :separator,
17
+ ['Gaps (#)',0]]
18
+ end
19
+
20
+ it do
21
+ should ==<<-EOS.unindent!
22
+ +--------------+-----------+
23
+ | Scaffold |
24
+ +--------------+-----------+
25
+ | Contigs (#) | 1.00 |
26
+ +--------------+-----------+
27
+ | Gaps (#) | 0 |
28
+ +--------------+-----------+
29
+ EOS
30
+ end
31
+ end
32
+ end
33
+
34
+ describe "#calculate_metrics" do
35
+
36
+ subject do
37
+ described_class.new([],{}).calculate_metrics(specs,scaffold)
38
+ end
39
+
40
+ context "should calculate a single metrics for the scaffold" do
41
+
42
+ let(:scaffold) do
43
+ [sequence('ATGC')]
44
+ end
45
+
46
+ let(:specs) do
47
+ [{:name => 'Contigs (%)', :entry_type => :sequence, :method => :percent}]
48
+ end
49
+
50
+ it do
51
+ should == [['Contigs (%)',100.0]]
52
+ end
53
+ end
54
+
55
+ context "should calculate a single metrics with separators" do
56
+
57
+ let(:scaffold) do
58
+ [sequence('ATGC')]
59
+ end
60
+
61
+ let(:specs) do
62
+ [:separator,
63
+ {:name => 'Contigs (%)', :entry_type => :sequence, :method => :percent}]
64
+ end
65
+
66
+ it do
67
+ should == [
68
+ :separator,
69
+ ['Contigs (%)',100.0]
70
+ ]
71
+ end
72
+ end
73
+
74
+ end
75
+
76
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: genomer-plugin-summary
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-10-12 00:00:00.000000000 Z
12
+ date: 2012-10-19 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: genomer
@@ -27,6 +27,22 @@ dependencies:
27
27
  - - ! '>='
28
28
  - !ruby/object:Gem::Version
29
29
  version: 0.0.4
30
+ - !ruby/object:Gem::Dependency
31
+ name: lazing
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: 0.1.1
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: 0.1.1
30
46
  - !ruby/object:Gem::Dependency
31
47
  name: terminal-table
32
48
  requirement: !ruby/object:Gem::Requirement
@@ -153,17 +169,22 @@ files:
153
169
  - Rakefile
154
170
  - VERSION
155
171
  - features/gaps.feature
172
+ - features/scaffold.feature
156
173
  - features/sequences.feature
157
174
  - features/support/env.rb
158
175
  - features/support/genomer_steps.rb
159
176
  - genomer-plugin-summary.gemspec
160
177
  - lib/genomer-plugin-summary.rb
161
178
  - lib/genomer-plugin-summary/gaps.rb
179
+ - lib/genomer-plugin-summary/metrics.rb
180
+ - lib/genomer-plugin-summary/scaffold.rb
162
181
  - lib/genomer-plugin-summary/sequences.rb
163
182
  - man/genomer-summary-gaps.ronn
164
183
  - man/genomer-summary.ronn
165
184
  - spec/genomer-plugin-summary_spec.rb
166
185
  - spec/genomer-plugin-summary_spec/gaps_spec.rb
186
+ - spec/genomer-plugin-summary_spec/metrics_spec.rb
187
+ - spec/genomer-plugin-summary_spec/scaffold_spec.rb
167
188
  - spec/genomer-plugin-summary_spec/sequences_spec.rb
168
189
  - spec/spec_helper.rb
169
190
  homepage: ''
@@ -180,7 +201,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
180
201
  version: '0'
181
202
  segments:
182
203
  - 0
183
- hash: 1551182150525719363
204
+ hash: 4220695143332306969
184
205
  required_rubygems_version: !ruby/object:Gem::Requirement
185
206
  none: false
186
207
  requirements:
@@ -189,7 +210,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
189
210
  version: '0'
190
211
  segments:
191
212
  - 0
192
- hash: 1551182150525719363
213
+ hash: 4220695143332306969
193
214
  requirements: []
194
215
  rubyforge_project:
195
216
  rubygems_version: 1.8.23
@@ -198,10 +219,13 @@ specification_version: 3
198
219
  summary: Generates reports on the status of the genomer project
199
220
  test_files:
200
221
  - features/gaps.feature
222
+ - features/scaffold.feature
201
223
  - features/sequences.feature
202
224
  - features/support/env.rb
203
225
  - features/support/genomer_steps.rb
204
226
  - spec/genomer-plugin-summary_spec.rb
205
227
  - spec/genomer-plugin-summary_spec/gaps_spec.rb
228
+ - spec/genomer-plugin-summary_spec/metrics_spec.rb
229
+ - spec/genomer-plugin-summary_spec/scaffold_spec.rb
206
230
  - spec/genomer-plugin-summary_spec/sequences_spec.rb
207
231
  - spec/spec_helper.rb