genomer-plugin-summary 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -25,10 +25,10 @@ Feature: Producing a summary of the scaffold sequences
25
25
  +------------------+------------+------------+------------+----------+--------+
26
26
  | Scaffold Sequences |
27
27
  +------------------+------------+------------+------------+----------+--------+
28
- | Sequence | Start (bp) | End (bp) | Size (bp) | Size (%) | GC (%) |
28
+ | Sequence | Start (bp) | End (bp) | Size (bp) | Size (%) | GC (%) |
29
29
  +------------------+------------+------------+------------+----------+--------+
30
30
  +------------------+------------+------------+------------+----------+--------+
31
- | All | NA | NA | NA | NA | NA |
31
+ | All | 0 | 0 | 0 | 0.00 | 0.00 |
32
32
  +------------------+------------+------------+------------+----------+--------+
33
33
  """
34
34
 
@@ -53,7 +53,7 @@ Feature: Producing a summary of the scaffold sequences
53
53
  +------------------+------------+------------+------------+----------+--------+
54
54
  | Scaffold Sequences |
55
55
  +------------------+------------+------------+------------+----------+--------+
56
- | Sequence | Start (bp) | End (bp) | Size (bp) | Size (%) | GC (%) |
56
+ | Sequence | Start (bp) | End (bp) | Size (bp) | Size (%) | GC (%) |
57
57
  +------------------+------------+------------+------------+----------+--------+
58
58
  | contig0001 | 1 | 4 | 4 | 100.00 | 50.00 |
59
59
  +------------------+------------+------------+------------+----------+--------+
@@ -87,7 +87,7 @@ Feature: Producing a summary of the scaffold sequences
87
87
  +------------------+------------+------------+------------+----------+--------+
88
88
  | Scaffold Sequences |
89
89
  +------------------+------------+------------+------------+----------+--------+
90
- | Sequence | Start (bp) | End (bp) | Size (bp) | Size (%) | GC (%) |
90
+ | Sequence | Start (bp) | End (bp) | Size (bp) | Size (%) | GC (%) |
91
91
  +------------------+------------+------------+------------+----------+--------+
92
92
  | contig0001 | 1 | 6 | 6 | 50.00 | 66.67 |
93
93
  | contig0002 | 7 | 12 | 6 | 50.00 | 33.33 |
@@ -120,7 +120,7 @@ Feature: Producing a summary of the scaffold sequences
120
120
  +------------------+------------+------------+------------+----------+--------+
121
121
  | Scaffold Sequences |
122
122
  +------------------+------------+------------+------------+----------+--------+
123
- | Sequence | Start (bp) | End (bp) | Size (bp) | Size (%) | GC (%) |
123
+ | Sequence | Start (bp) | End (bp) | Size (bp) | Size (%) | GC (%) |
124
124
  +------------------+------------+------------+------------+----------+--------+
125
125
  | contig0001 | 1 | 6 | 6 | 50.00 | 66.67 |
126
126
  | contig0001 | 7 | 12 | 6 | 50.00 | 66.67 |
@@ -158,7 +158,7 @@ Feature: Producing a summary of the scaffold sequences
158
158
  +------------------+------------+------------+------------+----------+--------+
159
159
  | Scaffold Sequences |
160
160
  +------------------+------------+------------+------------+----------+--------+
161
- | Sequence | Start (bp) | End (bp) | Size (bp) | Size (%) | GC (%) |
161
+ | Sequence | Start (bp) | End (bp) | Size (bp) | Size (%) | GC (%) |
162
162
  +------------------+------------+------------+------------+----------+--------+
163
163
  | contig0001 | 1 | 6 | 6 | 30.00 | 66.67 |
164
164
  | contig0002 | 15 | 20 | 6 | 30.00 | 33.33 |
@@ -196,7 +196,7 @@ Feature: Producing a summary of the scaffold sequences
196
196
  +------------------+------------+------------+------------+----------+--------+
197
197
  | Scaffold Sequences |
198
198
  +------------------+------------+------------+------------+----------+--------+
199
- | Sequence | Start (bp) | End (bp) | Size (bp) | Size (%) | GC (%) |
199
+ | Sequence | Start (bp) | End (bp) | Size (bp) | Size (%) | GC (%) |
200
200
  +------------------+------------+------------+------------+----------+--------+
201
201
  | contig0001 | 9 | 14 | 6 | 30.00 | 66.67 |
202
202
  | contig0002 | 15 | 20 | 6 | 30.00 | 33.33 |
@@ -235,7 +235,7 @@ Feature: Producing a summary of the scaffold sequences
235
235
  +------------------+------------+------------+------------+----------+--------+
236
236
  | Scaffold Sequences |
237
237
  +------------------+------------+------------+------------+----------+--------+
238
- | Sequence | Start (bp) | End (bp) | Size (bp) | Size (%) | GC (%) |
238
+ | Sequence | Start (bp) | End (bp) | Size (bp) | Size (%) | GC (%) |
239
239
  +------------------+------------+------------+------------+----------+--------+
240
240
  | contig0001 | 1 | 6 | 6 | 30.00 | 66.67 |
241
241
  | contig0002 | 7 | 12 | 6 | 30.00 | 33.33 |
@@ -244,3 +244,34 @@ Feature: Producing a summary of the scaffold sequences
244
244
  +------------------+------------+------------+------------+----------+--------+
245
245
  """
246
246
 
247
+ Scenario: Generating CSV output
248
+ Given I create a new genomer project
249
+ And I write to "assembly/scaffold.yml" with:
250
+ """
251
+ ---
252
+ -
253
+ sequence:
254
+ source: contig0001
255
+ -
256
+ sequence:
257
+ source: contig0002
258
+ -
259
+ unresolved:
260
+ length: 8
261
+ """
262
+ And I write to "assembly/sequence.fna" with:
263
+ """
264
+ >contig0001
265
+ ATGCGC
266
+ >contig0002
267
+ ATATGC
268
+ """
269
+ When I run `genomer summary sequences --output=csv`
270
+ Then the exit status should be 0
271
+ And the output should contain:
272
+ """
273
+ sequence,start_bp,end_bp,size_bp,size_%,gc_%
274
+ contig0001,1,6,6,30.00,66.67
275
+ contig0002,7,12,6,30.00,33.33
276
+ all,1,12,12,60.00,50.00
277
+ """
@@ -0,0 +1,63 @@
1
+ require 'genomer'
2
+ require 'genomer-plugin-summary/format'
3
+ require 'genomer-plugin-summary/enumerators'
4
+ require 'genomer-plugin-summary/metrics'
5
+
6
# Summary plugin that tabulates the contigs of a scaffold. Contigs come from
# Enumerators#enumerator_for_contig; each row reports the contig's position,
# size, share of the scaffold and GC content, followed by an 'All' totals row.
class GenomerPluginSummary::Contigs < Genomer::Plugin
  include GenomerPluginSummary::Metrics
  include GenomerPluginSummary::Format
  include GenomerPluginSummary::Enumerators

  # Layout options handed to Format#table. Treated as read-only: per-run
  # settings such as :output are merged into a copy inside #tabulate.
  FORMATTING = {
    :title   => 'Scaffold Contigs',
    :headers => ['Contig', 'Start (bp)', 'End (bp)', 'Size (bp)', 'Size (%)', 'GC (%)'],
    :width   => {
      0 => 6,
      1 => 10,
      2 => 10,
      3 => 10,
      4 => 8,
      5 => 6
    },
    :justification => {
      0 => :right,
      1 => :right,
      2 => :right,
      3 => :right,
      4 => :right,
      5 => :right
    },
    :format => {
      4 => '%#.2f',
      5 => '%#.2f'
    }
  }

  # Hash keys read from every contig (and from the totals) in column order.
  COLUMNS = [:id, :start, :stop, :size, :percent, :gc]

  # Plugin entry point: computes the per-contig metrics and the totals,
  # then returns the rendered table.
  def run
    contigs = calculate(scaffold)
    total   = sequence_total(contigs)

    tabulate(contigs,total,flags)
  end

  # Renders the contig rows, a separator and the totals row.
  #
  # contigs - Array of Hashes as produced by #calculate.
  # total   - Hash of totals; any column it lacks (the :id column) is
  #           shown as 'All'.
  # flags   - Hash of command-line flags; only :output is read.
  #
  # Returns the String produced by Format#table.
  def tabulate(contigs,total,flags)
    rows = contigs.map { |contig| COLUMNS.map { |col| contig[col] } }
    rows << :separator
    rows << COLUMNS.map { |col| total[col] || 'All' }

    # Merge into a copy instead of assigning into FORMATTING, so the
    # requested output format never leaks into the shared constant.
    table(rows, FORMATTING.merge(:output => flags[:output]))
  end

  # Builds one Hash per contig with :gc, :size and :percent added and the
  # raw :sequence removed.
  #
  # :percent is relative to the summed length of every scaffold entry.
  # :gc is G+C over A+T+G+C, as a percentage.
  def calculate(scaffold)
    total_length = scaffold.mapping(&:sequence).mapping(&:length).inject(&:+).to_f

    enumerator_for_contig(scaffold).mapping do |contig|
      sequence = contig.delete(:sequence)

      contig[:gc]      = gc(sequence) / atgc(sequence) * 100
      contig[:size]    = sequence.length
      contig[:percent] = contig[:size] / total_length * 100
      contig
    end.to_a
  end

end
@@ -0,0 +1,81 @@
1
+ require 'lazing'
2
+
3
# Helpers that turn a scaffold into Arrays (or lazy selections) of Hashes
# describing regions of it. Every Hash has the keys :sequence, :start,
# :stop, :type and :id; :start and :stop are 1-based inclusive positions
# in the concatenated scaffold sequence.
module GenomerPluginSummary::Enumerators

  # Dispatches to the enumerator_for_<type> method for the given type
  # (methods defined here: sequence, unresolved, contig, gap, all).
  def enumerator_for(type,scaffold)
    send("enumerator_for_#{type}", scaffold)
  end

  # Scaffold entries whose entry_type is :sequence.
  def enumerator_for_sequence(scaffold)
    enumerator_for_all(scaffold).
      selecting{|i| i[:type] == :sequence}
  end

  # Scaffold entries whose entry_type is :unresolved.
  def enumerator_for_unresolved(scaffold)
    enumerator_for_all(scaffold).
      selecting{|i| i[:type] == :unresolved}
  end

  # Runs of non-N characters in the concatenated scaffold sequence,
  # numbered from 1 in order of appearance.
  def enumerator_for_contig(scaffold)
    enumerate_regions(scaffold, :contig)
  end

  # Runs of N/n characters in the concatenated scaffold sequence,
  # numbered from 1 in order of appearance.
  def enumerator_for_gap(scaffold)
    enumerate_regions(scaffold, :gap)
  end

  # One Hash per scaffold entry, in scaffold order. :id is the entry's
  # source for :sequence entries and nil for every other entry type.
  def enumerator_for_all(scaffold)
    position = 0

    scaffold.each_with_object([]) do |entry,entries|
      sequence = entry.sequence
      type     = entry.entry_type

      entries << {:sequence => sequence,
                  :start    => position + 1,
                  :stop     => position + sequence.length,
                  :type     => type,
                  :id       => type == :sequence ? entry.source : nil}

      position += sequence.length
    end
  end

  private

  # Splits the concatenated scaffold sequence into alternating runs of
  # N/n and non-N characters, preserving order.
  def scaffold_regions(scaffold)
    scaffold.mapping(&:sequence).to_a.join.
      gsub(/([^Nn])([Nn])/,'\1 \2').
      gsub(/([Nn])([^Nn])/,'\1 \2').
      scan(/[^\s]+/)
  end

  # Shared walk behind enumerator_for_contig and enumerator_for_gap.
  #
  # type - :gap keeps the runs containing N/n, anything else keeps the
  #        runs without them. The value is also stored as each Hash's :type.
  #
  # Positions advance over every run, kept or not, so :start/:stop refer
  # to the full concatenated sequence; :id counts only the kept runs.
  def enumerate_regions(scaffold,type)
    position = 0
    number   = 0

    scaffold_regions(scaffold).each_with_object([]) do |region,entries|
      start     = position + 1
      position += region.length

      is_gap = region.downcase.include?('n')
      next unless is_gap == (type == :gap)

      number += 1
      entries << {:sequence => region,
                  :start    => start,
                  :stop     => position,
                  :type     => type,
                  :id       => number}
    end
  end

end
@@ -0,0 +1,87 @@
1
+ require 'terminal-table'
2
+ require 'lazing'
3
+
4
# Table rendering shared by the summary plugins. Data is an Array of rows,
# where each row is either an Array of cell values or the Symbol :separator.
#
# Recognised option keys:
#   :output        - 'csv' selects CSV output, anything else the text table
#   :title         - table title (text table only)
#   :headers       - Array of header strings
#   :width         - Hash of column index => minimum cell width
#   :justification - Hash of column index => :left, :right or :center
#   :format        - Hash of column index => sprintf String or Proc
module GenomerPluginSummary::Format

  # Values used for any of these keys the caller does not supply.
  DEFAULTS = {
    :justification => {},
    :width         => {},
    :format        => {}
  }

  # Renders data as CSV when opts[:output] is 'csv', otherwise as a
  # text table. Returns a String ending in a newline.
  def table(data,opts = {})
    opts = DEFAULTS.merge opts
    case opts[:output]
    when 'csv' then csv(data,opts)
    else            pretty(data,opts)
    end
  end

  # Formats every cell of every row according to the per-column options;
  # :separator rows are passed through untouched.
  def create_cells(data,opts)
    data.map do |row|
      next :separator if row == :separator

      row.each_with_index.map do |cell,index|
        format_cell(cell,
                    opts[:width][index],
                    opts[:justification][index],
                    opts[:format][index])
      end
    end
  end

  # Converts a single value to its display String.
  #
  # cell          - the value to show.
  # width         - Integer to pad to, or nil for no padding.
  # justification - :right or :center; any other value left-justifies.
  # format        - sprintf String, Proc taking the cell, or nil.
  #
  # Any other kind of format falls back to cell.to_s, so the result is
  # always a String.
  def format_cell(cell,width,justification,format = nil)
    formatted = case format
                when String then sprintf(format,cell)
                when Proc   then format.call(cell).to_s
                else             cell.to_s
                end

    return formatted if width.nil?

    case justification
    when :right  then formatted.rjust(width)
    when :center then formatted.center(width)
    else              formatted.ljust(width)
    end
  end

  # Renders data as comma-separated lines, headers first when present.
  # Separator rows are dropped. In every line (headers and data alike)
  # spaces become underscores, parentheses are removed and text is
  # lower-cased.
  def csv(data,opts)
    # Padding is meaningless in CSV; override on a copy so the caller's
    # options are left untouched.
    opts  = opts.merge(:width => {}, :justification => {})
    cells = create_cells(data,opts)

    cells.unshift opts[:headers] if opts[:headers]

    lines = cells.compact.
      reject{|row| row == :separator}.
      map{|row| row.join(',').gsub(' ','_').gsub(/[()]/,'').downcase}

    lines.join("\n") + "\n"
  end

  # Renders data as a Terminal::Table with optional centred headers,
  # column alignment and title.
  def pretty(data,opts)
    cells = create_cells(data,opts)

    if opts[:headers]
      # Measure real rows only, and before the header separator is added:
      # :separator is a Symbol, not a row of cells.
      rows = cells.reject{|row| row == :separator}

      headers = opts[:headers].each_with_index.map do |header,index|
        width = opts[:width][index] ||
                rows.map{|row| row[index].to_s.length }.max ||
                0
        format_cell(header, width, :center)
      end

      cells.unshift :separator
      cells.unshift headers
    end

    table = Terminal::Table.new do |t|
      cells.each{|c| t << c}
    end
    opts[:justification].each{|(k,v)| table.align_column k, v }
    table.title ||= opts[:title]
    table.to_s + "\n"
  end

end
@@ -1,45 +1,37 @@
1
1
  require 'genomer'
2
- require 'terminal-table'
2
+ require 'genomer-plugin-summary/format'
3
3
 
4
4
  class GenomerPluginSummary::Gaps < Genomer::Plugin
5
+ include GenomerPluginSummary::Format
5
6
 
6
7
  def run
7
- tabulate determine_gaps scaffold
8
+ tabulate(determine_gaps(scaffold),flags)
8
9
  end
9
10
 
10
- def headings
11
- ['Number'.center(8),
12
- 'Length'.center(8),
13
- 'Start'.center(8),
14
- 'End'.center(8),
15
- 'Type'.center(12)]
16
- end
17
-
18
- def title
19
- 'Scaffold Gaps'
20
- end
21
-
22
- def tabulate(contigs)
23
- table = Terminal::Table.new(:title => title) do |t|
24
- t << headings
25
- t << :separator
26
- contigs.each do |ctg|
27
- t << [ctg[:number],
28
- ctg[:length],
29
- ctg[:start],
30
- ctg[:end],
31
- ctg[:type]]
32
- end
33
- end
11
# Hash keys read from every gap, in column order.
COLUMNS = [:number, :length, :start, :end, :type]

# Layout options handed to the table renderer. Treated as read-only:
# per-run settings such as :output are merged into a copy in #tabulate.
FORMATTING = {
  :title   => 'Scaffold Gaps',
  :headers => ['Number', 'Length', 'Start', 'End', 'Type'],
  :width   => {
    0 => 8,
    1 => 8,
    2 => 8,
    3 => 8,
    4 => 12
  },
  :justification => {
    0 => :right,
    1 => :right,
    2 => :right,
    3 => :right,
    4 => :center
  }
}

# Renders the gaps as a table.
#
# gaps  - Array of Hashes carrying the COLUMNS keys.
# flags - Hash of command-line flags; only :output is read.
#
# Returns whatever #table returns for the rows and options.
def tabulate(gaps,flags)
  rows = gaps.map { |gap| COLUMNS.map { |col| gap[col] } }

  # Merge into a copy instead of assigning into FORMATTING, so the
  # requested output format never leaks into the shared constant.
  table(rows, FORMATTING.merge(:output => flags[:output]))
end
44
36
 
45
37
  def gap_locations(seq)
@@ -0,0 +1,51 @@
1
+ require 'genomer'
2
+ require 'genomer-plugin-summary/metrics'
3
+ require 'genomer-plugin-summary/format'
4
+
5
# Summary plugin that reports whole-scaffold metrics (counts, lengths and
# percentages per entry type) as a two-column table.
class GenomerPluginSummary::Genome < Genomer::Plugin
  include GenomerPluginSummary::Metrics
  include GenomerPluginSummary::Format

  # Rows of the report, in display order. Each Hash names the row, the
  # entry type to measure and the method (called with the entry type and
  # the scaffold) that computes the value; :separator draws a rule.
  LAYOUT = [
    {:name => 'Sequences (#)',  :entry_type => :sequence, :method => :count},
    {:name => 'Contigs (#)',    :entry_type => :contig,   :method => :count},
    {:name => 'Gaps (#)',       :entry_type => :gap,      :method => :count},
    :separator,
    {:name => 'Size (bp)',      :entry_type => :all,      :method => :length},
    {:name => 'Sequences (bp)', :entry_type => :sequence, :method => :length},
    {:name => 'Contigs (bp)',   :entry_type => :contig,   :method => :length},
    {:name => 'Gaps (bp)',      :entry_type => :gap,      :method => :length},
    :separator,
    {:name => 'G+C (%)',        :entry_type => :all,      :method => :gc_content},
    {:name => 'Sequences (%)',  :entry_type => :sequence, :method => :percent},
    {:name => 'Contigs (%)',    :entry_type => :contig,   :method => :percent},
    {:name => 'Gaps (%)',       :entry_type => :gap,      :method => :percent}
  ]

  # Layout options handed to Format#table. Treated as read-only: per-run
  # settings such as :output are merged into a copy inside #tabulate.
  # The value column shows Floats with two decimals and leaves every
  # other value as it is.
  FORMATTING = {
    :title         => 'Scaffold',
    :width         => {0 => 12, 1 => 9},
    :justification => {1 => :right},
    :format        => {1 => lambda{|i| i.is_a?(Float) ? sprintf('%#.2f',i) : i }}
  }

  # Plugin entry point: computes every LAYOUT row for the project
  # scaffold and returns the rendered table.
  def run
    tabulate(calculate_metrics(LAYOUT, scaffold),flags)
  end

  # Renders the computed rows.
  #
  # data  - Array of [name, value] pairs and :separator Symbols.
  # flags - Hash of command-line flags; only :output is read.
  def tabulate(data,flags)
    # Merge into a copy: storing the flag in FORMATTING itself would make
    # a requested format persist into later calls that do not ask for it.
    table(data, FORMATTING.merge(:output => flags[:output]))
  end

  # Turns each spec Hash into a [name, value] pair by calling the method
  # the spec names with (entry_type, scaffold); :separator passes through.
  def calculate_metrics(specs,scaffold)
    specs.map do |spec|
      next spec if spec == :separator

      [spec[:name], send(spec[:method],spec[:entry_type],scaffold)]
    end
  end

end
@@ -1,13 +1,15 @@
1
1
  require 'genomer'
2
+ require 'genomer-plugin-summary/enumerators'
2
3
  require 'lazing'
3
4
 
4
5
  module GenomerPluginSummary::Metrics
6
+ include GenomerPluginSummary::Enumerators
5
7
 
6
8
  ALL = :all
7
9
 
8
10
  def gc_content(type,scfd)
9
- gc = enumerator_for(type,scfd).mapping{|i| gc(i)}.inject(:+) || 0.0
10
- atgc = enumerator_for(type,scfd).mapping{|i| atgc(i)}.inject(:+) || 0.0
11
+ gc = enumerator_for(type,scfd).mapping{|i| gc(i[:sequence])}.inject(:+) || 0.0
12
+ atgc = enumerator_for(type,scfd).mapping{|i| atgc(i[:sequence])}.inject(:+) || 0.0
11
13
  gc / atgc * 100
12
14
  end
13
15
 
@@ -21,21 +23,33 @@ module GenomerPluginSummary::Metrics
21
23
 
22
24
  def length(type,scfd)
23
25
  enumerator_for(type,scfd).
24
- mapping(&:sequence).
26
+ mapping{|i| i[:sequence]}.
25
27
  mapping(&:length).
26
28
  inject(:+) || 0
27
29
  end
28
30
 
29
- def gc(entry)
30
- entry.sequence.gsub(/[^GCgc]/,'').length.to_f
31
# Number of G and C characters (either case) in the sequence.
#
# sequence - String of bases.
#
# Returns the count as a Float so callers can divide without truncation.
def gc(sequence)
  # String#count tallies the characters directly, without building the
  # intermediate stripped copy that gsub would allocate.
  sequence.count('GCgc').to_f
end
32
34
 
33
- def atgc(entry)
34
- entry.sequence.gsub(/[^ATGCatgc]/,'').length.to_f
35
# Number of A, T, G and C characters (either case) in the sequence;
# anything else, such as N, is not counted.
#
# sequence - String of bases.
#
# Returns the count as a Float so callers can divide without truncation.
def atgc(sequence)
  # String#count tallies the characters directly, without building the
  # intermediate stripped copy that gsub would allocate.
  sequence.count('ATGCatgc').to_f
end
36
38
 
37
- def enumerator_for(type,scaffold)
38
- scaffold.selecting{|i| [ALL,i.entry_type].include? type }
39
# Combines per-sequence metrics into a single totals Hash.
#
# seqs - Array of Hashes with the keys :start, :stop, :size, :percent
#        and :gc.
#
# Returns a Hash with:
#   :start   - :start of the first entry
#   :stop    - :stop of the last entry
#   :size    - sum of the sizes
#   :percent - sum of the percentages
#   :gc      - size-weighted mean of the GC values
# For an empty Array every one of those keys is 0.
def sequence_total(seqs)
  return Hash[[:start, :stop, :size, :percent, :gc].map{|i| [i, 0]}] if seqs.empty?

  totals = seqs.each_with_object({:size => 0, :percent => 0, :gc => 0}) do |entry,hash|
    hash[:start]  ||= entry[:start]
    hash[:stop]     = entry[:stop]
    hash[:size]    += entry[:size]
    hash[:percent] += entry[:percent]

    # A zero-length entry carries no weight; skipping it also keeps an
    # undefined (NaN) GC value from poisoning the whole sum.
    hash[:gc] += entry[:gc] * entry[:size] unless entry[:size].zero?
  end

  # With nothing to weight by, report 0.0 rather than dividing by zero.
  totals[:gc] = totals[:size].zero? ? 0.0 : totals[:gc] / totals[:size]
  totals
end
40
54
 
41
55
  end
@@ -1,92 +1,66 @@
1
1
  require 'genomer'
2
2
  require 'genomer-plugin-summary/metrics'
3
- require 'terminal-table'
3
+ require 'genomer-plugin-summary/format'
4
4
 
5
5
  class GenomerPluginSummary::Sequences < Genomer::Plugin
6
6
  include GenomerPluginSummary::Metrics
7
+ include GenomerPluginSummary::Format
8
+ include GenomerPluginSummary::Enumerators
7
9
 
8
10
  def run
9
11
  sequences = calculate(scaffold)
10
- total = total(sequences)
12
+ total = sequence_total(sequences)
11
13
 
12
- tabulate(sequences,total)
14
+ tabulate(sequences,total,flags)
13
15
  end
14
16
 
15
- def headings
16
- ['Sequence'.left(16),
17
- 'Start (bp)'.center(10),
18
- 'End (bp)'.center(10),
19
- 'Size (bp)'.center(10),
20
- 'Size (%)'.center(8),
21
- 'GC (%)'.center(6)]
22
- end
23
-
24
- def title
25
- 'Scaffold Sequences'
26
- end
27
-
28
- def tabulate(rows,total)
29
- table = Terminal::Table.new(:title => title) do |t|
30
- t << headings
31
- t << :separator
32
- rows.each do |row|
33
- t << table_array(row)
34
- end
35
- t << :separator
36
- t << table_array(total.merge({:sequence => 'All'}))
37
- end
17
+ COLUMNS = [:id, :start, :stop, :size, :percent, :gc]
38
18
 
39
- table.align_column 0, :left
40
- table.align_column 1, :right
41
- table.align_column 2, :right
42
- table.align_column 3, :right
43
- table.align_column 4, :right
44
- table.align_column 5, :right
19
+ FORMATTING = {
20
+ :title => 'Scaffold Sequences',
21
+ :headers => ['Sequence', 'Start (bp)', 'End (bp)', 'Size (bp)', 'Size (%)', 'GC (%)'],
22
+ :width => {
23
+ 0 => 16,
24
+ 1 => 10,
25
+ 2 => 10,
26
+ 3 => 10,
27
+ 4 => 8,
28
+ 5 => 6
29
+ },
30
+ :justification => {
31
+ 0 => :left,
32
+ 1 => :right,
33
+ 2 => :right,
34
+ 3 => :right,
35
+ 4 => :right,
36
+ 5 => :right
37
+ },
38
+ :format => {
39
+ 4 => '%#.2f',
40
+ 5 => '%#.2f'
41
+ }
42
+ }
45
43
 
46
- table.to_s
47
- end
44
+ def tabulate(sequences,total,flags)
45
+ rows = sequences.map{|sequence| COLUMNS.map{|col| sequence[col]}}.
46
+ <<(:separator).
47
+ <<(COLUMNS.map{|col| total[col] || 'All'})
48
48
 
49
- def table_array(hash)
50
- [:sequence,:start,:end,:size,:percent,:gc].
51
- map{|i| hash[i]}.
52
- map{|i| i.class == Float ? sprintf('%#.2f',i) : i }
49
+ FORMATTING[:output] = flags[:output]
50
+ table(rows,FORMATTING)
53
51
  end
54
52
 
55
53
  def calculate(scaffold)
56
- total_length = length(:all,scaffold).to_f
57
- running_length = 0
58
-
59
- scaffold.map do |entry|
60
- i = nil
61
- if entry.entry_type != :unresolved
62
- entry_length = entry.sequence.length
63
- i = { :sequence => entry.source,
64
- :start => running_length + 1,
65
- :end => running_length + entry_length,
66
- :size => entry_length,
67
- :percent => entry_length / total_length * 100,
68
- :gc => gc(entry) / atgc(entry) * 100 }
69
- end
70
-
71
- running_length += entry.sequence.length
72
- i
73
- end.compact
74
- end
75
-
76
- def total(seqs)
77
- return Hash[[:start, :end, :size, :percent, :gc].map{|i| [i, 'NA']}] if seqs.empty?
54
+ total_length = scaffold.mapping(&:sequence).mapping(&:length).inject(&:+).to_f
78
55
 
79
- totals = seqs.inject({:size => 0, :percent => 0, :gc => 0}) do |hash,entry|
80
- hash[:start] ||= entry[:start]
81
- hash[:end] = entry[:end]
82
- hash[:size] += entry[:size]
83
- hash[:percent] += entry[:percent]
84
- hash[:gc] += entry[:gc] * entry[:size]
56
+ enumerator_for(:sequence,scaffold).mapping do |entry|
57
+ sequence = entry.delete(:sequence)
85
58
 
86
- hash
87
- end
88
- totals[:gc] /= totals[:size]
89
- totals
59
+ entry[:size] = sequence.length
60
+ entry[:gc] = gc(sequence) / atgc(sequence) * 100
61
+ entry[:percent] = sequence.length / total_length * 100
62
+ entry
63
+ end.to_a
90
64
  end
91
65
 
92
66
  end