genomer-plugin-summary 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,10 +25,10 @@ Feature: Producing a summary of the scaffold sequences
25
25
  +------------------+------------+------------+------------+----------+--------+
26
26
  | Scaffold Sequences |
27
27
  +------------------+------------+------------+------------+----------+--------+
28
- | Sequence | Start (bp) | End (bp) | Size (bp) | Size (%) | GC (%) |
28
+ | Sequence | Start (bp) | End (bp) | Size (bp) | Size (%) | GC (%) |
29
29
  +------------------+------------+------------+------------+----------+--------+
30
30
  +------------------+------------+------------+------------+----------+--------+
31
- | All | NA | NA | NA | NA | NA |
31
+ | All | 0 | 0 | 0 | 0.00 | 0.00 |
32
32
  +------------------+------------+------------+------------+----------+--------+
33
33
  """
34
34
 
@@ -53,7 +53,7 @@ Feature: Producing a summary of the scaffold sequences
53
53
  +------------------+------------+------------+------------+----------+--------+
54
54
  | Scaffold Sequences |
55
55
  +------------------+------------+------------+------------+----------+--------+
56
- | Sequence | Start (bp) | End (bp) | Size (bp) | Size (%) | GC (%) |
56
+ | Sequence | Start (bp) | End (bp) | Size (bp) | Size (%) | GC (%) |
57
57
  +------------------+------------+------------+------------+----------+--------+
58
58
  | contig0001 | 1 | 4 | 4 | 100.00 | 50.00 |
59
59
  +------------------+------------+------------+------------+----------+--------+
@@ -87,7 +87,7 @@ Feature: Producing a summary of the scaffold sequences
87
87
  +------------------+------------+------------+------------+----------+--------+
88
88
  | Scaffold Sequences |
89
89
  +------------------+------------+------------+------------+----------+--------+
90
- | Sequence | Start (bp) | End (bp) | Size (bp) | Size (%) | GC (%) |
90
+ | Sequence | Start (bp) | End (bp) | Size (bp) | Size (%) | GC (%) |
91
91
  +------------------+------------+------------+------------+----------+--------+
92
92
  | contig0001 | 1 | 6 | 6 | 50.00 | 66.67 |
93
93
  | contig0002 | 7 | 12 | 6 | 50.00 | 33.33 |
@@ -120,7 +120,7 @@ Feature: Producing a summary of the scaffold sequences
120
120
  +------------------+------------+------------+------------+----------+--------+
121
121
  | Scaffold Sequences |
122
122
  +------------------+------------+------------+------------+----------+--------+
123
- | Sequence | Start (bp) | End (bp) | Size (bp) | Size (%) | GC (%) |
123
+ | Sequence | Start (bp) | End (bp) | Size (bp) | Size (%) | GC (%) |
124
124
  +------------------+------------+------------+------------+----------+--------+
125
125
  | contig0001 | 1 | 6 | 6 | 50.00 | 66.67 |
126
126
  | contig0001 | 7 | 12 | 6 | 50.00 | 66.67 |
@@ -158,7 +158,7 @@ Feature: Producing a summary of the scaffold sequences
158
158
  +------------------+------------+------------+------------+----------+--------+
159
159
  | Scaffold Sequences |
160
160
  +------------------+------------+------------+------------+----------+--------+
161
- | Sequence | Start (bp) | End (bp) | Size (bp) | Size (%) | GC (%) |
161
+ | Sequence | Start (bp) | End (bp) | Size (bp) | Size (%) | GC (%) |
162
162
  +------------------+------------+------------+------------+----------+--------+
163
163
  | contig0001 | 1 | 6 | 6 | 30.00 | 66.67 |
164
164
  | contig0002 | 15 | 20 | 6 | 30.00 | 33.33 |
@@ -196,7 +196,7 @@ Feature: Producing a summary of the scaffold sequences
196
196
  +------------------+------------+------------+------------+----------+--------+
197
197
  | Scaffold Sequences |
198
198
  +------------------+------------+------------+------------+----------+--------+
199
- | Sequence | Start (bp) | End (bp) | Size (bp) | Size (%) | GC (%) |
199
+ | Sequence | Start (bp) | End (bp) | Size (bp) | Size (%) | GC (%) |
200
200
  +------------------+------------+------------+------------+----------+--------+
201
201
  | contig0001 | 9 | 14 | 6 | 30.00 | 66.67 |
202
202
  | contig0002 | 15 | 20 | 6 | 30.00 | 33.33 |
@@ -235,7 +235,7 @@ Feature: Producing a summary of the scaffold sequences
235
235
  +------------------+------------+------------+------------+----------+--------+
236
236
  | Scaffold Sequences |
237
237
  +------------------+------------+------------+------------+----------+--------+
238
- | Sequence | Start (bp) | End (bp) | Size (bp) | Size (%) | GC (%) |
238
+ | Sequence | Start (bp) | End (bp) | Size (bp) | Size (%) | GC (%) |
239
239
  +------------------+------------+------------+------------+----------+--------+
240
240
  | contig0001 | 1 | 6 | 6 | 30.00 | 66.67 |
241
241
  | contig0002 | 7 | 12 | 6 | 30.00 | 33.33 |
@@ -244,3 +244,34 @@ Feature: Producing a summary of the scaffold sequences
244
244
  +------------------+------------+------------+------------+----------+--------+
245
245
  """
246
246
 
247
+ Scenario: Generating CSV output
248
+ Given I create a new genomer project
249
+ And I write to "assembly/scaffold.yml" with:
250
+ """
251
+ ---
252
+ -
253
+ sequence:
254
+ source: contig0001
255
+ -
256
+ sequence:
257
+ source: contig0002
258
+ -
259
+ unresolved:
260
+ length: 8
261
+ """
262
+ And I write to "assembly/sequence.fna" with:
263
+ """
264
+ >contig0001
265
+ ATGCGC
266
+ >contig0002
267
+ ATATGC
268
+ """
269
+ When I run `genomer summary sequences --output=csv`
270
+ Then the exit status should be 0
271
+ And the output should contain:
272
+ """
273
+ sequence,start_bp,end_bp,size_bp,size_%,gc_%
274
+ contig0001,1,6,6,30.00,66.67
275
+ contig0002,7,12,6,30.00,33.33
276
+ all,1,12,12,60.00,50.00
277
+ """
@@ -0,0 +1,63 @@
1
+ require 'genomer'
2
+ require 'genomer-plugin-summary/format'
3
+ require 'genomer-plugin-summary/enumerators'
4
+ require 'genomer-plugin-summary/metrics'
5
+
6
# Summary sub-command that tabulates the contigs of a scaffold: the runs of
# non-N sequence found by +enumerator_for_contig+, followed by a totals row.
class GenomerPluginSummary::Contigs < Genomer::Plugin
  include GenomerPluginSummary::Metrics
  include GenomerPluginSummary::Format
  include GenomerPluginSummary::Enumerators

  # Static table layout handed to Format#table. Widths, justification and
  # formats are keyed by column index and line up with COLUMNS.
  FORMATTING = {
    :title   => 'Scaffold Contigs',
    :headers => ['Contig', 'Start (bp)', 'End (bp)', 'Size (bp)', 'Size (%)', 'GC (%)'],
    :width   => {
      0 => 6,
      1 => 10,
      2 => 10,
      3 => 10,
      4 => 8,
      5 => 6
    },
    :justification => {
      0 => :right,
      1 => :right,
      2 => :right,
      3 => :right,
      4 => :right,
      5 => :right
    },
    :format => {
      4 => '%#.2f',
      5 => '%#.2f'
    }
  }

  # Keys read from each contig hash, in column order.
  COLUMNS = [:id, :start, :stop, :size, :percent, :gc]

  # Entry point: computes the per-contig metrics and the totals row for the
  # plugin's scaffold and returns the rendered table.
  def run
    contigs = calculate(scaffold)
    total   = sequence_total(contigs)

    tabulate(contigs,total,flags)
  end

  # Renders the contig rows, a separator and the totals row.
  #
  # contigs - Array of hashes as returned by #calculate.
  # total   - Hash of totals; any COLUMNS key it lacks (here :id) is shown
  #           as 'All'.
  # flags   - Hash-like; flags[:output] selects the output format.
  #
  # Returns the table as a String.
  def tabulate(contigs,total,flags)
    rows = contigs.map{|contig| COLUMNS.map{|col| contig[col]}}
    rows << :separator
    rows << COLUMNS.map{|col| total[col] || 'All'}

    # Pass a merged copy: writing :output into FORMATTING would leave
    # per-invocation state behind in a class-level constant.
    table(rows, FORMATTING.merge(:output => flags[:output]))
  end

  # Builds one hash per contig with :gc, :size and :percent added and the raw
  # :sequence removed.
  #
  # :percent is relative to the length of the whole scaffold (all entries,
  # not only contigs); :gc is the G+C share of the contig's A/T/G/C bases.
  #
  # Returns an Array of Hashes.
  def calculate(scaffold)
    total_length = scaffold.mapping(&:sequence).mapping(&:length).inject(&:+).to_f

    enumerator_for_contig(scaffold).map do |contig|
      sequence = contig.delete(:sequence)

      contig[:gc]      = gc(sequence) / atgc(sequence) * 100
      contig[:size]    = sequence.length
      contig[:percent] = contig[:size] / total_length * 100
      contig
    end
  end

end
@@ -0,0 +1,81 @@
1
+ require 'lazing'
2
+
3
# Helpers that walk a scaffold and describe its parts as hashes with the keys
# :sequence, :start, :stop, :type and :id. Coordinates are 1-based and
# inclusive.
module GenomerPluginSummary::Enumerators

  # Dispatches to the enumerator_for_<type> method for the given type.
  #
  # type     - Symbol or String naming the enumerator (e.g. :sequence, :gap).
  # scaffold - the scaffold passed through to the enumerator.
  #
  # Raises NoMethodError when no enumerator exists for +type+.
  def enumerator_for(type,scaffold)
    send('enumerator_for_' + type.to_s, scaffold)
  end

  # Scaffold entries whose entry type is :sequence.
  # The result is whatever +selecting+ returns, not an Array.
  def enumerator_for_sequence(scaffold)
    enumerator_for_all(scaffold).
      selecting{|i| i[:type] == :sequence}
  end

  # Scaffold entries whose entry type is :unresolved.
  # The result is whatever +selecting+ returns, not an Array.
  def enumerator_for_unresolved(scaffold)
    enumerator_for_all(scaffold).
      selecting{|i| i[:type] == :unresolved}
  end

  # Contigs: maximal runs of non-N characters in the joined scaffold sequence,
  # numbered from 1 in order of appearance. Returns an Array of Hashes.
  def enumerator_for_contig(scaffold)
    scaffold_regions(scaffold, :contig)
  end

  # Gaps: maximal runs of N/n characters in the joined scaffold sequence,
  # numbered from 1 in order of appearance. Returns an Array of Hashes.
  def enumerator_for_gap(scaffold)
    scaffold_regions(scaffold, :gap)
  end

  # Every scaffold entry in order, positioned within the joined scaffold.
  # :id is the entry's source for :sequence entries and nil otherwise.
  # Returns an Array of Hashes.
  def enumerator_for_all(scaffold)
    position = 0

    scaffold.map do |entry|
      sequence  = entry.sequence
      start     = position + 1
      position += sequence.length

      {:sequence => sequence,
       :start    => start,
       :stop     => position,
       :type     => entry.entry_type,
       :id       => entry.entry_type == :sequence ? entry.source : nil}
    end
  end

  private

  # Shared implementation of the contig and gap enumerators.
  #
  # Splits the joined scaffold sequence into alternating runs of N/n and of
  # non-N characters, then keeps the runs matching +type+ (:gap keeps the N
  # runs, anything else keeps the non-N runs).
  #
  # Returns an Array of Hashes.
  def scaffold_regions(scaffold, type)
    genome = scaffold.mapping(&:sequence).to_a.join

    # Whitespace belongs to no run and is not counted towards positions.
    regions = genome.scan(/[Nn]+|[^Nn\s]+/)

    want_gap = (type == :gap)
    position = 0
    number   = 0

    regions.each_with_object([]) do |region,entries|
      start     = position + 1
      position += region.length

      # Runs are homogeneous, so any 'n' marks the whole run as a gap.
      is_gap = region.downcase.include?('n')
      next unless is_gap == want_gap

      number += 1
      entries << {:sequence => region,
                  :start    => start,
                  :stop     => position,
                  :type     => type,
                  :id       => number}
    end
  end

end
@@ -0,0 +1,87 @@
1
+ require 'terminal-table'
2
+ require 'lazing'
3
+
4
# Table rendering shared by the summary plugins. Rows are Arrays of cell
# values; the Symbol :separator may appear in place of a row to request a
# horizontal rule (ignored in CSV output).
module GenomerPluginSummary::Format

  # Per-column options are keyed by column index; an absent key means "no
  # setting for that column".
  DEFAULTS = {
    :justification => {},
    :width         => {},
    :format        => {}
  }

  # Renders +data+ according to +opts+.
  #
  # data - Array of rows (Arrays) and/or :separator.
  # opts - Hash with optional keys:
  #        :output        - 'csv' for CSV, anything else for a text table
  #        :title         - table title (text table only)
  #        :headers       - Array of column headings
  #        :width         - {column index => minimum cell width}
  #        :justification - {column index => :left, :right or :center}
  #        :format        - {column index => sprintf String or Proc}
  #
  # Returns a String ending in a newline.
  def table(data,opts = {})
    opts = DEFAULTS.merge opts
    case opts[:output]
    when 'csv' then csv(data,opts)
    else            pretty(data,opts)
    end
  end

  # Formats every cell of every row, passing :separator entries through
  # untouched.
  def create_cells(data,opts)
    data.map do |row|
      next :separator if row == :separator

      row.each_with_index.map do |cell,index|
        format_cell(cell,
                    opts[:width][index],
                    opts[:justification][index],
                    opts[:format][index])
      end
    end
  end

  # Converts one cell to a String.
  #
  # cell          - the raw value.
  # width         - minimum width to pad to, or nil for no padding.
  # justification - :right, :center, or anything else for left.
  # format        - sprintf format String, Proc receiving the cell, or nil.
  #
  # Returns the formatted String.
  def format_cell(cell,width,justification,format = nil)
    formatted = case format
                when String then sprintf(format,cell)
                when Proc   then format.call(cell).to_s
                else             cell.to_s # nil or an unsupported format
                end

    return formatted if width.nil?

    case justification
    when :right  then formatted.rjust(width)
    when :center then formatted.center(width)
    else              formatted.ljust(width)
    end
  end

  # Renders +data+ as comma separated lines, headers first when given.
  # In every line spaces become underscores, parentheses are removed and the
  # text is lower-cased. Cells are not quoted or escaped.
  def csv(data,opts)
    # Work on a copy so the caller's options are left untouched; padding and
    # alignment are meaningless for CSV.
    opts = DEFAULTS.merge(opts).merge(:width => {}, :justification => {})

    cells = create_cells(data,opts)
    cells.unshift opts[:headers] if opts[:headers]

    lines = cells.compact.
      reject{|row| row == :separator}.
      map{|row| row.join(',').gsub(' ','_').gsub(/[()]/,'').downcase}

    lines.join("\n") + "\n"
  end

  # Renders +data+ as a Terminal::Table with optional title and centred
  # headers.
  def pretty(data,opts)
    opts  = DEFAULTS.merge(opts)
    cells = create_cells(data,opts)

    if opts[:headers]
      # Separators are Symbols, not rows, so leave them out when measuring
      # the widest cell of a column without a configured width.
      rows = cells.reject{|row| row == :separator}

      header = opts[:headers].each_with_index.map do |heading,index|
        width = opts[:width][index] || rows.map{|row| row[index].to_s.length}.max
        format_cell(heading, width, :center)
      end

      cells.unshift(header, :separator)
    end

    table = Terminal::Table.new do |t|
      cells.each{|c| t << c}
    end
    opts[:justification].each{|(k,v)| table.align_column k, v }
    table.title ||= opts[:title]
    table.to_s + "\n"
  end

end
@@ -1,45 +1,37 @@
1
1
  require 'genomer'
2
- require 'terminal-table'
2
+ require 'genomer-plugin-summary/format'
3
3
 
4
4
  class GenomerPluginSummary::Gaps < Genomer::Plugin
5
+ include GenomerPluginSummary::Format
5
6
 
6
7
  def run
7
- tabulate determine_gaps scaffold
8
+ tabulate(determine_gaps(scaffold),flags)
8
9
  end
9
10
 
10
- def headings
11
- ['Number'.center(8),
12
- 'Length'.center(8),
13
- 'Start'.center(8),
14
- 'End'.center(8),
15
- 'Type'.center(12)]
16
- end
17
-
18
- def title
19
- 'Scaffold Gaps'
20
- end
21
-
22
- def tabulate(contigs)
23
- table = Terminal::Table.new(:title => title) do |t|
24
- t << headings
25
- t << :separator
26
- contigs.each do |ctg|
27
- t << [ctg[:number],
28
- ctg[:length],
29
- ctg[:start],
30
- ctg[:end],
31
- ctg[:type]]
32
- end
33
- end
11
+ COLUMNS = [:number, :length, :start, :end, :type]
34
12
 
35
- table.style = {:width => 60}
36
- table.align_column 0, :right
37
- table.align_column 1, :right
38
- table.align_column 2, :right
39
- table.align_column 3, :right
40
- table.align_column 4, :center
13
+ FORMATTING = {
14
+ :title => 'Scaffold Gaps',
15
+ :headers => ['Number', 'Length', 'Start', 'End', 'Type'],
16
+ :width => {
17
+ 0 => 8,
18
+ 1 => 8,
19
+ 2 => 8,
20
+ 3 => 8,
21
+ 4 => 12
22
+ },
23
+ :justification => {
24
+ 0 => :right,
25
+ 1 => :right,
26
+ 2 => :right,
27
+ 3 => :right,
28
+ 4 => :center
29
+ }
30
+ }
41
31
 
42
- table.to_s
32
+ def tabulate(gaps,flags)
33
+ FORMATTING[:output] = flags[:output]
34
+ table(gaps.map{|gap| COLUMNS.map{|col| gap[col]}},FORMATTING)
43
35
  end
44
36
 
45
37
  def gap_locations(seq)
@@ -0,0 +1,51 @@
1
+ require 'genomer'
2
+ require 'genomer-plugin-summary/metrics'
3
+ require 'genomer-plugin-summary/format'
4
+
5
# Summary sub-command that reports whole-scaffold metrics (counts, lengths
# and percentages for sequences, contigs and gaps, plus G+C content) as a
# two-column table.
class GenomerPluginSummary::Genome < Genomer::Plugin
  include GenomerPluginSummary::Metrics
  include GenomerPluginSummary::Format

  # Rows of the report in display order. Each hash names the row, the entry
  # type to measure and the method to call with (entry_type, scaffold);
  # :separator inserts a horizontal rule.
  LAYOUT = [
    {:name => 'Sequences (#)',  :entry_type => :sequence, :method => :count},
    {:name => 'Contigs (#)',    :entry_type => :contig,   :method => :count},
    {:name => 'Gaps (#)',       :entry_type => :gap,      :method => :count},
    :separator,
    {:name => 'Size (bp)',      :entry_type => :all,      :method => :length},
    {:name => 'Sequences (bp)', :entry_type => :sequence, :method => :length},
    {:name => 'Contigs (bp)',   :entry_type => :contig,   :method => :length},
    {:name => 'Gaps (bp)',      :entry_type => :gap,      :method => :length},
    :separator,
    {:name => 'G+C (%)',        :entry_type => :all,      :method => :gc_content},
    {:name => 'Sequences (%)',  :entry_type => :sequence, :method => :percent},
    {:name => 'Contigs (%)',    :entry_type => :contig,   :method => :percent},
    {:name => 'Gaps (%)',       :entry_type => :gap,      :method => :percent}
  ]

  # Static table layout handed to Format#table. Floats in the value column
  # are shown with two decimals; other values are shown as they are.
  FORMATTING = {
    :title         => 'Scaffold',
    :width         => {0 => 12, 1 => 9},
    :justification => {1 => :right},
    :format        => {1 => lambda{|i| i.class == Float ? sprintf('%#.2f',i) : i }}
  }

  # Entry point: computes every LAYOUT metric for the plugin's scaffold and
  # returns the rendered table.
  def run
    tabulate(calculate_metrics(LAYOUT, scaffold),flags)
  end

  # Renders the metric rows.
  #
  # data  - rows as returned by #calculate_metrics.
  # flags - Hash-like; flags[:output] selects the output format.
  #
  # Returns the table as a String.
  def tabulate(data,flags)
    # Pass a merged copy: storing :output in FORMATTING would make the chosen
    # format stick to the constant and affect later calls made without it.
    table(data, FORMATTING.merge(:output => flags[:output]))
  end

  # Evaluates each spec against the scaffold.
  #
  # specs    - Array of spec hashes and/or :separator (see LAYOUT).
  # scaffold - the scaffold handed to each metric method.
  #
  # Returns an Array of [name, value] pairs with :separator entries kept in
  # place.
  def calculate_metrics(specs,scaffold)
    specs.map do |spec|
      next spec if spec == :separator

      [spec[:name], send(spec[:method],spec[:entry_type],scaffold)]
    end
  end

end
@@ -1,13 +1,15 @@
1
1
  require 'genomer'
2
+ require 'genomer-plugin-summary/enumerators'
2
3
  require 'lazing'
3
4
 
4
5
  module GenomerPluginSummary::Metrics
6
+ include GenomerPluginSummary::Enumerators
5
7
 
6
8
  ALL = :all
7
9
 
8
10
  def gc_content(type,scfd)
9
- gc = enumerator_for(type,scfd).mapping{|i| gc(i)}.inject(:+) || 0.0
10
- atgc = enumerator_for(type,scfd).mapping{|i| atgc(i)}.inject(:+) || 0.0
11
+ gc = enumerator_for(type,scfd).mapping{|i| gc(i[:sequence])}.inject(:+) || 0.0
12
+ atgc = enumerator_for(type,scfd).mapping{|i| atgc(i[:sequence])}.inject(:+) || 0.0
11
13
  gc / atgc * 100
12
14
  end
13
15
 
@@ -21,21 +23,33 @@ module GenomerPluginSummary::Metrics
21
23
 
22
24
  def length(type,scfd)
23
25
  enumerator_for(type,scfd).
24
- mapping(&:sequence).
26
+ mapping{|i| i[:sequence]}.
25
27
  mapping(&:length).
26
28
  inject(:+) || 0
27
29
  end
28
30
 
29
- def gc(entry)
30
- entry.sequence.gsub(/[^GCgc]/,'').length.to_f
31
+ def gc(sequence)
32
+ sequence.gsub(/[^GCgc]/,'').length.to_f
31
33
  end
32
34
 
33
- def atgc(entry)
34
- entry.sequence.gsub(/[^ATGCatgc]/,'').length.to_f
35
+ def atgc(sequence)
36
+ sequence.gsub(/[^ATGCatgc]/,'').length.to_f
35
37
  end
36
38
 
37
- def enumerator_for(type,scaffold)
38
- scaffold.selecting{|i| [ALL,i.entry_type].include? type }
39
+ def sequence_total(seqs)
40
+ return Hash[[:start, :stop, :size, :percent, :gc].map{|i| [i, 0]}] if seqs.empty?
41
+
42
+ totals = seqs.inject({:size => 0, :percent => 0, :gc => 0}) do |hash,entry|
43
+ hash[:start] ||= entry[:start]
44
+ hash[:stop] = entry[:stop]
45
+ hash[:size] += entry[:size]
46
+ hash[:percent] += entry[:percent]
47
+ hash[:gc] += entry[:gc] * entry[:size]
48
+
49
+ hash
50
+ end
51
+ totals[:gc] /= totals[:size]
52
+ totals
39
53
  end
40
54
 
41
55
  end
@@ -1,92 +1,66 @@
1
1
  require 'genomer'
2
2
  require 'genomer-plugin-summary/metrics'
3
- require 'terminal-table'
3
+ require 'genomer-plugin-summary/format'
4
4
 
5
5
  class GenomerPluginSummary::Sequences < Genomer::Plugin
6
6
  include GenomerPluginSummary::Metrics
7
+ include GenomerPluginSummary::Format
8
+ include GenomerPluginSummary::Enumerators
7
9
 
8
10
  def run
9
11
  sequences = calculate(scaffold)
10
- total = total(sequences)
12
+ total = sequence_total(sequences)
11
13
 
12
- tabulate(sequences,total)
14
+ tabulate(sequences,total,flags)
13
15
  end
14
16
 
15
- def headings
16
- ['Sequence'.left(16),
17
- 'Start (bp)'.center(10),
18
- 'End (bp)'.center(10),
19
- 'Size (bp)'.center(10),
20
- 'Size (%)'.center(8),
21
- 'GC (%)'.center(6)]
22
- end
23
-
24
- def title
25
- 'Scaffold Sequences'
26
- end
27
-
28
- def tabulate(rows,total)
29
- table = Terminal::Table.new(:title => title) do |t|
30
- t << headings
31
- t << :separator
32
- rows.each do |row|
33
- t << table_array(row)
34
- end
35
- t << :separator
36
- t << table_array(total.merge({:sequence => 'All'}))
37
- end
17
+ COLUMNS = [:id, :start, :stop, :size, :percent, :gc]
38
18
 
39
- table.align_column 0, :left
40
- table.align_column 1, :right
41
- table.align_column 2, :right
42
- table.align_column 3, :right
43
- table.align_column 4, :right
44
- table.align_column 5, :right
19
+ FORMATTING = {
20
+ :title => 'Scaffold Sequences',
21
+ :headers => ['Sequence', 'Start (bp)', 'End (bp)', 'Size (bp)', 'Size (%)', 'GC (%)'],
22
+ :width => {
23
+ 0 => 16,
24
+ 1 => 10,
25
+ 2 => 10,
26
+ 3 => 10,
27
+ 4 => 8,
28
+ 5 => 6
29
+ },
30
+ :justification => {
31
+ 0 => :left,
32
+ 1 => :right,
33
+ 2 => :right,
34
+ 3 => :right,
35
+ 4 => :right,
36
+ 5 => :right
37
+ },
38
+ :format => {
39
+ 4 => '%#.2f',
40
+ 5 => '%#.2f'
41
+ }
42
+ }
45
43
 
46
- table.to_s
47
- end
44
+ def tabulate(sequences,total,flags)
45
+ rows = sequences.map{|sequence| COLUMNS.map{|col| sequence[col]}}.
46
+ <<(:separator).
47
+ <<(COLUMNS.map{|col| total[col] || 'All'})
48
48
 
49
- def table_array(hash)
50
- [:sequence,:start,:end,:size,:percent,:gc].
51
- map{|i| hash[i]}.
52
- map{|i| i.class == Float ? sprintf('%#.2f',i) : i }
49
+ FORMATTING[:output] = flags[:output]
50
+ table(rows,FORMATTING)
53
51
  end
54
52
 
55
53
  def calculate(scaffold)
56
- total_length = length(:all,scaffold).to_f
57
- running_length = 0
58
-
59
- scaffold.map do |entry|
60
- i = nil
61
- if entry.entry_type != :unresolved
62
- entry_length = entry.sequence.length
63
- i = { :sequence => entry.source,
64
- :start => running_length + 1,
65
- :end => running_length + entry_length,
66
- :size => entry_length,
67
- :percent => entry_length / total_length * 100,
68
- :gc => gc(entry) / atgc(entry) * 100 }
69
- end
70
-
71
- running_length += entry.sequence.length
72
- i
73
- end.compact
74
- end
75
-
76
- def total(seqs)
77
- return Hash[[:start, :end, :size, :percent, :gc].map{|i| [i, 'NA']}] if seqs.empty?
54
+ total_length = scaffold.mapping(&:sequence).mapping(&:length).inject(&:+).to_f
78
55
 
79
- totals = seqs.inject({:size => 0, :percent => 0, :gc => 0}) do |hash,entry|
80
- hash[:start] ||= entry[:start]
81
- hash[:end] = entry[:end]
82
- hash[:size] += entry[:size]
83
- hash[:percent] += entry[:percent]
84
- hash[:gc] += entry[:gc] * entry[:size]
56
+ enumerator_for(:sequence,scaffold).mapping do |entry|
57
+ sequence = entry.delete(:sequence)
85
58
 
86
- hash
87
- end
88
- totals[:gc] /= totals[:size]
89
- totals
59
+ entry[:size] = sequence.length
60
+ entry[:gc] = gc(sequence) / atgc(sequence) * 100
61
+ entry[:percent] = sequence.length / total_length * 100
62
+ entry
63
+ end.to_a
90
64
  end
91
65
 
92
66
  end