culturecode-roo 2.0.1 → 2.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (87) hide show
  1. data/.gitignore +1 -0
  2. data/CHANGELOG.md +513 -0
  3. data/README.md +206 -73
  4. data/lib/roo.rb +3 -3
  5. data/lib/roo/base.rb +49 -33
  6. data/lib/roo/csv.rb +10 -0
  7. data/lib/roo/excelx.rb +187 -60
  8. data/lib/roo/excelx/comments.rb +2 -1
  9. data/lib/roo/excelx/sheet_doc.rb +30 -3
  10. data/lib/roo/open_office.rb +250 -221
  11. data/lib/roo/utils.rb +28 -31
  12. data/lib/roo/version.rb +1 -1
  13. data/roo.gemspec +10 -12
  14. data/spec/lib/roo/csv_spec.rb +14 -0
  15. data/spec/lib/roo/excelx_spec.rb +90 -2
  16. data/spec/lib/roo/libreoffice_spec.rb +16 -0
  17. data/spec/lib/roo/openoffice_spec.rb +11 -0
  18. data/spec/lib/roo/utils_spec.rb +5 -4
  19. data/test/test_roo.rb +113 -2
  20. metadata +29 -180
  21. data/CHANGELOG +0 -438
  22. data/scripts/txt2html +0 -67
  23. data/test/files/1900_base.xlsx +0 -0
  24. data/test/files/1904_base.xlsx +0 -0
  25. data/test/files/Bibelbund.csv +0 -3741
  26. data/test/files/Bibelbund.ods +0 -0
  27. data/test/files/Bibelbund.xlsx +0 -0
  28. data/test/files/Bibelbund1.ods +0 -0
  29. data/test/files/Pfand_from_windows_phone.xlsx +0 -0
  30. data/test/files/advanced_header.ods +0 -0
  31. data/test/files/bbu.ods +0 -0
  32. data/test/files/bbu.xlsx +0 -0
  33. data/test/files/bode-v1.ods.zip +0 -0
  34. data/test/files/bode-v1.xls.zip +0 -0
  35. data/test/files/boolean.csv +0 -2
  36. data/test/files/boolean.ods +0 -0
  37. data/test/files/boolean.xlsx +0 -0
  38. data/test/files/borders.ods +0 -0
  39. data/test/files/borders.xlsx +0 -0
  40. data/test/files/bug-numbered-sheet-names.xlsx +0 -0
  41. data/test/files/comments.ods +0 -0
  42. data/test/files/comments.xlsx +0 -0
  43. data/test/files/csvtypes.csv +0 -1
  44. data/test/files/datetime.ods +0 -0
  45. data/test/files/datetime.xlsx +0 -0
  46. data/test/files/dreimalvier.ods +0 -0
  47. data/test/files/emptysheets.ods +0 -0
  48. data/test/files/emptysheets.xlsx +0 -0
  49. data/test/files/encrypted-letmein.ods +0 -0
  50. data/test/files/file_item_error.xlsx +0 -0
  51. data/test/files/formula.ods +0 -0
  52. data/test/files/formula.xlsx +0 -0
  53. data/test/files/formula_string_error.xlsx +0 -0
  54. data/test/files/html-escape.ods +0 -0
  55. data/test/files/link.csv +0 -1
  56. data/test/files/link.xlsx +0 -0
  57. data/test/files/matrix.ods +0 -0
  58. data/test/files/named_cells.ods +0 -0
  59. data/test/files/named_cells.xlsx +0 -0
  60. data/test/files/no_spreadsheet_file.txt +0 -1
  61. data/test/files/numbers-export.xlsx +0 -0
  62. data/test/files/numbers1.csv +0 -18
  63. data/test/files/numbers1.ods +0 -0
  64. data/test/files/numbers1.xlsx +0 -0
  65. data/test/files/numbers1withnull.xlsx +0 -0
  66. data/test/files/numeric-link.xlsx +0 -0
  67. data/test/files/only_one_sheet.ods +0 -0
  68. data/test/files/only_one_sheet.xlsx +0 -0
  69. data/test/files/paragraph.ods +0 -0
  70. data/test/files/paragraph.xlsx +0 -0
  71. data/test/files/ric.ods +0 -0
  72. data/test/files/sheet1.xml +0 -109
  73. data/test/files/simple_spreadsheet.ods +0 -0
  74. data/test/files/simple_spreadsheet.xlsx +0 -0
  75. data/test/files/simple_spreadsheet_from_italo.ods +0 -0
  76. data/test/files/so_datetime.csv +0 -8
  77. data/test/files/style.ods +0 -0
  78. data/test/files/style.xlsx +0 -0
  79. data/test/files/time-test.csv +0 -2
  80. data/test/files/time-test.ods +0 -0
  81. data/test/files/time-test.xlsx +0 -0
  82. data/test/files/type_excel.ods +0 -0
  83. data/test/files/type_excel.xlsx +0 -0
  84. data/test/files/type_excelx.ods +0 -0
  85. data/test/files/type_openoffice.xlsx +0 -0
  86. data/test/files/whitespace.ods +0 -0
  87. data/test/files/whitespace.xlsx +0 -0
@@ -107,4 +107,14 @@ class Roo::CSV < Roo::Base
107
107
  @last_column[sheet] -= 1
108
108
  end
109
109
  end
110
+
111
+ def clean_sheet(sheet)
112
+ read_cells(sheet)
113
+
114
+ @cell.each_pair do |coord, value|
115
+ @cell[coord] = sanitize_value(value) if value.is_a?(::String)
116
+ end
117
+
118
+ @cleaned[sheet] = true
119
+ end
110
120
  end
@@ -78,6 +78,7 @@ class Roo::Excelx < Roo::Base
78
78
 
79
79
  class Cell
80
80
  attr_reader :type, :formula, :value, :excelx_type, :excelx_value, :style, :hyperlink, :coordinate
81
+ attr_writer :value
81
82
 
82
83
  def initialize(value, type, formula, excelx_type, excelx_value, style, hyperlink, base_date, coordinate)
83
84
  @type = type
@@ -145,12 +146,12 @@ class Roo::Excelx < Roo::Base
145
146
  end
146
147
 
147
148
  class Sheet
148
- def initialize(name, rels_path, sheet_path, comments_path, styles, shared_strings, workbook)
149
+ def initialize(name, rels_path, sheet_path, comments_path, styles, shared_strings, workbook, options = {})
149
150
  @name = name
150
151
  @rels = Relationships.new(rels_path)
151
152
  @comments = Comments.new(comments_path)
152
153
  @styles = styles
153
- @sheet = SheetDoc.new(sheet_path, @rels, @styles, shared_strings, workbook)
154
+ @sheet = SheetDoc.new(sheet_path, @rels, @styles, shared_strings, workbook, options)
154
155
  end
155
156
 
156
157
  def cells
@@ -162,13 +163,16 @@ class Roo::Excelx < Roo::Base
162
163
  end
163
164
 
164
165
  # Yield each row as array of Excelx::Cell objects
165
- # accepts options max_rows (int) (offset by 1 for header)
166
- # and pad_cells (boolean)
166
+ # accepts options max_rows (int) (offset by 1 for header),
167
+ # pad_cells (boolean) and offset (int)
167
168
  def each_row(options = {}, &block)
168
169
  row_count = 0
170
+ options[:offset] ||= 0
169
171
  @sheet.each_row_streaming do |row|
170
- break if options[:max_rows] && row_count == options[:max_rows] + 1
171
- block.call(cells_for_row_element(row, options)) if block_given?
172
+ break if options[:max_rows] && row_count == options[:max_rows] + options[:offset] + 1
173
+ if block_given? && !(options[:offset] && row_count < options[:offset])
174
+ block.call(cells_for_row_element(row, options))
175
+ end
172
176
  row_count += 1
173
177
  end
174
178
  end
@@ -187,25 +191,26 @@ class Roo::Excelx < Roo::Base
187
191
 
188
192
  # returns the number of the first non-empty row
189
193
  def first_row
190
- @first_row ||= present_cells.keys.map {|row, col| row }.min
194
+ @first_row ||= present_cells.keys.map {|row, _| row }.min
191
195
  end
192
196
 
193
197
  def last_row
194
- @last_row ||= present_cells.keys.map {|row, col| row }.max
198
+ @last_row ||= present_cells.keys.map {|row, _| row }.max
195
199
  end
196
200
 
197
201
  # returns the number of the first non-empty column
198
- def first_column(sheet=nil)
199
- @first_column ||= present_cells.keys.map {|row, col| col }.min
202
+ def first_column
203
+ @first_column ||= present_cells.keys.map {|_, col| col }.min
200
204
  end
201
205
 
202
206
  # returns the number of the last non-empty column
203
- def last_column(sheet=nil)
204
- @last_column ||= present_cells.keys.map {|row, col| col }.max
207
+ def last_column
208
+ @last_column ||= present_cells.keys.map {|_, col| col }.max
205
209
  end
206
210
 
207
211
  def excelx_format(key)
208
- @styles.style_format(cells[key].style).to_s
212
+ cell = cells[key]
213
+ @styles.style_format(cell.style).to_s if cell
209
214
  end
210
215
 
211
216
  def hyperlinks
@@ -250,23 +255,32 @@ class Roo::Excelx < Roo::Base
250
255
  # values for packed: :zip
251
256
  # optional cell_max (int) parameter for early aborting attempts to parse
252
257
  # enormous documents.
253
- def initialize(filename, options = {})
258
+ def initialize(filename_or_stream, options = {})
254
259
  packed = options[:packed]
255
260
  file_warning = options.fetch(:file_warning, :error)
256
261
  cell_max = options.delete(:cell_max)
262
+ sheet_options = {}
263
+ sheet_options[:expand_merged_ranges] = (options[:expand_merged_ranges] || false)
257
264
 
258
- file_type_check(filename,'.xlsx','an Excel-xlsx', file_warning, packed)
265
+ unless is_stream?(filename_or_stream)
266
+ file_type_check(filename_or_stream,'.xlsx','an Excel-xlsx', file_warning, packed)
267
+ basename = File.basename(filename_or_stream)
268
+ end
259
269
 
260
- @tmpdir = make_tmpdir(filename.split('/').last, options[:tmpdir_root])
261
- @filename = local_filename(filename, @tmpdir, packed)
270
+ @tmpdir = make_tmpdir(basename, options[:tmpdir_root])
271
+ @filename = local_filename(filename_or_stream, @tmpdir, packed)
262
272
  @comments_files = []
263
273
  @rels_files = []
264
- process_zipfile(@tmpdir, @filename)
274
+ process_zipfile(@filename || filename_or_stream)
265
275
 
266
- @sheet_names = workbook.sheets.map { |sheet| sheet['name'] }
276
+ @sheet_names = workbook.sheets.map do |sheet|
277
+ unless options[:only_visible_sheets] && sheet['state'] == 'hidden'
278
+ sheet['name']
279
+ end
280
+ end.compact
267
281
  @sheets = []
268
282
  @sheets_by_name = Hash[@sheet_names.map.with_index do |sheet_name, n|
269
- @sheets[n] = Sheet.new(sheet_name, @rels_files[n], @sheet_files[n], @comments_files[n], styles, shared_strings, workbook)
283
+ @sheets[n] = Sheet.new(sheet_name, @rels_files[n], @sheet_files[n], @comments_files[n], styles, shared_strings, workbook, sheet_options)
270
284
  [sheet_name, @sheets[n]]
271
285
  end]
272
286
 
@@ -276,11 +290,14 @@ class Roo::Excelx < Roo::Base
276
290
  end
277
291
 
278
292
  super
293
+ rescue => e # clean up any temp files, but only if an error was raised
294
+ close
295
+ raise e
279
296
  end
280
297
 
281
298
  def method_missing(method,*args)
282
299
  if label = workbook.defined_names[method.to_s]
283
- sheet_for(label.sheet).cells[label.key].value
300
+ safe_send(sheet_for(label.sheet).cells[label.key], :value)
284
301
  else
285
302
  # call super for methods like #a1
286
303
  super
@@ -303,8 +320,7 @@ class Roo::Excelx < Roo::Base
303
320
  # cell at the first line and first row.
304
321
  def cell(row, col, sheet=nil)
305
322
  key = normalize(row,col)
306
- cell = sheet_for(sheet).cells[key]
307
- cell.value if cell
323
+ safe_send(sheet_for(sheet).cells[key], :value)
308
324
  end
309
325
 
310
326
  def row(rownumber,sheet=nil)
@@ -354,7 +370,7 @@ class Roo::Excelx < Roo::Base
354
370
  # The method #formula? checks if there is a formula.
355
371
  def formula(row,col,sheet=nil)
356
372
  key = normalize(row,col)
357
- sheet_for(sheet).cells[key].formula
373
+ safe_send(sheet_for(sheet).cells[key], :formula)
358
374
  end
359
375
 
360
376
  # Predicate methods really should return a boolean
@@ -375,7 +391,8 @@ class Roo::Excelx < Roo::Base
375
391
  # Given a cell, return the cell's style
376
392
  def font(row, col, sheet=nil)
377
393
  key = normalize(row,col)
378
- styles.definitions[sheet_for(sheet).cells[key].style]
394
+ definition_index = safe_send(sheet_for(sheet).cells[key], :style)
395
+ styles.definitions[definition_index] if definition_index
379
396
  end
380
397
 
381
398
  # returns the type of a cell:
@@ -388,7 +405,7 @@ class Roo::Excelx < Roo::Base
388
405
  # * :datetime
389
406
  def celltype(row,col,sheet=nil)
390
407
  key = normalize(row, col)
391
- sheet_for(sheet).cells[key].type
408
+ safe_send(sheet_for(sheet).cells[key], :type)
392
409
  end
393
410
 
394
411
  # returns the internal type of an excel cell
@@ -397,14 +414,14 @@ class Roo::Excelx < Roo::Base
397
414
  # Note: this is only available within the Excelx class
398
415
  def excelx_type(row,col,sheet=nil)
399
416
  key = normalize(row,col)
400
- sheet_for(sheet).cells[key].excelx_type
417
+ safe_send(sheet_for(sheet).cells[key], :excelx_type)
401
418
  end
402
419
 
403
420
  # returns the internal value of an excelx cell
404
421
  # Note: this is only available within the Excelx class
405
422
  def excelx_value(row,col,sheet=nil)
406
423
  key = normalize(row,col)
407
- sheet_for(sheet).cells[key].excelx_value
424
+ safe_send(sheet_for(sheet).cells[key], :excelx_value)
408
425
  end
409
426
 
410
427
  # returns the internal format of an excel cell
@@ -489,42 +506,148 @@ class Roo::Excelx < Roo::Base
489
506
 
490
507
  private
491
508
 
509
+ def clean_sheet(sheet)
510
+ @sheets_by_name[sheet].cells.each_pair do |coord, value|
511
+ next unless value.value.is_a?(::String)
512
+
513
+ @sheets_by_name[sheet].cells[coord].value = sanitize_value(value.value)
514
+ end
515
+
516
+ @cleaned[sheet] = true
517
+ end
518
+
519
+ # Internal: extracts the worksheet_ids from the workbook.xml file. xlsx
520
+ # documents require a workbook.xml file, so if the file is missing
521
+ # it is not a valid xlsx file. In these cases, an ArgumentError is
522
+ # raised.
523
+ #
524
+ # entries - a collection of Zip::Entry objects; the workbook.xml entry is looked up among them.
525
+ # path - A String for Zip::Entry's destination path.
526
+ #
527
+ # Examples
528
+ #
529
+ # extract_worksheet_ids([<Zip::Entry>, ...], 'tmpdir/roo_workbook.xml')
530
+ # # => ["rId1", "rId2", "rId3"]
531
+ #
532
+ # Returns an Array of Strings.
533
+ def extract_worksheet_ids(entries, path)
534
+ wb = entries.find { |e| e.name[/workbook.xml$/] }
535
+ fail ArgumentError 'missing required workbook file' if wb.nil?
536
+
537
+ wb.extract(path)
538
+ workbook_doc = Roo::Utils.load_xml(path).remove_namespaces!
539
+ workbook_doc.xpath('//sheet').map{ |s| s.attributes['id'].value }
540
+ end
541
+
542
+ # Internal
543
+ #
544
+ # entries - a collection of Zip::Entry objects; the workbook.xml.rels entry is looked up among them.
545
+ # path - A String for the Zip::Entry's destination path.
546
+ #
547
+ # Examples
548
+ #
549
+ # extract_worksheet_rels([<Zip::Entry>, ...], 'tmpdir/roo_workbook.xml.rels')
550
+ # # => {
551
+ # "rId1"=>"worksheets/sheet1.xml",
552
+ # "rId2"=>"worksheets/sheet2.xml",
553
+ # "rId3"=>"worksheets/sheet3.xml"
554
+ # }
555
+ #
556
+ # Returns a Hash.
557
+ def extract_worksheet_rels(entries, path)
558
+ wb_rels = entries.find { |e| e.name[/workbook.xml.rels$/] }
559
+ fail ArgumentError 'missing required workbook file' if wb_rels.nil?
560
+
561
+ wb_rels.extract(path)
562
+ rels_doc = Roo::Utils.load_xml(path).remove_namespaces!
563
+ worksheet_type ='http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet'
564
+
565
+ relationships = rels_doc.xpath('//Relationship').select do |relationship|
566
+ relationship.attributes['Type'].value == worksheet_type
567
+ end
568
+
569
+ relationships.inject({}) do |hash, relationship|
570
+ attributes = relationship.attributes
571
+ id = attributes['Id'];
572
+ hash[id.value] = attributes['Target'].value
573
+ hash
574
+ end
575
+ end
576
+
577
+ def extract_sheets_in_order(entries, sheet_ids, sheets, tmpdir)
578
+ sheet_ids.each_with_index do |id, i|
579
+ name = sheets[id]
580
+ entry = entries.find { |entry| entry.name =~ /#{name}$/ }
581
+ path = "#{tmpdir}/roo_sheet#{i + 1}"
582
+ @sheet_files << path
583
+ entry.extract(path)
584
+ end
585
+ end
586
+
492
587
  # Extracts all needed files from the zip file
493
- def process_zipfile(tmpdir, zipfilename)
588
+ def process_zipfile(zipfilename_or_stream)
494
589
  @sheet_files = []
495
- Zip::File.foreach(zipfilename) do |entry|
496
- path =
497
- case entry.name.downcase
498
- when /workbook.xml$/
499
- "#{tmpdir}/roo_workbook.xml"
500
- when /sharedstrings.xml$/
501
- "#{tmpdir}/roo_sharedStrings.xml"
502
- when /styles.xml$/
503
- "#{tmpdir}/roo_styles.xml"
504
- when /sheet.xml$/
505
- path = "#{tmpdir}/roo_sheet"
506
- @sheet_files.unshift(path)
507
- path
508
- when /sheet([0-9]+).xml$/
509
- # Numbers 3.1 exports first sheet without sheet number. Such sheets
510
- # are always added to the beginning of the array which, naturally,
511
- # causes other sheets to be pushed to the next index which could
512
- # lead to sheet references getting overwritten, so we need to
513
- # handle that case specifically.
514
- nr = $1
515
- sheet_files_index = nr.to_i - 1
516
- sheet_files_index += 1 if @sheet_files[sheet_files_index]
517
- @sheet_files[sheet_files_index] = "#{tmpdir}/roo_sheet#{nr.to_i}"
518
- when /comments([0-9]+).xml$/
519
- nr = $1
520
- @comments_files[nr.to_i-1] = "#{tmpdir}/roo_comments#{nr}"
521
- when /sheet([0-9]+).xml.rels$/
522
- nr = $1
523
- @rels_files[nr.to_i-1] = "#{tmpdir}/roo_rels#{nr}"
590
+
591
+ unless is_stream?(zipfilename_or_stream)
592
+ process_zipfile_entries Zip::File.open(zipfilename_or_stream).to_a.sort_by(&:name)
593
+ else
594
+ stream = Zip::InputStream.open zipfilename_or_stream
595
+ begin
596
+ entries = []
597
+ while entry = stream.get_next_entry
598
+ entries << entry
524
599
  end
525
- if path
526
- entry.extract(path)
600
+ process_zipfile_entries entries
601
+ ensure
602
+ stream.close
603
+ end
604
+ end
605
+ end
606
+
607
+ def process_zipfile_entries entries
608
+ # NOTE: When Google or Numbers 3.1 exports to xlsx, the worksheet filenames
609
+ # are not in order. With Numbers 3.1, the first sheet is always
610
+ # sheet.xml, not sheet1.xml. With Google, the order of the worksheets is
611
+ # independent of a worksheet's filename (i.e. sheet6.xml can be the
612
+ # first worksheet).
613
+ #
614
+ # workbook.xml lists the correct order of worksheets and
615
+ # workbook.xml.rels lists the filenames for those worksheets.
616
+ #
617
+ # workbook.xml:
618
+ # <sheet state="visible" name="IS" sheetId="1" r:id="rId3"/>
619
+ # <sheet state="visible" name="BS" sheetId="2" r:id="rId4"/>
620
+ # workbook.xml.rels:
621
+ # <Relationship Id="rId4" Target="worksheets/sheet5.xml" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"/>
622
+ # <Relationship Id="rId3" Target="worksheets/sheet4.xml" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"/>
623
+ sheet_ids = extract_worksheet_ids(entries, "#{@tmpdir}/roo_workbook.xml")
624
+ sheets = extract_worksheet_rels(entries, "#{@tmpdir}/roo_workbook.xml.rels")
625
+ extract_sheets_in_order(entries, sheet_ids, sheets, @tmpdir)
626
+
627
+ entries.each do |entry|
628
+ path =
629
+ case entry.name.downcase
630
+ when /sharedstrings.xml$/
631
+ "#{@tmpdir}/roo_sharedStrings.xml"
632
+ when /styles.xml$/
633
+ "#{@tmpdir}/roo_styles.xml"
634
+ when /comments([0-9]+).xml$/
635
+ # FIXME: Most of the time, the order of the comment files is the same as
636
+ # the sheet order, i.e. sheet1.xml's comments are in comments1.xml.
637
+ # In some situations, this isn't true. The true location of a
638
+ # sheet's comment file is in the sheet1.xml.rels file. SEE
639
+ # ECMA-376 12.3.3 in "Ecma Office Open XML Part 1".
640
+ nr = Regexp.last_match[1].to_i
641
+ @comments_files[nr - 1] = "#{@tmpdir}/roo_comments#{nr}"
642
+ when /sheet([0-9]+).xml.rels$/
643
+ # FIXME: Roo seems to use sheet[\d].xml.rels for hyperlinks only, but
644
+ # it also stores the location for sharedStrings, comments,
645
+ # drawings, etc.
646
+ nr = Regexp.last_match[1].to_i
647
+ @rels_files[nr - 1] = "#{@tmpdir}/roo_rels#{nr}"
527
648
  end
649
+
650
+ entry.extract(path) if path
528
651
  end
529
652
  end
530
653
 
@@ -539,4 +662,8 @@ class Roo::Excelx < Roo::Base
539
662
  def workbook
540
663
  @workbook ||= Workbook.new(File.join(@tmpdir, "roo_workbook.xml"))
541
664
  end
665
+
666
+ def safe_send(object, method, *args)
667
+ object.send(method, *args) if object && object.respond_to?(method)
668
+ end
542
669
  end
@@ -12,7 +12,8 @@ module Roo
12
12
  def extract_comments
13
13
  if doc_exists?
14
14
  Hash[doc.xpath("//comments/commentList/comment").map do |comment|
15
- [::Roo::Utils.ref_to_key(comment.attributes['ref'].to_s), comment.at_xpath('./text/r/t').text]
15
+ value = (comment.at_xpath('./text/r/t') || comment.at_xpath('./text/t')).text
16
+ [::Roo::Utils.ref_to_key(comment.attributes['ref'].to_s), value]
16
17
  end]
17
18
  else
18
19
  {}
@@ -2,8 +2,9 @@ require 'roo/excelx/extractor'
2
2
 
3
3
  module Roo
4
4
  class Excelx::SheetDoc < Excelx::Extractor
5
- def initialize(path, relationships, styles, shared_strings, workbook)
5
+ def initialize(path, relationships, styles, shared_strings, workbook, options = {})
6
6
  super(path)
7
+ @options = options
7
8
  @relationships = relationships
8
9
  @styles = styles
9
10
  @shared_strings = shared_strings
@@ -43,6 +44,8 @@ module Roo
43
44
  private
44
45
 
45
46
  def cell_from_xml(cell_xml, hyperlink)
47
+ # This is error prone, to_i will silently turn a nil into a 0
48
+ # and it works by coincidence that Format[0] is general
46
49
  style = cell_xml['s'].to_i # should be here
47
50
  # c: <c r="A5" s="2">
48
51
  # <v>22606</v>
@@ -120,15 +123,39 @@ module Roo
120
123
  end.compact]
121
124
  end
122
125
 
126
+ def expand_merged_ranges(cells)
127
+ # Extract merged ranges from xml
128
+ merges = {}
129
+ doc.xpath("/worksheet/mergeCells/mergeCell").each do |mergecell_xml|
130
+ tl, br = mergecell_xml['ref'].split(/:/).map {|ref| ::Roo::Utils.ref_to_key(ref)}
131
+ for row in tl[0]..br[0] do
132
+ for col in tl[1]..br[1] do
133
+ next if row == tl[0] && col == tl[1]
134
+ merges[[row,col]] = tl
135
+ end
136
+ end
137
+ end
138
+ # Duplicate value into all cells in merged range
139
+ merges.each do |dst, src|
140
+ cells[dst] = cells[src]
141
+ end
142
+ end
143
+
123
144
  def extract_cells(relationships)
124
- Hash[doc.xpath("/worksheet/sheetData/row/c").map do |cell_xml|
145
+ extracted_cells = Hash[doc.xpath("/worksheet/sheetData/row/c").map do |cell_xml|
125
146
  key = ::Roo::Utils.ref_to_key(cell_xml['r'])
126
147
  [key, cell_from_xml(cell_xml, hyperlinks(relationships)[key])]
127
148
  end]
149
+ if @options[:expand_merged_ranges]
150
+ expand_merged_ranges(extracted_cells)
151
+ end
152
+ extracted_cells
128
153
  end
129
154
 
130
155
  def extract_dimensions
131
- doc.xpath("/worksheet/dimension").map { |dim| dim.attributes["ref"].value }.first
156
+ Roo::Utils.each_element(@path, 'dimension') do |dimension|
157
+ return dimension.attributes["ref"].value
158
+ end
132
159
  end
133
160
 
134
161
  =begin