culturecode-roo 2.0.1 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. data/.gitignore +1 -0
  2. data/CHANGELOG.md +513 -0
  3. data/README.md +206 -73
  4. data/lib/roo.rb +3 -3
  5. data/lib/roo/base.rb +49 -33
  6. data/lib/roo/csv.rb +10 -0
  7. data/lib/roo/excelx.rb +187 -60
  8. data/lib/roo/excelx/comments.rb +2 -1
  9. data/lib/roo/excelx/sheet_doc.rb +30 -3
  10. data/lib/roo/open_office.rb +250 -221
  11. data/lib/roo/utils.rb +28 -31
  12. data/lib/roo/version.rb +1 -1
  13. data/roo.gemspec +10 -12
  14. data/spec/lib/roo/csv_spec.rb +14 -0
  15. data/spec/lib/roo/excelx_spec.rb +90 -2
  16. data/spec/lib/roo/libreoffice_spec.rb +16 -0
  17. data/spec/lib/roo/openoffice_spec.rb +11 -0
  18. data/spec/lib/roo/utils_spec.rb +5 -4
  19. data/test/test_roo.rb +113 -2
  20. metadata +29 -180
  21. data/CHANGELOG +0 -438
  22. data/scripts/txt2html +0 -67
  23. data/test/files/1900_base.xlsx +0 -0
  24. data/test/files/1904_base.xlsx +0 -0
  25. data/test/files/Bibelbund.csv +0 -3741
  26. data/test/files/Bibelbund.ods +0 -0
  27. data/test/files/Bibelbund.xlsx +0 -0
  28. data/test/files/Bibelbund1.ods +0 -0
  29. data/test/files/Pfand_from_windows_phone.xlsx +0 -0
  30. data/test/files/advanced_header.ods +0 -0
  31. data/test/files/bbu.ods +0 -0
  32. data/test/files/bbu.xlsx +0 -0
  33. data/test/files/bode-v1.ods.zip +0 -0
  34. data/test/files/bode-v1.xls.zip +0 -0
  35. data/test/files/boolean.csv +0 -2
  36. data/test/files/boolean.ods +0 -0
  37. data/test/files/boolean.xlsx +0 -0
  38. data/test/files/borders.ods +0 -0
  39. data/test/files/borders.xlsx +0 -0
  40. data/test/files/bug-numbered-sheet-names.xlsx +0 -0
  41. data/test/files/comments.ods +0 -0
  42. data/test/files/comments.xlsx +0 -0
  43. data/test/files/csvtypes.csv +0 -1
  44. data/test/files/datetime.ods +0 -0
  45. data/test/files/datetime.xlsx +0 -0
  46. data/test/files/dreimalvier.ods +0 -0
  47. data/test/files/emptysheets.ods +0 -0
  48. data/test/files/emptysheets.xlsx +0 -0
  49. data/test/files/encrypted-letmein.ods +0 -0
  50. data/test/files/file_item_error.xlsx +0 -0
  51. data/test/files/formula.ods +0 -0
  52. data/test/files/formula.xlsx +0 -0
  53. data/test/files/formula_string_error.xlsx +0 -0
  54. data/test/files/html-escape.ods +0 -0
  55. data/test/files/link.csv +0 -1
  56. data/test/files/link.xlsx +0 -0
  57. data/test/files/matrix.ods +0 -0
  58. data/test/files/named_cells.ods +0 -0
  59. data/test/files/named_cells.xlsx +0 -0
  60. data/test/files/no_spreadsheet_file.txt +0 -1
  61. data/test/files/numbers-export.xlsx +0 -0
  62. data/test/files/numbers1.csv +0 -18
  63. data/test/files/numbers1.ods +0 -0
  64. data/test/files/numbers1.xlsx +0 -0
  65. data/test/files/numbers1withnull.xlsx +0 -0
  66. data/test/files/numeric-link.xlsx +0 -0
  67. data/test/files/only_one_sheet.ods +0 -0
  68. data/test/files/only_one_sheet.xlsx +0 -0
  69. data/test/files/paragraph.ods +0 -0
  70. data/test/files/paragraph.xlsx +0 -0
  71. data/test/files/ric.ods +0 -0
  72. data/test/files/sheet1.xml +0 -109
  73. data/test/files/simple_spreadsheet.ods +0 -0
  74. data/test/files/simple_spreadsheet.xlsx +0 -0
  75. data/test/files/simple_spreadsheet_from_italo.ods +0 -0
  76. data/test/files/so_datetime.csv +0 -8
  77. data/test/files/style.ods +0 -0
  78. data/test/files/style.xlsx +0 -0
  79. data/test/files/time-test.csv +0 -2
  80. data/test/files/time-test.ods +0 -0
  81. data/test/files/time-test.xlsx +0 -0
  82. data/test/files/type_excel.ods +0 -0
  83. data/test/files/type_excel.xlsx +0 -0
  84. data/test/files/type_excelx.ods +0 -0
  85. data/test/files/type_openoffice.xlsx +0 -0
  86. data/test/files/whitespace.ods +0 -0
  87. data/test/files/whitespace.xlsx +0 -0
@@ -107,4 +107,14 @@ class Roo::CSV < Roo::Base
107
107
  @last_column[sheet] -= 1
108
108
  end
109
109
  end
110
+
111
# Sanitizes every String cell of +sheet+ and records the sheet as cleaned.
#
# sheet - The sheet identifier handed to read_cells and used as the key in
#         @cleaned.
#
# Non-String cell values are left untouched. What sanitize_value removes is
# defined elsewhere (NOTE(review): presumably read_cells is what populates
# @cell for the sheet - confirm against Roo::Base).
#
# Returns true.
def clean_sheet(sheet)
  read_cells(sheet)

  @cell.each_pair do |position, content|
    next unless content.is_a?(::String)

    # Only existing keys are reassigned, so the hash is safe to update
    # while it is being iterated.
    @cell[position] = sanitize_value(content)
  end

  @cleaned[sheet] = true
end
110
120
  end
@@ -78,6 +78,7 @@ class Roo::Excelx < Roo::Base
78
78
 
79
79
  class Cell
80
80
  attr_reader :type, :formula, :value, :excelx_type, :excelx_value, :style, :hyperlink, :coordinate
81
+ attr_writer :value
81
82
 
82
83
  def initialize(value, type, formula, excelx_type, excelx_value, style, hyperlink, base_date, coordinate)
83
84
  @type = type
@@ -145,12 +146,12 @@ class Roo::Excelx < Roo::Base
145
146
  end
146
147
 
147
148
  class Sheet
148
- def initialize(name, rels_path, sheet_path, comments_path, styles, shared_strings, workbook)
149
+ def initialize(name, rels_path, sheet_path, comments_path, styles, shared_strings, workbook, options = {})
149
150
  @name = name
150
151
  @rels = Relationships.new(rels_path)
151
152
  @comments = Comments.new(comments_path)
152
153
  @styles = styles
153
- @sheet = SheetDoc.new(sheet_path, @rels, @styles, shared_strings, workbook)
154
+ @sheet = SheetDoc.new(sheet_path, @rels, @styles, shared_strings, workbook, options)
154
155
  end
155
156
 
156
157
  def cells
@@ -162,13 +163,16 @@ class Roo::Excelx < Roo::Base
162
163
  end
163
164
 
164
165
  # Yield each row as array of Excelx::Cell objects
165
- # accepts options max_rows (int) (offset by 1 for header)
166
- # and pad_cells (boolean)
166
+ # accepts options max_rows (int) (offset by 1 for header),
167
+ # pad_cells (boolean) and offset (int)
167
168
  def each_row(options = {}, &block)
168
169
  row_count = 0
170
+ options[:offset] ||= 0
169
171
  @sheet.each_row_streaming do |row|
170
- break if options[:max_rows] && row_count == options[:max_rows] + 1
171
- block.call(cells_for_row_element(row, options)) if block_given?
172
+ break if options[:max_rows] && row_count == options[:max_rows] + options[:offset] + 1
173
+ if block_given? && !(options[:offset] && row_count < options[:offset])
174
+ block.call(cells_for_row_element(row, options))
175
+ end
172
176
  row_count += 1
173
177
  end
174
178
  end
@@ -187,25 +191,26 @@ class Roo::Excelx < Roo::Base
187
191
 
188
192
  # returns the number of the first non-empty row
189
193
  def first_row
190
- @first_row ||= present_cells.keys.map {|row, col| row }.min
194
+ @first_row ||= present_cells.keys.map {|row, _| row }.min
191
195
  end
192
196
 
193
197
  def last_row
194
- @last_row ||= present_cells.keys.map {|row, col| row }.max
198
+ @last_row ||= present_cells.keys.map {|row, _| row }.max
195
199
  end
196
200
 
197
201
  # returns the number of the first non-empty column
198
- def first_column(sheet=nil)
199
- @first_column ||= present_cells.keys.map {|row, col| col }.min
202
+ def first_column
203
+ @first_column ||= present_cells.keys.map {|_, col| col }.min
200
204
  end
201
205
 
202
206
  # returns the number of the last non-empty column
203
- def last_column(sheet=nil)
204
- @last_column ||= present_cells.keys.map {|row, col| col }.max
207
+ def last_column
208
+ @last_column ||= present_cells.keys.map {|_, col| col }.max
205
209
  end
206
210
 
207
211
  def excelx_format(key)
208
- @styles.style_format(cells[key].style).to_s
212
+ cell = cells[key]
213
+ @styles.style_format(cell.style).to_s if cell
209
214
  end
210
215
 
211
216
  def hyperlinks
@@ -250,23 +255,32 @@ class Roo::Excelx < Roo::Base
250
255
  # values for packed: :zip
251
256
  # optional cell_max (int) parameter for early aborting attempts to parse
252
257
  # enormous documents.
253
- def initialize(filename, options = {})
258
+ def initialize(filename_or_stream, options = {})
254
259
  packed = options[:packed]
255
260
  file_warning = options.fetch(:file_warning, :error)
256
261
  cell_max = options.delete(:cell_max)
262
+ sheet_options = {}
263
+ sheet_options[:expand_merged_ranges] = (options[:expand_merged_ranges] || false)
257
264
 
258
- file_type_check(filename,'.xlsx','an Excel-xlsx', file_warning, packed)
265
+ unless is_stream?(filename_or_stream)
266
+ file_type_check(filename_or_stream,'.xlsx','an Excel-xlsx', file_warning, packed)
267
+ basename = File.basename(filename_or_stream)
268
+ end
259
269
 
260
- @tmpdir = make_tmpdir(filename.split('/').last, options[:tmpdir_root])
261
- @filename = local_filename(filename, @tmpdir, packed)
270
+ @tmpdir = make_tmpdir(basename, options[:tmpdir_root])
271
+ @filename = local_filename(filename_or_stream, @tmpdir, packed)
262
272
  @comments_files = []
263
273
  @rels_files = []
264
- process_zipfile(@tmpdir, @filename)
274
+ process_zipfile(@filename || filename_or_stream)
265
275
 
266
- @sheet_names = workbook.sheets.map { |sheet| sheet['name'] }
276
+ @sheet_names = workbook.sheets.map do |sheet|
277
+ unless options[:only_visible_sheets] && sheet['state'] == 'hidden'
278
+ sheet['name']
279
+ end
280
+ end.compact
267
281
  @sheets = []
268
282
  @sheets_by_name = Hash[@sheet_names.map.with_index do |sheet_name, n|
269
- @sheets[n] = Sheet.new(sheet_name, @rels_files[n], @sheet_files[n], @comments_files[n], styles, shared_strings, workbook)
283
+ @sheets[n] = Sheet.new(sheet_name, @rels_files[n], @sheet_files[n], @comments_files[n], styles, shared_strings, workbook, sheet_options)
270
284
  [sheet_name, @sheets[n]]
271
285
  end]
272
286
 
@@ -276,11 +290,14 @@ class Roo::Excelx < Roo::Base
276
290
  end
277
291
 
278
292
  super
293
+ rescue => e # clean up any temp files, but only if an error was raised
294
+ close
295
+ raise e
279
296
  end
280
297
 
281
298
  def method_missing(method,*args)
282
299
  if label = workbook.defined_names[method.to_s]
283
- sheet_for(label.sheet).cells[label.key].value
300
+ safe_send(sheet_for(label.sheet).cells[label.key], :value)
284
301
  else
285
302
  # call super for methods like #a1
286
303
  super
@@ -303,8 +320,7 @@ class Roo::Excelx < Roo::Base
303
320
  # cell at the first line and first row.
304
321
  def cell(row, col, sheet=nil)
305
322
  key = normalize(row,col)
306
- cell = sheet_for(sheet).cells[key]
307
- cell.value if cell
323
+ safe_send(sheet_for(sheet).cells[key], :value)
308
324
  end
309
325
 
310
326
  def row(rownumber,sheet=nil)
@@ -354,7 +370,7 @@ class Roo::Excelx < Roo::Base
354
370
  # The method #formula? checks if there is a formula.
355
371
  def formula(row,col,sheet=nil)
356
372
  key = normalize(row,col)
357
- sheet_for(sheet).cells[key].formula
373
+ safe_send(sheet_for(sheet).cells[key], :formula)
358
374
  end
359
375
 
360
376
  # Predicate methods really should return a boolean
@@ -375,7 +391,8 @@ class Roo::Excelx < Roo::Base
375
391
  # Given a cell, return the cell's style
376
392
  def font(row, col, sheet=nil)
377
393
  key = normalize(row,col)
378
- styles.definitions[sheet_for(sheet).cells[key].style]
394
+ definition_index = safe_send(sheet_for(sheet).cells[key], :style)
395
+ styles.definitions[definition_index] if definition_index
379
396
  end
380
397
 
381
398
  # returns the type of a cell:
@@ -388,7 +405,7 @@ class Roo::Excelx < Roo::Base
388
405
  # * :datetime
389
406
  def celltype(row,col,sheet=nil)
390
407
  key = normalize(row, col)
391
- sheet_for(sheet).cells[key].type
408
+ safe_send(sheet_for(sheet).cells[key], :type)
392
409
  end
393
410
 
394
411
  # returns the internal type of an excel cell
@@ -397,14 +414,14 @@ class Roo::Excelx < Roo::Base
397
414
  # Note: this is only available within the Excelx class
398
415
  def excelx_type(row,col,sheet=nil)
399
416
  key = normalize(row,col)
400
- sheet_for(sheet).cells[key].excelx_type
417
+ safe_send(sheet_for(sheet).cells[key], :excelx_type)
401
418
  end
402
419
 
403
420
  # returns the internal value of an excelx cell
404
421
  # Note: this is only available within the Excelx class
405
422
  def excelx_value(row,col,sheet=nil)
406
423
  key = normalize(row,col)
407
- sheet_for(sheet).cells[key].excelx_value
424
+ safe_send(sheet_for(sheet).cells[key], :excelx_value)
408
425
  end
409
426
 
410
427
  # returns the internal format of an excel cell
@@ -489,42 +506,148 @@ class Roo::Excelx < Roo::Base
489
506
 
490
507
  private
491
508
 
509
# Sanitizes the value of every String-valued cell on the named sheet and
# records the sheet as cleaned.
#
# sheet - The key used to look the sheet up in @sheets_by_name and to flag
#         it in @cleaned.
#
# Cells whose value is not a String are skipped. The sanitized text is
# written back through the cell's value writer.
#
# Returns true.
def clean_sheet(sheet)
  @sheets_by_name[sheet].cells.each_pair do |coord, cell|
    content = cell.value
    next unless content.is_a?(::String)

    @sheets_by_name[sheet].cells[coord].value = sanitize_value(content)
  end

  @cleaned[sheet] = true
end
518
+
519
# Internal: extracts the worksheet ids from the workbook.xml file. xlsx
# documents require a workbook.xml file, so if the file is missing the
# archive is not a valid xlsx file. In that case an ArgumentError is
# raised.
#
# entries - The collection of zip entries to search; the first entry whose
#           name ends in "workbook.xml" is used.
# path    - A String for the workbook entry's destination path.
#
# Examples
#
#   extract_worksheet_ids(entries, 'tmpdir/roo_workbook.xml')
#   # => ["rId1", "rId2", "rId3"]
#
# Returns an Array of Strings, one per <sheet> element, in document order.
# Raises ArgumentError if no workbook.xml entry is present.
def extract_worksheet_ids(entries, path)
  wb = entries.find { |e| e.name[/workbook\.xml$/] }
  # The comma is required: `fail ArgumentError 'msg'` parses as a call to a
  # method named ArgumentError and raises NoMethodError instead.
  fail ArgumentError, 'missing required workbook file' if wb.nil?

  wb.extract(path)
  workbook_doc = Roo::Utils.load_xml(path).remove_namespaces!
  workbook_doc.xpath('//sheet').map { |s| s.attributes['id'].value }
end
541
+
542
# Internal: maps worksheet relationship ids to their target paths, as
# listed in the workbook.xml.rels file. Relationships of any other type
# are ignored.
#
# entries - The collection of zip entries to search; the first entry whose
#           name ends in "workbook.xml.rels" is used.
# path    - A String for that entry's destination path.
#
# Examples
#
#   extract_worksheet_rels(entries, 'tmpdir/roo_workbook.xml.rels')
#   # => {
#          "rId1"=>"worksheets/sheet1.xml",
#          "rId2"=>"worksheets/sheet2.xml",
#          "rId3"=>"worksheets/sheet3.xml"
#        }
#
# Returns a Hash of relationship id => target.
# Raises ArgumentError if no workbook.xml.rels entry is present.
def extract_worksheet_rels(entries, path)
  wb_rels = entries.find { |e| e.name[/workbook\.xml\.rels$/] }
  # The comma is required: `fail ArgumentError 'msg'` parses as a call to a
  # method named ArgumentError and raises NoMethodError instead.
  fail ArgumentError, 'missing required workbook file' if wb_rels.nil?

  wb_rels.extract(path)
  rels_doc = Roo::Utils.load_xml(path).remove_namespaces!
  worksheet_type = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet'

  rels_doc.xpath('//Relationship').each_with_object({}) do |relationship, targets|
    attributes = relationship.attributes
    next unless attributes['Type'].value == worksheet_type

    targets[attributes['Id'].value] = attributes['Target'].value
  end
end
576
+
577
# Internal: extracts the worksheet files in the order given by sheet_ids
# and appends each destination path to @sheet_files.
#
# entries   - The collection of zip entries to search.
# sheet_ids - An Array of relationship ids, in worksheet order.
# sheets    - A Hash of relationship id => worksheet target path.
# tmpdir    - A String directory that receives the "roo_sheetN" files
#             (N is the 1-based position in sheet_ids).
#
# An entry is selected when its name ends with the relationship target. The
# target is compared literally (not as a regular expression) and a leading
# "/" is ignored, so a target such as "/xl/worksheets/sheet1.xml" matches
# the entry named "xl/worksheets/sheet1.xml".
#
# NOTE(review): an id with no target in sheets yields an empty suffix, which
# matches the first entry. This mirrors the previous behaviour and keeps
# @sheet_files aligned with sheet_ids; confirm whether such ids should be
# rejected instead.
#
# Returns sheet_ids.
# Raises ArgumentError if no entry matches a target.
def extract_sheets_in_order(entries, sheet_ids, sheets, tmpdir)
  sheet_ids.each_with_index do |id, i|
    target = sheets[id].to_s
    suffix = target.sub(%r{\A/}, '')
    sheet_entry = entries.find { |candidate| candidate.name.end_with?(suffix) }
    fail ArgumentError, "missing worksheet file #{target}" if sheet_entry.nil?

    path = "#{tmpdir}/roo_sheet#{i + 1}"
    @sheet_files << path
    sheet_entry.extract(path)
  end
end
586
+
492
587
  # Extracts all needed files from the zip file
493
- def process_zipfile(tmpdir, zipfilename)
588
+ def process_zipfile(zipfilename_or_stream)
494
589
  @sheet_files = []
495
- Zip::File.foreach(zipfilename) do |entry|
496
- path =
497
- case entry.name.downcase
498
- when /workbook.xml$/
499
- "#{tmpdir}/roo_workbook.xml"
500
- when /sharedstrings.xml$/
501
- "#{tmpdir}/roo_sharedStrings.xml"
502
- when /styles.xml$/
503
- "#{tmpdir}/roo_styles.xml"
504
- when /sheet.xml$/
505
- path = "#{tmpdir}/roo_sheet"
506
- @sheet_files.unshift(path)
507
- path
508
- when /sheet([0-9]+).xml$/
509
- # Numbers 3.1 exports first sheet without sheet number. Such sheets
510
- # are always added to the beginning of the array which, naturally,
511
- # causes other sheets to be pushed to the next index which could
512
- # lead to sheet references getting overwritten, so we need to
513
- # handle that case specifically.
514
- nr = $1
515
- sheet_files_index = nr.to_i - 1
516
- sheet_files_index += 1 if @sheet_files[sheet_files_index]
517
- @sheet_files[sheet_files_index] = "#{tmpdir}/roo_sheet#{nr.to_i}"
518
- when /comments([0-9]+).xml$/
519
- nr = $1
520
- @comments_files[nr.to_i-1] = "#{tmpdir}/roo_comments#{nr}"
521
- when /sheet([0-9]+).xml.rels$/
522
- nr = $1
523
- @rels_files[nr.to_i-1] = "#{tmpdir}/roo_rels#{nr}"
590
+
591
+ unless is_stream?(zipfilename_or_stream)
592
+ process_zipfile_entries Zip::File.open(zipfilename_or_stream).to_a.sort_by(&:name)
593
+ else
594
+ stream = Zip::InputStream.open zipfilename_or_stream
595
+ begin
596
+ entries = []
597
+ while entry = stream.get_next_entry
598
+ entries << entry
524
599
  end
525
- if path
526
- entry.extract(path)
600
+ process_zipfile_entries entries
601
+ ensure
602
+ stream.close
603
+ end
604
+ end
605
+ end
606
+
607
+ def process_zipfile_entries entries
608
+ # NOTE: When Google or Numbers 3.1 exports to xlsx, the worksheet filenames
609
+ # are not in order. With Numbers 3.1, the first sheet is always
610
+ # sheet.xml, not sheet1.xml. With Google, the order of the worksheets is
611
+ # independent of a worksheet's filename (i.e. sheet6.xml can be the
612
+ # first worksheet).
613
+ #
614
+ # workbook.xml lists the correct order of worksheets and
615
+ # workbook.xml.rels lists the filenames for those worksheets.
616
+ #
617
+ # workbook.xml:
618
+ # <sheet state="visible" name="IS" sheetId="1" r:id="rId3"/>
619
+ # <sheet state="visible" name="BS" sheetId="2" r:id="rId4"/>
620
+ # workbook.xml.rel:
621
+ # <Relationship Id="rId4" Target="worksheets/sheet5.xml" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"/>
622
+ # <Relationship Id="rId3" Target="worksheets/sheet4.xml" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"/>
623
+ sheet_ids = extract_worksheet_ids(entries, "#{@tmpdir}/roo_workbook.xml")
624
+ sheets = extract_worksheet_rels(entries, "#{@tmpdir}/roo_workbook.xml.rels")
625
+ extract_sheets_in_order(entries, sheet_ids, sheets, @tmpdir)
626
+
627
+ entries.each do |entry|
628
+ path =
629
+ case entry.name.downcase
630
+ when /sharedstrings.xml$/
631
+ "#{@tmpdir}/roo_sharedStrings.xml"
632
+ when /styles.xml$/
633
+ "#{@tmpdir}/roo_styles.xml"
634
+ when /comments([0-9]+).xml$/
635
+ # FIXME: Most of the time, the order of the comment files is the same as
636
+ # the sheet order, i.e. sheet1.xml's comments are in comments1.xml.
637
+ # In some situations, this isn't true. The true location of a
638
+ # sheet's comment file is in the sheet1.xml.rels file. SEE
639
+ # ECMA-376 12.3.3 in "Ecma Office Open XML Part 1".
640
+ nr = Regexp.last_match[1].to_i
641
+ @comments_files[nr - 1] = "#{@tmpdir}/roo_comments#{nr}"
642
+ when /sheet([0-9]+).xml.rels$/
643
+ # FIXME: Roo seems to use sheet[\d].xml.rels for hyperlinks only, but
644
+ # it also stores the location for sharedStrings, comments,
645
+ # drawings, etc.
646
+ nr = Regexp.last_match[1].to_i
647
+ @rels_files[nr - 1] = "#{@tmpdir}/roo_rels#{nr}"
527
648
  end
649
+
650
+ entry.extract(path) if path
528
651
  end
529
652
  end
530
653
 
@@ -539,4 +662,8 @@ class Roo::Excelx < Roo::Base
539
662
  def workbook
540
663
  @workbook ||= Workbook.new(File.join(@tmpdir, "roo_workbook.xml"))
541
664
  end
665
+
666
# Calls +method+ on +object+ only when that is possible.
#
# object - The receiver; may be nil or false.
# method - The method name to invoke.
# args   - Arguments forwarded to the method.
#
# Returns the method's result, or nil when object is nil/false or does not
# respond to method.
def safe_send(object, method, *args)
  return unless object
  return unless object.respond_to?(method)

  object.send(method, *args)
end
542
669
  end
@@ -12,7 +12,8 @@ module Roo
12
12
  def extract_comments
13
13
  if doc_exists?
14
14
  Hash[doc.xpath("//comments/commentList/comment").map do |comment|
15
- [::Roo::Utils.ref_to_key(comment.attributes['ref'].to_s), comment.at_xpath('./text/r/t').text]
15
+ value = (comment.at_xpath('./text/r/t') || comment.at_xpath('./text/t')).text
16
+ [::Roo::Utils.ref_to_key(comment.attributes['ref'].to_s), value]
16
17
  end]
17
18
  else
18
19
  {}
@@ -2,8 +2,9 @@ require 'roo/excelx/extractor'
2
2
 
3
3
  module Roo
4
4
  class Excelx::SheetDoc < Excelx::Extractor
5
- def initialize(path, relationships, styles, shared_strings, workbook)
5
+ def initialize(path, relationships, styles, shared_strings, workbook, options = {})
6
6
  super(path)
7
+ @options = options
7
8
  @relationships = relationships
8
9
  @styles = styles
9
10
  @shared_strings = shared_strings
@@ -43,6 +44,8 @@ module Roo
43
44
  private
44
45
 
45
46
  def cell_from_xml(cell_xml, hyperlink)
47
+ # This is error prone, to_i will silently turn a nil into a 0
48
+ # and it works by coincidence that Format[0] is general
46
49
  style = cell_xml['s'].to_i # should be here
47
50
  # c: <c r="A5" s="2">
48
51
  # <v>22606</v>
@@ -120,15 +123,39 @@ module Roo
120
123
  end.compact]
121
124
  end
122
125
 
126
# Copies the top-left cell of every merged range into the other
# coordinates covered by that range.
#
# cells - A Hash of [row, col] => cell; modified in place.
#
# Ranges are read from the sheet's mergeCell elements. A range whose
# top-left cell is absent from cells is skipped, so no nil entries are
# written into the hash (callers that invoke methods on every cell would
# fail on them). A ref naming a single cell (no ":") covers only its own
# anchor and therefore copies nothing.
#
# Returns the Hash of merged coordinate => top-left coordinate.
def expand_merged_ranges(cells)
  # Extract merged ranges from xml
  merges = {}
  doc.xpath("/worksheet/mergeCells/mergeCell").each do |mergecell_xml|
    tl, br = mergecell_xml['ref'].split(':').map { |ref| ::Roo::Utils.ref_to_key(ref) }
    br ||= tl
    (tl[0]..br[0]).each do |row|
      (tl[1]..br[1]).each do |col|
        next if row == tl[0] && col == tl[1]

        merges[[row, col]] = tl
      end
    end
  end

  # Duplicate value into all cells in merged range
  merges.each do |dst, src|
    source_cell = cells[src]
    cells[dst] = source_cell if source_cell
  end
end
143
+
123
144
  def extract_cells(relationships)
124
- Hash[doc.xpath("/worksheet/sheetData/row/c").map do |cell_xml|
145
+ extracted_cells = Hash[doc.xpath("/worksheet/sheetData/row/c").map do |cell_xml|
125
146
  key = ::Roo::Utils.ref_to_key(cell_xml['r'])
126
147
  [key, cell_from_xml(cell_xml, hyperlinks(relationships)[key])]
127
148
  end]
149
+ if @options[:expand_merged_ranges]
150
+ expand_merged_ranges(extracted_cells)
151
+ end
152
+ extracted_cells
128
153
  end
129
154
 
130
155
  def extract_dimensions
131
- doc.xpath("/worksheet/dimension").map { |dim| dim.attributes["ref"].value }.first
156
+ Roo::Utils.each_element(@path, 'dimension') do |dimension|
157
+ return dimension.attributes["ref"].value
158
+ end
132
159
  end
133
160
 
134
161
  =begin