culturecode-roo 2.0.1 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -0
- data/CHANGELOG.md +513 -0
- data/README.md +206 -73
- data/lib/roo.rb +3 -3
- data/lib/roo/base.rb +49 -33
- data/lib/roo/csv.rb +10 -0
- data/lib/roo/excelx.rb +187 -60
- data/lib/roo/excelx/comments.rb +2 -1
- data/lib/roo/excelx/sheet_doc.rb +30 -3
- data/lib/roo/open_office.rb +250 -221
- data/lib/roo/utils.rb +28 -31
- data/lib/roo/version.rb +1 -1
- data/roo.gemspec +10 -12
- data/spec/lib/roo/csv_spec.rb +14 -0
- data/spec/lib/roo/excelx_spec.rb +90 -2
- data/spec/lib/roo/libreoffice_spec.rb +16 -0
- data/spec/lib/roo/openoffice_spec.rb +11 -0
- data/spec/lib/roo/utils_spec.rb +5 -4
- data/test/test_roo.rb +113 -2
- metadata +29 -180
- data/CHANGELOG +0 -438
- data/scripts/txt2html +0 -67
- data/test/files/1900_base.xlsx +0 -0
- data/test/files/1904_base.xlsx +0 -0
- data/test/files/Bibelbund.csv +0 -3741
- data/test/files/Bibelbund.ods +0 -0
- data/test/files/Bibelbund.xlsx +0 -0
- data/test/files/Bibelbund1.ods +0 -0
- data/test/files/Pfand_from_windows_phone.xlsx +0 -0
- data/test/files/advanced_header.ods +0 -0
- data/test/files/bbu.ods +0 -0
- data/test/files/bbu.xlsx +0 -0
- data/test/files/bode-v1.ods.zip +0 -0
- data/test/files/bode-v1.xls.zip +0 -0
- data/test/files/boolean.csv +0 -2
- data/test/files/boolean.ods +0 -0
- data/test/files/boolean.xlsx +0 -0
- data/test/files/borders.ods +0 -0
- data/test/files/borders.xlsx +0 -0
- data/test/files/bug-numbered-sheet-names.xlsx +0 -0
- data/test/files/comments.ods +0 -0
- data/test/files/comments.xlsx +0 -0
- data/test/files/csvtypes.csv +0 -1
- data/test/files/datetime.ods +0 -0
- data/test/files/datetime.xlsx +0 -0
- data/test/files/dreimalvier.ods +0 -0
- data/test/files/emptysheets.ods +0 -0
- data/test/files/emptysheets.xlsx +0 -0
- data/test/files/encrypted-letmein.ods +0 -0
- data/test/files/file_item_error.xlsx +0 -0
- data/test/files/formula.ods +0 -0
- data/test/files/formula.xlsx +0 -0
- data/test/files/formula_string_error.xlsx +0 -0
- data/test/files/html-escape.ods +0 -0
- data/test/files/link.csv +0 -1
- data/test/files/link.xlsx +0 -0
- data/test/files/matrix.ods +0 -0
- data/test/files/named_cells.ods +0 -0
- data/test/files/named_cells.xlsx +0 -0
- data/test/files/no_spreadsheet_file.txt +0 -1
- data/test/files/numbers-export.xlsx +0 -0
- data/test/files/numbers1.csv +0 -18
- data/test/files/numbers1.ods +0 -0
- data/test/files/numbers1.xlsx +0 -0
- data/test/files/numbers1withnull.xlsx +0 -0
- data/test/files/numeric-link.xlsx +0 -0
- data/test/files/only_one_sheet.ods +0 -0
- data/test/files/only_one_sheet.xlsx +0 -0
- data/test/files/paragraph.ods +0 -0
- data/test/files/paragraph.xlsx +0 -0
- data/test/files/ric.ods +0 -0
- data/test/files/sheet1.xml +0 -109
- data/test/files/simple_spreadsheet.ods +0 -0
- data/test/files/simple_spreadsheet.xlsx +0 -0
- data/test/files/simple_spreadsheet_from_italo.ods +0 -0
- data/test/files/so_datetime.csv +0 -8
- data/test/files/style.ods +0 -0
- data/test/files/style.xlsx +0 -0
- data/test/files/time-test.csv +0 -2
- data/test/files/time-test.ods +0 -0
- data/test/files/time-test.xlsx +0 -0
- data/test/files/type_excel.ods +0 -0
- data/test/files/type_excel.xlsx +0 -0
- data/test/files/type_excelx.ods +0 -0
- data/test/files/type_openoffice.xlsx +0 -0
- data/test/files/whitespace.ods +0 -0
- data/test/files/whitespace.xlsx +0 -0
data/lib/roo/csv.rb
CHANGED
@@ -107,4 +107,14 @@ class Roo::CSV < Roo::Base
|
|
107
107
|
@last_column[sheet] -= 1
|
108
108
|
end
|
109
109
|
end
|
110
|
+
|
111
|
+
# Loads the cells of +sheet+, passes every String cell value through
# sanitize_value, and then flags the sheet as cleaned.
#
# sheet - the sheet identifier used as key into @cleaned.
def clean_sheet(sheet)
  read_cells(sheet)

  @cell.each do |coord, content|
    # Only String values are sanitized; every other value is left untouched.
    next unless content.is_a?(::String)

    @cell[coord] = sanitize_value(content)
  end

  @cleaned[sheet] = true
end
|
110
120
|
end
|
data/lib/roo/excelx.rb
CHANGED
@@ -78,6 +78,7 @@ class Roo::Excelx < Roo::Base
|
|
78
78
|
|
79
79
|
class Cell
|
80
80
|
attr_reader :type, :formula, :value, :excelx_type, :excelx_value, :style, :hyperlink, :coordinate
|
81
|
+
attr_writer :value
|
81
82
|
|
82
83
|
def initialize(value, type, formula, excelx_type, excelx_value, style, hyperlink, base_date, coordinate)
|
83
84
|
@type = type
|
@@ -145,12 +146,12 @@ class Roo::Excelx < Roo::Base
|
|
145
146
|
end
|
146
147
|
|
147
148
|
class Sheet
|
148
|
-
def initialize(name, rels_path, sheet_path, comments_path, styles, shared_strings, workbook)
|
149
|
+
def initialize(name, rels_path, sheet_path, comments_path, styles, shared_strings, workbook, options = {})
|
149
150
|
@name = name
|
150
151
|
@rels = Relationships.new(rels_path)
|
151
152
|
@comments = Comments.new(comments_path)
|
152
153
|
@styles = styles
|
153
|
-
@sheet = SheetDoc.new(sheet_path, @rels, @styles, shared_strings, workbook)
|
154
|
+
@sheet = SheetDoc.new(sheet_path, @rels, @styles, shared_strings, workbook, options)
|
154
155
|
end
|
155
156
|
|
156
157
|
def cells
|
@@ -162,13 +163,16 @@ class Roo::Excelx < Roo::Base
|
|
162
163
|
end
|
163
164
|
|
164
165
|
# Yield each row as array of Excelx::Cell objects
|
165
|
-
# accepts options max_rows (int) (offset by 1 for header)
|
166
|
-
#
|
166
|
+
# accepts options max_rows (int) (offset by 1 for header),
|
167
|
+
# pad_cells (boolean) and offset (int)
|
167
168
|
def each_row(options = {}, &block)
|
168
169
|
row_count = 0
|
170
|
+
options[:offset] ||= 0
|
169
171
|
@sheet.each_row_streaming do |row|
|
170
|
-
break if options[:max_rows] && row_count == options[:max_rows] + 1
|
171
|
-
|
172
|
+
break if options[:max_rows] && row_count == options[:max_rows] + options[:offset] + 1
|
173
|
+
if block_given? && !(options[:offset] && row_count < options[:offset])
|
174
|
+
block.call(cells_for_row_element(row, options))
|
175
|
+
end
|
172
176
|
row_count += 1
|
173
177
|
end
|
174
178
|
end
|
@@ -187,25 +191,26 @@ class Roo::Excelx < Roo::Base
|
|
187
191
|
|
188
192
|
# returns the number of the first non-empty row
|
189
193
|
def first_row
|
190
|
-
@first_row ||= present_cells.keys.map {|row,
|
194
|
+
@first_row ||= present_cells.keys.map {|row, _| row }.min
|
191
195
|
end
|
192
196
|
|
193
197
|
def last_row
|
194
|
-
@last_row ||= present_cells.keys.map {|row,
|
198
|
+
@last_row ||= present_cells.keys.map {|row, _| row }.max
|
195
199
|
end
|
196
200
|
|
197
201
|
# returns the number of the first non-empty column
|
198
|
-
def first_column
|
199
|
-
@first_column ||= present_cells.keys.map {|
|
202
|
+
def first_column
|
203
|
+
@first_column ||= present_cells.keys.map {|_, col| col }.min
|
200
204
|
end
|
201
205
|
|
202
206
|
# returns the number of the last non-empty column
|
203
|
-
def last_column
|
204
|
-
@last_column ||= present_cells.keys.map {|
|
207
|
+
def last_column
|
208
|
+
@last_column ||= present_cells.keys.map {|_, col| col }.max
|
205
209
|
end
|
206
210
|
|
207
211
|
def excelx_format(key)
|
208
|
-
|
212
|
+
cell = cells[key]
|
213
|
+
@styles.style_format(cell.style).to_s if cell
|
209
214
|
end
|
210
215
|
|
211
216
|
def hyperlinks
|
@@ -250,23 +255,32 @@ class Roo::Excelx < Roo::Base
|
|
250
255
|
# values for packed: :zip
|
251
256
|
# optional cell_max (int) parameter for early aborting attempts to parse
|
252
257
|
# enormous documents.
|
253
|
-
def initialize(
|
258
|
+
def initialize(filename_or_stream, options = {})
|
254
259
|
packed = options[:packed]
|
255
260
|
file_warning = options.fetch(:file_warning, :error)
|
256
261
|
cell_max = options.delete(:cell_max)
|
262
|
+
sheet_options = {}
|
263
|
+
sheet_options[:expand_merged_ranges] = (options[:expand_merged_ranges] || false)
|
257
264
|
|
258
|
-
|
265
|
+
unless is_stream?(filename_or_stream)
|
266
|
+
file_type_check(filename_or_stream,'.xlsx','an Excel-xlsx', file_warning, packed)
|
267
|
+
basename = File.basename(filename_or_stream)
|
268
|
+
end
|
259
269
|
|
260
|
-
@tmpdir = make_tmpdir(
|
261
|
-
@filename = local_filename(
|
270
|
+
@tmpdir = make_tmpdir(basename, options[:tmpdir_root])
|
271
|
+
@filename = local_filename(filename_or_stream, @tmpdir, packed)
|
262
272
|
@comments_files = []
|
263
273
|
@rels_files = []
|
264
|
-
process_zipfile(@
|
274
|
+
process_zipfile(@filename || filename_or_stream)
|
265
275
|
|
266
|
-
@sheet_names = workbook.sheets.map
|
276
|
+
@sheet_names = workbook.sheets.map do |sheet|
|
277
|
+
unless options[:only_visible_sheets] && sheet['state'] == 'hidden'
|
278
|
+
sheet['name']
|
279
|
+
end
|
280
|
+
end.compact
|
267
281
|
@sheets = []
|
268
282
|
@sheets_by_name = Hash[@sheet_names.map.with_index do |sheet_name, n|
|
269
|
-
@sheets[n] = Sheet.new(sheet_name, @rels_files[n], @sheet_files[n], @comments_files[n], styles, shared_strings, workbook)
|
283
|
+
@sheets[n] = Sheet.new(sheet_name, @rels_files[n], @sheet_files[n], @comments_files[n], styles, shared_strings, workbook, sheet_options)
|
270
284
|
[sheet_name, @sheets[n]]
|
271
285
|
end]
|
272
286
|
|
@@ -276,11 +290,14 @@ class Roo::Excelx < Roo::Base
|
|
276
290
|
end
|
277
291
|
|
278
292
|
super
|
293
|
+
rescue => e # clean up any temp files, but only if an error was raised
|
294
|
+
close
|
295
|
+
raise e
|
279
296
|
end
|
280
297
|
|
281
298
|
def method_missing(method,*args)
|
282
299
|
if label = workbook.defined_names[method.to_s]
|
283
|
-
sheet_for(label.sheet).cells[label.key]
|
300
|
+
safe_send(sheet_for(label.sheet).cells[label.key], :value)
|
284
301
|
else
|
285
302
|
# call super for methods like #a1
|
286
303
|
super
|
@@ -303,8 +320,7 @@ class Roo::Excelx < Roo::Base
|
|
303
320
|
# cell at the first line and first row.
|
304
321
|
def cell(row, col, sheet=nil)
|
305
322
|
key = normalize(row,col)
|
306
|
-
|
307
|
-
cell.value if cell
|
323
|
+
safe_send(sheet_for(sheet).cells[key], :value)
|
308
324
|
end
|
309
325
|
|
310
326
|
def row(rownumber,sheet=nil)
|
@@ -354,7 +370,7 @@ class Roo::Excelx < Roo::Base
|
|
354
370
|
# The method #formula? checks if there is a formula.
|
355
371
|
def formula(row,col,sheet=nil)
|
356
372
|
key = normalize(row,col)
|
357
|
-
sheet_for(sheet).cells[key]
|
373
|
+
safe_send(sheet_for(sheet).cells[key], :formula)
|
358
374
|
end
|
359
375
|
|
360
376
|
# Predicate methods really should return a boolean
|
@@ -375,7 +391,8 @@ class Roo::Excelx < Roo::Base
|
|
375
391
|
# Given a cell, return the cell's style
|
376
392
|
def font(row, col, sheet=nil)
|
377
393
|
key = normalize(row,col)
|
378
|
-
|
394
|
+
definition_index = safe_send(sheet_for(sheet).cells[key], :style)
|
395
|
+
styles.definitions[definition_index] if definition_index
|
379
396
|
end
|
380
397
|
|
381
398
|
# returns the type of a cell:
|
@@ -388,7 +405,7 @@ class Roo::Excelx < Roo::Base
|
|
388
405
|
# * :datetime
|
389
406
|
def celltype(row,col,sheet=nil)
|
390
407
|
key = normalize(row, col)
|
391
|
-
sheet_for(sheet).cells[key]
|
408
|
+
safe_send(sheet_for(sheet).cells[key], :type)
|
392
409
|
end
|
393
410
|
|
394
411
|
# returns the internal type of an excel cell
|
@@ -397,14 +414,14 @@ class Roo::Excelx < Roo::Base
|
|
397
414
|
# Note: this is only available within the Excelx class
|
398
415
|
def excelx_type(row,col,sheet=nil)
|
399
416
|
key = normalize(row,col)
|
400
|
-
sheet_for(sheet).cells[key]
|
417
|
+
safe_send(sheet_for(sheet).cells[key], :excelx_type)
|
401
418
|
end
|
402
419
|
|
403
420
|
# returns the internal value of an excelx cell
|
404
421
|
# Note: this is only available within the Excelx class
|
405
422
|
def excelx_value(row,col,sheet=nil)
|
406
423
|
key = normalize(row,col)
|
407
|
-
sheet_for(sheet).cells[key]
|
424
|
+
safe_send(sheet_for(sheet).cells[key], :excelx_value)
|
408
425
|
end
|
409
426
|
|
410
427
|
# returns the internal format of an excel cell
|
@@ -489,42 +506,148 @@ class Roo::Excelx < Roo::Base
|
|
489
506
|
|
490
507
|
private
|
491
508
|
|
509
|
+
# Sanitizes the value of every cell of +sheet+ whose value is a String and
# then flags the sheet as cleaned.
#
# sheet - the sheet name used as key into @sheets_by_name and @cleaned.
def clean_sheet(sheet)
  worksheet = @sheets_by_name[sheet]

  worksheet.cells.each_pair do |coord, cell_object|
    raw = cell_object.value
    # Non-String values are skipped.
    next unless raw.is_a?(::String)

    worksheet.cells[coord].value = sanitize_value(raw)
  end

  @cleaned[sheet] = true
end
|
518
|
+
|
519
|
+
# Internal: extracts the worksheet_ids from the workbook.xml file. xlsx
|
520
|
+
# documents require a workbook.xml file, so if the file is missing
|
521
|
+
# it is not a valid xlsx file. In these cases, an ArgumentError is
|
522
|
+
# raised.
|
523
|
+
#
|
524
|
+
# entries - the collection of Zip::Entry objects searched for the workbook.xml file.
|
525
|
+
# path - A String for Zip::Entry's destination path.
|
526
|
+
#
|
527
|
+
# Examples
|
528
|
+
#
|
529
|
+
# extract_worksheet_ids(<Zip::Entry>, 'tmpdir/roo_workbook.xml')
|
530
|
+
# # => ["rId1", "rId2", "rId3"]
|
531
|
+
#
|
532
|
+
# Returns an Array of Strings.
|
533
|
+
# Finds the workbook.xml entry, extracts it to +path+ and returns the value
# of the "id" attribute of every <sheet> element, in document order.
#
# entries - collection of zip entries (each responds to #name and #extract).
# path    - String destination path for the extracted workbook.xml.
#
# Returns an Array of Strings.
# Raises ArgumentError if no entry name ends in "workbook.xml".
def extract_worksheet_ids(entries, path)
  wb = entries.find { |e| e.name[/workbook\.xml$/] }
  # The comma matters: without it Ruby calls a method named ArgumentError
  # and the caller gets a NoMethodError instead.
  fail ArgumentError, 'missing required workbook file' if wb.nil?

  wb.extract(path)
  workbook_doc = Roo::Utils.load_xml(path).remove_namespaces!
  workbook_doc.xpath('//sheet').map { |s| s.attributes['id'].value }
end
|
541
|
+
|
542
|
+
# Internal
|
543
|
+
#
|
544
|
+
# entries - the collection of Zip::Entry objects searched for the workbook.xml.rels file.
|
545
|
+
# path - A String for the Zip::Entry's destination path.
|
546
|
+
#
|
547
|
+
# Examples
|
548
|
+
#
|
549
|
+
# extract_worksheet_rels(<Zip::Entry>, 'tmpdir/roo_workbook.xml.rels')
|
550
|
+
# # => {
|
551
|
+
# "rId1"=>"worksheets/sheet1.xml",
|
552
|
+
# "rId2"=>"worksheets/sheet2.xml",
|
553
|
+
# "rId3"=>"worksheets/sheet3.xml"
|
554
|
+
# }
|
555
|
+
#
|
556
|
+
# Returns a Hash.
|
557
|
+
# Finds the workbook.xml.rels entry, extracts it to +path+ and maps every
# relationship of the worksheet type from its Id to its Target.
#
# entries - collection of zip entries (each responds to #name and #extract).
# path    - String destination path for the extracted workbook.xml.rels.
#
# Returns a Hash of relationship id (String) => target (String).
# Raises ArgumentError if no entry name ends in "workbook.xml.rels".
def extract_worksheet_rels(entries, path)
  wb_rels = entries.find { |e| e.name[/workbook\.xml\.rels$/] }
  # The comma matters: without it Ruby calls a method named ArgumentError
  # and the caller gets a NoMethodError instead.
  fail ArgumentError, 'missing required workbook file' if wb_rels.nil?

  wb_rels.extract(path)
  rels_doc = Roo::Utils.load_xml(path).remove_namespaces!
  worksheet_type = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet'

  rels_doc.xpath('//Relationship').each_with_object({}) do |relationship, targets|
    attributes = relationship.attributes
    # Relationships of any other type (styles, shared strings, ...) are ignored.
    next unless attributes['Type'].value == worksheet_type

    targets[attributes['Id'].value] = attributes['Target'].value
  end
end
|
576
|
+
|
577
|
+
# Extracts the worksheet files in workbook order and records their
# destination paths in @sheet_files.
#
# entries   - collection of zip entries (each responds to #name and #extract).
# sheet_ids - Array of relationship ids in workbook order.
# sheets    - Hash of relationship id => worksheet target path.
# tmpdir    - String directory that receives the roo_sheetN files.
#
# Raises ArgumentError if an id has no target or no entry matches the target.
def extract_sheets_in_order(entries, sheet_ids, sheets, tmpdir)
  sheet_ids.each_with_index do |id, i|
    target = sheets[id]
    # Literal suffix comparison instead of interpolating the target into a
    # regex. The leading "/" lets absolute targets ("/xl/worksheets/sheet1.xml")
    # match entry names, which carry no leading slash.
    sheet_entry = target && entries.find { |candidate| "/#{candidate.name}".end_with?(target) }
    fail ArgumentError, "missing worksheet file for relationship #{id}" if sheet_entry.nil?

    path = "#{tmpdir}/roo_sheet#{i + 1}"
    @sheet_files << path
    sheet_entry.extract(path)
  end
end
|
586
|
+
|
492
587
|
# Extracts all needed files from the zip file
|
493
|
-
def process_zipfile(
|
588
|
+
def process_zipfile(zipfilename_or_stream)
|
494
589
|
@sheet_files = []
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
when /sheet.xml$/
|
505
|
-
path = "#{tmpdir}/roo_sheet"
|
506
|
-
@sheet_files.unshift(path)
|
507
|
-
path
|
508
|
-
when /sheet([0-9]+).xml$/
|
509
|
-
# Numbers 3.1 exports first sheet without sheet number. Such sheets
|
510
|
-
# are always added to the beginning of the array which, naturally,
|
511
|
-
# causes other sheets to be pushed to the next index which could
|
512
|
-
# lead to sheet references getting overwritten, so we need to
|
513
|
-
# handle that case specifically.
|
514
|
-
nr = $1
|
515
|
-
sheet_files_index = nr.to_i - 1
|
516
|
-
sheet_files_index += 1 if @sheet_files[sheet_files_index]
|
517
|
-
@sheet_files[sheet_files_index] = "#{tmpdir}/roo_sheet#{nr.to_i}"
|
518
|
-
when /comments([0-9]+).xml$/
|
519
|
-
nr = $1
|
520
|
-
@comments_files[nr.to_i-1] = "#{tmpdir}/roo_comments#{nr}"
|
521
|
-
when /sheet([0-9]+).xml.rels$/
|
522
|
-
nr = $1
|
523
|
-
@rels_files[nr.to_i-1] = "#{tmpdir}/roo_rels#{nr}"
|
590
|
+
|
591
|
+
unless is_stream?(zipfilename_or_stream)
|
592
|
+
process_zipfile_entries Zip::File.open(zipfilename_or_stream).to_a.sort_by(&:name)
|
593
|
+
else
|
594
|
+
stream = Zip::InputStream.open zipfilename_or_stream
|
595
|
+
begin
|
596
|
+
entries = []
|
597
|
+
while entry = stream.get_next_entry
|
598
|
+
entries << entry
|
524
599
|
end
|
525
|
-
|
526
|
-
|
600
|
+
process_zipfile_entries entries
|
601
|
+
ensure
|
602
|
+
stream.close
|
603
|
+
end
|
604
|
+
end
|
605
|
+
end
|
606
|
+
|
607
|
+
# Extracts the files roo needs from +entries+ into @tmpdir.
#
# Worksheet filenames cannot be trusted for ordering: Numbers 3.1 names the
# first sheet sheet.xml rather than sheet1.xml, and Google exports may use
# any filename for any position (sheet6.xml can be the first worksheet).
# workbook.xml gives the real order and workbook.xml.rels gives the filename
# for each entry in that order, e.g.
#
#   workbook.xml:
#     <sheet state="visible" name="IS" sheetId="1" r:id="rId3"/>
#     <sheet state="visible" name="BS" sheetId="2" r:id="rId4"/>
#   workbook.xml.rels:
#     <Relationship Id="rId4" Target="worksheets/sheet5.xml" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"/>
#     <Relationship Id="rId3" Target="worksheets/sheet4.xml" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"/>
def process_zipfile_entries(entries)
  ordered_ids = extract_worksheet_ids(entries, "#{@tmpdir}/roo_workbook.xml")
  targets_by_id = extract_worksheet_rels(entries, "#{@tmpdir}/roo_workbook.xml.rels")
  extract_sheets_in_order(entries, ordered_ids, targets_by_id, @tmpdir)

  entries.each do |entry|
    destination =
      case entry.name.downcase
      when /sharedstrings.xml$/ then "#{@tmpdir}/roo_sharedStrings.xml"
      when /styles.xml$/ then "#{@tmpdir}/roo_styles.xml"
      when /comments([0-9]+).xml$/
        # FIXME: Usually the comment files follow the sheet order, i.e. the
        #        comments of sheet1.xml are in comments1.xml, but not always.
        #        The authoritative location of a sheet's comment file is its
        #        sheetN.xml.rels file. SEE ECMA-376 12.3.3 in
        #        "Ecma Office Open XML Part 1".
        number = Regexp.last_match(1).to_i
        @comments_files[number - 1] = "#{@tmpdir}/roo_comments#{number}"
      when /sheet([0-9]+).xml.rels$/
        # FIXME: Roo seems to use sheet[\d].xml.rels for hyperlinks only, but
        #        the file also stores the location of sharedStrings,
        #        comments, drawings, etc.
        number = Regexp.last_match(1).to_i
        @rels_files[number - 1] = "#{@tmpdir}/roo_rels#{number}"
      end

    # Entries that match none of the patterns are not extracted.
    next unless destination

    entry.extract(destination)
  end
end
|
530
653
|
|
@@ -539,4 +662,8 @@ class Roo::Excelx < Roo::Base
|
|
539
662
|
def workbook
|
540
663
|
@workbook ||= Workbook.new(File.join(@tmpdir, "roo_workbook.xml"))
|
541
664
|
end
|
665
|
+
|
666
|
+
# Calls +method+ on +object+ with +args+ when the object is truthy and
# responds to the method; otherwise returns nil.
#
# public_send is used because the respond_to? guard only considers public
# methods, so the call and the guard agree on visibility.
def safe_send(object, method, *args)
  object.public_send(method, *args) if object && object.respond_to?(method)
end
|
542
669
|
end
|
data/lib/roo/excelx/comments.rb
CHANGED
@@ -12,7 +12,8 @@ module Roo
|
|
12
12
|
def extract_comments
|
13
13
|
if doc_exists?
|
14
14
|
Hash[doc.xpath("//comments/commentList/comment").map do |comment|
|
15
|
-
|
15
|
+
value = (comment.at_xpath('./text/r/t') || comment.at_xpath('./text/t')).text
|
16
|
+
[::Roo::Utils.ref_to_key(comment.attributes['ref'].to_s), value]
|
16
17
|
end]
|
17
18
|
else
|
18
19
|
{}
|
data/lib/roo/excelx/sheet_doc.rb
CHANGED
@@ -2,8 +2,9 @@ require 'roo/excelx/extractor'
|
|
2
2
|
|
3
3
|
module Roo
|
4
4
|
class Excelx::SheetDoc < Excelx::Extractor
|
5
|
-
def initialize(path, relationships, styles, shared_strings, workbook)
|
5
|
+
def initialize(path, relationships, styles, shared_strings, workbook, options = {})
|
6
6
|
super(path)
|
7
|
+
@options = options
|
7
8
|
@relationships = relationships
|
8
9
|
@styles = styles
|
9
10
|
@shared_strings = shared_strings
|
@@ -43,6 +44,8 @@ module Roo
|
|
43
44
|
private
|
44
45
|
|
45
46
|
def cell_from_xml(cell_xml, hyperlink)
|
47
|
+
# This is error prone, to_i will silently turn a nil into a 0
|
48
|
+
# and it works by coincidence that Format[0] is general
|
46
49
|
style = cell_xml['s'].to_i # should be here
|
47
50
|
# c: <c r="A5" s="2">
|
48
51
|
# <v>22606</v>
|
@@ -120,15 +123,39 @@ module Roo
|
|
120
123
|
end.compact]
|
121
124
|
end
|
122
125
|
|
126
|
+
# Copies the top-left cell of every merged range into all other coordinates
# of that range. +cells+ is modified in place.
#
# cells - Hash of [row, col] => cell, as built by extract_cells.
#
# Returns the Hash of destination key => source key that was computed.
def expand_merged_ranges(cells)
  # Collect every merged coordinate together with the top-left key it mirrors.
  merges = {}
  doc.xpath("/worksheet/mergeCells/mergeCell").each do |mergecell_xml|
    top_left, bottom_right = mergecell_xml['ref'].split(/:/).map { |ref| ::Roo::Utils.ref_to_key(ref) }
    # A ref without ":" names a single cell; treat it as a 1x1 range.
    bottom_right ||= top_left
    (top_left[0]..bottom_right[0]).each do |row|
      (top_left[1]..bottom_right[1]).each do |col|
        next if row == top_left[0] && col == top_left[1]
        merges[[row, col]] = top_left
      end
    end
  end
  # Duplicate the value into all cells of the merged range. A range whose
  # top-left cell is absent is skipped so no nil entries end up in +cells+.
  merges.each do |dst, src|
    next unless cells.key?(src)
    cells[dst] = cells[src]
  end
end
|
143
|
+
|
123
144
|
def extract_cells(relationships)
|
124
|
-
Hash[doc.xpath("/worksheet/sheetData/row/c").map do |cell_xml|
|
145
|
+
extracted_cells = Hash[doc.xpath("/worksheet/sheetData/row/c").map do |cell_xml|
|
125
146
|
key = ::Roo::Utils.ref_to_key(cell_xml['r'])
|
126
147
|
[key, cell_from_xml(cell_xml, hyperlinks(relationships)[key])]
|
127
148
|
end]
|
149
|
+
if @options[:expand_merged_ranges]
|
150
|
+
expand_merged_ranges(extracted_cells)
|
151
|
+
end
|
152
|
+
extracted_cells
|
128
153
|
end
|
129
154
|
|
130
155
|
def extract_dimensions
|
131
|
-
|
156
|
+
Roo::Utils.each_element(@path, 'dimension') do |dimension|
|
157
|
+
return dimension.attributes["ref"].value
|
158
|
+
end
|
132
159
|
end
|
133
160
|
|
134
161
|
=begin
|