culturecode-roo 2.0.1 → 2.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +1 -0
- data/CHANGELOG.md +513 -0
- data/README.md +206 -73
- data/lib/roo.rb +3 -3
- data/lib/roo/base.rb +49 -33
- data/lib/roo/csv.rb +10 -0
- data/lib/roo/excelx.rb +187 -60
- data/lib/roo/excelx/comments.rb +2 -1
- data/lib/roo/excelx/sheet_doc.rb +30 -3
- data/lib/roo/open_office.rb +250 -221
- data/lib/roo/utils.rb +28 -31
- data/lib/roo/version.rb +1 -1
- data/roo.gemspec +10 -12
- data/spec/lib/roo/csv_spec.rb +14 -0
- data/spec/lib/roo/excelx_spec.rb +90 -2
- data/spec/lib/roo/libreoffice_spec.rb +16 -0
- data/spec/lib/roo/openoffice_spec.rb +11 -0
- data/spec/lib/roo/utils_spec.rb +5 -4
- data/test/test_roo.rb +113 -2
- metadata +29 -180
- data/CHANGELOG +0 -438
- data/scripts/txt2html +0 -67
- data/test/files/1900_base.xlsx +0 -0
- data/test/files/1904_base.xlsx +0 -0
- data/test/files/Bibelbund.csv +0 -3741
- data/test/files/Bibelbund.ods +0 -0
- data/test/files/Bibelbund.xlsx +0 -0
- data/test/files/Bibelbund1.ods +0 -0
- data/test/files/Pfand_from_windows_phone.xlsx +0 -0
- data/test/files/advanced_header.ods +0 -0
- data/test/files/bbu.ods +0 -0
- data/test/files/bbu.xlsx +0 -0
- data/test/files/bode-v1.ods.zip +0 -0
- data/test/files/bode-v1.xls.zip +0 -0
- data/test/files/boolean.csv +0 -2
- data/test/files/boolean.ods +0 -0
- data/test/files/boolean.xlsx +0 -0
- data/test/files/borders.ods +0 -0
- data/test/files/borders.xlsx +0 -0
- data/test/files/bug-numbered-sheet-names.xlsx +0 -0
- data/test/files/comments.ods +0 -0
- data/test/files/comments.xlsx +0 -0
- data/test/files/csvtypes.csv +0 -1
- data/test/files/datetime.ods +0 -0
- data/test/files/datetime.xlsx +0 -0
- data/test/files/dreimalvier.ods +0 -0
- data/test/files/emptysheets.ods +0 -0
- data/test/files/emptysheets.xlsx +0 -0
- data/test/files/encrypted-letmein.ods +0 -0
- data/test/files/file_item_error.xlsx +0 -0
- data/test/files/formula.ods +0 -0
- data/test/files/formula.xlsx +0 -0
- data/test/files/formula_string_error.xlsx +0 -0
- data/test/files/html-escape.ods +0 -0
- data/test/files/link.csv +0 -1
- data/test/files/link.xlsx +0 -0
- data/test/files/matrix.ods +0 -0
- data/test/files/named_cells.ods +0 -0
- data/test/files/named_cells.xlsx +0 -0
- data/test/files/no_spreadsheet_file.txt +0 -1
- data/test/files/numbers-export.xlsx +0 -0
- data/test/files/numbers1.csv +0 -18
- data/test/files/numbers1.ods +0 -0
- data/test/files/numbers1.xlsx +0 -0
- data/test/files/numbers1withnull.xlsx +0 -0
- data/test/files/numeric-link.xlsx +0 -0
- data/test/files/only_one_sheet.ods +0 -0
- data/test/files/only_one_sheet.xlsx +0 -0
- data/test/files/paragraph.ods +0 -0
- data/test/files/paragraph.xlsx +0 -0
- data/test/files/ric.ods +0 -0
- data/test/files/sheet1.xml +0 -109
- data/test/files/simple_spreadsheet.ods +0 -0
- data/test/files/simple_spreadsheet.xlsx +0 -0
- data/test/files/simple_spreadsheet_from_italo.ods +0 -0
- data/test/files/so_datetime.csv +0 -8
- data/test/files/style.ods +0 -0
- data/test/files/style.xlsx +0 -0
- data/test/files/time-test.csv +0 -2
- data/test/files/time-test.ods +0 -0
- data/test/files/time-test.xlsx +0 -0
- data/test/files/type_excel.ods +0 -0
- data/test/files/type_excel.xlsx +0 -0
- data/test/files/type_excelx.ods +0 -0
- data/test/files/type_openoffice.xlsx +0 -0
- data/test/files/whitespace.ods +0 -0
- data/test/files/whitespace.xlsx +0 -0
data/lib/roo/csv.rb
CHANGED
@@ -107,4 +107,14 @@ class Roo::CSV < Roo::Base
|
|
107
107
|
@last_column[sheet] -= 1
|
108
108
|
end
|
109
109
|
end
|
110
|
+
|
111
|
+
# Sanitizes the String values held in @cell and flags +sheet+ as cleaned.
#
# sheet - the sheet identifier handed to read_cells and used as the key
#         in @cleaned.
#
# Non-String values are left untouched. Returns true (the value stored in
# @cleaned[sheet]).
def clean_sheet(sheet)
  # read_cells is defined elsewhere; presumably it fills @cell for this
  # sheet before the values are walked -- confirm against Roo::CSV.
  read_cells(sheet)

  @cell.each_pair do |position, content|
    next unless content.is_a?(::String)

    @cell[position] = sanitize_value(content)
  end

  @cleaned[sheet] = true
end
|
110
120
|
end
|
data/lib/roo/excelx.rb
CHANGED
@@ -78,6 +78,7 @@ class Roo::Excelx < Roo::Base
|
|
78
78
|
|
79
79
|
class Cell
|
80
80
|
attr_reader :type, :formula, :value, :excelx_type, :excelx_value, :style, :hyperlink, :coordinate
|
81
|
+
attr_writer :value
|
81
82
|
|
82
83
|
def initialize(value, type, formula, excelx_type, excelx_value, style, hyperlink, base_date, coordinate)
|
83
84
|
@type = type
|
@@ -145,12 +146,12 @@ class Roo::Excelx < Roo::Base
|
|
145
146
|
end
|
146
147
|
|
147
148
|
class Sheet
|
148
|
-
def initialize(name, rels_path, sheet_path, comments_path, styles, shared_strings, workbook)
|
149
|
+
def initialize(name, rels_path, sheet_path, comments_path, styles, shared_strings, workbook, options = {})
|
149
150
|
@name = name
|
150
151
|
@rels = Relationships.new(rels_path)
|
151
152
|
@comments = Comments.new(comments_path)
|
152
153
|
@styles = styles
|
153
|
-
@sheet = SheetDoc.new(sheet_path, @rels, @styles, shared_strings, workbook)
|
154
|
+
@sheet = SheetDoc.new(sheet_path, @rels, @styles, shared_strings, workbook, options)
|
154
155
|
end
|
155
156
|
|
156
157
|
def cells
|
@@ -162,13 +163,16 @@ class Roo::Excelx < Roo::Base
|
|
162
163
|
end
|
163
164
|
|
164
165
|
# Yield each row as array of Excelx::Cell objects
|
165
|
-
# accepts options max_rows (int) (offset by 1 for header)
|
166
|
-
#
|
166
|
+
# accepts options max_rows (int) (offset by 1 for header),
|
167
|
+
# pad_cells (boolean) and offset (int)
|
167
168
|
def each_row(options = {}, &block)
|
168
169
|
row_count = 0
|
170
|
+
options[:offset] ||= 0
|
169
171
|
@sheet.each_row_streaming do |row|
|
170
|
-
break if options[:max_rows] && row_count == options[:max_rows] + 1
|
171
|
-
|
172
|
+
break if options[:max_rows] && row_count == options[:max_rows] + options[:offset] + 1
|
173
|
+
if block_given? && !(options[:offset] && row_count < options[:offset])
|
174
|
+
block.call(cells_for_row_element(row, options))
|
175
|
+
end
|
172
176
|
row_count += 1
|
173
177
|
end
|
174
178
|
end
|
@@ -187,25 +191,26 @@ class Roo::Excelx < Roo::Base
|
|
187
191
|
|
188
192
|
# returns the number of the first non-empty row
|
189
193
|
def first_row
|
190
|
-
@first_row ||= present_cells.keys.map {|row,
|
194
|
+
@first_row ||= present_cells.keys.map {|row, _| row }.min
|
191
195
|
end
|
192
196
|
|
193
197
|
def last_row
|
194
|
-
@last_row ||= present_cells.keys.map {|row,
|
198
|
+
@last_row ||= present_cells.keys.map {|row, _| row }.max
|
195
199
|
end
|
196
200
|
|
197
201
|
# returns the number of the first non-empty column
|
198
|
-
def first_column
|
199
|
-
@first_column ||= present_cells.keys.map {|
|
202
|
+
def first_column
|
203
|
+
@first_column ||= present_cells.keys.map {|_, col| col }.min
|
200
204
|
end
|
201
205
|
|
202
206
|
# returns the number of the last non-empty column
|
203
|
-
def last_column
|
204
|
-
@last_column ||= present_cells.keys.map {|
|
207
|
+
def last_column
|
208
|
+
@last_column ||= present_cells.keys.map {|_, col| col }.max
|
205
209
|
end
|
206
210
|
|
207
211
|
def excelx_format(key)
|
208
|
-
|
212
|
+
cell = cells[key]
|
213
|
+
@styles.style_format(cell.style).to_s if cell
|
209
214
|
end
|
210
215
|
|
211
216
|
def hyperlinks
|
@@ -250,23 +255,32 @@ class Roo::Excelx < Roo::Base
|
|
250
255
|
# values for packed: :zip
|
251
256
|
# optional cell_max (int) parameter for early aborting attempts to parse
|
252
257
|
# enormous documents.
|
253
|
-
def initialize(
|
258
|
+
def initialize(filename_or_stream, options = {})
|
254
259
|
packed = options[:packed]
|
255
260
|
file_warning = options.fetch(:file_warning, :error)
|
256
261
|
cell_max = options.delete(:cell_max)
|
262
|
+
sheet_options = {}
|
263
|
+
sheet_options[:expand_merged_ranges] = (options[:expand_merged_ranges] || false)
|
257
264
|
|
258
|
-
|
265
|
+
unless is_stream?(filename_or_stream)
|
266
|
+
file_type_check(filename_or_stream,'.xlsx','an Excel-xlsx', file_warning, packed)
|
267
|
+
basename = File.basename(filename_or_stream)
|
268
|
+
end
|
259
269
|
|
260
|
-
@tmpdir = make_tmpdir(
|
261
|
-
@filename = local_filename(
|
270
|
+
@tmpdir = make_tmpdir(basename, options[:tmpdir_root])
|
271
|
+
@filename = local_filename(filename_or_stream, @tmpdir, packed)
|
262
272
|
@comments_files = []
|
263
273
|
@rels_files = []
|
264
|
-
process_zipfile(@
|
274
|
+
process_zipfile(@filename || filename_or_stream)
|
265
275
|
|
266
|
-
@sheet_names = workbook.sheets.map
|
276
|
+
@sheet_names = workbook.sheets.map do |sheet|
|
277
|
+
unless options[:only_visible_sheets] && sheet['state'] == 'hidden'
|
278
|
+
sheet['name']
|
279
|
+
end
|
280
|
+
end.compact
|
267
281
|
@sheets = []
|
268
282
|
@sheets_by_name = Hash[@sheet_names.map.with_index do |sheet_name, n|
|
269
|
-
@sheets[n] = Sheet.new(sheet_name, @rels_files[n], @sheet_files[n], @comments_files[n], styles, shared_strings, workbook)
|
283
|
+
@sheets[n] = Sheet.new(sheet_name, @rels_files[n], @sheet_files[n], @comments_files[n], styles, shared_strings, workbook, sheet_options)
|
270
284
|
[sheet_name, @sheets[n]]
|
271
285
|
end]
|
272
286
|
|
@@ -276,11 +290,14 @@ class Roo::Excelx < Roo::Base
|
|
276
290
|
end
|
277
291
|
|
278
292
|
super
|
293
|
+
rescue => e # clean up any temp files, but only if an error was raised
|
294
|
+
close
|
295
|
+
raise e
|
279
296
|
end
|
280
297
|
|
281
298
|
def method_missing(method,*args)
|
282
299
|
if label = workbook.defined_names[method.to_s]
|
283
|
-
sheet_for(label.sheet).cells[label.key]
|
300
|
+
safe_send(sheet_for(label.sheet).cells[label.key], :value)
|
284
301
|
else
|
285
302
|
# call super for methods like #a1
|
286
303
|
super
|
@@ -303,8 +320,7 @@ class Roo::Excelx < Roo::Base
|
|
303
320
|
# cell at the first line and first row.
|
304
321
|
def cell(row, col, sheet=nil)
|
305
322
|
key = normalize(row,col)
|
306
|
-
|
307
|
-
cell.value if cell
|
323
|
+
safe_send(sheet_for(sheet).cells[key], :value)
|
308
324
|
end
|
309
325
|
|
310
326
|
def row(rownumber,sheet=nil)
|
@@ -354,7 +370,7 @@ class Roo::Excelx < Roo::Base
|
|
354
370
|
# The method #formula? checks if there is a formula.
|
355
371
|
def formula(row,col,sheet=nil)
|
356
372
|
key = normalize(row,col)
|
357
|
-
sheet_for(sheet).cells[key]
|
373
|
+
safe_send(sheet_for(sheet).cells[key], :formula)
|
358
374
|
end
|
359
375
|
|
360
376
|
# Predicate methods really should return a boolean
|
@@ -375,7 +391,8 @@ class Roo::Excelx < Roo::Base
|
|
375
391
|
# Given a cell, return the cell's style
|
376
392
|
def font(row, col, sheet=nil)
|
377
393
|
key = normalize(row,col)
|
378
|
-
|
394
|
+
definition_index = safe_send(sheet_for(sheet).cells[key], :style)
|
395
|
+
styles.definitions[definition_index] if definition_index
|
379
396
|
end
|
380
397
|
|
381
398
|
# returns the type of a cell:
|
@@ -388,7 +405,7 @@ class Roo::Excelx < Roo::Base
|
|
388
405
|
# * :datetime
|
389
406
|
def celltype(row,col,sheet=nil)
|
390
407
|
key = normalize(row, col)
|
391
|
-
sheet_for(sheet).cells[key]
|
408
|
+
safe_send(sheet_for(sheet).cells[key], :type)
|
392
409
|
end
|
393
410
|
|
394
411
|
# returns the internal type of an excel cell
|
@@ -397,14 +414,14 @@ class Roo::Excelx < Roo::Base
|
|
397
414
|
# Note: this is only available within the Excelx class
|
398
415
|
def excelx_type(row,col,sheet=nil)
|
399
416
|
key = normalize(row,col)
|
400
|
-
sheet_for(sheet).cells[key]
|
417
|
+
safe_send(sheet_for(sheet).cells[key], :excelx_type)
|
401
418
|
end
|
402
419
|
|
403
420
|
# returns the internal value of an excelx cell
|
404
421
|
# Note: this is only available within the Excelx class
|
405
422
|
def excelx_value(row,col,sheet=nil)
|
406
423
|
key = normalize(row,col)
|
407
|
-
sheet_for(sheet).cells[key]
|
424
|
+
safe_send(sheet_for(sheet).cells[key], :excelx_value)
|
408
425
|
end
|
409
426
|
|
410
427
|
# returns the internal format of an excel cell
|
@@ -489,42 +506,148 @@ class Roo::Excelx < Roo::Base
|
|
489
506
|
|
490
507
|
private
|
491
508
|
|
509
|
+
# Sanitizes the String values of every cell on the named sheet and flags
# the sheet as cleaned.
#
# sheet - key into @sheets_by_name (and into @cleaned).
#
# Cells whose value is not a String are skipped. Returns true.
def clean_sheet(sheet)
  @sheets_by_name[sheet].cells.each_pair do |coord, cell|
    raw = cell.value
    if raw.is_a?(::String)
      # Write through the sheet's cell table, exactly as the lookup was
      # done before, rather than through the yielded object.
      @sheets_by_name[sheet].cells[coord].value = sanitize_value(raw)
    end
  end

  @cleaned[sheet] = true
end
|
518
|
+
|
519
|
+
# Internal: extracts the worksheet_ids from the workbook.xml file. xlsx
|
520
|
+
# documents require a workbook.xml file, so if the file is missing
|
521
|
+
# it is not a valid xlsx file. In these cases, an ArgumentError is
|
522
|
+
# raised.
|
523
|
+
#
|
524
|
+
# entries - a collection of Zip::Entry objects; the entry whose name ends in workbook.xml is used.
|
525
|
+
# path - A String for Zip::Entry's destination path.
|
526
|
+
#
|
527
|
+
# Examples
|
528
|
+
#
|
529
|
+
# extract_worksheet_ids([<Zip::Entry>, ...], 'tmpdir/roo_workbook.xml')
|
530
|
+
# # => ["rId1", "rId2", "rId3"]
|
531
|
+
#
|
532
|
+
# Returns an Array of Strings.
|
533
|
+
def extract_worksheet_ids(entries, path)
  # entries - collection of zip entries responding to #name and #extract.
  # path    - destination the workbook entry is extracted to.
  #
  # Returns the 'id' attribute value of every <sheet> node, in document
  # order. Raises ArgumentError when no entry name ends in "workbook.xml".
  wb = entries.find { |e| e.name[/workbook\.xml\z/] }

  # The comma matters: `fail ArgumentError 'msg'` parses as a call to a
  # method named ArgumentError and dies with NoMethodError instead.
  fail ArgumentError, 'missing required workbook file' if wb.nil?

  wb.extract(path)
  workbook_doc = Roo::Utils.load_xml(path).remove_namespaces!
  workbook_doc.xpath('//sheet').map { |s| s.attributes['id'].value }
end
|
541
|
+
|
542
|
+
# Internal
|
543
|
+
#
|
544
|
+
# entries - a collection of Zip::Entry objects; the entry whose name ends in workbook.xml.rels is used.
|
545
|
+
# path - A String for the Zip::Entry's destination path.
|
546
|
+
#
|
547
|
+
# Examples
|
548
|
+
#
|
549
|
+
# extract_worksheet_rels([<Zip::Entry>, ...], 'tmpdir/roo_workbook.xml.rels')
|
550
|
+
# # => {
|
551
|
+
# "rId1"=>"worksheets/sheet1.xml",
|
552
|
+
# "rId2"=>"worksheets/sheet2.xml",
|
553
|
+
# "rId3"=>"worksheets/sheet3.xml"
|
554
|
+
# }
|
555
|
+
#
|
556
|
+
# Returns a Hash.
|
557
|
+
def extract_worksheet_rels(entries, path)
  # entries - collection of zip entries responding to #name and #extract.
  # path    - destination the workbook.xml.rels entry is extracted to.
  #
  # Returns a Hash mapping relationship Id => Target for relationships
  # whose Type is the worksheet type. Raises ArgumentError when no entry
  # name ends in "workbook.xml.rels".
  wb_rels = entries.find { |e| e.name[/workbook\.xml\.rels\z/] }

  # The comma matters: without it Ruby looks for a method called
  # ArgumentError and raises NoMethodError instead.
  fail ArgumentError, 'missing required workbook file' if wb_rels.nil?

  wb_rels.extract(path)
  rels_doc = Roo::Utils.load_xml(path).remove_namespaces!
  worksheet_type = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet'

  rels_doc.xpath('//Relationship').each_with_object({}) do |relationship, targets|
    attributes = relationship.attributes
    # Relationships of any other type are not worksheets; skip them.
    next unless attributes['Type'].value == worksheet_type

    targets[attributes['Id'].value] = attributes['Target'].value
  end
end
|
576
|
+
|
577
|
+
# Extracts each worksheet entry to "<tmpdir>/roo_sheet<N>", where N is the
# 1-based position of the sheet's id in sheet_ids, and appends every
# destination path to @sheet_files in that same order.
#
# entries   - collection of zip entries responding to #name and #extract.
# sheet_ids - ordered list of relationship ids.
# sheets    - Hash mapping a relationship id to a worksheet target name.
# tmpdir    - directory prefix used for the destination paths.
#
# Raises ArgumentError when an id has no target in sheets or when no entry
# name ends with that target.
def extract_sheets_in_order(entries, sheet_ids, sheets, tmpdir)
  sheet_ids.each_with_index do |id, i|
    name = sheets[id]
    # Escape the target so "." and other metacharacters match literally;
    # interpolated raw, "sheet1.xml" would also match "sheet1xxml".
    pattern = name && /#{Regexp.escape(name)}\z/
    entry = pattern && entries.find { |candidate| candidate.name =~ pattern }
    # Fail with a descriptive error rather than NoMethodError on nil.
    fail ArgumentError, "missing worksheet file for #{id}" if entry.nil?

    path = "#{tmpdir}/roo_sheet#{i + 1}"
    @sheet_files << path
    entry.extract(path)
  end
end
|
586
|
+
|
492
587
|
# Extracts all needed files from the zip file
|
493
|
-
def process_zipfile(
|
588
|
+
def process_zipfile(zipfilename_or_stream)
|
494
589
|
@sheet_files = []
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
when /sheet.xml$/
|
505
|
-
path = "#{tmpdir}/roo_sheet"
|
506
|
-
@sheet_files.unshift(path)
|
507
|
-
path
|
508
|
-
when /sheet([0-9]+).xml$/
|
509
|
-
# Numbers 3.1 exports first sheet without sheet number. Such sheets
|
510
|
-
# are always added to the beginning of the array which, naturally,
|
511
|
-
# causes other sheets to be pushed to the next index which could
|
512
|
-
# lead to sheet references getting overwritten, so we need to
|
513
|
-
# handle that case specifically.
|
514
|
-
nr = $1
|
515
|
-
sheet_files_index = nr.to_i - 1
|
516
|
-
sheet_files_index += 1 if @sheet_files[sheet_files_index]
|
517
|
-
@sheet_files[sheet_files_index] = "#{tmpdir}/roo_sheet#{nr.to_i}"
|
518
|
-
when /comments([0-9]+).xml$/
|
519
|
-
nr = $1
|
520
|
-
@comments_files[nr.to_i-1] = "#{tmpdir}/roo_comments#{nr}"
|
521
|
-
when /sheet([0-9]+).xml.rels$/
|
522
|
-
nr = $1
|
523
|
-
@rels_files[nr.to_i-1] = "#{tmpdir}/roo_rels#{nr}"
|
590
|
+
|
591
|
+
unless is_stream?(zipfilename_or_stream)
|
592
|
+
process_zipfile_entries Zip::File.open(zipfilename_or_stream).to_a.sort_by(&:name)
|
593
|
+
else
|
594
|
+
stream = Zip::InputStream.open zipfilename_or_stream
|
595
|
+
begin
|
596
|
+
entries = []
|
597
|
+
while entry = stream.get_next_entry
|
598
|
+
entries << entry
|
524
599
|
end
|
525
|
-
|
526
|
-
|
600
|
+
process_zipfile_entries entries
|
601
|
+
ensure
|
602
|
+
stream.close
|
603
|
+
end
|
604
|
+
end
|
605
|
+
end
|
606
|
+
|
607
|
+
# Extracts the parts of an xlsx archive that are read later: the workbook,
# its relationships, the worksheets (in workbook order), shared strings,
# styles, comments and per-sheet relationship files.
#
# entries - collection of zip entries responding to #name and #extract.
#
# Fills @comments_files and @rels_files (indexed by the number in the file
# name, minus one) and writes the extracted files under @tmpdir.
def process_zipfile_entries(entries)
  # NOTE: When Google or Numbers 3.1 exports to xlsx, the worksheet filenames
  #       are not in order. With Numbers 3.1, the first sheet is always
  #       sheet.xml, not sheet1.xml. With Google, the order of the worksheets is
  #       independent of a worksheet's filename (i.e. sheet6.xml can be the
  #       first worksheet).
  #
  #       workbook.xml lists the correct order of worksheets and
  #       workbook.xml.rels lists the filenames for those worksheets.
  #
  #       workbook.xml:
  #         <sheet state="visible" name="IS" sheetId="1" r:id="rId3"/>
  #         <sheet state="visible" name="BS" sheetId="2" r:id="rId4"/>
  #       workbook.xml.rels:
  #         <Relationship Id="rId4" Target="worksheets/sheet5.xml" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"/>
  #         <Relationship Id="rId3" Target="worksheets/sheet4.xml" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"/>
  sheet_ids = extract_worksheet_ids(entries, "#{@tmpdir}/roo_workbook.xml")
  sheets = extract_worksheet_rels(entries, "#{@tmpdir}/roo_workbook.xml.rels")
  extract_sheets_in_order(entries, sheet_ids, sheets, @tmpdir)

  entries.each do |entry|
    # Dots are escaped so that e.g. "stylesAxml" is not mistaken for
    # "styles.xml"; names are compared case-insensitively via downcase.
    path =
      case entry.name.downcase
      when /sharedstrings\.xml\z/
        "#{@tmpdir}/roo_sharedStrings.xml"
      when /styles\.xml\z/
        "#{@tmpdir}/roo_styles.xml"
      when /comments([0-9]+)\.xml\z/
        # FIXME: Most of the time, the order of the comment files is the
        #        same as the sheet order, i.e. sheet1.xml's comments are in
        #        comments1.xml. In some situations, this isn't true. The
        #        true location of a sheet's comment file is in the
        #        sheet1.xml.rels file. SEE ECMA-376 12.3.3 in
        #        "Ecma Office Open XML Part 1".
        nr = Regexp.last_match(1).to_i
        @comments_files[nr - 1] = "#{@tmpdir}/roo_comments#{nr}"
      when /sheet([0-9]+)\.xml\.rels\z/
        # FIXME: Roo seems to use sheet[\d].xml.rels for hyperlinks only, but
        #        it also stores the location for sharedStrings, comments,
        #        drawings, etc.
        nr = Regexp.last_match(1).to_i
        @rels_files[nr - 1] = "#{@tmpdir}/roo_rels#{nr}"
      end

    # Entries that match none of the patterns yield nil and are skipped.
    entry.extract(path) if path
  end
end
|
530
653
|
|
@@ -539,4 +662,8 @@ class Roo::Excelx < Roo::Base
|
|
539
662
|
def workbook
|
540
663
|
@workbook ||= Workbook.new(File.join(@tmpdir, "roo_workbook.xml"))
|
541
664
|
end
|
665
|
+
|
666
|
+
# Calls +method+ on +object+ with +args+, but only when the object is
# truthy and reports that it responds to the method.
#
# Returns the method's result, or nil when the call was not made.
def safe_send(object, method, *args)
  return nil unless object
  return nil unless object.respond_to?(method)

  object.send(method, *args)
end
|
542
669
|
end
|
data/lib/roo/excelx/comments.rb
CHANGED
@@ -12,7 +12,8 @@ module Roo
|
|
12
12
|
def extract_comments
|
13
13
|
if doc_exists?
|
14
14
|
Hash[doc.xpath("//comments/commentList/comment").map do |comment|
|
15
|
-
|
15
|
+
value = (comment.at_xpath('./text/r/t') || comment.at_xpath('./text/t')).text
|
16
|
+
[::Roo::Utils.ref_to_key(comment.attributes['ref'].to_s), value]
|
16
17
|
end]
|
17
18
|
else
|
18
19
|
{}
|
data/lib/roo/excelx/sheet_doc.rb
CHANGED
@@ -2,8 +2,9 @@ require 'roo/excelx/extractor'
|
|
2
2
|
|
3
3
|
module Roo
|
4
4
|
class Excelx::SheetDoc < Excelx::Extractor
|
5
|
-
def initialize(path, relationships, styles, shared_strings, workbook)
|
5
|
+
def initialize(path, relationships, styles, shared_strings, workbook, options = {})
|
6
6
|
super(path)
|
7
|
+
@options = options
|
7
8
|
@relationships = relationships
|
8
9
|
@styles = styles
|
9
10
|
@shared_strings = shared_strings
|
@@ -43,6 +44,8 @@ module Roo
|
|
43
44
|
private
|
44
45
|
|
45
46
|
def cell_from_xml(cell_xml, hyperlink)
|
47
|
+
# This is error prone, to_i will silently turn a nil into a 0
|
48
|
+
# and it works by coincidence that Format[0] is general
|
46
49
|
style = cell_xml['s'].to_i # should be here
|
47
50
|
# c: <c r="A5" s="2">
|
48
51
|
# <v>22606</v>
|
@@ -120,15 +123,39 @@ module Roo
|
|
120
123
|
end.compact]
|
121
124
|
end
|
122
125
|
|
126
|
+
# Copies the top-left cell of every merged range into all other
# coordinates of that range, mutating +cells+ in place.
#
# cells - Hash keyed by [row, col] pairs.
#
# Merged ranges are read from the sheet's mergeCell nodes. Each copied
# entry is the same object as the top-left entry, not a duplicate. A ref
# that names a single cell (no ":") covers only that cell and adds nothing.
def expand_merged_ranges(cells)
  # Map every non-anchor coordinate of a merged range to its top-left key.
  merges = {}
  doc.xpath('/worksheet/mergeCells/mergeCell').each do |mergecell_xml|
    tl, br = mergecell_xml['ref'].split(':').map { |ref| ::Roo::Utils.ref_to_key(ref) }
    # Without a ":" there is no bottom-right corner; treat it as 1x1.
    br ||= tl

    (tl[0]..br[0]).each do |row|
      (tl[1]..br[1]).each do |col|
        next if row == tl[0] && col == tl[1]

        merges[[row, col]] = tl
      end
    end
  end

  # Duplicate the anchor's entry into all other cells of the merged range.
  merges.each { |dst, src| cells[dst] = cells[src] }
end
|
143
|
+
|
123
144
|
def extract_cells(relationships)
|
124
|
-
Hash[doc.xpath("/worksheet/sheetData/row/c").map do |cell_xml|
|
145
|
+
extracted_cells = Hash[doc.xpath("/worksheet/sheetData/row/c").map do |cell_xml|
|
125
146
|
key = ::Roo::Utils.ref_to_key(cell_xml['r'])
|
126
147
|
[key, cell_from_xml(cell_xml, hyperlinks(relationships)[key])]
|
127
148
|
end]
|
149
|
+
if @options[:expand_merged_ranges]
|
150
|
+
expand_merged_ranges(extracted_cells)
|
151
|
+
end
|
152
|
+
extracted_cells
|
128
153
|
end
|
129
154
|
|
130
155
|
def extract_dimensions
|
131
|
-
|
156
|
+
Roo::Utils.each_element(@path, 'dimension') do |dimension|
|
157
|
+
return dimension.attributes["ref"].value
|
158
|
+
end
|
132
159
|
end
|
133
160
|
|
134
161
|
=begin
|