ruh-roo 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +677 -0
  3. data/Gemfile +24 -0
  4. data/LICENSE +24 -0
  5. data/README.md +315 -0
  6. data/lib/roo/base.rb +607 -0
  7. data/lib/roo/constants.rb +7 -0
  8. data/lib/roo/csv.rb +141 -0
  9. data/lib/roo/errors.rb +11 -0
  10. data/lib/roo/excelx/cell/base.rb +108 -0
  11. data/lib/roo/excelx/cell/boolean.rb +30 -0
  12. data/lib/roo/excelx/cell/date.rb +28 -0
  13. data/lib/roo/excelx/cell/datetime.rb +107 -0
  14. data/lib/roo/excelx/cell/empty.rb +20 -0
  15. data/lib/roo/excelx/cell/number.rb +89 -0
  16. data/lib/roo/excelx/cell/string.rb +19 -0
  17. data/lib/roo/excelx/cell/time.rb +44 -0
  18. data/lib/roo/excelx/cell.rb +110 -0
  19. data/lib/roo/excelx/comments.rb +55 -0
  20. data/lib/roo/excelx/coordinate.rb +19 -0
  21. data/lib/roo/excelx/extractor.rb +39 -0
  22. data/lib/roo/excelx/format.rb +71 -0
  23. data/lib/roo/excelx/images.rb +26 -0
  24. data/lib/roo/excelx/relationships.rb +33 -0
  25. data/lib/roo/excelx/shared.rb +39 -0
  26. data/lib/roo/excelx/shared_strings.rb +151 -0
  27. data/lib/roo/excelx/sheet.rb +151 -0
  28. data/lib/roo/excelx/sheet_doc.rb +248 -0
  29. data/lib/roo/excelx/styles.rb +64 -0
  30. data/lib/roo/excelx/workbook.rb +63 -0
  31. data/lib/roo/excelx.rb +480 -0
  32. data/lib/roo/font.rb +17 -0
  33. data/lib/roo/formatters/base.rb +15 -0
  34. data/lib/roo/formatters/csv.rb +84 -0
  35. data/lib/roo/formatters/matrix.rb +23 -0
  36. data/lib/roo/formatters/xml.rb +31 -0
  37. data/lib/roo/formatters/yaml.rb +40 -0
  38. data/lib/roo/helpers/default_attr_reader.rb +20 -0
  39. data/lib/roo/helpers/weak_instance_cache.rb +41 -0
  40. data/lib/roo/libre_office.rb +4 -0
  41. data/lib/roo/link.rb +34 -0
  42. data/lib/roo/open_office.rb +628 -0
  43. data/lib/roo/spreadsheet.rb +39 -0
  44. data/lib/roo/tempdir.rb +21 -0
  45. data/lib/roo/utils.rb +128 -0
  46. data/lib/roo/version.rb +3 -0
  47. data/lib/roo.rb +36 -0
  48. data/roo.gemspec +28 -0
  49. metadata +189 -0
@@ -0,0 +1,628 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'date'
4
+ require 'nokogiri'
5
+ require 'cgi'
6
+ require 'zip/filesystem'
7
+ require 'roo/font'
8
+ require 'roo/tempdir'
9
+ require 'base64'
10
+ require 'openssl'
11
+
12
+ module Roo
13
+ class OpenOffice < Roo::Base
14
+ extend Roo::Tempdir
15
+
16
+ ERROR_MISSING_CONTENT_XML = 'file missing required content.xml'
17
+ XPATH_FIND_TABLE_STYLES = "//*[local-name()='automatic-styles']"
18
+ XPATH_LOCAL_NAME_TABLE = "//*[local-name()='table']"
19
+
20
# Opens an OpenDocument spreadsheet and collects the names of its sheets.
#
# Options read here:
#   :packed              - forwarded to file_type_check and local_filename
#                          (documented value: :zip)
#   :file_warning        - forwarded to file_type_check, defaults to :error
#   :only_visible_sheets - when truthy, sheets whose table style is marked
#                          as not displayed are left out of the sheet list
#   :tmpdir_root         - forwarded to make_tempdir
# The complete options hash is also handed to open_oo_file and to super.
#
# If anything raises during setup, the temp directories registered for this
# object are removed and the error is re-raised.
def initialize(filename, options = {})
  packed = options[:packed]
  file_warning = options[:file_warning] || :error
  @only_visible_sheets = options[:only_visible_sheets]

  file_type_check(filename, '.ods', 'an Roo::OpenOffice', file_warning, packed)

  # NOTE: Create temp directory and allow Ruby to cleanup the temp directory
  #       when the object is garbage collected. Initially, the finalizer was
  #       created in the Roo::Tempdir module, but that led to a segfault
  #       when testing in Ruby 2.4.0.
  @tmpdir = self.class.make_tempdir(self, find_basename(filename), options[:tmpdir_root])
  ObjectSpace.define_finalizer(self, self.class.finalize(object_id))
  @filename = local_filename(filename, @tmpdir, packed)
  # TODO: @cells_read[:default] = false
  open_oo_file(options)
  super(filename, options)
  initialize_default_variables

  if @table_display.empty?
    doc.xpath(XPATH_FIND_TABLE_STYLES).each { |style| read_table_styles(style) }
  end

  listed_tables = doc.xpath(XPATH_LOCAL_NAME_TABLE).select do |table|
    !@only_visible_sheets || @table_display[attribute(table, 'style-name')]
  end
  @sheet_names = listed_tables.map { |table| table.attributes['name'].value }
rescue
  self.class.finalize_tempdirs(object_id)
  raise
end
55
+
56
# Extracts content.xml from the archive at @filename into the temp directory
# as roo_content.xml, then hands over to decrypt_if_necessary.
#
# Raises ArgumentError when the archive has no content.xml entry.
def open_oo_file(options)
  Zip::File.open(@filename) do |archive|
    content_entry = archive.glob('content.xml').first
    raise ArgumentError, ERROR_MISSING_CONTENT_XML if content_entry.nil?

    extracted_path = ::File.join(@tmpdir, 'roo_content.xml')
    content_entry.extract(extracted_path)
    decrypt_if_necessary(archive, content_entry, extracted_path, options)
  end
end
66
+
67
# Resets the per-document lookup tables to their empty state.
#
# @style_defaults hands out a fresh array for every unknown key and
# @table_display treats every unknown table style as displayed (true).
def initialize_default_variables
  @style_defaults = Hash.new { |hash, key| hash[key] = [] }
  @table_display = Hash.new { |hash, key| hash[key] = true }
  @font_style_definitions = {}
  @formula = {}
  @style = {}
  @comment = {}
  @comments_read = {}
end
76
+
77
# Lets a label name be used as a method: when the missing method's name is a
# known label, the content of the labelled cell is returned. Everything else
# is passed up the chain (for methods like #a1).
def method_missing(m, *args)
  read_labels
  label_name = m.to_s
  return super unless @label.key?(label_name)

  row, col = label(label_name)
  cell(row, col)
end
88
+
89
# Returns the content of a spreadsheet cell.
# (1,1) is the upper left corner. The coordinates are passed through
# normalize, so (1,1), (1,'A'), ('A',1) and ('a',1) are all accepted for the
# first cell.
#
# Cells of type :date are returned as Date objects built from the stored
# "YYYY-MM-DD" value. Returns nil when the cell is unknown, including when
# nothing at all was parsed for the sheet.
def cell(row, col, sheet = nil)
  sheet ||= default_sheet
  read_cells(sheet)
  row, col = normalize(row, col)

  # The per-sheet hashes only exist once at least one cell has been stored;
  # bail out early instead of calling [] on nil.
  sheet_cells = @cell[sheet]
  return nil unless sheet_cells

  value = sheet_cells[[row, col]]
  if celltype(row, col, sheet) == :date
    yyyy, mm, dd = value.to_s.split('-')
    return Date.new(yyyy.to_i, mm.to_i, dd.to_i)
  end

  value
end
104
+
105
# Returns the formula at (row,col).
# Returns nil if there is no formula, including when no formula table exists
# for the sheet because no cell was parsed for it.
# The method #formula? checks if there is a formula.
def formula(row, col, sheet = nil)
  sheet ||= default_sheet
  read_cells(sheet)
  row, col = normalize(row, col)

  # Same guard as #formulas: the per-sheet hash is created lazily.
  sheet_formulas = @formula[sheet]
  return nil unless sheet_formulas

  sheet_formulas[[row, col]]
end
114
+
115
# Tells whether the addressed cell holds a formula. Takes the same arguments
# as #formula and always answers with a real boolean (earlier versions
# returned nil or the formula itself).
def formula?(*args)
  formula(*args) ? true : false
end
121
+
122
# Lists every formula of the selected sheet as [row, col, formula] triples.
# Returns an empty array when the sheet has no formula table.
def formulas(sheet = nil)
  sheet ||= default_sheet
  read_cells(sheet)

  sheet_formulas = @formula[sheet]
  return [] unless sheet_formulas

  sheet_formulas.map { |(row, col), text| [row, col, text] }
end
132
+
133
# Given a cell, return the cell's font definition.
#
# The style name is looked up in this order: the style recorded for the cell
# itself, the default style recorded for the cell's column, and finally
# 'Default'. Returns whatever @font_style_definitions holds for that name
# (nil when the name is unknown).
def font(row, col, sheet = nil)
  sheet ||= default_sheet
  read_cells(sheet)
  row, col = normalize(row, col)

  # @style[sheet] is only created once a cell has been stored for the sheet;
  # fall back to an empty table so the lookup chain still reaches 'Default'.
  cell_styles = @style[sheet] || {}
  style_name = cell_styles[[row, col]] || @style_defaults[sheet][col - 1] || 'Default'
  @font_style_definitions[style_name]
end
141
+
142
# returns the type of a cell:
# * :float
# * :string
# * :date
# * :percentage
# * :formula
# * :time
# * :datetime
#
# A cell that has a formula is always reported as :formula. Returns nil for
# an unknown cell, including when nothing was parsed for the sheet.
def celltype(row, col, sheet = nil)
  sheet ||= default_sheet
  read_cells(sheet)
  row, col = normalize(row, col)
  key = [row, col]

  # Both per-sheet tables are created lazily, so either may still be nil.
  sheet_formulas = @formula[sheet]
  return :formula if sheet_formulas && sheet_formulas[key]

  sheet_types = @cell_type[sheet]
  sheet_types && sheet_types[key]
end
156
+
157
# Names of the sheets collected when the document was opened (restricted to
# displayed sheets when the :only_visible_sheets option was given).
def sheets
  @sheet_names
end
160
+
161
# Version of the Roo::OpenOffice document as read by oo_version from the
# document-content element (as of 2007 this was always "1.0").
def officeversion
  oo_version
  @officeversion
end
167
+
168
+ # shows the internal representation of all cells
169
+ # mainly for debugging purposes
170
+ def to_s(sheet = nil)
171
+ sheet ||= default_sheet
172
+ read_cells(sheet)
173
+ @cell[sheet].inspect
174
+ end
175
+
176
# Looks up a labelled cell and returns [row, col, sheetname].
# Returns [nil, nil, nil] if the label is not defined.
def label(labelname)
  read_labels
  entry = @label.fetch(labelname) { return [nil, nil, nil] }

  sheet_name, row, col = entry
  [row.to_i, ::Roo::Utils.letter_to_number(col), sheet_name]
end
187
+
188
# Returns an array with all labels. Each element is an array of the form
# [labelname, [row, col, sheetname]]. The sheet argument is ignored.
def labels(_sheet = nil)
  read_labels
  @label.map do |name, (sheet_name, row, col)|
    [name, [row.to_i, ::Roo::Utils.letter_to_number(col), sheet_name]]
  end
end
200
+
201
# Returns the comment stored for the cell at (row, col),
# or nil if the cell (or the whole sheet) has no comment.
def comment(row, col, sheet = nil)
  sheet ||= default_sheet
  read_cells(sheet)
  row, col = normalize(row, col)

  sheet_comments = @comment[sheet]
  sheet_comments ? sheet_comments[[row, col]] : nil
end
210
+
211
# Lists every comment of the selected sheet as [row, col, comment] triples.
# The sheet is read first unless its comments were already loaded; an empty
# array is returned when the sheet has no comments.
def comments(sheet = nil)
  sheet ||= default_sheet
  read_comments(sheet) unless @comments_read[sheet]

  sheet_comments = @comment[sheet]
  return [] unless sheet_comments

  sheet_comments.map { |(row, col), text| [row, col, text] }
end
221
+
222
+ private
223
+
224
# If the ODS file has an encryption-data element for content.xml, try to
# decrypt it. On success the temporary content.xml copy is overwritten with
# the decrypted contents.
#
# Raises ArgumentError when META-INF/manifest.xml is missing, or when the
# content is encrypted and options[:password] is nil.
def decrypt_if_necessary(
  zip_file,
  content_entry,
  roo_content_xml_path, options
)
  # Encryption is announced in the manifest, so that is extracted first.
  manifest_entry = zip_file.glob('META-INF/manifest.xml').first
  fail ArgumentError, 'file missing required META-INF/manifest.xml' unless manifest_entry

  roo_manifest_xml_path = File.join(@tmpdir, 'roo_manifest.xml')
  manifest_entry.extract(roo_manifest_xml_path)
  manifest = ::Roo::Utils.load_xml(roo_manifest_xml_path)

  # Only the encryption-data element of the content.xml entry matters.
  encryption_data = manifest.xpath(
    "//manifest:file-entry[@manifest:full-path='content.xml']"\
    "/manifest:encryption-data"
  ).first

  # No such node means content.xml is stored unencrypted.
  return if encryption_data.nil?

  password = options[:password]
  fail ArgumentError, 'file is encrypted but password was not supplied' if password.nil?

  perform_decryption(encryption_data, password, content_entry, roo_content_xml_path)
end
271
+
272
# Process the ODS encryption manifest and perform the decryption: read the
# cipher parameters from the encryption-data element, decrypt content.xml and
# overwrite the extracted copy with the inflated plaintext.
#
# Raises ArgumentError when one of the expected manifest elements is
# missing, when the start-key-generation algorithm is not SHA-256, or when
# decrypting/inflating fails (wrong password or damaged data).
def perform_decryption(
  encryption_data,
  password,
  content_entry,
  roo_content_xml_path
)
  # The three child elements that together describe the encryption.
  algorithm_node, key_derivation_node, start_key_generation_node =
    %w[manifest:algorithm manifest:key-derivation manifest:start-key-generation].map do |element|
      encryption_data.xpath(element).first
    end

  if algorithm_node.nil? || key_derivation_node.nil? || start_key_generation_node.nil?
    fail ArgumentError, 'manifest.xml missing encryption-data elements'
  end

  # The algorithm is a URI; IV and salt are base-64 encoded.
  algorithm = algorithm_node['manifest:algorithm-name']
  iv = Base64.decode64(algorithm_node['manifest:initialisation-vector'])
  key_derivation_name = key_derivation_node['manifest:key-derivation-name']
  iteration_count = key_derivation_node['manifest:iteration-count'].to_i
  salt = Base64.decode64(key_derivation_node['manifest:salt'])

  # URI of the hash that is applied to the password before key derivation.
  key_generation_name = start_key_generation_node['manifest:start-key-generation-name']
  unless key_generation_name == 'http://www.w3.org/2000/09/xmldsig#sha256'
    fail ArgumentError, "Unknown key generation algorithm #{key_generation_name}"
  end

  cipher = find_cipher(
    algorithm,
    key_derivation_name,
    Digest::SHA256.digest(password),
    salt,
    iteration_count,
    iv
  )

  begin
    plaintext = decrypt(content_entry, cipher)

    # The decrypted stream is raw deflate data; inflate it over content.xml.
    IO.binwrite(
      roo_content_xml_path,
      Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(plaintext)
    )
  rescue StandardError => error
    raise ArgumentError, "Invalid password or other data error: #{error}"
  end
end
345
+
346
# Create a cipher based on an ODS algorithm URI from manifest.xml
# params: algorithm, key_derivation_name, hashed_password, salt, iteration_count, iv
#
# Only AES-256-CBC is supported. Returns an OpenSSL cipher in decrypt mode
# with padding disabled and key/IV applied.
# Raises ArgumentError for any other algorithm URI.
def find_cipher(*args)
  algorithm, key_derivation_name, hashed_password, salt, iteration_count, iv = args

  # Interpolate the received value; the message previously referenced a
  # local that does not exist in this method.
  unless algorithm == 'http://www.w3.org/2001/04/xmlenc#aes256-cbc'
    fail ArgumentError, "Unknown algorithm #{algorithm}"
  end

  cipher = ::OpenSSL::Cipher.new('AES-256-CBC')
  cipher.decrypt
  cipher.padding = 0
  cipher.key = find_cipher_key(cipher, key_derivation_name, hashed_password, salt, iteration_count)
  cipher.iv = iv

  cipher
end
359
+
360
# Create a cipher key based on an ODS algorithm string from manifest.xml
# params: cipher, key_derivation_name, hashed_password, salt, iteration_count
#
# Only PBKDF2 is supported; the key is derived with HMAC-SHA1 and has the
# cipher's key length. Raises ArgumentError for any other derivation name.
def find_cipher_key(*args)
  cipher, key_derivation_name, hashed_password, salt, iteration_count = args

  # The name belongs in the message; as a third argument to fail it would be
  # taken as the backtrace.
  unless key_derivation_name == 'PBKDF2'
    fail ArgumentError, "Unknown key derivation name #{key_derivation_name}"
  end

  ::OpenSSL::PKCS5.pbkdf2_hmac_sha1(hashed_password, salt, iteration_count, cipher.key_len)
end
366
+
367
# Block decrypt raw bytes from the zip file based on the cipher.
#
# Zip::Entry.extract writes a 0-length file when trying to extract an
# encrypted stream, so the raw bytes are read straight from @filename,
# starting right after the entry's local header and running for
# compressed_size bytes. Returns the decrypted bytes including the cipher's
# final block.
def decrypt(content_entry, cipher)
  block_size = 4096
  # Mutable buffer appended in place, so large entries are not re-copied on
  # every block.
  decrypted = String.new

  File.open(@filename, 'rb') do |zipfile|
    zipfile.seek(
      content_entry.local_header_offset +
        content_entry.calculate_local_header_size
    )
    remaining = content_entry.compressed_size

    # An empty entry never reaches cipher.update, which would otherwise be
    # handed an empty string.
    while remaining > 0
      buffer = zipfile.read([block_size, remaining].min)
      break unless buffer # file ended before compressed_size bytes were read

      decrypted << cipher.update(buffer)
      remaining -= buffer.length
    end
  end

  decrypted << cipher.final
end
395
+
396
# Parsed roo_content.xml from the temp directory; loaded on first use and
# kept for later calls.
def doc
  return @doc if @doc

  @doc = ::Roo::Utils.load_xml(File.join(@tmpdir, 'roo_content.xml'))
end
399
+
400
# Reads the 'version' attribute of every document-content element into
# @officeversion (the last element visited wins).
def oo_version
  doc.xpath("//*[local-name()='document-content']").each do |content_root|
    @officeversion = attribute(content_root, 'version')
  end
end
406
+
407
# helper function to set the internal representation of cells
#
# Stores type, formula, style and value for the cell at row +y+, column
# +x + i+ of +sheet+:
# * value_type - value-type attribute of the cell, stored as a symbol
# * formula    - formula text; a leading 'of:' / 'oooc:' prefix is removed
# * v          - raw value (attribute or paragraph text, depending on type)
# * str_v      - assembled text, used for :string cells
# * table_cell - the cell node, consulted for :float and :date cells
# * style_name - name of the cell's style
def set_cell_values(sheet, x, y, i, v, value_type, formula, table_cell, str_v, style_name)
  key = [y, x + i]
  @cell_type[sheet] ||= {}
  @cell_type[sheet][key] = value_type.to_sym if value_type
  @formula[sheet] ||= {}
  if formula
    ['of:', 'oooc:'].each do |prefix|
      formula = formula[prefix.length..-1] if formula.start_with?(prefix)
    end
    @formula[sheet][key] = formula
  end
  @cell[sheet] ||= {}
  @style[sheet] ||= {}
  @style[sheet][key] = style_name
  case @cell_type[sheet][key]
  when :float
    # A float is kept as Float when either the stored value or the displayed
    # text contains a decimal point. The displayed text is optional: a cell
    # may carry a value attribute without any child node.
    first_child = table_cell.children.first
    has_fraction = table_cell.attributes['value'].to_s.include?('.') ||
                   (!first_child.nil? && first_child.text.include?('.'))
    @cell[sheet][key] = has_fraction ? v.to_f : v.to_i
  when :percentage
    @cell[sheet][key] = v.to_f
  when :string
    @cell[sheet][key] = str_v
  when :date
    # TODO: if table_cell.attributes['date-value'].size != "XXXX-XX-XX".size
    if attribute(table_cell, 'date-value').size != 'XXXX-XX-XX'.size
      #-- a time of day is present as well
      #-- "1961-11-21T12:17:18"
      @cell[sheet][key] = DateTime.parse(attribute(table_cell, 'date-value').to_s)
      @cell_type[sheet][key] = :datetime
    else
      @cell[sheet][key] = table_cell.attributes['date-value']
    end
  when :time
    # "hh:mm:ss" is stored as seconds since midnight
    hms = v.split(':')
    @cell[sheet][key] = hms[0].to_i * 3600 + hms[1].to_i * 60 + hms[2].to_i
  else
    @cell[sheet][key] = v
  end
end
448
+
449
# read all cells in the selected sheet
#
# Walks the table whose name equals +sheet+, records the default cell style
# name of every table-column and passes every table-cell to set_cell_values.
# Annotation paragraphs found inside string cells are stored in @comment.
# Afterwards the automatic styles are read and the sheet is marked as read
# (cells and comments). Does nothing when the sheet was already read.
#
# Raises RangeError when no table with that name exists.
#--
# the following construct means '4 blanks'
# some content <text:s text:c="3"/>
#++
def read_cells(sheet = default_sheet)
  validate_sheet!(sheet)
  return if @cells_read[sheet]

  sheet_found = false
  doc.xpath("//*[local-name()='table']").each do |ws|
    next unless sheet == attribute(ws, 'name')

    sheet_found = true
    col = 1
    row = 1
    ws.children.each do |table_element|
      case table_element.name
      when 'table-column'
        # Keep the style *name*: #font uses it as a key into the font
        # definitions, which are indexed by name strings.
        @style_defaults[sheet] << attribute(table_element, 'default-cell-style-name')
      when 'table-row'
        if table_element.attributes['number-rows-repeated']
          skip_row = attribute(table_element, 'number-rows-repeated').to_s.to_i
          row = row + skip_row - 1
        end
        table_element.children.each do |cell|
          skip_col = attribute(cell, 'number-columns-repeated')
          formula = attribute(cell, 'formula')
          value_type = attribute(cell, 'value-type')
          v = attribute(cell, 'value')
          style_name = attribute(cell, 'style-name')
          str_v = nil
          case value_type
          when 'string'
            str_v = ''
            # insert \n if there is more than one paragraph
            para_count = 0
            cell.children.each do |str|
              # begin comments
              #
              # Example of a commented cell:
              #   <table:table-cell office:value-type="string">
              #     <office:annotation office:display="true" ...>
              #       <dc:date>2011-09-20T00:00:00</dc:date>
              #       <text:p text:style-name="P1">Kommentar fuer B4</text:p>
              #     </office:annotation>
              #     <text:p>B4 (mit Kommentar)</text:p>
              #   </table:table-cell>
              if str.name == 'annotation'
                str.children.each do |annotation|
                  next unless annotation.name == 'p'
                  # @comment is a hash keyed by sheet (like @cell); each
                  # entry is another hash with [row, col] as key and the
                  # comment text as value
                  @comment[sheet] ||= {}
                  @comment[sheet][[row, col]] = annotation.text
                end
              end
              # end comments
              if str.name == 'p'
                v = str.content
                str_v += "\n" if para_count > 0
                para_count += 1
                if str.children.size > 1
                  str_v += children_to_string(str.children)
                else
                  str.children.each do |child|
                    str_v += child.content
                  end
                end
                # &apos; is a special case not supported by unescapeHTML.
                # Non-mutating gsub: for an empty first paragraph str_v is
                # still the frozen '' literal, which gsub! cannot modify.
                str_v = CGI.unescapeHTML(str_v.gsub(/&apos;/, "'"))
              end # == 'p'
            end
          when 'time'
            cell.children.each do |str|
              v = str.content if str.name == 'p'
            end
          when '', nil, 'date', 'percentage', 'float'
            #
          when 'boolean'
            v = attribute(cell, 'boolean-value').to_s
          end
          if skip_col
            if !v.nil? || cell.attributes['date-value']
              0.upto(skip_col.to_i - 1) do |i|
                set_cell_values(sheet, col, row, i, v, value_type, formula, cell, str_v, style_name)
              end
            end
            col += (skip_col.to_i - 1)
          end # if skip
          set_cell_values(sheet, col, row, 0, v, value_type, formula, cell, str_v, style_name)
          col += 1
        end
        row += 1
        col = 1
      end
    end
  end
  doc.xpath("//*[local-name()='automatic-styles']").each do |style|
    read_styles(style)
  end

  fail RangeError unless sheet_found

  @cells_read[sheet] = true
  @comments_read[sheet] = true
end
558
+
559
# Only calls read_cells because Roo::Base calls read_comments
# whereas the reading of comments is done in read_cells for Roo::OpenOffice-objects
#
# When no sheet is given the default sheet is read, as in the public
# accessors; passing nil on would override read_cells' own default.
def read_comments(sheet = nil)
  sheet ||= default_sheet
  read_cells(sheet)
end
564
+
565
# Builds (once) the table of named ranges found in the document:
# name => [sheetname, row, col], with row and col still as the strings
# taken from an address such as "$Sheet1.$C$5".
def read_labels
  return @label if @label

  ranges = {}
  doc.xpath('//table:named-range').each do |named_range|
    name = attribute(named_range, 'name').to_s
    address = attribute(named_range, 'cell-range-address').to_s
    sheetname, coords = address.split('.$')
    col, row = coords.split('$')
    # drop the leading '$' of an absolute sheet reference
    sheetname = sheetname[1..-1] if sheetname[0, 1] == '$'
    ranges[name] = [sheetname, row, col]
  end
  @label = ranges
end
577
+
578
# Registers one Roo::Font per 'style' child of +style_elements+ in
# @font_style_definitions, keyed by the style's name, and (re)sets the
# 'Default' entry to a plain font.
#
# The children are walked explicitly, as read_table_styles does: iterating a
# Nokogiri node directly with #each yields its attribute pairs, not its child
# elements, so no style was ever visited.
def read_styles(style_elements)
  @font_style_definitions['Default'] = Roo::Font.new
  style_elements.children.each do |style|
    next unless style.name == 'style'

    style_name = attribute(style, 'name')
    font = Roo::Font.new
    style.children.each do |properties|
      # A style can have several property children; only take over the
      # attributes a child really carries so that a later child without font
      # information does not wipe earlier settings.
      weight = attribute(properties, 'font-weight')
      slant = attribute(properties, 'font-style')
      underline = attribute(properties, 'text-underline-style')
      font.bold = weight if weight
      font.italic = slant if slant
      font.underline = underline if underline
    end
    @font_style_definitions[style_name] = font
  end
end
592
+
593
# Records in @table_display, for every 'style' child of +styles+, whether the
# style is marked as displayed. Only property nodes that carry a 'display'
# attribute are considered; the stored value is true exactly when that
# attribute is the string 'true'.
def read_table_styles(styles)
  styles.children.each do |style|
    next if style.name != 'style'

    style_name = attribute(style, 'name')
    style.children.each do |properties|
      display = attribute(properties, 'display')
      @table_display[style_name] = (display == 'true') if display
    end
  end
end
603
+ end
604
+
605
# helper method to convert compressed spaces and other elements within
# a text into a string: text nodes and unknown elements contribute their
# content, an 's' element contributes as many blanks as its 'c' attribute
# says (one blank when the attribute is missing or zero).
# FIXME: add a test for compressed_spaces == 0. It's not currently tested.
def children_to_string(children)
  pieces = children.map do |child|
    next child.content if child.text? || child.name != 's'

    compressed_spaces = child.attributes['c'].to_s.to_i
    # no explicit number means a count of 1:
    compressed_spaces == 0 ? ' ' : ' ' * compressed_spaces
  end
  pieces.join
end
623
+
624
# Value of the node's attribute +attr_name+, or nil when the node has no
# such attribute.
def attribute(node, attr_name)
  found = node.attributes[attr_name]
  found.value if found
end
627
+ end
628
+ end
@@ -0,0 +1,39 @@
1
+ require 'uri'
2
+
3
module Roo
  # Picks the reader class that matches a file's extension and instantiates it.
  class Spreadsheet
    class << self
      # Opens +path+ with the class registered for its extension in
      # Roo::CLASS_FOR_EXTENSION.
      #
      # +path+ may be a String (file name or URL) or any object responding
      # to #path, in which case that path is used. +options+ is passed on to
      # the reader; its :extension entry, when present, overrides detection
      # (see extension_for, which also modifies the hash).
      #
      # Raises ArgumentError when no class is registered for the extension.
      def open(path, options = {})
        path = path.respond_to?(:path) ? path.path : path
        extension = extension_for(path, options)

        # Only the lookup is guarded: a KeyError raised while the reader is
        # being constructed is a different problem and must not be reported
        # as an undetectable file type.
        reader_class = Roo::CLASS_FOR_EXTENSION.fetch(extension) do
          raise ArgumentError,
                "Can't detect the type of #{path} - please use the :extension option to declare its type."
        end
        reader_class.new(path, options)
      end

      # Determines the extension symbol (e.g. :ods) for +path+.
      #
      # An :extension entry in +options+ wins: it is removed from the hash,
      # options[:file_warning] is set to :ignore, and the value is returned
      # as a symbol (a String is stripped of dots and downcased first).
      # Otherwise the extension is taken from the path itself, or from the
      # path component when +path+ is a URL.
      def extension_for(path, options)
        case (extension = options.delete(:extension))
        when ::Symbol
          options[:file_warning] = :ignore
          extension
        when ::String
          options[:file_warning] = :ignore
          extension.tr('.', '').downcase.to_sym
        else
          # Work on a String so that non-String paths (e.g. Pathname) can be
          # matched as well.
          location = path.to_s
          url = /\A#{::URI::DEFAULT_PARSER.make_regexp}\z/.match(location)
          # path is 7th match; it is missing for a URL such as
          # "http://host", hence the to_s
          parsed_path = url ? url[7].to_s : location
          ::File.extname(parsed_path).tr('.', '').downcase.to_sym
        end
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
module Roo
  # Bookkeeping for temp directories, keyed by the object_id of the object
  # they were created for. The table lives in an instance variable of the
  # object that carries these methods.
  module Tempdir
    # Removes every temp directory registered for +object_id+ and forgets
    # them. Does nothing when no directory was ever registered.
    def finalize_tempdirs(object_id)
      return unless @tempdirs

      dirs_to_remove = @tempdirs[object_id]
      return unless dirs_to_remove

      @tempdirs.delete(object_id)
      dirs_to_remove.each { |dir| ::FileUtils.remove_entry(dir) }
    end

    # Creates a temp directory named "#{Roo::TEMP_PREFIX}#{prefix}..." below
    # +root+ (or below ENV["ROO_TMP"] when +root+ is nil), registers it for
    # +object+ and returns its path.
    def make_tempdir(object, prefix, root)
      root ||= ENV["ROO_TMP"]
      # NOTE: This folder is cleaned up by finalize_tempdirs.
      tmpdir = ::Dir.mktmpdir("#{Roo::TEMP_PREFIX}#{prefix}", root)
      @tempdirs ||= Hash.new { |hash, key| hash[key] = [] }
      @tempdirs[object.object_id] << tmpdir
      tmpdir
    end
  end
end