ruh-roo 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +677 -0
- data/Gemfile +24 -0
- data/LICENSE +24 -0
- data/README.md +315 -0
- data/lib/roo/base.rb +607 -0
- data/lib/roo/constants.rb +7 -0
- data/lib/roo/csv.rb +141 -0
- data/lib/roo/errors.rb +11 -0
- data/lib/roo/excelx/cell/base.rb +108 -0
- data/lib/roo/excelx/cell/boolean.rb +30 -0
- data/lib/roo/excelx/cell/date.rb +28 -0
- data/lib/roo/excelx/cell/datetime.rb +107 -0
- data/lib/roo/excelx/cell/empty.rb +20 -0
- data/lib/roo/excelx/cell/number.rb +89 -0
- data/lib/roo/excelx/cell/string.rb +19 -0
- data/lib/roo/excelx/cell/time.rb +44 -0
- data/lib/roo/excelx/cell.rb +110 -0
- data/lib/roo/excelx/comments.rb +55 -0
- data/lib/roo/excelx/coordinate.rb +19 -0
- data/lib/roo/excelx/extractor.rb +39 -0
- data/lib/roo/excelx/format.rb +71 -0
- data/lib/roo/excelx/images.rb +26 -0
- data/lib/roo/excelx/relationships.rb +33 -0
- data/lib/roo/excelx/shared.rb +39 -0
- data/lib/roo/excelx/shared_strings.rb +151 -0
- data/lib/roo/excelx/sheet.rb +151 -0
- data/lib/roo/excelx/sheet_doc.rb +248 -0
- data/lib/roo/excelx/styles.rb +64 -0
- data/lib/roo/excelx/workbook.rb +63 -0
- data/lib/roo/excelx.rb +480 -0
- data/lib/roo/font.rb +17 -0
- data/lib/roo/formatters/base.rb +15 -0
- data/lib/roo/formatters/csv.rb +84 -0
- data/lib/roo/formatters/matrix.rb +23 -0
- data/lib/roo/formatters/xml.rb +31 -0
- data/lib/roo/formatters/yaml.rb +40 -0
- data/lib/roo/helpers/default_attr_reader.rb +20 -0
- data/lib/roo/helpers/weak_instance_cache.rb +41 -0
- data/lib/roo/libre_office.rb +4 -0
- data/lib/roo/link.rb +34 -0
- data/lib/roo/open_office.rb +628 -0
- data/lib/roo/spreadsheet.rb +39 -0
- data/lib/roo/tempdir.rb +21 -0
- data/lib/roo/utils.rb +128 -0
- data/lib/roo/version.rb +3 -0
- data/lib/roo.rb +36 -0
- data/roo.gemspec +28 -0
- metadata +189 -0
@@ -0,0 +1,628 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'date'
|
4
|
+
require 'nokogiri'
|
5
|
+
require 'cgi'
|
6
|
+
require 'zip/filesystem'
|
7
|
+
require 'roo/font'
|
8
|
+
require 'roo/tempdir'
|
9
|
+
require 'base64'
|
10
|
+
require 'openssl'
|
11
|
+
|
12
|
+
module Roo
|
13
|
+
class OpenOffice < Roo::Base
|
14
|
+
extend Roo::Tempdir
|
15
|
+
|
16
|
+
ERROR_MISSING_CONTENT_XML = 'file missing required content.xml'
|
17
|
+
XPATH_FIND_TABLE_STYLES = "//*[local-name()='automatic-styles']"
|
18
|
+
XPATH_LOCAL_NAME_TABLE = "//*[local-name()='table']"
|
19
|
+
|
20
|
+
# initialization and opening of a spreadsheet file
|
21
|
+
# values for packed: :zip
|
22
|
+
# Opens an ODS file: checks the file type, extracts content.xml into a fresh
# temporary directory and collects the sheet names.
# Options read here: :packed, :file_warning (defaults to :error),
# :only_visible_sheets and :tmpdir_root. The complete hash is also passed on
# to open_oo_file and to the superclass initializer.
# Any error removes the temporary directories again before re-raising.
def initialize(filename, options = {})
  packed = options[:packed]
  file_warning = options[:file_warning] || :error
  @only_visible_sheets = options[:only_visible_sheets]

  file_type_check(filename, '.ods', 'an Roo::OpenOffice', file_warning, packed)

  # NOTE: The temp directory is cleaned up by a finalizer once this object is
  #       garbage collected. The finalizer is registered here rather than in
  #       Roo::Tempdir because doing it there segfaulted under Ruby 2.4.0.
  @tmpdir = self.class.make_tempdir(self, find_basename(filename), options[:tmpdir_root])
  ObjectSpace.define_finalizer(self, self.class.finalize(object_id))
  @filename = local_filename(filename, @tmpdir, packed)
  # TODO: @cells_read[:default] = false
  open_oo_file(options)
  super(filename, options)
  initialize_default_variables

  if @table_display.none?
    doc.xpath(XPATH_FIND_TABLE_STYLES).each { |style| read_table_styles(style) }
  end

  # With :only_visible_sheets set, keep only tables whose style is displayed.
  listed_tables = doc.xpath(XPATH_LOCAL_NAME_TABLE).select do |table|
    !@only_visible_sheets || @table_display[attribute(table, 'style-name')]
  end
  @sheet_names = listed_tables.map { |table| table.attributes['name'].value }.compact
rescue
  self.class.finalize_tempdirs(object_id)
  raise
end
|
55
|
+
|
56
|
+
# Extracts content.xml from the zip archive at @filename into the temporary
# directory (as roo_content.xml) and hands it to decrypt_if_necessary.
# Raises ArgumentError when the archive contains no content.xml.
def open_oo_file(options)
  Zip::File.open(@filename) do |archive|
    entry = archive.glob('content.xml').first
    raise ArgumentError, ERROR_MISSING_CONTENT_XML unless entry

    extracted_path = ::File.join(@tmpdir, 'roo_content.xml')
    entry.extract(extracted_path)
    decrypt_if_necessary(archive, entry, extracted_path, options)
  end
end
|
66
|
+
|
67
|
+
# Resets the per-document caches used while parsing.
# @style_defaults yields (and stores) an empty array for an unknown sheet;
# @table_display yields (and stores) true for an unknown style name.
def initialize_default_variables
  @style_defaults = Hash.new { |defaults, sheet| defaults[sheet] = [] }
  @table_display = Hash.new { |visibility, style_name| visibility[style_name] = true }
  @formula, @style, @font_style_definitions, @comment = Array.new(4) { {} }
  @comments_read = {}
end
|
76
|
+
|
77
|
+
# Resolves a method name that matches a named range (label) to the content
# of the labelled cell; everything else is forwarded to the superclass.
def method_missing(m, *args)
  read_labels
  label_name = m.to_s
  # call super for methods like #a1
  return super unless @label.key?(label_name)

  row, col = label(label_name)
  cell(row, col)
end
|
88
|
+
|
89
|
+
# Returns the content of a spreadsheet-cell.
|
90
|
+
# (1,1) is the upper left corner.
|
91
|
+
# (1,1), (1,'A'), ('A',1), ('a',1) all refer to the
|
92
|
+
# cell in the first row and first column.
|
93
|
+
# Looks up the stored value for (row, col) on the given (or default) sheet.
# Cells whose celltype is :date are converted from their "YYYY-MM-DD" text
# into a Date; every other value is returned as stored.
def cell(row, col, sheet = nil)
  sheet ||= default_sheet
  read_cells(sheet)
  row, col = normalize(row, col)
  coordinate = [row, col]
  return @cell[sheet][coordinate] unless celltype(row, col, sheet) == :date

  year, month, day = @cell[sheet][coordinate].to_s.split('-')
  Date.new(year.to_i, month.to_i, day.to_i)
end
|
104
|
+
|
105
|
+
# Returns the formula at (row,col).
|
106
|
+
# Returns nil if there is no formula.
|
107
|
+
# The method #formula? checks if there is a formula.
|
108
|
+
# Fetches the formula text stored for (row, col), or nil when the cell has
# no formula. Uses the default sheet when none is given.
def formula(row, col, sheet = nil)
  sheet ||= default_sheet
  read_cells(sheet)
  row, col = normalize(row, col)
  sheet_formulas = @formula[sheet]
  sheet_formulas[[row, col]]
end
|
114
|
+
|
115
|
+
# Predicate methods really should return a boolean
|
116
|
+
# value. Hopefully no one was relying on the fact that this
|
117
|
+
# previously returned either nil/formula
|
118
|
+
# Boolean form of #formula: true when the cell has a formula, false otherwise.
def formula?(*args)
  formula(*args) ? true : false
end
|
121
|
+
|
122
|
+
# returns each formula in the selected sheet as an array of elements
|
123
|
+
# [row, col, formula]
|
124
|
+
# Lists every formula of the sheet as [row, col, formula] triples.
# Returns an empty array when nothing was recorded for the sheet.
def formulas(sheet = nil)
  sheet ||= default_sheet
  read_cells(sheet)
  sheet_formulas = @formula[sheet]
  return [] unless sheet_formulas

  sheet_formulas.map { |(row, col), text| [row, col, text] }
end
|
132
|
+
|
133
|
+
# Given a cell, return the cell's style
|
134
|
+
# Resolves the font definition for a cell. The style name is taken from the
# cell itself, then from the column default (index col - 1), and finally
# falls back to 'Default'.
def font(row, col, sheet = nil)
  sheet ||= default_sheet
  read_cells(sheet)
  row, col = normalize(row, col)

  style_name = @style[sheet][[row, col]]
  style_name ||= @style_defaults[sheet][col - 1]
  style_name ||= 'Default'
  @font_style_definitions[style_name]
end
|
141
|
+
|
142
|
+
# returns the type of a cell:
|
143
|
+
# * :float
|
144
|
+
# * :string
|
145
|
+
# * :date
|
146
|
+
# * :percentage
|
147
|
+
# * :formula
|
148
|
+
# * :time
|
149
|
+
# * :datetime
|
150
|
+
# Reports :formula for any cell that has a formula recorded; otherwise the
# type symbol stored for the cell (nil when none was stored).
def celltype(row, col, sheet = nil)
  sheet ||= default_sheet
  read_cells(sheet)
  row, col = normalize(row, col)
  coordinate = [row, col]
  if @formula[sheet][coordinate]
    :formula
  else
    @cell_type[sheet][coordinate]
  end
end
|
156
|
+
|
157
|
+
# Returns the sheet names stored in @sheet_names by the initializer.
def sheets
  @sheet_names
end
|
160
|
+
|
161
|
+
# version of the Roo::OpenOffice document
|
162
|
+
# at 2007 this is always "1.0"
|
163
|
+
def officeversion
  # oo_version stores the document's version attribute in @officeversion
  oo_version
  @officeversion
end
|
167
|
+
|
168
|
+
# shows the internal representation of all cells
|
169
|
+
# mainly for debugging purposes
|
170
|
+
# Debugging aid: the inspect output of the cell hash of the given
# (or default) sheet.
def to_s(sheet = nil)
  target = sheet || default_sheet
  read_cells(target)
  @cell[target].inspect
end
|
175
|
+
|
176
|
+
# returns [row, col, sheetname] of the labelled cell
|
177
|
+
# [nil, nil, nil] if the label is not defined
|
178
|
+
# Looks up a named range. Returns [row, col, sheetname] with row and col as
# integers, or [nil, nil, nil] when no label of that name exists.
def label(labelname)
  read_labels
  return [nil, nil, nil] unless @label.size >= 1 && @label.key?(labelname)

  sheet_name, row, col = @label[labelname]
  [row.to_i, ::Roo::Utils.letter_to_number(col), sheet_name]
end
|
187
|
+
|
188
|
+
# Returns an array with all labels. Each element is an array with
|
189
|
+
# [labelname, [row,col,sheetname]]
|
190
|
+
# Lists all named ranges as [name, [row, col, sheetname]] pairs.
# The argument is accepted but not used.
def labels(_sheet = nil)
  read_labels
  @label.map do |name, (sheet_name, row, col)|
    [name, [row.to_i, ::Roo::Utils.letter_to_number(col), sheet_name]]
  end
end
|
200
|
+
|
201
|
+
# returns the comment at (row/col)
|
202
|
+
# nil if there is no comment
|
203
|
+
# Fetches the comment text attached to (row, col); nil when the sheet has
# no comments at all or the cell has none.
def comment(row, col, sheet = nil)
  sheet ||= default_sheet
  read_cells(sheet)
  row, col = normalize(row, col)
  sheet_comments = @comment[sheet]
  sheet_comments ? sheet_comments[[row, col]] : nil
end
|
210
|
+
|
211
|
+
# returns each comment in the selected sheet as an array of elements
|
212
|
+
# [row, col, comment]
|
213
|
+
# Lists every comment of the sheet as [row, col, comment] triples, reading
# the sheet first if its comments have not been loaded yet.
def comments(sheet = nil)
  sheet ||= default_sheet
  read_comments(sheet) unless @comments_read[sheet]
  sheet_comments = @comment[sheet]
  return [] unless sheet_comments

  sheet_comments.map { |(row, col), text| [row, col, text] }
end
|
221
|
+
|
222
|
+
private
|
223
|
+
|
224
|
+
# If the ODS file has an encryption-data element, then try to decrypt.
|
225
|
+
# If successful, the temporary content.xml will be overwritten with
|
226
|
+
# decrypted contents.
|
227
|
+
# Inspects META-INF/manifest.xml for a manifest:encryption-data element on
# the content.xml entry. When one is present the extracted content file is
# decrypted via perform_decryption, which needs options[:password].
# Raises ArgumentError if the manifest is missing, or if the file is
# encrypted and no password was supplied.
def decrypt_if_necessary(
  zip_file,
  content_entry,
  roo_content_xml_path, options
)
  manifest_entry = zip_file.glob('META-INF/manifest.xml').first
  fail ArgumentError, 'file missing required META-INF/manifest.xml' unless manifest_entry

  roo_manifest_xml_path = File.join(@tmpdir, 'roo_manifest.xml')
  manifest_entry.extract(roo_manifest_xml_path)
  manifest = ::Roo::Utils.load_xml(roo_manifest_xml_path)

  # Only the encryption-data belonging to content.xml is of interest.
  encryption_data = manifest.xpath(
    "//manifest:file-entry[@manifest:full-path='content.xml']"\
    "/manifest:encryption-data"
  ).first

  # No node means content.xml is stored unencrypted.
  return if encryption_data.nil?

  password = options[:password]
  fail ArgumentError, 'file is encrypted but password was not supplied' if password.nil?

  perform_decryption(encryption_data, password, content_entry, roo_content_xml_path)
end
|
271
|
+
|
272
|
+
# Process the ODS encryption manifest and perform the decryption
|
273
|
+
# Reads the encryption parameters from the manifest's encryption-data node,
# builds the matching cipher and overwrites roo_content_xml_path with the
# decrypted, inflated content.
# Raises ArgumentError when expected manifest elements are missing, when the
# key generation algorithm is not SHA-256, or when decrypting/inflating fails.
def perform_decryption(
  encryption_data,
  password,
  content_entry,
  roo_content_xml_path
)
  algorithm_node = encryption_data.xpath('manifest:algorithm').first
  key_derivation_node = encryption_data.xpath('manifest:key-derivation').first
  start_key_generation_node = encryption_data.xpath('manifest:start-key-generation').first

  if algorithm_node.nil? || key_derivation_node.nil? || start_key_generation_node.nil?
    fail ArgumentError, 'manifest.xml missing encryption-data elements'
  end

  # The algorithm is a URI; the initialization vector and salt are base-64.
  algorithm = algorithm_node['manifest:algorithm-name']
  iv = Base64.decode64(algorithm_node['manifest:initialisation-vector'])
  key_derivation_name = key_derivation_node['manifest:key-derivation-name']
  iteration_count = key_derivation_node['manifest:iteration-count'].to_i
  salt = Base64.decode64(key_derivation_node['manifest:salt'])

  # The password is hashed with the algorithm named by this URI.
  key_generation_name = start_key_generation_node['manifest:start-key-generation-name']
  unless key_generation_name == 'http://www.w3.org/2000/09/xmldsig#sha256'
    fail ArgumentError, "Unknown key generation algorithm #{key_generation_name}"
  end
  hashed_password = Digest::SHA256.digest(password)

  cipher = find_cipher(algorithm, key_derivation_name, hashed_password, salt, iteration_count, iv)

  begin
    decrypted = decrypt(content_entry, cipher)
    # The decrypted stream is raw deflate data; inflate it over content.xml.
    inflated = Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(decrypted)
    IO.binwrite(roo_content_xml_path, inflated)
  rescue StandardError => error
    raise ArgumentError, "Invalid password or other data error: #{error}"
  end
end
|
345
|
+
|
346
|
+
# Create a cipher based on an ODS algorithm URI from manifest.xml
|
347
|
+
# params: algorithm, key_derivation_name, hashed_password, salt, iteration_count, iv
|
348
|
+
# Builds a decrypting AES-256-CBC cipher (padding disabled) for the given
# ODS algorithm URI.
# Positional arguments, in order: algorithm, key_derivation_name,
# hashed_password, salt, iteration_count, iv.
# Raises ArgumentError for any algorithm other than the AES-256-CBC URI.
def find_cipher(*args)
  algorithm, key_derivation_name, hashed_password, salt, iteration_count, iv = args

  # Interpolating the received value keeps this an ArgumentError even when
  # the algorithm is nil.
  unless algorithm == 'http://www.w3.org/2001/04/xmlenc#aes256-cbc'
    fail ArgumentError, "Unknown algorithm #{algorithm}"
  end

  cipher = ::OpenSSL::Cipher.new('AES-256-CBC')
  cipher.decrypt
  cipher.padding = 0
  cipher.key = find_cipher_key(cipher, key_derivation_name, hashed_password, salt, iteration_count)
  cipher.iv = iv

  cipher
end
|
359
|
+
|
360
|
+
# Create a cipher key based on an ODS algorithm string from manifest.xml
|
361
|
+
# Derives the cipher key with PBKDF2-HMAC-SHA1.
# Positional arguments, in order: cipher, key_derivation_name,
# hashed_password, salt, iteration_count. The key length is taken from the
# cipher.
# Raises ArgumentError for any key derivation name other than 'PBKDF2'.
def find_cipher_key(*args)
  cipher, key_derivation_name, hashed_password, salt, iteration_count = args

  # The name belongs in the message; passed as a third argument to `fail`
  # it would be taken as the backtrace.
  unless key_derivation_name == 'PBKDF2'
    fail ArgumentError, "Unknown key derivation name #{key_derivation_name}"
  end

  ::OpenSSL::PKCS5.pbkdf2_hmac_sha1(hashed_password, salt, iteration_count, cipher.key_len)
end
|
366
|
+
|
367
|
+
# Block decrypt raw bytes from the zip file based on the cipher
|
368
|
+
# Decrypts the raw bytes of a zip entry with the given cipher.
# Zip::Entry.extract writes a 0-length file for an encrypted stream, so the
# bytes are read straight from the archive at @filename: seek past the
# entry's local header, then read compressed_size bytes in blocks of at
# most 4096.
# Returns the decrypted bytes as a binary String.
def decrypt(content_entry, cipher)
  # String.new gives an unfrozen binary buffer that can be appended in place.
  decrypted = String.new
  File.open(@filename, 'rb') do |zipfile|
    zipfile.seek(
      content_entry.local_header_offset +
      content_entry.calculate_local_header_size
    )
    remaining = content_entry.compressed_size

    # A zero-length entry never reaches cipher.update, which rejects empty
    # input; a nil read (premature end of file) ends the loop as well.
    while remaining > 0 && (buffer = zipfile.read([4096, remaining].min))
      decrypted << cipher.update(buffer)
      remaining -= buffer.length
    end
  end

  decrypted << cipher.final
end
|
395
|
+
|
396
|
+
# Memoized parsed XML of the extracted roo_content.xml in the temp directory.
def doc
  @doc ||= begin
    content_path = File.join(@tmpdir, 'roo_content.xml')
    ::Roo::Utils.load_xml(content_path)
  end
end
|
399
|
+
|
400
|
+
# read the version of the OO-Version
|
401
|
+
# Stores the 'version' attribute of each document-content element in
# @officeversion (the last one wins).
def oo_version
  content_roots = doc.xpath("//*[local-name()='document-content']")
  content_roots.each { |root| @officeversion = attribute(root, 'version') }
end
|
406
|
+
|
407
|
+
# helper function to set the internal representation of cells
|
408
|
+
# Records type, formula, style and value of one cell in the per-sheet hashes
# @cell_type, @formula, @style and @cell.
#
# sheet      - sheet name used as the first-level key
# x, y, i    - the cell is stored under the key [y, x + i]
# v          - raw value; converted according to the cell type
# value_type - type name as a string (stored as a symbol) or nil
# formula    - formula text or nil; a leading 'of:' / 'oooc:' is stripped
# table_cell - XML node of the cell; its 'value' and 'date-value'
#              attributes and first child are consulted
# str_v      - text used as the value of :string cells
# style_name - style name stored for the cell
def set_cell_values(sheet, x, y, i, v, value_type, formula, table_cell, str_v, style_name)
  key = [y, x + i]
  @cell_type[sheet] ||= {}
  @cell_type[sheet][key] = value_type.to_sym if value_type
  @formula[sheet] ||= {}
  if formula
    ['of:', 'oooc:'].each do |prefix|
      formula = formula[prefix.length..-1] if formula.start_with?(prefix)
    end
    @formula[sheet][key] = formula
  end
  @cell[sheet] ||= {}
  @style[sheet] ||= {}
  @style[sheet][key] = style_name
  case @cell_type[sheet][key]
  when :float
    # A float is kept as an Integer unless the value attribute or the
    # displayed text contains a decimal point. The cell may have no child
    # node at all, so the first child is checked for nil before use.
    first_child = table_cell.children.first
    has_fraction = table_cell.attributes['value'].to_s.include?('.') ||
                   (!first_child.nil? && first_child.text.include?('.'))
    @cell[sheet][key] = has_fraction ? v.to_f : v.to_i
  when :percentage
    @cell[sheet][key] = v.to_f
  when :string
    @cell[sheet][key] = str_v
  when :date
    # TODO: if table_cell.attributes['date-value'].size != "XXXX-XX-XX".size
    if attribute(table_cell, 'date-value').size != 'XXXX-XX-XX'.size
      #-- longer than a plain date: a time of day is present as well,
      #-- e.g. "1961-11-21T12:17:18"
      @cell[sheet][key] = DateTime.parse(attribute(table_cell, 'date-value').to_s)
      @cell_type[sheet][key] = :datetime
    else
      @cell[sheet][key] = table_cell.attributes['date-value']
    end
  when :time
    # "hh:mm:ss" is stored as a number of seconds
    hms = v.split(':')
    @cell[sheet][key] = hms[0].to_i * 3600 + hms[1].to_i * 60 + hms[2].to_i
  else
    @cell[sheet][key] = v
  end
end
|
448
|
+
|
449
|
+
# read all cells in the selected sheet
|
450
|
+
#--
|
451
|
+
# the following construct means '4 blanks'
|
452
|
+
# some content <text:s text:c="3"/>
|
453
|
+
#++
|
454
|
+
# Parses the table named +sheet+ from content.xml and fills the per-sheet
# caches (through set_cell_values and directly into @comment and
# @style_defaults). Does nothing when the sheet was already read.
# Raises RangeError when no table with that name exists in the document.
def read_cells(sheet = default_sheet)
  validate_sheet!(sheet)
  return if @cells_read[sheet]

  sheet_found = false
  doc.xpath("//*[local-name()='table']").each do |ws|
    next unless sheet == attribute(ws, 'name')

    sheet_found = true
    # row/col are 1-based coordinates of the cell currently being parsed
    col = 1
    row = 1
    ws.children.each do |table_element|
      case table_element.name
      when 'table-column'
        # collect the default cell style per column, in document order
        @style_defaults[sheet] << table_element.attributes['default-cell-style-name']
      when 'table-row'
        if table_element.attributes['number-rows-repeated']
          # a repeated row advances the row counter by its repeat count
          # (the remaining +1 is added after the row's cells are processed)
          skip_row = attribute(table_element, 'number-rows-repeated').to_s.to_i
          row = row + skip_row - 1
        end
        table_element.children.each do |cell|
          skip_col = attribute(cell, 'number-columns-repeated')
          formula = attribute(cell, 'formula')
          value_type = attribute(cell, 'value-type')
          v = attribute(cell, 'value')
          style_name = attribute(cell, 'style-name')
          case value_type
          when 'string'
            str_v = ''
            # insert \n if there is more than one paragraph
            para_count = 0
            cell.children.each do |str|
              # begin comments
              # Example of a cell carrying an annotation (comment):
              #- <table:table-cell office:value-type="string">
              #  - <office:annotation office:display="true" draw:style-name="gr1" draw:text-style-name="P1" svg:width="1.1413in" svg:height="0.3902in" svg:x="2.0142in" svg:y="0in" draw:caption-point-x="-0.2402in" draw:caption-point-y="0.5661in">
              #      <dc:date>2011-09-20T00:00:00</dc:date>
              #      <text:p text:style-name="P1">Comment for B4</text:p>
              #    </office:annotation>
              #    <text:p>B4 (with comment)</text:p>
              #  </table:table-cell>
              if str.name == 'annotation'
                str.children.each do |annotation|
                  next unless annotation.name == 'p'
                  # @comment is a hash keyed by sheet (like @cell); each
                  # entry is another hash keyed by [row, col] whose value
                  # is the comment text
                  @comment[sheet] = Hash.new unless @comment[sheet]
                  key = [row, col]
                  @comment[sheet][key] = annotation.text
                end
              end
              # end comments
              if str.name == 'p'
                v = str.content
                str_v += "\n" if para_count > 0
                para_count += 1
                if str.children.size > 1
                  str_v += children_to_string(str.children)
                else
                  str.children.each do |child|
                    str_v += child.content #.text
                  end
                end
                # NOTE(review): as written this replaces an apostrophe with
                # an apostrophe, i.e. it changes nothing; presumably it once
                # targeted an escaped apostrophe entity - confirm upstream.
                str_v.gsub!(/'/, "'") # special case not supported by unescapeHTML
                str_v = CGI.unescapeHTML(str_v)
              end # == 'p'
            end
          when 'time'
            # the value of a time cell is the text of its paragraph
            cell.children.each do |str|
              v = str.content if str.name == 'p'
            end
          when '', nil, 'date', 'percentage', 'float'
            # nothing to do: v already holds the 'value' attribute
          when 'boolean'
            v = attribute(cell, 'boolean-value').to_s
          end
          if skip_col
            # a repeated cell with content is stored once per repetition
            if !v.nil? || cell.attributes['date-value']
              0.upto(skip_col.to_i - 1) do |i|
                set_cell_values(sheet, col, row, i, v, value_type, formula, cell, str_v, style_name)
              end
            end
            col += (skip_col.to_i - 1)
          end # if skip
          set_cell_values(sheet, col, row, 0, v, value_type, formula, cell, str_v, style_name)
          col += 1
        end
        row += 1
        col = 1
      end
    end
  end
  doc.xpath("//*[local-name()='automatic-styles']").each do |style|
    read_styles(style)
  end

  fail RangeError unless sheet_found

  # comments are gathered in the same pass, so both flags are set together
  @cells_read[sheet] = true
  @comments_read[sheet] = true
end
|
558
|
+
|
559
|
+
# Only calls read_cells because Roo::Base calls read_comments
|
560
|
+
# whereas the reading of comments is done in read_cells for Roo::OpenOffice-objects
|
561
|
+
def read_comments(sheet = nil)
  # read_cells collects the comments and marks them as read for the sheet
  read_cells(sheet)
end
|
564
|
+
|
565
|
+
# Builds (once) the @label hash from the document's named ranges:
# name => [sheetname, row, col], with row and col kept as strings.
def read_labels
  return @label if @label

  @label = doc.xpath('//table:named-range').each_with_object({}) do |range, found|
    range_name = attribute(range, 'name').to_s
    # a cell-range-address looks like: $Sheet1.$C$5
    address = attribute(range, 'cell-range-address').to_s
    sheet_name, coords = address.split('.$')
    col, row = coords.split('$')
    sheet_name = sheet_name[1..-1] if sheet_name.start_with?('$')
    found[range_name] = [sheet_name, row, col]
  end
end
|
577
|
+
|
578
|
+
# Reads the font-related attributes of every 'style' child of the given
# styles node into @font_style_definitions, keyed by style name.
# 'Default' is always (re)set to an empty Roo::Font.
#
# The child nodes are walked through #children, as read_table_styles does;
# on a Nokogiri node, #each enumerates the node's own attributes instead.
# The attribute values are stored unconverted.
# NOTE(review): how Roo::Font interprets these raw values is not visible
# here - confirm against Roo::Font.
def read_styles(style_elements)
  @font_style_definitions['Default'] = Roo::Font.new
  style_elements.children.each do |style|
    next unless style.name == 'style'

    style_name = attribute(style, 'name')
    font = nil
    style.children.each do |properties|
      font ||= Roo::Font.new
      # a later properties node without font attributes must not erase
      # what an earlier one provided
      weight = attribute(properties, 'font-weight')
      slant = attribute(properties, 'font-style')
      underline = attribute(properties, 'text-underline-style')
      font.bold = weight if weight
      font.italic = slant if slant
      font.underline = underline if underline
    end
    @font_style_definitions[style_name] = font if font
  end
end
|
592
|
+
|
593
|
+
# Records in @table_display, per style name, whether a 'display' attribute
# found on one of the style's child nodes equals 'true'. Styles without any
# 'display' attribute are left untouched.
def read_table_styles(styles)
  styles.children.each do |node|
    if node.name == 'style'
      table_style = attribute(node, 'name')
      node.children.each do |props|
        shown = attribute(props, 'display')
        @table_display[table_style] = (shown == 'true') if shown
      end
    end
  end
end
|
604
|
+
|
605
|
+
# helper method to convert compressed spaces and other elements within
|
606
|
+
# a text into a string
|
607
|
+
# FIXME: add a test for compressed_spaces == 0. It's not currently tested.
|
608
|
+
# Joins the content of the given child nodes into one string. An 's' element
# stands for compressed spaces and expands to as many blanks as its 'c'
# attribute says.
def children_to_string(children)
  pieces = children.map do |node|
    next node.content if node.text? || node.name != 's'

    blanks = node.attributes['c'].to_s.to_i
    # no explicit number means a count of 1:
    blanks.zero? ? ' ' : ' ' * blanks
  end
  pieces.join
end
|
623
|
+
|
624
|
+
# Value of the named attribute of +node+, or nil when the node has no such
# attribute.
def attribute(node, attr_name)
  found = node.attributes[attr_name]
  found.value if found
end
|
627
|
+
end
|
628
|
+
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'uri'
|
2
|
+
|
3
|
+
module Roo
  # Entry point that picks the reader class for a file from its extension.
  class Spreadsheet
    class << self
      # Opens +path+ (a path string, or any object responding to #path) with
      # the class registered in Roo::CLASS_FOR_EXTENSION for its extension.
      # Raises ArgumentError when no class is registered for the extension.
      def open(path, options = {})
        path = path.path if path.respond_to?(:path)
        extension = extension_for(path, options)

        begin
          reader_class = Roo::CLASS_FOR_EXTENSION.fetch(extension)
          reader_class.new(path, options)
        rescue KeyError
          raise ArgumentError,
                "Can't detect the type of #{path} - please use the :extension option to declare its type."
        end
      end

      # Determines the extension symbol for +path+. An :extension option
      # (Symbol or String) wins; it is removed from +options+ and
      # options[:file_warning] is set to :ignore. Otherwise the extension
      # comes from the path itself, or from the path component when +path+
      # is a URI.
      def extension_for(path, options)
        declared = options.delete(:extension)

        if declared.is_a?(::Symbol)
          options[:file_warning] = :ignore
          return declared
        end

        if declared.is_a?(::String)
          options[:file_warning] = :ignore
          return declared.tr('.', '').downcase.to_sym
        end

        # path is the 7th capture group of the URI pattern
        is_uri = path =~ /\A#{::URI::DEFAULT_PARSER.make_regexp}\z/
        parsed_path = is_uri ? Regexp.last_match(7) : path
        ::File.extname(parsed_path).tr('.', '').downcase.to_sym
      end
    end
  end
end
|
data/lib/roo/tempdir.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
module Roo
  # Bookkeeping for temporary directories, keyed by the object_id of the
  # object they were created for. Meant to be extended into a class, which
  # then holds the registry in @tempdirs.
  module Tempdir
    # Removes every temporary directory registered for +object_id+ and
    # forgets the registration. Does nothing when no registry exists yet.
    def finalize_tempdirs(object_id)
      return unless @tempdirs && (doomed = @tempdirs[object_id])

      @tempdirs.delete(object_id)
      doomed.each { |dir| ::FileUtils.remove_entry(dir) }
    end

    # Creates a temporary directory named "#{Roo::TEMP_PREFIX}#{prefix}..."
    # below +root+ (falling back to ENV["ROO_TMP"]), registers it for
    # +object+ and returns its path.
    def make_tempdir(object, prefix, root)
      parent = root || ENV["ROO_TMP"]
      # NOTE: This folder is cleaned up by finalize_tempdirs.
      created = ::Dir.mktmpdir("#{Roo::TEMP_PREFIX}#{prefix}", parent)
      @tempdirs ||= Hash.new { |registry, id| registry[id] = [] }
      @tempdirs[object.object_id] << created
      created
    end
  end
end
|