roo 2.0.1 → 2.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -4,318 +4,306 @@ require 'cgi'
4
4
  require 'zip/filesystem'
5
5
  require 'roo/font'
6
6
 
7
- class Roo::OpenOffice < Roo::Base
8
- # initialization and opening of a spreadsheet file
9
- # values for packed: :zip
10
- def initialize(filename, options={})
11
- packed = options[:packed]
12
- file_warning = options[:file_warning] || :error
13
-
14
- @only_visible_sheets = options[:only_visible_sheets]
15
- file_type_check(filename,'.ods','an Roo::OpenOffice', file_warning, packed)
16
- @tmpdir = make_tmpdir(File.basename(filename), options[:tmpdir_root])
17
- @filename = local_filename(filename, @tmpdir, packed)
18
- #TODO: @cells_read[:default] = false
19
- Zip::File.open(@filename) do |zip_file|
20
- if content_entry = zip_file.glob("content.xml").first
21
- roo_content_xml_path = File.join(@tmpdir, 'roo_content.xml')
7
+ module Roo
8
+ class OpenOffice < Roo::Base
9
+ ERROR_MISSING_CONTENT_XML = 'file missing required content.xml'.freeze
10
+ XPATH_FIND_TABLE_STYLES = "//*[local-name()='automatic-styles']".freeze
11
+ XPATH_LOCAL_NAME_TABLE = "//*[local-name()='table']".freeze
12
+
13
+ # initialization and opening of a spreadsheet file
14
+ # values for packed: :zip
15
+ def initialize(filename, options = {})
16
+ packed = options[:packed]
17
+ file_warning = options[:file_warning] || :error
18
+
19
+ @only_visible_sheets = options[:only_visible_sheets]
20
+ file_type_check(filename, '.ods', 'an Roo::OpenOffice', file_warning, packed)
21
+ @tmpdir = make_tmpdir(File.basename(filename), options[:tmpdir_root])
22
+ @filename = local_filename(filename, @tmpdir, packed)
23
+ # TODO: @cells_read[:default] = false
24
+ open_oo_file(options)
25
+ super(filename, options)
26
+ initialize_default_variables
27
+ rescue => e # clean up any temp files, but only if an error was raised
28
+ close
29
+ raise e
30
+ end
31
+
32
+ def open_oo_file(options)
33
+ Zip::File.open(@filename) do |zip_file|
34
+ content_entry = zip_file.glob('content.xml').first
35
+ fail ArgumentError, ERROR_MISSING_CONTENT_XML unless content_entry
36
+
37
+ roo_content_xml_path = ::File.join(@tmpdir, 'roo_content.xml')
22
38
  content_entry.extract(roo_content_xml_path)
23
- decrypt_if_necessary(
24
- zip_file,
25
- content_entry,
26
- roo_content_xml_path,
27
- options
28
- )
29
- else
30
- raise ArgumentError, 'file missing required content.xml'
39
+ decrypt_if_necessary(zip_file, content_entry, roo_content_xml_path, options)
31
40
  end
32
41
  end
33
- super(filename, options)
34
- @formula = Hash.new
35
- @style = Hash.new
36
- @style_defaults = Hash.new { |h,k| h[k] = [] }
37
- @table_display = Hash.new { |h,k| h[k] = true }
38
- @font_style_definitions = Hash.new
39
- @comment = Hash.new
40
- @comments_read = Hash.new
41
- rescue => e # clean up any temp files, but only if an error was raised
42
- close
43
- raise e
44
- end
45
42
 
46
- def method_missing(m,*args)
47
- read_labels
48
- # is method name a label name
49
- if @label.has_key?(m.to_s)
50
- row,col = label(m.to_s)
51
- cell(row,col)
52
- else
53
- # call super for methods like #a1
54
- super
43
+ def initialize_default_variables
44
+ @formula = {}
45
+ @style = {}
46
+ @style_defaults = Hash.new { |h, k| h[k] = [] }
47
+ @table_display = Hash.new { |h, k| h[k] = true }
48
+ @font_style_definitions = {}
49
+ @comment = {}
50
+ @comments_read = {}
55
51
  end
56
- end
57
52
 
58
- # Returns the content of a spreadsheet-cell.
59
- # (1,1) is the upper left corner.
60
- # (1,1), (1,'A'), ('A',1), ('a',1) all refers to the
61
- # cell at the first line and first row.
62
- def cell(row, col, sheet=nil)
63
- sheet ||= default_sheet
64
- read_cells(sheet)
65
- row,col = normalize(row,col)
66
- if celltype(row,col,sheet) == :date
67
- yyyy,mm,dd = @cell[sheet][[row,col]].to_s.split('-')
68
- return Date.new(yyyy.to_i,mm.to_i,dd.to_i)
69
- end
70
- @cell[sheet][[row,col]]
71
- end
53
+ def method_missing(m, *args)
54
+ read_labels
55
+ # is method name a label name
56
+ if @label.key?(m.to_s)
57
+ row, col = label(m.to_s)
58
+ cell(row, col)
59
+ else
60
+ # call super for methods like #a1
61
+ super
62
+ end
63
+ end
72
64
 
73
- # Returns the formula at (row,col).
74
- # Returns nil if there is no formula.
75
- # The method #formula? checks if there is a formula.
76
- def formula(row,col,sheet=nil)
77
- sheet ||= default_sheet
78
- read_cells(sheet)
79
- row,col = normalize(row,col)
80
- @formula[sheet][[row,col]]
81
- end
65
+ # Returns the content of a spreadsheet-cell.
66
+ # (1,1) is the upper left corner.
67
+ # (1,1), (1,'A'), ('A',1), ('a',1) all refer to the
68
+ # cell in the first row and first column.
69
+ def cell(row, col, sheet = nil)
70
+ sheet ||= default_sheet
71
+ read_cells(sheet)
72
+ row, col = normalize(row, col)
73
+ if celltype(row, col, sheet) == :date
74
+ yyyy, mm, dd = @cell[sheet][[row, col]].to_s.split('-')
75
+ return Date.new(yyyy.to_i, mm.to_i, dd.to_i)
76
+ end
82
77
 
83
- # Predicate methods really should return a boolean
84
- # value. Hopefully no one was relying on the fact that this
85
- # previously returned either nil/formula
86
- def formula?(*args)
87
- !!formula(*args)
88
- end
78
+ @cell[sheet][[row, col]]
79
+ end
80
+
81
+ # Returns the formula at (row,col).
82
+ # Returns nil if there is no formula.
83
+ # The method #formula? checks if there is a formula.
84
+ def formula(row, col, sheet = nil)
85
+ sheet ||= default_sheet
86
+ read_cells(sheet)
87
+ row, col = normalize(row, col)
88
+ @formula[sheet][[row, col]]
89
+ end
89
90
 
90
- # returns each formula in the selected sheet as an array of elements
91
- # [row, col, formula]
92
- def formulas(sheet=nil)
93
- sheet ||= default_sheet
94
- read_cells(sheet)
95
- if @formula[sheet]
91
+ # Predicate methods really should return a boolean
92
+ # value. Hopefully no one was relying on the fact that this
93
+ # previously returned either nil/formula
94
+ def formula?(*args)
95
+ !!formula(*args)
96
+ end
97
+
98
+ # returns each formula in the selected sheet as an array of elements
99
+ # [row, col, formula]
100
+ def formulas(sheet = nil)
101
+ sheet ||= default_sheet
102
+ read_cells(sheet)
103
+ return [] unless @formula[sheet]
96
104
  @formula[sheet].each.collect do |elem|
97
105
  [elem[0][0], elem[0][1], elem[1]]
98
106
  end
99
- else
100
- []
101
107
  end
102
- end
103
108
 
104
- # Given a cell, return the cell's style
105
- def font(row, col, sheet=nil)
106
- sheet ||= default_sheet
107
- read_cells(sheet)
108
- row,col = normalize(row,col)
109
- style_name = @style[sheet][[row,col]] || @style_defaults[sheet][col - 1] || 'Default'
110
- @font_style_definitions[style_name]
111
- end
109
+ # Given a cell, return the cell's style
110
+ def font(row, col, sheet = nil)
111
+ sheet ||= default_sheet
112
+ read_cells(sheet)
113
+ row, col = normalize(row, col)
114
+ style_name = @style[sheet][[row, col]] || @style_defaults[sheet][col - 1] || 'Default'
115
+ @font_style_definitions[style_name]
116
+ end
112
117
 
113
- # returns the type of a cell:
114
- # * :float
115
- # * :string
116
- # * :date
117
- # * :percentage
118
- # * :formula
119
- # * :time
120
- # * :datetime
121
- def celltype(row,col,sheet=nil)
122
- sheet ||= default_sheet
123
- read_cells(sheet)
124
- row,col = normalize(row,col)
125
- if @formula[sheet][[row,col]]
126
- return :formula
127
- else
128
- @cell_type[sheet][[row,col]]
118
+ # returns the type of a cell:
119
+ # * :float
120
+ # * :string
121
+ # * :date
122
+ # * :percentage
123
+ # * :formula
124
+ # * :time
125
+ # * :datetime
126
+ def celltype(row, col, sheet = nil)
127
+ sheet ||= default_sheet
128
+ read_cells(sheet)
129
+ row, col = normalize(row, col)
130
+ @formula[sheet][[row, col]] ? :formula : @cell_type[sheet][[row, col]]
129
131
  end
130
- end
131
132
 
132
- def sheets
133
- unless @table_display.any?
134
- doc.xpath("//*[local-name()='automatic-styles']").each do |style|
135
- read_table_styles(style)
133
+ def sheets
134
+ unless @table_display.any?
135
+ doc.xpath(XPATH_FIND_TABLE_STYLES).each do |style|
136
+ read_table_styles(style)
137
+ end
136
138
  end
139
+ doc.xpath(XPATH_LOCAL_NAME_TABLE).map do |sheet|
140
+ if !@only_visible_sheets || @table_display[attribute(sheet, 'style-name')]
141
+ sheet.attributes['name'].value
142
+ end
143
+ end.compact
137
144
  end
138
- doc.xpath("//*[local-name()='table']").map do |sheet|
139
- if !@only_visible_sheets || @table_display[attr(sheet,'style-name')]
140
- sheet.attributes["name"].value
141
- end
142
- end.compact
143
- end
144
145
 
145
- # version of the Roo::OpenOffice document
146
- # at 2007 this is always "1.0"
147
- def officeversion
148
- oo_version
149
- @officeversion
150
- end
151
-
152
- # shows the internal representation of all cells
153
- # mainly for debugging purposes
154
- def to_s(sheet=nil)
155
- sheet ||= default_sheet
156
- read_cells(sheet)
157
- @cell[sheet].inspect
158
- end
146
+ # version of the Roo::OpenOffice document
147
+ # at 2007 this is always "1.0"
148
+ def officeversion
149
+ oo_version
150
+ @officeversion
151
+ end
159
152
 
160
- # returns the row,col values of the labelled cell
161
- # (nil,nil) if label is not defined
162
- def label(labelname)
163
- read_labels
164
- unless @label.size > 0
165
- return nil,nil,nil
153
+ # shows the internal representation of all cells
154
+ # mainly for debugging purposes
155
+ def to_s(sheet = nil)
156
+ sheet ||= default_sheet
157
+ read_cells(sheet)
158
+ @cell[sheet].inspect
166
159
  end
167
- if @label.has_key? labelname
168
- return @label[labelname][1].to_i,
160
+
161
+ # returns [row, col, sheet name] of the labelled cell,
162
+ # or [nil, nil, nil] if the label is not defined
163
+ def label(labelname)
164
+ read_labels
165
+ return [nil, nil, nil] if @label.size < 1 || !@label.key?(labelname)
166
+ [
167
+ @label[labelname][1].to_i,
169
168
  ::Roo::Utils.letter_to_number(@label[labelname][2]),
170
169
  @label[labelname][0]
171
- else
172
- return nil,nil,nil
170
+ ]
173
171
  end
174
- end
175
172
 
176
- # Returns an array which all labels. Each element is an array with
177
- # [labelname, [row,col,sheetname]]
178
- def labels(sheet=nil)
179
- read_labels
180
- @label.map do |label|
181
- [ label[0], # name
182
- [ label[1][1].to_i, # row
173
+ # Returns an array with all labels. Each element is an array with
174
+ # [labelname, [row,col,sheetname]]
175
+ def labels(_sheet = nil)
176
+ read_labels
177
+ @label.map do |label|
178
+ [label[0], # name
179
+ [label[1][1].to_i, # row
183
180
  ::Roo::Utils.letter_to_number(label[1][2]), # column
184
181
  label[1][0], # sheet
185
- ] ]
182
+ ]]
183
+ end
186
184
  end
187
- end
188
185
 
189
- # returns the comment at (row/col)
190
- # nil if there is no comment
191
- def comment(row,col,sheet=nil)
192
- sheet ||= default_sheet
193
- read_cells(sheet)
194
- row,col = normalize(row,col)
195
- return nil unless @comment[sheet]
196
- @comment[sheet][[row,col]]
197
- end
186
+ # returns the comment at (row/col)
187
+ # nil if there is no comment
188
+ def comment(row, col, sheet = nil)
189
+ sheet ||= default_sheet
190
+ read_cells(sheet)
191
+ row, col = normalize(row, col)
192
+ return nil unless @comment[sheet]
193
+ @comment[sheet][[row, col]]
194
+ end
198
195
 
199
- # returns each comment in the selected sheet as an array of elements
200
- # [row, col, comment]
201
- def comments(sheet=nil)
202
- sheet ||= default_sheet
203
- read_comments(sheet) unless @comments_read[sheet]
204
- if @comment[sheet]
196
+ # returns each comment in the selected sheet as an array of elements
197
+ # [row, col, comment]
198
+ def comments(sheet = nil)
199
+ sheet ||= default_sheet
200
+ read_comments(sheet) unless @comments_read[sheet]
201
+ return [] unless @comment[sheet]
205
202
  @comment[sheet].each.collect do |elem|
206
- [elem[0][0],elem[0][1],elem[1]]
203
+ [elem[0][0], elem[0][1], elem[1]]
207
204
  end
208
- else
209
- []
210
205
  end
211
- end
212
206
 
213
- private
214
-
215
- # If the ODS file has an encryption-data element, then try to decrypt.
216
- # If successful, the temporary content.xml will be overwritten with
217
- # decrypted contents.
218
- def decrypt_if_necessary(
219
- zip_file,
220
- content_entry,
221
- roo_content_xml_path, options
222
- )
223
- # Check if content.xml is encrypted by extracting manifest.xml
224
- # and searching for a manifest:encryption-data element
225
-
226
- if manifest_entry = zip_file.glob("META-INF/manifest.xml").first
227
- roo_manifest_xml_path = File.join(@tmpdir, "roo_manifest.xml")
228
- manifest_entry.extract(roo_manifest_xml_path)
229
- manifest = ::Roo::Utils.load_xml(roo_manifest_xml_path)
230
-
231
- # XPath search for manifest:encryption-data only for the content.xml
232
- # file
233
-
234
- encryption_data = manifest.xpath(
235
- "//manifest:file-entry[@manifest:full-path='content.xml']"\
207
+ private
208
+
209
+ # If the ODS file has an encryption-data element, then try to decrypt.
210
+ # If successful, the temporary content.xml will be overwritten with
211
+ # decrypted contents.
212
+ def decrypt_if_necessary(
213
+ zip_file,
214
+ content_entry,
215
+ roo_content_xml_path, options
216
+ )
217
+ # Check if content.xml is encrypted by extracting manifest.xml
218
+ # and searching for a manifest:encryption-data element
219
+
220
+ if (manifest_entry = zip_file.glob('META-INF/manifest.xml').first)
221
+ roo_manifest_xml_path = File.join(@tmpdir, 'roo_manifest.xml')
222
+ manifest_entry.extract(roo_manifest_xml_path)
223
+ manifest = ::Roo::Utils.load_xml(roo_manifest_xml_path)
224
+
225
+ # XPath search for manifest:encryption-data only for the content.xml
226
+ # file
227
+
228
+ encryption_data = manifest.xpath(
229
+ "//manifest:file-entry[@manifest:full-path='content.xml']"\
236
230
  "/manifest:encryption-data"
237
- ).first
231
+ ).first
238
232
 
239
- # If XPath returns a node, then we know content.xml is encrypted
233
+ # If XPath returns a node, then we know content.xml is encrypted
240
234
 
241
- if !encryption_data.nil?
235
+ unless encryption_data.nil?
242
236
 
243
- # Since we know it's encrypted, we check for the password option
244
- # and if it doesn't exist, raise an argument error
237
+ # Since we know it's encrypted, we check for the password option
238
+ # and if it doesn't exist, raise an argument error
245
239
 
246
- password = options[:password]
247
- if !password.nil?
248
- perform_decryption(
249
- encryption_data,
250
- password,
251
- content_entry,
252
- roo_content_xml_path
253
- )
254
- else
255
- raise ArgumentError,
256
- 'file is encrypted but password was not supplied'
240
+ password = options[:password]
241
+ if !password.nil?
242
+ perform_decryption(
243
+ encryption_data,
244
+ password,
245
+ content_entry,
246
+ roo_content_xml_path
247
+ )
248
+ else
249
+ fail ArgumentError, 'file is encrypted but password was not supplied'
250
+ end
257
251
  end
252
+ else
253
+ fail ArgumentError, 'file missing required META-INF/manifest.xml'
258
254
  end
259
- else
260
- raise ArgumentError, 'file missing required META-INF/manifest.xml'
261
255
  end
262
- end
263
256
 
264
- # Process the ODS encryption manifest and perform the decryption
265
- def perform_decryption(
266
- encryption_data,
267
- password,
268
- content_entry,
269
- roo_content_xml_path
270
- )
271
- # Extract various expected attributes from the manifest that
272
- # describe the encryption
273
-
274
- algorithm_node = encryption_data.xpath("manifest:algorithm").first
275
- key_derivation_node =
276
- encryption_data.xpath("manifest:key-derivation").first
277
- start_key_generation_node =
278
- encryption_data.xpath("manifest:start-key-generation").first
279
-
280
- # If we have all the expected elements, then we can perform
281
- # the decryption.
282
-
283
- if !algorithm_node.nil? && !key_derivation_node.nil? &&
284
- !start_key_generation_node.nil?
285
-
286
- # The algorithm is a URI describing the algorithm used
287
- algorithm = algorithm_node['manifest:algorithm-name']
288
-
289
- # The initialization vector is base-64 encoded
290
- iv = Base64.decode64(
291
- algorithm_node['manifest:initialisation-vector']
292
- )
293
- key_derivation_name =
294
- key_derivation_node['manifest:key-derivation-name']
295
- key_size = key_derivation_node['manifest:key-size'].to_i
296
- iteration_count =
297
- key_derivation_node['manifest:iteration-count'].to_i
298
- salt = Base64.decode64(key_derivation_node['manifest:salt'])
299
-
300
- # The key is hashed with an algorithm represented by this URI
301
- key_generation_name =
302
- start_key_generation_node[
303
- 'manifest:start-key-generation-name'
304
- ]
305
- key_generation_size =
306
- start_key_generation_node['manifest:key-size'].to_i
257
+ # Process the ODS encryption manifest and perform the decryption
258
+ def perform_decryption(
259
+ encryption_data,
260
+ password,
261
+ content_entry,
262
+ roo_content_xml_path
263
+ )
264
+ # Extract various expected attributes from the manifest that
265
+ # describe the encryption
266
+
267
+ algorithm_node = encryption_data.xpath('manifest:algorithm').first
268
+ key_derivation_node =
269
+ encryption_data.xpath('manifest:key-derivation').first
270
+ start_key_generation_node =
271
+ encryption_data.xpath('manifest:start-key-generation').first
272
+
273
+ # If we have all the expected elements, then we can perform
274
+ # the decryption.
275
+
276
+ if !algorithm_node.nil? && !key_derivation_node.nil? &&
277
+ !start_key_generation_node.nil?
278
+
279
+ # The algorithm is a URI describing the algorithm used
280
+ algorithm = algorithm_node['manifest:algorithm-name']
281
+
282
+ # The initialization vector is base-64 encoded
283
+ iv = Base64.decode64(
284
+ algorithm_node['manifest:initialisation-vector']
285
+ )
286
+ key_derivation_name = key_derivation_node['manifest:key-derivation-name']
287
+ key_size = key_derivation_node['manifest:key-size'].to_i
288
+ iteration_count = key_derivation_node['manifest:iteration-count'].to_i
289
+ salt = Base64.decode64(key_derivation_node['manifest:salt'])
290
+
291
+ # The key is hashed with an algorithm represented by this URI
292
+ key_generation_name =
293
+ start_key_generation_node[
294
+ 'manifest:start-key-generation-name'
295
+ ]
296
+ key_generation_size = start_key_generation_node['manifest:key-size'].to_i
307
297
 
308
298
  hashed_password = password
309
- key = nil
299
+ key = nil
310
300
 
311
- if key_generation_name.eql?(
312
- "http://www.w3.org/2000/09/xmldsig#sha256"
313
- )
314
- hashed_password = Digest::SHA256.digest(password)
315
- else
316
- raise ArgumentError, 'Unknown key generation algorithm ' +
317
- key_generation_name
318
- end
301
+ if key_generation_name == 'http://www.w3.org/2000/09/xmldsig#sha256'
302
+
303
+ hashed_password = Digest::SHA256.digest(password)
304
+ else
305
+ fail ArgumentError, "Unknown key generation algorithm #{key_generation_name}"
306
+ end
319
307
 
320
308
  cipher = find_cipher(
321
309
  algorithm,
@@ -336,210 +324,177 @@ class Roo::OpenOffice < Roo::Base
336
324
  Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(decrypted)
337
325
  )
338
326
  rescue StandardError => error
339
- raise ArgumentError,
340
- 'Invalid password or other data error: ' + error.to_s
327
+ raise ArgumentError, "Invalid password or other data error: #{error}"
341
328
  end
342
- else
343
- raise ArgumentError,
344
- 'manifest.xml missing encryption-data elements'
329
+ else
330
+ fail ArgumentError, 'manifest.xml missing encryption-data elements'
331
+ end
345
332
  end
346
- end
347
333
 
348
- # Create a cipher based on an ODS algorithm URI from manifest.xml
349
- def find_cipher(
350
- algorithm,
351
- key_derivation_name,
352
- hashed_password,
353
- salt,
354
- iteration_count,
355
- iv
356
- )
357
- cipher = nil
358
- if algorithm.eql? "http://www.w3.org/2001/04/xmlenc#aes256-cbc"
334
+ # Create a cipher based on an ODS algorithm URI from manifest.xml
335
+ # params: algorithm, key_derivation_name, hashed_password, salt, iteration_count, iv
336
+ def find_cipher(*args)
337
+ fail ArgumentError, 'Unknown algorithm ' + algorithm unless args[0] == 'http://www.w3.org/2001/04/xmlenc#aes256-cbc'
338
+
359
339
  cipher = OpenSSL::Cipher.new('AES-256-CBC')
360
340
  cipher.decrypt
361
341
  cipher.padding = 0
362
- cipher.key = find_cipher_key(
363
- cipher,
364
- key_derivation_name,
365
- hashed_password,
366
- salt,
367
- iteration_count
368
- )
369
- cipher.iv = iv
370
- else
371
- raise ArgumentError, 'Unknown algorithm ' + algorithm
372
- end
373
- cipher
374
- end
342
+ cipher.key = find_cipher_key(cipher, *args[1..4])
343
+ cipher.iv = args[5]
375
344
 
376
- # Create a cipher key based on an ODS algorithm string from manifest.xml
377
- def find_cipher_key(
378
- cipher,
379
- key_derivation_name,
380
- hashed_password,
381
- salt,
382
- iteration_count
383
- )
384
- if key_derivation_name.eql? "PBKDF2"
385
- key = OpenSSL::PKCS5.pbkdf2_hmac_sha1(
386
- hashed_password,
387
- salt,
388
- iteration_count,
389
- cipher.key_len
390
- )
391
- else
392
- raise ArgumentError, 'Unknown key derivation name ' +
393
- key_derivation_name
394
- end
395
- key
396
- end
345
+ cipher
346
+ end
397
347
 
398
- # Block decrypt raw bytes from the zip file based on the cipher
399
- def decrypt(content_entry, cipher)
400
- # Zip::Entry.extract writes a 0-length file when trying
401
- # to extract an encrypted stream, so we read the
402
- # raw bytes based on the offset and lengths
403
- decrypted = ""
404
- File.open(@filename, "rb") do |zipfile|
405
- zipfile.seek(
406
- content_entry.local_header_offset +
407
- content_entry.calculate_local_header_size
408
- )
409
- total_to_read = content_entry.compressed_size
410
-
411
- block_size = 4096
412
- block_size = total_to_read if block_size > total_to_read
413
-
414
- while buffer = zipfile.read(block_size)
415
- decrypted += cipher.update(buffer)
416
- total_to_read -= buffer.length
417
-
418
- break if total_to_read == 0
419
-
420
- block_size = total_to_read if block_size > total_to_read
421
- end
348
+ # Create a cipher key based on an ODS algorithm string from manifest.xml
349
+ def find_cipher_key(*args)
350
+ fail ArgumentError, 'Unknown key derivation name ', args[1] unless args[1] == 'PBKDF2'
351
+
352
+ OpenSSL::PKCS5.pbkdf2_hmac_sha1(args[2], args[3], args[4], args[0].key_len)
422
353
  end
423
354
 
424
- decrypted + cipher.final
425
- end
355
+ # Block decrypt raw bytes from the zip file based on the cipher
356
+ def decrypt(content_entry, cipher)
357
+ # Zip::Entry.extract writes a 0-length file when trying
358
+ # to extract an encrypted stream, so we read the
359
+ # raw bytes based on the offset and lengths
360
+ decrypted = ''
361
+ File.open(@filename, 'rb') do |zipfile|
362
+ zipfile.seek(
363
+ content_entry.local_header_offset +
364
+ content_entry.calculate_local_header_size
365
+ )
366
+ total_to_read = content_entry.compressed_size
426
367
 
427
- def doc
428
- @doc ||= ::Roo::Utils.load_xml(File.join(@tmpdir, "roo_content.xml"))
429
- end
368
+ block_size = 4096
369
+ block_size = total_to_read if block_size > total_to_read
370
+
371
+ while (buffer = zipfile.read(block_size))
372
+ decrypted += cipher.update(buffer)
373
+ total_to_read -= buffer.length
430
374
 
431
- # read the version of the OO-Version
432
- def oo_version
433
- doc.xpath("//*[local-name()='document-content']").each do |office|
434
- @officeversion = attr(office,'version')
375
+ break if total_to_read == 0
376
+
377
+ block_size = total_to_read if block_size > total_to_read
378
+ end
379
+ end
380
+
381
+ decrypted + cipher.final
382
+ end
383
+
384
+ def doc
385
+ @doc ||= ::Roo::Utils.load_xml(File.join(@tmpdir, 'roo_content.xml'))
386
+ end
387
+
388
+ # read the version attribute of the OpenOffice document
389
+ def oo_version
390
+ doc.xpath("//*[local-name()='document-content']").each do |office|
391
+ @officeversion = attribute(office, 'version')
392
+ end
435
393
  end
436
- end
437
394
 
438
- # helper function to set the internal representation of cells
439
- def set_cell_values(sheet,x,y,i,v,value_type,formula,table_cell,str_v,style_name)
440
- key = [y,x+i]
441
- @cell_type[sheet] = {} unless @cell_type[sheet]
442
- @cell_type[sheet][key] = Roo::OpenOffice.oo_type_2_roo_type(value_type)
443
- @formula[sheet] = {} unless @formula[sheet]
444
- if formula
445
- ['of:', 'oooc:'].each do |prefix|
446
- if formula[0,prefix.length] == prefix
447
- formula = formula[prefix.length..-1]
395
+ # helper function to set the internal representation of cells
396
+ def set_cell_values(sheet, x, y, i, v, value_type, formula, table_cell, str_v, style_name)
397
+ key = [y, x + i]
398
+ @cell_type[sheet] ||= {}
399
+ @cell_type[sheet][key] = value_type.to_sym if value_type
400
+ @formula[sheet] ||= {}
401
+ if formula
402
+ ['of:', 'oooc:'].each do |prefix|
403
+ if formula[0, prefix.length] == prefix
404
+ formula = formula[prefix.length..-1]
405
+ end
448
406
  end
407
+ @formula[sheet][key] = formula
449
408
  end
450
- @formula[sheet][key] = formula
451
- end
452
- @cell[sheet] = {} unless @cell[sheet]
453
- @style[sheet] = {} unless @style[sheet]
454
- @style[sheet][key] = style_name
455
- case @cell_type[sheet][key]
456
- when :float
457
- @cell[sheet][key] = v.to_f
458
- when :string
459
- @cell[sheet][key] = str_v
460
- when :date
461
- #TODO: if table_cell.attributes['date-value'].size != "XXXX-XX-XX".size
462
- if attr(table_cell,'date-value').size != "XXXX-XX-XX".size
463
- #-- dann ist noch eine Uhrzeit vorhanden
464
- #-- "1961-11-21T12:17:18"
465
- @cell[sheet][key] = DateTime.parse(attr(table_cell,'date-value').to_s)
466
- @cell_type[sheet][key] = :datetime
409
+ @cell[sheet] ||= {}
410
+ @style[sheet] ||= {}
411
+ @style[sheet][key] = style_name
412
+ case @cell_type[sheet][key]
413
+ when :float, :percentage
414
+ @cell[sheet][key] = v.to_f
415
+ when :string
416
+ @cell[sheet][key] = str_v
417
+ when :date
418
+ # TODO: if table_cell.attributes['date-value'].size != "XXXX-XX-XX".size
419
+ if attribute(table_cell, 'date-value').size != 'XXXX-XX-XX'.size
420
+ #-- in that case the value also carries a time of day
421
+ #-- "1961-11-21T12:17:18"
422
+ @cell[sheet][key] = DateTime.parse(attribute(table_cell, 'date-value').to_s)
423
+ @cell_type[sheet][key] = :datetime
424
+ else
425
+ @cell[sheet][key] = table_cell.attributes['date-value']
426
+ end
427
+ when :time
428
+ hms = v.split(':')
429
+ @cell[sheet][key] = hms[0].to_i * 3600 + hms[1].to_i * 60 + hms[2].to_i
467
430
  else
468
- @cell[sheet][key] = table_cell.attributes['date-value']
431
+ @cell[sheet][key] = v
469
432
  end
470
- when :percentage
471
- @cell[sheet][key] = v.to_f
472
- when :time
473
- hms = v.split(':')
474
- @cell[sheet][key] = hms[0].to_i*3600 + hms[1].to_i*60 + hms[2].to_i
475
- else
476
- @cell[sheet][key] = v
477
433
  end
478
- end
479
434
 
480
- # read all cells in the selected sheet
481
- #--
482
- # the following construct means '4 blanks'
483
- # some content <text:s text:c="3"/>
484
- #++
485
- def read_cells(sheet = default_sheet)
486
- validate_sheet!(sheet)
487
- return if @cells_read[sheet]
488
-
489
- sheet_found = false
490
- doc.xpath("//*[local-name()='table']").each do |ws|
491
- if sheet == attr(ws,'name')
435
+ # read all cells in the selected sheet
436
+ #--
437
+ # the following construct means '4 blanks'
438
+ # some content <text:s text:c="3"/>
439
+ #++
440
+ def read_cells(sheet = default_sheet)
441
+ validate_sheet!(sheet)
442
+ return if @cells_read[sheet]
443
+
444
+ sheet_found = false
445
+ doc.xpath("//*[local-name()='table']").each do |ws|
446
+ next unless sheet == attribute(ws, 'name')
447
+
492
448
  sheet_found = true
493
- col = 1
494
- row = 1
449
+ col = 1
450
+ row = 1
495
451
  ws.children.each do |table_element|
496
452
  case table_element.name
497
453
  when 'table-column'
498
454
  @style_defaults[sheet] << table_element.attributes['default-cell-style-name']
499
455
  when 'table-row'
500
456
  if table_element.attributes['number-rows-repeated']
501
- skip_row = attr(table_element,'number-rows-repeated').to_s.to_i
502
- row = row + skip_row - 1
457
+ skip_row = attribute(table_element, 'number-rows-repeated').to_s.to_i
458
+ row = row + skip_row - 1
503
459
  end
504
460
  table_element.children.each do |cell|
505
- skip_col = attr(cell, 'number-columns-repeated')
506
- formula = attr(cell,'formula')
507
- value_type = attr(cell,'value-type')
508
- v = attr(cell,'value')
509
- style_name = attr(cell,'style-name')
461
+ skip_col = attribute(cell, 'number-columns-repeated')
462
+ formula = attribute(cell, 'formula')
463
+ value_type = attribute(cell, 'value-type')
464
+ v = attribute(cell, 'value')
465
+ style_name = attribute(cell, 'style-name')
510
466
  case value_type
511
467
  when 'string'
512
- str_v = ''
468
+ str_v = ''
513
469
  # insert \n if there is more than one paragraph
514
470
  para_count = 0
515
471
  cell.children.each do |str|
516
472
  # begin comments
517
- =begin
518
- - <table:table-cell office:value-type="string">
519
- - <office:annotation office:display="true" draw:style-name="gr1" draw:text-style-name="P1" svg:width="1.1413in" svg:height="0.3902in" svg:x="2.0142in" svg:y="0in" draw:caption-point-x="-0.2402in" draw:caption-point-y="0.5661in">
520
- <dc:date>2011-09-20T00:00:00</dc:date>
521
- <text:p text:style-name="P1">Kommentar fuer B4</text:p>
522
- </office:annotation>
523
- <text:p>B4 (mit Kommentar)</text:p>
524
- </table:table-cell>
525
- =end
473
+ #=begin
474
+ #- <table:table-cell office:value-type="string">
475
+ # - <office:annotation office:display="true" draw:style-name="gr1" draw:text-style-name="P1" svg:width="1.1413in" svg:height="0.3902in" svg:x="2.0142in" svg:y="0in" draw:caption-point-x="-0.2402in" draw:caption-point-y="0.5661in">
476
+ # <dc:date>2011-09-20T00:00:00</dc:date>
477
+ # <text:p text:style-name="P1">Kommentar fuer B4</text:p>
478
+ # </office:annotation>
479
+ # <text:p>B4 (mit Kommentar)</text:p>
480
+ # </table:table-cell>
481
+ #=end
526
482
  if str.name == 'annotation'
527
483
  str.children.each do |annotation|
528
- if annotation.name == 'p'
529
- # @comment ist ein Hash mit Sheet als Key (wie bei @cell)
530
- # innerhalb eines Elements besteht ein Eintrag aus einem
531
- # weiteren Hash mit Key [row,col] und dem eigentlichen
532
- # Kommentartext als Inhalt
533
- @comment[sheet] = Hash.new unless @comment[sheet]
534
- key = [row,col]
535
- @comment[sheet][key] = annotation.text
536
- end
484
+ next unless annotation.name == 'p'
485
+ # @comment is a hash keyed by sheet (like @cell);
486
+ # each of its entries is in turn
487
+ # another hash keyed by [row,col] whose value is the
488
+ # actual comment text
489
+ @comment[sheet] = Hash.new unless @comment[sheet]
490
+ key = [row, col]
491
+ @comment[sheet][key] = annotation.text
537
492
  end
538
493
  end
539
494
  # end comments
540
495
  if str.name == 'p'
541
- v = str.content
542
- str_v += "\n" if para_count > 0
496
+ v = str.content
497
+ str_v += "\n" if para_count > 0
543
498
  para_count += 1
544
499
  if str.children.size > 1
545
500
  str_v += children_to_string(str.children)
@@ -548,32 +503,28 @@ class Roo::OpenOffice < Roo::Base
548
503
  str_v += child.content #.text
549
504
  end
550
505
  end
551
- str_v.gsub!(/&apos;/,"'") # special case not supported by unescapeHTML
506
+ str_v.gsub!(/&apos;/, "'") # special case not supported by unescapeHTML
552
507
  str_v = CGI.unescapeHTML(str_v)
553
508
  end # == 'p'
554
509
  end
555
510
  when 'time'
556
511
  cell.children.each do |str|
557
- if str.name == 'p'
558
- v = str.content
559
- end
512
+ v = str.content if str.name == 'p'
560
513
  end
561
514
  when '', nil, 'date', 'percentage', 'float'
562
515
  #
563
516
  when 'boolean'
564
- v = attr(cell,'boolean-value').to_s
565
- else
566
- # raise "unknown type #{value_type}"
517
+ v = attribute(cell, 'boolean-value').to_s
567
518
  end
568
519
  if skip_col
569
- if v != nil or cell.attributes['date-value']
570
- 0.upto(skip_col.to_i-1) do |i|
571
- set_cell_values(sheet,col,row,i,v,value_type,formula,cell,str_v,style_name)
520
+ if !v.nil? || cell.attributes['date-value']
521
+ 0.upto(skip_col.to_i - 1) do |i|
522
+ set_cell_values(sheet, col, row, i, v, value_type, formula, cell, str_v, style_name)
572
523
  end
573
524
  end
574
525
  col += (skip_col.to_i - 1)
575
526
  end # if skip
576
- set_cell_values(sheet,col,row,0,v,value_type,formula,cell,str_v,style_name)
527
+ set_cell_values(sheet, col, row, 0, v, value_type, formula, cell, str_v, style_name)
577
528
  col += 1
578
529
  end
579
530
  row += 1
@@ -581,101 +532,83 @@ class Roo::OpenOffice < Roo::Base
581
532
  end
582
533
  end
583
534
  end
535
+ doc.xpath("//*[local-name()='automatic-styles']").each do |style|
536
+ read_styles(style)
537
+ end
538
+
539
+ fail RangeError unless sheet_found
540
+
541
+ @cells_read[sheet] = true
542
+ @comments_read[sheet] = true
584
543
  end
585
- doc.xpath("//*[local-name()='automatic-styles']").each do |style|
586
- read_styles(style)
587
- end
588
- if !sheet_found
589
- raise RangeError
590
- end
591
- @cells_read[sheet] = true
592
- @comments_read[sheet] = true
593
- end
594
544
 
595
- # Only calls read_cells because Roo::Base calls read_comments
596
- # whereas the reading of comments is done in read_cells for Roo::OpenOffice-objects
597
- def read_comments(sheet=nil)
598
- read_cells(sheet)
599
- end
545
+ # Only calls read_cells because Roo::Base calls read_comments
546
+ # whereas the reading of comments is done in read_cells for Roo::OpenOffice-objects
547
+ def read_comments(sheet = nil)
548
+ read_cells(sheet)
549
+ end
600
550
 
601
- def read_labels
602
- @label ||= Hash[doc.xpath("//table:named-range").map do |ne|
603
- #-
604
- # $Sheet1.$C$5
605
- #+
606
- name = attr(ne,'name').to_s
607
- sheetname,coords = attr(ne,'cell-range-address').to_s.split('.$')
608
- col, row = coords.split('$')
609
- sheetname = sheetname[1..-1] if sheetname[0,1] == '$'
610
- [name, [sheetname,row,col]]
611
- end]
612
- end
551
+ def read_labels
552
+ @label ||= Hash[doc.xpath('//table:named-range').map do |ne|
553
+ #-
554
+ # $Sheet1.$C$5
555
+ #+
556
+ name = attribute(ne, 'name').to_s
557
+ sheetname, coords = attribute(ne, 'cell-range-address').to_s.split('.$')
558
+ col, row = coords.split('$')
559
+ sheetname = sheetname[1..-1] if sheetname[0, 1] == '$'
560
+ [name, [sheetname, row, col]]
561
+ end]
562
+ end
613
563
 
614
- def read_styles(style_elements)
615
- @font_style_definitions['Default'] = Roo::Font.new
616
- style_elements.each do |style|
617
- next unless style.name == 'style'
618
- style_name = attr(style,'name')
619
- style.each do |properties|
620
- font = Roo::OpenOffice::Font.new
621
- font.bold = attr(properties,'font-weight')
622
- font.italic = attr(properties,'font-style')
623
- font.underline = attr(properties,'text-underline-style')
624
- @font_style_definitions[style_name] = font
564
+ def read_styles(style_elements)
565
+ @font_style_definitions['Default'] = Roo::Font.new
566
+ style_elements.each do |style|
567
+ next unless style.name == 'style'
568
+ style_name = attribute(style, 'name')
569
+ style.each do |properties|
570
+ font = Roo::OpenOffice::Font.new
571
+ font.bold = attribute(properties, 'font-weight')
572
+ font.italic = attribute(properties, 'font-style')
573
+ font.underline = attribute(properties, 'text-underline-style')
574
+ @font_style_definitions[style_name] = font
575
+ end
625
576
  end
626
577
  end
627
- end
628
578
 
629
- def read_table_styles(styles)
630
- styles.children.each do |style|
631
- next unless style.name == 'style'
632
- style_name = attr(style,'name')
633
- style.children.each do |properties|
634
- display = attr(properties,'display')
635
- next unless display
636
- @table_display[style_name] = (display == 'true')
579
+ def read_table_styles(styles)
580
+ styles.children.each do |style|
581
+ next unless style.name == 'style'
582
+ style_name = attribute(style, 'name')
583
+ style.children.each do |properties|
584
+ display = attribute(properties, 'display')
585
+ next unless display
586
+ @table_display[style_name] = (display == 'true')
587
+ end
637
588
  end
638
589
  end
639
- end
640
-
641
- A_ROO_TYPE = {
642
- "float" => :float,
643
- "string" => :string,
644
- "date" => :date,
645
- "percentage" => :percentage,
646
- "time" => :time,
647
- }
648
-
649
- def self.oo_type_2_roo_type(ootype)
650
- return A_ROO_TYPE[ootype]
651
- end
652
590
 
653
- # helper method to convert compressed spaces and other elements within
654
- # an text into a string
655
- def children_to_string(children)
656
- result = ''
657
- children.each {|child|
658
- if child.text?
659
- result = result + child.content
660
- else
661
- if child.name == 's'
662
- compressed_spaces = child.attributes['c'].to_s.to_i
663
- # no explicit number means a count of 1:
664
- if compressed_spaces == 0
665
- compressed_spaces = 1
666
- end
667
- result = result + " "*compressed_spaces
591
+ # helper method to convert compressed spaces and other elements within
592
+ # a text into a string
593
+ # FIXME: add a test for compressed_spaces == 0. It's not currently tested.
594
+ def children_to_string(children)
595
+ children.map do |child|
596
+ if child.text?
597
+ child.content
668
598
  else
669
- result = result + child.content
599
+ if child.name == 's'
600
+ compressed_spaces = child.attributes['c'].to_s.to_i
601
+ # no explicit number means a count of 1:
602
+ compressed_spaces == 0 ? ' ' : ' ' * compressed_spaces
603
+ else
604
+ child.content
605
+ end
670
606
  end
671
- end
672
- }
673
- result
674
- end
607
+ end.join
608
+ end
675
609
 
676
- def attr(node, attr_name)
677
- if node.attributes[attr_name]
678
- node.attributes[attr_name].value
610
+ def attribute(node, attr_name)
611
+ node.attributes[attr_name].value if node.attributes[attr_name]
679
612
  end
680
613
  end
681
614
  end