culturecode-roo 2.0.1 → 2.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (87)
  1. data/.gitignore +1 -0
  2. data/CHANGELOG.md +513 -0
  3. data/README.md +206 -73
  4. data/lib/roo.rb +3 -3
  5. data/lib/roo/base.rb +49 -33
  6. data/lib/roo/csv.rb +10 -0
  7. data/lib/roo/excelx.rb +187 -60
  8. data/lib/roo/excelx/comments.rb +2 -1
  9. data/lib/roo/excelx/sheet_doc.rb +30 -3
  10. data/lib/roo/open_office.rb +250 -221
  11. data/lib/roo/utils.rb +28 -31
  12. data/lib/roo/version.rb +1 -1
  13. data/roo.gemspec +10 -12
  14. data/spec/lib/roo/csv_spec.rb +14 -0
  15. data/spec/lib/roo/excelx_spec.rb +90 -2
  16. data/spec/lib/roo/libreoffice_spec.rb +16 -0
  17. data/spec/lib/roo/openoffice_spec.rb +11 -0
  18. data/spec/lib/roo/utils_spec.rb +5 -4
  19. data/test/test_roo.rb +113 -2
  20. metadata +29 -180
  21. data/CHANGELOG +0 -438
  22. data/scripts/txt2html +0 -67
  23. data/test/files/1900_base.xlsx +0 -0
  24. data/test/files/1904_base.xlsx +0 -0
  25. data/test/files/Bibelbund.csv +0 -3741
  26. data/test/files/Bibelbund.ods +0 -0
  27. data/test/files/Bibelbund.xlsx +0 -0
  28. data/test/files/Bibelbund1.ods +0 -0
  29. data/test/files/Pfand_from_windows_phone.xlsx +0 -0
  30. data/test/files/advanced_header.ods +0 -0
  31. data/test/files/bbu.ods +0 -0
  32. data/test/files/bbu.xlsx +0 -0
  33. data/test/files/bode-v1.ods.zip +0 -0
  34. data/test/files/bode-v1.xls.zip +0 -0
  35. data/test/files/boolean.csv +0 -2
  36. data/test/files/boolean.ods +0 -0
  37. data/test/files/boolean.xlsx +0 -0
  38. data/test/files/borders.ods +0 -0
  39. data/test/files/borders.xlsx +0 -0
  40. data/test/files/bug-numbered-sheet-names.xlsx +0 -0
  41. data/test/files/comments.ods +0 -0
  42. data/test/files/comments.xlsx +0 -0
  43. data/test/files/csvtypes.csv +0 -1
  44. data/test/files/datetime.ods +0 -0
  45. data/test/files/datetime.xlsx +0 -0
  46. data/test/files/dreimalvier.ods +0 -0
  47. data/test/files/emptysheets.ods +0 -0
  48. data/test/files/emptysheets.xlsx +0 -0
  49. data/test/files/encrypted-letmein.ods +0 -0
  50. data/test/files/file_item_error.xlsx +0 -0
  51. data/test/files/formula.ods +0 -0
  52. data/test/files/formula.xlsx +0 -0
  53. data/test/files/formula_string_error.xlsx +0 -0
  54. data/test/files/html-escape.ods +0 -0
  55. data/test/files/link.csv +0 -1
  56. data/test/files/link.xlsx +0 -0
  57. data/test/files/matrix.ods +0 -0
  58. data/test/files/named_cells.ods +0 -0
  59. data/test/files/named_cells.xlsx +0 -0
  60. data/test/files/no_spreadsheet_file.txt +0 -1
  61. data/test/files/numbers-export.xlsx +0 -0
  62. data/test/files/numbers1.csv +0 -18
  63. data/test/files/numbers1.ods +0 -0
  64. data/test/files/numbers1.xlsx +0 -0
  65. data/test/files/numbers1withnull.xlsx +0 -0
  66. data/test/files/numeric-link.xlsx +0 -0
  67. data/test/files/only_one_sheet.ods +0 -0
  68. data/test/files/only_one_sheet.xlsx +0 -0
  69. data/test/files/paragraph.ods +0 -0
  70. data/test/files/paragraph.xlsx +0 -0
  71. data/test/files/ric.ods +0 -0
  72. data/test/files/sheet1.xml +0 -109
  73. data/test/files/simple_spreadsheet.ods +0 -0
  74. data/test/files/simple_spreadsheet.xlsx +0 -0
  75. data/test/files/simple_spreadsheet_from_italo.ods +0 -0
  76. data/test/files/so_datetime.csv +0 -8
  77. data/test/files/style.ods +0 -0
  78. data/test/files/style.xlsx +0 -0
  79. data/test/files/time-test.csv +0 -2
  80. data/test/files/time-test.ods +0 -0
  81. data/test/files/time-test.xlsx +0 -0
  82. data/test/files/type_excel.ods +0 -0
  83. data/test/files/type_excel.xlsx +0 -0
  84. data/test/files/type_excelx.ods +0 -0
  85. data/test/files/type_openoffice.xlsx +0 -0
  86. data/test/files/whitespace.ods +0 -0
  87. data/test/files/whitespace.xlsx +0 -0
@@ -11,8 +11,9 @@ class Roo::OpenOffice < Roo::Base
11
11
  packed = options[:packed]
12
12
  file_warning = options[:file_warning] || :error
13
13
 
14
+ @only_visible_sheets = options[:only_visible_sheets]
14
15
  file_type_check(filename,'.ods','an Roo::OpenOffice', file_warning, packed)
15
- @tmpdir = make_tmpdir(filename.split('/').last, options[:tmpdir_root])
16
+ @tmpdir = make_tmpdir(File.basename(filename), options[:tmpdir_root])
16
17
  @filename = local_filename(filename, @tmpdir, packed)
17
18
  #TODO: @cells_read[:default] = false
18
19
  Zip::File.open(@filename) do |zip_file|
@@ -33,222 +34,13 @@ class Roo::OpenOffice < Roo::Base
33
34
  @formula = Hash.new
34
35
  @style = Hash.new
35
36
  @style_defaults = Hash.new { |h,k| h[k] = [] }
36
- @style_definitions = Hash.new
37
+ @table_display = Hash.new { |h,k| h[k] = true }
38
+ @font_style_definitions = Hash.new
37
39
  @comment = Hash.new
38
40
  @comments_read = Hash.new
39
- end
40
-
41
- # If the ODS file has an encryption-data element, then try to decrypt.
42
- # If successful, the temporary content.xml will be overwritten with
43
- # decrypted contents.
44
- def decrypt_if_necessary(
45
- zip_file,
46
- content_entry,
47
- roo_content_xml_path, options
48
- )
49
- # Check if content.xml is encrypted by extracting manifest.xml
50
- # and searching for a manifest:encryption-data element
51
-
52
- if manifest_entry = zip_file.glob("META-INF/manifest.xml").first
53
- roo_manifest_xml_path = File.join(@tmpdir, "roo_manifest.xml")
54
- manifest_entry.extract(roo_manifest_xml_path)
55
- manifest = ::Roo::Utils.load_xml(roo_manifest_xml_path)
56
-
57
- # XPath search for manifest:encryption-data only for the content.xml
58
- # file
59
-
60
- encryption_data = manifest.xpath(
61
- "//manifest:file-entry[@manifest:full-path='content.xml']"\
62
- "/manifest:encryption-data"
63
- ).first
64
-
65
- # If XPath returns a node, then we know content.xml is encrypted
66
-
67
- if !encryption_data.nil?
68
-
69
- # Since we know it's encrypted, we check for the password option
70
- # and if it doesn't exist, raise an argument error
71
-
72
- password = options[:password]
73
- if !password.nil?
74
- perform_decryption(
75
- encryption_data,
76
- password,
77
- content_entry,
78
- roo_content_xml_path
79
- )
80
- else
81
- raise ArgumentError,
82
- 'file is encrypted but password was not supplied'
83
- end
84
- end
85
- else
86
- raise ArgumentError, 'file missing required META-INF/manifest.xml'
87
- end
88
- end
89
-
90
- # Process the ODS encryption manifest and perform the decryption
91
- def perform_decryption(
92
- encryption_data,
93
- password,
94
- content_entry,
95
- roo_content_xml_path
96
- )
97
- # Extract various expected attributes from the manifest that
98
- # describe the encryption
99
-
100
- algorithm_node = encryption_data.xpath("manifest:algorithm").first
101
- key_derivation_node =
102
- encryption_data.xpath("manifest:key-derivation").first
103
- start_key_generation_node =
104
- encryption_data.xpath("manifest:start-key-generation").first
105
-
106
- # If we have all the expected elements, then we can perform
107
- # the decryption.
108
-
109
- if !algorithm_node.nil? && !key_derivation_node.nil? &&
110
- !start_key_generation_node.nil?
111
-
112
- # The algorithm is a URI describing the algorithm used
113
- algorithm = algorithm_node['manifest:algorithm-name']
114
-
115
- # The initialization vector is base-64 encoded
116
- iv = Base64.decode64(
117
- algorithm_node['manifest:initialisation-vector']
118
- )
119
- key_derivation_name =
120
- key_derivation_node['manifest:key-derivation-name']
121
- key_size = key_derivation_node['manifest:key-size'].to_i
122
- iteration_count =
123
- key_derivation_node['manifest:iteration-count'].to_i
124
- salt = Base64.decode64(key_derivation_node['manifest:salt'])
125
-
126
- # The key is hashed with an algorithm represented by this URI
127
- key_generation_name =
128
- start_key_generation_node[
129
- 'manifest:start-key-generation-name'
130
- ]
131
- key_generation_size =
132
- start_key_generation_node['manifest:key-size'].to_i
133
-
134
- hashed_password = password
135
- key = nil
136
-
137
- if key_generation_name.eql?(
138
- "http://www.w3.org/2000/09/xmldsig#sha256"
139
- )
140
- hashed_password = Digest::SHA256.digest(password)
141
- else
142
- raise ArgumentError, 'Unknown key generation algorithm ' +
143
- key_generation_name
144
- end
145
-
146
- cipher = find_cipher(
147
- algorithm,
148
- key_derivation_name,
149
- hashed_password,
150
- salt,
151
- iteration_count,
152
- iv
153
- )
154
-
155
- begin
156
- decrypted = decrypt(content_entry, cipher)
157
-
158
- # Finally, inflate the decrypted stream and overwrite
159
- # content.xml
160
- IO.binwrite(
161
- roo_content_xml_path,
162
- Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(decrypted)
163
- )
164
- rescue StandardError => error
165
- raise ArgumentError,
166
- 'Invalid password or other data error: ' + error.to_s
167
- end
168
- else
169
- raise ArgumentError,
170
- 'manifest.xml missing encryption-data elements'
171
- end
172
- end
173
-
174
- # Create a cipher based on an ODS algorithm URI from manifest.xml
175
- def find_cipher(
176
- algorithm,
177
- key_derivation_name,
178
- hashed_password,
179
- salt,
180
- iteration_count,
181
- iv
182
- )
183
- cipher = nil
184
- if algorithm.eql? "http://www.w3.org/2001/04/xmlenc#aes256-cbc"
185
- cipher = OpenSSL::Cipher.new('AES-256-CBC')
186
- cipher.decrypt
187
- cipher.padding = 0
188
- cipher.key = find_cipher_key(
189
- cipher,
190
- key_derivation_name,
191
- hashed_password,
192
- salt,
193
- iteration_count
194
- )
195
- cipher.iv = iv
196
- else
197
- raise ArgumentError, 'Unknown algorithm ' + algorithm
198
- end
199
- cipher
200
- end
201
-
202
- # Create a cipher key based on an ODS algorithm string from manifest.xml
203
- def find_cipher_key(
204
- cipher,
205
- key_derivation_name,
206
- hashed_password,
207
- salt,
208
- iteration_count
209
- )
210
- if key_derivation_name.eql? "PBKDF2"
211
- key = OpenSSL::PKCS5.pbkdf2_hmac_sha1(
212
- hashed_password,
213
- salt,
214
- iteration_count,
215
- cipher.key_len
216
- )
217
- else
218
- raise ArgumentError, 'Unknown key derivation name ' +
219
- key_derivation_name
220
- end
221
- key
222
- end
223
-
224
- # Block decrypt raw bytes from the zip file based on the cipher
225
- def decrypt(content_entry, cipher)
226
- # Zip::Entry.extract writes a 0-length file when trying
227
- # to extract an encrypted stream, so we read the
228
- # raw bytes based on the offset and lengths
229
- decrypted = ""
230
- File.open(@filename, "rb") do |zipfile|
231
- zipfile.seek(
232
- content_entry.local_header_offset +
233
- content_entry.calculate_local_header_size
234
- )
235
- total_to_read = content_entry.compressed_size
236
- block_size = 4096
237
- if block_size > total_to_read
238
- block_size = total_to_read
239
- end
240
- while buffer = zipfile.read(block_size)
241
- decrypted += cipher.update(buffer)
242
- total_to_read -= buffer.length
243
- if total_to_read == 0
244
- break
245
- end
246
- if block_size > total_to_read
247
- block_size = total_to_read
248
- end
249
- end
250
- end
251
- decrypted + cipher.final
41
+ rescue => e # clean up any temp files, but only if an error was raised
42
+ close
43
+ raise e
252
44
  end
253
45
 
254
46
  def method_missing(m,*args)
@@ -287,7 +79,13 @@ class Roo::OpenOffice < Roo::Base
287
79
  row,col = normalize(row,col)
288
80
  @formula[sheet][[row,col]]
289
81
  end
290
- alias_method :formula?, :formula
82
+
83
+ # Predicate methods really should return a boolean
84
+ # value. Hopefully no one was relying on the fact that this
85
+ # previously returned either nil/formula
86
+ def formula?(*args)
87
+ !!formula(*args)
88
+ end
291
89
 
292
90
  # returns each formula in the selected sheet as an array of elements
293
91
  # [row, col, formula]
@@ -309,7 +107,7 @@ class Roo::OpenOffice < Roo::Base
309
107
  read_cells(sheet)
310
108
  row,col = normalize(row,col)
311
109
  style_name = @style[sheet][[row,col]] || @style_defaults[sheet][col - 1] || 'Default'
312
- @style_definitions[style_name]
110
+ @font_style_definitions[style_name]
313
111
  end
314
112
 
315
113
  # returns the type of a cell:
@@ -332,9 +130,16 @@ class Roo::OpenOffice < Roo::Base
332
130
  end
333
131
 
334
132
  def sheets
335
- doc.xpath("//*[local-name()='table']").map do |sheet|
336
- sheet.attributes["name"].value
133
+ unless @table_display.any?
134
+ doc.xpath("//*[local-name()='automatic-styles']").each do |style|
135
+ read_table_styles(style)
136
+ end
337
137
  end
138
+ doc.xpath("//*[local-name()='table']").map do |sheet|
139
+ if !@only_visible_sheets || @table_display[attr(sheet,'style-name')]
140
+ sheet.attributes["name"].value
141
+ end
142
+ end.compact
338
143
  end
339
144
 
340
145
  # version of the Roo::OpenOffice document
@@ -407,6 +212,218 @@ class Roo::OpenOffice < Roo::Base
407
212
 
408
213
  private
409
214
 
215
+ # If the ODS file has an encryption-data element, then try to decrypt.
216
+ # If successful, the temporary content.xml will be overwritten with
217
+ # decrypted contents.
218
+ def decrypt_if_necessary(
219
+ zip_file,
220
+ content_entry,
221
+ roo_content_xml_path, options
222
+ )
223
+ # Check if content.xml is encrypted by extracting manifest.xml
224
+ # and searching for a manifest:encryption-data element
225
+
226
+ if manifest_entry = zip_file.glob("META-INF/manifest.xml").first
227
+ roo_manifest_xml_path = File.join(@tmpdir, "roo_manifest.xml")
228
+ manifest_entry.extract(roo_manifest_xml_path)
229
+ manifest = ::Roo::Utils.load_xml(roo_manifest_xml_path)
230
+
231
+ # XPath search for manifest:encryption-data only for the content.xml
232
+ # file
233
+
234
+ encryption_data = manifest.xpath(
235
+ "//manifest:file-entry[@manifest:full-path='content.xml']"\
236
+ "/manifest:encryption-data"
237
+ ).first
238
+
239
+ # If XPath returns a node, then we know content.xml is encrypted
240
+
241
+ if !encryption_data.nil?
242
+
243
+ # Since we know it's encrypted, we check for the password option
244
+ # and if it doesn't exist, raise an argument error
245
+
246
+ password = options[:password]
247
+ if !password.nil?
248
+ perform_decryption(
249
+ encryption_data,
250
+ password,
251
+ content_entry,
252
+ roo_content_xml_path
253
+ )
254
+ else
255
+ raise ArgumentError,
256
+ 'file is encrypted but password was not supplied'
257
+ end
258
+ end
259
+ else
260
+ raise ArgumentError, 'file missing required META-INF/manifest.xml'
261
+ end
262
+ end
263
+
264
+ # Process the ODS encryption manifest and perform the decryption
265
+ def perform_decryption(
266
+ encryption_data,
267
+ password,
268
+ content_entry,
269
+ roo_content_xml_path
270
+ )
271
+ # Extract various expected attributes from the manifest that
272
+ # describe the encryption
273
+
274
+ algorithm_node = encryption_data.xpath("manifest:algorithm").first
275
+ key_derivation_node =
276
+ encryption_data.xpath("manifest:key-derivation").first
277
+ start_key_generation_node =
278
+ encryption_data.xpath("manifest:start-key-generation").first
279
+
280
+ # If we have all the expected elements, then we can perform
281
+ # the decryption.
282
+
283
+ if !algorithm_node.nil? && !key_derivation_node.nil? &&
284
+ !start_key_generation_node.nil?
285
+
286
+ # The algorithm is a URI describing the algorithm used
287
+ algorithm = algorithm_node['manifest:algorithm-name']
288
+
289
+ # The initialization vector is base-64 encoded
290
+ iv = Base64.decode64(
291
+ algorithm_node['manifest:initialisation-vector']
292
+ )
293
+ key_derivation_name =
294
+ key_derivation_node['manifest:key-derivation-name']
295
+ key_size = key_derivation_node['manifest:key-size'].to_i
296
+ iteration_count =
297
+ key_derivation_node['manifest:iteration-count'].to_i
298
+ salt = Base64.decode64(key_derivation_node['manifest:salt'])
299
+
300
+ # The key is hashed with an algorithm represented by this URI
301
+ key_generation_name =
302
+ start_key_generation_node[
303
+ 'manifest:start-key-generation-name'
304
+ ]
305
+ key_generation_size =
306
+ start_key_generation_node['manifest:key-size'].to_i
307
+
308
+ hashed_password = password
309
+ key = nil
310
+
311
+ if key_generation_name.eql?(
312
+ "http://www.w3.org/2000/09/xmldsig#sha256"
313
+ )
314
+ hashed_password = Digest::SHA256.digest(password)
315
+ else
316
+ raise ArgumentError, 'Unknown key generation algorithm ' +
317
+ key_generation_name
318
+ end
319
+
320
+ cipher = find_cipher(
321
+ algorithm,
322
+ key_derivation_name,
323
+ hashed_password,
324
+ salt,
325
+ iteration_count,
326
+ iv
327
+ )
328
+
329
+ begin
330
+ decrypted = decrypt(content_entry, cipher)
331
+
332
+ # Finally, inflate the decrypted stream and overwrite
333
+ # content.xml
334
+ IO.binwrite(
335
+ roo_content_xml_path,
336
+ Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(decrypted)
337
+ )
338
+ rescue StandardError => error
339
+ raise ArgumentError,
340
+ 'Invalid password or other data error: ' + error.to_s
341
+ end
342
+ else
343
+ raise ArgumentError,
344
+ 'manifest.xml missing encryption-data elements'
345
+ end
346
+ end
347
+
348
+ # Create a cipher based on an ODS algorithm URI from manifest.xml
349
+ def find_cipher(
350
+ algorithm,
351
+ key_derivation_name,
352
+ hashed_password,
353
+ salt,
354
+ iteration_count,
355
+ iv
356
+ )
357
+ cipher = nil
358
+ if algorithm.eql? "http://www.w3.org/2001/04/xmlenc#aes256-cbc"
359
+ cipher = OpenSSL::Cipher.new('AES-256-CBC')
360
+ cipher.decrypt
361
+ cipher.padding = 0
362
+ cipher.key = find_cipher_key(
363
+ cipher,
364
+ key_derivation_name,
365
+ hashed_password,
366
+ salt,
367
+ iteration_count
368
+ )
369
+ cipher.iv = iv
370
+ else
371
+ raise ArgumentError, 'Unknown algorithm ' + algorithm
372
+ end
373
+ cipher
374
+ end
375
+
376
+ # Create a cipher key based on an ODS algorithm string from manifest.xml
377
+ def find_cipher_key(
378
+ cipher,
379
+ key_derivation_name,
380
+ hashed_password,
381
+ salt,
382
+ iteration_count
383
+ )
384
+ if key_derivation_name.eql? "PBKDF2"
385
+ key = OpenSSL::PKCS5.pbkdf2_hmac_sha1(
386
+ hashed_password,
387
+ salt,
388
+ iteration_count,
389
+ cipher.key_len
390
+ )
391
+ else
392
+ raise ArgumentError, 'Unknown key derivation name ' +
393
+ key_derivation_name
394
+ end
395
+ key
396
+ end
397
+
398
+ # Block decrypt raw bytes from the zip file based on the cipher
399
+ def decrypt(content_entry, cipher)
400
+ # Zip::Entry.extract writes a 0-length file when trying
401
+ # to extract an encrypted stream, so we read the
402
+ # raw bytes based on the offset and lengths
403
+ decrypted = ""
404
+ File.open(@filename, "rb") do |zipfile|
405
+ zipfile.seek(
406
+ content_entry.local_header_offset +
407
+ content_entry.calculate_local_header_size
408
+ )
409
+ total_to_read = content_entry.compressed_size
410
+
411
+ block_size = 4096
412
+ block_size = total_to_read if block_size > total_to_read
413
+
414
+ while buffer = zipfile.read(block_size)
415
+ decrypted += cipher.update(buffer)
416
+ total_to_read -= buffer.length
417
+
418
+ break if total_to_read == 0
419
+
420
+ block_size = total_to_read if block_size > total_to_read
421
+ end
422
+ end
423
+
424
+ decrypted + cipher.final
425
+ end
426
+
410
427
  def doc
411
428
  @doc ||= ::Roo::Utils.load_xml(File.join(@tmpdir, "roo_content.xml"))
412
429
  end
@@ -595,7 +612,7 @@ class Roo::OpenOffice < Roo::Base
595
612
  end
596
613
 
597
614
  def read_styles(style_elements)
598
- @style_definitions['Default'] = Roo::Font.new
615
+ @font_style_definitions['Default'] = Roo::Font.new
599
616
  style_elements.each do |style|
600
617
  next unless style.name == 'style'
601
618
  style_name = attr(style,'name')
@@ -604,7 +621,19 @@ class Roo::OpenOffice < Roo::Base
604
621
  font.bold = attr(properties,'font-weight')
605
622
  font.italic = attr(properties,'font-style')
606
623
  font.underline = attr(properties,'text-underline-style')
607
- @style_definitions[style_name] = font
624
+ @font_style_definitions[style_name] = font
625
+ end
626
+ end
627
+ end
628
+
629
+ def read_table_styles(styles)
630
+ styles.children.each do |style|
631
+ next unless style.name == 'style'
632
+ style_name = attr(style,'name')
633
+ style.children.each do |properties|
634
+ display = attr(properties,'display')
635
+ next unless display
636
+ @table_display[style_name] = (display == 'true')
608
637
  end
609
638
  end
610
639
  end