culturecode-roo 2.0.1 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. data/.gitignore +1 -0
  2. data/CHANGELOG.md +513 -0
  3. data/README.md +206 -73
  4. data/lib/roo.rb +3 -3
  5. data/lib/roo/base.rb +49 -33
  6. data/lib/roo/csv.rb +10 -0
  7. data/lib/roo/excelx.rb +187 -60
  8. data/lib/roo/excelx/comments.rb +2 -1
  9. data/lib/roo/excelx/sheet_doc.rb +30 -3
  10. data/lib/roo/open_office.rb +250 -221
  11. data/lib/roo/utils.rb +28 -31
  12. data/lib/roo/version.rb +1 -1
  13. data/roo.gemspec +10 -12
  14. data/spec/lib/roo/csv_spec.rb +14 -0
  15. data/spec/lib/roo/excelx_spec.rb +90 -2
  16. data/spec/lib/roo/libreoffice_spec.rb +16 -0
  17. data/spec/lib/roo/openoffice_spec.rb +11 -0
  18. data/spec/lib/roo/utils_spec.rb +5 -4
  19. data/test/test_roo.rb +113 -2
  20. metadata +29 -180
  21. data/CHANGELOG +0 -438
  22. data/scripts/txt2html +0 -67
  23. data/test/files/1900_base.xlsx +0 -0
  24. data/test/files/1904_base.xlsx +0 -0
  25. data/test/files/Bibelbund.csv +0 -3741
  26. data/test/files/Bibelbund.ods +0 -0
  27. data/test/files/Bibelbund.xlsx +0 -0
  28. data/test/files/Bibelbund1.ods +0 -0
  29. data/test/files/Pfand_from_windows_phone.xlsx +0 -0
  30. data/test/files/advanced_header.ods +0 -0
  31. data/test/files/bbu.ods +0 -0
  32. data/test/files/bbu.xlsx +0 -0
  33. data/test/files/bode-v1.ods.zip +0 -0
  34. data/test/files/bode-v1.xls.zip +0 -0
  35. data/test/files/boolean.csv +0 -2
  36. data/test/files/boolean.ods +0 -0
  37. data/test/files/boolean.xlsx +0 -0
  38. data/test/files/borders.ods +0 -0
  39. data/test/files/borders.xlsx +0 -0
  40. data/test/files/bug-numbered-sheet-names.xlsx +0 -0
  41. data/test/files/comments.ods +0 -0
  42. data/test/files/comments.xlsx +0 -0
  43. data/test/files/csvtypes.csv +0 -1
  44. data/test/files/datetime.ods +0 -0
  45. data/test/files/datetime.xlsx +0 -0
  46. data/test/files/dreimalvier.ods +0 -0
  47. data/test/files/emptysheets.ods +0 -0
  48. data/test/files/emptysheets.xlsx +0 -0
  49. data/test/files/encrypted-letmein.ods +0 -0
  50. data/test/files/file_item_error.xlsx +0 -0
  51. data/test/files/formula.ods +0 -0
  52. data/test/files/formula.xlsx +0 -0
  53. data/test/files/formula_string_error.xlsx +0 -0
  54. data/test/files/html-escape.ods +0 -0
  55. data/test/files/link.csv +0 -1
  56. data/test/files/link.xlsx +0 -0
  57. data/test/files/matrix.ods +0 -0
  58. data/test/files/named_cells.ods +0 -0
  59. data/test/files/named_cells.xlsx +0 -0
  60. data/test/files/no_spreadsheet_file.txt +0 -1
  61. data/test/files/numbers-export.xlsx +0 -0
  62. data/test/files/numbers1.csv +0 -18
  63. data/test/files/numbers1.ods +0 -0
  64. data/test/files/numbers1.xlsx +0 -0
  65. data/test/files/numbers1withnull.xlsx +0 -0
  66. data/test/files/numeric-link.xlsx +0 -0
  67. data/test/files/only_one_sheet.ods +0 -0
  68. data/test/files/only_one_sheet.xlsx +0 -0
  69. data/test/files/paragraph.ods +0 -0
  70. data/test/files/paragraph.xlsx +0 -0
  71. data/test/files/ric.ods +0 -0
  72. data/test/files/sheet1.xml +0 -109
  73. data/test/files/simple_spreadsheet.ods +0 -0
  74. data/test/files/simple_spreadsheet.xlsx +0 -0
  75. data/test/files/simple_spreadsheet_from_italo.ods +0 -0
  76. data/test/files/so_datetime.csv +0 -8
  77. data/test/files/style.ods +0 -0
  78. data/test/files/style.xlsx +0 -0
  79. data/test/files/time-test.csv +0 -2
  80. data/test/files/time-test.ods +0 -0
  81. data/test/files/time-test.xlsx +0 -0
  82. data/test/files/type_excel.ods +0 -0
  83. data/test/files/type_excel.xlsx +0 -0
  84. data/test/files/type_excelx.ods +0 -0
  85. data/test/files/type_openoffice.xlsx +0 -0
  86. data/test/files/whitespace.ods +0 -0
  87. data/test/files/whitespace.xlsx +0 -0
@@ -11,8 +11,9 @@ class Roo::OpenOffice < Roo::Base
11
11
  packed = options[:packed]
12
12
  file_warning = options[:file_warning] || :error
13
13
 
14
+ @only_visible_sheets = options[:only_visible_sheets]
14
15
  file_type_check(filename,'.ods','an Roo::OpenOffice', file_warning, packed)
15
- @tmpdir = make_tmpdir(filename.split('/').last, options[:tmpdir_root])
16
+ @tmpdir = make_tmpdir(File.basename(filename), options[:tmpdir_root])
16
17
  @filename = local_filename(filename, @tmpdir, packed)
17
18
  #TODO: @cells_read[:default] = false
18
19
  Zip::File.open(@filename) do |zip_file|
@@ -33,222 +34,13 @@ class Roo::OpenOffice < Roo::Base
33
34
  @formula = Hash.new
34
35
  @style = Hash.new
35
36
  @style_defaults = Hash.new { |h,k| h[k] = [] }
36
- @style_definitions = Hash.new
37
+ @table_display = Hash.new { |h,k| h[k] = true }
38
+ @font_style_definitions = Hash.new
37
39
  @comment = Hash.new
38
40
  @comments_read = Hash.new
39
- end
40
-
41
- # If the ODS file has an encryption-data element, then try to decrypt.
42
- # If successful, the temporary content.xml will be overwritten with
43
- # decrypted contents.
44
- def decrypt_if_necessary(
45
- zip_file,
46
- content_entry,
47
- roo_content_xml_path, options
48
- )
49
- # Check if content.xml is encrypted by extracting manifest.xml
50
- # and searching for a manifest:encryption-data element
51
-
52
- if manifest_entry = zip_file.glob("META-INF/manifest.xml").first
53
- roo_manifest_xml_path = File.join(@tmpdir, "roo_manifest.xml")
54
- manifest_entry.extract(roo_manifest_xml_path)
55
- manifest = ::Roo::Utils.load_xml(roo_manifest_xml_path)
56
-
57
- # XPath search for manifest:encryption-data only for the content.xml
58
- # file
59
-
60
- encryption_data = manifest.xpath(
61
- "//manifest:file-entry[@manifest:full-path='content.xml']"\
62
- "/manifest:encryption-data"
63
- ).first
64
-
65
- # If XPath returns a node, then we know content.xml is encrypted
66
-
67
- if !encryption_data.nil?
68
-
69
- # Since we know it's encrypted, we check for the password option
70
- # and if it doesn't exist, raise an argument error
71
-
72
- password = options[:password]
73
- if !password.nil?
74
- perform_decryption(
75
- encryption_data,
76
- password,
77
- content_entry,
78
- roo_content_xml_path
79
- )
80
- else
81
- raise ArgumentError,
82
- 'file is encrypted but password was not supplied'
83
- end
84
- end
85
- else
86
- raise ArgumentError, 'file missing required META-INF/manifest.xml'
87
- end
88
- end
89
-
90
- # Process the ODS encryption manifest and perform the decryption
91
- def perform_decryption(
92
- encryption_data,
93
- password,
94
- content_entry,
95
- roo_content_xml_path
96
- )
97
- # Extract various expected attributes from the manifest that
98
- # describe the encryption
99
-
100
- algorithm_node = encryption_data.xpath("manifest:algorithm").first
101
- key_derivation_node =
102
- encryption_data.xpath("manifest:key-derivation").first
103
- start_key_generation_node =
104
- encryption_data.xpath("manifest:start-key-generation").first
105
-
106
- # If we have all the expected elements, then we can perform
107
- # the decryption.
108
-
109
- if !algorithm_node.nil? && !key_derivation_node.nil? &&
110
- !start_key_generation_node.nil?
111
-
112
- # The algorithm is a URI describing the algorithm used
113
- algorithm = algorithm_node['manifest:algorithm-name']
114
-
115
- # The initialization vector is base-64 encoded
116
- iv = Base64.decode64(
117
- algorithm_node['manifest:initialisation-vector']
118
- )
119
- key_derivation_name =
120
- key_derivation_node['manifest:key-derivation-name']
121
- key_size = key_derivation_node['manifest:key-size'].to_i
122
- iteration_count =
123
- key_derivation_node['manifest:iteration-count'].to_i
124
- salt = Base64.decode64(key_derivation_node['manifest:salt'])
125
-
126
- # The key is hashed with an algorithm represented by this URI
127
- key_generation_name =
128
- start_key_generation_node[
129
- 'manifest:start-key-generation-name'
130
- ]
131
- key_generation_size =
132
- start_key_generation_node['manifest:key-size'].to_i
133
-
134
- hashed_password = password
135
- key = nil
136
-
137
- if key_generation_name.eql?(
138
- "http://www.w3.org/2000/09/xmldsig#sha256"
139
- )
140
- hashed_password = Digest::SHA256.digest(password)
141
- else
142
- raise ArgumentError, 'Unknown key generation algorithm ' +
143
- key_generation_name
144
- end
145
-
146
- cipher = find_cipher(
147
- algorithm,
148
- key_derivation_name,
149
- hashed_password,
150
- salt,
151
- iteration_count,
152
- iv
153
- )
154
-
155
- begin
156
- decrypted = decrypt(content_entry, cipher)
157
-
158
- # Finally, inflate the decrypted stream and overwrite
159
- # content.xml
160
- IO.binwrite(
161
- roo_content_xml_path,
162
- Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(decrypted)
163
- )
164
- rescue StandardError => error
165
- raise ArgumentError,
166
- 'Invalid password or other data error: ' + error.to_s
167
- end
168
- else
169
- raise ArgumentError,
170
- 'manifest.xml missing encryption-data elements'
171
- end
172
- end
173
-
174
- # Create a cipher based on an ODS algorithm URI from manifest.xml
175
- def find_cipher(
176
- algorithm,
177
- key_derivation_name,
178
- hashed_password,
179
- salt,
180
- iteration_count,
181
- iv
182
- )
183
- cipher = nil
184
- if algorithm.eql? "http://www.w3.org/2001/04/xmlenc#aes256-cbc"
185
- cipher = OpenSSL::Cipher.new('AES-256-CBC')
186
- cipher.decrypt
187
- cipher.padding = 0
188
- cipher.key = find_cipher_key(
189
- cipher,
190
- key_derivation_name,
191
- hashed_password,
192
- salt,
193
- iteration_count
194
- )
195
- cipher.iv = iv
196
- else
197
- raise ArgumentError, 'Unknown algorithm ' + algorithm
198
- end
199
- cipher
200
- end
201
-
202
- # Create a cipher key based on an ODS algorithm string from manifest.xml
203
- def find_cipher_key(
204
- cipher,
205
- key_derivation_name,
206
- hashed_password,
207
- salt,
208
- iteration_count
209
- )
210
- if key_derivation_name.eql? "PBKDF2"
211
- key = OpenSSL::PKCS5.pbkdf2_hmac_sha1(
212
- hashed_password,
213
- salt,
214
- iteration_count,
215
- cipher.key_len
216
- )
217
- else
218
- raise ArgumentError, 'Unknown key derivation name ' +
219
- key_derivation_name
220
- end
221
- key
222
- end
223
-
224
- # Block decrypt raw bytes from the zip file based on the cipher
225
- def decrypt(content_entry, cipher)
226
- # Zip::Entry.extract writes a 0-length file when trying
227
- # to extract an encrypted stream, so we read the
228
- # raw bytes based on the offset and lengths
229
- decrypted = ""
230
- File.open(@filename, "rb") do |zipfile|
231
- zipfile.seek(
232
- content_entry.local_header_offset +
233
- content_entry.calculate_local_header_size
234
- )
235
- total_to_read = content_entry.compressed_size
236
- block_size = 4096
237
- if block_size > total_to_read
238
- block_size = total_to_read
239
- end
240
- while buffer = zipfile.read(block_size)
241
- decrypted += cipher.update(buffer)
242
- total_to_read -= buffer.length
243
- if total_to_read == 0
244
- break
245
- end
246
- if block_size > total_to_read
247
- block_size = total_to_read
248
- end
249
- end
250
- end
251
- decrypted + cipher.final
41
+ rescue => e # clean up any temp files, but only if an error was raised
42
+ close
43
+ raise e
252
44
  end
253
45
 
254
46
  def method_missing(m,*args)
@@ -287,7 +79,13 @@ class Roo::OpenOffice < Roo::Base
287
79
  row,col = normalize(row,col)
288
80
  @formula[sheet][[row,col]]
289
81
  end
290
- alias_method :formula?, :formula
82
+
83
+ # Predicate methods really should return a boolean
84
+ # value. Hopefully no one was relying on the fact that this
85
+ # previously returned either nil/formula
86
+ def formula?(*args)
87
+ !!formula(*args)
88
+ end
291
89
 
292
90
  # returns each formula in the selected sheet as an array of elements
293
91
  # [row, col, formula]
@@ -309,7 +107,7 @@ class Roo::OpenOffice < Roo::Base
309
107
  read_cells(sheet)
310
108
  row,col = normalize(row,col)
311
109
  style_name = @style[sheet][[row,col]] || @style_defaults[sheet][col - 1] || 'Default'
312
- @style_definitions[style_name]
110
+ @font_style_definitions[style_name]
313
111
  end
314
112
 
315
113
  # returns the type of a cell:
@@ -332,9 +130,16 @@ class Roo::OpenOffice < Roo::Base
332
130
  end
333
131
 
334
132
  def sheets
335
- doc.xpath("//*[local-name()='table']").map do |sheet|
336
- sheet.attributes["name"].value
133
+ unless @table_display.any?
134
+ doc.xpath("//*[local-name()='automatic-styles']").each do |style|
135
+ read_table_styles(style)
136
+ end
337
137
  end
138
+ doc.xpath("//*[local-name()='table']").map do |sheet|
139
+ if !@only_visible_sheets || @table_display[attr(sheet,'style-name')]
140
+ sheet.attributes["name"].value
141
+ end
142
+ end.compact
338
143
  end
339
144
 
340
145
  # version of the Roo::OpenOffice document
@@ -407,6 +212,218 @@ class Roo::OpenOffice < Roo::Base
407
212
 
408
213
  private
409
214
 
215
+ # If the ODS file has an encryption-data element, then try to decrypt.
216
+ # If successful, the temporary content.xml will be overwritten with
217
+ # decrypted contents.
218
+ def decrypt_if_necessary(
219
+ zip_file,
220
+ content_entry,
221
+ roo_content_xml_path, options
222
+ )
223
+ # Check if content.xml is encrypted by extracting manifest.xml
224
+ # and searching for a manifest:encryption-data element
225
+
226
+ if manifest_entry = zip_file.glob("META-INF/manifest.xml").first
227
+ roo_manifest_xml_path = File.join(@tmpdir, "roo_manifest.xml")
228
+ manifest_entry.extract(roo_manifest_xml_path)
229
+ manifest = ::Roo::Utils.load_xml(roo_manifest_xml_path)
230
+
231
+ # XPath search for manifest:encryption-data only for the content.xml
232
+ # file
233
+
234
+ encryption_data = manifest.xpath(
235
+ "//manifest:file-entry[@manifest:full-path='content.xml']"\
236
+ "/manifest:encryption-data"
237
+ ).first
238
+
239
+ # If XPath returns a node, then we know content.xml is encrypted
240
+
241
+ if !encryption_data.nil?
242
+
243
+ # Since we know it's encrypted, we check for the password option
244
+ # and if it doesn't exist, raise an argument error
245
+
246
+ password = options[:password]
247
+ if !password.nil?
248
+ perform_decryption(
249
+ encryption_data,
250
+ password,
251
+ content_entry,
252
+ roo_content_xml_path
253
+ )
254
+ else
255
+ raise ArgumentError,
256
+ 'file is encrypted but password was not supplied'
257
+ end
258
+ end
259
+ else
260
+ raise ArgumentError, 'file missing required META-INF/manifest.xml'
261
+ end
262
+ end
263
+
264
+ # Process the ODS encryption manifest and perform the decryption
265
+ def perform_decryption(
266
+ encryption_data,
267
+ password,
268
+ content_entry,
269
+ roo_content_xml_path
270
+ )
271
+ # Extract various expected attributes from the manifest that
272
+ # describe the encryption
273
+
274
+ algorithm_node = encryption_data.xpath("manifest:algorithm").first
275
+ key_derivation_node =
276
+ encryption_data.xpath("manifest:key-derivation").first
277
+ start_key_generation_node =
278
+ encryption_data.xpath("manifest:start-key-generation").first
279
+
280
+ # If we have all the expected elements, then we can perform
281
+ # the decryption.
282
+
283
+ if !algorithm_node.nil? && !key_derivation_node.nil? &&
284
+ !start_key_generation_node.nil?
285
+
286
+ # The algorithm is a URI describing the algorithm used
287
+ algorithm = algorithm_node['manifest:algorithm-name']
288
+
289
+ # The initialization vector is base-64 encoded
290
+ iv = Base64.decode64(
291
+ algorithm_node['manifest:initialisation-vector']
292
+ )
293
+ key_derivation_name =
294
+ key_derivation_node['manifest:key-derivation-name']
295
+ key_size = key_derivation_node['manifest:key-size'].to_i
296
+ iteration_count =
297
+ key_derivation_node['manifest:iteration-count'].to_i
298
+ salt = Base64.decode64(key_derivation_node['manifest:salt'])
299
+
300
+ # The key is hashed with an algorithm represented by this URI
301
+ key_generation_name =
302
+ start_key_generation_node[
303
+ 'manifest:start-key-generation-name'
304
+ ]
305
+ key_generation_size =
306
+ start_key_generation_node['manifest:key-size'].to_i
307
+
308
+ hashed_password = password
309
+ key = nil
310
+
311
+ if key_generation_name.eql?(
312
+ "http://www.w3.org/2000/09/xmldsig#sha256"
313
+ )
314
+ hashed_password = Digest::SHA256.digest(password)
315
+ else
316
+ raise ArgumentError, 'Unknown key generation algorithm ' +
317
+ key_generation_name
318
+ end
319
+
320
+ cipher = find_cipher(
321
+ algorithm,
322
+ key_derivation_name,
323
+ hashed_password,
324
+ salt,
325
+ iteration_count,
326
+ iv
327
+ )
328
+
329
+ begin
330
+ decrypted = decrypt(content_entry, cipher)
331
+
332
+ # Finally, inflate the decrypted stream and overwrite
333
+ # content.xml
334
+ IO.binwrite(
335
+ roo_content_xml_path,
336
+ Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(decrypted)
337
+ )
338
+ rescue StandardError => error
339
+ raise ArgumentError,
340
+ 'Invalid password or other data error: ' + error.to_s
341
+ end
342
+ else
343
+ raise ArgumentError,
344
+ 'manifest.xml missing encryption-data elements'
345
+ end
346
+ end
347
+
348
+ # Create a cipher based on an ODS algorithm URI from manifest.xml
349
+ def find_cipher(
350
+ algorithm,
351
+ key_derivation_name,
352
+ hashed_password,
353
+ salt,
354
+ iteration_count,
355
+ iv
356
+ )
357
+ cipher = nil
358
+ if algorithm.eql? "http://www.w3.org/2001/04/xmlenc#aes256-cbc"
359
+ cipher = OpenSSL::Cipher.new('AES-256-CBC')
360
+ cipher.decrypt
361
+ cipher.padding = 0
362
+ cipher.key = find_cipher_key(
363
+ cipher,
364
+ key_derivation_name,
365
+ hashed_password,
366
+ salt,
367
+ iteration_count
368
+ )
369
+ cipher.iv = iv
370
+ else
371
+ raise ArgumentError, 'Unknown algorithm ' + algorithm
372
+ end
373
+ cipher
374
+ end
375
+
376
+ # Create a cipher key based on an ODS algorithm string from manifest.xml
377
+ def find_cipher_key(
378
+ cipher,
379
+ key_derivation_name,
380
+ hashed_password,
381
+ salt,
382
+ iteration_count
383
+ )
384
+ if key_derivation_name.eql? "PBKDF2"
385
+ key = OpenSSL::PKCS5.pbkdf2_hmac_sha1(
386
+ hashed_password,
387
+ salt,
388
+ iteration_count,
389
+ cipher.key_len
390
+ )
391
+ else
392
+ raise ArgumentError, 'Unknown key derivation name ' +
393
+ key_derivation_name
394
+ end
395
+ key
396
+ end
397
+
398
+ # Block decrypt raw bytes from the zip file based on the cipher
399
+ def decrypt(content_entry, cipher)
400
+ # Zip::Entry.extract writes a 0-length file when trying
401
+ # to extract an encrypted stream, so we read the
402
+ # raw bytes based on the offset and lengths
403
+ decrypted = ""
404
+ File.open(@filename, "rb") do |zipfile|
405
+ zipfile.seek(
406
+ content_entry.local_header_offset +
407
+ content_entry.calculate_local_header_size
408
+ )
409
+ total_to_read = content_entry.compressed_size
410
+
411
+ block_size = 4096
412
+ block_size = total_to_read if block_size > total_to_read
413
+
414
+ while buffer = zipfile.read(block_size)
415
+ decrypted += cipher.update(buffer)
416
+ total_to_read -= buffer.length
417
+
418
+ break if total_to_read == 0
419
+
420
+ block_size = total_to_read if block_size > total_to_read
421
+ end
422
+ end
423
+
424
+ decrypted + cipher.final
425
+ end
426
+
410
427
  def doc
411
428
  @doc ||= ::Roo::Utils.load_xml(File.join(@tmpdir, "roo_content.xml"))
412
429
  end
@@ -595,7 +612,7 @@ class Roo::OpenOffice < Roo::Base
595
612
  end
596
613
 
597
614
  def read_styles(style_elements)
598
- @style_definitions['Default'] = Roo::Font.new
615
+ @font_style_definitions['Default'] = Roo::Font.new
599
616
  style_elements.each do |style|
600
617
  next unless style.name == 'style'
601
618
  style_name = attr(style,'name')
@@ -604,7 +621,19 @@ class Roo::OpenOffice < Roo::Base
604
621
  font.bold = attr(properties,'font-weight')
605
622
  font.italic = attr(properties,'font-style')
606
623
  font.underline = attr(properties,'text-underline-style')
607
- @style_definitions[style_name] = font
624
+ @font_style_definitions[style_name] = font
625
+ end
626
+ end
627
+ end
628
+
629
+ def read_table_styles(styles)
630
+ styles.children.each do |style|
631
+ next unless style.name == 'style'
632
+ style_name = attr(style,'name')
633
+ style.children.each do |properties|
634
+ display = attr(properties,'display')
635
+ next unless display
636
+ @table_display[style_name] = (display == 'true')
608
637
  end
609
638
  end
610
639
  end