roo 2.0.1 → 2.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. checksums.yaml +4 -4
  2. data/.codeclimate.yml +17 -0
  3. data/.github/ISSUE_TEMPLATE +10 -0
  4. data/.gitignore +4 -0
  5. data/.travis.yml +10 -6
  6. data/CHANGELOG.md +116 -1
  7. data/Gemfile +3 -4
  8. data/Gemfile_ruby2 +30 -0
  9. data/Guardfile +1 -2
  10. data/README.md +56 -22
  11. data/Rakefile +1 -1
  12. data/lib/roo/base.rb +108 -245
  13. data/lib/roo/constants.rb +5 -0
  14. data/lib/roo/csv.rb +94 -87
  15. data/lib/roo/errors.rb +11 -0
  16. data/lib/roo/excelx/cell/base.rb +94 -0
  17. data/lib/roo/excelx/cell/boolean.rb +27 -0
  18. data/lib/roo/excelx/cell/date.rb +28 -0
  19. data/lib/roo/excelx/cell/datetime.rb +111 -0
  20. data/lib/roo/excelx/cell/empty.rb +19 -0
  21. data/lib/roo/excelx/cell/number.rb +87 -0
  22. data/lib/roo/excelx/cell/string.rb +19 -0
  23. data/lib/roo/excelx/cell/time.rb +43 -0
  24. data/lib/roo/excelx/cell.rb +33 -4
  25. data/lib/roo/excelx/comments.rb +33 -0
  26. data/lib/roo/excelx/coordinate.rb +12 -0
  27. data/lib/roo/excelx/extractor.rb +3 -4
  28. data/lib/roo/excelx/format.rb +64 -0
  29. data/lib/roo/excelx/shared.rb +32 -0
  30. data/lib/roo/excelx/shared_strings.rb +124 -4
  31. data/lib/roo/excelx/sheet.rb +12 -7
  32. data/lib/roo/excelx/sheet_doc.rb +108 -97
  33. data/lib/roo/excelx/styles.rb +1 -1
  34. data/lib/roo/excelx.rb +61 -103
  35. data/lib/roo/formatters/base.rb +15 -0
  36. data/lib/roo/formatters/csv.rb +84 -0
  37. data/lib/roo/formatters/matrix.rb +23 -0
  38. data/lib/roo/formatters/xml.rb +31 -0
  39. data/lib/roo/formatters/yaml.rb +40 -0
  40. data/lib/roo/libre_office.rb +1 -2
  41. data/lib/roo/link.rb +21 -2
  42. data/lib/roo/open_office.rb +468 -523
  43. data/lib/roo/spreadsheet.rb +3 -1
  44. data/lib/roo/tempdir.rb +21 -0
  45. data/lib/roo/utils.rb +7 -7
  46. data/lib/roo/version.rb +1 -1
  47. data/lib/roo.rb +8 -3
  48. data/roo.gemspec +2 -1
  49. data/spec/helpers.rb +5 -0
  50. data/spec/lib/roo/base_spec.rb +229 -0
  51. data/spec/lib/roo/csv_spec.rb +19 -0
  52. data/spec/lib/roo/excelx_spec.rb +97 -11
  53. data/spec/lib/roo/openoffice_spec.rb +18 -1
  54. data/spec/lib/roo/spreadsheet_spec.rb +20 -0
  55. data/spec/lib/roo/utils_spec.rb +1 -1
  56. data/spec/spec_helper.rb +5 -5
  57. data/test/all_ss.rb +12 -11
  58. data/test/excelx/cell/test_base.rb +63 -0
  59. data/test/excelx/cell/test_boolean.rb +36 -0
  60. data/test/excelx/cell/test_date.rb +38 -0
  61. data/test/excelx/cell/test_datetime.rb +45 -0
  62. data/test/excelx/cell/test_empty.rb +7 -0
  63. data/test/excelx/cell/test_number.rb +74 -0
  64. data/test/excelx/cell/test_string.rb +28 -0
  65. data/test/excelx/cell/test_time.rb +30 -0
  66. data/test/formatters/test_csv.rb +119 -0
  67. data/test/formatters/test_matrix.rb +76 -0
  68. data/test/formatters/test_xml.rb +78 -0
  69. data/test/formatters/test_yaml.rb +20 -0
  70. data/test/helpers/test_accessing_files.rb +60 -0
  71. data/test/helpers/test_comments.rb +43 -0
  72. data/test/helpers/test_formulas.rb +9 -0
  73. data/test/helpers/test_labels.rb +103 -0
  74. data/test/helpers/test_sheets.rb +55 -0
  75. data/test/helpers/test_styles.rb +62 -0
  76. data/test/roo/test_base.rb +182 -0
  77. data/test/roo/test_csv.rb +60 -0
  78. data/test/roo/test_excelx.rb +325 -0
  79. data/test/roo/test_libre_office.rb +9 -0
  80. data/test/roo/test_open_office.rb +289 -0
  81. data/test/test_helper.rb +116 -18
  82. data/test/test_roo.rb +362 -2088
  83. metadata +70 -4
  84. data/test/test_generic_spreadsheet.rb +0 -237
@@ -3,319 +3,317 @@ require 'nokogiri'
3
3
  require 'cgi'
4
4
  require 'zip/filesystem'
5
5
  require 'roo/font'
6
+ require 'roo/tempdir'
7
+ require 'base64'
8
+ require 'openssl'
9
+
10
+ module Roo
11
+ class OpenOffice < Roo::Base
12
+ extend Roo::Tempdir
13
+
14
+ ERROR_MISSING_CONTENT_XML = 'file missing required content.xml'.freeze
15
+ XPATH_FIND_TABLE_STYLES = "//*[local-name()='automatic-styles']".freeze
16
+ XPATH_LOCAL_NAME_TABLE = "//*[local-name()='table']".freeze
17
+
18
+ # initialization and opening of a spreadsheet file
19
+ # values for packed: :zip
20
+ def initialize(filename, options = {})
21
+ packed = options[:packed]
22
+ file_warning = options[:file_warning] || :error
23
+
24
+ @only_visible_sheets = options[:only_visible_sheets]
25
+ file_type_check(filename, '.ods', 'an Roo::OpenOffice', file_warning, packed)
26
+ # NOTE: Create temp directory and allow Ruby to cleanup the temp directory
27
+ # when the object is garbage collected. Initially, the finalizer was
28
+ # created in the Roo::Tempdir module, but that led to a segfault
29
+ # when testing in Ruby 2.4.0.
30
+ @tmpdir = self.class.make_tempdir(self, find_basename(filename), options[:tmpdir_root])
31
+ ObjectSpace.define_finalizer(self, self.class.finalize(object_id))
32
+ @filename = local_filename(filename, @tmpdir, packed)
33
+ # TODO: @cells_read[:default] = false
34
+ open_oo_file(options)
35
+ super(filename, options)
36
+ initialize_default_variables
37
+
38
+ unless @table_display.any?
39
+ doc.xpath(XPATH_FIND_TABLE_STYLES).each do |style|
40
+ read_table_styles(style)
41
+ end
42
+ end
6
43
 
7
- class Roo::OpenOffice < Roo::Base
8
- # initialization and opening of a spreadsheet file
9
- # values for packed: :zip
10
- def initialize(filename, options={})
11
- packed = options[:packed]
12
- file_warning = options[:file_warning] || :error
13
-
14
- @only_visible_sheets = options[:only_visible_sheets]
15
- file_type_check(filename,'.ods','an Roo::OpenOffice', file_warning, packed)
16
- @tmpdir = make_tmpdir(File.basename(filename), options[:tmpdir_root])
17
- @filename = local_filename(filename, @tmpdir, packed)
18
- #TODO: @cells_read[:default] = false
19
- Zip::File.open(@filename) do |zip_file|
20
- if content_entry = zip_file.glob("content.xml").first
21
- roo_content_xml_path = File.join(@tmpdir, 'roo_content.xml')
44
+ @sheet_names = doc.xpath(XPATH_LOCAL_NAME_TABLE).map do |sheet|
45
+ if !@only_visible_sheets || @table_display[attribute(sheet, 'style-name')]
46
+ sheet.attributes['name'].value
47
+ end
48
+ end.compact
49
+ rescue
50
+ self.class.finalize_tempdirs(object_id)
51
+ raise
52
+ end
53
+
54
+ def open_oo_file(options)
55
+ Zip::File.open(@filename) do |zip_file|
56
+ content_entry = zip_file.glob('content.xml').first
57
+ fail ArgumentError, ERROR_MISSING_CONTENT_XML unless content_entry
58
+
59
+ roo_content_xml_path = ::File.join(@tmpdir, 'roo_content.xml')
22
60
  content_entry.extract(roo_content_xml_path)
23
- decrypt_if_necessary(
24
- zip_file,
25
- content_entry,
26
- roo_content_xml_path,
27
- options
28
- )
29
- else
30
- raise ArgumentError, 'file missing required content.xml'
61
+ decrypt_if_necessary(zip_file, content_entry, roo_content_xml_path, options)
31
62
  end
32
63
  end
33
- super(filename, options)
34
- @formula = Hash.new
35
- @style = Hash.new
36
- @style_defaults = Hash.new { |h,k| h[k] = [] }
37
- @table_display = Hash.new { |h,k| h[k] = true }
38
- @font_style_definitions = Hash.new
39
- @comment = Hash.new
40
- @comments_read = Hash.new
41
- rescue => e # clean up any temp files, but only if an error was raised
42
- close
43
- raise e
44
- end
45
64
 
46
- def method_missing(m,*args)
47
- read_labels
48
- # is method name a label name
49
- if @label.has_key?(m.to_s)
50
- row,col = label(m.to_s)
51
- cell(row,col)
52
- else
53
- # call super for methods like #a1
54
- super
65
+ def initialize_default_variables
66
+ @formula = {}
67
+ @style = {}
68
+ @style_defaults = Hash.new { |h, k| h[k] = [] }
69
+ @table_display = Hash.new { |h, k| h[k] = true }
70
+ @font_style_definitions = {}
71
+ @comment = {}
72
+ @comments_read = {}
55
73
  end
56
- end
57
74
 
58
- # Returns the content of a spreadsheet-cell.
59
- # (1,1) is the upper left corner.
60
- # (1,1), (1,'A'), ('A',1), ('a',1) all refers to the
61
- # cell at the first line and first row.
62
- def cell(row, col, sheet=nil)
63
- sheet ||= default_sheet
64
- read_cells(sheet)
65
- row,col = normalize(row,col)
66
- if celltype(row,col,sheet) == :date
67
- yyyy,mm,dd = @cell[sheet][[row,col]].to_s.split('-')
68
- return Date.new(yyyy.to_i,mm.to_i,dd.to_i)
69
- end
70
- @cell[sheet][[row,col]]
71
- end
75
+ def method_missing(m, *args)
76
+ read_labels
77
+ # is method name a label name
78
+ if @label.key?(m.to_s)
79
+ row, col = label(m.to_s)
80
+ cell(row, col)
81
+ else
82
+ # call super for methods like #a1
83
+ super
84
+ end
85
+ end
72
86
 
73
- # Returns the formula at (row,col).
74
- # Returns nil if there is no formula.
75
- # The method #formula? checks if there is a formula.
76
- def formula(row,col,sheet=nil)
77
- sheet ||= default_sheet
78
- read_cells(sheet)
79
- row,col = normalize(row,col)
80
- @formula[sheet][[row,col]]
81
- end
87
+ # Returns the content of a spreadsheet-cell.
88
+ # (1,1) is the upper left corner.
89
+ # (1,1), (1,'A'), ('A',1), ('a',1) all refers to the
90
+ # cell at the first line and first row.
91
+ def cell(row, col, sheet = nil)
92
+ sheet ||= default_sheet
93
+ read_cells(sheet)
94
+ row, col = normalize(row, col)
95
+ if celltype(row, col, sheet) == :date
96
+ yyyy, mm, dd = @cell[sheet][[row, col]].to_s.split('-')
97
+ return Date.new(yyyy.to_i, mm.to_i, dd.to_i)
98
+ end
82
99
 
83
- # Predicate methods really should return a boolean
84
- # value. Hopefully no one was relying on the fact that this
85
- # previously returned either nil/formula
86
- def formula?(*args)
87
- !!formula(*args)
88
- end
100
+ @cell[sheet][[row, col]]
101
+ end
102
+
103
+ # Returns the formula at (row,col).
104
+ # Returns nil if there is no formula.
105
+ # The method #formula? checks if there is a formula.
106
+ def formula(row, col, sheet = nil)
107
+ sheet ||= default_sheet
108
+ read_cells(sheet)
109
+ row, col = normalize(row, col)
110
+ @formula[sheet][[row, col]]
111
+ end
112
+
113
+ # Predicate methods really should return a boolean
114
+ # value. Hopefully no one was relying on the fact that this
115
+ # previously returned either nil/formula
116
+ def formula?(*args)
117
+ !!formula(*args)
118
+ end
89
119
 
90
- # returns each formula in the selected sheet as an array of elements
91
- # [row, col, formula]
92
- def formulas(sheet=nil)
93
- sheet ||= default_sheet
94
- read_cells(sheet)
95
- if @formula[sheet]
120
+ # returns each formula in the selected sheet as an array of elements
121
+ # [row, col, formula]
122
+ def formulas(sheet = nil)
123
+ sheet ||= default_sheet
124
+ read_cells(sheet)
125
+ return [] unless @formula[sheet]
96
126
  @formula[sheet].each.collect do |elem|
97
127
  [elem[0][0], elem[0][1], elem[1]]
98
128
  end
99
- else
100
- []
101
129
  end
102
- end
103
-
104
- # Given a cell, return the cell's style
105
- def font(row, col, sheet=nil)
106
- sheet ||= default_sheet
107
- read_cells(sheet)
108
- row,col = normalize(row,col)
109
- style_name = @style[sheet][[row,col]] || @style_defaults[sheet][col - 1] || 'Default'
110
- @font_style_definitions[style_name]
111
- end
112
130
 
113
- # returns the type of a cell:
114
- # * :float
115
- # * :string
116
- # * :date
117
- # * :percentage
118
- # * :formula
119
- # * :time
120
- # * :datetime
121
- def celltype(row,col,sheet=nil)
122
- sheet ||= default_sheet
123
- read_cells(sheet)
124
- row,col = normalize(row,col)
125
- if @formula[sheet][[row,col]]
126
- return :formula
127
- else
128
- @cell_type[sheet][[row,col]]
131
+ # Given a cell, return the cell's style
132
+ def font(row, col, sheet = nil)
133
+ sheet ||= default_sheet
134
+ read_cells(sheet)
135
+ row, col = normalize(row, col)
136
+ style_name = @style[sheet][[row, col]] || @style_defaults[sheet][col - 1] || 'Default'
137
+ @font_style_definitions[style_name]
129
138
  end
130
- end
131
139
 
132
- def sheets
133
- unless @table_display.any?
134
- doc.xpath("//*[local-name()='automatic-styles']").each do |style|
135
- read_table_styles(style)
136
- end
140
+ # returns the type of a cell:
141
+ # * :float
142
+ # * :string
143
+ # * :date
144
+ # * :percentage
145
+ # * :formula
146
+ # * :time
147
+ # * :datetime
148
+ def celltype(row, col, sheet = nil)
149
+ sheet ||= default_sheet
150
+ read_cells(sheet)
151
+ row, col = normalize(row, col)
152
+ @formula[sheet][[row, col]] ? :formula : @cell_type[sheet][[row, col]]
137
153
  end
138
- doc.xpath("//*[local-name()='table']").map do |sheet|
139
- if !@only_visible_sheets || @table_display[attr(sheet,'style-name')]
140
- sheet.attributes["name"].value
141
- end
142
- end.compact
143
- end
144
154
 
145
- # version of the Roo::OpenOffice document
146
- # at 2007 this is always "1.0"
147
- def officeversion
148
- oo_version
149
- @officeversion
150
- end
155
+ def sheets
156
+ @sheet_names
157
+ end
151
158
 
152
- # shows the internal representation of all cells
153
- # mainly for debugging purposes
154
- def to_s(sheet=nil)
155
- sheet ||= default_sheet
156
- read_cells(sheet)
157
- @cell[sheet].inspect
158
- end
159
+ # version of the Roo::OpenOffice document
160
+ # at 2007 this is always "1.0"
161
+ def officeversion
162
+ oo_version
163
+ @officeversion
164
+ end
159
165
 
160
- # returns the row,col values of the labelled cell
161
- # (nil,nil) if label is not defined
162
- def label(labelname)
163
- read_labels
164
- unless @label.size > 0
165
- return nil,nil,nil
166
+ # shows the internal representation of all cells
167
+ # mainly for debugging purposes
168
+ def to_s(sheet = nil)
169
+ sheet ||= default_sheet
170
+ read_cells(sheet)
171
+ @cell[sheet].inspect
166
172
  end
167
- if @label.has_key? labelname
168
- return @label[labelname][1].to_i,
173
+
174
+ # returns the row,col values of the labelled cell
175
+ # (nil,nil) if label is not defined
176
+ def label(labelname)
177
+ read_labels
178
+ return [nil, nil, nil] if @label.size < 1 || !@label.key?(labelname)
179
+ [
180
+ @label[labelname][1].to_i,
169
181
  ::Roo::Utils.letter_to_number(@label[labelname][2]),
170
182
  @label[labelname][0]
171
- else
172
- return nil,nil,nil
183
+ ]
173
184
  end
174
- end
175
185
 
176
- # Returns an array which all labels. Each element is an array with
177
- # [labelname, [row,col,sheetname]]
178
- def labels(sheet=nil)
179
- read_labels
180
- @label.map do |label|
181
- [ label[0], # name
182
- [ label[1][1].to_i, # row
186
+ # Returns an array which all labels. Each element is an array with
187
+ # [labelname, [row,col,sheetname]]
188
+ def labels(_sheet = nil)
189
+ read_labels
190
+ @label.map do |label|
191
+ [label[0], # name
192
+ [label[1][1].to_i, # row
183
193
  ::Roo::Utils.letter_to_number(label[1][2]), # column
184
194
  label[1][0], # sheet
185
- ] ]
195
+ ]]
196
+ end
186
197
  end
187
- end
188
198
 
189
- # returns the comment at (row/col)
190
- # nil if there is no comment
191
- def comment(row,col,sheet=nil)
192
- sheet ||= default_sheet
193
- read_cells(sheet)
194
- row,col = normalize(row,col)
195
- return nil unless @comment[sheet]
196
- @comment[sheet][[row,col]]
197
- end
199
+ # returns the comment at (row/col)
200
+ # nil if there is no comment
201
+ def comment(row, col, sheet = nil)
202
+ sheet ||= default_sheet
203
+ read_cells(sheet)
204
+ row, col = normalize(row, col)
205
+ return nil unless @comment[sheet]
206
+ @comment[sheet][[row, col]]
207
+ end
198
208
 
199
- # returns each comment in the selected sheet as an array of elements
200
- # [row, col, comment]
201
- def comments(sheet=nil)
202
- sheet ||= default_sheet
203
- read_comments(sheet) unless @comments_read[sheet]
204
- if @comment[sheet]
209
+ # returns each comment in the selected sheet as an array of elements
210
+ # [row, col, comment]
211
+ def comments(sheet = nil)
212
+ sheet ||= default_sheet
213
+ read_comments(sheet) unless @comments_read[sheet]
214
+ return [] unless @comment[sheet]
205
215
  @comment[sheet].each.collect do |elem|
206
- [elem[0][0],elem[0][1],elem[1]]
216
+ [elem[0][0], elem[0][1], elem[1]]
207
217
  end
208
- else
209
- []
210
218
  end
211
- end
212
219
 
213
- private
214
-
215
- # If the ODS file has an encryption-data element, then try to decrypt.
216
- # If successful, the temporary content.xml will be overwritten with
217
- # decrypted contents.
218
- def decrypt_if_necessary(
219
- zip_file,
220
- content_entry,
221
- roo_content_xml_path, options
222
- )
223
- # Check if content.xml is encrypted by extracting manifest.xml
224
- # and searching for a manifest:encryption-data element
225
-
226
- if manifest_entry = zip_file.glob("META-INF/manifest.xml").first
227
- roo_manifest_xml_path = File.join(@tmpdir, "roo_manifest.xml")
228
- manifest_entry.extract(roo_manifest_xml_path)
229
- manifest = ::Roo::Utils.load_xml(roo_manifest_xml_path)
230
-
231
- # XPath search for manifest:encryption-data only for the content.xml
232
- # file
233
-
234
- encryption_data = manifest.xpath(
235
- "//manifest:file-entry[@manifest:full-path='content.xml']"\
220
+ private
221
+
222
+ # If the ODS file has an encryption-data element, then try to decrypt.
223
+ # If successful, the temporary content.xml will be overwritten with
224
+ # decrypted contents.
225
+ def decrypt_if_necessary(
226
+ zip_file,
227
+ content_entry,
228
+ roo_content_xml_path, options
229
+ )
230
+ # Check if content.xml is encrypted by extracting manifest.xml
231
+ # and searching for a manifest:encryption-data element
232
+
233
+ if (manifest_entry = zip_file.glob('META-INF/manifest.xml').first)
234
+ roo_manifest_xml_path = File.join(@tmpdir, 'roo_manifest.xml')
235
+ manifest_entry.extract(roo_manifest_xml_path)
236
+ manifest = ::Roo::Utils.load_xml(roo_manifest_xml_path)
237
+
238
+ # XPath search for manifest:encryption-data only for the content.xml
239
+ # file
240
+
241
+ encryption_data = manifest.xpath(
242
+ "//manifest:file-entry[@manifest:full-path='content.xml']"\
236
243
  "/manifest:encryption-data"
237
- ).first
244
+ ).first
238
245
 
239
- # If XPath returns a node, then we know content.xml is encrypted
246
+ # If XPath returns a node, then we know content.xml is encrypted
240
247
 
241
- if !encryption_data.nil?
248
+ unless encryption_data.nil?
242
249
 
243
- # Since we know it's encrypted, we check for the password option
244
- # and if it doesn't exist, raise an argument error
250
+ # Since we know it's encrypted, we check for the password option
251
+ # and if it doesn't exist, raise an argument error
245
252
 
246
- password = options[:password]
247
- if !password.nil?
248
- perform_decryption(
249
- encryption_data,
250
- password,
251
- content_entry,
252
- roo_content_xml_path
253
- )
254
- else
255
- raise ArgumentError,
256
- 'file is encrypted but password was not supplied'
253
+ password = options[:password]
254
+ if !password.nil?
255
+ perform_decryption(
256
+ encryption_data,
257
+ password,
258
+ content_entry,
259
+ roo_content_xml_path
260
+ )
261
+ else
262
+ fail ArgumentError, 'file is encrypted but password was not supplied'
263
+ end
257
264
  end
265
+ else
266
+ fail ArgumentError, 'file missing required META-INF/manifest.xml'
258
267
  end
259
- else
260
- raise ArgumentError, 'file missing required META-INF/manifest.xml'
261
268
  end
262
- end
263
269
 
264
- # Process the ODS encryption manifest and perform the decryption
265
- def perform_decryption(
266
- encryption_data,
267
- password,
268
- content_entry,
269
- roo_content_xml_path
270
- )
271
- # Extract various expected attributes from the manifest that
272
- # describe the encryption
273
-
274
- algorithm_node = encryption_data.xpath("manifest:algorithm").first
275
- key_derivation_node =
276
- encryption_data.xpath("manifest:key-derivation").first
277
- start_key_generation_node =
278
- encryption_data.xpath("manifest:start-key-generation").first
279
-
280
- # If we have all the expected elements, then we can perform
281
- # the decryption.
282
-
283
- if !algorithm_node.nil? && !key_derivation_node.nil? &&
284
- !start_key_generation_node.nil?
285
-
286
- # The algorithm is a URI describing the algorithm used
287
- algorithm = algorithm_node['manifest:algorithm-name']
288
-
289
- # The initialization vector is base-64 encoded
290
- iv = Base64.decode64(
291
- algorithm_node['manifest:initialisation-vector']
292
- )
293
- key_derivation_name =
294
- key_derivation_node['manifest:key-derivation-name']
295
- key_size = key_derivation_node['manifest:key-size'].to_i
296
- iteration_count =
297
- key_derivation_node['manifest:iteration-count'].to_i
298
- salt = Base64.decode64(key_derivation_node['manifest:salt'])
299
-
300
- # The key is hashed with an algorithm represented by this URI
301
- key_generation_name =
302
- start_key_generation_node[
303
- 'manifest:start-key-generation-name'
304
- ]
305
- key_generation_size =
306
- start_key_generation_node['manifest:key-size'].to_i
270
+ # Process the ODS encryption manifest and perform the decryption
271
+ def perform_decryption(
272
+ encryption_data,
273
+ password,
274
+ content_entry,
275
+ roo_content_xml_path
276
+ )
277
+ # Extract various expected attributes from the manifest that
278
+ # describe the encryption
279
+
280
+ algorithm_node = encryption_data.xpath('manifest:algorithm').first
281
+ key_derivation_node =
282
+ encryption_data.xpath('manifest:key-derivation').first
283
+ start_key_generation_node =
284
+ encryption_data.xpath('manifest:start-key-generation').first
285
+
286
+ # If we have all the expected elements, then we can perform
287
+ # the decryption.
288
+
289
+ if !algorithm_node.nil? && !key_derivation_node.nil? &&
290
+ !start_key_generation_node.nil?
291
+
292
+ # The algorithm is a URI describing the algorithm used
293
+ algorithm = algorithm_node['manifest:algorithm-name']
294
+
295
+ # The initialization vector is base-64 encoded
296
+ iv = Base64.decode64(
297
+ algorithm_node['manifest:initialisation-vector']
298
+ )
299
+ key_derivation_name = key_derivation_node['manifest:key-derivation-name']
300
+ iteration_count = key_derivation_node['manifest:iteration-count'].to_i
301
+ salt = Base64.decode64(key_derivation_node['manifest:salt'])
302
+
303
+ # The key is hashed with an algorithm represented by this URI
304
+ key_generation_name =
305
+ start_key_generation_node[
306
+ 'manifest:start-key-generation-name'
307
+ ]
307
308
 
308
309
  hashed_password = password
309
- key = nil
310
310
 
311
- if key_generation_name.eql?(
312
- "http://www.w3.org/2000/09/xmldsig#sha256"
313
- )
314
- hashed_password = Digest::SHA256.digest(password)
315
- else
316
- raise ArgumentError, 'Unknown key generation algorithm ' +
317
- key_generation_name
318
- end
311
+ if key_generation_name == 'http://www.w3.org/2000/09/xmldsig#sha256'
312
+
313
+ hashed_password = Digest::SHA256.digest(password)
314
+ else
315
+ fail ArgumentError, "Unknown key generation algorithm #{key_generation_name}"
316
+ end
319
317
 
320
318
  cipher = find_cipher(
321
319
  algorithm,
@@ -336,210 +334,179 @@ class Roo::OpenOffice < Roo::Base
336
334
  Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(decrypted)
337
335
  )
338
336
  rescue StandardError => error
339
- raise ArgumentError,
340
- 'Invalid password or other data error: ' + error.to_s
337
+ raise ArgumentError, "Invalid password or other data error: #{error}"
341
338
  end
342
- else
343
- raise ArgumentError,
344
- 'manifest.xml missing encryption-data elements'
339
+ else
340
+ fail ArgumentError, 'manifest.xml missing encryption-data elements'
341
+ end
345
342
  end
346
- end
347
343
 
348
- # Create a cipher based on an ODS algorithm URI from manifest.xml
349
- def find_cipher(
350
- algorithm,
351
- key_derivation_name,
352
- hashed_password,
353
- salt,
354
- iteration_count,
355
- iv
356
- )
357
- cipher = nil
358
- if algorithm.eql? "http://www.w3.org/2001/04/xmlenc#aes256-cbc"
359
- cipher = OpenSSL::Cipher.new('AES-256-CBC')
344
+ # Create a cipher based on an ODS algorithm URI from manifest.xml
345
+ # params: algorithm, key_derivation_name, hashed_password, salt, iteration_count, iv
346
+ def find_cipher(*args)
347
+ fail ArgumentError, 'Unknown algorithm ' + algorithm unless args[0] == 'http://www.w3.org/2001/04/xmlenc#aes256-cbc'
348
+
349
+ cipher = ::OpenSSL::Cipher.new('AES-256-CBC')
360
350
  cipher.decrypt
361
351
  cipher.padding = 0
362
- cipher.key = find_cipher_key(
363
- cipher,
364
- key_derivation_name,
365
- hashed_password,
366
- salt,
367
- iteration_count
368
- )
369
- cipher.iv = iv
370
- else
371
- raise ArgumentError, 'Unknown algorithm ' + algorithm
372
- end
373
- cipher
374
- end
352
+ cipher.key = find_cipher_key(cipher, *args[1..4])
353
+ cipher.iv = args[5]
375
354
 
376
- # Create a cipher key based on an ODS algorithm string from manifest.xml
377
- def find_cipher_key(
378
- cipher,
379
- key_derivation_name,
380
- hashed_password,
381
- salt,
382
- iteration_count
383
- )
384
- if key_derivation_name.eql? "PBKDF2"
385
- key = OpenSSL::PKCS5.pbkdf2_hmac_sha1(
386
- hashed_password,
387
- salt,
388
- iteration_count,
389
- cipher.key_len
390
- )
391
- else
392
- raise ArgumentError, 'Unknown key derivation name ' +
393
- key_derivation_name
394
- end
395
- key
396
- end
355
+ cipher
356
+ end
397
357
 
398
- # Block decrypt raw bytes from the zip file based on the cipher
399
- def decrypt(content_entry, cipher)
400
- # Zip::Entry.extract writes a 0-length file when trying
401
- # to extract an encrypted stream, so we read the
402
- # raw bytes based on the offset and lengths
403
- decrypted = ""
404
- File.open(@filename, "rb") do |zipfile|
405
- zipfile.seek(
406
- content_entry.local_header_offset +
407
- content_entry.calculate_local_header_size
408
- )
409
- total_to_read = content_entry.compressed_size
410
-
411
- block_size = 4096
412
- block_size = total_to_read if block_size > total_to_read
413
-
414
- while buffer = zipfile.read(block_size)
415
- decrypted += cipher.update(buffer)
416
- total_to_read -= buffer.length
417
-
418
- break if total_to_read == 0
419
-
420
- block_size = total_to_read if block_size > total_to_read
421
- end
358
+ # Create a cipher key based on an ODS algorithm string from manifest.xml
359
+ def find_cipher_key(*args)
360
+ fail ArgumentError, 'Unknown key derivation name ', args[1] unless args[1] == 'PBKDF2'
361
+
362
+ ::OpenSSL::PKCS5.pbkdf2_hmac_sha1(args[2], args[3], args[4], args[0].key_len)
422
363
  end
423
364
 
424
- decrypted + cipher.final
425
- end
365
+ # Block decrypt raw bytes from the zip file based on the cipher
366
+ def decrypt(content_entry, cipher)
367
+ # Zip::Entry.extract writes a 0-length file when trying
368
+ # to extract an encrypted stream, so we read the
369
+ # raw bytes based on the offset and lengths
370
+ decrypted = ''
371
+ File.open(@filename, 'rb') do |zipfile|
372
+ zipfile.seek(
373
+ content_entry.local_header_offset +
374
+ content_entry.calculate_local_header_size
375
+ )
376
+ total_to_read = content_entry.compressed_size
426
377
 
427
- def doc
428
- @doc ||= ::Roo::Utils.load_xml(File.join(@tmpdir, "roo_content.xml"))
429
- end
378
+ block_size = 4096
379
+ block_size = total_to_read if block_size > total_to_read
380
+
381
+ while (buffer = zipfile.read(block_size))
382
+ decrypted += cipher.update(buffer)
383
+ total_to_read -= buffer.length
384
+
385
+ break if total_to_read == 0
430
386
 
431
- # read the version of the OO-Version
432
- def oo_version
433
- doc.xpath("//*[local-name()='document-content']").each do |office|
434
- @officeversion = attr(office,'version')
387
+ block_size = total_to_read if block_size > total_to_read
388
+ end
389
+ end
390
+
391
+ decrypted + cipher.final
392
+ end
393
+
394
+ def doc
395
+ @doc ||= ::Roo::Utils.load_xml(File.join(@tmpdir, 'roo_content.xml'))
396
+ end
397
+
398
+ # read the version of the OO-Version
399
+ def oo_version
400
+ doc.xpath("//*[local-name()='document-content']").each do |office|
401
+ @officeversion = attribute(office, 'version')
402
+ end
435
403
  end
436
- end
437
404
 
438
- # helper function to set the internal representation of cells
439
- def set_cell_values(sheet,x,y,i,v,value_type,formula,table_cell,str_v,style_name)
440
- key = [y,x+i]
441
- @cell_type[sheet] = {} unless @cell_type[sheet]
442
- @cell_type[sheet][key] = Roo::OpenOffice.oo_type_2_roo_type(value_type)
443
- @formula[sheet] = {} unless @formula[sheet]
444
- if formula
445
- ['of:', 'oooc:'].each do |prefix|
446
- if formula[0,prefix.length] == prefix
447
- formula = formula[prefix.length..-1]
405
+ # helper function to set the internal representation of cells
406
+ def set_cell_values(sheet, x, y, i, v, value_type, formula, table_cell, str_v, style_name)
407
+ key = [y, x + i]
408
+ @cell_type[sheet] ||= {}
409
+ @cell_type[sheet][key] = value_type.to_sym if value_type
410
+ @formula[sheet] ||= {}
411
+ if formula
412
+ ['of:', 'oooc:'].each do |prefix|
413
+ if formula[0, prefix.length] == prefix
414
+ formula = formula[prefix.length..-1]
415
+ end
448
416
  end
417
+ @formula[sheet][key] = formula
449
418
  end
450
- @formula[sheet][key] = formula
451
- end
452
- @cell[sheet] = {} unless @cell[sheet]
453
- @style[sheet] = {} unless @style[sheet]
454
- @style[sheet][key] = style_name
455
- case @cell_type[sheet][key]
456
- when :float
457
- @cell[sheet][key] = v.to_f
458
- when :string
459
- @cell[sheet][key] = str_v
460
- when :date
461
- #TODO: if table_cell.attributes['date-value'].size != "XXXX-XX-XX".size
462
- if attr(table_cell,'date-value').size != "XXXX-XX-XX".size
463
- #-- dann ist noch eine Uhrzeit vorhanden
464
- #-- "1961-11-21T12:17:18"
465
- @cell[sheet][key] = DateTime.parse(attr(table_cell,'date-value').to_s)
466
- @cell_type[sheet][key] = :datetime
419
+ @cell[sheet] ||= {}
420
+ @style[sheet] ||= {}
421
+ @style[sheet][key] = style_name
422
+ case @cell_type[sheet][key]
423
+ when :float
424
+ @cell[sheet][key] = (table_cell.attributes['value'].to_s.include?(".") || table_cell.children.first.text.include?(".")) ? v.to_f : v.to_i
425
+ when :percentage
426
+ @cell[sheet][key] = v.to_f
427
+ when :string
428
+ @cell[sheet][key] = str_v
429
+ when :date
430
+ # TODO: if table_cell.attributes['date-value'].size != "XXXX-XX-XX".size
431
+ if attribute(table_cell, 'date-value').size != 'XXXX-XX-XX'.size
432
+ #-- dann ist noch eine Uhrzeit vorhanden
433
+ #-- "1961-11-21T12:17:18"
434
+ @cell[sheet][key] = DateTime.parse(attribute(table_cell, 'date-value').to_s)
435
+ @cell_type[sheet][key] = :datetime
436
+ else
437
+ @cell[sheet][key] = table_cell.attributes['date-value']
438
+ end
439
+ when :time
440
+ hms = v.split(':')
441
+ @cell[sheet][key] = hms[0].to_i * 3600 + hms[1].to_i * 60 + hms[2].to_i
467
442
  else
468
- @cell[sheet][key] = table_cell.attributes['date-value']
443
+ @cell[sheet][key] = v
469
444
  end
470
- when :percentage
471
- @cell[sheet][key] = v.to_f
472
- when :time
473
- hms = v.split(':')
474
- @cell[sheet][key] = hms[0].to_i*3600 + hms[1].to_i*60 + hms[2].to_i
475
- else
476
- @cell[sheet][key] = v
477
445
  end
478
- end
479
446
 
480
- # read all cells in the selected sheet
481
- #--
482
- # the following construct means '4 blanks'
483
- # some content <text:s text:c="3"/>
484
- #++
485
- def read_cells(sheet = default_sheet)
486
- validate_sheet!(sheet)
487
- return if @cells_read[sheet]
488
-
489
- sheet_found = false
490
- doc.xpath("//*[local-name()='table']").each do |ws|
491
- if sheet == attr(ws,'name')
447
+ # read all cells in the selected sheet
448
+ #--
449
+ # the following construct means '4 blanks'
450
+ # some content <text:s text:c="3"/>
451
+ #++
452
+ def read_cells(sheet = default_sheet)
453
+ validate_sheet!(sheet)
454
+ return if @cells_read[sheet]
455
+
456
+ sheet_found = false
457
+ doc.xpath("//*[local-name()='table']").each do |ws|
458
+ next unless sheet == attribute(ws, 'name')
459
+
492
460
  sheet_found = true
493
- col = 1
494
- row = 1
461
+ col = 1
462
+ row = 1
495
463
  ws.children.each do |table_element|
496
464
  case table_element.name
497
465
  when 'table-column'
498
466
  @style_defaults[sheet] << table_element.attributes['default-cell-style-name']
499
467
  when 'table-row'
500
468
  if table_element.attributes['number-rows-repeated']
501
- skip_row = attr(table_element,'number-rows-repeated').to_s.to_i
502
- row = row + skip_row - 1
469
+ skip_row = attribute(table_element, 'number-rows-repeated').to_s.to_i
470
+ row = row + skip_row - 1
503
471
  end
504
472
  table_element.children.each do |cell|
505
- skip_col = attr(cell, 'number-columns-repeated')
506
- formula = attr(cell,'formula')
507
- value_type = attr(cell,'value-type')
508
- v = attr(cell,'value')
509
- style_name = attr(cell,'style-name')
473
+ skip_col = attribute(cell, 'number-columns-repeated')
474
+ formula = attribute(cell, 'formula')
475
+ value_type = attribute(cell, 'value-type')
476
+ v = attribute(cell, 'value')
477
+ style_name = attribute(cell, 'style-name')
510
478
  case value_type
511
479
  when 'string'
512
- str_v = ''
480
+ str_v = ''
513
481
  # insert \n if there is more than one paragraph
514
482
  para_count = 0
515
483
  cell.children.each do |str|
516
484
  # begin comments
517
- =begin
518
- - <table:table-cell office:value-type="string">
519
- - <office:annotation office:display="true" draw:style-name="gr1" draw:text-style-name="P1" svg:width="1.1413in" svg:height="0.3902in" svg:x="2.0142in" svg:y="0in" draw:caption-point-x="-0.2402in" draw:caption-point-y="0.5661in">
520
- <dc:date>2011-09-20T00:00:00</dc:date>
521
- <text:p text:style-name="P1">Kommentar fuer B4</text:p>
522
- </office:annotation>
523
- <text:p>B4 (mit Kommentar)</text:p>
524
- </table:table-cell>
525
- =end
485
+ #=begin
486
+ #- <table:table-cell office:value-type="string">
487
+ # - <office:annotation office:display="true" draw:style-name="gr1" draw:text-style-name="P1" svg:width="1.1413in" svg:height="0.3902in" svg:x="2.0142in" svg:y="0in" draw:caption-point-x="-0.2402in" draw:caption-point-y="0.5661in">
488
+ # <dc:date>2011-09-20T00:00:00</dc:date>
489
+ # <text:p text:style-name="P1">Kommentar fuer B4</text:p>
490
+ # </office:annotation>
491
+ # <text:p>B4 (mit Kommentar)</text:p>
492
+ # </table:table-cell>
493
+ #=end
526
494
  if str.name == 'annotation'
527
495
  str.children.each do |annotation|
528
- if annotation.name == 'p'
529
- # @comment ist ein Hash mit Sheet als Key (wie bei @cell)
530
- # innerhalb eines Elements besteht ein Eintrag aus einem
531
- # weiteren Hash mit Key [row,col] und dem eigentlichen
532
- # Kommentartext als Inhalt
533
- @comment[sheet] = Hash.new unless @comment[sheet]
534
- key = [row,col]
535
- @comment[sheet][key] = annotation.text
536
- end
496
+ next unless annotation.name == 'p'
497
+ # @comment ist ein Hash mit Sheet als Key (wie bei @cell)
498
+ # innerhalb eines Elements besteht ein Eintrag aus einem
499
+ # weiteren Hash mit Key [row,col] und dem eigentlichen
500
+ # Kommentartext als Inhalt
501
+ @comment[sheet] = Hash.new unless @comment[sheet]
502
+ key = [row, col]
503
+ @comment[sheet][key] = annotation.text
537
504
  end
538
505
  end
539
506
  # end comments
540
507
  if str.name == 'p'
541
- v = str.content
542
- str_v += "\n" if para_count > 0
508
+ v = str.content
509
+ str_v += "\n" if para_count > 0
543
510
  para_count += 1
544
511
  if str.children.size > 1
545
512
  str_v += children_to_string(str.children)
@@ -548,32 +515,28 @@ class Roo::OpenOffice < Roo::Base
548
515
  str_v += child.content #.text
549
516
  end
550
517
  end
551
- str_v.gsub!(/&apos;/,"'") # special case not supported by unescapeHTML
518
+ str_v.gsub!(/&apos;/, "'") # special case not supported by unescapeHTML
552
519
  str_v = CGI.unescapeHTML(str_v)
553
520
  end # == 'p'
554
521
  end
555
522
  when 'time'
556
523
  cell.children.each do |str|
557
- if str.name == 'p'
558
- v = str.content
559
- end
524
+ v = str.content if str.name == 'p'
560
525
  end
561
526
  when '', nil, 'date', 'percentage', 'float'
562
527
  #
563
528
  when 'boolean'
564
- v = attr(cell,'boolean-value').to_s
565
- else
566
- # raise "unknown type #{value_type}"
529
+ v = attribute(cell, 'boolean-value').to_s
567
530
  end
568
531
  if skip_col
569
- if v != nil or cell.attributes['date-value']
570
- 0.upto(skip_col.to_i-1) do |i|
571
- set_cell_values(sheet,col,row,i,v,value_type,formula,cell,str_v,style_name)
532
+ if !v.nil? || cell.attributes['date-value']
533
+ 0.upto(skip_col.to_i - 1) do |i|
534
+ set_cell_values(sheet, col, row, i, v, value_type, formula, cell, str_v, style_name)
572
535
  end
573
536
  end
574
537
  col += (skip_col.to_i - 1)
575
538
  end # if skip
576
- set_cell_values(sheet,col,row,0,v,value_type,formula,cell,str_v,style_name)
539
+ set_cell_values(sheet, col, row, 0, v, value_type, formula, cell, str_v, style_name)
577
540
  col += 1
578
541
  end
579
542
  row += 1
@@ -581,101 +544,83 @@ class Roo::OpenOffice < Roo::Base
581
544
  end
582
545
  end
583
546
  end
547
+ doc.xpath("//*[local-name()='automatic-styles']").each do |style|
548
+ read_styles(style)
549
+ end
550
+
551
+ fail RangeError unless sheet_found
552
+
553
+ @cells_read[sheet] = true
554
+ @comments_read[sheet] = true
584
555
  end
585
- doc.xpath("//*[local-name()='automatic-styles']").each do |style|
586
- read_styles(style)
587
- end
588
- if !sheet_found
589
- raise RangeError
590
- end
591
- @cells_read[sheet] = true
592
- @comments_read[sheet] = true
593
- end
594
556
 
595
- # Only calls read_cells because Roo::Base calls read_comments
596
- # whereas the reading of comments is done in read_cells for Roo::OpenOffice-objects
597
- def read_comments(sheet=nil)
598
- read_cells(sheet)
599
- end
557
+ # Only calls read_cells because Roo::Base calls read_comments
558
+ # whereas the reading of comments is done in read_cells for Roo::OpenOffice-objects
559
+ def read_comments(sheet = nil)
560
+ read_cells(sheet)
561
+ end
600
562
 
601
- def read_labels
602
- @label ||= Hash[doc.xpath("//table:named-range").map do |ne|
603
- #-
604
- # $Sheet1.$C$5
605
- #+
606
- name = attr(ne,'name').to_s
607
- sheetname,coords = attr(ne,'cell-range-address').to_s.split('.$')
608
- col, row = coords.split('$')
609
- sheetname = sheetname[1..-1] if sheetname[0,1] == '$'
610
- [name, [sheetname,row,col]]
611
- end]
612
- end
563
+ def read_labels
564
+ @label ||= Hash[doc.xpath('//table:named-range').map do |ne|
565
+ #-
566
+ # $Sheet1.$C$5
567
+ #+
568
+ name = attribute(ne, 'name').to_s
569
+ sheetname, coords = attribute(ne, 'cell-range-address').to_s.split('.$')
570
+ col, row = coords.split('$')
571
+ sheetname = sheetname[1..-1] if sheetname[0, 1] == '$'
572
+ [name, [sheetname, row, col]]
573
+ end]
574
+ end
613
575
 
614
- def read_styles(style_elements)
615
- @font_style_definitions['Default'] = Roo::Font.new
616
- style_elements.each do |style|
617
- next unless style.name == 'style'
618
- style_name = attr(style,'name')
619
- style.each do |properties|
620
- font = Roo::OpenOffice::Font.new
621
- font.bold = attr(properties,'font-weight')
622
- font.italic = attr(properties,'font-style')
623
- font.underline = attr(properties,'text-underline-style')
624
- @font_style_definitions[style_name] = font
576
+ def read_styles(style_elements)
577
+ @font_style_definitions['Default'] = Roo::Font.new
578
+ style_elements.each do |style|
579
+ next unless style.name == 'style'
580
+ style_name = attribute(style, 'name')
581
+ style.each do |properties|
582
+ font = Roo::OpenOffice::Font.new
583
+ font.bold = attribute(properties, 'font-weight')
584
+ font.italic = attribute(properties, 'font-style')
585
+ font.underline = attribute(properties, 'text-underline-style')
586
+ @font_style_definitions[style_name] = font
587
+ end
625
588
  end
626
589
  end
627
- end
628
590
 
629
- def read_table_styles(styles)
630
- styles.children.each do |style|
631
- next unless style.name == 'style'
632
- style_name = attr(style,'name')
633
- style.children.each do |properties|
634
- display = attr(properties,'display')
635
- next unless display
636
- @table_display[style_name] = (display == 'true')
591
+ def read_table_styles(styles)
592
+ styles.children.each do |style|
593
+ next unless style.name == 'style'
594
+ style_name = attribute(style, 'name')
595
+ style.children.each do |properties|
596
+ display = attribute(properties, 'display')
597
+ next unless display
598
+ @table_display[style_name] = (display == 'true')
599
+ end
637
600
  end
638
601
  end
639
- end
640
-
641
- A_ROO_TYPE = {
642
- "float" => :float,
643
- "string" => :string,
644
- "date" => :date,
645
- "percentage" => :percentage,
646
- "time" => :time,
647
- }
648
-
649
- def self.oo_type_2_roo_type(ootype)
650
- return A_ROO_TYPE[ootype]
651
- end
652
602
 
653
- # helper method to convert compressed spaces and other elements within
654
- # an text into a string
655
- def children_to_string(children)
656
- result = ''
657
- children.each {|child|
658
- if child.text?
659
- result = result + child.content
660
- else
661
- if child.name == 's'
662
- compressed_spaces = child.attributes['c'].to_s.to_i
663
- # no explicit number means a count of 1:
664
- if compressed_spaces == 0
665
- compressed_spaces = 1
666
- end
667
- result = result + " "*compressed_spaces
603
+ # helper method to convert compressed spaces and other elements within
604
+ # an text into a string
605
+ # FIXME: add a test for compressed_spaces == 0. It's not currently tested.
606
+ def children_to_string(children)
607
+ children.map do |child|
608
+ if child.text?
609
+ child.content
668
610
  else
669
- result = result + child.content
611
+ if child.name == 's'
612
+ compressed_spaces = child.attributes['c'].to_s.to_i
613
+ # no explicit number means a count of 1:
614
+ compressed_spaces == 0 ? ' ' : ' ' * compressed_spaces
615
+ else
616
+ child.content
617
+ end
670
618
  end
671
- end
672
- }
673
- result
674
- end
619
+ end.join
620
+ end
675
621
 
676
- def attr(node, attr_name)
677
- if node.attributes[attr_name]
678
- node.attributes[attr_name].value
622
+ def attribute(node, attr_name)
623
+ node.attributes[attr_name].value if node.attributes[attr_name]
679
624
  end
680
625
  end
681
626
  end