roo 2.0.1 → 2.7.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (84) hide show
  1. checksums.yaml +4 -4
  2. data/.codeclimate.yml +17 -0
  3. data/.github/ISSUE_TEMPLATE +10 -0
  4. data/.gitignore +4 -0
  5. data/.travis.yml +10 -6
  6. data/CHANGELOG.md +116 -1
  7. data/Gemfile +3 -4
  8. data/Gemfile_ruby2 +30 -0
  9. data/Guardfile +1 -2
  10. data/README.md +56 -22
  11. data/Rakefile +1 -1
  12. data/lib/roo/base.rb +108 -245
  13. data/lib/roo/constants.rb +5 -0
  14. data/lib/roo/csv.rb +94 -87
  15. data/lib/roo/errors.rb +11 -0
  16. data/lib/roo/excelx/cell/base.rb +94 -0
  17. data/lib/roo/excelx/cell/boolean.rb +27 -0
  18. data/lib/roo/excelx/cell/date.rb +28 -0
  19. data/lib/roo/excelx/cell/datetime.rb +111 -0
  20. data/lib/roo/excelx/cell/empty.rb +19 -0
  21. data/lib/roo/excelx/cell/number.rb +87 -0
  22. data/lib/roo/excelx/cell/string.rb +19 -0
  23. data/lib/roo/excelx/cell/time.rb +43 -0
  24. data/lib/roo/excelx/cell.rb +33 -4
  25. data/lib/roo/excelx/comments.rb +33 -0
  26. data/lib/roo/excelx/coordinate.rb +12 -0
  27. data/lib/roo/excelx/extractor.rb +3 -4
  28. data/lib/roo/excelx/format.rb +64 -0
  29. data/lib/roo/excelx/shared.rb +32 -0
  30. data/lib/roo/excelx/shared_strings.rb +124 -4
  31. data/lib/roo/excelx/sheet.rb +12 -7
  32. data/lib/roo/excelx/sheet_doc.rb +108 -97
  33. data/lib/roo/excelx/styles.rb +1 -1
  34. data/lib/roo/excelx.rb +61 -103
  35. data/lib/roo/formatters/base.rb +15 -0
  36. data/lib/roo/formatters/csv.rb +84 -0
  37. data/lib/roo/formatters/matrix.rb +23 -0
  38. data/lib/roo/formatters/xml.rb +31 -0
  39. data/lib/roo/formatters/yaml.rb +40 -0
  40. data/lib/roo/libre_office.rb +1 -2
  41. data/lib/roo/link.rb +21 -2
  42. data/lib/roo/open_office.rb +468 -523
  43. data/lib/roo/spreadsheet.rb +3 -1
  44. data/lib/roo/tempdir.rb +21 -0
  45. data/lib/roo/utils.rb +7 -7
  46. data/lib/roo/version.rb +1 -1
  47. data/lib/roo.rb +8 -3
  48. data/roo.gemspec +2 -1
  49. data/spec/helpers.rb +5 -0
  50. data/spec/lib/roo/base_spec.rb +229 -0
  51. data/spec/lib/roo/csv_spec.rb +19 -0
  52. data/spec/lib/roo/excelx_spec.rb +97 -11
  53. data/spec/lib/roo/openoffice_spec.rb +18 -1
  54. data/spec/lib/roo/spreadsheet_spec.rb +20 -0
  55. data/spec/lib/roo/utils_spec.rb +1 -1
  56. data/spec/spec_helper.rb +5 -5
  57. data/test/all_ss.rb +12 -11
  58. data/test/excelx/cell/test_base.rb +63 -0
  59. data/test/excelx/cell/test_boolean.rb +36 -0
  60. data/test/excelx/cell/test_date.rb +38 -0
  61. data/test/excelx/cell/test_datetime.rb +45 -0
  62. data/test/excelx/cell/test_empty.rb +7 -0
  63. data/test/excelx/cell/test_number.rb +74 -0
  64. data/test/excelx/cell/test_string.rb +28 -0
  65. data/test/excelx/cell/test_time.rb +30 -0
  66. data/test/formatters/test_csv.rb +119 -0
  67. data/test/formatters/test_matrix.rb +76 -0
  68. data/test/formatters/test_xml.rb +78 -0
  69. data/test/formatters/test_yaml.rb +20 -0
  70. data/test/helpers/test_accessing_files.rb +60 -0
  71. data/test/helpers/test_comments.rb +43 -0
  72. data/test/helpers/test_formulas.rb +9 -0
  73. data/test/helpers/test_labels.rb +103 -0
  74. data/test/helpers/test_sheets.rb +55 -0
  75. data/test/helpers/test_styles.rb +62 -0
  76. data/test/roo/test_base.rb +182 -0
  77. data/test/roo/test_csv.rb +60 -0
  78. data/test/roo/test_excelx.rb +325 -0
  79. data/test/roo/test_libre_office.rb +9 -0
  80. data/test/roo/test_open_office.rb +289 -0
  81. data/test/test_helper.rb +116 -18
  82. data/test/test_roo.rb +362 -2088
  83. metadata +70 -4
  84. data/test/test_generic_spreadsheet.rb +0 -237
@@ -3,319 +3,317 @@ require 'nokogiri'
3
3
  require 'cgi'
4
4
  require 'zip/filesystem'
5
5
  require 'roo/font'
6
+ require 'roo/tempdir'
7
+ require 'base64'
8
+ require 'openssl'
9
+
10
+ module Roo
11
+ class OpenOffice < Roo::Base
12
+ extend Roo::Tempdir
13
+
14
+ ERROR_MISSING_CONTENT_XML = 'file missing required content.xml'.freeze
15
+ XPATH_FIND_TABLE_STYLES = "//*[local-name()='automatic-styles']".freeze
16
+ XPATH_LOCAL_NAME_TABLE = "//*[local-name()='table']".freeze
17
+
18
+ # initialization and opening of a spreadsheet file
19
+ # values for packed: :zip
20
+ def initialize(filename, options = {})
21
+ packed = options[:packed]
22
+ file_warning = options[:file_warning] || :error
23
+
24
+ @only_visible_sheets = options[:only_visible_sheets]
25
+ file_type_check(filename, '.ods', 'an Roo::OpenOffice', file_warning, packed)
26
+ # NOTE: Create temp directory and allow Ruby to cleanup the temp directory
27
+ # when the object is garbage collected. Initially, the finalizer was
28
+ # created in the Roo::Tempdir module, but that led to a segfault
29
+ # when testing in Ruby 2.4.0.
30
+ @tmpdir = self.class.make_tempdir(self, find_basename(filename), options[:tmpdir_root])
31
+ ObjectSpace.define_finalizer(self, self.class.finalize(object_id))
32
+ @filename = local_filename(filename, @tmpdir, packed)
33
+ # TODO: @cells_read[:default] = false
34
+ open_oo_file(options)
35
+ super(filename, options)
36
+ initialize_default_variables
37
+
38
+ unless @table_display.any?
39
+ doc.xpath(XPATH_FIND_TABLE_STYLES).each do |style|
40
+ read_table_styles(style)
41
+ end
42
+ end
6
43
 
7
- class Roo::OpenOffice < Roo::Base
8
- # initialization and opening of a spreadsheet file
9
- # values for packed: :zip
10
- def initialize(filename, options={})
11
- packed = options[:packed]
12
- file_warning = options[:file_warning] || :error
13
-
14
- @only_visible_sheets = options[:only_visible_sheets]
15
- file_type_check(filename,'.ods','an Roo::OpenOffice', file_warning, packed)
16
- @tmpdir = make_tmpdir(File.basename(filename), options[:tmpdir_root])
17
- @filename = local_filename(filename, @tmpdir, packed)
18
- #TODO: @cells_read[:default] = false
19
- Zip::File.open(@filename) do |zip_file|
20
- if content_entry = zip_file.glob("content.xml").first
21
- roo_content_xml_path = File.join(@tmpdir, 'roo_content.xml')
44
+ @sheet_names = doc.xpath(XPATH_LOCAL_NAME_TABLE).map do |sheet|
45
+ if !@only_visible_sheets || @table_display[attribute(sheet, 'style-name')]
46
+ sheet.attributes['name'].value
47
+ end
48
+ end.compact
49
+ rescue
50
+ self.class.finalize_tempdirs(object_id)
51
+ raise
52
+ end
53
+
54
+ def open_oo_file(options)
55
+ Zip::File.open(@filename) do |zip_file|
56
+ content_entry = zip_file.glob('content.xml').first
57
+ fail ArgumentError, ERROR_MISSING_CONTENT_XML unless content_entry
58
+
59
+ roo_content_xml_path = ::File.join(@tmpdir, 'roo_content.xml')
22
60
  content_entry.extract(roo_content_xml_path)
23
- decrypt_if_necessary(
24
- zip_file,
25
- content_entry,
26
- roo_content_xml_path,
27
- options
28
- )
29
- else
30
- raise ArgumentError, 'file missing required content.xml'
61
+ decrypt_if_necessary(zip_file, content_entry, roo_content_xml_path, options)
31
62
  end
32
63
  end
33
- super(filename, options)
34
- @formula = Hash.new
35
- @style = Hash.new
36
- @style_defaults = Hash.new { |h,k| h[k] = [] }
37
- @table_display = Hash.new { |h,k| h[k] = true }
38
- @font_style_definitions = Hash.new
39
- @comment = Hash.new
40
- @comments_read = Hash.new
41
- rescue => e # clean up any temp files, but only if an error was raised
42
- close
43
- raise e
44
- end
45
64
 
46
- def method_missing(m,*args)
47
- read_labels
48
- # is method name a label name
49
- if @label.has_key?(m.to_s)
50
- row,col = label(m.to_s)
51
- cell(row,col)
52
- else
53
- # call super for methods like #a1
54
- super
65
+ def initialize_default_variables
66
+ @formula = {}
67
+ @style = {}
68
+ @style_defaults = Hash.new { |h, k| h[k] = [] }
69
+ @table_display = Hash.new { |h, k| h[k] = true }
70
+ @font_style_definitions = {}
71
+ @comment = {}
72
+ @comments_read = {}
55
73
  end
56
- end
57
74
 
58
- # Returns the content of a spreadsheet-cell.
59
- # (1,1) is the upper left corner.
60
- # (1,1), (1,'A'), ('A',1), ('a',1) all refers to the
61
- # cell at the first line and first row.
62
- def cell(row, col, sheet=nil)
63
- sheet ||= default_sheet
64
- read_cells(sheet)
65
- row,col = normalize(row,col)
66
- if celltype(row,col,sheet) == :date
67
- yyyy,mm,dd = @cell[sheet][[row,col]].to_s.split('-')
68
- return Date.new(yyyy.to_i,mm.to_i,dd.to_i)
69
- end
70
- @cell[sheet][[row,col]]
71
- end
75
+ def method_missing(m, *args)
76
+ read_labels
77
+ # is method name a label name
78
+ if @label.key?(m.to_s)
79
+ row, col = label(m.to_s)
80
+ cell(row, col)
81
+ else
82
+ # call super for methods like #a1
83
+ super
84
+ end
85
+ end
72
86
 
73
- # Returns the formula at (row,col).
74
- # Returns nil if there is no formula.
75
- # The method #formula? checks if there is a formula.
76
- def formula(row,col,sheet=nil)
77
- sheet ||= default_sheet
78
- read_cells(sheet)
79
- row,col = normalize(row,col)
80
- @formula[sheet][[row,col]]
81
- end
87
+ # Returns the content of a spreadsheet-cell.
88
+ # (1,1) is the upper left corner.
89
+ # (1,1), (1,'A'), ('A',1), ('a',1) all refers to the
90
+ # cell at the first line and first row.
91
+ def cell(row, col, sheet = nil)
92
+ sheet ||= default_sheet
93
+ read_cells(sheet)
94
+ row, col = normalize(row, col)
95
+ if celltype(row, col, sheet) == :date
96
+ yyyy, mm, dd = @cell[sheet][[row, col]].to_s.split('-')
97
+ return Date.new(yyyy.to_i, mm.to_i, dd.to_i)
98
+ end
82
99
 
83
- # Predicate methods really should return a boolean
84
- # value. Hopefully no one was relying on the fact that this
85
- # previously returned either nil/formula
86
- def formula?(*args)
87
- !!formula(*args)
88
- end
100
+ @cell[sheet][[row, col]]
101
+ end
102
+
103
+ # Returns the formula at (row,col).
104
+ # Returns nil if there is no formula.
105
+ # The method #formula? checks if there is a formula.
106
+ def formula(row, col, sheet = nil)
107
+ sheet ||= default_sheet
108
+ read_cells(sheet)
109
+ row, col = normalize(row, col)
110
+ @formula[sheet][[row, col]]
111
+ end
112
+
113
+ # Predicate methods really should return a boolean
114
+ # value. Hopefully no one was relying on the fact that this
115
+ # previously returned either nil/formula
116
+ def formula?(*args)
117
+ !!formula(*args)
118
+ end
89
119
 
90
- # returns each formula in the selected sheet as an array of elements
91
- # [row, col, formula]
92
- def formulas(sheet=nil)
93
- sheet ||= default_sheet
94
- read_cells(sheet)
95
- if @formula[sheet]
120
+ # returns each formula in the selected sheet as an array of elements
121
+ # [row, col, formula]
122
+ def formulas(sheet = nil)
123
+ sheet ||= default_sheet
124
+ read_cells(sheet)
125
+ return [] unless @formula[sheet]
96
126
  @formula[sheet].each.collect do |elem|
97
127
  [elem[0][0], elem[0][1], elem[1]]
98
128
  end
99
- else
100
- []
101
129
  end
102
- end
103
-
104
- # Given a cell, return the cell's style
105
- def font(row, col, sheet=nil)
106
- sheet ||= default_sheet
107
- read_cells(sheet)
108
- row,col = normalize(row,col)
109
- style_name = @style[sheet][[row,col]] || @style_defaults[sheet][col - 1] || 'Default'
110
- @font_style_definitions[style_name]
111
- end
112
130
 
113
- # returns the type of a cell:
114
- # * :float
115
- # * :string
116
- # * :date
117
- # * :percentage
118
- # * :formula
119
- # * :time
120
- # * :datetime
121
- def celltype(row,col,sheet=nil)
122
- sheet ||= default_sheet
123
- read_cells(sheet)
124
- row,col = normalize(row,col)
125
- if @formula[sheet][[row,col]]
126
- return :formula
127
- else
128
- @cell_type[sheet][[row,col]]
131
+ # Given a cell, return the cell's style
132
+ def font(row, col, sheet = nil)
133
+ sheet ||= default_sheet
134
+ read_cells(sheet)
135
+ row, col = normalize(row, col)
136
+ style_name = @style[sheet][[row, col]] || @style_defaults[sheet][col - 1] || 'Default'
137
+ @font_style_definitions[style_name]
129
138
  end
130
- end
131
139
 
132
- def sheets
133
- unless @table_display.any?
134
- doc.xpath("//*[local-name()='automatic-styles']").each do |style|
135
- read_table_styles(style)
136
- end
140
+ # returns the type of a cell:
141
+ # * :float
142
+ # * :string
143
+ # * :date
144
+ # * :percentage
145
+ # * :formula
146
+ # * :time
147
+ # * :datetime
148
+ def celltype(row, col, sheet = nil)
149
+ sheet ||= default_sheet
150
+ read_cells(sheet)
151
+ row, col = normalize(row, col)
152
+ @formula[sheet][[row, col]] ? :formula : @cell_type[sheet][[row, col]]
137
153
  end
138
- doc.xpath("//*[local-name()='table']").map do |sheet|
139
- if !@only_visible_sheets || @table_display[attr(sheet,'style-name')]
140
- sheet.attributes["name"].value
141
- end
142
- end.compact
143
- end
144
154
 
145
- # version of the Roo::OpenOffice document
146
- # at 2007 this is always "1.0"
147
- def officeversion
148
- oo_version
149
- @officeversion
150
- end
155
+ def sheets
156
+ @sheet_names
157
+ end
151
158
 
152
- # shows the internal representation of all cells
153
- # mainly for debugging purposes
154
- def to_s(sheet=nil)
155
- sheet ||= default_sheet
156
- read_cells(sheet)
157
- @cell[sheet].inspect
158
- end
159
+ # version of the Roo::OpenOffice document
160
+ # at 2007 this is always "1.0"
161
+ def officeversion
162
+ oo_version
163
+ @officeversion
164
+ end
159
165
 
160
- # returns the row,col values of the labelled cell
161
- # (nil,nil) if label is not defined
162
- def label(labelname)
163
- read_labels
164
- unless @label.size > 0
165
- return nil,nil,nil
166
+ # shows the internal representation of all cells
167
+ # mainly for debugging purposes
168
+ def to_s(sheet = nil)
169
+ sheet ||= default_sheet
170
+ read_cells(sheet)
171
+ @cell[sheet].inspect
166
172
  end
167
- if @label.has_key? labelname
168
- return @label[labelname][1].to_i,
173
+
174
+ # returns the row,col values of the labelled cell
175
+ # (nil,nil) if label is not defined
176
+ def label(labelname)
177
+ read_labels
178
+ return [nil, nil, nil] if @label.size < 1 || !@label.key?(labelname)
179
+ [
180
+ @label[labelname][1].to_i,
169
181
  ::Roo::Utils.letter_to_number(@label[labelname][2]),
170
182
  @label[labelname][0]
171
- else
172
- return nil,nil,nil
183
+ ]
173
184
  end
174
- end
175
185
 
176
- # Returns an array which all labels. Each element is an array with
177
- # [labelname, [row,col,sheetname]]
178
- def labels(sheet=nil)
179
- read_labels
180
- @label.map do |label|
181
- [ label[0], # name
182
- [ label[1][1].to_i, # row
186
+ # Returns an array which all labels. Each element is an array with
187
+ # [labelname, [row,col,sheetname]]
188
+ def labels(_sheet = nil)
189
+ read_labels
190
+ @label.map do |label|
191
+ [label[0], # name
192
+ [label[1][1].to_i, # row
183
193
  ::Roo::Utils.letter_to_number(label[1][2]), # column
184
194
  label[1][0], # sheet
185
- ] ]
195
+ ]]
196
+ end
186
197
  end
187
- end
188
198
 
189
- # returns the comment at (row/col)
190
- # nil if there is no comment
191
- def comment(row,col,sheet=nil)
192
- sheet ||= default_sheet
193
- read_cells(sheet)
194
- row,col = normalize(row,col)
195
- return nil unless @comment[sheet]
196
- @comment[sheet][[row,col]]
197
- end
199
+ # returns the comment at (row/col)
200
+ # nil if there is no comment
201
+ def comment(row, col, sheet = nil)
202
+ sheet ||= default_sheet
203
+ read_cells(sheet)
204
+ row, col = normalize(row, col)
205
+ return nil unless @comment[sheet]
206
+ @comment[sheet][[row, col]]
207
+ end
198
208
 
199
- # returns each comment in the selected sheet as an array of elements
200
- # [row, col, comment]
201
- def comments(sheet=nil)
202
- sheet ||= default_sheet
203
- read_comments(sheet) unless @comments_read[sheet]
204
- if @comment[sheet]
209
+ # returns each comment in the selected sheet as an array of elements
210
+ # [row, col, comment]
211
+ def comments(sheet = nil)
212
+ sheet ||= default_sheet
213
+ read_comments(sheet) unless @comments_read[sheet]
214
+ return [] unless @comment[sheet]
205
215
  @comment[sheet].each.collect do |elem|
206
- [elem[0][0],elem[0][1],elem[1]]
216
+ [elem[0][0], elem[0][1], elem[1]]
207
217
  end
208
- else
209
- []
210
218
  end
211
- end
212
219
 
213
- private
214
-
215
- # If the ODS file has an encryption-data element, then try to decrypt.
216
- # If successful, the temporary content.xml will be overwritten with
217
- # decrypted contents.
218
- def decrypt_if_necessary(
219
- zip_file,
220
- content_entry,
221
- roo_content_xml_path, options
222
- )
223
- # Check if content.xml is encrypted by extracting manifest.xml
224
- # and searching for a manifest:encryption-data element
225
-
226
- if manifest_entry = zip_file.glob("META-INF/manifest.xml").first
227
- roo_manifest_xml_path = File.join(@tmpdir, "roo_manifest.xml")
228
- manifest_entry.extract(roo_manifest_xml_path)
229
- manifest = ::Roo::Utils.load_xml(roo_manifest_xml_path)
230
-
231
- # XPath search for manifest:encryption-data only for the content.xml
232
- # file
233
-
234
- encryption_data = manifest.xpath(
235
- "//manifest:file-entry[@manifest:full-path='content.xml']"\
220
+ private
221
+
222
+ # If the ODS file has an encryption-data element, then try to decrypt.
223
+ # If successful, the temporary content.xml will be overwritten with
224
+ # decrypted contents.
225
+ def decrypt_if_necessary(
226
+ zip_file,
227
+ content_entry,
228
+ roo_content_xml_path, options
229
+ )
230
+ # Check if content.xml is encrypted by extracting manifest.xml
231
+ # and searching for a manifest:encryption-data element
232
+
233
+ if (manifest_entry = zip_file.glob('META-INF/manifest.xml').first)
234
+ roo_manifest_xml_path = File.join(@tmpdir, 'roo_manifest.xml')
235
+ manifest_entry.extract(roo_manifest_xml_path)
236
+ manifest = ::Roo::Utils.load_xml(roo_manifest_xml_path)
237
+
238
+ # XPath search for manifest:encryption-data only for the content.xml
239
+ # file
240
+
241
+ encryption_data = manifest.xpath(
242
+ "//manifest:file-entry[@manifest:full-path='content.xml']"\
236
243
  "/manifest:encryption-data"
237
- ).first
244
+ ).first
238
245
 
239
- # If XPath returns a node, then we know content.xml is encrypted
246
+ # If XPath returns a node, then we know content.xml is encrypted
240
247
 
241
- if !encryption_data.nil?
248
+ unless encryption_data.nil?
242
249
 
243
- # Since we know it's encrypted, we check for the password option
244
- # and if it doesn't exist, raise an argument error
250
+ # Since we know it's encrypted, we check for the password option
251
+ # and if it doesn't exist, raise an argument error
245
252
 
246
- password = options[:password]
247
- if !password.nil?
248
- perform_decryption(
249
- encryption_data,
250
- password,
251
- content_entry,
252
- roo_content_xml_path
253
- )
254
- else
255
- raise ArgumentError,
256
- 'file is encrypted but password was not supplied'
253
+ password = options[:password]
254
+ if !password.nil?
255
+ perform_decryption(
256
+ encryption_data,
257
+ password,
258
+ content_entry,
259
+ roo_content_xml_path
260
+ )
261
+ else
262
+ fail ArgumentError, 'file is encrypted but password was not supplied'
263
+ end
257
264
  end
265
+ else
266
+ fail ArgumentError, 'file missing required META-INF/manifest.xml'
258
267
  end
259
- else
260
- raise ArgumentError, 'file missing required META-INF/manifest.xml'
261
268
  end
262
- end
263
269
 
264
- # Process the ODS encryption manifest and perform the decryption
265
- def perform_decryption(
266
- encryption_data,
267
- password,
268
- content_entry,
269
- roo_content_xml_path
270
- )
271
- # Extract various expected attributes from the manifest that
272
- # describe the encryption
273
-
274
- algorithm_node = encryption_data.xpath("manifest:algorithm").first
275
- key_derivation_node =
276
- encryption_data.xpath("manifest:key-derivation").first
277
- start_key_generation_node =
278
- encryption_data.xpath("manifest:start-key-generation").first
279
-
280
- # If we have all the expected elements, then we can perform
281
- # the decryption.
282
-
283
- if !algorithm_node.nil? && !key_derivation_node.nil? &&
284
- !start_key_generation_node.nil?
285
-
286
- # The algorithm is a URI describing the algorithm used
287
- algorithm = algorithm_node['manifest:algorithm-name']
288
-
289
- # The initialization vector is base-64 encoded
290
- iv = Base64.decode64(
291
- algorithm_node['manifest:initialisation-vector']
292
- )
293
- key_derivation_name =
294
- key_derivation_node['manifest:key-derivation-name']
295
- key_size = key_derivation_node['manifest:key-size'].to_i
296
- iteration_count =
297
- key_derivation_node['manifest:iteration-count'].to_i
298
- salt = Base64.decode64(key_derivation_node['manifest:salt'])
299
-
300
- # The key is hashed with an algorithm represented by this URI
301
- key_generation_name =
302
- start_key_generation_node[
303
- 'manifest:start-key-generation-name'
304
- ]
305
- key_generation_size =
306
- start_key_generation_node['manifest:key-size'].to_i
270
+ # Process the ODS encryption manifest and perform the decryption
271
+ def perform_decryption(
272
+ encryption_data,
273
+ password,
274
+ content_entry,
275
+ roo_content_xml_path
276
+ )
277
+ # Extract various expected attributes from the manifest that
278
+ # describe the encryption
279
+
280
+ algorithm_node = encryption_data.xpath('manifest:algorithm').first
281
+ key_derivation_node =
282
+ encryption_data.xpath('manifest:key-derivation').first
283
+ start_key_generation_node =
284
+ encryption_data.xpath('manifest:start-key-generation').first
285
+
286
+ # If we have all the expected elements, then we can perform
287
+ # the decryption.
288
+
289
+ if !algorithm_node.nil? && !key_derivation_node.nil? &&
290
+ !start_key_generation_node.nil?
291
+
292
+ # The algorithm is a URI describing the algorithm used
293
+ algorithm = algorithm_node['manifest:algorithm-name']
294
+
295
+ # The initialization vector is base-64 encoded
296
+ iv = Base64.decode64(
297
+ algorithm_node['manifest:initialisation-vector']
298
+ )
299
+ key_derivation_name = key_derivation_node['manifest:key-derivation-name']
300
+ iteration_count = key_derivation_node['manifest:iteration-count'].to_i
301
+ salt = Base64.decode64(key_derivation_node['manifest:salt'])
302
+
303
+ # The key is hashed with an algorithm represented by this URI
304
+ key_generation_name =
305
+ start_key_generation_node[
306
+ 'manifest:start-key-generation-name'
307
+ ]
307
308
 
308
309
  hashed_password = password
309
- key = nil
310
310
 
311
- if key_generation_name.eql?(
312
- "http://www.w3.org/2000/09/xmldsig#sha256"
313
- )
314
- hashed_password = Digest::SHA256.digest(password)
315
- else
316
- raise ArgumentError, 'Unknown key generation algorithm ' +
317
- key_generation_name
318
- end
311
+ if key_generation_name == 'http://www.w3.org/2000/09/xmldsig#sha256'
312
+
313
+ hashed_password = Digest::SHA256.digest(password)
314
+ else
315
+ fail ArgumentError, "Unknown key generation algorithm #{key_generation_name}"
316
+ end
319
317
 
320
318
  cipher = find_cipher(
321
319
  algorithm,
@@ -336,210 +334,179 @@ class Roo::OpenOffice < Roo::Base
336
334
  Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(decrypted)
337
335
  )
338
336
  rescue StandardError => error
339
- raise ArgumentError,
340
- 'Invalid password or other data error: ' + error.to_s
337
+ raise ArgumentError, "Invalid password or other data error: #{error}"
341
338
  end
342
- else
343
- raise ArgumentError,
344
- 'manifest.xml missing encryption-data elements'
339
+ else
340
+ fail ArgumentError, 'manifest.xml missing encryption-data elements'
341
+ end
345
342
  end
346
- end
347
343
 
348
- # Create a cipher based on an ODS algorithm URI from manifest.xml
349
- def find_cipher(
350
- algorithm,
351
- key_derivation_name,
352
- hashed_password,
353
- salt,
354
- iteration_count,
355
- iv
356
- )
357
- cipher = nil
358
- if algorithm.eql? "http://www.w3.org/2001/04/xmlenc#aes256-cbc"
359
- cipher = OpenSSL::Cipher.new('AES-256-CBC')
344
+ # Create a cipher based on an ODS algorithm URI from manifest.xml
345
+ # params: algorithm, key_derivation_name, hashed_password, salt, iteration_count, iv
346
+ def find_cipher(*args)
347
+ fail ArgumentError, 'Unknown algorithm ' + algorithm unless args[0] == 'http://www.w3.org/2001/04/xmlenc#aes256-cbc'
348
+
349
+ cipher = ::OpenSSL::Cipher.new('AES-256-CBC')
360
350
  cipher.decrypt
361
351
  cipher.padding = 0
362
- cipher.key = find_cipher_key(
363
- cipher,
364
- key_derivation_name,
365
- hashed_password,
366
- salt,
367
- iteration_count
368
- )
369
- cipher.iv = iv
370
- else
371
- raise ArgumentError, 'Unknown algorithm ' + algorithm
372
- end
373
- cipher
374
- end
352
+ cipher.key = find_cipher_key(cipher, *args[1..4])
353
+ cipher.iv = args[5]
375
354
 
376
- # Create a cipher key based on an ODS algorithm string from manifest.xml
377
- def find_cipher_key(
378
- cipher,
379
- key_derivation_name,
380
- hashed_password,
381
- salt,
382
- iteration_count
383
- )
384
- if key_derivation_name.eql? "PBKDF2"
385
- key = OpenSSL::PKCS5.pbkdf2_hmac_sha1(
386
- hashed_password,
387
- salt,
388
- iteration_count,
389
- cipher.key_len
390
- )
391
- else
392
- raise ArgumentError, 'Unknown key derivation name ' +
393
- key_derivation_name
394
- end
395
- key
396
- end
355
+ cipher
356
+ end
397
357
 
398
- # Block decrypt raw bytes from the zip file based on the cipher
399
- def decrypt(content_entry, cipher)
400
- # Zip::Entry.extract writes a 0-length file when trying
401
- # to extract an encrypted stream, so we read the
402
- # raw bytes based on the offset and lengths
403
- decrypted = ""
404
- File.open(@filename, "rb") do |zipfile|
405
- zipfile.seek(
406
- content_entry.local_header_offset +
407
- content_entry.calculate_local_header_size
408
- )
409
- total_to_read = content_entry.compressed_size
410
-
411
- block_size = 4096
412
- block_size = total_to_read if block_size > total_to_read
413
-
414
- while buffer = zipfile.read(block_size)
415
- decrypted += cipher.update(buffer)
416
- total_to_read -= buffer.length
417
-
418
- break if total_to_read == 0
419
-
420
- block_size = total_to_read if block_size > total_to_read
421
- end
358
+ # Create a cipher key based on an ODS algorithm string from manifest.xml
359
+ def find_cipher_key(*args)
360
+ fail ArgumentError, 'Unknown key derivation name ', args[1] unless args[1] == 'PBKDF2'
361
+
362
+ ::OpenSSL::PKCS5.pbkdf2_hmac_sha1(args[2], args[3], args[4], args[0].key_len)
422
363
  end
423
364
 
424
- decrypted + cipher.final
425
- end
365
+ # Block decrypt raw bytes from the zip file based on the cipher
366
+ def decrypt(content_entry, cipher)
367
+ # Zip::Entry.extract writes a 0-length file when trying
368
+ # to extract an encrypted stream, so we read the
369
+ # raw bytes based on the offset and lengths
370
+ decrypted = ''
371
+ File.open(@filename, 'rb') do |zipfile|
372
+ zipfile.seek(
373
+ content_entry.local_header_offset +
374
+ content_entry.calculate_local_header_size
375
+ )
376
+ total_to_read = content_entry.compressed_size
426
377
 
427
- def doc
428
- @doc ||= ::Roo::Utils.load_xml(File.join(@tmpdir, "roo_content.xml"))
429
- end
378
+ block_size = 4096
379
+ block_size = total_to_read if block_size > total_to_read
380
+
381
+ while (buffer = zipfile.read(block_size))
382
+ decrypted += cipher.update(buffer)
383
+ total_to_read -= buffer.length
384
+
385
+ break if total_to_read == 0
430
386
 
431
- # read the version of the OO-Version
432
- def oo_version
433
- doc.xpath("//*[local-name()='document-content']").each do |office|
434
- @officeversion = attr(office,'version')
387
+ block_size = total_to_read if block_size > total_to_read
388
+ end
389
+ end
390
+
391
+ decrypted + cipher.final
392
+ end
393
+
394
+ def doc
395
+ @doc ||= ::Roo::Utils.load_xml(File.join(@tmpdir, 'roo_content.xml'))
396
+ end
397
+
398
+ # read the version of the OO-Version
399
+ def oo_version
400
+ doc.xpath("//*[local-name()='document-content']").each do |office|
401
+ @officeversion = attribute(office, 'version')
402
+ end
435
403
  end
436
- end
437
404
 
438
- # helper function to set the internal representation of cells
439
- def set_cell_values(sheet,x,y,i,v,value_type,formula,table_cell,str_v,style_name)
440
- key = [y,x+i]
441
- @cell_type[sheet] = {} unless @cell_type[sheet]
442
- @cell_type[sheet][key] = Roo::OpenOffice.oo_type_2_roo_type(value_type)
443
- @formula[sheet] = {} unless @formula[sheet]
444
- if formula
445
- ['of:', 'oooc:'].each do |prefix|
446
- if formula[0,prefix.length] == prefix
447
- formula = formula[prefix.length..-1]
405
+ # helper function to set the internal representation of cells
406
+ def set_cell_values(sheet, x, y, i, v, value_type, formula, table_cell, str_v, style_name)
407
+ key = [y, x + i]
408
+ @cell_type[sheet] ||= {}
409
+ @cell_type[sheet][key] = value_type.to_sym if value_type
410
+ @formula[sheet] ||= {}
411
+ if formula
412
+ ['of:', 'oooc:'].each do |prefix|
413
+ if formula[0, prefix.length] == prefix
414
+ formula = formula[prefix.length..-1]
415
+ end
448
416
  end
417
+ @formula[sheet][key] = formula
449
418
  end
450
- @formula[sheet][key] = formula
451
- end
452
- @cell[sheet] = {} unless @cell[sheet]
453
- @style[sheet] = {} unless @style[sheet]
454
- @style[sheet][key] = style_name
455
- case @cell_type[sheet][key]
456
- when :float
457
- @cell[sheet][key] = v.to_f
458
- when :string
459
- @cell[sheet][key] = str_v
460
- when :date
461
- #TODO: if table_cell.attributes['date-value'].size != "XXXX-XX-XX".size
462
- if attr(table_cell,'date-value').size != "XXXX-XX-XX".size
463
- #-- dann ist noch eine Uhrzeit vorhanden
464
- #-- "1961-11-21T12:17:18"
465
- @cell[sheet][key] = DateTime.parse(attr(table_cell,'date-value').to_s)
466
- @cell_type[sheet][key] = :datetime
419
+ @cell[sheet] ||= {}
420
+ @style[sheet] ||= {}
421
+ @style[sheet][key] = style_name
422
+ case @cell_type[sheet][key]
423
+ when :float
424
+ @cell[sheet][key] = (table_cell.attributes['value'].to_s.include?(".") || table_cell.children.first.text.include?(".")) ? v.to_f : v.to_i
425
+ when :percentage
426
+ @cell[sheet][key] = v.to_f
427
+ when :string
428
+ @cell[sheet][key] = str_v
429
+ when :date
430
+ # TODO: if table_cell.attributes['date-value'].size != "XXXX-XX-XX".size
431
+ if attribute(table_cell, 'date-value').size != 'XXXX-XX-XX'.size
432
+ #-- dann ist noch eine Uhrzeit vorhanden
433
+ #-- "1961-11-21T12:17:18"
434
+ @cell[sheet][key] = DateTime.parse(attribute(table_cell, 'date-value').to_s)
435
+ @cell_type[sheet][key] = :datetime
436
+ else
437
+ @cell[sheet][key] = table_cell.attributes['date-value']
438
+ end
439
+ when :time
440
+ hms = v.split(':')
441
+ @cell[sheet][key] = hms[0].to_i * 3600 + hms[1].to_i * 60 + hms[2].to_i
467
442
  else
468
- @cell[sheet][key] = table_cell.attributes['date-value']
443
+ @cell[sheet][key] = v
469
444
  end
470
- when :percentage
471
- @cell[sheet][key] = v.to_f
472
- when :time
473
- hms = v.split(':')
474
- @cell[sheet][key] = hms[0].to_i*3600 + hms[1].to_i*60 + hms[2].to_i
475
- else
476
- @cell[sheet][key] = v
477
445
  end
478
- end
479
446
 
480
- # read all cells in the selected sheet
481
- #--
482
- # the following construct means '4 blanks'
483
- # some content <text:s text:c="3"/>
484
- #++
485
- def read_cells(sheet = default_sheet)
486
- validate_sheet!(sheet)
487
- return if @cells_read[sheet]
488
-
489
- sheet_found = false
490
- doc.xpath("//*[local-name()='table']").each do |ws|
491
- if sheet == attr(ws,'name')
447
+ # read all cells in the selected sheet
448
+ #--
449
+ # the following construct means '4 blanks'
450
+ # some content <text:s text:c="3"/>
451
+ #++
452
+ def read_cells(sheet = default_sheet)
453
+ validate_sheet!(sheet)
454
+ return if @cells_read[sheet]
455
+
456
+ sheet_found = false
457
+ doc.xpath("//*[local-name()='table']").each do |ws|
458
+ next unless sheet == attribute(ws, 'name')
459
+
492
460
  sheet_found = true
493
- col = 1
494
- row = 1
461
+ col = 1
462
+ row = 1
495
463
  ws.children.each do |table_element|
496
464
  case table_element.name
497
465
  when 'table-column'
498
466
  @style_defaults[sheet] << table_element.attributes['default-cell-style-name']
499
467
  when 'table-row'
500
468
  if table_element.attributes['number-rows-repeated']
501
- skip_row = attr(table_element,'number-rows-repeated').to_s.to_i
502
- row = row + skip_row - 1
469
+ skip_row = attribute(table_element, 'number-rows-repeated').to_s.to_i
470
+ row = row + skip_row - 1
503
471
  end
504
472
  table_element.children.each do |cell|
505
- skip_col = attr(cell, 'number-columns-repeated')
506
- formula = attr(cell,'formula')
507
- value_type = attr(cell,'value-type')
508
- v = attr(cell,'value')
509
- style_name = attr(cell,'style-name')
473
+ skip_col = attribute(cell, 'number-columns-repeated')
474
+ formula = attribute(cell, 'formula')
475
+ value_type = attribute(cell, 'value-type')
476
+ v = attribute(cell, 'value')
477
+ style_name = attribute(cell, 'style-name')
510
478
  case value_type
511
479
  when 'string'
512
- str_v = ''
480
+ str_v = ''
513
481
  # insert \n if there is more than one paragraph
514
482
  para_count = 0
515
483
  cell.children.each do |str|
516
484
  # begin comments
517
- =begin
518
- - <table:table-cell office:value-type="string">
519
- - <office:annotation office:display="true" draw:style-name="gr1" draw:text-style-name="P1" svg:width="1.1413in" svg:height="0.3902in" svg:x="2.0142in" svg:y="0in" draw:caption-point-x="-0.2402in" draw:caption-point-y="0.5661in">
520
- <dc:date>2011-09-20T00:00:00</dc:date>
521
- <text:p text:style-name="P1">Kommentar fuer B4</text:p>
522
- </office:annotation>
523
- <text:p>B4 (mit Kommentar)</text:p>
524
- </table:table-cell>
525
- =end
485
+ #=begin
486
+ #- <table:table-cell office:value-type="string">
487
+ # - <office:annotation office:display="true" draw:style-name="gr1" draw:text-style-name="P1" svg:width="1.1413in" svg:height="0.3902in" svg:x="2.0142in" svg:y="0in" draw:caption-point-x="-0.2402in" draw:caption-point-y="0.5661in">
488
+ # <dc:date>2011-09-20T00:00:00</dc:date>
489
+ # <text:p text:style-name="P1">Kommentar fuer B4</text:p>
490
+ # </office:annotation>
491
+ # <text:p>B4 (mit Kommentar)</text:p>
492
+ # </table:table-cell>
493
+ #=end
526
494
  if str.name == 'annotation'
527
495
  str.children.each do |annotation|
528
- if annotation.name == 'p'
529
- # @comment ist ein Hash mit Sheet als Key (wie bei @cell)
530
- # innerhalb eines Elements besteht ein Eintrag aus einem
531
- # weiteren Hash mit Key [row,col] und dem eigentlichen
532
- # Kommentartext als Inhalt
533
- @comment[sheet] = Hash.new unless @comment[sheet]
534
- key = [row,col]
535
- @comment[sheet][key] = annotation.text
536
- end
496
+ next unless annotation.name == 'p'
497
+ # @comment ist ein Hash mit Sheet als Key (wie bei @cell)
498
+ # innerhalb eines Elements besteht ein Eintrag aus einem
499
+ # weiteren Hash mit Key [row,col] und dem eigentlichen
500
+ # Kommentartext als Inhalt
501
+ @comment[sheet] = Hash.new unless @comment[sheet]
502
+ key = [row, col]
503
+ @comment[sheet][key] = annotation.text
537
504
  end
538
505
  end
539
506
  # end comments
540
507
  if str.name == 'p'
541
- v = str.content
542
- str_v += "\n" if para_count > 0
508
+ v = str.content
509
+ str_v += "\n" if para_count > 0
543
510
  para_count += 1
544
511
  if str.children.size > 1
545
512
  str_v += children_to_string(str.children)
@@ -548,32 +515,28 @@ class Roo::OpenOffice < Roo::Base
548
515
  str_v += child.content #.text
549
516
  end
550
517
  end
551
- str_v.gsub!(/&apos;/,"'") # special case not supported by unescapeHTML
518
+ str_v.gsub!(/&apos;/, "'") # special case not supported by unescapeHTML
552
519
  str_v = CGI.unescapeHTML(str_v)
553
520
  end # == 'p'
554
521
  end
555
522
  when 'time'
556
523
  cell.children.each do |str|
557
- if str.name == 'p'
558
- v = str.content
559
- end
524
+ v = str.content if str.name == 'p'
560
525
  end
561
526
  when '', nil, 'date', 'percentage', 'float'
562
527
  #
563
528
  when 'boolean'
564
- v = attr(cell,'boolean-value').to_s
565
- else
566
- # raise "unknown type #{value_type}"
529
+ v = attribute(cell, 'boolean-value').to_s
567
530
  end
568
531
  if skip_col
569
- if v != nil or cell.attributes['date-value']
570
- 0.upto(skip_col.to_i-1) do |i|
571
- set_cell_values(sheet,col,row,i,v,value_type,formula,cell,str_v,style_name)
532
+ if !v.nil? || cell.attributes['date-value']
533
+ 0.upto(skip_col.to_i - 1) do |i|
534
+ set_cell_values(sheet, col, row, i, v, value_type, formula, cell, str_v, style_name)
572
535
  end
573
536
  end
574
537
  col += (skip_col.to_i - 1)
575
538
  end # if skip
576
- set_cell_values(sheet,col,row,0,v,value_type,formula,cell,str_v,style_name)
539
+ set_cell_values(sheet, col, row, 0, v, value_type, formula, cell, str_v, style_name)
577
540
  col += 1
578
541
  end
579
542
  row += 1
@@ -581,101 +544,83 @@ class Roo::OpenOffice < Roo::Base
581
544
  end
582
545
  end
583
546
  end
547
+ doc.xpath("//*[local-name()='automatic-styles']").each do |style|
548
+ read_styles(style)
549
+ end
550
+
551
+ fail RangeError unless sheet_found
552
+
553
+ @cells_read[sheet] = true
554
+ @comments_read[sheet] = true
584
555
  end
585
- doc.xpath("//*[local-name()='automatic-styles']").each do |style|
586
- read_styles(style)
587
- end
588
- if !sheet_found
589
- raise RangeError
590
- end
591
- @cells_read[sheet] = true
592
- @comments_read[sheet] = true
593
- end
594
556
 
595
- # Only calls read_cells because Roo::Base calls read_comments
596
- # whereas the reading of comments is done in read_cells for Roo::OpenOffice-objects
597
- def read_comments(sheet=nil)
598
- read_cells(sheet)
599
- end
557
+ # Simply delegates to read_cells: Roo::Base calls read_comments,
558
+ # but for Roo::OpenOffice objects the comments are read as part of read_cells
559
+ def read_comments(sheet = nil)
560
+ read_cells(sheet)
561
+ end
600
562
 
601
- def read_labels
602
- @label ||= Hash[doc.xpath("//table:named-range").map do |ne|
603
- #-
604
- # $Sheet1.$C$5
605
- #+
606
- name = attr(ne,'name').to_s
607
- sheetname,coords = attr(ne,'cell-range-address').to_s.split('.$')
608
- col, row = coords.split('$')
609
- sheetname = sheetname[1..-1] if sheetname[0,1] == '$'
610
- [name, [sheetname,row,col]]
611
- end]
612
- end
563
+ def read_labels
564
+ @label ||= Hash[doc.xpath('//table:named-range').map do |ne|
565
+ #-
566
+ # $Sheet1.$C$5
567
+ #+
568
+ name = attribute(ne, 'name').to_s
569
+ sheetname, coords = attribute(ne, 'cell-range-address').to_s.split('.$')
570
+ col, row = coords.split('$')
571
+ sheetname = sheetname[1..-1] if sheetname[0, 1] == '$'
572
+ [name, [sheetname, row, col]]
573
+ end]
574
+ end
613
575
 
614
- def read_styles(style_elements)
615
- @font_style_definitions['Default'] = Roo::Font.new
616
- style_elements.each do |style|
617
- next unless style.name == 'style'
618
- style_name = attr(style,'name')
619
- style.each do |properties|
620
- font = Roo::OpenOffice::Font.new
621
- font.bold = attr(properties,'font-weight')
622
- font.italic = attr(properties,'font-style')
623
- font.underline = attr(properties,'text-underline-style')
624
- @font_style_definitions[style_name] = font
576
+ def read_styles(style_elements)
577
+ @font_style_definitions['Default'] = Roo::Font.new
578
+ style_elements.each do |style|
579
+ next unless style.name == 'style'
580
+ style_name = attribute(style, 'name')
581
+ style.each do |properties|
582
+ font = Roo::OpenOffice::Font.new
583
+ font.bold = attribute(properties, 'font-weight')
584
+ font.italic = attribute(properties, 'font-style')
585
+ font.underline = attribute(properties, 'text-underline-style')
586
+ @font_style_definitions[style_name] = font
587
+ end
625
588
  end
626
589
  end
627
- end
628
590
 
629
- def read_table_styles(styles)
630
- styles.children.each do |style|
631
- next unless style.name == 'style'
632
- style_name = attr(style,'name')
633
- style.children.each do |properties|
634
- display = attr(properties,'display')
635
- next unless display
636
- @table_display[style_name] = (display == 'true')
591
+ def read_table_styles(styles)
592
+ styles.children.each do |style|
593
+ next unless style.name == 'style'
594
+ style_name = attribute(style, 'name')
595
+ style.children.each do |properties|
596
+ display = attribute(properties, 'display')
597
+ next unless display
598
+ @table_display[style_name] = (display == 'true')
599
+ end
637
600
  end
638
601
  end
639
- end
640
-
641
- A_ROO_TYPE = {
642
- "float" => :float,
643
- "string" => :string,
644
- "date" => :date,
645
- "percentage" => :percentage,
646
- "time" => :time,
647
- }
648
-
649
- def self.oo_type_2_roo_type(ootype)
650
- return A_ROO_TYPE[ootype]
651
- end
652
602
 
653
- # helper method to convert compressed spaces and other elements within
654
- # an text into a string
655
- def children_to_string(children)
656
- result = ''
657
- children.each {|child|
658
- if child.text?
659
- result = result + child.content
660
- else
661
- if child.name == 's'
662
- compressed_spaces = child.attributes['c'].to_s.to_i
663
- # no explicit number means a count of 1:
664
- if compressed_spaces == 0
665
- compressed_spaces = 1
666
- end
667
- result = result + " "*compressed_spaces
603
+ # helper method to convert compressed spaces and other elements within
604
+ # a text into a string
605
+ # FIXME: add a test for compressed_spaces == 0. It's not currently tested.
606
+ def children_to_string(children)
607
+ children.map do |child|
608
+ if child.text?
609
+ child.content
668
610
  else
669
- result = result + child.content
611
+ if child.name == 's'
612
+ compressed_spaces = child.attributes['c'].to_s.to_i
613
+ # no explicit number means a count of 1:
614
+ compressed_spaces == 0 ? ' ' : ' ' * compressed_spaces
615
+ else
616
+ child.content
617
+ end
670
618
  end
671
- end
672
- }
673
- result
674
- end
619
+ end.join
620
+ end
675
621
 
676
- def attr(node, attr_name)
677
- if node.attributes[attr_name]
678
- node.attributes[attr_name].value
622
+ def attribute(node, attr_name)
623
+ node.attributes[attr_name].value if node.attributes[attr_name]
679
624
  end
680
625
  end
681
626
  end