roo 2.0.1 → 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +1 -0
- data/CHANGELOG.md +16 -1
- data/README.md +8 -11
- data/lib/roo/base.rb +84 -84
- data/lib/roo/constants.rb +5 -0
- data/lib/roo/excelx/shared_strings.rb +10 -0
- data/lib/roo/excelx.rb +12 -13
- data/lib/roo/libre_office.rb +1 -2
- data/lib/roo/open_office.rb +454 -521
- data/lib/roo/spreadsheet.rb +3 -1
- data/lib/roo/version.rb +1 -1
- data/lib/roo.rb +5 -3
- data/spec/helpers.rb +5 -0
- data/spec/lib/roo/base_spec.rb +212 -0
- data/spec/lib/roo/excelx_spec.rb +13 -0
- data/spec/lib/roo/spreadsheet_spec.rb +20 -0
- data/spec/spec_helper.rb +6 -1
- data/test/all_ss.rb +12 -11
- data/test/test_helper.rb +0 -4
- data/test/test_roo.rb +2091 -2088
- metadata +5 -3
- data/test/test_generic_spreadsheet.rb +0 -237
data/lib/roo/open_office.rb
CHANGED
@@ -4,318 +4,306 @@ require 'cgi'
|
|
4
4
|
require 'zip/filesystem'
|
5
5
|
require 'roo/font'
|
6
6
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
7
|
+
module Roo
|
8
|
+
class OpenOffice < Roo::Base
|
9
|
+
ERROR_MISSING_CONTENT_XML = 'file missing required content.xml'.freeze
|
10
|
+
XPATH_FIND_TABLE_STYLES = "//*[local-name()='automatic-styles']".freeze
|
11
|
+
XPATH_LOCAL_NAME_TABLE = "//*[local-name()='table']".freeze
|
12
|
+
|
13
|
+
# initialization and opening of a spreadsheet file
|
14
|
+
# values for packed: :zip
|
15
|
+
def initialize(filename, options = {})
|
16
|
+
packed = options[:packed]
|
17
|
+
file_warning = options[:file_warning] || :error
|
18
|
+
|
19
|
+
@only_visible_sheets = options[:only_visible_sheets]
|
20
|
+
file_type_check(filename, '.ods', 'an Roo::OpenOffice', file_warning, packed)
|
21
|
+
@tmpdir = make_tmpdir(File.basename(filename), options[:tmpdir_root])
|
22
|
+
@filename = local_filename(filename, @tmpdir, packed)
|
23
|
+
# TODO: @cells_read[:default] = false
|
24
|
+
open_oo_file(options)
|
25
|
+
super(filename, options)
|
26
|
+
initialize_default_variables
|
27
|
+
rescue => e # clean up any temp files, but only if an error was raised
|
28
|
+
close
|
29
|
+
raise e
|
30
|
+
end
|
31
|
+
|
32
|
+
def open_oo_file(options)
|
33
|
+
Zip::File.open(@filename) do |zip_file|
|
34
|
+
content_entry = zip_file.glob('content.xml').first
|
35
|
+
fail ArgumentError, ERROR_MISSING_CONTENT_XML unless content_entry
|
36
|
+
|
37
|
+
roo_content_xml_path = ::File.join(@tmpdir, 'roo_content.xml')
|
22
38
|
content_entry.extract(roo_content_xml_path)
|
23
|
-
decrypt_if_necessary(
|
24
|
-
zip_file,
|
25
|
-
content_entry,
|
26
|
-
roo_content_xml_path,
|
27
|
-
options
|
28
|
-
)
|
29
|
-
else
|
30
|
-
raise ArgumentError, 'file missing required content.xml'
|
39
|
+
decrypt_if_necessary(zip_file, content_entry, roo_content_xml_path, options)
|
31
40
|
end
|
32
41
|
end
|
33
|
-
super(filename, options)
|
34
|
-
@formula = Hash.new
|
35
|
-
@style = Hash.new
|
36
|
-
@style_defaults = Hash.new { |h,k| h[k] = [] }
|
37
|
-
@table_display = Hash.new { |h,k| h[k] = true }
|
38
|
-
@font_style_definitions = Hash.new
|
39
|
-
@comment = Hash.new
|
40
|
-
@comments_read = Hash.new
|
41
|
-
rescue => e # clean up any temp files, but only if an error was raised
|
42
|
-
close
|
43
|
-
raise e
|
44
|
-
end
|
45
42
|
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
super
|
43
|
+
def initialize_default_variables
|
44
|
+
@formula = {}
|
45
|
+
@style = {}
|
46
|
+
@style_defaults = Hash.new { |h, k| h[k] = [] }
|
47
|
+
@table_display = Hash.new { |h, k| h[k] = true }
|
48
|
+
@font_style_definitions = {}
|
49
|
+
@comment = {}
|
50
|
+
@comments_read = {}
|
55
51
|
end
|
56
|
-
end
|
57
52
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
end
|
70
|
-
@cell[sheet][[row,col]]
|
71
|
-
end
|
53
|
+
def method_missing(m, *args)
|
54
|
+
read_labels
|
55
|
+
# is method name a label name
|
56
|
+
if @label.key?(m.to_s)
|
57
|
+
row, col = label(m.to_s)
|
58
|
+
cell(row, col)
|
59
|
+
else
|
60
|
+
# call super for methods like #a1
|
61
|
+
super
|
62
|
+
end
|
63
|
+
end
|
72
64
|
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
sheet
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
65
|
+
# Returns the content of a spreadsheet-cell.
|
66
|
+
# (1,1) is the upper left corner.
|
67
|
+
# (1,1), (1,'A'), ('A',1), ('a',1) all refers to the
|
68
|
+
# cell at the first line and first row.
|
69
|
+
def cell(row, col, sheet = nil)
|
70
|
+
sheet ||= default_sheet
|
71
|
+
read_cells(sheet)
|
72
|
+
row, col = normalize(row, col)
|
73
|
+
if celltype(row, col, sheet) == :date
|
74
|
+
yyyy, mm, dd = @cell[sheet][[row, col]].to_s.split('-')
|
75
|
+
return Date.new(yyyy.to_i, mm.to_i, dd.to_i)
|
76
|
+
end
|
82
77
|
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
78
|
+
@cell[sheet][[row, col]]
|
79
|
+
end
|
80
|
+
|
81
|
+
# Returns the formula at (row,col).
|
82
|
+
# Returns nil if there is no formula.
|
83
|
+
# The method #formula? checks if there is a formula.
|
84
|
+
def formula(row, col, sheet = nil)
|
85
|
+
sheet ||= default_sheet
|
86
|
+
read_cells(sheet)
|
87
|
+
row, col = normalize(row, col)
|
88
|
+
@formula[sheet][[row, col]]
|
89
|
+
end
|
89
90
|
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
91
|
+
# Predicate methods really should return a boolean
|
92
|
+
# value. Hopefully no one was relying on the fact that this
|
93
|
+
# previously returned either nil/formula
|
94
|
+
def formula?(*args)
|
95
|
+
!!formula(*args)
|
96
|
+
end
|
97
|
+
|
98
|
+
# returns each formula in the selected sheet as an array of elements
|
99
|
+
# [row, col, formula]
|
100
|
+
def formulas(sheet = nil)
|
101
|
+
sheet ||= default_sheet
|
102
|
+
read_cells(sheet)
|
103
|
+
return [] unless @formula[sheet]
|
96
104
|
@formula[sheet].each.collect do |elem|
|
97
105
|
[elem[0][0], elem[0][1], elem[1]]
|
98
106
|
end
|
99
|
-
else
|
100
|
-
[]
|
101
107
|
end
|
102
|
-
end
|
103
108
|
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
109
|
+
# Given a cell, return the cell's style
|
110
|
+
def font(row, col, sheet = nil)
|
111
|
+
sheet ||= default_sheet
|
112
|
+
read_cells(sheet)
|
113
|
+
row, col = normalize(row, col)
|
114
|
+
style_name = @style[sheet][[row, col]] || @style_defaults[sheet][col - 1] || 'Default'
|
115
|
+
@font_style_definitions[style_name]
|
116
|
+
end
|
112
117
|
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
return :formula
|
127
|
-
else
|
128
|
-
@cell_type[sheet][[row,col]]
|
118
|
+
# returns the type of a cell:
|
119
|
+
# * :float
|
120
|
+
# * :string
|
121
|
+
# * :date
|
122
|
+
# * :percentage
|
123
|
+
# * :formula
|
124
|
+
# * :time
|
125
|
+
# * :datetime
|
126
|
+
def celltype(row, col, sheet = nil)
|
127
|
+
sheet ||= default_sheet
|
128
|
+
read_cells(sheet)
|
129
|
+
row, col = normalize(row, col)
|
130
|
+
@formula[sheet][[row, col]] ? :formula : @cell_type[sheet][[row, col]]
|
129
131
|
end
|
130
|
-
end
|
131
132
|
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
133
|
+
def sheets
|
134
|
+
unless @table_display.any?
|
135
|
+
doc.xpath(XPATH_FIND_TABLE_STYLES).each do |style|
|
136
|
+
read_table_styles(style)
|
137
|
+
end
|
136
138
|
end
|
139
|
+
doc.xpath(XPATH_LOCAL_NAME_TABLE).map do |sheet|
|
140
|
+
if !@only_visible_sheets || @table_display[attribute(sheet, 'style-name')]
|
141
|
+
sheet.attributes['name'].value
|
142
|
+
end
|
143
|
+
end.compact
|
137
144
|
end
|
138
|
-
doc.xpath("//*[local-name()='table']").map do |sheet|
|
139
|
-
if !@only_visible_sheets || @table_display[attr(sheet,'style-name')]
|
140
|
-
sheet.attributes["name"].value
|
141
|
-
end
|
142
|
-
end.compact
|
143
|
-
end
|
144
145
|
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
# shows the internal representation of all cells
|
153
|
-
# mainly for debugging purposes
|
154
|
-
def to_s(sheet=nil)
|
155
|
-
sheet ||= default_sheet
|
156
|
-
read_cells(sheet)
|
157
|
-
@cell[sheet].inspect
|
158
|
-
end
|
146
|
+
# version of the Roo::OpenOffice document
|
147
|
+
# at 2007 this is always "1.0"
|
148
|
+
def officeversion
|
149
|
+
oo_version
|
150
|
+
@officeversion
|
151
|
+
end
|
159
152
|
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
153
|
+
# shows the internal representation of all cells
|
154
|
+
# mainly for debugging purposes
|
155
|
+
def to_s(sheet = nil)
|
156
|
+
sheet ||= default_sheet
|
157
|
+
read_cells(sheet)
|
158
|
+
@cell[sheet].inspect
|
166
159
|
end
|
167
|
-
|
168
|
-
|
160
|
+
|
161
|
+
# returns the row,col values of the labelled cell
|
162
|
+
# (nil,nil) if label is not defined
|
163
|
+
def label(labelname)
|
164
|
+
read_labels
|
165
|
+
return [nil, nil, nil] if @label.size < 1 || !@label.key?(labelname)
|
166
|
+
[
|
167
|
+
@label[labelname][1].to_i,
|
169
168
|
::Roo::Utils.letter_to_number(@label[labelname][2]),
|
170
169
|
@label[labelname][0]
|
171
|
-
|
172
|
-
return nil,nil,nil
|
170
|
+
]
|
173
171
|
end
|
174
|
-
end
|
175
172
|
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
173
|
+
# Returns an array which all labels. Each element is an array with
|
174
|
+
# [labelname, [row,col,sheetname]]
|
175
|
+
def labels(_sheet = nil)
|
176
|
+
read_labels
|
177
|
+
@label.map do |label|
|
178
|
+
[label[0], # name
|
179
|
+
[label[1][1].to_i, # row
|
183
180
|
::Roo::Utils.letter_to_number(label[1][2]), # column
|
184
181
|
label[1][0], # sheet
|
185
|
-
|
182
|
+
]]
|
183
|
+
end
|
186
184
|
end
|
187
|
-
end
|
188
185
|
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
186
|
+
# returns the comment at (row/col)
|
187
|
+
# nil if there is no comment
|
188
|
+
def comment(row, col, sheet = nil)
|
189
|
+
sheet ||= default_sheet
|
190
|
+
read_cells(sheet)
|
191
|
+
row, col = normalize(row, col)
|
192
|
+
return nil unless @comment[sheet]
|
193
|
+
@comment[sheet][[row, col]]
|
194
|
+
end
|
198
195
|
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
196
|
+
# returns each comment in the selected sheet as an array of elements
|
197
|
+
# [row, col, comment]
|
198
|
+
def comments(sheet = nil)
|
199
|
+
sheet ||= default_sheet
|
200
|
+
read_comments(sheet) unless @comments_read[sheet]
|
201
|
+
return [] unless @comment[sheet]
|
205
202
|
@comment[sheet].each.collect do |elem|
|
206
|
-
[elem[0][0],elem[0][1],elem[1]]
|
203
|
+
[elem[0][0], elem[0][1], elem[1]]
|
207
204
|
end
|
208
|
-
else
|
209
|
-
[]
|
210
205
|
end
|
211
|
-
end
|
212
206
|
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
207
|
+
private
|
208
|
+
|
209
|
+
# If the ODS file has an encryption-data element, then try to decrypt.
|
210
|
+
# If successful, the temporary content.xml will be overwritten with
|
211
|
+
# decrypted contents.
|
212
|
+
def decrypt_if_necessary(
|
213
|
+
zip_file,
|
214
|
+
content_entry,
|
215
|
+
roo_content_xml_path, options
|
216
|
+
)
|
217
|
+
# Check if content.xml is encrypted by extracting manifest.xml
|
218
|
+
# and searching for a manifest:encryption-data element
|
219
|
+
|
220
|
+
if (manifest_entry = zip_file.glob('META-INF/manifest.xml').first)
|
221
|
+
roo_manifest_xml_path = File.join(@tmpdir, 'roo_manifest.xml')
|
222
|
+
manifest_entry.extract(roo_manifest_xml_path)
|
223
|
+
manifest = ::Roo::Utils.load_xml(roo_manifest_xml_path)
|
224
|
+
|
225
|
+
# XPath search for manifest:encryption-data only for the content.xml
|
226
|
+
# file
|
227
|
+
|
228
|
+
encryption_data = manifest.xpath(
|
229
|
+
"//manifest:file-entry[@manifest:full-path='content.xml']"\
|
236
230
|
"/manifest:encryption-data"
|
237
|
-
|
231
|
+
).first
|
238
232
|
|
239
|
-
|
233
|
+
# If XPath returns a node, then we know content.xml is encrypted
|
240
234
|
|
241
|
-
|
235
|
+
unless encryption_data.nil?
|
242
236
|
|
243
|
-
|
244
|
-
|
237
|
+
# Since we know it's encrypted, we check for the password option
|
238
|
+
# and if it doesn't exist, raise an argument error
|
245
239
|
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
240
|
+
password = options[:password]
|
241
|
+
if !password.nil?
|
242
|
+
perform_decryption(
|
243
|
+
encryption_data,
|
244
|
+
password,
|
245
|
+
content_entry,
|
246
|
+
roo_content_xml_path
|
247
|
+
)
|
248
|
+
else
|
249
|
+
fail ArgumentError, 'file is encrypted but password was not supplied'
|
250
|
+
end
|
257
251
|
end
|
252
|
+
else
|
253
|
+
fail ArgumentError, 'file missing required META-INF/manifest.xml'
|
258
254
|
end
|
259
|
-
else
|
260
|
-
raise ArgumentError, 'file missing required META-INF/manifest.xml'
|
261
255
|
end
|
262
|
-
end
|
263
256
|
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
key_derivation_node['manifest:key-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
]
|
305
|
-
key_generation_size =
|
306
|
-
start_key_generation_node['manifest:key-size'].to_i
|
257
|
+
# Process the ODS encryption manifest and perform the decryption
|
258
|
+
def perform_decryption(
|
259
|
+
encryption_data,
|
260
|
+
password,
|
261
|
+
content_entry,
|
262
|
+
roo_content_xml_path
|
263
|
+
)
|
264
|
+
# Extract various expected attributes from the manifest that
|
265
|
+
# describe the encryption
|
266
|
+
|
267
|
+
algorithm_node = encryption_data.xpath('manifest:algorithm').first
|
268
|
+
key_derivation_node =
|
269
|
+
encryption_data.xpath('manifest:key-derivation').first
|
270
|
+
start_key_generation_node =
|
271
|
+
encryption_data.xpath('manifest:start-key-generation').first
|
272
|
+
|
273
|
+
# If we have all the expected elements, then we can perform
|
274
|
+
# the decryption.
|
275
|
+
|
276
|
+
if !algorithm_node.nil? && !key_derivation_node.nil? &&
|
277
|
+
!start_key_generation_node.nil?
|
278
|
+
|
279
|
+
# The algorithm is a URI describing the algorithm used
|
280
|
+
algorithm = algorithm_node['manifest:algorithm-name']
|
281
|
+
|
282
|
+
# The initialization vector is base-64 encoded
|
283
|
+
iv = Base64.decode64(
|
284
|
+
algorithm_node['manifest:initialisation-vector']
|
285
|
+
)
|
286
|
+
key_derivation_name = key_derivation_node['manifest:key-derivation-name']
|
287
|
+
key_size = key_derivation_node['manifest:key-size'].to_i
|
288
|
+
iteration_count = key_derivation_node['manifest:iteration-count'].to_i
|
289
|
+
salt = Base64.decode64(key_derivation_node['manifest:salt'])
|
290
|
+
|
291
|
+
# The key is hashed with an algorithm represented by this URI
|
292
|
+
key_generation_name =
|
293
|
+
start_key_generation_node[
|
294
|
+
'manifest:start-key-generation-name'
|
295
|
+
]
|
296
|
+
key_generation_size = start_key_generation_node['manifest:key-size'].to_i
|
307
297
|
|
308
298
|
hashed_password = password
|
309
|
-
key
|
299
|
+
key = nil
|
310
300
|
|
311
|
-
if key_generation_name.
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
key_generation_name
|
318
|
-
end
|
301
|
+
if key_generation_name == 'http://www.w3.org/2000/09/xmldsig#sha256'
|
302
|
+
|
303
|
+
hashed_password = Digest::SHA256.digest(password)
|
304
|
+
else
|
305
|
+
fail ArgumentError, "Unknown key generation algorithm #{key_generation_name}"
|
306
|
+
end
|
319
307
|
|
320
308
|
cipher = find_cipher(
|
321
309
|
algorithm,
|
@@ -336,210 +324,177 @@ class Roo::OpenOffice < Roo::Base
|
|
336
324
|
Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(decrypted)
|
337
325
|
)
|
338
326
|
rescue StandardError => error
|
339
|
-
raise ArgumentError,
|
340
|
-
'Invalid password or other data error: ' + error.to_s
|
327
|
+
raise ArgumentError, "Invalid password or other data error: #{error}"
|
341
328
|
end
|
342
|
-
|
343
|
-
|
344
|
-
|
329
|
+
else
|
330
|
+
fail ArgumentError, 'manifest.xml missing encryption-data elements'
|
331
|
+
end
|
345
332
|
end
|
346
|
-
end
|
347
333
|
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
salt,
|
354
|
-
iteration_count,
|
355
|
-
iv
|
356
|
-
)
|
357
|
-
cipher = nil
|
358
|
-
if algorithm.eql? "http://www.w3.org/2001/04/xmlenc#aes256-cbc"
|
334
|
+
# Create a cipher based on an ODS algorithm URI from manifest.xml
|
335
|
+
# params: algorithm, key_derivation_name, hashed_password, salt, iteration_count, iv
|
336
|
+
def find_cipher(*args)
|
337
|
+
fail ArgumentError, 'Unknown algorithm ' + algorithm unless args[0] == 'http://www.w3.org/2001/04/xmlenc#aes256-cbc'
|
338
|
+
|
359
339
|
cipher = OpenSSL::Cipher.new('AES-256-CBC')
|
360
340
|
cipher.decrypt
|
361
341
|
cipher.padding = 0
|
362
|
-
cipher.key
|
363
|
-
|
364
|
-
key_derivation_name,
|
365
|
-
hashed_password,
|
366
|
-
salt,
|
367
|
-
iteration_count
|
368
|
-
)
|
369
|
-
cipher.iv = iv
|
370
|
-
else
|
371
|
-
raise ArgumentError, 'Unknown algorithm ' + algorithm
|
372
|
-
end
|
373
|
-
cipher
|
374
|
-
end
|
342
|
+
cipher.key = find_cipher_key(cipher, *args[1..4])
|
343
|
+
cipher.iv = args[5]
|
375
344
|
|
376
|
-
|
377
|
-
|
378
|
-
cipher,
|
379
|
-
key_derivation_name,
|
380
|
-
hashed_password,
|
381
|
-
salt,
|
382
|
-
iteration_count
|
383
|
-
)
|
384
|
-
if key_derivation_name.eql? "PBKDF2"
|
385
|
-
key = OpenSSL::PKCS5.pbkdf2_hmac_sha1(
|
386
|
-
hashed_password,
|
387
|
-
salt,
|
388
|
-
iteration_count,
|
389
|
-
cipher.key_len
|
390
|
-
)
|
391
|
-
else
|
392
|
-
raise ArgumentError, 'Unknown key derivation name ' +
|
393
|
-
key_derivation_name
|
394
|
-
end
|
395
|
-
key
|
396
|
-
end
|
345
|
+
cipher
|
346
|
+
end
|
397
347
|
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
decrypted = ""
|
404
|
-
File.open(@filename, "rb") do |zipfile|
|
405
|
-
zipfile.seek(
|
406
|
-
content_entry.local_header_offset +
|
407
|
-
content_entry.calculate_local_header_size
|
408
|
-
)
|
409
|
-
total_to_read = content_entry.compressed_size
|
410
|
-
|
411
|
-
block_size = 4096
|
412
|
-
block_size = total_to_read if block_size > total_to_read
|
413
|
-
|
414
|
-
while buffer = zipfile.read(block_size)
|
415
|
-
decrypted += cipher.update(buffer)
|
416
|
-
total_to_read -= buffer.length
|
417
|
-
|
418
|
-
break if total_to_read == 0
|
419
|
-
|
420
|
-
block_size = total_to_read if block_size > total_to_read
|
421
|
-
end
|
348
|
+
# Create a cipher key based on an ODS algorithm string from manifest.xml
|
349
|
+
def find_cipher_key(*args)
|
350
|
+
fail ArgumentError, 'Unknown key derivation name ', args[1] unless args[1] == 'PBKDF2'
|
351
|
+
|
352
|
+
OpenSSL::PKCS5.pbkdf2_hmac_sha1(args[2], args[3], args[4], args[0].key_len)
|
422
353
|
end
|
423
354
|
|
424
|
-
|
425
|
-
|
355
|
+
# Block decrypt raw bytes from the zip file based on the cipher
|
356
|
+
def decrypt(content_entry, cipher)
|
357
|
+
# Zip::Entry.extract writes a 0-length file when trying
|
358
|
+
# to extract an encrypted stream, so we read the
|
359
|
+
# raw bytes based on the offset and lengths
|
360
|
+
decrypted = ''
|
361
|
+
File.open(@filename, 'rb') do |zipfile|
|
362
|
+
zipfile.seek(
|
363
|
+
content_entry.local_header_offset +
|
364
|
+
content_entry.calculate_local_header_size
|
365
|
+
)
|
366
|
+
total_to_read = content_entry.compressed_size
|
426
367
|
|
427
|
-
|
428
|
-
|
429
|
-
|
368
|
+
block_size = 4096
|
369
|
+
block_size = total_to_read if block_size > total_to_read
|
370
|
+
|
371
|
+
while (buffer = zipfile.read(block_size))
|
372
|
+
decrypted += cipher.update(buffer)
|
373
|
+
total_to_read -= buffer.length
|
430
374
|
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
375
|
+
break if total_to_read == 0
|
376
|
+
|
377
|
+
block_size = total_to_read if block_size > total_to_read
|
378
|
+
end
|
379
|
+
end
|
380
|
+
|
381
|
+
decrypted + cipher.final
|
382
|
+
end
|
383
|
+
|
384
|
+
def doc
|
385
|
+
@doc ||= ::Roo::Utils.load_xml(File.join(@tmpdir, 'roo_content.xml'))
|
386
|
+
end
|
387
|
+
|
388
|
+
# read the version of the OO-Version
|
389
|
+
def oo_version
|
390
|
+
doc.xpath("//*[local-name()='document-content']").each do |office|
|
391
|
+
@officeversion = attribute(office, 'version')
|
392
|
+
end
|
435
393
|
end
|
436
|
-
end
|
437
394
|
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
395
|
+
# helper function to set the internal representation of cells
|
396
|
+
def set_cell_values(sheet, x, y, i, v, value_type, formula, table_cell, str_v, style_name)
|
397
|
+
key = [y, x + i]
|
398
|
+
@cell_type[sheet] ||= {}
|
399
|
+
@cell_type[sheet][key] = value_type.to_sym if value_type
|
400
|
+
@formula[sheet] ||= {}
|
401
|
+
if formula
|
402
|
+
['of:', 'oooc:'].each do |prefix|
|
403
|
+
if formula[0, prefix.length] == prefix
|
404
|
+
formula = formula[prefix.length..-1]
|
405
|
+
end
|
448
406
|
end
|
407
|
+
@formula[sheet][key] = formula
|
449
408
|
end
|
450
|
-
@
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
409
|
+
@cell[sheet] ||= {}
|
410
|
+
@style[sheet] ||= {}
|
411
|
+
@style[sheet][key] = style_name
|
412
|
+
case @cell_type[sheet][key]
|
413
|
+
when :float, :percentage
|
414
|
+
@cell[sheet][key] = v.to_f
|
415
|
+
when :string
|
416
|
+
@cell[sheet][key] = str_v
|
417
|
+
when :date
|
418
|
+
# TODO: if table_cell.attributes['date-value'].size != "XXXX-XX-XX".size
|
419
|
+
if attribute(table_cell, 'date-value').size != 'XXXX-XX-XX'.size
|
420
|
+
#-- dann ist noch eine Uhrzeit vorhanden
|
421
|
+
#-- "1961-11-21T12:17:18"
|
422
|
+
@cell[sheet][key] = DateTime.parse(attribute(table_cell, 'date-value').to_s)
|
423
|
+
@cell_type[sheet][key] = :datetime
|
424
|
+
else
|
425
|
+
@cell[sheet][key] = table_cell.attributes['date-value']
|
426
|
+
end
|
427
|
+
when :time
|
428
|
+
hms = v.split(':')
|
429
|
+
@cell[sheet][key] = hms[0].to_i * 3600 + hms[1].to_i * 60 + hms[2].to_i
|
467
430
|
else
|
468
|
-
@cell[sheet][key] =
|
431
|
+
@cell[sheet][key] = v
|
469
432
|
end
|
470
|
-
when :percentage
|
471
|
-
@cell[sheet][key] = v.to_f
|
472
|
-
when :time
|
473
|
-
hms = v.split(':')
|
474
|
-
@cell[sheet][key] = hms[0].to_i*3600 + hms[1].to_i*60 + hms[2].to_i
|
475
|
-
else
|
476
|
-
@cell[sheet][key] = v
|
477
433
|
end
|
478
|
-
end
|
479
434
|
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
435
|
+
# read all cells in the selected sheet
|
436
|
+
#--
|
437
|
+
# the following construct means '4 blanks'
|
438
|
+
# some content <text:s text:c="3"/>
|
439
|
+
#++
|
440
|
+
def read_cells(sheet = default_sheet)
|
441
|
+
validate_sheet!(sheet)
|
442
|
+
return if @cells_read[sheet]
|
443
|
+
|
444
|
+
sheet_found = false
|
445
|
+
doc.xpath("//*[local-name()='table']").each do |ws|
|
446
|
+
next unless sheet == attribute(ws, 'name')
|
447
|
+
|
492
448
|
sheet_found = true
|
493
|
-
col
|
494
|
-
row
|
449
|
+
col = 1
|
450
|
+
row = 1
|
495
451
|
ws.children.each do |table_element|
|
496
452
|
case table_element.name
|
497
453
|
when 'table-column'
|
498
454
|
@style_defaults[sheet] << table_element.attributes['default-cell-style-name']
|
499
455
|
when 'table-row'
|
500
456
|
if table_element.attributes['number-rows-repeated']
|
501
|
-
skip_row =
|
502
|
-
row
|
457
|
+
skip_row = attribute(table_element, 'number-rows-repeated').to_s.to_i
|
458
|
+
row = row + skip_row - 1
|
503
459
|
end
|
504
460
|
table_element.children.each do |cell|
|
505
|
-
skip_col
|
506
|
-
formula
|
507
|
-
value_type =
|
508
|
-
v
|
509
|
-
style_name =
|
461
|
+
skip_col = attribute(cell, 'number-columns-repeated')
|
462
|
+
formula = attribute(cell, 'formula')
|
463
|
+
value_type = attribute(cell, 'value-type')
|
464
|
+
v = attribute(cell, 'value')
|
465
|
+
style_name = attribute(cell, 'style-name')
|
510
466
|
case value_type
|
511
467
|
when 'string'
|
512
|
-
str_v
|
468
|
+
str_v = ''
|
513
469
|
# insert \n if there is more than one paragraph
|
514
470
|
para_count = 0
|
515
471
|
cell.children.each do |str|
|
516
472
|
# begin comments
|
517
|
-
|
518
|
-
|
519
|
-
- <office:annotation office:display="true" draw:style-name="gr1" draw:text-style-name="P1" svg:width="1.1413in" svg:height="0.3902in" svg:x="2.0142in" svg:y="0in" draw:caption-point-x="-0.2402in" draw:caption-point-y="0.5661in">
|
520
|
-
<dc:date>2011-09-20T00:00:00</dc:date>
|
521
|
-
<text:p text:style-name="P1">Kommentar fuer B4</text:p>
|
522
|
-
</office:annotation>
|
523
|
-
<text:p>B4 (mit Kommentar)</text:p>
|
524
|
-
</table:table-cell>
|
525
|
-
|
473
|
+
#=begin
|
474
|
+
#- <table:table-cell office:value-type="string">
|
475
|
+
# - <office:annotation office:display="true" draw:style-name="gr1" draw:text-style-name="P1" svg:width="1.1413in" svg:height="0.3902in" svg:x="2.0142in" svg:y="0in" draw:caption-point-x="-0.2402in" draw:caption-point-y="0.5661in">
|
476
|
+
# <dc:date>2011-09-20T00:00:00</dc:date>
|
477
|
+
# <text:p text:style-name="P1">Kommentar fuer B4</text:p>
|
478
|
+
# </office:annotation>
|
479
|
+
# <text:p>B4 (mit Kommentar)</text:p>
|
480
|
+
# </table:table-cell>
|
481
|
+
#=end
|
526
482
|
if str.name == 'annotation'
|
527
483
|
str.children.each do |annotation|
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
end
|
484
|
+
next unless annotation.name == 'p'
|
485
|
+
# @comment ist ein Hash mit Sheet als Key (wie bei @cell)
|
486
|
+
# innerhalb eines Elements besteht ein Eintrag aus einem
|
487
|
+
# weiteren Hash mit Key [row,col] und dem eigentlichen
|
488
|
+
# Kommentartext als Inhalt
|
489
|
+
@comment[sheet] = Hash.new unless @comment[sheet]
|
490
|
+
key = [row, col]
|
491
|
+
@comment[sheet][key] = annotation.text
|
537
492
|
end
|
538
493
|
end
|
539
494
|
# end comments
|
540
495
|
if str.name == 'p'
|
541
|
-
v
|
542
|
-
str_v
|
496
|
+
v = str.content
|
497
|
+
str_v += "\n" if para_count > 0
|
543
498
|
para_count += 1
|
544
499
|
if str.children.size > 1
|
545
500
|
str_v += children_to_string(str.children)
|
@@ -548,32 +503,28 @@ class Roo::OpenOffice < Roo::Base
|
|
548
503
|
str_v += child.content #.text
|
549
504
|
end
|
550
505
|
end
|
551
|
-
str_v.gsub!(/'/,"'")
|
506
|
+
str_v.gsub!(/'/, "'") # special case not supported by unescapeHTML
|
552
507
|
str_v = CGI.unescapeHTML(str_v)
|
553
508
|
end # == 'p'
|
554
509
|
end
|
555
510
|
when 'time'
|
556
511
|
cell.children.each do |str|
|
557
|
-
if str.name == 'p'
|
558
|
-
v = str.content
|
559
|
-
end
|
512
|
+
v = str.content if str.name == 'p'
|
560
513
|
end
|
561
514
|
when '', nil, 'date', 'percentage', 'float'
|
562
515
|
#
|
563
516
|
when 'boolean'
|
564
|
-
v =
|
565
|
-
else
|
566
|
-
# raise "unknown type #{value_type}"
|
517
|
+
v = attribute(cell, 'boolean-value').to_s
|
567
518
|
end
|
568
519
|
if skip_col
|
569
|
-
if v
|
570
|
-
0.upto(skip_col.to_i-1) do |i|
|
571
|
-
set_cell_values(sheet,col,row,i,v,value_type,formula,cell,str_v,style_name)
|
520
|
+
if !v.nil? || cell.attributes['date-value']
|
521
|
+
0.upto(skip_col.to_i - 1) do |i|
|
522
|
+
set_cell_values(sheet, col, row, i, v, value_type, formula, cell, str_v, style_name)
|
572
523
|
end
|
573
524
|
end
|
574
525
|
col += (skip_col.to_i - 1)
|
575
526
|
end # if skip
|
576
|
-
set_cell_values(sheet,col,row,0,v,value_type,formula,cell,str_v,style_name)
|
527
|
+
set_cell_values(sheet, col, row, 0, v, value_type, formula, cell, str_v, style_name)
|
577
528
|
col += 1
|
578
529
|
end
|
579
530
|
row += 1
|
@@ -581,101 +532,83 @@ class Roo::OpenOffice < Roo::Base
|
|
581
532
|
end
|
582
533
|
end
|
583
534
|
end
|
535
|
+
doc.xpath("//*[local-name()='automatic-styles']").each do |style|
|
536
|
+
read_styles(style)
|
537
|
+
end
|
538
|
+
|
539
|
+
fail RangeError unless sheet_found
|
540
|
+
|
541
|
+
@cells_read[sheet] = true
|
542
|
+
@comments_read[sheet] = true
|
584
543
|
end
|
585
|
-
doc.xpath("//*[local-name()='automatic-styles']").each do |style|
|
586
|
-
read_styles(style)
|
587
|
-
end
|
588
|
-
if !sheet_found
|
589
|
-
raise RangeError
|
590
|
-
end
|
591
|
-
@cells_read[sheet] = true
|
592
|
-
@comments_read[sheet] = true
|
593
|
-
end
|
594
544
|
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
545
|
+
# Only calls read_cells because Roo::Base calls read_comments
|
546
|
+
# whereas the reading of comments is done in read_cells for Roo::OpenOffice-objects
|
547
|
+
def read_comments(sheet = nil)
|
548
|
+
read_cells(sheet)
|
549
|
+
end
|
600
550
|
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
|
605
|
-
|
606
|
-
|
607
|
-
|
608
|
-
|
609
|
-
|
610
|
-
|
611
|
-
|
612
|
-
|
551
|
+
def read_labels
|
552
|
+
@label ||= Hash[doc.xpath('//table:named-range').map do |ne|
|
553
|
+
#-
|
554
|
+
# $Sheet1.$C$5
|
555
|
+
#+
|
556
|
+
name = attribute(ne, 'name').to_s
|
557
|
+
sheetname, coords = attribute(ne, 'cell-range-address').to_s.split('.$')
|
558
|
+
col, row = coords.split('$')
|
559
|
+
sheetname = sheetname[1..-1] if sheetname[0, 1] == '$'
|
560
|
+
[name, [sheetname, row, col]]
|
561
|
+
end]
|
562
|
+
end
|
613
563
|
|
614
|
-
|
615
|
-
|
616
|
-
|
617
|
-
|
618
|
-
|
619
|
-
|
620
|
-
|
621
|
-
|
622
|
-
|
623
|
-
|
624
|
-
|
564
|
+
def read_styles(style_elements)
|
565
|
+
@font_style_definitions['Default'] = Roo::Font.new
|
566
|
+
style_elements.each do |style|
|
567
|
+
next unless style.name == 'style'
|
568
|
+
style_name = attribute(style, 'name')
|
569
|
+
style.each do |properties|
|
570
|
+
font = Roo::OpenOffice::Font.new
|
571
|
+
font.bold = attribute(properties, 'font-weight')
|
572
|
+
font.italic = attribute(properties, 'font-style')
|
573
|
+
font.underline = attribute(properties, 'text-underline-style')
|
574
|
+
@font_style_definitions[style_name] = font
|
575
|
+
end
|
625
576
|
end
|
626
577
|
end
|
627
|
-
end
|
628
578
|
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
|
579
|
+
def read_table_styles(styles)
|
580
|
+
styles.children.each do |style|
|
581
|
+
next unless style.name == 'style'
|
582
|
+
style_name = attribute(style, 'name')
|
583
|
+
style.children.each do |properties|
|
584
|
+
display = attribute(properties, 'display')
|
585
|
+
next unless display
|
586
|
+
@table_display[style_name] = (display == 'true')
|
587
|
+
end
|
637
588
|
end
|
638
589
|
end
|
639
|
-
end
|
640
|
-
|
641
|
-
A_ROO_TYPE = {
|
642
|
-
"float" => :float,
|
643
|
-
"string" => :string,
|
644
|
-
"date" => :date,
|
645
|
-
"percentage" => :percentage,
|
646
|
-
"time" => :time,
|
647
|
-
}
|
648
|
-
|
649
|
-
def self.oo_type_2_roo_type(ootype)
|
650
|
-
return A_ROO_TYPE[ootype]
|
651
|
-
end
|
652
590
|
|
653
|
-
|
654
|
-
|
655
|
-
|
656
|
-
|
657
|
-
|
658
|
-
|
659
|
-
|
660
|
-
else
|
661
|
-
if child.name == 's'
|
662
|
-
compressed_spaces = child.attributes['c'].to_s.to_i
|
663
|
-
# no explicit number means a count of 1:
|
664
|
-
if compressed_spaces == 0
|
665
|
-
compressed_spaces = 1
|
666
|
-
end
|
667
|
-
result = result + " "*compressed_spaces
|
591
|
+
# helper method to convert compressed spaces and other elements within
|
592
|
+
# an text into a string
|
593
|
+
# FIXME: add a test for compressed_spaces == 0. It's not currently tested.
|
594
|
+
def children_to_string(children)
|
595
|
+
children.map do |child|
|
596
|
+
if child.text?
|
597
|
+
child.content
|
668
598
|
else
|
669
|
-
|
599
|
+
if child.name == 's'
|
600
|
+
compressed_spaces = child.attributes['c'].to_s.to_i
|
601
|
+
# no explicit number means a count of 1:
|
602
|
+
compressed_spaces == 0 ? ' ' : ' ' * compressed_spaces
|
603
|
+
else
|
604
|
+
child.content
|
605
|
+
end
|
670
606
|
end
|
671
|
-
end
|
672
|
-
|
673
|
-
result
|
674
|
-
end
|
607
|
+
end.join
|
608
|
+
end
|
675
609
|
|
676
|
-
|
677
|
-
|
678
|
-
node.attributes[attr_name].value
|
610
|
+
def attribute(node, attr_name)
|
611
|
+
node.attributes[attr_name].value if node.attributes[attr_name]
|
679
612
|
end
|
680
613
|
end
|
681
614
|
end
|