culturecode-roo 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +7 -0
- data/.simplecov +4 -0
- data/.travis.yml +13 -0
- data/CHANGELOG +438 -0
- data/Gemfile +24 -0
- data/Guardfile +24 -0
- data/LICENSE +22 -0
- data/README.md +121 -0
- data/Rakefile +23 -0
- data/examples/roo_soap_client.rb +50 -0
- data/examples/roo_soap_server.rb +26 -0
- data/examples/write_me.rb +31 -0
- data/lib/roo.rb +28 -0
- data/lib/roo/base.rb +717 -0
- data/lib/roo/csv.rb +110 -0
- data/lib/roo/excelx.rb +542 -0
- data/lib/roo/excelx/comments.rb +23 -0
- data/lib/roo/excelx/extractor.rb +20 -0
- data/lib/roo/excelx/relationships.rb +26 -0
- data/lib/roo/excelx/shared_strings.rb +40 -0
- data/lib/roo/excelx/sheet_doc.rb +175 -0
- data/lib/roo/excelx/styles.rb +62 -0
- data/lib/roo/excelx/workbook.rb +59 -0
- data/lib/roo/font.rb +17 -0
- data/lib/roo/libre_office.rb +5 -0
- data/lib/roo/link.rb +15 -0
- data/lib/roo/open_office.rb +652 -0
- data/lib/roo/spreadsheet.rb +31 -0
- data/lib/roo/utils.rb +81 -0
- data/lib/roo/version.rb +3 -0
- data/roo.gemspec +27 -0
- data/scripts/txt2html +67 -0
- data/spec/fixtures/vcr_cassettes/google_drive.yml +165 -0
- data/spec/fixtures/vcr_cassettes/google_drive_access_token.yml +73 -0
- data/spec/fixtures/vcr_cassettes/google_drive_set.yml +857 -0
- data/spec/lib/roo/base_spec.rb +4 -0
- data/spec/lib/roo/csv_spec.rb +48 -0
- data/spec/lib/roo/excelx/format_spec.rb +51 -0
- data/spec/lib/roo/excelx_spec.rb +363 -0
- data/spec/lib/roo/libreoffice_spec.rb +13 -0
- data/spec/lib/roo/openoffice_spec.rb +15 -0
- data/spec/lib/roo/spreadsheet_spec.rb +88 -0
- data/spec/lib/roo/utils_spec.rb +105 -0
- data/spec/spec_helper.rb +9 -0
- data/test/all_ss.rb +11 -0
- data/test/files/1900_base.xlsx +0 -0
- data/test/files/1904_base.xlsx +0 -0
- data/test/files/Bibelbund.csv +3741 -0
- data/test/files/Bibelbund.ods +0 -0
- data/test/files/Bibelbund.xlsx +0 -0
- data/test/files/Bibelbund1.ods +0 -0
- data/test/files/Pfand_from_windows_phone.xlsx +0 -0
- data/test/files/advanced_header.ods +0 -0
- data/test/files/bbu.ods +0 -0
- data/test/files/bbu.xlsx +0 -0
- data/test/files/bode-v1.ods.zip +0 -0
- data/test/files/bode-v1.xls.zip +0 -0
- data/test/files/boolean.csv +2 -0
- data/test/files/boolean.ods +0 -0
- data/test/files/boolean.xlsx +0 -0
- data/test/files/borders.ods +0 -0
- data/test/files/borders.xlsx +0 -0
- data/test/files/bug-numbered-sheet-names.xlsx +0 -0
- data/test/files/comments.ods +0 -0
- data/test/files/comments.xlsx +0 -0
- data/test/files/csvtypes.csv +1 -0
- data/test/files/datetime.ods +0 -0
- data/test/files/datetime.xlsx +0 -0
- data/test/files/dreimalvier.ods +0 -0
- data/test/files/emptysheets.ods +0 -0
- data/test/files/emptysheets.xlsx +0 -0
- data/test/files/encrypted-letmein.ods +0 -0
- data/test/files/file_item_error.xlsx +0 -0
- data/test/files/formula.ods +0 -0
- data/test/files/formula.xlsx +0 -0
- data/test/files/formula_string_error.xlsx +0 -0
- data/test/files/html-escape.ods +0 -0
- data/test/files/link.csv +1 -0
- data/test/files/link.xlsx +0 -0
- data/test/files/matrix.ods +0 -0
- data/test/files/named_cells.ods +0 -0
- data/test/files/named_cells.xlsx +0 -0
- data/test/files/no_spreadsheet_file.txt +1 -0
- data/test/files/numbers-export.xlsx +0 -0
- data/test/files/numbers1.csv +18 -0
- data/test/files/numbers1.ods +0 -0
- data/test/files/numbers1.xlsx +0 -0
- data/test/files/numbers1withnull.xlsx +0 -0
- data/test/files/numeric-link.xlsx +0 -0
- data/test/files/only_one_sheet.ods +0 -0
- data/test/files/only_one_sheet.xlsx +0 -0
- data/test/files/paragraph.ods +0 -0
- data/test/files/paragraph.xlsx +0 -0
- data/test/files/ric.ods +0 -0
- data/test/files/sheet1.xml +109 -0
- data/test/files/simple_spreadsheet.ods +0 -0
- data/test/files/simple_spreadsheet.xlsx +0 -0
- data/test/files/simple_spreadsheet_from_italo.ods +0 -0
- data/test/files/so_datetime.csv +8 -0
- data/test/files/style.ods +0 -0
- data/test/files/style.xlsx +0 -0
- data/test/files/time-test.csv +2 -0
- data/test/files/time-test.ods +0 -0
- data/test/files/time-test.xlsx +0 -0
- data/test/files/type_excel.ods +0 -0
- data/test/files/type_excel.xlsx +0 -0
- data/test/files/type_excelx.ods +0 -0
- data/test/files/type_openoffice.xlsx +0 -0
- data/test/files/whitespace.ods +0 -0
- data/test/files/whitespace.xlsx +0 -0
- data/test/test_generic_spreadsheet.rb +211 -0
- data/test/test_helper.rb +58 -0
- data/test/test_roo.rb +1977 -0
- metadata +329 -0
|
@@ -0,0 +1,652 @@
|
|
|
1
|
+
require 'base64'
require 'cgi'
require 'date'
require 'digest/sha2'
require 'openssl'
require 'zlib'
require 'nokogiri'
require 'zip/filesystem'
require 'roo/font'
|
|
6
|
+
|
|
7
|
+
class Roo::OpenOffice < Roo::Base
|
|
8
|
+
# Opens an OpenDocument spreadsheet and stages its content.xml in a
# temporary directory so that it can be parsed on demand.
#
# filename - name of the spreadsheet; it is handed to file_type_check,
#            make_tmpdir (last path segment only) and local_filename
# options  - Hash; the keys read here are
#            :packed       - forwarded to file_type_check / local_filename
#                            (documented value: :zip)
#            :file_warning - forwarded to file_type_check, defaults to :error
#            :tmpdir_root  - forwarded to make_tmpdir
#            :password     - read by decrypt_if_necessary for encrypted files
#
# Raises ArgumentError when the archive has no content.xml entry.
def initialize(filename, options={})
  packed       = options[:packed]
  file_warning = options[:file_warning] || :error

  file_type_check(filename,'.ods','an Roo::OpenOffice', file_warning, packed)
  @tmpdir   = make_tmpdir(filename.split('/').last, options[:tmpdir_root])
  @filename = local_filename(filename, @tmpdir, packed)
  #TODO: @cells_read[:default] = false
  Zip::File.open(@filename) do |zip_file|
    content_entry = zip_file.glob("content.xml").first
    unless content_entry
      raise ArgumentError, 'file missing required content.xml'
    end

    roo_content_xml_path = File.join(@tmpdir, 'roo_content.xml')
    content_entry.extract(roo_content_xml_path)
    # Overwrites the extracted file with plain XML if it was encrypted.
    decrypt_if_necessary(zip_file, content_entry, roo_content_xml_path, options)
  end
  super(filename, options)

  # Per-sheet caches that read_cells / read_styles fill in later.
  @formula           = {}
  @style             = {}
  @style_defaults    = Hash.new { |hash, key| hash[key] = [] }
  @style_definitions = {}
  @comment           = {}
  @comments_read     = {}
end
|
|
40
|
+
|
|
41
|
+
# Decrypts content.xml when META-INF/manifest.xml declares it as encrypted.
#
# The manifest is extracted to "roo_manifest.xml" inside @tmpdir and searched
# for a manifest:encryption-data element belonging to content.xml. When one
# is found, perform_decryption overwrites roo_content_xml_path with the
# decrypted contents; when none is found nothing else happens.
#
# zip_file             - the opened archive (must respond to #glob)
# content_entry        - the archive entry of content.xml
# roo_content_xml_path - path of the already extracted content.xml
# options              - Hash; only :password is read here
#
# Raises ArgumentError when the manifest is missing from the archive, or when
# content.xml is encrypted and no :password was supplied.
def decrypt_if_necessary(
  zip_file,
  content_entry,
  roo_content_xml_path, options
)
  manifest_entry = zip_file.glob("META-INF/manifest.xml").first
  unless manifest_entry
    raise ArgumentError, 'file missing required META-INF/manifest.xml'
  end

  roo_manifest_xml_path = File.join(@tmpdir, "roo_manifest.xml")
  manifest_entry.extract(roo_manifest_xml_path)
  manifest = ::Roo::Utils.load_xml(roo_manifest_xml_path)

  # Only the encryption-data that belongs to content.xml is of interest.
  encryption_data = manifest.xpath(
    "//manifest:file-entry[@manifest:full-path='content.xml']"\
    "/manifest:encryption-data"
  ).first

  # No such node means content.xml is stored unencrypted.
  return if encryption_data.nil?

  password = options[:password]
  if password.nil?
    raise ArgumentError,
          'file is encrypted but password was not supplied'
  end

  perform_decryption(
    encryption_data,
    password,
    content_entry,
    roo_content_xml_path
  )
end
|
|
89
|
+
|
|
90
|
+
# Processes the ODS encryption manifest and performs the decryption.
#
# Reads the algorithm, key-derivation and start-key-generation children of
# +encryption_data+, builds a cipher from them, decrypts the raw bytes of
# +content_entry+, inflates the result and writes it to
# +roo_content_xml_path+.
#
# encryption_data      - manifest:encryption-data node of content.xml
# password             - password supplied by the caller
# content_entry        - archive entry of the encrypted content.xml
# roo_content_xml_path - file that receives the decrypted XML
#
# Returns whatever IO.binwrite returns.
# Raises ArgumentError when one of the three manifest children is missing,
# when the start-key-generation name is not the SHA-256 URI (including when
# the attribute is absent), or when decrypting/inflating fails for any
# reason (wrong password, corrupt data, ...).
def perform_decryption(
  encryption_data,
  password,
  content_entry,
  roo_content_xml_path
)
  algorithm_node = encryption_data.xpath("manifest:algorithm").first
  key_derivation_node =
    encryption_data.xpath("manifest:key-derivation").first
  start_key_generation_node =
    encryption_data.xpath("manifest:start-key-generation").first

  if algorithm_node.nil? || key_derivation_node.nil? ||
     start_key_generation_node.nil?
    raise ArgumentError,
          'manifest.xml missing encryption-data elements'
  end

  # The algorithm is a URI describing the algorithm used
  algorithm = algorithm_node['manifest:algorithm-name']

  # The initialization vector and the salt are base-64 encoded
  iv = Base64.decode64(algorithm_node['manifest:initialisation-vector'])
  key_derivation_name = key_derivation_node['manifest:key-derivation-name']
  iteration_count = key_derivation_node['manifest:iteration-count'].to_i
  salt = Base64.decode64(key_derivation_node['manifest:salt'])

  # The password is hashed with the algorithm represented by this URI.
  # Interpolation keeps the error an ArgumentError even if the attribute
  # is missing (nil).
  key_generation_name =
    start_key_generation_node['manifest:start-key-generation-name']
  unless key_generation_name.eql?("http://www.w3.org/2000/09/xmldsig#sha256")
    raise ArgumentError,
          "Unknown key generation algorithm #{key_generation_name}"
  end
  hashed_password = Digest::SHA256.digest(password)

  cipher = find_cipher(
    algorithm,
    key_derivation_name,
    hashed_password,
    salt,
    iteration_count,
    iv
  )

  begin
    decrypted = decrypt(content_entry, cipher)

    # Finally, inflate the decrypted stream (raw deflate, no zlib header)
    # and overwrite content.xml
    IO.binwrite(
      roo_content_xml_path,
      Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(decrypted)
    )
  rescue StandardError => error
    raise ArgumentError,
          "Invalid password or other data error: #{error}"
  end
end
|
|
173
|
+
|
|
174
|
+
# Creates a decrypting cipher for an ODS algorithm URI from manifest.xml.
#
# algorithm           - algorithm URI; only
#                       "http://www.w3.org/2001/04/xmlenc#aes256-cbc" is
#                       supported
# key_derivation_name - forwarded to find_cipher_key
# hashed_password     - forwarded to find_cipher_key
# salt                - forwarded to find_cipher_key
# iteration_count     - forwarded to find_cipher_key
# iv                  - initialisation vector assigned to the cipher
#
# Returns an OpenSSL::Cipher (AES-256-CBC, decrypt mode, padding disabled).
# Raises ArgumentError for any other algorithm, including nil.
def find_cipher(
  algorithm,
  key_derivation_name,
  hashed_password,
  salt,
  iteration_count,
  iv
)
  unless algorithm.eql?("http://www.w3.org/2001/04/xmlenc#aes256-cbc")
    # Interpolation (instead of String#+) keeps this an ArgumentError even
    # when the manifest attribute was missing and algorithm is nil.
    raise ArgumentError, "Unknown algorithm #{algorithm}"
  end

  cipher = OpenSSL::Cipher.new('AES-256-CBC')
  cipher.decrypt
  cipher.padding = 0
  cipher.key = find_cipher_key(
    cipher,
    key_derivation_name,
    hashed_password,
    salt,
    iteration_count
  )
  cipher.iv = iv
  cipher
end
|
|
201
|
+
|
|
202
|
+
# Creates a cipher key based on an ODS key-derivation name from manifest.xml.
#
# cipher              - cipher whose #key_len determines the key length
# key_derivation_name - only "PBKDF2" is supported
# hashed_password     - secret fed into the key derivation
# salt                - salt for the key derivation
# iteration_count     - number of PBKDF2 iterations
#
# Returns the key derived with PBKDF2-HMAC-SHA1.
# Raises ArgumentError for any other derivation name, including nil.
def find_cipher_key(
  cipher,
  key_derivation_name,
  hashed_password,
  salt,
  iteration_count
)
  unless key_derivation_name.eql?("PBKDF2")
    # Interpolation (instead of String#+) keeps this an ArgumentError even
    # when the manifest attribute was missing and the name is nil.
    raise ArgumentError,
          "Unknown key derivation name #{key_derivation_name}"
  end

  OpenSSL::PKCS5.pbkdf2_hmac_sha1(
    hashed_password,
    salt,
    iteration_count,
    cipher.key_len
  )
end
|
|
223
|
+
|
|
224
|
+
# Block-decrypts the raw bytes of an archive entry with the given cipher.
#
# Zip::Entry.extract writes a 0-length file when trying to extract an
# encrypted stream, so the bytes are read straight from @filename: reading
# starts at local_header_offset + calculate_local_header_size and covers
# compressed_size bytes, in chunks of at most 4096 bytes.
#
# content_entry - archive entry providing the offset and size methods above
# cipher        - object responding to #update(data) and #final
#
# Returns the decrypted bytes followed by cipher.final. A zero-length entry
# yields just cipher.final; cipher.update is never called with empty data.
def decrypt(content_entry, cipher)
  block_size = 4096
  decrypted = String.new

  File.open(@filename, "rb") do |zipfile|
    zipfile.seek(
      content_entry.local_header_offset +
        content_entry.calculate_local_header_size
    )
    remaining = content_entry.compressed_size
    while remaining > 0
      buffer = zipfile.read([block_size, remaining].min)
      break if buffer.nil? # file ended before compressed_size bytes

      # Append in place; += would copy the whole result for every chunk.
      decrypted << cipher.update(buffer)
      remaining -= buffer.length
    end
  end

  decrypted << cipher.final
end
|
|
253
|
+
|
|
254
|
+
# Lets a named cell (label) be read by calling a method with the label's
# name. Labels are loaded first; when the method name is a known label its
# row and column are looked up and the cell is read through #cell (the sheet
# returned by #label is not passed on). Any other name goes to super.
def method_missing(m,*args)
  read_labels
  label_name = m.to_s
  # call super for methods like #a1
  return super unless @label.has_key?(label_name)

  row, col = label(label_name)
  cell(row, col)
end
|
|
265
|
+
|
|
266
|
+
# Returns the content of a spreadsheet cell.
# (1,1) is the upper left corner.
# The coordinates are passed through #normalize before the lookup.
#
# For cells whose #celltype is :date the stored "yyyy-mm-dd" value is split
# on '-' and returned as a Date; every other cell is returned as stored.
def cell(row, col, sheet=nil)
  sheet ||= default_sheet
  read_cells(sheet)
  row, col = normalize(row, col)
  key = [row, col]

  if celltype(row, col, sheet) != :date
    @cell[sheet][key]
  else
    parts = @cell[sheet][key].to_s.split('-')
    Date.new(parts[0].to_i, parts[1].to_i, parts[2].to_i)
  end
end
|
|
280
|
+
|
|
281
|
+
# Returns the formula stored at (row,col) of the given sheet (the default
# sheet when none is given), or nil if the cell has no formula.
# #formula? is an alias of this method, so it returns the formula itself
# (truthy) rather than a strict boolean.
def formula(row,col,sheet=nil)
  sheet ||= default_sheet
  read_cells(sheet)
  normalized_row, normalized_col = normalize(row, col)
  @formula[sheet][[normalized_row, normalized_col]]
end
alias_method :formula?, :formula
|
|
291
|
+
|
|
292
|
+
# Returns every formula of the selected sheet (the default sheet when none
# is given) as an array of [row, col, formula] elements; an empty array when
# no formulas are recorded for the sheet.
def formulas(sheet=nil)
  sheet ||= default_sheet
  read_cells(sheet)
  sheet_formulas = @formula[sheet]
  return [] unless sheet_formulas

  sheet_formulas.map { |(row, col), text| [row, col, text] }
end
|
|
305
|
+
|
|
306
|
+
# Given a cell, returns the entry of @style_definitions for the cell's style.
# The style name is taken from the cell itself, then from the default style
# recorded for its column, and finally falls back to 'Default'.
def font(row, col, sheet=nil)
  sheet ||= default_sheet
  read_cells(sheet)
  row, col = normalize(row, col)
  cell_style = @style[sheet][[row, col]]
  style_name = cell_style || @style_defaults[sheet][col - 1] || 'Default'
  @style_definitions[style_name]
end
|
|
314
|
+
|
|
315
|
+
# Returns the type of a cell:
# * :formula when a formula is recorded for the cell
# * otherwise the type stored by set_cell_values, e.g.
#   :float, :string, :date, :percentage, :time or :datetime
def celltype(row,col,sheet=nil)
  sheet ||= default_sheet
  read_cells(sheet)
  row, col = normalize(row, col)
  key = [row, col]
  return :formula if @formula[sheet][key]

  @cell_type[sheet][key]
end
|
|
333
|
+
|
|
334
|
+
# Returns the value of the "name" attribute of every element whose local
# name is 'table' in content.xml, in document order.
def sheets
  tables = doc.xpath("//*[local-name()='table']")
  tables.map { |table| table.attributes["name"].value }
end
|
|
339
|
+
|
|
340
|
+
# Version of the OpenOffice document as found on the document-content
# element (the original note says that in 2007 this was always "1.0").
# The document is queried again on every call.
def officeversion
  oo_version
  @officeversion
end
|
|
346
|
+
|
|
347
|
+
# Shows the internal representation of all cells of a sheet (the default
# sheet when none is given); mainly for debugging purposes.
def to_s(sheet=nil)
  sheet ||= default_sheet
  read_cells(sheet)
  cells = @cell[sheet]
  cells.inspect
end
|
|
354
|
+
|
|
355
|
+
# Returns [row, col, sheetname] of the labelled cell, or [nil, nil, nil] if
# the label is not defined. The row is converted with #to_i and the column
# letter with Roo::Utils.letter_to_number.
def label(labelname)
  read_labels
  return [nil, nil, nil] unless @label.has_key?(labelname)

  sheet_name, row, col = @label[labelname]
  [row.to_i, ::Roo::Utils.letter_to_number(col), sheet_name]
end
|
|
370
|
+
|
|
371
|
+
# Returns an array with all labels. Each element is an array of the form
# [labelname, [row,col,sheetname]]
# The +sheet+ argument is accepted but not used by this implementation.
def labels(sheet=nil)
  read_labels
  @label.map do |name, (sheet_name, row, col)|
    [name, [row.to_i, ::Roo::Utils.letter_to_number(col), sheet_name]]
  end
end
|
|
383
|
+
|
|
384
|
+
# Returns the comment at (row/col) of the given sheet (the default sheet
# when none is given); nil if there is no comment.
def comment(row,col,sheet=nil)
  sheet ||= default_sheet
  read_cells(sheet)
  row, col = normalize(row, col)
  sheet_comments = @comment[sheet]
  sheet_comments[[row, col]] if sheet_comments
end
|
|
393
|
+
|
|
394
|
+
# Returns every comment of the selected sheet (the default sheet when none
# is given) as an array of [row, col, comment] elements; an empty array when
# the sheet has no comments.
def comments(sheet=nil)
  sheet ||= default_sheet
  read_comments(sheet) unless @comments_read[sheet]
  sheet_comments = @comment[sheet]
  return [] unless sheet_comments

  sheet_comments.map { |(row, col), text| [row, col, text] }
end
|
|
407
|
+
|
|
408
|
+
private
|
|
409
|
+
|
|
410
|
+
# Parsed content.xml, loaded from "roo_content.xml" inside @tmpdir on first
# use and memoized in @doc afterwards.
def doc
  @doc ||= begin
    content_path = File.join(@tmpdir, "roo_content.xml")
    ::Roo::Utils.load_xml(content_path)
  end
end
|
|
413
|
+
|
|
414
|
+
# Reads the 'version' attribute of every element whose local name is
# 'document-content' and stores it in @officeversion (the last match wins).
def oo_version
  roots = doc.xpath("//*[local-name()='document-content']")
  roots.each { |office| @officeversion = attr(office,'version') }
end
|
|
420
|
+
|
|
421
|
+
# Helper that stores one cell in the internal per-sheet hashes.
#
# sheet      - sheet name used as first-level key
# x, y, i    - the cell is stored under the key [y, x+i]
# v          - raw value (office value attribute or paragraph text)
# value_type - ODS value-type, mapped through oo_type_2_roo_type
# formula    - formula text or nil; a leading 'of:' / 'oooc:' is removed
# table_cell - the cell node, consulted for its 'date-value' attribute
# str_v      - assembled text, used for :string cells
# style_name - style name recorded for the cell
def set_cell_values(sheet,x,y,i,v,value_type,formula,table_cell,str_v,style_name)
  key = [y, x + i]

  @cell_type[sheet] ||= {}
  @cell_type[sheet][key] = Roo::OpenOffice.oo_type_2_roo_type(value_type)

  @formula[sheet] ||= {}
  if formula
    ['of:', 'oooc:'].each do |prefix|
      formula = formula[prefix.length..-1] if formula.start_with?(prefix)
    end
    @formula[sheet][key] = formula
  end

  @cell[sheet] ||= {}
  @style[sheet] ||= {}
  @style[sheet][key] = style_name

  @cell[sheet][key] =
    case @cell_type[sheet][key]
    when :float, :percentage
      v.to_f
    when :string
      str_v
    when :date
      #TODO: if table_cell.attributes['date-value'].size != "XXXX-XX-XX".size
      date_value = attr(table_cell,'date-value')
      if date_value.size != "XXXX-XX-XX".size
        #-- a time of day is present as well,
        #-- e.g. "1961-11-21T12:17:18"
        parsed = DateTime.parse(date_value.to_s)
        @cell_type[sheet][key] = :datetime
        parsed
      else
        # plain date: the attribute object itself is stored; #cell turns
        # it into a Date via #to_s
        table_cell.attributes['date-value']
      end
    when :time
      # "hh:mm:ss" text becomes a number of seconds
      hms = v.split(':')
      hms[0].to_i*3600 + hms[1].to_i*60 + hms[2].to_i
    else
      v
    end
end
|
|
462
|
+
|
|
463
|
+
# Reads all cells of the selected sheet into the internal hashes (values,
# types, formulas, styles, comments). Does nothing when the sheet has been
# read before.
#
# While walking the sheet's children:
# * every 'table-column' contributes its default-cell-style-name (as a plain
#   string, or nil) to @style_defaults[sheet];
# * a 'table-row' with number-rows-repeated advances the row counter by
#   (repeat - 1) before its cells are stored;
# * a cell with number-columns-repeated is stored once per repetition when
#   it has a value or a date-value, and the column counter is advanced
#   accordingly.
# Afterwards read_styles is called for every 'automatic-styles' element.
#
# Raises RangeError when no table named +sheet+ exists in the document.
#--
# the following construct means '4 blanks'
# some content <text:s text:c="3"/>
#++
def read_cells(sheet = default_sheet)
  validate_sheet!(sheet)
  return if @cells_read[sheet]

  sheet_found = false
  doc.xpath("//*[local-name()='table']").each do |ws|
    next unless sheet == attr(ws,'name')

    sheet_found = true
    col = 1
    row = 1
    ws.children.each do |table_element|
      case table_element.name
      when 'table-column'
        # Store the attribute's string value (not the attribute node), so
        # that #font can use it as a key into @style_definitions, whose keys
        # are strings.
        @style_defaults[sheet] << attr(table_element, 'default-cell-style-name')
      when 'table-row'
        if table_element.attributes['number-rows-repeated']
          skip_row = attr(table_element,'number-rows-repeated').to_s.to_i
          row = row + skip_row - 1
        end
        table_element.children.each do |cell|
          skip_col = attr(cell, 'number-columns-repeated')
          formula = attr(cell,'formula')
          value_type = attr(cell,'value-type')
          v = attr(cell,'value')
          style_name = attr(cell,'style-name')
          case value_type
          when 'string'
            str_v = ''
            # insert \n if there is more than one paragraph
            para_count = 0
            cell.children.each do |str|
              # begin comments
              #
              # - <table:table-cell office:value-type="string">
              #   - <office:annotation office:display="true" draw:style-name="gr1" draw:text-style-name="P1" svg:width="1.1413in" svg:height="0.3902in" svg:x="2.0142in" svg:y="0in" draw:caption-point-x="-0.2402in" draw:caption-point-y="0.5661in">
              #       <dc:date>2011-09-20T00:00:00</dc:date>
              #       <text:p text:style-name="P1">Kommentar fuer B4</text:p>
              #     </office:annotation>
              #     <text:p>B4 (mit Kommentar)</text:p>
              #   </table:table-cell>
              if str.name == 'annotation'
                str.children.each do |annotation|
                  if annotation.name == 'p'
                    # @comment is a hash keyed by sheet (like @cell); each
                    # entry is a further hash keyed by [row,col] that holds
                    # the comment text
                    @comment[sheet] = Hash.new unless @comment[sheet]
                    key = [row,col]
                    @comment[sheet][key] = annotation.text
                  end
                end
              end
              # end comments
              if str.name == 'p'
                v = str.content
                str_v += "\n" if para_count > 0
                para_count += 1
                if str.children.size > 1
                  str_v += children_to_string(str.children)
                else
                  str.children.each do |child|
                    str_v += child.content #.text
                  end
                end
                # special case not supported by unescapeHTML
                # NOTE(review): as written this substitution replaces a
                # quote with itself; the pattern may have been an HTML
                # entity before this listing was rendered -- confirm
                # against the released gem.
                str_v.gsub!(/'/,"'")
                str_v = CGI.unescapeHTML(str_v)
              end # == 'p'
            end
          when 'time'
            cell.children.each do |str|
              if str.name == 'p'
                v = str.content
              end
            end
          when '', nil, 'date', 'percentage', 'float'
            #
          when 'boolean'
            v = attr(cell,'boolean-value').to_s
          else
            # raise "unknown type #{value_type}"
          end
          if skip_col
            if v != nil or cell.attributes['date-value']
              0.upto(skip_col.to_i-1) do |i|
                set_cell_values(sheet,col,row,i,v,value_type,formula,cell,str_v,style_name)
              end
            end
            col += (skip_col.to_i - 1)
          end # if skip
          set_cell_values(sheet,col,row,0,v,value_type,formula,cell,str_v,style_name)
          col += 1
        end
        row += 1
        col = 1
      end
    end
  end
  doc.xpath("//*[local-name()='automatic-styles']").each do |style|
    read_styles(style)
  end
  unless sheet_found
    raise RangeError, "sheet '#{sheet}' not found"
  end
  @cells_read[sheet] = true
  @comments_read[sheet] = true
end
|
|
577
|
+
|
|
578
|
+
# Only calls read_cells, because Roo::Base calls read_comments whereas the
# reading of comments is done in read_cells for Roo::OpenOffice objects.
#
# sheet - sheet to read; when omitted (nil) the default sheet is used, the
#         same sheet read_cells picks when it is called without arguments.
def read_comments(sheet=nil)
  # Forwarding an explicit nil would bypass read_cells' own default.
  read_cells(sheet || default_sheet)
end
|
|
583
|
+
|
|
584
|
+
# Builds (once) the @label hash from the table:named-range elements of the
# document: name => [sheetname, row, col], where row and col are the strings
# found in a cell-range-address such as "$Sheet1.$C$5". A leading '$' is
# removed from the sheet name. Returns @label.
def read_labels
  return @label if @label

  named_ranges = {}
  doc.xpath("//table:named-range").each do |ne|
    name = attr(ne,'name').to_s
    #-
    # $Sheet1.$C$5
    #+
    sheetname, coords = attr(ne,'cell-range-address').to_s.split('.$')
    col, row = coords.split('$')
    sheetname = sheetname[1..-1] if sheetname[0,1] == '$'
    named_ranges[name] = [sheetname, row, col]
  end
  @label = named_ranges
end
|
|
596
|
+
|
|
597
|
+
# Registers a 'Default' font and then one font per style found while
# iterating +style_elements+.
#
# NOTE(review): both loops rely on #each of the objects passed in; for XML
# element nodes that may enumerate attributes rather than child elements --
# confirm which iteration is intended.
# NOTE(review): the default entry is a Roo::Font while the per-style entries
# use Roo::OpenOffice::Font -- confirm that the latter constant resolves.
def read_styles(style_elements)
  @style_definitions['Default'] = Roo::Font.new
  style_elements.each do |style|
    if style.name == 'style'
      style_name = attr(style,'name')
      style.each do |properties|
        @style_definitions[style_name] = Roo::OpenOffice::Font.new.tap do |font|
          font.bold      = attr(properties,'font-weight')
          font.italic    = attr(properties,'font-style')
          font.underline = attr(properties,'text-underline-style')
        end
      end
    end
  end
end
|
|
611
|
+
|
|
612
|
+
# Maps ODS value-type strings to the cell type symbols used by roo.
# Frozen so the shared lookup table cannot be modified at runtime.
A_ROO_TYPE = {
  "float"      => :float,
  "string"     => :string,
  "date"       => :date,
  "percentage" => :percentage,
  "time"       => :time,
}.freeze

# Converts an ODS value-type into the corresponding roo type symbol.
# Returns nil for value types that are not listed in A_ROO_TYPE.
def self.oo_type_2_roo_type(ootype)
  A_ROO_TYPE[ootype]
end
|
|
623
|
+
|
|
624
|
+
# Helper that converts the children of a text element into one string.
# Text nodes and unknown elements contribute their content; an element named
# 's' (compressed spaces) contributes as many blanks as its 'c' attribute
# says, or a single blank when the attribute is missing or zero.
def children_to_string(children)
  children.each_with_object(''.dup) do |child, text|
    if !child.text? && child.name == 's'
      count = child.attributes['c'].to_s.to_i
      # no explicit number means a count of 1:
      count = 1 if count == 0
      text << " "*count
    else
      text << child.content
    end
  end
end
|
|
646
|
+
|
|
647
|
+
# Returns the value of the attribute named +attr_name+ of +node+, or nil
# when the node has no such attribute.
def attr(node, attr_name)
  attribute = node.attributes[attr_name]
  attribute.value if attribute
end
|
|
652
|
+
end
|