culturecode-roo 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. data/.gitignore +7 -0
  2. data/.simplecov +4 -0
  3. data/.travis.yml +13 -0
  4. data/CHANGELOG +438 -0
  5. data/Gemfile +24 -0
  6. data/Guardfile +24 -0
  7. data/LICENSE +22 -0
  8. data/README.md +121 -0
  9. data/Rakefile +23 -0
  10. data/examples/roo_soap_client.rb +50 -0
  11. data/examples/roo_soap_server.rb +26 -0
  12. data/examples/write_me.rb +31 -0
  13. data/lib/roo.rb +28 -0
  14. data/lib/roo/base.rb +717 -0
  15. data/lib/roo/csv.rb +110 -0
  16. data/lib/roo/excelx.rb +542 -0
  17. data/lib/roo/excelx/comments.rb +23 -0
  18. data/lib/roo/excelx/extractor.rb +20 -0
  19. data/lib/roo/excelx/relationships.rb +26 -0
  20. data/lib/roo/excelx/shared_strings.rb +40 -0
  21. data/lib/roo/excelx/sheet_doc.rb +175 -0
  22. data/lib/roo/excelx/styles.rb +62 -0
  23. data/lib/roo/excelx/workbook.rb +59 -0
  24. data/lib/roo/font.rb +17 -0
  25. data/lib/roo/libre_office.rb +5 -0
  26. data/lib/roo/link.rb +15 -0
  27. data/lib/roo/open_office.rb +652 -0
  28. data/lib/roo/spreadsheet.rb +31 -0
  29. data/lib/roo/utils.rb +81 -0
  30. data/lib/roo/version.rb +3 -0
  31. data/roo.gemspec +27 -0
  32. data/scripts/txt2html +67 -0
  33. data/spec/fixtures/vcr_cassettes/google_drive.yml +165 -0
  34. data/spec/fixtures/vcr_cassettes/google_drive_access_token.yml +73 -0
  35. data/spec/fixtures/vcr_cassettes/google_drive_set.yml +857 -0
  36. data/spec/lib/roo/base_spec.rb +4 -0
  37. data/spec/lib/roo/csv_spec.rb +48 -0
  38. data/spec/lib/roo/excelx/format_spec.rb +51 -0
  39. data/spec/lib/roo/excelx_spec.rb +363 -0
  40. data/spec/lib/roo/libreoffice_spec.rb +13 -0
  41. data/spec/lib/roo/openoffice_spec.rb +15 -0
  42. data/spec/lib/roo/spreadsheet_spec.rb +88 -0
  43. data/spec/lib/roo/utils_spec.rb +105 -0
  44. data/spec/spec_helper.rb +9 -0
  45. data/test/all_ss.rb +11 -0
  46. data/test/files/1900_base.xlsx +0 -0
  47. data/test/files/1904_base.xlsx +0 -0
  48. data/test/files/Bibelbund.csv +3741 -0
  49. data/test/files/Bibelbund.ods +0 -0
  50. data/test/files/Bibelbund.xlsx +0 -0
  51. data/test/files/Bibelbund1.ods +0 -0
  52. data/test/files/Pfand_from_windows_phone.xlsx +0 -0
  53. data/test/files/advanced_header.ods +0 -0
  54. data/test/files/bbu.ods +0 -0
  55. data/test/files/bbu.xlsx +0 -0
  56. data/test/files/bode-v1.ods.zip +0 -0
  57. data/test/files/bode-v1.xls.zip +0 -0
  58. data/test/files/boolean.csv +2 -0
  59. data/test/files/boolean.ods +0 -0
  60. data/test/files/boolean.xlsx +0 -0
  61. data/test/files/borders.ods +0 -0
  62. data/test/files/borders.xlsx +0 -0
  63. data/test/files/bug-numbered-sheet-names.xlsx +0 -0
  64. data/test/files/comments.ods +0 -0
  65. data/test/files/comments.xlsx +0 -0
  66. data/test/files/csvtypes.csv +1 -0
  67. data/test/files/datetime.ods +0 -0
  68. data/test/files/datetime.xlsx +0 -0
  69. data/test/files/dreimalvier.ods +0 -0
  70. data/test/files/emptysheets.ods +0 -0
  71. data/test/files/emptysheets.xlsx +0 -0
  72. data/test/files/encrypted-letmein.ods +0 -0
  73. data/test/files/file_item_error.xlsx +0 -0
  74. data/test/files/formula.ods +0 -0
  75. data/test/files/formula.xlsx +0 -0
  76. data/test/files/formula_string_error.xlsx +0 -0
  77. data/test/files/html-escape.ods +0 -0
  78. data/test/files/link.csv +1 -0
  79. data/test/files/link.xlsx +0 -0
  80. data/test/files/matrix.ods +0 -0
  81. data/test/files/named_cells.ods +0 -0
  82. data/test/files/named_cells.xlsx +0 -0
  83. data/test/files/no_spreadsheet_file.txt +1 -0
  84. data/test/files/numbers-export.xlsx +0 -0
  85. data/test/files/numbers1.csv +18 -0
  86. data/test/files/numbers1.ods +0 -0
  87. data/test/files/numbers1.xlsx +0 -0
  88. data/test/files/numbers1withnull.xlsx +0 -0
  89. data/test/files/numeric-link.xlsx +0 -0
  90. data/test/files/only_one_sheet.ods +0 -0
  91. data/test/files/only_one_sheet.xlsx +0 -0
  92. data/test/files/paragraph.ods +0 -0
  93. data/test/files/paragraph.xlsx +0 -0
  94. data/test/files/ric.ods +0 -0
  95. data/test/files/sheet1.xml +109 -0
  96. data/test/files/simple_spreadsheet.ods +0 -0
  97. data/test/files/simple_spreadsheet.xlsx +0 -0
  98. data/test/files/simple_spreadsheet_from_italo.ods +0 -0
  99. data/test/files/so_datetime.csv +8 -0
  100. data/test/files/style.ods +0 -0
  101. data/test/files/style.xlsx +0 -0
  102. data/test/files/time-test.csv +2 -0
  103. data/test/files/time-test.ods +0 -0
  104. data/test/files/time-test.xlsx +0 -0
  105. data/test/files/type_excel.ods +0 -0
  106. data/test/files/type_excel.xlsx +0 -0
  107. data/test/files/type_excelx.ods +0 -0
  108. data/test/files/type_openoffice.xlsx +0 -0
  109. data/test/files/whitespace.ods +0 -0
  110. data/test/files/whitespace.xlsx +0 -0
  111. data/test/test_generic_spreadsheet.rb +211 -0
  112. data/test/test_helper.rb +58 -0
  113. data/test/test_roo.rb +1977 -0
  114. metadata +329 -0
@@ -0,0 +1,652 @@
1
require 'base64'
require 'cgi'
require 'date'
require 'digest'
require 'openssl'
require 'zlib'
require 'nokogiri'
require 'zip/filesystem'
require 'roo/font'
6
+
7
+ class Roo::OpenOffice < Roo::Base
8
# Opens an OpenDocument spreadsheet and prepares it for reading.
#
# filename - location of the .ods file
# options  - Hash of options read here:
#            :packed       - packing of the file (e.g. :zip)
#            :file_warning - handed to file_type_check (default :error)
#            :tmpdir_root  - handed to make_tmpdir
#            :password     - used by decrypt_if_necessary for encrypted files
#
# content.xml is extracted into the temporary directory as
# roo_content.xml (and decrypted in place when necessary).
#
# Raises ArgumentError when the archive contains no content.xml.
def initialize(filename, options={})
  packed = options[:packed]
  file_warning = options[:file_warning] || :error

  file_type_check(filename, '.ods', 'an Roo::OpenOffice', file_warning, packed)
  @tmpdir = make_tmpdir(filename.split('/').last, options[:tmpdir_root])
  @filename = local_filename(filename, @tmpdir, packed)
  #TODO: @cells_read[:default] = false
  Zip::File.open(@filename) do |archive|
    entry = archive.glob('content.xml').first
    raise ArgumentError, 'file missing required content.xml' unless entry

    extracted_path = File.join(@tmpdir, 'roo_content.xml')
    entry.extract(extracted_path)
    decrypt_if_necessary(archive, entry, extracted_path, options)
  end
  super(filename, options)

  # per-sheet caches, filled lazily by read_cells / read_styles
  @formula = {}
  @style = {}
  @style_defaults = Hash.new { |hash, sheet| hash[sheet] = [] }
  @style_definitions = {}
  @comment = {}
  @comments_read = {}
end
40
+
41
# Decrypts the extracted content.xml when the manifest marks it as
# encrypted.
#
# META-INF/manifest.xml is extracted to the temporary directory and
# searched for a manifest:encryption-data element belonging to the
# content.xml file entry. If there is none, nothing happens. Otherwise
# options[:password] is used to decrypt and the file at
# roo_content_xml_path is overwritten with the decrypted contents.
#
# Raises ArgumentError if the manifest is missing from the archive, or if
# the content is encrypted and no :password option was supplied.
def decrypt_if_necessary(
  zip_file,
  content_entry,
  roo_content_xml_path, options
)
  manifest_entry = zip_file.glob('META-INF/manifest.xml').first
  unless manifest_entry
    raise ArgumentError, 'file missing required META-INF/manifest.xml'
  end

  manifest_path = File.join(@tmpdir, 'roo_manifest.xml')
  manifest_entry.extract(manifest_path)
  manifest = ::Roo::Utils.load_xml(manifest_path)

  # Only the encryption data of the content.xml entry is of interest.
  encryption_data = manifest.xpath(
    "//manifest:file-entry[@manifest:full-path='content.xml']" \
    '/manifest:encryption-data'
  ).first

  # No node means content.xml is stored unencrypted.
  return if encryption_data.nil?

  password = options[:password]
  if password.nil?
    raise ArgumentError,
          'file is encrypted but password was not supplied'
  end

  perform_decryption(
    encryption_data,
    password,
    content_entry,
    roo_content_xml_path
  )
end
89
+
90
# Processes the ODS encryption manifest and performs the decryption.
#
# encryption_data      - the manifest:encryption-data node of content.xml
# password             - the password supplied by the caller
# content_entry        - the zip entry of content.xml
# roo_content_xml_path - file that is overwritten with the decrypted,
#                        inflated content
#
# Returns whatever IO.binwrite returns for the written file.
# Raises ArgumentError when the manifest lacks one of the expected child
# elements, when the start-key-generation algorithm is not SHA-256
# (including when the attribute is absent), or when decrypting/inflating
# fails (wrong password or corrupt data).
def perform_decryption(
  encryption_data,
  password,
  content_entry,
  roo_content_xml_path
)
  # Elements of the manifest that describe the encryption
  algorithm_node = encryption_data.xpath('manifest:algorithm').first
  key_derivation_node =
    encryption_data.xpath('manifest:key-derivation').first
  start_key_generation_node =
    encryption_data.xpath('manifest:start-key-generation').first

  unless algorithm_node && key_derivation_node && start_key_generation_node
    raise ArgumentError,
          'manifest.xml missing encryption-data elements'
  end

  # The algorithm is a URI describing the algorithm used
  algorithm = algorithm_node['manifest:algorithm-name']

  # Initialization vector and salt are base-64 encoded. to_s turns a
  # missing attribute into an empty value instead of a NoMethodError.
  iv = Base64.decode64(
    algorithm_node['manifest:initialisation-vector'].to_s
  )
  key_derivation_name =
    key_derivation_node['manifest:key-derivation-name']
  iteration_count =
    key_derivation_node['manifest:iteration-count'].to_i
  salt = Base64.decode64(key_derivation_node['manifest:salt'].to_s)

  # The password is hashed with the algorithm represented by this URI
  key_generation_name =
    start_key_generation_node['manifest:start-key-generation-name']

  # Interpolation (rather than String#+) keeps this an ArgumentError even
  # when the attribute is missing and the name is nil.
  unless key_generation_name == 'http://www.w3.org/2000/09/xmldsig#sha256'
    raise ArgumentError,
          "Unknown key generation algorithm #{key_generation_name}"
  end
  hashed_password = Digest::SHA256.digest(password)

  cipher = find_cipher(
    algorithm,
    key_derivation_name,
    hashed_password,
    salt,
    iteration_count,
    iv
  )

  begin
    decrypted = decrypt(content_entry, cipher)

    # Finally, inflate the decrypted (raw deflate) stream and overwrite
    # content.xml
    IO.binwrite(
      roo_content_xml_path,
      Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(decrypted)
    )
  rescue StandardError => error
    raise ArgumentError,
          "Invalid password or other data error: #{error}"
  end
end
173
+
174
# Creates a decrypting cipher for an ODS algorithm URI from manifest.xml.
#
# algorithm           - algorithm URI; only
#                       "http://www.w3.org/2001/04/xmlenc#aes256-cbc" is
#                       supported
# key_derivation_name - key derivation name, handed to find_cipher_key
# hashed_password     - the already hashed password
# salt                - salt for the key derivation
# iteration_count     - iteration count for the key derivation
# iv                  - initialization vector for the cipher
#
# Returns an OpenSSL::Cipher in decrypt mode with padding disabled and
# key and iv set.
# Raises ArgumentError for any other algorithm, including nil.
def find_cipher(
  algorithm,
  key_derivation_name,
  hashed_password,
  salt,
  iteration_count,
  iv
)
  # Interpolation keeps this an ArgumentError when algorithm is nil
  # (String#+ would raise a TypeError instead).
  unless algorithm == 'http://www.w3.org/2001/04/xmlenc#aes256-cbc'
    raise ArgumentError, "Unknown algorithm #{algorithm}"
  end

  cipher = OpenSSL::Cipher.new('AES-256-CBC')
  cipher.decrypt
  cipher.padding = 0
  cipher.key = find_cipher_key(
    cipher,
    key_derivation_name,
    hashed_password,
    salt,
    iteration_count
  )
  cipher.iv = iv
  cipher
end
201
+
202
# Derives the cipher key based on an ODS key derivation name from
# manifest.xml.
#
# cipher              - cipher whose key_len determines the key length
# key_derivation_name - only "PBKDF2" is supported
# hashed_password     - the already hashed password
# salt                - salt for the derivation
# iteration_count     - number of PBKDF2 iterations
#
# Returns the key derived with PBKDF2 (HMAC-SHA1).
# Raises ArgumentError for any other derivation name, including nil.
def find_cipher_key(
  cipher,
  key_derivation_name,
  hashed_password,
  salt,
  iteration_count
)
  # Interpolation keeps this an ArgumentError when the name is nil
  # (String#+ would raise a TypeError instead).
  unless key_derivation_name == 'PBKDF2'
    raise ArgumentError,
          "Unknown key derivation name #{key_derivation_name}"
  end

  OpenSSL::PKCS5.pbkdf2_hmac_sha1(
    hashed_password,
    salt,
    iteration_count,
    cipher.key_len
  )
end
223
+
224
# Block-decrypts the raw bytes of a zip entry with the given cipher.
#
# Zip::Entry.extract writes a 0-length file when trying to extract an
# encrypted stream, so the raw bytes are read straight from @filename,
# starting right behind the entry's local header.
#
# content_entry - entry providing local_header_offset,
#                 calculate_local_header_size and compressed_size
# cipher        - object providing update(data) and final
#
# Returns the decrypted bytes as a String.
def decrypt(content_entry, cipher)
  block_size = 4096
  decrypted = String.new # binary buffer, appended to in place
  File.open(@filename, 'rb') do |zipfile|
    zipfile.seek(
      content_entry.local_header_offset +
        content_entry.calculate_local_header_size
    )
    remaining = content_entry.compressed_size
    # Never read past the entry; a zero-length entry reads nothing, so the
    # cipher is never fed an empty chunk.
    while remaining > 0
      buffer = zipfile.read([block_size, remaining].min)
      break if buffer.nil? # unexpected end of file

      decrypted << cipher.update(buffer)
      remaining -= buffer.length
    end
  end
  decrypted << cipher.final
end
253
+
254
# Makes every label (named cell) of the document callable as a method
# that returns the content of the labelled cell. Any other method name is
# passed on to super (for methods like #a1).
def method_missing(m, *args)
  read_labels
  # is method name a label name
  if @label.key?(m.to_s)
    row, col = label(m.to_s)
    cell(row, col)
  else
    # call super for methods like #a1
    super
  end
end

# Keeps respond_to? in sync with method_missing: label names are
# reported as supported, everything else is decided by super.
def respond_to_missing?(m, include_private = false)
  read_labels
  @label.key?(m.to_s) || super
end
265
+
266
# Returns the content of a spreadsheet-cell.
# (1,1) is the upper left corner.
# (1,1), (1,'A'), ('A',1), ('a',1) all refer to the
# cell at the first line and first row.
#
# Cells of type :date are returned as Date objects. A cell without
# content yields nil; this also holds for a sheet for which no cell
# was read at all.
def cell(row, col, sheet=nil)
  sheet ||= default_sheet
  read_cells(sheet)
  row, col = normalize(row, col)
  # @cell has no entry for a sheet without any cell
  value = (@cell[sheet] || {})[[row, col]]
  if celltype(row, col, sheet) == :date
    yyyy, mm, dd = value.to_s.split('-')
    return Date.new(yyyy.to_i, mm.to_i, dd.to_i)
  end
  value
end
280
+
281
# Returns the formula at (row,col).
# Returns nil if there is no formula; this also holds for a sheet for
# which no cell was read at all.
# The method #formula? checks if there is a formula.
def formula(row, col, sheet=nil)
  sheet ||= default_sheet
  read_cells(sheet)
  row, col = normalize(row, col)
  # @formula has no entry for a sheet without any cell
  (@formula[sheet] || {})[[row, col]]
end
290
+ alias_method :formula?, :formula
291
+
292
# Lists the formulas of the selected sheet.
# Each element of the returned array has the form [row, col, formula];
# the array is empty when no formula is known for the sheet.
def formulas(sheet=nil)
  sheet ||= default_sheet
  read_cells(sheet)
  sheet_formulas = @formula[sheet]
  return [] unless sheet_formulas

  sheet_formulas.map { |(row, col), text| [row, col, text] }
end
305
+
306
# Given a cell, returns the cell's style definition.
#
# The style name is taken from the cell itself, then from the default
# style recorded for the cell's column, and finally 'Default'. The result
# is whatever @style_definitions holds for that name (nil if the name is
# unknown).
def font(row, col, sheet=nil)
  sheet ||= default_sheet
  read_cells(sheet)
  row, col = normalize(row, col)
  # @style has no entry for a sheet without any cell
  cell_styles = @style[sheet] || {}
  style_name = cell_styles[[row, col]] || @style_defaults[sheet][col - 1] || 'Default'
  @style_definitions[style_name]
end
314
+
315
# returns the type of a cell:
# * :float
# * :string
# * :date
# * :percentage
# * :formula
# * :time
# * :datetime
#
# A cell that carries a formula is always reported as :formula.
# Returns nil for a cell without a known type; this also holds for a
# sheet for which no cell was read at all.
def celltype(row, col, sheet=nil)
  sheet ||= default_sheet
  read_cells(sheet)
  row, col = normalize(row, col)
  # neither hash has an entry for a sheet without any cell
  return :formula if (@formula[sheet] || {})[[row, col]]

  (@cell_type[sheet] || {})[[row, col]]
end
333
+
334
# Returns the value of the name attribute of every table element in the
# document, in document order.
def sheets
  tables = doc.xpath("//*[local-name()='table']")
  tables.map { |table| table.attributes['name'].value }
end
339
+
340
# Version of the Roo::OpenOffice document, as found in the version
# attribute of the document-content element
# (at 2007 this is always "1.0").
def officeversion
  oo_version # refreshes @officeversion from the document
  @officeversion
end
346
+
347
# Shows the internal representation of all cells of a sheet
# (the default sheet unless one is given).
# Mainly for debugging purposes.
def to_s(sheet=nil)
  target = sheet || default_sheet
  read_cells(target)
  @cell[target].inspect
end
354
+
355
# Looks up a labelled (named) cell.
# Returns [row, col, sheetname] of the cell, or [nil, nil, nil] if the
# label is not defined.
def label(labelname)
  read_labels
  entry = @label[labelname]
  return nil, nil, nil unless entry

  sheet_name, row, col = entry
  [row.to_i, ::Roo::Utils.letter_to_number(col), sheet_name]
end
370
+
371
# Returns an array with all labels. Each element is an array of the form
# [labelname, [row, col, sheetname]].
# The sheet argument is accepted but not evaluated.
def labels(sheet=nil)
  read_labels
  @label.map do |name, (sheet_name, row, col)|
    [name, [row.to_i, ::Roo::Utils.letter_to_number(col), sheet_name]]
  end
end
383
+
384
# Returns the comment attached to the cell at (row,col),
# or nil if the cell has no comment.
def comment(row, col, sheet=nil)
  sheet ||= default_sheet
  read_cells(sheet)
  row, col = normalize(row, col)
  sheet_comments = @comment[sheet]
  sheet_comments ? sheet_comments[[row, col]] : nil
end
393
+
394
# Lists the comments of the selected sheet.
# Each element of the returned array has the form [row, col, comment];
# the array is empty when the sheet has no comments.
def comments(sheet=nil)
  sheet ||= default_sheet
  read_comments(sheet) unless @comments_read[sheet]
  sheet_comments = @comment[sheet]
  return [] unless sheet_comments

  sheet_comments.map { |(row, col), text| [row, col, text] }
end
407
+
408
+ private
409
+
410
# The parsed roo_content.xml from the temporary directory.
# It is loaded on first use and cached afterwards.
def doc
  return @doc if @doc

  @doc = ::Roo::Utils.load_xml(File.join(@tmpdir, 'roo_content.xml'))
end
413
+
414
# Reads the version attribute of the document-content element(s) into
# @officeversion.
def oo_version
  doc.xpath("//*[local-name()='document-content']").each { |office|
    @officeversion = attr(office, 'version')
  }
end
420
+
421
# Helper that stores one cell in the internal representation.
#
# The cell is stored under the key [y, x + i] of the given sheet. Its
# type (derived from value_type), formula (without a leading 'of:' /
# 'oooc:' prefix), style name and value are recorded. How the value is
# stored depends on the cell type:
#   :float, :percentage - v as Float
#   :string             - str_v
#   :date               - the date-value attribute of table_cell; if that
#                         value is not of the form XXXX-XX-XX it also
#                         carries a time, so a DateTime is stored and the
#                         type becomes :datetime
#   :time               - v ("h:m:s") as number of seconds
#   anything else       - v unchanged
def set_cell_values(sheet,x,y,i,v,value_type,formula,table_cell,str_v,style_name)
  key = [y, x + i]
  @cell_type[sheet] ||= {}
  @cell_type[sheet][key] = Roo::OpenOffice.oo_type_2_roo_type(value_type)
  @formula[sheet] ||= {}
  if formula
    %w[of: oooc:].each do |prefix|
      formula = formula[prefix.length..-1] if formula.start_with?(prefix)
    end
    @formula[sheet][key] = formula
  end
  @cell[sheet] ||= {}
  @style[sheet] ||= {}
  @style[sheet][key] = style_name
  case @cell_type[sheet][key]
  when :float, :percentage
    @cell[sheet][key] = v.to_f
  when :string
    @cell[sheet][key] = str_v
  when :date
    date_value = attr(table_cell, 'date-value')
    if date_value.size != 'XXXX-XX-XX'.size
      # a time is present as well, e.g. "1961-11-21T12:17:18"
      @cell[sheet][key] = DateTime.parse(date_value.to_s)
      @cell_type[sheet][key] = :datetime
    else
      @cell[sheet][key] = table_cell.attributes['date-value']
    end
  when :time
    hours, minutes, seconds = v.split(':')
    @cell[sheet][key] = hours.to_i * 3600 + minutes.to_i * 60 + seconds.to_i
  else
    @cell[sheet][key] = v
  end
end
462
+
463
# Reads all cells of the selected sheet into the internal
# representation (values, types, formulas, styles and comments).
# A sheet is read only once; later calls return immediately.
# Afterwards the automatic styles of the document are passed to
# read_styles.
#
# Raises RangeError if the document has no table with the given name.
#--
# the following construct means '4 blanks'
# some content <text:s text:c="3"/>
#++
def read_cells(sheet = default_sheet)
  validate_sheet!(sheet)
  return if @cells_read[sheet]

  sheet_found = false
  doc.xpath("//*[local-name()='table']").each do |ws|
    if sheet == attr(ws,'name')
      sheet_found = true
      col = 1
      row = 1
      ws.children.each do |table_element|
        case table_element.name
        when 'table-column'
          # Store the style NAME (a String, like the keys of
          # @style_definitions) rather than the attribute node, once per
          # column the element stands for, so that the index into
          # @style_defaults[sheet] corresponds to the column number.
          default_style = attr(table_element, 'default-cell-style-name')
          repeated = [attr(table_element, 'number-columns-repeated').to_i, 1].max
          repeated.times { @style_defaults[sheet] << default_style }
        when 'table-row'
          if table_element.attributes['number-rows-repeated']
            skip_row = attr(table_element,'number-rows-repeated').to_s.to_i
            row = row + skip_row - 1
          end
          table_element.children.each do |cell|
            skip_col = attr(cell, 'number-columns-repeated')
            formula = attr(cell,'formula')
            value_type = attr(cell,'value-type')
            v = attr(cell,'value')
            style_name = attr(cell,'style-name')
            case value_type
            when 'string'
              str_v = ''
              # insert \n if there is more than one paragraph
              para_count = 0
              cell.children.each do |str|
                # begin comments
                #
                # Example of a cell with an annotation:
                #
                #   <table:table-cell office:value-type="string">
                #     <office:annotation office:display="true" draw:style-name="gr1" ...>
                #       <dc:date>2011-09-20T00:00:00</dc:date>
                #       <text:p text:style-name="P1">Kommentar fuer B4</text:p>
                #     </office:annotation>
                #     <text:p>B4 (mit Kommentar)</text:p>
                #   </table:table-cell>
                if str.name == 'annotation'
                  str.children.each do |annotation|
                    if annotation.name == 'p'
                      # @comment is a hash with the sheet as key (like
                      # @cell); each entry is another hash with
                      # [row,col] as key and the actual comment text as
                      # value
                      @comment[sheet] = Hash.new unless @comment[sheet]
                      key = [row,col]
                      @comment[sheet][key] = annotation.text
                    end
                  end
                end
                # end comments
                if str.name == 'p'
                  v = str.content
                  str_v += "\n" if para_count > 0
                  para_count += 1
                  if str.children.size > 1
                    str_v += children_to_string(str.children)
                  else
                    str.children.each do |child|
                      str_v += child.content #.text
                    end
                  end
                  str_v.gsub!(/&apos;/,"'") # special case not supported by unescapeHTML
                  str_v = CGI.unescapeHTML(str_v)
                end # == 'p'
              end
            when 'time'
              cell.children.each do |str|
                if str.name == 'p'
                  v = str.content
                end
              end
            when '', nil, 'date', 'percentage', 'float'
              # the value attribute read above is used as it is
            when 'boolean'
              v = attr(cell,'boolean-value').to_s
            else
              # raise "unknown type #{value_type}"
            end
            if skip_col
              # a repeated cell: store it once per repetition, but only
              # if it actually carries a value
              if v != nil or cell.attributes['date-value']
                0.upto(skip_col.to_i-1) do |i|
                  set_cell_values(sheet,col,row,i,v,value_type,formula,cell,str_v,style_name)
                end
              end
              col += (skip_col.to_i - 1)
            end # if skip
            set_cell_values(sheet,col,row,0,v,value_type,formula,cell,str_v,style_name)
            col += 1
          end
          row += 1
          col = 1
        end
      end
    end
  end
  doc.xpath("//*[local-name()='automatic-styles']").each do |style|
    read_styles(style)
  end
  if !sheet_found
    raise RangeError
  end
  @cells_read[sheet] = true
  # comments are collected together with the cells
  @comments_read[sheet] = true
end
577
+
578
# Only calls read_cells, because Roo::Base calls read_comments
# whereas the reading of comments is done in read_cells for
# Roo::OpenOffice-objects.
#
# Without a sheet the default sheet is read, like in the other methods
# that take an optional sheet.
def read_comments(sheet=nil)
  read_cells(sheet || default_sheet)
end
583
+
584
# Collects the named ranges of the document into @label (only once).
# @label maps each name to [sheetname, row, col], where row and col are
# the strings taken from the cell-range-address attribute, which looks
# like
#   $Sheet1.$C$5
def read_labels
  @label ||= doc.xpath("//table:named-range").each_with_object({}) do |named_range, found|
    name = attr(named_range, 'name').to_s
    sheet_name, coords = attr(named_range, 'cell-range-address').to_s.split('.$')
    col, row = coords.split('$')
    sheet_name = sheet_name[1..-1] if sheet_name.start_with?('$')
    found[name] = [sheet_name, row, col]
  end
end
596
+
597
# Reads the font information of the styles below style_elements (an
# automatic-styles element) into @style_definitions.
#
# 'Default' is always (re)set to an empty Roo::Font. Every child element
# named 'style' gets a Roo::Font, stored under the style's name, whose
# bold, italic and underline values are the font-weight, font-style and
# text-underline-style attributes of the style's property elements (the
# first element carrying an attribute wins).
#
# The children are iterated explicitly: iterating a node itself walks
# its attributes, not its child elements.
def read_styles(style_elements)
  @style_definitions['Default'] = Roo::Font.new
  style_elements.children.each do |style|
    next unless style.name == 'style'

    font = Roo::Font.new
    style.children.each do |properties|
      font.bold ||= attr(properties, 'font-weight')
      font.italic ||= attr(properties, 'font-style')
      font.underline ||= attr(properties, 'text-underline-style')
    end
    @style_definitions[attr(style, 'name')] = font
  end
end
611
+
612
# Maps the value types used in OpenOffice documents to roo cell types.
# Frozen, because it is a lookup table that is only ever read.
A_ROO_TYPE = {
  "float"      => :float,
  "string"     => :string,
  "date"       => :date,
  "percentage" => :percentage,
  "time"       => :time,
}.freeze

# Converts an OpenOffice value type (String) into the roo cell type
# (Symbol). Returns nil for a type without a mapping.
def self.oo_type_2_roo_type(ootype)
  A_ROO_TYPE[ootype]
end
623
+
624
# Helper method that converts the children of a text element (text,
# compressed spaces and other elements) into a string.
#
# Text children and unknown elements contribute their content. An 's'
# element contributes as many blanks as its 'c' attribute says; no
# explicit number means a count of 1.
def children_to_string(children)
  # ''.dup gives a mutable string that is appended to in place instead
  # of building a new string for every child
  children.each_with_object(''.dup) do |child, result|
    if !child.text? && child.name == 's'
      compressed_spaces = child.attributes['c'].to_s.to_i
      compressed_spaces = 1 if compressed_spaces == 0
      result << ' ' * compressed_spaces
    else
      result << child.content
    end
  end
end
646
+
647
# Returns the value of the attribute attr_name of node,
# or nil if the node has no such attribute.
def attr(node, attr_name)
  # look the attribute up once instead of asking the node for its
  # attributes twice
  attribute = node.attributes[attr_name]
  attribute.value if attribute
end
652
+ end