hmcgowan-roo 1.2.4 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +1,19 @@
1
- README for roo
2
- ==============
1
+ # README for Roo
3
2
 
4
- Installation:
3
+ This is the semi-official roo repository. I've been unable to contact the maintainer, so this
4
+ repository should allow us to continue in the interim. If you'd like to contribute, I'm happy
5
+ to pull your changes in and will be making periodic releases from here until we can get
6
+ the gems on RubyForge.
5
7
 
8
+
9
+ ## Installation
10
+
11
+ # Run the following if you haven't done so before:
12
+ gem sources -a http://gems.github.com
13
+ # Install the gem:
6
14
  sudo gem install roo
7
15
 
8
- Usage:
16
+ ## Usage:
9
17
 
10
18
  require 'rubygems'
11
19
  require 'roo'
data/lib/roo/excel.rb CHANGED
@@ -1,8 +1,12 @@
1
1
  require 'rubygems'
2
2
  gem 'spreadsheet', '>= 0.6.3.1'
3
3
  require 'spreadsheet'
4
- CHARGUESS = false
5
- require 'charguess' if CHARGUESS
4
+ CHARGUESS = begin
5
+ require 'charguess'
6
+ true
7
+ rescue LoadError => e
8
+ false
9
+ end
6
10
 
7
11
  # ruby-spreadsheet has a font object so we're extending it
8
12
  # with our own functionality but still providing full access
@@ -79,18 +83,7 @@ class Excel < GenericSpreadsheet
79
83
  def sheets
80
84
  result = []
81
85
  @workbook.worksheets.each do |worksheet|
82
- # TODO: is there a better way to do conversion?
83
- if CHARGUESS
84
- encoding = CharGuess::guess(worksheet.name)
85
- encoding = 'unicode' unless encoding
86
-
87
-
88
- result << Iconv.new('utf-8',encoding).iconv(
89
- worksheet.name
90
- )
91
- else
92
- result << platform_specific_iconv(worksheet.name)
93
- end
86
+ result << normalize_string(worksheet.name)
94
87
  end
95
88
  return result
96
89
  end
@@ -243,11 +236,7 @@ class Excel < GenericSpreadsheet
243
236
  return name-1 if name.kind_of?(Fixnum)
244
237
  i = 0
245
238
  @workbook.worksheets.each do |worksheet|
246
- # TODO: is there a better way to do conversion?
247
- return i if name == platform_specific_iconv(worksheet.name)
248
- #Iconv.new('utf-8','unicode').iconv(
249
- # @workbook.worksheet(i).name
250
- # )
239
+ return i if name == normalize_string(worksheet.name)
251
240
  i += 1
252
241
  end
253
242
  raise StandardError, "sheet '#{name}' not found"
@@ -272,7 +261,16 @@ class Excel < GenericSpreadsheet
272
261
  }
273
262
  ! content
274
263
  end
275
-
264
+
265
+ def normalize_string(value)
266
+ value = every_second_null?(value) ? remove_every_second_null(value) : value
267
+ if CHARGUESS && encoding = CharGuess::guess(value)
268
+ Iconv.new('utf-8', encoding)
269
+ else
270
+ platform_specific_iconv(value)
271
+ end
272
+ end
273
+
276
274
  def platform_specific_iconv(value)
277
275
  case RUBY_PLATFORM.downcase
278
276
  when /darwin/
@@ -381,7 +379,7 @@ class Excel < GenericSpreadsheet
381
379
  format = row.format(idx)
382
380
  if format.date_or_time?
383
381
  cell = row.at(idx)
384
- cell.to_s.to_f > 0 ? true : false # cell value must be numeric
382
+ true if Float(cell) > 0 rescue false
385
383
  else
386
384
  false
387
385
  end
data/lib/roo/excelx.rb CHANGED
@@ -1,6 +1,7 @@
1
1
 
2
2
  require 'rubygems'
3
- require 'rexml/document'
3
+ gem 'libxml-ruby', '>= 0.8.3'
4
+ require 'xml'
4
5
  require 'fileutils'
5
6
  require 'zip/zipfilesystem'
6
7
  require 'date'
@@ -102,12 +103,12 @@ class Excelx < GenericSpreadsheet
102
103
  @file_nr = @@nr
103
104
  extract_content(@filename)
104
105
  file = File.new(File.join(@tmpdir, @file_nr.to_s+"_roo_workbook.xml"))
105
- @workbook_doc = REXML::Document.new file
106
+ @workbook_doc = XML::Parser.io(file).parse
106
107
  file.close
107
108
  @shared_table = []
108
109
  if File.exist?(File.join(@tmpdir, @file_nr.to_s+'_roo_sharedStrings.xml'))
109
110
  file = File.new(File.join(@tmpdir, @file_nr.to_s+'_roo_sharedStrings.xml'))
110
- @sharedstring_doc = REXML::Document.new file
111
+ @sharedstring_doc = XML::Parser.io(file).parse
111
112
  file.close
112
113
  read_shared_strings(@sharedstring_doc)
113
114
  end
@@ -115,14 +116,14 @@ class Excelx < GenericSpreadsheet
115
116
  @style_definitions = Array.new { |h,k| h[k] = {} }
116
117
  if File.exist?(File.join(@tmpdir, @file_nr.to_s+'_roo_styles.xml'))
117
118
  file = File.new(File.join(@tmpdir, @file_nr.to_s+'_roo_styles.xml'))
118
- @styles_doc = REXML::Document.new file
119
+ @styles_doc = XML::Parser.io(file).parse
119
120
  file.close
120
121
  read_styles(@styles_doc)
121
122
  end
122
123
  @sheet_doc = []
123
124
  @sheet_files.each_with_index do |item, i|
124
125
  file = File.new(item)
125
- @sheet_doc[i] = REXML::Document.new file
126
+ @sheet_doc[i] = XML::Parser.io(file).parse
126
127
  file.close
127
128
  end
128
129
  ensure
@@ -288,18 +289,11 @@ class Excelx < GenericSpreadsheet
288
289
  # returns an array of sheet names in the spreadsheet
289
290
  def sheets
290
291
  return_sheets = []
291
- @workbook_doc.each_element do |workbook|
292
- workbook.each_element do |el|
293
- if el.name == "sheets"
294
- el.each_element do |sheet|
295
- return_sheets << sheet.attributes['name']
296
- end
297
- end
298
- end
292
+ @workbook_doc.find("//*[local-name()='sheet']").each do |sheet|
293
+ return_sheets << sheet.attributes.to_h['name']
299
294
  end
300
295
  return_sheets
301
296
  end
302
-
303
297
  # shows the internal representation of all cells
304
298
  # for debugging purposes
305
299
  def to_s(sheet=nil)
@@ -404,87 +398,62 @@ class Excelx < GenericSpreadsheet
404
398
  raise ArgumentError, "Error: sheet '#{sheet||'nil'}' not valid" if @default_sheet == nil and sheet==nil
405
399
  raise RangeError unless self.sheets.include? sheet
406
400
  n = self.sheets.index(sheet)
407
- @sheet_doc[n].each_element do |worksheet|
408
- worksheet.each_element do |elem|
409
- if elem.name == 'sheetData'
410
- elem.each_element do |sheetdata|
411
- if sheetdata.name == 'row'
412
- sheetdata.each_element do |row|
413
- if row.name == 'c'
414
- s_attribute = row.attributes['s']
415
- if row.attributes['t'] == 's'
416
- tmp_type = :shared
417
- elsif row.attributes['t'] == 'b'
418
- tmp_type = :boolean
419
- else
420
- format = attribute2format(s_attribute)
421
- tmp_type = format2type(format)
422
- end
423
- formula = nil
424
- row.each_element do |cell|
425
- # puts "cell.name: #{cell.name}" if cell.text.include? "22606.5120"
426
- # puts "cell.text: #{cell.text}" if cell.text.include? "22606.5120"
427
- if cell.name == 'f'
428
- formula = cell.text
429
- end
430
- if cell.name == 'v'
431
- #puts "tmp_type: #{tmp_type}" if cell.text.include? "22606.5120"
432
- #puts cell.name
433
- if tmp_type == :time or tmp_type == :datetime #2008-07-26
434
- #p cell.text
435
- # p cell.text.to_f if cell.text.include? "22606.5120"
436
- if cell.text.to_f >= 1.0 # 2008-07-26
437
- # puts ">= 1.0" if cell.text.include? "22606.5120"
438
- # puts "cell.text.to_f: #{cell.text.to_f}" if cell.text.include? "22606.5120"
439
- #puts "cell.text.to_f.floor: #{cell.text.to_f.floor}" if cell.text.include? "22606.5120"
440
- if (cell.text.to_f - cell.text.to_f.floor).abs > 0.000001 #TODO:
441
- # puts "abs ist groesser" if cell.text.include? "22606.5120"
442
- # @cell[sheet][key] = DateTime.parse(tr.attributes['date-value'])
443
- tmp_type = :datetime
444
-
445
- else
446
- #puts ":date"
447
- tmp_type = :date # 2008-07-26
448
- end
449
- else
450
- #puts "<1.0"
451
- end # 2008-07-26
452
- end # 2008-07-26
453
- excelx_type = [:numeric_or_formula,format]
454
- excelx_value = cell.text
455
- if tmp_type == :shared
456
- vt = :string
457
- str_v = @shared_table[cell.text.to_i]
458
- excelx_type = :string
459
- elsif tmp_type == :boolean
460
- vt = :boolean
461
- cell.text.to_i == 1 ? v = 'TRUE' : v = 'FALSE'
462
- elsif tmp_type == :date
463
- vt = :date
464
- v = cell.text
465
- elsif tmp_type == :time
466
- vt = :time
467
- v = cell.text
468
- elsif tmp_type == :datetime
469
- vt = :datetime
470
- v = cell.text
471
- elsif tmp_type == :formula
472
- vt = :formula
473
- v = cell.text.to_f #TODO: !!!!
474
- else
475
- vt = :float
476
- v = cell.text
477
- end
478
- #puts "vt: #{vt}" if cell.text.include? "22606.5120"
479
- x,y = split_coordinate(row.attributes['r'])
480
- tr=nil #TODO: ???s
481
- set_cell_values(sheet,x,y,0,v,vt,formula,tr,str_v,excelx_type,excelx_value,s_attribute)
482
- end
483
- end
484
- end
401
+ @sheet_doc[n].find("//*[local-name()='c']").each do |c|
402
+ s_attribute = c.attributes.to_h['s'].to_i # should be here
403
+ if (c.attributes.to_h['t'] == 's')
404
+ tmp_type = :shared
405
+ elsif (c.attributes.to_h['t'] == 'b')
406
+ tmp_type = :boolean
407
+ else
408
+ # s_attribute = c.attributes.to_h['s'].to_i # was here
409
+ format = attribute2format(s_attribute)
410
+ tmp_type = format2type(format)
411
+ end
412
+ formula = nil
413
+ c.each_element do |cell|
414
+ if cell.name == 'f'
415
+ formula = cell.content
416
+ end
417
+ if cell.name == 'v'
418
+ if tmp_type == :time or tmp_type == :datetime
419
+ if cell.content.to_f >= 1.0
420
+ if (cell.content.to_f - cell.content.to_f.floor).abs > 0.000001
421
+ tmp_type = :datetime
422
+ else
423
+ tmp_type = :date
485
424
  end
486
- end
425
+ else
426
+ end
427
+ end
428
+ excelx_type = [:numeric_or_formula,format]
429
+ excelx_value = cell.content
430
+ if tmp_type == :shared
431
+ vt = :string
432
+ str_v = @shared_table[cell.content.to_i]
433
+ excelx_type = :string
434
+ elsif tmp_type == :boolean
435
+ vt = :boolean
436
+ cell.content.to_i == 1 ? v = 'TRUE' : v = 'FALSE'
437
+ elsif tmp_type == :date
438
+ vt = :date
439
+ v = cell.content
440
+ elsif tmp_type == :time
441
+ vt = :time
442
+ v = cell.content
443
+ elsif tmp_type == :datetime
444
+ vt = :datetime
445
+ v = cell.content
446
+ elsif tmp_type == :formula
447
+ vt = :formula
448
+ v = cell.content.to_f #TODO: !!!!
449
+ else
450
+ vt = :float
451
+ v = cell.content
487
452
  end
453
+ #puts "vt: #{vt}" if cell.text.include? "22606.5120"
454
+ x,y = split_coordinate(c.attributes.to_h['r'])
455
+ tr=nil #TODO: ???s
456
+ set_cell_values(sheet,x,y,0,v,vt,formula,tr,str_v,excelx_type,excelx_value,s_attribute)
488
457
  end
489
458
  end
490
459
  end
@@ -500,17 +469,9 @@ class Excelx < GenericSpreadsheet
500
469
  def check_default_sheet
501
470
  sheet_found = false
502
471
  raise ArgumentError, "Error: default_sheet not set" if @default_sheet == nil
503
- @workbook_doc.each_element do |workbook|
504
- workbook.each_element do |el|
505
- if el.name == "sheets"
506
- el.each_element do |sheet|
507
- if @default_sheet == sheet.attributes['name']
508
- sheet_found = true
509
- end
510
- end
511
- end
512
- end
513
- end
472
+
473
+ sheet_found = true if sheets.include?(@default_sheet)
474
+
514
475
  if ! sheet_found
515
476
  raise RangeError, "sheet '#{@default_sheet}' not found"
516
477
  end
@@ -570,18 +531,21 @@ class Excelx < GenericSpreadsheet
570
531
 
571
532
  # read the shared strings xml document
572
533
  def read_shared_strings(doc)
573
- doc.each_element do |sst|
574
- if sst.name == 'sst'
575
- sst.each_element do |si|
576
- if si.name == 'si'
577
- si.each_element do |elem|
578
- if elem.name == 't'
579
- @shared_table << elem.text
580
- end
534
+ doc.find("//*[local-name()='si']").each do |si|
535
+ shared_table_entry = ''
536
+ si.each_element do |elem|
537
+ if (elem.name == 'r')
538
+ elem.each_element do |r_elem|
539
+ if (r_elem.name == 't')
540
+ shared_table_entry << r_elem.content
581
541
  end
582
542
  end
583
543
  end
544
+ if (elem.name == 't')
545
+ shared_table_entry = elem.content
546
+ end
584
547
  end
548
+ @shared_table << shared_table_entry
585
549
  end
586
550
  end
587
551
 
@@ -590,47 +554,39 @@ class Excelx < GenericSpreadsheet
590
554
  @numFmts = []
591
555
  @cellXfs = []
592
556
  fonts = []
593
- doc.each_element do |e1|
594
- if e1.name == "styleSheet"
595
- e1.each_element do |e2|
596
- if e2.name == "numFmts"
597
- e2.each_element do |e3|
598
- if e3.name == 'numFmt'
599
- numFmtId = e3.attributes['numFmtId']
600
- formatCode = e3.attributes['formatCode']
601
- @numFmts << [numFmtId, formatCode]
602
- end
603
- end
604
- elsif e2.name == "fonts"
605
- e2.each_element do |e3|
606
- if e3.name == 'font'
607
- font = Excelx::Font.new
608
- e3.each do |e4|
609
- case e4.name
610
- when 'b'
611
- font.bold = true
612
- when 'i'
613
- font.italic = true
614
- when 'u'
615
- font.underline = true
616
- end
617
- end
618
- fonts << font
619
- end
620
- end
621
- elsif e2.name == "cellXfs"
622
- e2.each_element do |e3|
623
- if e3.name == 'xf'
624
- numFmtId = e3.attributes['numFmtId']
625
- @cellXfs << [numFmtId]
626
- fontId = e3.attributes['fontId'].to_i
627
- @style_definitions << fonts[fontId]
557
+
558
+ doc.find("//*[local-name()='numFmt']").each do |numFmt|
559
+ numFmtId = numFmt.attributes.to_h['numFmtId']
560
+ formatCode = numFmt.attributes.to_h['formatCode']
561
+ @numFmts << [numFmtId, formatCode]
562
+ end
563
+ doc.find("//*[local-name()='fonts']").each do |fonts_el|
564
+ fonts_el.each_element do |font_el|
565
+ if font_el.name == 'font'
566
+ font = Excelx::Font.new
567
+ font_el.each_element do |font_sub_el|
568
+ case font_sub_el.name
569
+ when 'b'
570
+ font.bold = true
571
+ when 'i'
572
+ font.italic = true
573
+ when 'u'
574
+ font.underline = true
628
575
  end
629
- end
630
576
  end
577
+ fonts << font
631
578
  end
632
579
  end
633
580
  end
581
+
582
+ doc.find("//*[local-name()='cellXfs']").each do |xfs|
583
+ xfs.each do |xf|
584
+ numFmtId = xf.attributes.to_h['numFmtId']
585
+ @cellXfs << [numFmtId]
586
+ fontId = xf.attributes.to_h['fontId'].to_i
587
+ @style_definitions << fonts[fontId]
588
+ end
589
+ end
634
590
  end
635
591
 
636
592
  # convert internal excelx attribute to a format
@@ -1,6 +1,6 @@
1
-
2
1
  require 'rubygems'
3
- require 'rexml/document'
2
+ gem 'libxml-ruby', '>= 0.8.3'
3
+ require 'xml'
4
4
  require 'fileutils'
5
5
  require 'zip/zipfilesystem'
6
6
  require 'date'
@@ -38,7 +38,7 @@ class Openoffice < GenericSpreadsheet
38
38
  @file_nr = @@nr
39
39
  extract_content
40
40
  file = File.new(File.join(@tmpdir, @file_nr.to_s+"_roo_content.xml"))
41
- @doc = REXML::Document.new file
41
+ @doc = XML::Parser.io(file).parse
42
42
  file.close
43
43
  ensure
44
44
  #if ENV["roo_local"] != "thomas-p"
@@ -174,31 +174,14 @@ class Openoffice < GenericSpreadsheet
174
174
  end
175
175
  end
176
176
 
177
- # returns an array of sheet names in the spreadsheet
178
177
  def sheets
179
178
  return_sheets = []
180
- oo_document_count = 0
181
- @doc.each_element do |oo_document|
182
- oo_document_count += 1
183
- oo_element_count = 0
184
- oo_document.each_element do |oo_element|
185
- oo_element_count += 1
186
- if oo_element.name == "body"
187
- oo_element.each_element do |be|
188
- if be.name == "spreadsheet"
189
- be.each_element do |se|
190
- if se.name == "table"
191
- return_sheets << se.attributes['name']
192
- end
193
- end
194
- end
195
- end
196
- end
197
- end
179
+ @doc.find("//*[local-name()='table']").each do |sheet|
180
+ return_sheets << sheet.attributes['name']
198
181
  end
199
182
  return_sheets
200
183
  end
201
-
184
+
202
185
  # version of the openoffice document
203
186
  # at 2007 this is always "1.0"
204
187
  def officeversion
@@ -240,14 +223,14 @@ class Openoffice < GenericSpreadsheet
240
223
 
241
224
  # read the version of the OO-Version
242
225
  def oo_version
243
- @doc.each_element do |oo_document|
244
- @officeversion = oo_document.attributes['version']
226
+ @doc.find("//*[local-name()='document-content']").each do |office|
227
+ @officeversion = office.attributes['version']
245
228
  end
246
229
  end
247
230
 
248
231
  # helper function to set the internal representation of cells
249
- def set_cell_values(sheet,x,y,i,v,vt,formula,tr,str_v,style_name)
250
- key = [y,x+i]
232
+ def set_cell_values(sheet,x,y,i,v,vt,formula,table_cell,str_v,style_name)
233
+ key = [y,x+i]
251
234
  @cell_type[sheet] = {} unless @cell_type[sheet]
252
235
  @cell_type[sheet][key] = Openoffice.oo_type_2_roo_type(vt)
253
236
  @formula[sheet] = {} unless @formula[sheet]
@@ -261,13 +244,13 @@ class Openoffice < GenericSpreadsheet
261
244
  when :string
262
245
  @cell[sheet][key] = str_v
263
246
  when :date
264
- if tr.attributes['date-value'].size != "XXXX-XX-XX".size
247
+ if table_cell.attributes['date-value'].size != "XXXX-XX-XX".size
265
248
  #-- dann ist noch eine Uhrzeit vorhanden
266
249
  #-- "1961-11-21T12:17:18"
267
- @cell[sheet][key] = DateTime.parse(tr.attributes['date-value'])
250
+ @cell[sheet][key] = DateTime.parse(table_cell.attributes['date-value'])
268
251
  @cell_type[sheet][key] = :datetime
269
252
  else
270
- @cell[sheet][key] = tr.attributes['date-value']
253
+ @cell[sheet][key] = table_cell.attributes['date-value']
271
254
  end
272
255
  when :percentage
273
256
  @cell[sheet][key] = v.to_f
@@ -289,104 +272,88 @@ class Openoffice < GenericSpreadsheet
289
272
  sheet_found = false
290
273
  raise ArgumentError, "Error: sheet '#{sheet||'nil'}' not valid" if @default_sheet == nil and sheet==nil
291
274
  raise RangeError unless self.sheets.include? sheet
292
- oo_document_count = 0
293
- @doc.each_element do |oo_document|
294
- # @officeversion = oo_document.attributes['version']
295
- oo_document_count += 1
296
- oo_element_count = 0
297
- oo_document.each_element do |oo_element|
298
- oo_element_count += 1
299
- if oo_element.name == "body"
300
- oo_element.each_element do |be|
301
- if be.name == "spreadsheet"
302
- be.each_element do |se|
303
- if se.name == "table"
304
- if se.attributes['name']==sheet
305
- sheet_found = true
306
- x=1
307
- y=1
308
- se.each_element do |te|
309
- if te.name == "table-column"
310
- rep = te.attributes["number-columns-repeated"]
311
- @style_defaults[sheet] << te.attributes["default-cell-style-name"]
312
- elsif te.name == "table-row"
313
- if te.attributes['number-rows-repeated']
314
- skip_y = te.attributes['number-rows-repeated'].to_i
315
- y = y + skip_y - 1 # minus 1 because this line will be counted as a line element
316
- end
317
- te.each_element do |tr|
318
- if tr.name == 'table-cell'
319
- skip = tr.attributes['number-columns-repeated']
320
- formula = tr.attributes['formula']
321
- vt = tr.attributes['value-type']
322
- v = tr.attributes['value']
323
- style_name = tr.attributes['style-name']
324
- if vt == 'string'
325
- str_v = ''
326
- # insert \n if there is more than one paragraph
327
- para_count = 0
328
- tr.each_element do |str|
329
- if str.name == 'p'
330
- v = str.text
331
- str_v += "\n" if para_count > 0
332
- para_count += 1
333
- if str.children.size > 1
334
- str_v = children_to_string(str.children)
335
- else
336
- str.children.each {|child|
337
- str_v = str_v + child.to_s #.text
338
- }
339
- end
340
- str_v.gsub!(/&apos;/,"'") # special case not supported by unescapeHTML
341
- str_v = CGI.unescapeHTML(str_v)
342
- end # == 'p'
343
- end
344
- elsif vt == 'time'
345
- tr.each_element do |str|
346
- if str.name == 'p'
347
- v = str.text
348
- end
349
- end
350
- elsif vt == '' or vt == nil
351
- #
352
- elsif vt == 'date'
353
- #
354
- elsif vt == 'percentage'
355
- #
356
- elsif vt == 'float'
357
- #
358
- elsif vt == 'boolean'
359
- v = tr.attributes['boolean-value']
360
- #
361
- else
362
- # raise "unknown type #{vt}"
363
- end
364
- if skip
365
- if v != nil or tr.attributes['date-value']
366
- 0.upto(skip.to_i-1) do |i|
367
- set_cell_values(sheet,x,y,i,v,vt,formula,tr,str_v,style_name)
368
- end
369
- end
370
- x += (skip.to_i - 1)
371
- end # if skip
372
- set_cell_values(sheet,x,y,0,v,vt,formula,tr,str_v,style_name)
373
- x += 1
374
- end
375
- end
376
- y += 1
377
- x = 1
275
+
276
+ @doc.find("//*[local-name()='table']").each do |ws|
277
+ if sheet == ws.attributes['name']
278
+ sheet_found = true
279
+ col = 1
280
+ row = 1
281
+ ws.each_element do |table_element|
282
+ case table_element.name
283
+ when 'table-column'
284
+ @style_defaults[sheet] << table_element.attributes['default-cell-style-name']
285
+ when 'table-row'
286
+ if table_element.attributes['number-rows-repeated']
287
+ skip_row = table_element.attributes['number-rows-repeated'].to_i
288
+ row = row + skip_row - 1
289
+ end
290
+ table_element.each_element do |cell|
291
+ skip_col = cell.attributes['number-columns-repeated']
292
+ formula = cell.attributes['formula']
293
+ vt = cell.attributes['value-type']
294
+ v = cell.attributes['value']
295
+ style_name = cell.attributes['style-name']
296
+ if vt == 'string'
297
+ str_v = ''
298
+ # insert \n if there is more than one paragraph
299
+ para_count = 0
300
+ cell.each_element do |str|
301
+ if str.name == 'p'
302
+ v = str.content
303
+ str_v += "\n" if para_count > 0
304
+ para_count += 1
305
+ if str.children.size > 1
306
+ str_v += children_to_string(str.children)
307
+ else
308
+ str.children.each do |child|
309
+ str_v += child.content #.text
378
310
  end
379
311
  end
380
- end # sheet
312
+ str_v.gsub!(/&apos;/,"'") # special case not supported by unescapeHTML
313
+ str_v = CGI.unescapeHTML(str_v)
314
+ end # == 'p'
315
+ end
316
+ elsif vt == 'time'
317
+ cell.each_element do |str|
318
+ if str.name == 'p'
319
+ v = str.content
320
+ end
381
321
  end
322
+ elsif vt == '' or vt == nil
323
+ #
324
+ elsif vt == 'date'
325
+ #
326
+ elsif vt == 'percentage'
327
+ #
328
+ elsif vt == 'float'
329
+ #
330
+ elsif vt == 'boolean'
331
+ v = cell.attributes['boolean-value']
332
+ #
333
+ else
334
+ # raise "unknown type #{vt}"
382
335
  end
383
- end
336
+ if skip_col
337
+ if v != nil or cell.attributes['date-value']
338
+ 0.upto(skip_col.to_i-1) do |i|
339
+ set_cell_values(sheet,col,row,i,v,vt,formula,cell,str_v,style_name)
340
+ end
341
+ end
342
+ col += (skip_col.to_i - 1)
343
+ end # if skip
344
+ set_cell_values(sheet,col,row,0,v,vt,formula,cell,str_v,style_name)
345
+ col += 1
346
+ end
347
+ row += 1
348
+ col = 1
384
349
  end
385
- elsif oo_element.name == "automatic-styles"
386
- read_styles(oo_element)
387
350
  end
388
351
  end
389
352
  end
353
+
354
+ @doc.find("//*[local-name()='automatic-styles']").each do |style|
355
+ read_styles(style)
356
+ end
390
357
  if !sheet_found
391
358
  raise RangeError
392
359
  end
@@ -413,25 +380,7 @@ class Openoffice < GenericSpreadsheet
413
380
  def check_default_sheet
414
381
  sheet_found = false
415
382
  raise ArgumentError, "Error: default_sheet not set" if @default_sheet == nil
416
- @doc.each_element do |oo_document|
417
- oo_element_count = 0
418
- oo_document.each_element do |oo_element|
419
- oo_element_count += 1
420
- if oo_element.name == "body"
421
- oo_element.each_element do |be|
422
- if be.name == "spreadsheet"
423
- be.each_element do |se|
424
- if se.name == "table"
425
- if se.attributes['name'] == @default_sheet
426
- sheet_found = true
427
- end # sheet
428
- end
429
- end
430
- end
431
- end
432
- end
433
- end
434
- end
383
+ sheet_found = true if sheets.include?(@default_sheet)
435
384
  if ! sheet_found
436
385
  raise RangeError, "sheet '#{@default_sheet}' not found"
437
386
  end
@@ -487,8 +436,8 @@ class Openoffice < GenericSpreadsheet
487
436
  def children_to_string(children)
488
437
  result = ''
489
438
  children.each {|child|
490
- if child.class == REXML::Text
491
- result = result + child.to_s
439
+ if child.text?
440
+ result = result + child.content
492
441
  else
493
442
  if child.name == 's'
494
443
  compressed_spaces = child.attributes['c'].to_i
@@ -498,7 +447,7 @@ class Openoffice < GenericSpreadsheet
498
447
  end
499
448
  result = result + " "*compressed_spaces
500
449
  else
501
- result = result + child.to_s
450
+ result = result + child.content
502
451
  end
503
452
  end
504
453
  }
Binary file
Binary file
Binary file
Binary file
data/test/test_roo.rb CHANGED
@@ -3817,7 +3817,7 @@ Sheet 3:
3817
3817
  def do_test_xml(oo)
3818
3818
  assert_nothing_raised {oo.to_xml}
3819
3819
  sheetname = oo.sheets.first
3820
- doc = REXML::Document.new(oo.to_xml)
3820
+ doc = XML::Parser.string(oo.to_xml).parse
3821
3821
  doc.root.each_element {|xml_sheet|
3822
3822
  all_cells = init_all_cells(oo, sheetname)
3823
3823
  x = 0
@@ -3831,7 +3831,7 @@ Sheet 3:
3831
3831
  result = [
3832
3832
  cell.attributes['row'],
3833
3833
  cell.attributes['column'],
3834
- cell.text,
3834
+ cell.content,
3835
3835
  cell.attributes['type'],
3836
3836
  ]
3837
3837
  assert_equal expected, result
@@ -4847,7 +4847,7 @@ This attached file is the newer format of Microsoft Excel (.xlsx).
4847
4847
  end
4848
4848
  end
4849
4849
 
4850
- def test_cell_excel_boolean
4850
+ def test_cell_boolean
4851
4851
  if EXCEL
4852
4852
  oo = Excel.new(File.join(TESTDIR,"boolean.xls"))
4853
4853
  oo.default_sheet = oo.sheets.first
@@ -4868,6 +4868,31 @@ This attached file is the newer format of Microsoft Excel (.xlsx).
4868
4868
  end
4869
4869
  end
4870
4870
 
4871
+ def test_cell_multiline
4872
+ if EXCEL
4873
+ oo = Excel.new(File.join(TESTDIR,"paragraph.xls"))
4874
+ oo.default_sheet = oo.sheets.first
4875
+ assert_equal "This is a test\nof a multiline\nCell", oo.cell(1,1)
4876
+ assert_equal "This is a test\n¶\nof a multiline\n\nCell", oo.cell(1,2)
4877
+ assert_equal "first p\n\nsecond p\n\nlast p", oo.cell(2,1)
4878
+ end
4879
+ if OPENOFFICE
4880
+ oo = Openoffice.new(File.join(TESTDIR,"paragraph.ods"))
4881
+ oo.default_sheet = oo.sheets.first
4882
+ assert_equal "This is a test\nof a multiline\nCell", oo.cell(1,1)
4883
+ assert_equal "This is a test\n¶\nof a multiline\n\nCell", oo.cell(1,2)
4884
+ assert_equal "first p\n\nsecond p\n\nlast p", oo.cell(2,1)
4885
+ end
4886
+ if EXCELX
4887
+ oo = Excelx.new(File.join(TESTDIR,"paragraph.xlsx"))
4888
+ oo.default_sheet = oo.sheets.first
4889
+ assert_equal "This is a test\nof a multiline\nCell", oo.cell(1,1)
4890
+ assert_equal "This is a test\n¶\nof a multiline\n\nCell", oo.cell(1,2)
4891
+ assert_equal "first p\n\nsecond p\n\nlast p", oo.cell(2,1)
4892
+ end
4893
+ end
4894
+
4895
+
4871
4896
  # for test_cell_styles
4872
4897
  def verify_cell_fonts(oo)
4873
4898
  oo.default_sheet = oo.sheets.first
@@ -4940,7 +4965,21 @@ This attached file is the newer format of Microsoft Excel (.xlsx).
4940
4965
  if EXCEL
4941
4966
  oo = Excel.new(File.join(TESTDIR,"style.xls"))
4942
4967
  verify_cell_fonts(oo)
4943
- end
4944
4968
  end
4969
+ end
4970
+
4971
+ # If a cell has a date-like string but is preceded by a '
4972
+ # to force that date to be treated like a string, we were getting an exception.
4973
+ # This test just checks for that exception to make sure it's not raised in this case.
4974
+ def test_date_to_float_conversion
4975
+ if EXCEL
4976
+ assert_nothing_raised(NoMethodError) do
4977
+ oo = Excel.new(File.join(TESTDIR,"datetime_floatconv.xls"))
4978
+ oo.default_sheet = oo.sheets.first
4979
+ oo.cell('a',1)
4980
+ oo.cell('a',2)
4981
+ end
4982
+ end
4983
+ end
4945
4984
 
4946
4985
  end # class
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hmcgowan-roo
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.4
4
+ version: 1.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Thomas Preymesser
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-03-12 00:00:00 -07:00
12
+ date: 2009-04-20 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -59,7 +59,7 @@ executables: []
59
59
  extensions: []
60
60
 
61
61
  extra_rdoc_files:
62
- - README.txt
62
+ - README.markdown
63
63
  - History.txt
64
64
  files:
65
65
  - lib/roo
@@ -71,74 +71,6 @@ files:
71
71
  - lib/roo/roo_rails_helper.rb
72
72
  - lib/roo/version.rb
73
73
  - lib/roo.rb
74
- - test/_ods
75
- - test/_ods/Configurations2
76
- - test/_ods/Configurations2/accelerator
77
- - test/_ods/Configurations2/accelerator/current.xml
78
- - test/_ods/Configurations2/floater
79
- - test/_ods/Configurations2/images
80
- - test/_ods/Configurations2/images/Bitmaps
81
- - test/_ods/Configurations2/menubar
82
- - test/_ods/Configurations2/popupmenu
83
- - test/_ods/Configurations2/progressbar
84
- - test/_ods/Configurations2/statusbar
85
- - test/_ods/Configurations2/toolbar
86
- - test/_ods/content.xml
87
- - test/_ods/META-INF
88
- - test/_ods/META-INF/manifest.xml
89
- - test/_ods/meta.xml
90
- - test/_ods/mimetype
91
- - test/_ods/settings.xml
92
- - test/_ods/style.ods
93
- - test/_ods/styles.xml
94
- - test/_ods/Thumbnails
95
- - test/_ods/Thumbnails/thumbnail.png
96
- - test/_ods_old
97
- - test/_ods_old/Configurations2
98
- - test/_ods_old/Configurations2/accelerator
99
- - test/_ods_old/Configurations2/accelerator/current.xml
100
- - test/_ods_old/Configurations2/floater
101
- - test/_ods_old/Configurations2/images
102
- - test/_ods_old/Configurations2/images/Bitmaps
103
- - test/_ods_old/Configurations2/menubar
104
- - test/_ods_old/Configurations2/popupmenu
105
- - test/_ods_old/Configurations2/progressbar
106
- - test/_ods_old/Configurations2/statusbar
107
- - test/_ods_old/Configurations2/toolbar
108
- - test/_ods_old/content.xml
109
- - test/_ods_old/META-INF
110
- - test/_ods_old/META-INF/manifest.xml
111
- - test/_ods_old/meta.xml
112
- - test/_ods_old/mimetype
113
- - test/_ods_old/settings.xml
114
- - test/_ods_old/style.ods
115
- - test/_ods_old/styles.xml
116
- - test/_ods_old/Thumbnails
117
- - test/_ods_old/Thumbnails/thumbnail.png
118
- - test/_xlsx
119
- - test/_xlsx/[Content_Types].xml
120
- - test/_xlsx/_rels
121
- - test/_xlsx/docProps
122
- - test/_xlsx/docProps/app.xml
123
- - test/_xlsx/docProps/core.xml
124
- - test/_xlsx/style.xlsx
125
- - test/_xlsx/style.xlsx.cpgz
126
- - test/_xlsx/xl
127
- - test/_xlsx/xl/_rels
128
- - test/_xlsx/xl/_rels/workbook.xml.rels
129
- - test/_xlsx/xl/printerSettings
130
- - test/_xlsx/xl/printerSettings/printerSettings1.bin
131
- - test/_xlsx/xl/sharedStrings.xml
132
- - test/_xlsx/xl/styles.xml
133
- - test/_xlsx/xl/theme
134
- - test/_xlsx/xl/theme/theme1.xml
135
- - test/_xlsx/xl/workbook.xml
136
- - test/_xlsx/xl/worksheets
137
- - test/_xlsx/xl/worksheets/_rels
138
- - test/_xlsx/xl/worksheets/_rels/sheet1.xml.rels
139
- - test/_xlsx/xl/worksheets/sheet1.xml
140
- - test/_xlsx/xl/worksheets/sheet2.xml
141
- - test/_xlsx/xl/worksheets/sheet3.xml
142
74
  - test/bbu.ods
143
75
  - test/bbu.xls
144
76
  - test/bbu.xlsx
@@ -159,6 +91,7 @@ files:
159
91
  - test/datetime.ods
160
92
  - test/datetime.xls
161
93
  - test/datetime.xlsx
94
+ - test/datetime_floatconv.xls
162
95
  - test/emptysheets.ods
163
96
  - test/emptysheets.xls
164
97
  - test/false_encoding.xls
@@ -175,6 +108,9 @@ files:
175
108
  - test/only_one_sheet.ods
176
109
  - test/only_one_sheet.xls
177
110
  - test/only_one_sheet.xlsx
111
+ - test/paragraph.ods
112
+ - test/paragraph.xls
113
+ - test/paragraph.xlsx
178
114
  - test/ric.ods
179
115
  - test/simple_spreadsheet.ods
180
116
  - test/simple_spreadsheet.xls
@@ -190,14 +126,14 @@ files:
190
126
  - test/time-test.ods
191
127
  - test/time-test.xls
192
128
  - test/time-test.xlsx
193
- - README.txt
129
+ - README.markdown
194
130
  - History.txt
195
131
  has_rdoc: true
196
132
  homepage: http://roo.rubyforge.org
197
133
  post_install_message:
198
134
  rdoc_options:
199
135
  - --main
200
- - README.txt
136
+ - README.markdown
201
137
  - --inline-source
202
138
  - --charset=UTF-8
203
139
  require_paths: