hmcgowan-roo 1.2.4 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,11 +1,19 @@
1
- README for roo
2
- ==============
1
+ # README for Roo
3
2
 
4
- Installation:
3
+ This is the semi-official roo repository. I've been unable to contact the maintainer so this
4
+ repository should allow us to continue in the interim. If you'd like to contribute I'm happy
5
+ to pull your changes in and will be making periodic releases from here until we can get
6
+ the gems on RubyForge.
5
7
 
8
+
9
+ ## Installation
10
+
11
+ # Run the following if you haven't done so before:
12
+ gem sources -a http://gems.github.com
13
+ # Install the gem:
6
14
  sudo gem install roo
7
15
 
8
- Usage:
16
+ ## Usage:
9
17
 
10
18
  require 'rubygems'
11
19
  require 'roo'
data/lib/roo/excel.rb CHANGED
@@ -1,8 +1,12 @@
1
1
  require 'rubygems'
2
2
  gem 'spreadsheet', '>= 0.6.3.1'
3
3
  require 'spreadsheet'
4
- CHARGUESS = false
5
- require 'charguess' if CHARGUESS
4
+ CHARGUESS = begin
5
+ require 'charguess'
6
+ true
7
+ rescue LoadError => e
8
+ false
9
+ end
6
10
 
7
11
  # ruby-spreadsheet has a font object so we're extending it
8
12
  # with our own functionality but still providing full access
@@ -79,18 +83,7 @@ class Excel < GenericSpreadsheet
79
83
  def sheets
80
84
  result = []
81
85
  @workbook.worksheets.each do |worksheet|
82
- # TODO: is there a better way to do conversion?
83
- if CHARGUESS
84
- encoding = CharGuess::guess(worksheet.name)
85
- encoding = 'unicode' unless encoding
86
-
87
-
88
- result << Iconv.new('utf-8',encoding).iconv(
89
- worksheet.name
90
- )
91
- else
92
- result << platform_specific_iconv(worksheet.name)
93
- end
86
+ result << normalize_string(worksheet.name)
94
87
  end
95
88
  return result
96
89
  end
@@ -243,11 +236,7 @@ class Excel < GenericSpreadsheet
243
236
  return name-1 if name.kind_of?(Fixnum)
244
237
  i = 0
245
238
  @workbook.worksheets.each do |worksheet|
246
- # TODO: is there a better way to do conversion?
247
- return i if name == platform_specific_iconv(worksheet.name)
248
- #Iconv.new('utf-8','unicode').iconv(
249
- # @workbook.worksheet(i).name
250
- # )
239
+ return i if name == normalize_string(worksheet.name)
251
240
  i += 1
252
241
  end
253
242
  raise StandardError, "sheet '#{name}' not found"
@@ -272,7 +261,16 @@ class Excel < GenericSpreadsheet
272
261
  }
273
262
  ! content
274
263
  end
275
-
264
+
265
+ def normalize_string(value)
266
+ value = every_second_null?(value) ? remove_every_second_null(value) : value
267
+ if CHARGUESS && encoding = CharGuess::guess(value)
268
+ Iconv.new('utf-8', encoding)
269
+ else
270
+ platform_specific_iconv(value)
271
+ end
272
+ end
273
+
276
274
  def platform_specific_iconv(value)
277
275
  case RUBY_PLATFORM.downcase
278
276
  when /darwin/
@@ -381,7 +379,7 @@ class Excel < GenericSpreadsheet
381
379
  format = row.format(idx)
382
380
  if format.date_or_time?
383
381
  cell = row.at(idx)
384
- cell.to_s.to_f > 0 ? true : false # cell value must be numeric
382
+ true if Float(cell) > 0 rescue false
385
383
  else
386
384
  false
387
385
  end
data/lib/roo/excelx.rb CHANGED
@@ -1,6 +1,7 @@
1
1
 
2
2
  require 'rubygems'
3
- require 'rexml/document'
3
+ gem 'libxml-ruby', '>= 0.8.3'
4
+ require 'xml'
4
5
  require 'fileutils'
5
6
  require 'zip/zipfilesystem'
6
7
  require 'date'
@@ -102,12 +103,12 @@ class Excelx < GenericSpreadsheet
102
103
  @file_nr = @@nr
103
104
  extract_content(@filename)
104
105
  file = File.new(File.join(@tmpdir, @file_nr.to_s+"_roo_workbook.xml"))
105
- @workbook_doc = REXML::Document.new file
106
+ @workbook_doc = XML::Parser.io(file).parse
106
107
  file.close
107
108
  @shared_table = []
108
109
  if File.exist?(File.join(@tmpdir, @file_nr.to_s+'_roo_sharedStrings.xml'))
109
110
  file = File.new(File.join(@tmpdir, @file_nr.to_s+'_roo_sharedStrings.xml'))
110
- @sharedstring_doc = REXML::Document.new file
111
+ @sharedstring_doc = XML::Parser.io(file).parse
111
112
  file.close
112
113
  read_shared_strings(@sharedstring_doc)
113
114
  end
@@ -115,14 +116,14 @@ class Excelx < GenericSpreadsheet
115
116
  @style_definitions = Array.new { |h,k| h[k] = {} }
116
117
  if File.exist?(File.join(@tmpdir, @file_nr.to_s+'_roo_styles.xml'))
117
118
  file = File.new(File.join(@tmpdir, @file_nr.to_s+'_roo_styles.xml'))
118
- @styles_doc = REXML::Document.new file
119
+ @styles_doc = XML::Parser.io(file).parse
119
120
  file.close
120
121
  read_styles(@styles_doc)
121
122
  end
122
123
  @sheet_doc = []
123
124
  @sheet_files.each_with_index do |item, i|
124
125
  file = File.new(item)
125
- @sheet_doc[i] = REXML::Document.new file
126
+ @sheet_doc[i] = XML::Parser.io(file).parse
126
127
  file.close
127
128
  end
128
129
  ensure
@@ -288,18 +289,11 @@ class Excelx < GenericSpreadsheet
288
289
  # returns an array of sheet names in the spreadsheet
289
290
  def sheets
290
291
  return_sheets = []
291
- @workbook_doc.each_element do |workbook|
292
- workbook.each_element do |el|
293
- if el.name == "sheets"
294
- el.each_element do |sheet|
295
- return_sheets << sheet.attributes['name']
296
- end
297
- end
298
- end
292
+ @workbook_doc.find("//*[local-name()='sheet']").each do |sheet|
293
+ return_sheets << sheet.attributes.to_h['name']
299
294
  end
300
295
  return_sheets
301
296
  end
302
-
303
297
  # shows the internal representation of all cells
304
298
  # for debugging purposes
305
299
  def to_s(sheet=nil)
@@ -404,87 +398,62 @@ class Excelx < GenericSpreadsheet
404
398
  raise ArgumentError, "Error: sheet '#{sheet||'nil'}' not valid" if @default_sheet == nil and sheet==nil
405
399
  raise RangeError unless self.sheets.include? sheet
406
400
  n = self.sheets.index(sheet)
407
- @sheet_doc[n].each_element do |worksheet|
408
- worksheet.each_element do |elem|
409
- if elem.name == 'sheetData'
410
- elem.each_element do |sheetdata|
411
- if sheetdata.name == 'row'
412
- sheetdata.each_element do |row|
413
- if row.name == 'c'
414
- s_attribute = row.attributes['s']
415
- if row.attributes['t'] == 's'
416
- tmp_type = :shared
417
- elsif row.attributes['t'] == 'b'
418
- tmp_type = :boolean
419
- else
420
- format = attribute2format(s_attribute)
421
- tmp_type = format2type(format)
422
- end
423
- formula = nil
424
- row.each_element do |cell|
425
- # puts "cell.name: #{cell.name}" if cell.text.include? "22606.5120"
426
- # puts "cell.text: #{cell.text}" if cell.text.include? "22606.5120"
427
- if cell.name == 'f'
428
- formula = cell.text
429
- end
430
- if cell.name == 'v'
431
- #puts "tmp_type: #{tmp_type}" if cell.text.include? "22606.5120"
432
- #puts cell.name
433
- if tmp_type == :time or tmp_type == :datetime #2008-07-26
434
- #p cell.text
435
- # p cell.text.to_f if cell.text.include? "22606.5120"
436
- if cell.text.to_f >= 1.0 # 2008-07-26
437
- # puts ">= 1.0" if cell.text.include? "22606.5120"
438
- # puts "cell.text.to_f: #{cell.text.to_f}" if cell.text.include? "22606.5120"
439
- #puts "cell.text.to_f.floor: #{cell.text.to_f.floor}" if cell.text.include? "22606.5120"
440
- if (cell.text.to_f - cell.text.to_f.floor).abs > 0.000001 #TODO:
441
- # puts "abs ist groesser" if cell.text.include? "22606.5120"
442
- # @cell[sheet][key] = DateTime.parse(tr.attributes['date-value'])
443
- tmp_type = :datetime
444
-
445
- else
446
- #puts ":date"
447
- tmp_type = :date # 2008-07-26
448
- end
449
- else
450
- #puts "<1.0"
451
- end # 2008-07-26
452
- end # 2008-07-26
453
- excelx_type = [:numeric_or_formula,format]
454
- excelx_value = cell.text
455
- if tmp_type == :shared
456
- vt = :string
457
- str_v = @shared_table[cell.text.to_i]
458
- excelx_type = :string
459
- elsif tmp_type == :boolean
460
- vt = :boolean
461
- cell.text.to_i == 1 ? v = 'TRUE' : v = 'FALSE'
462
- elsif tmp_type == :date
463
- vt = :date
464
- v = cell.text
465
- elsif tmp_type == :time
466
- vt = :time
467
- v = cell.text
468
- elsif tmp_type == :datetime
469
- vt = :datetime
470
- v = cell.text
471
- elsif tmp_type == :formula
472
- vt = :formula
473
- v = cell.text.to_f #TODO: !!!!
474
- else
475
- vt = :float
476
- v = cell.text
477
- end
478
- #puts "vt: #{vt}" if cell.text.include? "22606.5120"
479
- x,y = split_coordinate(row.attributes['r'])
480
- tr=nil #TODO: ???s
481
- set_cell_values(sheet,x,y,0,v,vt,formula,tr,str_v,excelx_type,excelx_value,s_attribute)
482
- end
483
- end
484
- end
401
+ @sheet_doc[n].find("//*[local-name()='c']").each do |c|
402
+ s_attribute = c.attributes.to_h['s'].to_i # should be here
403
+ if (c.attributes.to_h['t'] == 's')
404
+ tmp_type = :shared
405
+ elsif (c.attributes.to_h['t'] == 'b')
406
+ tmp_type = :boolean
407
+ else
408
+ # s_attribute = c.attributes.to_h['s'].to_i # was here
409
+ format = attribute2format(s_attribute)
410
+ tmp_type = format2type(format)
411
+ end
412
+ formula = nil
413
+ c.each_element do |cell|
414
+ if cell.name == 'f'
415
+ formula = cell.content
416
+ end
417
+ if cell.name == 'v'
418
+ if tmp_type == :time or tmp_type == :datetime
419
+ if cell.content.to_f >= 1.0
420
+ if (cell.content.to_f - cell.content.to_f.floor).abs > 0.000001
421
+ tmp_type = :datetime
422
+ else
423
+ tmp_type = :date
485
424
  end
486
- end
425
+ else
426
+ end
427
+ end
428
+ excelx_type = [:numeric_or_formula,format]
429
+ excelx_value = cell.content
430
+ if tmp_type == :shared
431
+ vt = :string
432
+ str_v = @shared_table[cell.content.to_i]
433
+ excelx_type = :string
434
+ elsif tmp_type == :boolean
435
+ vt = :boolean
436
+ cell.content.to_i == 1 ? v = 'TRUE' : v = 'FALSE'
437
+ elsif tmp_type == :date
438
+ vt = :date
439
+ v = cell.content
440
+ elsif tmp_type == :time
441
+ vt = :time
442
+ v = cell.content
443
+ elsif tmp_type == :datetime
444
+ vt = :datetime
445
+ v = cell.content
446
+ elsif tmp_type == :formula
447
+ vt = :formula
448
+ v = cell.content.to_f #TODO: !!!!
449
+ else
450
+ vt = :float
451
+ v = cell.content
487
452
  end
453
+ #puts "vt: #{vt}" if cell.text.include? "22606.5120"
454
+ x,y = split_coordinate(c.attributes.to_h['r'])
455
+ tr=nil #TODO: ???s
456
+ set_cell_values(sheet,x,y,0,v,vt,formula,tr,str_v,excelx_type,excelx_value,s_attribute)
488
457
  end
489
458
  end
490
459
  end
@@ -500,17 +469,9 @@ class Excelx < GenericSpreadsheet
500
469
  def check_default_sheet
501
470
  sheet_found = false
502
471
  raise ArgumentError, "Error: default_sheet not set" if @default_sheet == nil
503
- @workbook_doc.each_element do |workbook|
504
- workbook.each_element do |el|
505
- if el.name == "sheets"
506
- el.each_element do |sheet|
507
- if @default_sheet == sheet.attributes['name']
508
- sheet_found = true
509
- end
510
- end
511
- end
512
- end
513
- end
472
+
473
+ sheet_found = true if sheets.include?(@default_sheet)
474
+
514
475
  if ! sheet_found
515
476
  raise RangeError, "sheet '#{@default_sheet}' not found"
516
477
  end
@@ -570,18 +531,21 @@ class Excelx < GenericSpreadsheet
570
531
 
571
532
  # read the shared strings xml document
572
533
  def read_shared_strings(doc)
573
- doc.each_element do |sst|
574
- if sst.name == 'sst'
575
- sst.each_element do |si|
576
- if si.name == 'si'
577
- si.each_element do |elem|
578
- if elem.name == 't'
579
- @shared_table << elem.text
580
- end
534
+ doc.find("//*[local-name()='si']").each do |si|
535
+ shared_table_entry = ''
536
+ si.each_element do |elem|
537
+ if (elem.name == 'r')
538
+ elem.each_element do |r_elem|
539
+ if (r_elem.name == 't')
540
+ shared_table_entry << r_elem.content
581
541
  end
582
542
  end
583
543
  end
544
+ if (elem.name == 't')
545
+ shared_table_entry = elem.content
546
+ end
584
547
  end
548
+ @shared_table << shared_table_entry
585
549
  end
586
550
  end
587
551
 
@@ -590,47 +554,39 @@ class Excelx < GenericSpreadsheet
590
554
  @numFmts = []
591
555
  @cellXfs = []
592
556
  fonts = []
593
- doc.each_element do |e1|
594
- if e1.name == "styleSheet"
595
- e1.each_element do |e2|
596
- if e2.name == "numFmts"
597
- e2.each_element do |e3|
598
- if e3.name == 'numFmt'
599
- numFmtId = e3.attributes['numFmtId']
600
- formatCode = e3.attributes['formatCode']
601
- @numFmts << [numFmtId, formatCode]
602
- end
603
- end
604
- elsif e2.name == "fonts"
605
- e2.each_element do |e3|
606
- if e3.name == 'font'
607
- font = Excelx::Font.new
608
- e3.each do |e4|
609
- case e4.name
610
- when 'b'
611
- font.bold = true
612
- when 'i'
613
- font.italic = true
614
- when 'u'
615
- font.underline = true
616
- end
617
- end
618
- fonts << font
619
- end
620
- end
621
- elsif e2.name == "cellXfs"
622
- e2.each_element do |e3|
623
- if e3.name == 'xf'
624
- numFmtId = e3.attributes['numFmtId']
625
- @cellXfs << [numFmtId]
626
- fontId = e3.attributes['fontId'].to_i
627
- @style_definitions << fonts[fontId]
557
+
558
+ doc.find("//*[local-name()='numFmt']").each do |numFmt|
559
+ numFmtId = numFmt.attributes.to_h['numFmtId']
560
+ formatCode = numFmt.attributes.to_h['formatCode']
561
+ @numFmts << [numFmtId, formatCode]
562
+ end
563
+ doc.find("//*[local-name()='fonts']").each do |fonts_el|
564
+ fonts_el.each_element do |font_el|
565
+ if font_el.name == 'font'
566
+ font = Excelx::Font.new
567
+ font_el.each_element do |font_sub_el|
568
+ case font_sub_el.name
569
+ when 'b'
570
+ font.bold = true
571
+ when 'i'
572
+ font.italic = true
573
+ when 'u'
574
+ font.underline = true
628
575
  end
629
- end
630
576
  end
577
+ fonts << font
631
578
  end
632
579
  end
633
580
  end
581
+
582
+ doc.find("//*[local-name()='cellXfs']").each do |xfs|
583
+ xfs.each do |xf|
584
+ numFmtId = xf.attributes.to_h['numFmtId']
585
+ @cellXfs << [numFmtId]
586
+ fontId = xf.attributes.to_h['fontId'].to_i
587
+ @style_definitions << fonts[fontId]
588
+ end
589
+ end
634
590
  end
635
591
 
636
592
  # convert internal excelx attribute to a format
@@ -1,6 +1,6 @@
1
-
2
1
  require 'rubygems'
3
- require 'rexml/document'
2
+ gem 'libxml-ruby', '>= 0.8.3'
3
+ require 'xml'
4
4
  require 'fileutils'
5
5
  require 'zip/zipfilesystem'
6
6
  require 'date'
@@ -38,7 +38,7 @@ class Openoffice < GenericSpreadsheet
38
38
  @file_nr = @@nr
39
39
  extract_content
40
40
  file = File.new(File.join(@tmpdir, @file_nr.to_s+"_roo_content.xml"))
41
- @doc = REXML::Document.new file
41
+ @doc = XML::Parser.io(file).parse
42
42
  file.close
43
43
  ensure
44
44
  #if ENV["roo_local"] != "thomas-p"
@@ -174,31 +174,14 @@ class Openoffice < GenericSpreadsheet
174
174
  end
175
175
  end
176
176
 
177
- # returns an array of sheet names in the spreadsheet
178
177
  def sheets
179
178
  return_sheets = []
180
- oo_document_count = 0
181
- @doc.each_element do |oo_document|
182
- oo_document_count += 1
183
- oo_element_count = 0
184
- oo_document.each_element do |oo_element|
185
- oo_element_count += 1
186
- if oo_element.name == "body"
187
- oo_element.each_element do |be|
188
- if be.name == "spreadsheet"
189
- be.each_element do |se|
190
- if se.name == "table"
191
- return_sheets << se.attributes['name']
192
- end
193
- end
194
- end
195
- end
196
- end
197
- end
179
+ @doc.find("//*[local-name()='table']").each do |sheet|
180
+ return_sheets << sheet.attributes['name']
198
181
  end
199
182
  return_sheets
200
183
  end
201
-
184
+
202
185
  # version of the openoffice document
203
186
  # at 2007 this is always "1.0"
204
187
  def officeversion
@@ -240,14 +223,14 @@ class Openoffice < GenericSpreadsheet
240
223
 
241
224
  # read the version of the OO-Version
242
225
  def oo_version
243
- @doc.each_element do |oo_document|
244
- @officeversion = oo_document.attributes['version']
226
+ @doc.find("//*[local-name()='document-content']").each do |office|
227
+ @officeversion = office.attributes['version']
245
228
  end
246
229
  end
247
230
 
248
231
  # helper function to set the internal representation of cells
249
- def set_cell_values(sheet,x,y,i,v,vt,formula,tr,str_v,style_name)
250
- key = [y,x+i]
232
+ def set_cell_values(sheet,x,y,i,v,vt,formula,table_cell,str_v,style_name)
233
+ key = [y,x+i]
251
234
  @cell_type[sheet] = {} unless @cell_type[sheet]
252
235
  @cell_type[sheet][key] = Openoffice.oo_type_2_roo_type(vt)
253
236
  @formula[sheet] = {} unless @formula[sheet]
@@ -261,13 +244,13 @@ class Openoffice < GenericSpreadsheet
261
244
  when :string
262
245
  @cell[sheet][key] = str_v
263
246
  when :date
264
- if tr.attributes['date-value'].size != "XXXX-XX-XX".size
247
+ if table_cell.attributes['date-value'].size != "XXXX-XX-XX".size
265
248
  #-- dann ist noch eine Uhrzeit vorhanden
266
249
  #-- "1961-11-21T12:17:18"
267
- @cell[sheet][key] = DateTime.parse(tr.attributes['date-value'])
250
+ @cell[sheet][key] = DateTime.parse(table_cell.attributes['date-value'])
268
251
  @cell_type[sheet][key] = :datetime
269
252
  else
270
- @cell[sheet][key] = tr.attributes['date-value']
253
+ @cell[sheet][key] = table_cell.attributes['date-value']
271
254
  end
272
255
  when :percentage
273
256
  @cell[sheet][key] = v.to_f
@@ -289,104 +272,88 @@ class Openoffice < GenericSpreadsheet
289
272
  sheet_found = false
290
273
  raise ArgumentError, "Error: sheet '#{sheet||'nil'}' not valid" if @default_sheet == nil and sheet==nil
291
274
  raise RangeError unless self.sheets.include? sheet
292
- oo_document_count = 0
293
- @doc.each_element do |oo_document|
294
- # @officeversion = oo_document.attributes['version']
295
- oo_document_count += 1
296
- oo_element_count = 0
297
- oo_document.each_element do |oo_element|
298
- oo_element_count += 1
299
- if oo_element.name == "body"
300
- oo_element.each_element do |be|
301
- if be.name == "spreadsheet"
302
- be.each_element do |se|
303
- if se.name == "table"
304
- if se.attributes['name']==sheet
305
- sheet_found = true
306
- x=1
307
- y=1
308
- se.each_element do |te|
309
- if te.name == "table-column"
310
- rep = te.attributes["number-columns-repeated"]
311
- @style_defaults[sheet] << te.attributes["default-cell-style-name"]
312
- elsif te.name == "table-row"
313
- if te.attributes['number-rows-repeated']
314
- skip_y = te.attributes['number-rows-repeated'].to_i
315
- y = y + skip_y - 1 # minus 1 because this line will be counted as a line element
316
- end
317
- te.each_element do |tr|
318
- if tr.name == 'table-cell'
319
- skip = tr.attributes['number-columns-repeated']
320
- formula = tr.attributes['formula']
321
- vt = tr.attributes['value-type']
322
- v = tr.attributes['value']
323
- style_name = tr.attributes['style-name']
324
- if vt == 'string'
325
- str_v = ''
326
- # insert \n if there is more than one paragraph
327
- para_count = 0
328
- tr.each_element do |str|
329
- if str.name == 'p'
330
- v = str.text
331
- str_v += "\n" if para_count > 0
332
- para_count += 1
333
- if str.children.size > 1
334
- str_v = children_to_string(str.children)
335
- else
336
- str.children.each {|child|
337
- str_v = str_v + child.to_s #.text
338
- }
339
- end
340
- str_v.gsub!(/&apos;/,"'") # special case not supported by unescapeHTML
341
- str_v = CGI.unescapeHTML(str_v)
342
- end # == 'p'
343
- end
344
- elsif vt == 'time'
345
- tr.each_element do |str|
346
- if str.name == 'p'
347
- v = str.text
348
- end
349
- end
350
- elsif vt == '' or vt == nil
351
- #
352
- elsif vt == 'date'
353
- #
354
- elsif vt == 'percentage'
355
- #
356
- elsif vt == 'float'
357
- #
358
- elsif vt == 'boolean'
359
- v = tr.attributes['boolean-value']
360
- #
361
- else
362
- # raise "unknown type #{vt}"
363
- end
364
- if skip
365
- if v != nil or tr.attributes['date-value']
366
- 0.upto(skip.to_i-1) do |i|
367
- set_cell_values(sheet,x,y,i,v,vt,formula,tr,str_v,style_name)
368
- end
369
- end
370
- x += (skip.to_i - 1)
371
- end # if skip
372
- set_cell_values(sheet,x,y,0,v,vt,formula,tr,str_v,style_name)
373
- x += 1
374
- end
375
- end
376
- y += 1
377
- x = 1
275
+
276
+ @doc.find("//*[local-name()='table']").each do |ws|
277
+ if sheet == ws.attributes['name']
278
+ sheet_found = true
279
+ col = 1
280
+ row = 1
281
+ ws.each_element do |table_element|
282
+ case table_element.name
283
+ when 'table-column'
284
+ @style_defaults[sheet] << table_element.attributes['default-cell-style-name']
285
+ when 'table-row'
286
+ if table_element.attributes['number-rows-repeated']
287
+ skip_row = table_element.attributes['number-rows-repeated'].to_i
288
+ row = row + skip_row - 1
289
+ end
290
+ table_element.each_element do |cell|
291
+ skip_col = cell.attributes['number-columns-repeated']
292
+ formula = cell.attributes['formula']
293
+ vt = cell.attributes['value-type']
294
+ v = cell.attributes['value']
295
+ style_name = cell.attributes['style-name']
296
+ if vt == 'string'
297
+ str_v = ''
298
+ # insert \n if there is more than one paragraph
299
+ para_count = 0
300
+ cell.each_element do |str|
301
+ if str.name == 'p'
302
+ v = str.content
303
+ str_v += "\n" if para_count > 0
304
+ para_count += 1
305
+ if str.children.size > 1
306
+ str_v += children_to_string(str.children)
307
+ else
308
+ str.children.each do |child|
309
+ str_v += child.content #.text
378
310
  end
379
311
  end
380
- end # sheet
312
+ str_v.gsub!(/&apos;/,"'") # special case not supported by unescapeHTML
313
+ str_v = CGI.unescapeHTML(str_v)
314
+ end # == 'p'
315
+ end
316
+ elsif vt == 'time'
317
+ cell.each_element do |str|
318
+ if str.name == 'p'
319
+ v = str.content
320
+ end
381
321
  end
322
+ elsif vt == '' or vt == nil
323
+ #
324
+ elsif vt == 'date'
325
+ #
326
+ elsif vt == 'percentage'
327
+ #
328
+ elsif vt == 'float'
329
+ #
330
+ elsif vt == 'boolean'
331
+ v = cell.attributes['boolean-value']
332
+ #
333
+ else
334
+ # raise "unknown type #{vt}"
382
335
  end
383
- end
336
+ if skip_col
337
+ if v != nil or cell.attributes['date-value']
338
+ 0.upto(skip_col.to_i-1) do |i|
339
+ set_cell_values(sheet,col,row,i,v,vt,formula,cell,str_v,style_name)
340
+ end
341
+ end
342
+ col += (skip_col.to_i - 1)
343
+ end # if skip
344
+ set_cell_values(sheet,col,row,0,v,vt,formula,cell,str_v,style_name)
345
+ col += 1
346
+ end
347
+ row += 1
348
+ col = 1
384
349
  end
385
- elsif oo_element.name == "automatic-styles"
386
- read_styles(oo_element)
387
350
  end
388
351
  end
389
352
  end
353
+
354
+ @doc.find("//*[local-name()='automatic-styles']").each do |style|
355
+ read_styles(style)
356
+ end
390
357
  if !sheet_found
391
358
  raise RangeError
392
359
  end
@@ -413,25 +380,7 @@ class Openoffice < GenericSpreadsheet
413
380
  def check_default_sheet
414
381
  sheet_found = false
415
382
  raise ArgumentError, "Error: default_sheet not set" if @default_sheet == nil
416
- @doc.each_element do |oo_document|
417
- oo_element_count = 0
418
- oo_document.each_element do |oo_element|
419
- oo_element_count += 1
420
- if oo_element.name == "body"
421
- oo_element.each_element do |be|
422
- if be.name == "spreadsheet"
423
- be.each_element do |se|
424
- if se.name == "table"
425
- if se.attributes['name'] == @default_sheet
426
- sheet_found = true
427
- end # sheet
428
- end
429
- end
430
- end
431
- end
432
- end
433
- end
434
- end
383
+ sheet_found = true if sheets.include?(@default_sheet)
435
384
  if ! sheet_found
436
385
  raise RangeError, "sheet '#{@default_sheet}' not found"
437
386
  end
@@ -487,8 +436,8 @@ class Openoffice < GenericSpreadsheet
487
436
  def children_to_string(children)
488
437
  result = ''
489
438
  children.each {|child|
490
- if child.class == REXML::Text
491
- result = result + child.to_s
439
+ if child.text?
440
+ result = result + child.content
492
441
  else
493
442
  if child.name == 's'
494
443
  compressed_spaces = child.attributes['c'].to_i
@@ -498,7 +447,7 @@ class Openoffice < GenericSpreadsheet
498
447
  end
499
448
  result = result + " "*compressed_spaces
500
449
  else
501
- result = result + child.to_s
450
+ result = result + child.content
502
451
  end
503
452
  end
504
453
  }
Binary file
Binary file
Binary file
Binary file
data/test/test_roo.rb CHANGED
@@ -3817,7 +3817,7 @@ Sheet 3:
3817
3817
  def do_test_xml(oo)
3818
3818
  assert_nothing_raised {oo.to_xml}
3819
3819
  sheetname = oo.sheets.first
3820
- doc = REXML::Document.new(oo.to_xml)
3820
+ doc = XML::Parser.string(oo.to_xml).parse
3821
3821
  doc.root.each_element {|xml_sheet|
3822
3822
  all_cells = init_all_cells(oo, sheetname)
3823
3823
  x = 0
@@ -3831,7 +3831,7 @@ Sheet 3:
3831
3831
  result = [
3832
3832
  cell.attributes['row'],
3833
3833
  cell.attributes['column'],
3834
- cell.text,
3834
+ cell.content,
3835
3835
  cell.attributes['type'],
3836
3836
  ]
3837
3837
  assert_equal expected, result
@@ -4847,7 +4847,7 @@ This attached file is the newer format of Microsoft Excel (.xlsx).
4847
4847
  end
4848
4848
  end
4849
4849
 
4850
- def test_cell_excel_boolean
4850
+ def test_cell_boolean
4851
4851
  if EXCEL
4852
4852
  oo = Excel.new(File.join(TESTDIR,"boolean.xls"))
4853
4853
  oo.default_sheet = oo.sheets.first
@@ -4868,6 +4868,31 @@ This attached file is the newer format of Microsoft Excel (.xlsx).
4868
4868
  end
4869
4869
  end
4870
4870
 
4871
+ def test_cell_multiline
4872
+ if EXCEL
4873
+ oo = Excel.new(File.join(TESTDIR,"paragraph.xls"))
4874
+ oo.default_sheet = oo.sheets.first
4875
+ assert_equal "This is a test\nof a multiline\nCell", oo.cell(1,1)
4876
+ assert_equal "This is a test\n¶\nof a multiline\n\nCell", oo.cell(1,2)
4877
+ assert_equal "first p\n\nsecond p\n\nlast p", oo.cell(2,1)
4878
+ end
4879
+ if OPENOFFICE
4880
+ oo = Openoffice.new(File.join(TESTDIR,"paragraph.ods"))
4881
+ oo.default_sheet = oo.sheets.first
4882
+ assert_equal "This is a test\nof a multiline\nCell", oo.cell(1,1)
4883
+ assert_equal "This is a test\n¶\nof a multiline\n\nCell", oo.cell(1,2)
4884
+ assert_equal "first p\n\nsecond p\n\nlast p", oo.cell(2,1)
4885
+ end
4886
+ if EXCELX
4887
+ oo = Excelx.new(File.join(TESTDIR,"paragraph.xlsx"))
4888
+ oo.default_sheet = oo.sheets.first
4889
+ assert_equal "This is a test\nof a multiline\nCell", oo.cell(1,1)
4890
+ assert_equal "This is a test\n¶\nof a multiline\n\nCell", oo.cell(1,2)
4891
+ assert_equal "first p\n\nsecond p\n\nlast p", oo.cell(2,1)
4892
+ end
4893
+ end
4894
+
4895
+
4871
4896
  # for test_cell_styles
4872
4897
  def verify_cell_fonts(oo)
4873
4898
  oo.default_sheet = oo.sheets.first
@@ -4940,7 +4965,21 @@ This attached file is the newer format of Microsoft Excel (.xlsx).
4940
4965
  if EXCEL
4941
4966
  oo = Excel.new(File.join(TESTDIR,"style.xls"))
4942
4967
  verify_cell_fonts(oo)
4943
- end
4944
4968
  end
4969
+ end
4970
+
4971
+ # If a cell has a date-like string but is preceded by a '
4972
+ # to force that date to be treated like a string, we were getting an exception.
4973
+ # This test just checks for that exception to make sure it's not raised in this case
4974
+ def test_date_to_float_conversion
4975
+ if EXCEL
4976
+ assert_nothing_raised(NoMethodError) do
4977
+ oo = Excel.new(File.join(TESTDIR,"datetime_floatconv.xls"))
4978
+ oo.default_sheet = oo.sheets.first
4979
+ oo.cell('a',1)
4980
+ oo.cell('a',2)
4981
+ end
4982
+ end
4983
+ end
4945
4984
 
4946
4985
  end # class
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hmcgowan-roo
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.4
4
+ version: 1.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Thomas Preymesser
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-03-12 00:00:00 -07:00
12
+ date: 2009-04-20 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -59,7 +59,7 @@ executables: []
59
59
  extensions: []
60
60
 
61
61
  extra_rdoc_files:
62
- - README.txt
62
+ - README.markdown
63
63
  - History.txt
64
64
  files:
65
65
  - lib/roo
@@ -71,74 +71,6 @@ files:
71
71
  - lib/roo/roo_rails_helper.rb
72
72
  - lib/roo/version.rb
73
73
  - lib/roo.rb
74
- - test/_ods
75
- - test/_ods/Configurations2
76
- - test/_ods/Configurations2/accelerator
77
- - test/_ods/Configurations2/accelerator/current.xml
78
- - test/_ods/Configurations2/floater
79
- - test/_ods/Configurations2/images
80
- - test/_ods/Configurations2/images/Bitmaps
81
- - test/_ods/Configurations2/menubar
82
- - test/_ods/Configurations2/popupmenu
83
- - test/_ods/Configurations2/progressbar
84
- - test/_ods/Configurations2/statusbar
85
- - test/_ods/Configurations2/toolbar
86
- - test/_ods/content.xml
87
- - test/_ods/META-INF
88
- - test/_ods/META-INF/manifest.xml
89
- - test/_ods/meta.xml
90
- - test/_ods/mimetype
91
- - test/_ods/settings.xml
92
- - test/_ods/style.ods
93
- - test/_ods/styles.xml
94
- - test/_ods/Thumbnails
95
- - test/_ods/Thumbnails/thumbnail.png
96
- - test/_ods_old
97
- - test/_ods_old/Configurations2
98
- - test/_ods_old/Configurations2/accelerator
99
- - test/_ods_old/Configurations2/accelerator/current.xml
100
- - test/_ods_old/Configurations2/floater
101
- - test/_ods_old/Configurations2/images
102
- - test/_ods_old/Configurations2/images/Bitmaps
103
- - test/_ods_old/Configurations2/menubar
104
- - test/_ods_old/Configurations2/popupmenu
105
- - test/_ods_old/Configurations2/progressbar
106
- - test/_ods_old/Configurations2/statusbar
107
- - test/_ods_old/Configurations2/toolbar
108
- - test/_ods_old/content.xml
109
- - test/_ods_old/META-INF
110
- - test/_ods_old/META-INF/manifest.xml
111
- - test/_ods_old/meta.xml
112
- - test/_ods_old/mimetype
113
- - test/_ods_old/settings.xml
114
- - test/_ods_old/style.ods
115
- - test/_ods_old/styles.xml
116
- - test/_ods_old/Thumbnails
117
- - test/_ods_old/Thumbnails/thumbnail.png
118
- - test/_xlsx
119
- - test/_xlsx/[Content_Types].xml
120
- - test/_xlsx/_rels
121
- - test/_xlsx/docProps
122
- - test/_xlsx/docProps/app.xml
123
- - test/_xlsx/docProps/core.xml
124
- - test/_xlsx/style.xlsx
125
- - test/_xlsx/style.xlsx.cpgz
126
- - test/_xlsx/xl
127
- - test/_xlsx/xl/_rels
128
- - test/_xlsx/xl/_rels/workbook.xml.rels
129
- - test/_xlsx/xl/printerSettings
130
- - test/_xlsx/xl/printerSettings/printerSettings1.bin
131
- - test/_xlsx/xl/sharedStrings.xml
132
- - test/_xlsx/xl/styles.xml
133
- - test/_xlsx/xl/theme
134
- - test/_xlsx/xl/theme/theme1.xml
135
- - test/_xlsx/xl/workbook.xml
136
- - test/_xlsx/xl/worksheets
137
- - test/_xlsx/xl/worksheets/_rels
138
- - test/_xlsx/xl/worksheets/_rels/sheet1.xml.rels
139
- - test/_xlsx/xl/worksheets/sheet1.xml
140
- - test/_xlsx/xl/worksheets/sheet2.xml
141
- - test/_xlsx/xl/worksheets/sheet3.xml
142
74
  - test/bbu.ods
143
75
  - test/bbu.xls
144
76
  - test/bbu.xlsx
@@ -159,6 +91,7 @@ files:
159
91
  - test/datetime.ods
160
92
  - test/datetime.xls
161
93
  - test/datetime.xlsx
94
+ - test/datetime_floatconv.xls
162
95
  - test/emptysheets.ods
163
96
  - test/emptysheets.xls
164
97
  - test/false_encoding.xls
@@ -175,6 +108,9 @@ files:
175
108
  - test/only_one_sheet.ods
176
109
  - test/only_one_sheet.xls
177
110
  - test/only_one_sheet.xlsx
111
+ - test/paragraph.ods
112
+ - test/paragraph.xls
113
+ - test/paragraph.xlsx
178
114
  - test/ric.ods
179
115
  - test/simple_spreadsheet.ods
180
116
  - test/simple_spreadsheet.xls
@@ -190,14 +126,14 @@ files:
190
126
  - test/time-test.ods
191
127
  - test/time-test.xls
192
128
  - test/time-test.xlsx
193
- - README.txt
129
+ - README.markdown
194
130
  - History.txt
195
131
  has_rdoc: true
196
132
  homepage: http://roo.rubyforge.org
197
133
  post_install_message:
198
134
  rdoc_options:
199
135
  - --main
200
- - README.txt
136
+ - README.markdown
201
137
  - --inline-source
202
138
  - --charset=UTF-8
203
139
  require_paths: