roo 1.9.6 → 1.9.7
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +6 -0
- data/bin/roo +2 -1
- data/lib/roo.rb +1 -1
- data/lib/roo/excel.rb +16 -54
- data/lib/roo/excelx.rb +38 -78
- data/lib/roo/generic_spreadsheet.rb +17 -45
- data/lib/roo/google.rb +0 -8
- data/lib/roo/openoffice.rb +9 -44
- data/test/benchmark1.rb +43 -0
- data/test/dreimalvier.ods +0 -0
- data/test/test_roo.rb +27 -2
- metadata +18 -20
- data/lib/.roo.rb.swp +0 -0
- data/lib/roo/.generic_spreadsheet.rb.swp +0 -0
data/History.txt
CHANGED
@@ -1,4 +1,10 @@
|
|
1
|
+
== 1.9.7 2011-08-27
|
2
|
+
|
3
|
+
* 1 bugfix
|
4
|
+
* Openoffice: Better way for extracting formula strings, some characters were deleted at the formula string under some circumstances.
|
5
|
+
|
1
6
|
== 1.9.6 2011-08-03
|
7
|
+
|
2
8
|
* 1 enhancement
|
3
9
|
* new class Libreoffice (Libreoffice should do exactly the same as the Openoffice
|
4
10
|
class. It's just another name. Technically, Libreoffice is inherited from
|
data/bin/roo
CHANGED
data/lib/roo.rb
CHANGED
data/lib/roo/excel.rb
CHANGED
@@ -100,8 +100,7 @@ class Excel < GenericSpreadsheet
|
|
100
100
|
def initialize(filename, packed = nil, file_warning = :error)
|
101
101
|
super()
|
102
102
|
@file_warning = file_warning
|
103
|
-
|
104
|
-
# @tmpdir = "oo_"+$$.to_s
|
103
|
+
file_type_check(filename,'.xls','an Excel',packed)
|
105
104
|
@tmpdir = GenericSpreadsheet.next_tmpdir
|
106
105
|
@tmpdir = File.join(ENV['ROO_TMP'], @tmpdir) if ENV['ROO_TMP']
|
107
106
|
unless File.exists?(@tmpdir)
|
@@ -110,36 +109,19 @@ class Excel < GenericSpreadsheet
|
|
110
109
|
filename = open_from_uri(filename) if filename[0,7] == "http://"
|
111
110
|
filename = open_from_stream(filename[7..-1]) if filename[0,7] == "stream:"
|
112
111
|
filename = unzip(filename) if packed and packed == :zip
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
#
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
unless File.file?(@filename)
|
122
|
-
FileUtils::rm_r(@tmpdir)
|
123
|
-
raise IOError, "file #{@filename} does not exist"
|
124
|
-
end
|
125
|
-
begin
|
126
|
-
@workbook = Spreadsheet.open(filename)
|
127
|
-
rescue Ole::Storage::FormatError
|
128
|
-
FileUtils::rm_r(@tmpdir)
|
129
|
-
raise # nach aussen weiterhin sichtbar
|
130
|
-
end
|
131
|
-
@default_sheet = self.sheets.first
|
132
|
-
#ensure
|
133
|
-
#if ENV["roo_local"] != "thomas-p"
|
134
|
-
# ich glaube ich darf hier noch nicht die temporaere
|
135
|
-
# Datei loeschen, weil
|
136
|
-
#to do
|
137
|
-
#"Loeschen temp. Directory anpassen"
|
138
|
-
#end
|
139
|
-
#end
|
140
|
-
#if fremdrechner?
|
112
|
+
@filename = filename
|
113
|
+
unless File.file?(@filename)
|
114
|
+
FileUtils::rm_r(@tmpdir)
|
115
|
+
raise IOError, "file #{@filename} does not exist"
|
116
|
+
end
|
117
|
+
begin
|
118
|
+
@workbook = Spreadsheet.open(filename)
|
119
|
+
rescue Ole::Storage::FormatError
|
141
120
|
FileUtils::rm_r(@tmpdir)
|
142
|
-
|
121
|
+
raise # nach aussen weiterhin sichtbar
|
122
|
+
end
|
123
|
+
@default_sheet = self.sheets.first
|
124
|
+
FileUtils::rm_r(@tmpdir)
|
143
125
|
@cell = Hash.new
|
144
126
|
@cell_type = Hash.new
|
145
127
|
@formula = Hash.new
|
@@ -154,11 +136,7 @@ class Excel < GenericSpreadsheet
|
|
154
136
|
|
155
137
|
# returns an array of sheet names in the spreadsheet
|
156
138
|
def sheets
|
157
|
-
|
158
|
-
@workbook.worksheets.each do |worksheet|
|
159
|
-
result << normalize_string(worksheet.name)
|
160
|
-
end
|
161
|
-
return result
|
139
|
+
@workbook.worksheets.collect {|worksheet| normalize_string(worksheet.name)}
|
162
140
|
end
|
163
141
|
|
164
142
|
# returns the content of a cell. The upper left corner is (1,1) or ('A',1)
|
@@ -175,7 +153,6 @@ class Excel < GenericSpreadsheet
|
|
175
153
|
if celltype(row,col,sheet) == :string
|
176
154
|
return platform_specific_iconv(@cell[sheet][[row,col]])
|
177
155
|
else
|
178
|
-
#return @cell[sheet][[row,col]]
|
179
156
|
if @cell[sheet] and @cell[sheet][[row,col]]
|
180
157
|
return @cell[sheet][[row,col]]
|
181
158
|
else
|
@@ -200,7 +177,6 @@ class Excel < GenericSpreadsheet
|
|
200
177
|
if @formula[sheet] and @formula[sheet][[row,col]]
|
201
178
|
return :formula
|
202
179
|
else
|
203
|
-
# @cell_type[sheet][[row,col]]
|
204
180
|
if @cell_type[sheet] and @cell_type[sheet][[row,col]]
|
205
181
|
return @cell_type[sheet][[row,col]]
|
206
182
|
else
|
@@ -354,7 +330,7 @@ class Excel < GenericSpreadsheet
|
|
354
330
|
@fonts[sheet] = {} unless @fonts[sheet]
|
355
331
|
@fonts[sheet][key] = font
|
356
332
|
|
357
|
-
case vt
|
333
|
+
case vt
|
358
334
|
when :float
|
359
335
|
@cell[sheet][key] = v.to_f
|
360
336
|
when :string
|
@@ -388,7 +364,7 @@ class Excel < GenericSpreadsheet
|
|
388
364
|
(0..row.size).each do |cell_index|
|
389
365
|
cell = row.at(cell_index)
|
390
366
|
next if cell.nil? #skip empty cells
|
391
|
-
next if cell.class == Spreadsheet::Formula && cell.value.nil? # skip empty
|
367
|
+
next if cell.class == Spreadsheet::Formula && cell.value.nil? # skip empty formula cells
|
392
368
|
if date_or_time?(row, cell_index)
|
393
369
|
vt, v = read_cell_date_or_time(row, cell_index)
|
394
370
|
else
|
@@ -484,18 +460,4 @@ class Excel < GenericSpreadsheet
|
|
484
460
|
end
|
485
461
|
private :read_cell
|
486
462
|
|
487
|
-
#TODO: testing only
|
488
|
-
# def inject_null_characters(str)
|
489
|
-
# if str.class != String
|
490
|
-
# return str
|
491
|
-
# end
|
492
|
-
# new_str=''
|
493
|
-
# 0.upto(str.size-1) do |i|
|
494
|
-
# new_str += str[i,1]
|
495
|
-
# new_str += "\000"
|
496
|
-
# end
|
497
|
-
# new_str
|
498
|
-
# end
|
499
|
-
#
|
500
|
-
|
501
463
|
end
|
data/lib/roo/excelx.rb
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
#TODO: require 'xml'
|
2
1
|
require 'fileutils'
|
3
2
|
require 'zip/zipfilesystem'
|
4
3
|
require 'date'
|
@@ -87,8 +86,7 @@ class Excelx < GenericSpreadsheet
|
|
87
86
|
def initialize(filename, packed=nil, file_warning = :error) #, create = false)
|
88
87
|
super()
|
89
88
|
@file_warning = file_warning
|
90
|
-
|
91
|
-
#@tmpdir = "oo_"+$$.to_s
|
89
|
+
file_type_check(filename,'.xlsx','an Excel-xlsx',packed)
|
92
90
|
@tmpdir = GenericSpreadsheet.next_tmpdir
|
93
91
|
@tmpdir = File.join(ENV['ROO_TMP'], @tmpdir) if ENV['ROO_TMP']
|
94
92
|
unless File.exists?(@tmpdir)
|
@@ -96,52 +94,39 @@ class Excelx < GenericSpreadsheet
|
|
96
94
|
end
|
97
95
|
filename = open_from_uri(filename) if filename[0,7] == "http://"
|
98
96
|
filename = unzip(filename) if packed and packed == :zip
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
@
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
97
|
+
@cells_read = Hash.new
|
98
|
+
@filename = filename
|
99
|
+
unless File.file?(@filename)
|
100
|
+
FileUtils::rm_r(@tmpdir)
|
101
|
+
raise IOError, "file #{@filename} does not exist"
|
102
|
+
end
|
103
|
+
@@nr += 1
|
104
|
+
@file_nr = @@nr
|
105
|
+
extract_content(@filename)
|
106
|
+
file = File.new(File.join(@tmpdir, @file_nr.to_s+"_roo_workbook.xml"))
|
107
|
+
@workbook_doc = Nokogiri::XML(file)
|
108
|
+
file.close
|
109
|
+
@shared_table = []
|
110
|
+
if File.exist?(File.join(@tmpdir, @file_nr.to_s+'_roo_sharedStrings.xml'))
|
111
|
+
file = File.new(File.join(@tmpdir, @file_nr.to_s+'_roo_sharedStrings.xml'))
|
112
|
+
@sharedstring_doc = Nokogiri::XML(file)
|
115
113
|
file.close
|
116
|
-
@
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
@
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
end
|
133
|
-
@sheet_doc = []
|
134
|
-
@sheet_files.each_with_index do |item, i|
|
135
|
-
file = File.new(item)
|
136
|
-
#TODO: @sheet_doc[i] = XML::Parser.io(file).parse
|
137
|
-
@sheet_doc[i] = Nokogiri::XML(file)
|
138
|
-
file.close
|
139
|
-
end
|
140
|
-
#ensure
|
141
|
-
#if ENV["roo_local"] != "thomas-p"
|
142
|
-
#FileUtils::rm_r(@tmpdir)
|
143
|
-
#end
|
144
|
-
#end
|
114
|
+
read_shared_strings(@sharedstring_doc)
|
115
|
+
end
|
116
|
+
@styles_table = []
|
117
|
+
@style_definitions = Array.new # TODO: ??? { |h,k| h[k] = {} }
|
118
|
+
if File.exist?(File.join(@tmpdir, @file_nr.to_s+'_roo_styles.xml'))
|
119
|
+
file = File.new(File.join(@tmpdir, @file_nr.to_s+'_roo_styles.xml'))
|
120
|
+
@styles_doc = Nokogiri::XML(file)
|
121
|
+
file.close
|
122
|
+
read_styles(@styles_doc)
|
123
|
+
end
|
124
|
+
@sheet_doc = []
|
125
|
+
@sheet_files.each_with_index do |item, i|
|
126
|
+
file = File.new(item)
|
127
|
+
@sheet_doc[i] = Nokogiri::XML(file)
|
128
|
+
file.close
|
129
|
+
end
|
145
130
|
FileUtils::rm_r(@tmpdir)
|
146
131
|
@default_sheet = self.sheets.first
|
147
132
|
@cell = Hash.new
|
@@ -296,13 +281,12 @@ class Excelx < GenericSpreadsheet
|
|
296
281
|
# returns an array of sheet names in the spreadsheet
|
297
282
|
def sheets
|
298
283
|
return_sheets = []
|
299
|
-
#TODO: @workbook_doc.find("//*[local-name()='sheet']").each do |sheet|
|
300
284
|
@workbook_doc.xpath("//*[local-name()='sheet']").each do |sheet|
|
301
|
-
#TODO: return_sheets << sheet.attributes.to_h['name']
|
302
285
|
return_sheets << sheet['name']
|
303
286
|
end
|
304
287
|
return_sheets
|
305
288
|
end
|
289
|
+
|
306
290
|
# shows the internal representation of all cells
|
307
291
|
# for debugging purposes
|
308
292
|
def to_s(sheet=nil)
|
@@ -315,9 +299,9 @@ class Excelx < GenericSpreadsheet
|
|
315
299
|
|
316
300
|
# helper function to set the internal representation of cells
|
317
301
|
def set_cell_values(sheet,x,y,i,v,vt,formula,tr,str_v,
|
318
|
-
|
319
|
-
|
320
|
-
|
302
|
+
excelx_type=nil,
|
303
|
+
excelx_value=nil,
|
304
|
+
s_attribute=nil)
|
321
305
|
key = [y,x+i]
|
322
306
|
@cell_type[sheet] = {} unless @cell_type[sheet]
|
323
307
|
@cell_type[sheet][key] = vt
|
@@ -390,18 +374,13 @@ class Excelx < GenericSpreadsheet
|
|
390
374
|
raise ArgumentError, "Error: sheet '#{sheet||'nil'}' not valid" if @default_sheet == nil and sheet==nil
|
391
375
|
raise RangeError unless self.sheets.include? sheet
|
392
376
|
n = self.sheets.index(sheet)
|
393
|
-
#TODO: @sheet_doc[n].find("//*[local-name()='c']").each do |c|
|
394
377
|
@sheet_doc[n].xpath("//*[local-name()='c']").each do |c|
|
395
|
-
#TODO: s_attribute = c.attributes.to_h['s'].to_i # should be here
|
396
378
|
s_attribute = c['s'].to_i # should be here
|
397
|
-
#TODO: if (c.attributes.to_h['t'] == 's')
|
398
379
|
# c: <c r="A5" s="2">
|
399
380
|
# <v>22606</v>
|
400
381
|
# </c>, format: , tmp_type: float
|
401
|
-
|
402
382
|
if c['t'] == 's'
|
403
383
|
tmp_type = :shared
|
404
|
-
#TODO: elsif (c.attributes.to_h['t'] == 'b')
|
405
384
|
elsif c['t'] == 'b'
|
406
385
|
tmp_type = :boolean
|
407
386
|
# 2011-02-25 BEGIN
|
@@ -409,19 +388,15 @@ class Excelx < GenericSpreadsheet
|
|
409
388
|
tmp_type = :string
|
410
389
|
# 2011-02-25 END
|
411
390
|
else
|
412
|
-
|
413
|
-
s_attribute = c['s'].to_i # was here
|
391
|
+
s_attribute = c['s'].to_i
|
414
392
|
format = attribute2format(s_attribute)
|
415
393
|
tmp_type = format2type(format)
|
416
394
|
end
|
417
395
|
formula = nil
|
418
|
-
#TODO: c.each_element do |cell|
|
419
396
|
c.children.each do |cell|
|
420
|
-
#TODO: if cell.name == 'f'
|
421
397
|
if cell.name == 'f'
|
422
398
|
formula = cell.content
|
423
399
|
end
|
424
|
-
#TODO: if cell.name == 'v'
|
425
400
|
if cell.name == 'v'
|
426
401
|
if tmp_type == :time or tmp_type == :datetime
|
427
402
|
if cell.content.to_f >= 1.0
|
@@ -464,8 +439,6 @@ class Excelx < GenericSpreadsheet
|
|
464
439
|
vt = :float
|
465
440
|
v = cell.content
|
466
441
|
end
|
467
|
-
#puts "vt: #{vt}" if cell.text.include? "22606.5120"
|
468
|
-
#TODO: x,y = split_coordinate(c.attributes.to_h['r'])
|
469
442
|
x,y = split_coordinate(c['r'])
|
470
443
|
tr=nil #TODO: ???s
|
471
444
|
set_cell_values(sheet,x,y,0,v,vt,formula,tr,str_v,excelx_type,excelx_value,s_attribute)
|
@@ -546,13 +519,10 @@ class Excelx < GenericSpreadsheet
|
|
546
519
|
|
547
520
|
# read the shared strings xml document
|
548
521
|
def read_shared_strings(doc)
|
549
|
-
#TODO: doc.find("//*[local-name()='si']").each do |si|
|
550
522
|
doc.xpath("//*[local-name()='si']").each do |si|
|
551
523
|
shared_table_entry = ''
|
552
|
-
#TODO: si.each_element do |elem|
|
553
524
|
si.children.each do |elem|
|
554
525
|
if elem.name == 'r' and elem.children
|
555
|
-
# elem.each_element do |r_elem|
|
556
526
|
elem.children.each do |r_elem|
|
557
527
|
if r_elem.name == 't'
|
558
528
|
shared_table_entry << r_elem.content
|
@@ -573,19 +543,13 @@ class Excelx < GenericSpreadsheet
|
|
573
543
|
@cellXfs = []
|
574
544
|
fonts = []
|
575
545
|
|
576
|
-
#TODO: doc.find("//*[local-name()='numFmt']").each do |numFmt|
|
577
546
|
doc.xpath("//*[local-name()='numFmt']").each do |numFmt|
|
578
|
-
# TODO: numFmtId = numFmt.attributes.to_h['numFmtId']
|
579
547
|
numFmtId = numFmt.attributes['numFmtId']
|
580
|
-
#TODO: formatCode = numFmt.attributes.to_h['formatCode']
|
581
548
|
formatCode = numFmt.attributes['formatCode']
|
582
549
|
@numFmts << [numFmtId, formatCode]
|
583
550
|
end
|
584
|
-
#TODO: doc.find("//*[local-name()='fonts']").each do |fonts_el|
|
585
551
|
doc.xpath("//*[local-name()='fonts']").each do |fonts_el|
|
586
|
-
#TODO: fonts_el.each_element do |font_el|
|
587
552
|
fonts_el.children.each do |font_el|
|
588
|
-
#TODO: if font_el.name == 'font'
|
589
553
|
if font_el == 'font'
|
590
554
|
font = Excelx::Font.new
|
591
555
|
font_el.each_element do |font_sub_el|
|
@@ -603,13 +567,10 @@ class Excelx < GenericSpreadsheet
|
|
603
567
|
end
|
604
568
|
end
|
605
569
|
|
606
|
-
#TODO: doc.find("//*[local-name()='cellXfs']").each do |xfs|
|
607
570
|
doc.xpath("//*[local-name()='cellXfs']").each do |xfs|
|
608
571
|
xfs.children.each do |xf|
|
609
|
-
#TODO: numFmtId = xf.attributes.to_h['numFmtId']
|
610
572
|
numFmtId = xf['numFmtId']
|
611
573
|
@cellXfs << [numFmtId]
|
612
|
-
#TODO: fontId = xf.attributes.to_h['fontId'].to_i
|
613
574
|
fontId = xf['fontId'].to_i
|
614
575
|
@style_definitions << fonts[fontId]
|
615
576
|
end
|
@@ -620,7 +581,6 @@ class Excelx < GenericSpreadsheet
|
|
620
581
|
def attribute2format(s)
|
621
582
|
result = nil
|
622
583
|
@numFmts.each {|nf|
|
623
|
-
#TODO: if nf.first == @cellXfs[s.to_i].first
|
624
584
|
# to_s weil das eine Nokogiri::XML::Attr und das
|
625
585
|
# andere ein String ist
|
626
586
|
if nf.first.to_s == @cellXfs[s.to_i].first
|
@@ -6,8 +6,6 @@ class GenericSpreadsheet
|
|
6
6
|
|
7
7
|
attr_reader :default_sheet
|
8
8
|
|
9
|
-
@@class_counter = 0
|
10
|
-
|
11
9
|
# sets the line with attribute names (default: 1)
|
12
10
|
attr_accessor :header_line
|
13
11
|
|
@@ -30,11 +28,8 @@ class GenericSpreadsheet
|
|
30
28
|
end
|
31
29
|
|
32
30
|
def self.next_tmpdir
|
33
|
-
|
34
|
-
|
35
|
-
tmpdir = "oo_"+$$.to_s+"_"+sprintf("%010d",rand(10_000_000_000))
|
36
|
-
# p "@tmpdir = #{tmpdir}"; sleep 5
|
37
|
-
tmpdir
|
31
|
+
tmpdir = "oo_"+$$.to_s+"_"+sprintf("%010d",rand(10_000_000_000))
|
32
|
+
tmpdir
|
38
33
|
end
|
39
34
|
|
40
35
|
|
@@ -333,9 +328,14 @@ class GenericSpreadsheet
|
|
333
328
|
def reload
|
334
329
|
# von Abfrage der Klasse direkt auf .to_s == '..' umgestellt
|
335
330
|
ds = @default_sheet
|
336
|
-
|
337
|
-
|
338
|
-
|
331
|
+
if self.class.to_s == 'Google'
|
332
|
+
initialize(@spreadsheetkey,@user,@password)
|
333
|
+
else
|
334
|
+
initialize(@filename)
|
335
|
+
to do
|
336
|
+
'was ist mit weiteren Parametern bei initialize'
|
337
|
+
end
|
338
|
+
end
|
339
339
|
self.default_sheet = ds
|
340
340
|
#@first_row = @last_row = @first_column = @last_column = nil
|
341
341
|
end
|
@@ -354,13 +354,7 @@ class GenericSpreadsheet
|
|
354
354
|
# recursively removes the current temporary directory
|
355
355
|
# this is only needed if you work with zipped files or files via the web
|
356
356
|
def remove_tmp
|
357
|
-
#to do
|
358
|
-
# "remove_tmp wieder aktivieren"
|
359
|
-
#end
|
360
|
-
#return
|
361
|
-
#$log.debug("remove_tmp('#{@tmpdir}')")
|
362
357
|
if File.exists?(@tmpdir)
|
363
|
-
#$log.debug("#{@tmpdir} exists")
|
364
358
|
FileUtils::rm_r(@tmpdir)
|
365
359
|
end
|
366
360
|
end
|
@@ -441,12 +435,11 @@ class GenericSpreadsheet
|
|
441
435
|
sheet = @default_sheet unless sheet
|
442
436
|
read_cells(sheet) unless @cells_read[sheet]
|
443
437
|
return theformulas unless first_row(sheet) # if there is no first row then
|
444
|
-
|
438
|
+
# there can't be formulas
|
445
439
|
first_row(sheet).upto(last_row(sheet)) {|row|
|
446
440
|
first_column(sheet).upto(last_column(sheet)) {|col|
|
447
441
|
if formula?(row,col,sheet)
|
448
|
-
|
449
|
-
theformulas << f
|
442
|
+
theformulas << [row, col, formula(row,col,sheet)]
|
450
443
|
end
|
451
444
|
}
|
452
445
|
}
|
@@ -592,17 +585,12 @@ class GenericSpreadsheet
|
|
592
585
|
if ! sheet_found
|
593
586
|
raise RangeError, "sheet '#{@default_sheet}' not found"
|
594
587
|
end
|
595
|
-
#raise ArgumentError, "Error: default_sheet not set" if @default_sheet == nil
|
596
588
|
end
|
597
589
|
|
598
590
|
def process_zipfile_packed(zip, path='')
|
599
591
|
ret=nil
|
600
592
|
if zip.file.file? path
|
601
593
|
# extract and return filename
|
602
|
-
#2011-08-01 @tmpdir = "oo_"+$$.to_s
|
603
|
-
#2011-08-01 unless File.exists?(@tmpdir)
|
604
|
-
#2011-08-01 FileUtils::mkdir(@tmpdir)
|
605
|
-
#2011-08-01end
|
606
594
|
file = File.open(File.join(@tmpdir, path),"wb")
|
607
595
|
file.write(zip.read(path))
|
608
596
|
file.close
|
@@ -623,7 +611,6 @@ class GenericSpreadsheet
|
|
623
611
|
def write_csv_content(file=nil,sheet=nil)
|
624
612
|
file = STDOUT unless file
|
625
613
|
if first_row(sheet) # sheet is not empty
|
626
|
-
# first_row(sheet).upto(last_row(sheet)) do |row|
|
627
614
|
1.upto(last_row(sheet)) do |row|
|
628
615
|
1.upto(last_column(sheet)) do |col|
|
629
616
|
file.print(",") if col > 1
|
@@ -644,16 +631,10 @@ class GenericSpreadsheet
|
|
644
631
|
else
|
645
632
|
case onecelltype
|
646
633
|
when :string
|
647
|
-
|
648
|
-
# str << ''
|
649
|
-
#else
|
650
|
-
# one = onecell.gsub(/"/,'""')
|
651
|
-
# str << ('"'+one+'"')
|
652
|
-
#end
|
653
|
-
unless onecell.empty?
|
634
|
+
unless onecell.empty?
|
654
635
|
one = onecell.gsub(/"/,'""')
|
655
636
|
str << ('"'+one+'"')
|
656
|
-
|
637
|
+
end
|
657
638
|
when :float, :percentage
|
658
639
|
if onecell == onecell.to_i
|
659
640
|
str << onecell.to_i.to_s
|
@@ -662,16 +643,10 @@ class GenericSpreadsheet
|
|
662
643
|
end
|
663
644
|
when :formula
|
664
645
|
if onecell.class == String
|
665
|
-
|
666
|
-
# str << ''
|
667
|
-
# else
|
668
|
-
# one = onecell.gsub(/"/,'""')
|
669
|
-
# str << '"'+one+'"'
|
670
|
-
# end
|
671
|
-
unless onecell.empty?
|
646
|
+
unless onecell.empty?
|
672
647
|
one = onecell.gsub(/"/,'""')
|
673
648
|
str << '"'+one+'"'
|
674
|
-
|
649
|
+
end
|
675
650
|
elsif onecell.class == Float
|
676
651
|
if onecell == onecell.to_i
|
677
652
|
str << onecell.to_i.to_s
|
@@ -686,10 +661,7 @@ class GenericSpreadsheet
|
|
686
661
|
when :time
|
687
662
|
str << GenericSpreadsheet.integer_to_timestring(onecell)
|
688
663
|
when :datetime
|
689
|
-
|
690
|
-
# time = rest.split('+').first
|
691
|
-
# str << date + ' ' + time
|
692
|
-
str << onecell.to_s
|
664
|
+
str << onecell.to_s
|
693
665
|
else
|
694
666
|
raise "unhandled celltype "+onecelltype.to_s
|
695
667
|
end
|
data/lib/roo/google.rb
CHANGED
@@ -249,19 +249,12 @@ class Google < GenericSpreadsheet
|
|
249
249
|
@cell_type[sheet][key] = value_type
|
250
250
|
@formula[sheet] = {} unless @formula[sheet]
|
251
251
|
@formula[sheet][key] = string_value if value_type == :formula
|
252
|
-
############
|
253
|
-
#$log.debug("key: #{key}")
|
254
|
-
#$log.debug "#{ws[row,col].inspect}"
|
255
|
-
#@cell[sheet][key] = ws[row,col]
|
256
|
-
#$log.debug "@cell[sheet][key]: #{@cell[sheet][key]}"
|
257
|
-
############
|
258
252
|
end
|
259
253
|
end
|
260
254
|
@cells_read[sheet] = true
|
261
255
|
end
|
262
256
|
|
263
257
|
def determine_datatype(val, numval=nil)
|
264
|
-
# $log.debug "val: #{val} numval: #{numval}"
|
265
258
|
if val.nil? || val[0,1] == '='
|
266
259
|
ty = :formula
|
267
260
|
if numeric?(numval)
|
@@ -284,7 +277,6 @@ class Google < GenericSpreadsheet
|
|
284
277
|
ty = :string
|
285
278
|
end
|
286
279
|
end
|
287
|
-
#$log.debug "val: #{val} ty: #{ty}" if ty == :date
|
288
280
|
return val, ty
|
289
281
|
end
|
290
282
|
|
data/lib/roo/openoffice.rb
CHANGED
@@ -2,7 +2,6 @@ require 'rubygems'
|
|
2
2
|
require 'fileutils'
|
3
3
|
require 'zip/zipfilesystem'
|
4
4
|
require 'date'
|
5
|
-
require 'base64'
|
6
5
|
require 'nokogiri'
|
7
6
|
require 'cgi'
|
8
7
|
|
@@ -12,7 +11,7 @@ class Openoffice < GenericSpreadsheet
|
|
12
11
|
|
13
12
|
# initialization and opening of a spreadsheet file
|
14
13
|
# values for packed: :zip
|
15
|
-
def initialize(filename, packed=nil, file_warning=:error, tmpdir=nil)
|
14
|
+
def initialize(filename, packed=nil, file_warning=:error, tmpdir=nil)
|
16
15
|
@file_warning = file_warning
|
17
16
|
super()
|
18
17
|
file_type_check(filename,'.ods','an openoffice', packed)
|
@@ -24,9 +23,6 @@ class Openoffice < GenericSpreadsheet
|
|
24
23
|
end
|
25
24
|
filename = open_from_uri(filename) if filename[0,7] == "http://"
|
26
25
|
filename = unzip(filename) if packed and packed == :zip
|
27
|
-
#if create and ! File.exists?(filename)
|
28
|
-
# self.create_openoffice(filename)
|
29
|
-
#end
|
30
26
|
@cells_read = Hash.new
|
31
27
|
#TODO: @cells_read[:default] = false
|
32
28
|
@filename = filename
|
@@ -40,9 +36,7 @@ class Openoffice < GenericSpreadsheet
|
|
40
36
|
file = File.new(File.join(@tmpdir, @file_nr.to_s+"_roo_content.xml"))
|
41
37
|
@doc = Nokogiri::XML(file)
|
42
38
|
file.close
|
43
|
-
|
44
|
-
FileUtils::rm_r(@tmpdir)
|
45
|
-
#end
|
39
|
+
FileUtils::rm_r(@tmpdir)
|
46
40
|
@default_sheet = self.sheets.first
|
47
41
|
@cell = Hash.new
|
48
42
|
@cell_type = Hash.new
|
@@ -69,18 +63,6 @@ class Openoffice < GenericSpreadsheet
|
|
69
63
|
end
|
70
64
|
end
|
71
65
|
|
72
|
-
# creates a new empty openoffice-spreadsheet file
|
73
|
-
def create_openoffice(filename) #:nodoc:
|
74
|
-
#TODO: a better way for creating the file contents
|
75
|
-
# now you have to call mkbase64...rb to create an include file with all
|
76
|
-
# the empty files in an openoffice zip-file
|
77
|
-
load 'base64include.rb'
|
78
|
-
# puts @@empty_spreadsheet
|
79
|
-
f = File.open(filename,'wb')
|
80
|
-
f.print(Base64.decode64(@@empty_spreadsheet))
|
81
|
-
f.close
|
82
|
-
end
|
83
|
-
|
84
66
|
# Returns the content of a spreadsheet-cell.
|
85
67
|
# (1,1) is the upper left corner.
|
86
68
|
# (1,1), (1,'A'), ('A',1), ('a',1) all refers to the
|
@@ -90,7 +72,6 @@ class Openoffice < GenericSpreadsheet
|
|
90
72
|
read_cells(sheet) unless @cells_read[sheet]
|
91
73
|
row,col = normalize(row,col)
|
92
74
|
if celltype(row,col,sheet) == :date
|
93
|
-
#TODO: yyyy,mm,dd = @cell[sheet][[row,col]].split('-')
|
94
75
|
yyyy,mm,dd = @cell[sheet][[row,col]].to_s.split('-')
|
95
76
|
return Date.new(yyyy.to_i,mm.to_i,dd.to_i)
|
96
77
|
end
|
@@ -107,7 +88,13 @@ class Openoffice < GenericSpreadsheet
|
|
107
88
|
if @formula[sheet][[row,col]] == nil
|
108
89
|
return nil
|
109
90
|
else
|
110
|
-
return @formula[sheet][[row,col]]["oooc:".length..-1]
|
91
|
+
# return @formula[sheet][[row,col]]["oooc:".length..-1]
|
92
|
+
str = @formula[sheet][[row,col]]
|
93
|
+
if str.include? ':'
|
94
|
+
return str[str.index(':')+1..-1]
|
95
|
+
else
|
96
|
+
return str
|
97
|
+
end
|
111
98
|
end
|
112
99
|
end
|
113
100
|
|
@@ -183,9 +170,7 @@ class Openoffice < GenericSpreadsheet
|
|
183
170
|
|
184
171
|
def sheets
|
185
172
|
return_sheets = []
|
186
|
-
#TODO: @doc.find("//*[local-name()='table']").each do |sheet|
|
187
173
|
@doc.xpath("//*[local-name()='table']").each do |sheet|
|
188
|
-
#TODO: return_sheets << sheet.attributes['name']
|
189
174
|
return_sheets << sheet['name']
|
190
175
|
end
|
191
176
|
return_sheets
|
@@ -206,11 +191,6 @@ class Openoffice < GenericSpreadsheet
|
|
206
191
|
@cell[sheet].inspect
|
207
192
|
end
|
208
193
|
|
209
|
-
# save spreadsheet
|
210
|
-
def save #:nodoc:
|
211
|
-
42
|
212
|
-
end
|
213
|
-
|
214
194
|
# returns the row,col values of the labelled cell
|
215
195
|
# (nil,nil) if label is not defined
|
216
196
|
# sheet parameter is not really needed because label names are global
|
@@ -231,7 +211,6 @@ class Openoffice < GenericSpreadsheet
|
|
231
211
|
|
232
212
|
# read the version of the OO-Version
|
233
213
|
def oo_version
|
234
|
-
#TODO: @doc.find("//*[local-name()='document-content']").each do |office|
|
235
214
|
@doc.xpath("//*[local-name()='document-content']").each do |office|
|
236
215
|
@officeversion = office.attributes['version'].to_s
|
237
216
|
end
|
@@ -300,41 +279,30 @@ class Openoffice < GenericSpreadsheet
|
|
300
279
|
@labels[name] = [sheetname,row,col]
|
301
280
|
end
|
302
281
|
|
303
|
-
#TODO: @doc.find("//*[local-name()='table']").each do |ws|
|
304
282
|
@doc.xpath("//*[local-name()='table']").each do |ws|
|
305
|
-
#TODO: if sheet == ws.attributes['name']
|
306
283
|
if sheet == ws['name']
|
307
284
|
sheet_found = true
|
308
285
|
col = 1
|
309
286
|
row = 1
|
310
|
-
#TODO: ws.each_element do |table_element|
|
311
287
|
ws.children.each do |table_element|
|
312
288
|
case table_element.name
|
313
289
|
when 'table-column'
|
314
290
|
@style_defaults[sheet] << table_element.attributes['default-cell-style-name']
|
315
291
|
when 'table-row'
|
316
292
|
if table_element.attributes['number-rows-repeated']
|
317
|
-
#TODO: skip_row = table_element.attributes['number-rows-repeated'].to_i
|
318
293
|
skip_row = table_element.attributes['number-rows-repeated'].to_s.to_i
|
319
294
|
row = row + skip_row - 1
|
320
295
|
end
|
321
|
-
#TODO: table_element.each_element do |cell|
|
322
296
|
table_element.children.each do |cell|
|
323
|
-
#TODO: skip_col = cell.attributes['number-columns-repeated']
|
324
297
|
skip_col = cell['number-columns-repeated']
|
325
|
-
#TODO: formula = cell.attributes['formula']
|
326
298
|
formula = cell['formula']
|
327
|
-
#TODO: vt = cell.attributes['value-type']
|
328
299
|
vt = cell['value-type']
|
329
|
-
#TODO: v = cell.attributes['value']
|
330
300
|
v = cell['value']
|
331
|
-
#TODO: style_name = cell.attributes['style-name']
|
332
301
|
style_name = cell['style-name']
|
333
302
|
if vt == 'string'
|
334
303
|
str_v = ''
|
335
304
|
# insert \n if there is more than one paragraph
|
336
305
|
para_count = 0
|
337
|
-
#TODO: cell.each_element do |str|
|
338
306
|
cell.children.each do |str|
|
339
307
|
if str.name == 'p'
|
340
308
|
v = str.content
|
@@ -352,7 +320,6 @@ class Openoffice < GenericSpreadsheet
|
|
352
320
|
end # == 'p'
|
353
321
|
end
|
354
322
|
elsif vt == 'time'
|
355
|
-
#TODO: cell.each_element do |str|
|
356
323
|
cell.children.each do |str|
|
357
324
|
if str.name == 'p'
|
358
325
|
v = str.content
|
@@ -388,8 +355,6 @@ class Openoffice < GenericSpreadsheet
|
|
388
355
|
end
|
389
356
|
end
|
390
357
|
end
|
391
|
-
|
392
|
-
#TODO: @doc.find("//*[local-name()='automatic-styles']").each do |style|
|
393
358
|
@doc.xpath("//*[local-name()='automatic-styles']").each do |style|
|
394
359
|
read_styles(style)
|
395
360
|
end
|
data/test/benchmark1.rb
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'roo'
|
3
|
+
require 'benchmark'
|
4
|
+
|
5
|
+
def process_all_cells(oo)
|
6
|
+
oo.default_sheet = oo.sheets.first
|
7
|
+
#oo.first_row.upto(oo.last_row) do |row|
|
8
|
+
# oo.first_column.upto(oo.last_column) do |col|
|
9
|
+
# result = oo.cell(row,col)
|
10
|
+
# end
|
11
|
+
#end
|
12
|
+
ret = []
|
13
|
+
oo.first_row.upto(oo.last_row) do |row|
|
14
|
+
ret << oo.row(row)
|
15
|
+
end
|
16
|
+
end
|
17
|
+
# 3735 Zeilen jeweils
|
18
|
+
def openoffice
|
19
|
+
oo = Openoffice.new('Bibelbund.ods')
|
20
|
+
process_all_cells(oo)
|
21
|
+
end
|
22
|
+
def excel
|
23
|
+
oo = Excel.new('Bibelbund.xls')
|
24
|
+
process_all_cells(oo)
|
25
|
+
end
|
26
|
+
def excelx
|
27
|
+
oo = Excelx.new('Bibelbund.xlsx')
|
28
|
+
process_all_cells(oo)
|
29
|
+
end
|
30
|
+
|
31
|
+
n = 1
|
32
|
+
Benchmark.bmbm(10) do |x|
|
33
|
+
x.report('openoffice:') { n.times do
|
34
|
+
openoffice
|
35
|
+
end }
|
36
|
+
x.report('excel:') { n.times do
|
37
|
+
excel
|
38
|
+
end }
|
39
|
+
x.report('excelx:') { n.times do
|
40
|
+
excelx
|
41
|
+
end }
|
42
|
+
end
|
43
|
+
|
Binary file
|
data/test/test_roo.rb
CHANGED
@@ -34,7 +34,7 @@ $log = Logger.new(File.join(ENV['HOME'],"roo.log"))
|
|
34
34
|
#$log.level = Logger::WARN
|
35
35
|
$log.level = Logger::DEBUG
|
36
36
|
|
37
|
-
DISPLAY_LOG = false
|
37
|
+
DISPLAY_LOG = true
|
38
38
|
DB_LOG = false
|
39
39
|
if DB_LOG
|
40
40
|
require 'activerecord'
|
@@ -170,7 +170,7 @@ class TestRoo < Test::Unit::TestCase
|
|
170
170
|
LIBREOFFICE = true # do Libreoffice tests? (.ods files)
|
171
171
|
|
172
172
|
ONLINE = false
|
173
|
-
LONG_RUN = false
|
173
|
+
LONG_RUN = true
|
174
174
|
GLOBAL_TIMEOUT = 48.minutes
|
175
175
|
|
176
176
|
def setup
|
@@ -2351,4 +2351,29 @@ where the expected result is
|
|
2351
2351
|
assert_equal "", `diff test/so_datetime.csv datetime.csv`
|
2352
2352
|
end
|
2353
2353
|
end
|
2354
|
+
|
2355
|
+
# 2011-08-11
|
2356
|
+
def test_bug_openoffice_formula_missing_letters
|
2357
|
+
if OPENOFFICE
|
2358
|
+
# Dieses Dokument wurde mit LibreOffice angelegt.
|
2359
|
+
# Keine Ahnung, ob es damit zusammenhaengt, das diese
|
2360
|
+
# Formeln anders sind, als in der Datei formula.ods, welche
|
2361
|
+
# mit Openoffice angelegt wurde.
|
2362
|
+
# Bei den Openoffice-Dateien ist in diesem Feld in der XML-
|
2363
|
+
# Datei of: als Prefix enthalten, waehrend in dieser Datei
|
2364
|
+
# irgendetwas mit oooc: als Prefix verwendet wird.
|
2365
|
+
oo = Openoffice.new(File.join(TESTDIR,'dreimalvier.ods'))
|
2366
|
+
oo.default_sheet = oo.sheets.first
|
2367
|
+
assert_equal '=SUM([.A1:.D1])', oo.formula('e',1)
|
2368
|
+
assert_equal '=SUM([.A2:.D2])', oo.formula('e',2)
|
2369
|
+
assert_equal '=SUM([.A3:.D3])', oo.formula('e',3)
|
2370
|
+
assert_equal [
|
2371
|
+
[1,5,'=SUM([.A1:.D1])'],
|
2372
|
+
[2,5,'=SUM([.A2:.D2])'],
|
2373
|
+
[3,5,'=SUM([.A3:.D3])'],
|
2374
|
+
], oo.formulas
|
2375
|
+
|
2376
|
+
end
|
2377
|
+
end
|
2378
|
+
|
2354
2379
|
end # class
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: roo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.9.6
|
4
|
+
version: 1.9.7
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-08-
|
12
|
+
date: 2011-08-27 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: spreadsheet
|
16
|
-
requirement: &
|
16
|
+
requirement: &25467852 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>'
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 0.6.4
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *25467852
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: nokogiri
|
27
|
-
requirement: &
|
27
|
+
requirement: &25467564 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 1.5.0
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *25467564
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: google-spreadsheet-ruby
|
38
|
-
requirement: &
|
38
|
+
requirement: &25467036 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: 0.1.5
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *25467036
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: choice
|
49
|
-
requirement: &
|
49
|
+
requirement: &25466676 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: 0.1.4
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *25466676
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: todonotes
|
60
|
-
requirement: &
|
60
|
+
requirement: &25466244 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ! '>='
|
@@ -65,18 +65,18 @@ dependencies:
|
|
65
65
|
version: 0.1.0
|
66
66
|
type: :runtime
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *25466244
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: bones
|
71
|
-
requirement: &
|
71
|
+
requirement: &25465872 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ! '>='
|
75
75
|
- !ruby/object:Gem::Version
|
76
|
-
version: 3.7.
|
76
|
+
version: 3.7.1
|
77
77
|
type: :development
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *25465872
|
80
80
|
description: ! 'Roo can access the contents of various spreadsheet files. It can handle
|
81
81
|
|
82
82
|
* Openoffice
|
@@ -96,8 +96,6 @@ extra_rdoc_files:
|
|
96
96
|
- PostInstall.txt
|
97
97
|
- README.txt
|
98
98
|
- bin/roo
|
99
|
-
- lib/.roo.rb.swp
|
100
|
-
- lib/roo/.generic_spreadsheet.rb.swp
|
101
99
|
- test/no_spreadsheet_file.txt
|
102
100
|
files:
|
103
101
|
- History.txt
|
@@ -110,9 +108,7 @@ files:
|
|
110
108
|
- bin/roo
|
111
109
|
- csv8532
|
112
110
|
- datetime.csv
|
113
|
-
- lib/.roo.rb.swp
|
114
111
|
- lib/roo.rb
|
115
|
-
- lib/roo/.generic_spreadsheet.rb.swp
|
116
112
|
- lib/roo/excel.rb
|
117
113
|
- lib/roo/excelx.rb
|
118
114
|
- lib/roo/generic_spreadsheet.rb
|
@@ -131,6 +127,7 @@ files:
|
|
131
127
|
- test/bbu.ods
|
132
128
|
- test/bbu.xls
|
133
129
|
- test/bbu.xlsx
|
130
|
+
- test/benchmark1.rb
|
134
131
|
- test/bode-v1.ods.zip
|
135
132
|
- test/bode-v1.xls.zip
|
136
133
|
- test/boolean.ods
|
@@ -144,6 +141,7 @@ files:
|
|
144
141
|
- test/datetime.xls
|
145
142
|
- test/datetime.xlsx
|
146
143
|
- test/datetime_floatconv.xls
|
144
|
+
- test/dreimalvier.ods
|
147
145
|
- test/emptysheets.ods
|
148
146
|
- test/emptysheets.xls
|
149
147
|
- test/emptysheets.xlsx
|
@@ -215,7 +213,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
215
213
|
version: '0'
|
216
214
|
requirements: []
|
217
215
|
rubyforge_project: roo
|
218
|
-
rubygems_version: 1.8.
|
216
|
+
rubygems_version: 1.8.9
|
219
217
|
signing_key:
|
220
218
|
specification_version: 3
|
221
219
|
summary: Roo can access the contents of various spreadsheet files.
|
data/lib/.roo.rb.swp
DELETED
Binary file
|
Binary file
|