roo 1.9.6 → 1.9.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +6 -0
- data/bin/roo +2 -1
- data/lib/roo.rb +1 -1
- data/lib/roo/excel.rb +16 -54
- data/lib/roo/excelx.rb +38 -78
- data/lib/roo/generic_spreadsheet.rb +17 -45
- data/lib/roo/google.rb +0 -8
- data/lib/roo/openoffice.rb +9 -44
- data/test/benchmark1.rb +43 -0
- data/test/dreimalvier.ods +0 -0
- data/test/test_roo.rb +27 -2
- metadata +18 -20
- data/lib/.roo.rb.swp +0 -0
- data/lib/roo/.generic_spreadsheet.rb.swp +0 -0
data/History.txt
CHANGED
@@ -1,4 +1,10 @@
|
|
1
|
+
== 1.9.7 2011-08-27
|
2
|
+
|
3
|
+
* 1 bugfix
|
4
|
+
* Openoffice: Better way for extracting formula strings, some characters were deleted at the formula string under some circumstances.
|
5
|
+
|
1
6
|
== 1.9.6 2011-08-03
|
7
|
+
|
2
8
|
* 1 enhancement
|
3
9
|
* new class Libreoffice (Libreoffice should do exactly the same as the Openoffice
|
4
10
|
class. It's just another name. Technically, Libreoffice is inherited from
|
data/bin/roo
CHANGED
data/lib/roo.rb
CHANGED
data/lib/roo/excel.rb
CHANGED
@@ -100,8 +100,7 @@ class Excel < GenericSpreadsheet
|
|
100
100
|
def initialize(filename, packed = nil, file_warning = :error)
|
101
101
|
super()
|
102
102
|
@file_warning = file_warning
|
103
|
-
|
104
|
-
# @tmpdir = "oo_"+$$.to_s
|
103
|
+
file_type_check(filename,'.xls','an Excel',packed)
|
105
104
|
@tmpdir = GenericSpreadsheet.next_tmpdir
|
106
105
|
@tmpdir = File.join(ENV['ROO_TMP'], @tmpdir) if ENV['ROO_TMP']
|
107
106
|
unless File.exists?(@tmpdir)
|
@@ -110,36 +109,19 @@ class Excel < GenericSpreadsheet
|
|
110
109
|
filename = open_from_uri(filename) if filename[0,7] == "http://"
|
111
110
|
filename = open_from_stream(filename[7..-1]) if filename[0,7] == "stream:"
|
112
111
|
filename = unzip(filename) if packed and packed == :zip
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
#
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
unless File.file?(@filename)
|
122
|
-
FileUtils::rm_r(@tmpdir)
|
123
|
-
raise IOError, "file #{@filename} does not exist"
|
124
|
-
end
|
125
|
-
begin
|
126
|
-
@workbook = Spreadsheet.open(filename)
|
127
|
-
rescue Ole::Storage::FormatError
|
128
|
-
FileUtils::rm_r(@tmpdir)
|
129
|
-
raise # nach aussen weiterhin sichtbar
|
130
|
-
end
|
131
|
-
@default_sheet = self.sheets.first
|
132
|
-
#ensure
|
133
|
-
#if ENV["roo_local"] != "thomas-p"
|
134
|
-
# ich glaube ich darf hier noch nicht die temporaere
|
135
|
-
# Datei loeschen, weil
|
136
|
-
#to do
|
137
|
-
#"Loeschen temp. Directory anpassen"
|
138
|
-
#end
|
139
|
-
#end
|
140
|
-
#if fremdrechner?
|
112
|
+
@filename = filename
|
113
|
+
unless File.file?(@filename)
|
114
|
+
FileUtils::rm_r(@tmpdir)
|
115
|
+
raise IOError, "file #{@filename} does not exist"
|
116
|
+
end
|
117
|
+
begin
|
118
|
+
@workbook = Spreadsheet.open(filename)
|
119
|
+
rescue Ole::Storage::FormatError
|
141
120
|
FileUtils::rm_r(@tmpdir)
|
142
|
-
|
121
|
+
raise # nach aussen weiterhin sichtbar
|
122
|
+
end
|
123
|
+
@default_sheet = self.sheets.first
|
124
|
+
FileUtils::rm_r(@tmpdir)
|
143
125
|
@cell = Hash.new
|
144
126
|
@cell_type = Hash.new
|
145
127
|
@formula = Hash.new
|
@@ -154,11 +136,7 @@ class Excel < GenericSpreadsheet
|
|
154
136
|
|
155
137
|
# returns an array of sheet names in the spreadsheet
|
156
138
|
def sheets
|
157
|
-
|
158
|
-
@workbook.worksheets.each do |worksheet|
|
159
|
-
result << normalize_string(worksheet.name)
|
160
|
-
end
|
161
|
-
return result
|
139
|
+
@workbook.worksheets.collect {|worksheet| normalize_string(worksheet.name)}
|
162
140
|
end
|
163
141
|
|
164
142
|
# returns the content of a cell. The upper left corner is (1,1) or ('A',1)
|
@@ -175,7 +153,6 @@ class Excel < GenericSpreadsheet
|
|
175
153
|
if celltype(row,col,sheet) == :string
|
176
154
|
return platform_specific_iconv(@cell[sheet][[row,col]])
|
177
155
|
else
|
178
|
-
#return @cell[sheet][[row,col]]
|
179
156
|
if @cell[sheet] and @cell[sheet][[row,col]]
|
180
157
|
return @cell[sheet][[row,col]]
|
181
158
|
else
|
@@ -200,7 +177,6 @@ class Excel < GenericSpreadsheet
|
|
200
177
|
if @formula[sheet] and @formula[sheet][[row,col]]
|
201
178
|
return :formula
|
202
179
|
else
|
203
|
-
# @cell_type[sheet][[row,col]]
|
204
180
|
if @cell_type[sheet] and @cell_type[sheet][[row,col]]
|
205
181
|
return @cell_type[sheet][[row,col]]
|
206
182
|
else
|
@@ -354,7 +330,7 @@ class Excel < GenericSpreadsheet
|
|
354
330
|
@fonts[sheet] = {} unless @fonts[sheet]
|
355
331
|
@fonts[sheet][key] = font
|
356
332
|
|
357
|
-
case vt
|
333
|
+
case vt
|
358
334
|
when :float
|
359
335
|
@cell[sheet][key] = v.to_f
|
360
336
|
when :string
|
@@ -388,7 +364,7 @@ class Excel < GenericSpreadsheet
|
|
388
364
|
(0..row.size).each do |cell_index|
|
389
365
|
cell = row.at(cell_index)
|
390
366
|
next if cell.nil? #skip empty cells
|
391
|
-
next if cell.class == Spreadsheet::Formula && cell.value.nil? # skip empty
|
367
|
+
next if cell.class == Spreadsheet::Formula && cell.value.nil? # skip empty formula cells
|
392
368
|
if date_or_time?(row, cell_index)
|
393
369
|
vt, v = read_cell_date_or_time(row, cell_index)
|
394
370
|
else
|
@@ -484,18 +460,4 @@ class Excel < GenericSpreadsheet
|
|
484
460
|
end
|
485
461
|
private :read_cell
|
486
462
|
|
487
|
-
#TODO: testing only
|
488
|
-
# def inject_null_characters(str)
|
489
|
-
# if str.class != String
|
490
|
-
# return str
|
491
|
-
# end
|
492
|
-
# new_str=''
|
493
|
-
# 0.upto(str.size-1) do |i|
|
494
|
-
# new_str += str[i,1]
|
495
|
-
# new_str += "\000"
|
496
|
-
# end
|
497
|
-
# new_str
|
498
|
-
# end
|
499
|
-
#
|
500
|
-
|
501
463
|
end
|
data/lib/roo/excelx.rb
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
#TODO: require 'xml'
|
2
1
|
require 'fileutils'
|
3
2
|
require 'zip/zipfilesystem'
|
4
3
|
require 'date'
|
@@ -87,8 +86,7 @@ class Excelx < GenericSpreadsheet
|
|
87
86
|
def initialize(filename, packed=nil, file_warning = :error) #, create = false)
|
88
87
|
super()
|
89
88
|
@file_warning = file_warning
|
90
|
-
|
91
|
-
#@tmpdir = "oo_"+$$.to_s
|
89
|
+
file_type_check(filename,'.xlsx','an Excel-xlsx',packed)
|
92
90
|
@tmpdir = GenericSpreadsheet.next_tmpdir
|
93
91
|
@tmpdir = File.join(ENV['ROO_TMP'], @tmpdir) if ENV['ROO_TMP']
|
94
92
|
unless File.exists?(@tmpdir)
|
@@ -96,52 +94,39 @@ class Excelx < GenericSpreadsheet
|
|
96
94
|
end
|
97
95
|
filename = open_from_uri(filename) if filename[0,7] == "http://"
|
98
96
|
filename = unzip(filename) if packed and packed == :zip
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
@
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
97
|
+
@cells_read = Hash.new
|
98
|
+
@filename = filename
|
99
|
+
unless File.file?(@filename)
|
100
|
+
FileUtils::rm_r(@tmpdir)
|
101
|
+
raise IOError, "file #{@filename} does not exist"
|
102
|
+
end
|
103
|
+
@@nr += 1
|
104
|
+
@file_nr = @@nr
|
105
|
+
extract_content(@filename)
|
106
|
+
file = File.new(File.join(@tmpdir, @file_nr.to_s+"_roo_workbook.xml"))
|
107
|
+
@workbook_doc = Nokogiri::XML(file)
|
108
|
+
file.close
|
109
|
+
@shared_table = []
|
110
|
+
if File.exist?(File.join(@tmpdir, @file_nr.to_s+'_roo_sharedStrings.xml'))
|
111
|
+
file = File.new(File.join(@tmpdir, @file_nr.to_s+'_roo_sharedStrings.xml'))
|
112
|
+
@sharedstring_doc = Nokogiri::XML(file)
|
115
113
|
file.close
|
116
|
-
@
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
@
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
end
|
133
|
-
@sheet_doc = []
|
134
|
-
@sheet_files.each_with_index do |item, i|
|
135
|
-
file = File.new(item)
|
136
|
-
#TODO: @sheet_doc[i] = XML::Parser.io(file).parse
|
137
|
-
@sheet_doc[i] = Nokogiri::XML(file)
|
138
|
-
file.close
|
139
|
-
end
|
140
|
-
#ensure
|
141
|
-
#if ENV["roo_local"] != "thomas-p"
|
142
|
-
#FileUtils::rm_r(@tmpdir)
|
143
|
-
#end
|
144
|
-
#end
|
114
|
+
read_shared_strings(@sharedstring_doc)
|
115
|
+
end
|
116
|
+
@styles_table = []
|
117
|
+
@style_definitions = Array.new # TODO: ??? { |h,k| h[k] = {} }
|
118
|
+
if File.exist?(File.join(@tmpdir, @file_nr.to_s+'_roo_styles.xml'))
|
119
|
+
file = File.new(File.join(@tmpdir, @file_nr.to_s+'_roo_styles.xml'))
|
120
|
+
@styles_doc = Nokogiri::XML(file)
|
121
|
+
file.close
|
122
|
+
read_styles(@styles_doc)
|
123
|
+
end
|
124
|
+
@sheet_doc = []
|
125
|
+
@sheet_files.each_with_index do |item, i|
|
126
|
+
file = File.new(item)
|
127
|
+
@sheet_doc[i] = Nokogiri::XML(file)
|
128
|
+
file.close
|
129
|
+
end
|
145
130
|
FileUtils::rm_r(@tmpdir)
|
146
131
|
@default_sheet = self.sheets.first
|
147
132
|
@cell = Hash.new
|
@@ -296,13 +281,12 @@ class Excelx < GenericSpreadsheet
|
|
296
281
|
# returns an array of sheet names in the spreadsheet
|
297
282
|
def sheets
|
298
283
|
return_sheets = []
|
299
|
-
#TODO: @workbook_doc.find("//*[local-name()='sheet']").each do |sheet|
|
300
284
|
@workbook_doc.xpath("//*[local-name()='sheet']").each do |sheet|
|
301
|
-
#TODO: return_sheets << sheet.attributes.to_h['name']
|
302
285
|
return_sheets << sheet['name']
|
303
286
|
end
|
304
287
|
return_sheets
|
305
288
|
end
|
289
|
+
|
306
290
|
# shows the internal representation of all cells
|
307
291
|
# for debugging purposes
|
308
292
|
def to_s(sheet=nil)
|
@@ -315,9 +299,9 @@ class Excelx < GenericSpreadsheet
|
|
315
299
|
|
316
300
|
# helper function to set the internal representation of cells
|
317
301
|
def set_cell_values(sheet,x,y,i,v,vt,formula,tr,str_v,
|
318
|
-
|
319
|
-
|
320
|
-
|
302
|
+
excelx_type=nil,
|
303
|
+
excelx_value=nil,
|
304
|
+
s_attribute=nil)
|
321
305
|
key = [y,x+i]
|
322
306
|
@cell_type[sheet] = {} unless @cell_type[sheet]
|
323
307
|
@cell_type[sheet][key] = vt
|
@@ -390,18 +374,13 @@ class Excelx < GenericSpreadsheet
|
|
390
374
|
raise ArgumentError, "Error: sheet '#{sheet||'nil'}' not valid" if @default_sheet == nil and sheet==nil
|
391
375
|
raise RangeError unless self.sheets.include? sheet
|
392
376
|
n = self.sheets.index(sheet)
|
393
|
-
#TODO: @sheet_doc[n].find("//*[local-name()='c']").each do |c|
|
394
377
|
@sheet_doc[n].xpath("//*[local-name()='c']").each do |c|
|
395
|
-
#TODO: s_attribute = c.attributes.to_h['s'].to_i # should be here
|
396
378
|
s_attribute = c['s'].to_i # should be here
|
397
|
-
#TODO: if (c.attributes.to_h['t'] == 's')
|
398
379
|
# c: <c r="A5" s="2">
|
399
380
|
# <v>22606</v>
|
400
381
|
# </c>, format: , tmp_type: float
|
401
|
-
|
402
382
|
if c['t'] == 's'
|
403
383
|
tmp_type = :shared
|
404
|
-
#TODO: elsif (c.attributes.to_h['t'] == 'b')
|
405
384
|
elsif c['t'] == 'b'
|
406
385
|
tmp_type = :boolean
|
407
386
|
# 2011-02-25 BEGIN
|
@@ -409,19 +388,15 @@ class Excelx < GenericSpreadsheet
|
|
409
388
|
tmp_type = :string
|
410
389
|
# 2011-02-25 END
|
411
390
|
else
|
412
|
-
|
413
|
-
s_attribute = c['s'].to_i # was here
|
391
|
+
s_attribute = c['s'].to_i
|
414
392
|
format = attribute2format(s_attribute)
|
415
393
|
tmp_type = format2type(format)
|
416
394
|
end
|
417
395
|
formula = nil
|
418
|
-
#TODO: c.each_element do |cell|
|
419
396
|
c.children.each do |cell|
|
420
|
-
#TODO: if cell.name == 'f'
|
421
397
|
if cell.name == 'f'
|
422
398
|
formula = cell.content
|
423
399
|
end
|
424
|
-
#TODO: if cell.name == 'v'
|
425
400
|
if cell.name == 'v'
|
426
401
|
if tmp_type == :time or tmp_type == :datetime
|
427
402
|
if cell.content.to_f >= 1.0
|
@@ -464,8 +439,6 @@ class Excelx < GenericSpreadsheet
|
|
464
439
|
vt = :float
|
465
440
|
v = cell.content
|
466
441
|
end
|
467
|
-
#puts "vt: #{vt}" if cell.text.include? "22606.5120"
|
468
|
-
#TODO: x,y = split_coordinate(c.attributes.to_h['r'])
|
469
442
|
x,y = split_coordinate(c['r'])
|
470
443
|
tr=nil #TODO: ???s
|
471
444
|
set_cell_values(sheet,x,y,0,v,vt,formula,tr,str_v,excelx_type,excelx_value,s_attribute)
|
@@ -546,13 +519,10 @@ class Excelx < GenericSpreadsheet
|
|
546
519
|
|
547
520
|
# read the shared strings xml document
|
548
521
|
def read_shared_strings(doc)
|
549
|
-
#TODO: doc.find("//*[local-name()='si']").each do |si|
|
550
522
|
doc.xpath("//*[local-name()='si']").each do |si|
|
551
523
|
shared_table_entry = ''
|
552
|
-
#TODO: si.each_element do |elem|
|
553
524
|
si.children.each do |elem|
|
554
525
|
if elem.name == 'r' and elem.children
|
555
|
-
# elem.each_element do |r_elem|
|
556
526
|
elem.children.each do |r_elem|
|
557
527
|
if r_elem.name == 't'
|
558
528
|
shared_table_entry << r_elem.content
|
@@ -573,19 +543,13 @@ class Excelx < GenericSpreadsheet
|
|
573
543
|
@cellXfs = []
|
574
544
|
fonts = []
|
575
545
|
|
576
|
-
#TODO: doc.find("//*[local-name()='numFmt']").each do |numFmt|
|
577
546
|
doc.xpath("//*[local-name()='numFmt']").each do |numFmt|
|
578
|
-
# TODO: numFmtId = numFmt.attributes.to_h['numFmtId']
|
579
547
|
numFmtId = numFmt.attributes['numFmtId']
|
580
|
-
#TODO: formatCode = numFmt.attributes.to_h['formatCode']
|
581
548
|
formatCode = numFmt.attributes['formatCode']
|
582
549
|
@numFmts << [numFmtId, formatCode]
|
583
550
|
end
|
584
|
-
#TODO: doc.find("//*[local-name()='fonts']").each do |fonts_el|
|
585
551
|
doc.xpath("//*[local-name()='fonts']").each do |fonts_el|
|
586
|
-
#TODO: fonts_el.each_element do |font_el|
|
587
552
|
fonts_el.children.each do |font_el|
|
588
|
-
#TODO: if font_el.name == 'font'
|
589
553
|
if font_el == 'font'
|
590
554
|
font = Excelx::Font.new
|
591
555
|
font_el.each_element do |font_sub_el|
|
@@ -603,13 +567,10 @@ class Excelx < GenericSpreadsheet
|
|
603
567
|
end
|
604
568
|
end
|
605
569
|
|
606
|
-
#TODO: doc.find("//*[local-name()='cellXfs']").each do |xfs|
|
607
570
|
doc.xpath("//*[local-name()='cellXfs']").each do |xfs|
|
608
571
|
xfs.children.each do |xf|
|
609
|
-
#TODO: numFmtId = xf.attributes.to_h['numFmtId']
|
610
572
|
numFmtId = xf['numFmtId']
|
611
573
|
@cellXfs << [numFmtId]
|
612
|
-
#TODO: fontId = xf.attributes.to_h['fontId'].to_i
|
613
574
|
fontId = xf['fontId'].to_i
|
614
575
|
@style_definitions << fonts[fontId]
|
615
576
|
end
|
@@ -620,7 +581,6 @@ class Excelx < GenericSpreadsheet
|
|
620
581
|
def attribute2format(s)
|
621
582
|
result = nil
|
622
583
|
@numFmts.each {|nf|
|
623
|
-
#TODO: if nf.first == @cellXfs[s.to_i].first
|
624
584
|
# to_s weil das eine Nokogiri::XML::Attr und das
|
625
585
|
# andere ein String ist
|
626
586
|
if nf.first.to_s == @cellXfs[s.to_i].first
|
@@ -6,8 +6,6 @@ class GenericSpreadsheet
|
|
6
6
|
|
7
7
|
attr_reader :default_sheet
|
8
8
|
|
9
|
-
@@class_counter = 0
|
10
|
-
|
11
9
|
# sets the line with attribute names (default: 1)
|
12
10
|
attr_accessor :header_line
|
13
11
|
|
@@ -30,11 +28,8 @@ class GenericSpreadsheet
|
|
30
28
|
end
|
31
29
|
|
32
30
|
def self.next_tmpdir
|
33
|
-
|
34
|
-
|
35
|
-
tmpdir = "oo_"+$$.to_s+"_"+sprintf("%010d",rand(10_000_000_000))
|
36
|
-
# p "@tmpdir = #{tmpdir}"; sleep 5
|
37
|
-
tmpdir
|
31
|
+
tmpdir = "oo_"+$$.to_s+"_"+sprintf("%010d",rand(10_000_000_000))
|
32
|
+
tmpdir
|
38
33
|
end
|
39
34
|
|
40
35
|
|
@@ -333,9 +328,14 @@ class GenericSpreadsheet
|
|
333
328
|
def reload
|
334
329
|
# von Abfrage der Klasse direkt auf .to_s == '..' umgestellt
|
335
330
|
ds = @default_sheet
|
336
|
-
|
337
|
-
|
338
|
-
|
331
|
+
if self.class.to_s == 'Google'
|
332
|
+
initialize(@spreadsheetkey,@user,@password)
|
333
|
+
else
|
334
|
+
initialize(@filename)
|
335
|
+
to do
|
336
|
+
'was ist mit weiteren Parametern bei initialize'
|
337
|
+
end
|
338
|
+
end
|
339
339
|
self.default_sheet = ds
|
340
340
|
#@first_row = @last_row = @first_column = @last_column = nil
|
341
341
|
end
|
@@ -354,13 +354,7 @@ class GenericSpreadsheet
|
|
354
354
|
# recursively removes the current temporary directory
|
355
355
|
# this is only needed if you work with zipped files or files via the web
|
356
356
|
def remove_tmp
|
357
|
-
#to do
|
358
|
-
# "remove_tmp wieder aktivieren"
|
359
|
-
#end
|
360
|
-
#return
|
361
|
-
#$log.debug("remove_tmp('#{@tmpdir}')")
|
362
357
|
if File.exists?(@tmpdir)
|
363
|
-
#$log.debug("#{@tmpdir} exists")
|
364
358
|
FileUtils::rm_r(@tmpdir)
|
365
359
|
end
|
366
360
|
end
|
@@ -441,12 +435,11 @@ class GenericSpreadsheet
|
|
441
435
|
sheet = @default_sheet unless sheet
|
442
436
|
read_cells(sheet) unless @cells_read[sheet]
|
443
437
|
return theformulas unless first_row(sheet) # if there is no first row then
|
444
|
-
|
438
|
+
# there can't be formulas
|
445
439
|
first_row(sheet).upto(last_row(sheet)) {|row|
|
446
440
|
first_column(sheet).upto(last_column(sheet)) {|col|
|
447
441
|
if formula?(row,col,sheet)
|
448
|
-
|
449
|
-
theformulas << f
|
442
|
+
theformulas << [row, col, formula(row,col,sheet)]
|
450
443
|
end
|
451
444
|
}
|
452
445
|
}
|
@@ -592,17 +585,12 @@ class GenericSpreadsheet
|
|
592
585
|
if ! sheet_found
|
593
586
|
raise RangeError, "sheet '#{@default_sheet}' not found"
|
594
587
|
end
|
595
|
-
#raise ArgumentError, "Error: default_sheet not set" if @default_sheet == nil
|
596
588
|
end
|
597
589
|
|
598
590
|
def process_zipfile_packed(zip, path='')
|
599
591
|
ret=nil
|
600
592
|
if zip.file.file? path
|
601
593
|
# extract and return filename
|
602
|
-
#2011-08-01 @tmpdir = "oo_"+$$.to_s
|
603
|
-
#2011-08-01 unless File.exists?(@tmpdir)
|
604
|
-
#2011-08-01 FileUtils::mkdir(@tmpdir)
|
605
|
-
#2011-08-01end
|
606
594
|
file = File.open(File.join(@tmpdir, path),"wb")
|
607
595
|
file.write(zip.read(path))
|
608
596
|
file.close
|
@@ -623,7 +611,6 @@ class GenericSpreadsheet
|
|
623
611
|
def write_csv_content(file=nil,sheet=nil)
|
624
612
|
file = STDOUT unless file
|
625
613
|
if first_row(sheet) # sheet is not empty
|
626
|
-
# first_row(sheet).upto(last_row(sheet)) do |row|
|
627
614
|
1.upto(last_row(sheet)) do |row|
|
628
615
|
1.upto(last_column(sheet)) do |col|
|
629
616
|
file.print(",") if col > 1
|
@@ -644,16 +631,10 @@ class GenericSpreadsheet
|
|
644
631
|
else
|
645
632
|
case onecelltype
|
646
633
|
when :string
|
647
|
-
|
648
|
-
# str << ''
|
649
|
-
#else
|
650
|
-
# one = onecell.gsub(/"/,'""')
|
651
|
-
# str << ('"'+one+'"')
|
652
|
-
#end
|
653
|
-
unless onecell.empty?
|
634
|
+
unless onecell.empty?
|
654
635
|
one = onecell.gsub(/"/,'""')
|
655
636
|
str << ('"'+one+'"')
|
656
|
-
|
637
|
+
end
|
657
638
|
when :float, :percentage
|
658
639
|
if onecell == onecell.to_i
|
659
640
|
str << onecell.to_i.to_s
|
@@ -662,16 +643,10 @@ class GenericSpreadsheet
|
|
662
643
|
end
|
663
644
|
when :formula
|
664
645
|
if onecell.class == String
|
665
|
-
|
666
|
-
# str << ''
|
667
|
-
# else
|
668
|
-
# one = onecell.gsub(/"/,'""')
|
669
|
-
# str << '"'+one+'"'
|
670
|
-
# end
|
671
|
-
unless onecell.empty?
|
646
|
+
unless onecell.empty?
|
672
647
|
one = onecell.gsub(/"/,'""')
|
673
648
|
str << '"'+one+'"'
|
674
|
-
|
649
|
+
end
|
675
650
|
elsif onecell.class == Float
|
676
651
|
if onecell == onecell.to_i
|
677
652
|
str << onecell.to_i.to_s
|
@@ -686,10 +661,7 @@ class GenericSpreadsheet
|
|
686
661
|
when :time
|
687
662
|
str << GenericSpreadsheet.integer_to_timestring(onecell)
|
688
663
|
when :datetime
|
689
|
-
|
690
|
-
# time = rest.split('+').first
|
691
|
-
# str << date + ' ' + time
|
692
|
-
str << onecell.to_s
|
664
|
+
str << onecell.to_s
|
693
665
|
else
|
694
666
|
raise "unhandled celltype "+onecelltype.to_s
|
695
667
|
end
|
data/lib/roo/google.rb
CHANGED
@@ -249,19 +249,12 @@ class Google < GenericSpreadsheet
|
|
249
249
|
@cell_type[sheet][key] = value_type
|
250
250
|
@formula[sheet] = {} unless @formula[sheet]
|
251
251
|
@formula[sheet][key] = string_value if value_type == :formula
|
252
|
-
############
|
253
|
-
#$log.debug("key: #{key}")
|
254
|
-
#$log.debug "#{ws[row,col].inspect}"
|
255
|
-
#@cell[sheet][key] = ws[row,col]
|
256
|
-
#$log.debug "@cell[sheet][key]: #{@cell[sheet][key]}"
|
257
|
-
############
|
258
252
|
end
|
259
253
|
end
|
260
254
|
@cells_read[sheet] = true
|
261
255
|
end
|
262
256
|
|
263
257
|
def determine_datatype(val, numval=nil)
|
264
|
-
# $log.debug "val: #{val} numval: #{numval}"
|
265
258
|
if val.nil? || val[0,1] == '='
|
266
259
|
ty = :formula
|
267
260
|
if numeric?(numval)
|
@@ -284,7 +277,6 @@ class Google < GenericSpreadsheet
|
|
284
277
|
ty = :string
|
285
278
|
end
|
286
279
|
end
|
287
|
-
#$log.debug "val: #{val} ty: #{ty}" if ty == :date
|
288
280
|
return val, ty
|
289
281
|
end
|
290
282
|
|
data/lib/roo/openoffice.rb
CHANGED
@@ -2,7 +2,6 @@ require 'rubygems'
|
|
2
2
|
require 'fileutils'
|
3
3
|
require 'zip/zipfilesystem'
|
4
4
|
require 'date'
|
5
|
-
require 'base64'
|
6
5
|
require 'nokogiri'
|
7
6
|
require 'cgi'
|
8
7
|
|
@@ -12,7 +11,7 @@ class Openoffice < GenericSpreadsheet
|
|
12
11
|
|
13
12
|
# initialization and opening of a spreadsheet file
|
14
13
|
# values for packed: :zip
|
15
|
-
def initialize(filename, packed=nil, file_warning=:error, tmpdir=nil)
|
14
|
+
def initialize(filename, packed=nil, file_warning=:error, tmpdir=nil)
|
16
15
|
@file_warning = file_warning
|
17
16
|
super()
|
18
17
|
file_type_check(filename,'.ods','an openoffice', packed)
|
@@ -24,9 +23,6 @@ class Openoffice < GenericSpreadsheet
|
|
24
23
|
end
|
25
24
|
filename = open_from_uri(filename) if filename[0,7] == "http://"
|
26
25
|
filename = unzip(filename) if packed and packed == :zip
|
27
|
-
#if create and ! File.exists?(filename)
|
28
|
-
# self.create_openoffice(filename)
|
29
|
-
#end
|
30
26
|
@cells_read = Hash.new
|
31
27
|
#TODO: @cells_read[:default] = false
|
32
28
|
@filename = filename
|
@@ -40,9 +36,7 @@ class Openoffice < GenericSpreadsheet
|
|
40
36
|
file = File.new(File.join(@tmpdir, @file_nr.to_s+"_roo_content.xml"))
|
41
37
|
@doc = Nokogiri::XML(file)
|
42
38
|
file.close
|
43
|
-
|
44
|
-
FileUtils::rm_r(@tmpdir)
|
45
|
-
#end
|
39
|
+
FileUtils::rm_r(@tmpdir)
|
46
40
|
@default_sheet = self.sheets.first
|
47
41
|
@cell = Hash.new
|
48
42
|
@cell_type = Hash.new
|
@@ -69,18 +63,6 @@ class Openoffice < GenericSpreadsheet
|
|
69
63
|
end
|
70
64
|
end
|
71
65
|
|
72
|
-
# creates a new empty openoffice-spreadsheet file
|
73
|
-
def create_openoffice(filename) #:nodoc:
|
74
|
-
#TODO: a better way for creating the file contents
|
75
|
-
# now you have to call mkbase64...rb to create an include file with all
|
76
|
-
# the empty files in an openoffice zip-file
|
77
|
-
load 'base64include.rb'
|
78
|
-
# puts @@empty_spreadsheet
|
79
|
-
f = File.open(filename,'wb')
|
80
|
-
f.print(Base64.decode64(@@empty_spreadsheet))
|
81
|
-
f.close
|
82
|
-
end
|
83
|
-
|
84
66
|
# Returns the content of a spreadsheet-cell.
|
85
67
|
# (1,1) is the upper left corner.
|
86
68
|
# (1,1), (1,'A'), ('A',1), ('a',1) all refers to the
|
@@ -90,7 +72,6 @@ class Openoffice < GenericSpreadsheet
|
|
90
72
|
read_cells(sheet) unless @cells_read[sheet]
|
91
73
|
row,col = normalize(row,col)
|
92
74
|
if celltype(row,col,sheet) == :date
|
93
|
-
#TODO: yyyy,mm,dd = @cell[sheet][[row,col]].split('-')
|
94
75
|
yyyy,mm,dd = @cell[sheet][[row,col]].to_s.split('-')
|
95
76
|
return Date.new(yyyy.to_i,mm.to_i,dd.to_i)
|
96
77
|
end
|
@@ -107,7 +88,13 @@ class Openoffice < GenericSpreadsheet
|
|
107
88
|
if @formula[sheet][[row,col]] == nil
|
108
89
|
return nil
|
109
90
|
else
|
110
|
-
return @formula[sheet][[row,col]]["oooc:".length..-1]
|
91
|
+
# return @formula[sheet][[row,col]]["oooc:".length..-1]
|
92
|
+
str = @formula[sheet][[row,col]]
|
93
|
+
if str.include? ':'
|
94
|
+
return str[str.index(':')+1..-1]
|
95
|
+
else
|
96
|
+
return str
|
97
|
+
end
|
111
98
|
end
|
112
99
|
end
|
113
100
|
|
@@ -183,9 +170,7 @@ class Openoffice < GenericSpreadsheet
|
|
183
170
|
|
184
171
|
def sheets
|
185
172
|
return_sheets = []
|
186
|
-
#TODO: @doc.find("//*[local-name()='table']").each do |sheet|
|
187
173
|
@doc.xpath("//*[local-name()='table']").each do |sheet|
|
188
|
-
#TODO: return_sheets << sheet.attributes['name']
|
189
174
|
return_sheets << sheet['name']
|
190
175
|
end
|
191
176
|
return_sheets
|
@@ -206,11 +191,6 @@ class Openoffice < GenericSpreadsheet
|
|
206
191
|
@cell[sheet].inspect
|
207
192
|
end
|
208
193
|
|
209
|
-
# save spreadsheet
|
210
|
-
def save #:nodoc:
|
211
|
-
42
|
212
|
-
end
|
213
|
-
|
214
194
|
# returns the row,col values of the labelled cell
|
215
195
|
# (nil,nil) if label is not defined
|
216
196
|
# sheet parameter is not really needed because label names are global
|
@@ -231,7 +211,6 @@ class Openoffice < GenericSpreadsheet
|
|
231
211
|
|
232
212
|
# read the version of the OO-Version
|
233
213
|
def oo_version
|
234
|
-
#TODO: @doc.find("//*[local-name()='document-content']").each do |office|
|
235
214
|
@doc.xpath("//*[local-name()='document-content']").each do |office|
|
236
215
|
@officeversion = office.attributes['version'].to_s
|
237
216
|
end
|
@@ -300,41 +279,30 @@ class Openoffice < GenericSpreadsheet
|
|
300
279
|
@labels[name] = [sheetname,row,col]
|
301
280
|
end
|
302
281
|
|
303
|
-
#TODO: @doc.find("//*[local-name()='table']").each do |ws|
|
304
282
|
@doc.xpath("//*[local-name()='table']").each do |ws|
|
305
|
-
#TODO: if sheet == ws.attributes['name']
|
306
283
|
if sheet == ws['name']
|
307
284
|
sheet_found = true
|
308
285
|
col = 1
|
309
286
|
row = 1
|
310
|
-
#TODO: ws.each_element do |table_element|
|
311
287
|
ws.children.each do |table_element|
|
312
288
|
case table_element.name
|
313
289
|
when 'table-column'
|
314
290
|
@style_defaults[sheet] << table_element.attributes['default-cell-style-name']
|
315
291
|
when 'table-row'
|
316
292
|
if table_element.attributes['number-rows-repeated']
|
317
|
-
#TODO: skip_row = table_element.attributes['number-rows-repeated'].to_i
|
318
293
|
skip_row = table_element.attributes['number-rows-repeated'].to_s.to_i
|
319
294
|
row = row + skip_row - 1
|
320
295
|
end
|
321
|
-
#TODO: table_element.each_element do |cell|
|
322
296
|
table_element.children.each do |cell|
|
323
|
-
#TODO: skip_col = cell.attributes['number-columns-repeated']
|
324
297
|
skip_col = cell['number-columns-repeated']
|
325
|
-
#TODO: formula = cell.attributes['formula']
|
326
298
|
formula = cell['formula']
|
327
|
-
#TODO: vt = cell.attributes['value-type']
|
328
299
|
vt = cell['value-type']
|
329
|
-
#TODO: v = cell.attributes['value']
|
330
300
|
v = cell['value']
|
331
|
-
#TODO: style_name = cell.attributes['style-name']
|
332
301
|
style_name = cell['style-name']
|
333
302
|
if vt == 'string'
|
334
303
|
str_v = ''
|
335
304
|
# insert \n if there is more than one paragraph
|
336
305
|
para_count = 0
|
337
|
-
#TODO: cell.each_element do |str|
|
338
306
|
cell.children.each do |str|
|
339
307
|
if str.name == 'p'
|
340
308
|
v = str.content
|
@@ -352,7 +320,6 @@ class Openoffice < GenericSpreadsheet
|
|
352
320
|
end # == 'p'
|
353
321
|
end
|
354
322
|
elsif vt == 'time'
|
355
|
-
#TODO: cell.each_element do |str|
|
356
323
|
cell.children.each do |str|
|
357
324
|
if str.name == 'p'
|
358
325
|
v = str.content
|
@@ -388,8 +355,6 @@ class Openoffice < GenericSpreadsheet
|
|
388
355
|
end
|
389
356
|
end
|
390
357
|
end
|
391
|
-
|
392
|
-
#TODO: @doc.find("//*[local-name()='automatic-styles']").each do |style|
|
393
358
|
@doc.xpath("//*[local-name()='automatic-styles']").each do |style|
|
394
359
|
read_styles(style)
|
395
360
|
end
|
data/test/benchmark1.rb
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'roo'
|
3
|
+
require 'benchmark'
|
4
|
+
|
5
|
+
def process_all_cells(oo)
|
6
|
+
oo.default_sheet = oo.sheets.first
|
7
|
+
#oo.first_row.upto(oo.last_row) do |row|
|
8
|
+
# oo.first_column.upto(oo.last_column) do |col|
|
9
|
+
# result = oo.cell(row,col)
|
10
|
+
# end
|
11
|
+
#end
|
12
|
+
ret = []
|
13
|
+
oo.first_row.upto(oo.last_row) do |row|
|
14
|
+
ret << oo.row(row)
|
15
|
+
end
|
16
|
+
end
|
17
|
+
# 3735 Zeilen jeweils
|
18
|
+
def openoffice
|
19
|
+
oo = Openoffice.new('Bibelbund.ods')
|
20
|
+
process_all_cells(oo)
|
21
|
+
end
|
22
|
+
def excel
|
23
|
+
oo = Excel.new('Bibelbund.xls')
|
24
|
+
process_all_cells(oo)
|
25
|
+
end
|
26
|
+
def excelx
|
27
|
+
oo = Excelx.new('Bibelbund.xlsx')
|
28
|
+
process_all_cells(oo)
|
29
|
+
end
|
30
|
+
|
31
|
+
n = 1
|
32
|
+
Benchmark.bmbm(10) do |x|
|
33
|
+
x.report('openoffice:') { n.times do
|
34
|
+
openoffice
|
35
|
+
end }
|
36
|
+
x.report('excel:') { n.times do
|
37
|
+
excel
|
38
|
+
end }
|
39
|
+
x.report('excelx:') { n.times do
|
40
|
+
excelx
|
41
|
+
end }
|
42
|
+
end
|
43
|
+
|
Binary file
|
data/test/test_roo.rb
CHANGED
@@ -34,7 +34,7 @@ $log = Logger.new(File.join(ENV['HOME'],"roo.log"))
|
|
34
34
|
#$log.level = Logger::WARN
|
35
35
|
$log.level = Logger::DEBUG
|
36
36
|
|
37
|
-
DISPLAY_LOG =
|
37
|
+
DISPLAY_LOG = true
|
38
38
|
DB_LOG = false
|
39
39
|
if DB_LOG
|
40
40
|
require 'activerecord'
|
@@ -170,7 +170,7 @@ class TestRoo < Test::Unit::TestCase
|
|
170
170
|
LIBREOFFICE = true # do Libreoffice tests? (.ods files)
|
171
171
|
|
172
172
|
ONLINE = false
|
173
|
-
LONG_RUN =
|
173
|
+
LONG_RUN = true
|
174
174
|
GLOBAL_TIMEOUT = 48.minutes
|
175
175
|
|
176
176
|
def setup
|
@@ -2351,4 +2351,29 @@ where the expected result is
|
|
2351
2351
|
assert_equal "", `diff test/so_datetime.csv datetime.csv`
|
2352
2352
|
end
|
2353
2353
|
end
|
2354
|
+
|
2355
|
+
# 2011-08-11
|
2356
|
+
def test_bug_openoffice_formula_missing_letters
|
2357
|
+
if OPENOFFICE
|
2358
|
+
# Dieses Dokument wurde mit LibreOffice angelegt.
|
2359
|
+
# Keine Ahnung, ob es damit zusammenhaengt, das diese
|
2360
|
+
# Formeln anders sind, als in der Datei formula.ods, welche
|
2361
|
+
# mit Openoffice angelegt wurde.
|
2362
|
+
# Bei den Openoffice-Dateien ist in diesem Feld in der XML-
|
2363
|
+
# Datei of: als Prefix enthalten, waehrend in dieser Datei
|
2364
|
+
# irgendetwas mit oooc: als Prefix verwendet wird.
|
2365
|
+
oo = Openoffice.new(File.join(TESTDIR,'dreimalvier.ods'))
|
2366
|
+
oo.default_sheet = oo.sheets.first
|
2367
|
+
assert_equal '=SUM([.A1:.D1])', oo.formula('e',1)
|
2368
|
+
assert_equal '=SUM([.A2:.D2])', oo.formula('e',2)
|
2369
|
+
assert_equal '=SUM([.A3:.D3])', oo.formula('e',3)
|
2370
|
+
assert_equal [
|
2371
|
+
[1,5,'=SUM([.A1:.D1])'],
|
2372
|
+
[2,5,'=SUM([.A2:.D2])'],
|
2373
|
+
[3,5,'=SUM([.A3:.D3])'],
|
2374
|
+
], oo.formulas
|
2375
|
+
|
2376
|
+
end
|
2377
|
+
end
|
2378
|
+
|
2354
2379
|
end # class
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: roo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.9.
|
4
|
+
version: 1.9.7
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-08-
|
12
|
+
date: 2011-08-27 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: spreadsheet
|
16
|
-
requirement: &
|
16
|
+
requirement: &25467852 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>'
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 0.6.4
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *25467852
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: nokogiri
|
27
|
-
requirement: &
|
27
|
+
requirement: &25467564 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 1.5.0
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *25467564
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: google-spreadsheet-ruby
|
38
|
-
requirement: &
|
38
|
+
requirement: &25467036 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: 0.1.5
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *25467036
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: choice
|
49
|
-
requirement: &
|
49
|
+
requirement: &25466676 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: 0.1.4
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *25466676
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: todonotes
|
60
|
-
requirement: &
|
60
|
+
requirement: &25466244 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ! '>='
|
@@ -65,18 +65,18 @@ dependencies:
|
|
65
65
|
version: 0.1.0
|
66
66
|
type: :runtime
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *25466244
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: bones
|
71
|
-
requirement: &
|
71
|
+
requirement: &25465872 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ! '>='
|
75
75
|
- !ruby/object:Gem::Version
|
76
|
-
version: 3.7.
|
76
|
+
version: 3.7.1
|
77
77
|
type: :development
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *25465872
|
80
80
|
description: ! 'Roo can access the contents of various spreadsheet files. It can handle
|
81
81
|
|
82
82
|
* Openoffice
|
@@ -96,8 +96,6 @@ extra_rdoc_files:
|
|
96
96
|
- PostInstall.txt
|
97
97
|
- README.txt
|
98
98
|
- bin/roo
|
99
|
-
- lib/.roo.rb.swp
|
100
|
-
- lib/roo/.generic_spreadsheet.rb.swp
|
101
99
|
- test/no_spreadsheet_file.txt
|
102
100
|
files:
|
103
101
|
- History.txt
|
@@ -110,9 +108,7 @@ files:
|
|
110
108
|
- bin/roo
|
111
109
|
- csv8532
|
112
110
|
- datetime.csv
|
113
|
-
- lib/.roo.rb.swp
|
114
111
|
- lib/roo.rb
|
115
|
-
- lib/roo/.generic_spreadsheet.rb.swp
|
116
112
|
- lib/roo/excel.rb
|
117
113
|
- lib/roo/excelx.rb
|
118
114
|
- lib/roo/generic_spreadsheet.rb
|
@@ -131,6 +127,7 @@ files:
|
|
131
127
|
- test/bbu.ods
|
132
128
|
- test/bbu.xls
|
133
129
|
- test/bbu.xlsx
|
130
|
+
- test/benchmark1.rb
|
134
131
|
- test/bode-v1.ods.zip
|
135
132
|
- test/bode-v1.xls.zip
|
136
133
|
- test/boolean.ods
|
@@ -144,6 +141,7 @@ files:
|
|
144
141
|
- test/datetime.xls
|
145
142
|
- test/datetime.xlsx
|
146
143
|
- test/datetime_floatconv.xls
|
144
|
+
- test/dreimalvier.ods
|
147
145
|
- test/emptysheets.ods
|
148
146
|
- test/emptysheets.xls
|
149
147
|
- test/emptysheets.xlsx
|
@@ -215,7 +213,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
215
213
|
version: '0'
|
216
214
|
requirements: []
|
217
215
|
rubyforge_project: roo
|
218
|
-
rubygems_version: 1.8.
|
216
|
+
rubygems_version: 1.8.9
|
219
217
|
signing_key:
|
220
218
|
specification_version: 3
|
221
219
|
summary: Roo can access the contents of various spreadsheet files.
|
data/lib/.roo.rb.swp
DELETED
Binary file
|
Binary file
|