donibuchanan-roo 1.3.12
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +225 -0
- data/README.markdown +55 -0
- data/examples/roo_soap_client.rb +53 -0
- data/examples/roo_soap_server.rb +29 -0
- data/examples/write_me.rb +33 -0
- data/lib/roo/excel.rb +468 -0
- data/lib/roo/excel2003xml.rb +411 -0
- data/lib/roo/excelx.rb +602 -0
- data/lib/roo/generic_spreadsheet.rb +628 -0
- data/lib/roo/google.rb +379 -0
- data/lib/roo/openoffice.rb +451 -0
- data/lib/roo/roo_rails_helper.rb +82 -0
- data/lib/roo/version.rb +9 -0
- data/lib/roo.rb +32 -0
- data/test/1900_base.xls +0 -0
- data/test/1904_base.xls +0 -0
- data/test/Bibelbund.csv +3741 -0
- data/test/Bibelbund.ods +0 -0
- data/test/Bibelbund.xls +0 -0
- data/test/Bibelbund.xlsx +0 -0
- data/test/Bibelbund1.ods +0 -0
- data/test/bad_excel_date.xls +0 -0
- data/test/bbu.ods +0 -0
- data/test/bbu.xls +0 -0
- data/test/bbu.xlsx +0 -0
- data/test/bode-v1.ods.zip +0 -0
- data/test/bode-v1.xls.zip +0 -0
- data/test/boolean.ods +0 -0
- data/test/boolean.xls +0 -0
- data/test/boolean.xlsx +0 -0
- data/test/borders.ods +0 -0
- data/test/borders.xls +0 -0
- data/test/borders.xlsx +0 -0
- data/test/bug-row-column-fixnum-float.xls +0 -0
- data/test/datetime.ods +0 -0
- data/test/datetime.xls +0 -0
- data/test/datetime.xlsx +0 -0
- data/test/datetime_floatconv.xls +0 -0
- data/test/emptysheets.ods +0 -0
- data/test/emptysheets.xls +0 -0
- data/test/excel2003.xml +21140 -0
- data/test/false_encoding.xls +0 -0
- data/test/formula.ods +0 -0
- data/test/formula.xls +0 -0
- data/test/formula.xlsx +0 -0
- data/test/formula_parse_error.xls +0 -0
- data/test/html-escape.ods +0 -0
- data/test/no_spreadsheet_file.txt +1 -0
- data/test/numbers1.csv +18 -0
- data/test/numbers1.ods +0 -0
- data/test/numbers1.xls +0 -0
- data/test/numbers1.xlsx +0 -0
- data/test/only_one_sheet.ods +0 -0
- data/test/only_one_sheet.xls +0 -0
- data/test/only_one_sheet.xlsx +0 -0
- data/test/paragraph.ods +0 -0
- data/test/paragraph.xls +0 -0
- data/test/paragraph.xlsx +0 -0
- data/test/ric.ods +0 -0
- data/test/simple_spreadsheet.ods +0 -0
- data/test/simple_spreadsheet.xls +0 -0
- data/test/simple_spreadsheet.xlsx +0 -0
- data/test/simple_spreadsheet_from_italo.ods +0 -0
- data/test/simple_spreadsheet_from_italo.xls +0 -0
- data/test/skipped_tests.rb +789 -0
- data/test/style.ods +0 -0
- data/test/style.xls +0 -0
- data/test/style.xlsx +0 -0
- data/test/test_helper.rb +19 -0
- data/test/test_roo.rb +1834 -0
- data/test/time-test.csv +2 -0
- data/test/time-test.ods +0 -0
- data/test/time-test.xls +0 -0
- data/test/time-test.xlsx +0 -0
- data/test/whitespace.ods +0 -0
- data/test/whitespace.xls +0 -0
- data/test/whitespace.xlsx +0 -0
- metadata +185 -0
@@ -0,0 +1,411 @@
|
|
1
|
+
require 'xml'
|
2
|
+
require 'fileutils'
|
3
|
+
require 'zip/zipfilesystem'
|
4
|
+
require 'date'
|
5
|
+
require 'base64'
|
6
|
+
require 'cgi'
|
7
|
+
|
8
|
+
class Excel2003XML < GenericSpreadsheet
|
9
|
+
|
10
|
+
@@nr = 0
|
11
|
+
|
12
|
+
# Opens and parses an Excel 2003 XML spreadsheet file.
#
# filename     - path of the '.xml' file; a name starting with "http://"
#                is first passed through open_from_uri
# packed       - pass :zip to have the file run through unzip first
# file_warning - stored in @file_warning (presumably consulted by
#                file_type_check -- confirm against GenericSpreadsheet)
#
# Raises IOError when the resolved file does not exist.
def initialize(filename, packed=nil, file_warning=:error)
  @file_warning = file_warning
  super()
  @tmpdir = "oo_"+$$.to_s
  @tmpdir = File.join(ENV['ROO_TMP'], @tmpdir) if ENV['ROO_TMP']
  # File.exists? no longer exists on current Rubies; File.exist? is
  # available on old and new versions alike.
  FileUtils::mkdir(@tmpdir) unless File.exist?(@tmpdir)
  begin
    # Download/unzip inside the begin block so that the temporary
    # directory is removed even when one of these steps raises.
    filename = open_from_uri(filename) if filename[0,7] == "http://"
    filename = unzip(filename) if packed == :zip
    file_type_check(filename,'.xml','an Excel 2003 XML')
    @cells_read = Hash.new
    @filename = filename
    unless File.file?(@filename)
      raise IOError, "file #{@filename} does not exist"
    end
    @doc = XML::Parser.file(@filename).parse
  ensure
    FileUtils::rm_r(@tmpdir)
  end
  @default_sheet = self.sheets.first
  @cell = Hash.new
  @cell_type = Hash.new
  @formula = Hash.new
  @first_row = Hash.new
  @last_row = Hash.new
  @first_column = Hash.new
  @last_column = Hash.new
  @style = Hash.new
  # per-key array default, so every sheet gets its own list
  @style_defaults = Hash.new { |h,k| h[k] = [] }
  @style_definitions = Hash.new
  @header_line = 1
end
|
48
|
+
|
49
|
+
# Looks up the value stored for a cell.
#
# row/col are passed through normalize, so whatever coordinate forms
# normalize accepts can be used here. sheet defaults to @default_sheet and
# is read on first access.
#
# Cells whose celltype is :date are stored as a 'yyyy-mm-dd' string and are
# converted to a Date here; every other value is returned as stored.
def cell(row, col, sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row, col = normalize(row, col)
  is_date = celltype(row, col, sheet) == :date
  stored = @cell[sheet][[row, col]]
  return stored unless is_date
  year, month, day = stored.split('-')
  Date.new(year.to_i, month.to_i, day.to_i)
end
|
63
|
+
|
64
|
+
# Gives back the formula recorded for the cell at (row,col), or nil when
# none was recorded. Use #formula? for a plain yes/no answer.
#
# The first "oooc:".length characters of the stored text are cut off
# before it is returned.
# NOTE(review): that prefix looks OpenOffice-specific -- confirm it is
# intended for Excel 2003 XML formulas.
def formula(row,col,sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row, col = normalize(row, col)
  stored = @formula[sheet][[row, col]]
  return nil if stored.nil?
  prefix_length = "oooc:".length
  stored[prefix_length..-1]
end
|
77
|
+
|
78
|
+
# Tells whether the cell at (row,col) of the given sheet holds a formula.
#
# sheet defaults to the default sheet. The sheet is now forwarded to
# #formula; previously it was dropped, so the answer always referred to
# the default sheet. Loading the sheet and normalizing the coordinates is
# left to #formula, which performs both.
#
# Returns true or false.
def formula?(row,col,sheet=nil)
  !formula(row,col,sheet).nil?
end
|
85
|
+
|
86
|
+
# Plain holder for the font attributes of a cell style. The raw attribute
# values are kept as assigned; the predicates interpret them.
class Font
  attr_accessor :bold, :italic, :underline

  # true only when the bold attribute is exactly the string 'bold'
  def bold?
    'bold' == bold
  end

  # true only when the italic attribute is exactly the string 'italic'
  def italic?
    'italic' == italic
  end

  # true as soon as any underline value (anything but nil) is set
  def underline?
    !underline.nil?
  end
end
|
101
|
+
|
102
|
+
# Resolves the style definition that applies to a cell.
#
# The style name is taken from the cell itself, then from the per-column
# entry in @style_defaults, and finally falls back to 'Default'. The
# result is whatever @style_definitions holds under that name (nil when
# nothing is registered).
def font(row, col, sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row, col = normalize(row, col)
  style_name = @style[sheet][[row, col]]
  style_name ||= @style_defaults[sheet][col - 1]
  style_name ||= 'Default'
  @style_definitions[style_name]
end
|
110
|
+
|
111
|
+
# Overwrites the in-memory value of a cell and records a matching type.
# Nothing is written back to the spreadsheet file.
#
# Integer and Float values are typed :float, String values :string.
# (Float used to be recorded as :string, and the Fixnum constant the old
# check relied on is gone from current Rubies.)
#
# Raises ArgumentError for any other kind of value; as before, the value
# itself has already been stored at that point.
def set(row,col,value,sheet=nil) #:nodoc:
  sheet = @default_sheet unless sheet
  read_cells(sheet) unless @cells_read[sheet]
  row,col = normalize(row,col)
  set_value(row,col,value,sheet)
  type = case value
         when Integer, Float then :float
         when String then :string
         else raise ArgumentError, "Type for "+value.to_s+" not set"
         end
  set_type(row,col,type,sheet)
end
|
128
|
+
|
129
|
+
# Reports the type of a cell.
#
# A cell with a recorded formula is always :formula; otherwise the type
# stored while reading the sheet is returned (e.g. :float, :string, :date,
# :time, :datetime, :percentage), or nil for an unknown cell.
def celltype(row,col,sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row, col = normalize(row, col)
  key = [row, col]
  return :formula if @formula[sheet][key]
  @cell_type[sheet][key]
end
|
147
|
+
|
148
|
+
# Lists the 'Name' attribute of every ss:Worksheet element in the parsed
# document, in the order the document yields them.
def sheets
  names = []
  @doc.find("//ss:Worksheet").each { |worksheet| names.push(worksheet.attributes['Name']) }
  names
end
|
155
|
+
|
156
|
+
# Returns the document version as determined by oo_version, which stores
# the 'version' attribute of a 'document-content' element in
# @officeversion.
# NOTE(review): 'document-content' looks like an OpenOffice element; this
# presumably stays nil for Excel 2003 XML files -- confirm.
def officeversion
  oo_version # refreshes @officeversion as a side effect
  @officeversion
end
|
162
|
+
|
163
|
+
# Debugging aid: the #inspect dump of the internal cell hash of a sheet
# (default sheet unless one is given). The sheet is read first if needed.
def to_s(sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  cells_of_sheet = @cell[sheet]
  cells_of_sheet.inspect
end
|
170
|
+
|
171
|
+
# Placeholder: nothing is written anywhere; the method simply answers
# with the constant 42.
def save #:nodoc:
  42
end
|
175
|
+
|
176
|
+
# Collects all formulas of a sheet (default sheet unless one is given).
#
# Walks the rectangle spanned by first_row..last_row and
# first_column..last_column and returns an array of
# [row, col, formula] triples, ordered row by row.
def formulas(sheet=nil)
  collected = []
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  first_row(sheet).upto(last_row(sheet)) do |row|
    first_column(sheet).upto(last_column(sheet)) do |col|
      next unless formula?(row,col,sheet)
      collected << [row, col, formula(row,col,sheet)]
    end
  end
  collected
end
|
192
|
+
|
193
|
+
private
|
194
|
+
|
195
|
+
# Stores the 'version' attribute of each 'document-content' element
# (matched regardless of namespace) in @officeversion; with several
# matches the last one wins, with none @officeversion is left untouched.
def oo_version
  @doc.find("//*[local-name()='document-content']").each { |content|
    @officeversion = content.attributes['version']
  }
end
|
201
|
+
|
202
|
+
# Records one parsed cell in the internal per-sheet hashes.
#
# The cell is keyed by [y, x+i] (row first, then column). Type and style
# name are always stored, the formula only when one is given.
# The stored value depends on the type vt:
#   :float, :percentage -> v.to_f
#   :string             -> str_v
#   :datetime           -> DateTime.parse(v)
#   anything else       -> v unchanged
# table_cell is accepted but not used here.
def set_cell_values(sheet,x,y,i,v,vt,formula,table_cell,str_v,style_name)
  key = [y, x + i]
  @cell_type[sheet] ||= {}
  @cell_type[sheet][key] = vt
  @formula[sheet] ||= {}
  @formula[sheet][key] = formula if formula
  @cell[sheet] ||= {}
  @style[sheet] ||= {}
  @style[sheet][key] = style_name
  case vt
  when :float, :percentage
    @cell[sheet][key] = v.to_f
  when :string
    @cell[sheet][key] = str_v
  when :datetime
    @cell[sheet][key] = DateTime.parse(v)
    # re-assigning the type is a no-op; kept so the return value of this
    # branch stays what it always was
    @cell_type[sheet][key] = :datetime
  else
    @cell[sheet][key] = v
  end
end
|
229
|
+
|
230
|
+
# Reads every cell of the given worksheet (default sheet unless one is
# given) into the internal hashes and marks the sheet as read.
#
# Rows and cells are numbered from 1; an 'Index' attribute on a Row or
# Cell element jumps to that position. For each Data element the 'Type'
# attribute selects the roo type:
#   number   -> :float (value converted with to_f)
#   datetime -> :time when the date part is 1899-12-31, :date when the
#               time part is T00:00:00.000, otherwise :datetime
#   others   -> the downcased type name as a symbol
#
# Raises ArgumentError when no sheet can be determined and RangeError when
# the sheet does not exist in the document.
def read_cells(sheet=nil)
  sheet = @default_sheet unless sheet
  raise ArgumentError, "Error: sheet '#{sheet||'nil'}' not valid" if @default_sheet == nil and sheet==nil
  raise RangeError unless self.sheets.include? sheet
  # Create the per-sheet stores up front: set_cell_values only creates
  # them when a cell is found, so a sheet without cells used to leave them
  # nil and every accessor failed with NoMethodError.
  [@cell, @cell_type, @formula, @style].each { |store| store[sheet] ||= {} }
  sheet_found = false
  # NOTE(review): the sheet name is interpolated into the XPath unescaped;
  # a name containing a single quote will presumably break the query.
  @doc.find("ss:Worksheet[@ss:Name='#{sheet}']").each do |ws|
    sheet_found = true
    row = 1
    col = 1
    ws.find('.//ss:Row').each do |r|
      skip_to_row = r.attributes['Index'].to_i
      row = skip_to_row if skip_to_row > 0
      r.each do |c|
        next unless c.name == 'Cell'
        skip_to_col = c.attributes['Index'].to_i
        col = skip_to_col if skip_to_col > 0
        c.each_element do |cell|
          # Only Data elements carry a typed value. Any other child used
          # to leave vt unset and crashed on nil.to_sym.
          next unless cell.name == 'Data'
          # NOTE(review): StyleID and Formula are read from the Data
          # element; in SpreadsheetML they presumably live on the
          # enclosing Cell -- confirm before relying on styles/formulas.
          style_name = cell.attributes['StyleID']
          formula = cell.attributes['Formula']
          vt = cell.attributes['Type'].downcase.to_sym
          v = cell.content
          str_v = v
          case vt
          when :number
            v = v.to_f
            vt = :float
          when :datetime
            if v =~ /^1899-12-31T(\d{2}:\d{2}:\d{2})/
              # pure time value: keep only hh:mm:ss
              v = $1
              vt = :time
            elsif v =~ /([^T]+)T00:00:00.000/
              # midnight timestamp: treat as a plain date
              v = $1
              vt = :date
            end
          when :boolean
            # NOTE(review): 'boolean-value' looks like an OpenOffice
            # attribute name -- verify it exists in Excel 2003 XML.
            v = cell.attributes['boolean-value']
          end
          set_cell_values(sheet,col,row,0,v,vt,formula,cell,str_v,style_name)
        end
        col += 1
      end
      row += 1
      col = 1
    end
  end
  raise RangeError unless sheet_found
  @cells_read[sheet] = true
end
|
316
|
+
|
317
|
+
# Fills @style_definitions from a collection of style elements.
#
# A blank Font is always registered under 'Default'. For every element
# named 'style', each of its children yields a Font built from the
# 'font-weight', 'font-style' and 'text-underline-style' attributes and is
# stored under the style's 'name' attribute (the last child wins).
#
# The fonts are instances of this class's own Font; the former reference
# to Openoffice::Font required the Openoffice class to be loaded.
# NOTE(review): the attribute names look OpenOffice-specific -- confirm
# they match what Excel 2003 XML style elements provide.
def read_styles(style_elements)
  @style_definitions['Default'] = Font.new
  style_elements.each do |style|
    next unless style.name == 'style'
    style_name = style.attributes['name']
    style.each do |properties|
      font = Font.new
      font.bold = properties.attributes['font-weight']
      font.italic = properties.attributes['font-style']
      font.underline = properties.attributes['text-underline-style']
      @style_definitions[style_name] = font
    end
  end
end
|
331
|
+
|
332
|
+
# Verifies that a default sheet is set and present in the document.
#
# Raises ArgumentError when @default_sheet is nil and RangeError when the
# name is not among #sheets. Returns nil otherwise.
def check_default_sheet
  raise ArgumentError, "Error: default_sheet not set" if @default_sheet.nil?
  return if sheets.include?(@default_sheet)
  raise RangeError, "sheet '#{@default_sheet}' not found"
end
|
342
|
+
|
343
|
+
# Walks a zip archive recursively, starting at path.
#
# Directories are descended into; of the files only the one whose path is
# exactly "content.xml" is copied, into
# <@tmpdir>/<@file_nr>_roo_content.xml.
def process_zipfile(zip, path='')
  if !zip.file.file?(path)
    # directory entry: recurse into everything below it
    path += '/' unless path.empty?
    zip.dir.foreach(path) { |entry| process_zipfile(zip, path+entry) }
  elsif path == "content.xml"
    target = File.join(@tmpdir, @file_nr.to_s+'_roo_content.xml')
    open(target,'wb') { |out| out << zip.read(path) }
  end
end
|
359
|
+
|
360
|
+
# Opens @filename as a zip archive and hands it to process_zipfile,
# starting at the archive root.
def extract_content
  Zip::ZipFile.open(@filename) { |archive| process_zipfile(archive) }
end
|
365
|
+
|
366
|
+
# Stores value as the content of cell (row,col) in the internal cell hash.
#
# sheet falls back to @default_sheet. (The fallback used to read the
# never-assigned @default_value, so omitting the sheet always failed.)
def set_value(row,col,value,sheet=nil)
  sheet = @default_sheet unless sheet
  @cell[sheet][[row,col]] = value
end
|
370
|
+
|
371
|
+
# Stores type as the cell type of (row,col) in the internal type hash.
#
# sheet falls back to @default_sheet. (The fallback used to read the
# never-assigned @default_value, so omitting the sheet always failed.)
def set_type(row,col,type,sheet=nil)
  sheet = @default_sheet unless sheet
  @cell_type[sheet][[row,col]] = type
end
|
375
|
+
|
376
|
+
# Maps type names given as strings to the symbols roo uses for cell types.
# Frozen so that the shared lookup table cannot be modified by accident.
A_ROO_TYPE = {
  "float"      => :float,
  "string"     => :string,
  "date"       => :date,
  "percentage" => :percentage,
  "time"       => :time,
}.freeze
|
383
|
+
|
384
|
+
# Translates a type name string into the corresponding roo type symbol
# via A_ROO_TYPE; unknown names give nil.
# NOTE(review): this defines the method on Openoffice, not on this class,
# so Openoffice must be loaded first -- presumably a copy-and-paste
# leftover; confirm before changing the receiver.
def Openoffice.oo_type_2_roo_type(ootype)
  A_ROO_TYPE[ootype]
end
|
387
|
+
|
388
|
+
# Flattens a list of child nodes into one string.
#
# Text nodes and ordinary elements contribute their content. An element
# named 's' stands for a run of blanks: its 'c' attribute gives the count,
# and a missing (or zero) count means a single blank.
def children_to_string(children)
  text = ''
  children.each do |node|
    if !node.text? && node.name == 's'
      blanks = node.attributes['c'].to_i
      blanks = 1 if blanks == 0
      text += " " * blanks
    else
      text += node.content
    end
  end
  text
end
|
410
|
+
|
411
|
+
end # class
|