donibuchanan-roo 1.3.12
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +225 -0
- data/README.markdown +55 -0
- data/examples/roo_soap_client.rb +53 -0
- data/examples/roo_soap_server.rb +29 -0
- data/examples/write_me.rb +33 -0
- data/lib/roo/excel.rb +468 -0
- data/lib/roo/excel2003xml.rb +411 -0
- data/lib/roo/excelx.rb +602 -0
- data/lib/roo/generic_spreadsheet.rb +628 -0
- data/lib/roo/google.rb +379 -0
- data/lib/roo/openoffice.rb +451 -0
- data/lib/roo/roo_rails_helper.rb +82 -0
- data/lib/roo/version.rb +9 -0
- data/lib/roo.rb +32 -0
- data/test/1900_base.xls +0 -0
- data/test/1904_base.xls +0 -0
- data/test/Bibelbund.csv +3741 -0
- data/test/Bibelbund.ods +0 -0
- data/test/Bibelbund.xls +0 -0
- data/test/Bibelbund.xlsx +0 -0
- data/test/Bibelbund1.ods +0 -0
- data/test/bad_excel_date.xls +0 -0
- data/test/bbu.ods +0 -0
- data/test/bbu.xls +0 -0
- data/test/bbu.xlsx +0 -0
- data/test/bode-v1.ods.zip +0 -0
- data/test/bode-v1.xls.zip +0 -0
- data/test/boolean.ods +0 -0
- data/test/boolean.xls +0 -0
- data/test/boolean.xlsx +0 -0
- data/test/borders.ods +0 -0
- data/test/borders.xls +0 -0
- data/test/borders.xlsx +0 -0
- data/test/bug-row-column-fixnum-float.xls +0 -0
- data/test/datetime.ods +0 -0
- data/test/datetime.xls +0 -0
- data/test/datetime.xlsx +0 -0
- data/test/datetime_floatconv.xls +0 -0
- data/test/emptysheets.ods +0 -0
- data/test/emptysheets.xls +0 -0
- data/test/excel2003.xml +21140 -0
- data/test/false_encoding.xls +0 -0
- data/test/formula.ods +0 -0
- data/test/formula.xls +0 -0
- data/test/formula.xlsx +0 -0
- data/test/formula_parse_error.xls +0 -0
- data/test/html-escape.ods +0 -0
- data/test/no_spreadsheet_file.txt +1 -0
- data/test/numbers1.csv +18 -0
- data/test/numbers1.ods +0 -0
- data/test/numbers1.xls +0 -0
- data/test/numbers1.xlsx +0 -0
- data/test/only_one_sheet.ods +0 -0
- data/test/only_one_sheet.xls +0 -0
- data/test/only_one_sheet.xlsx +0 -0
- data/test/paragraph.ods +0 -0
- data/test/paragraph.xls +0 -0
- data/test/paragraph.xlsx +0 -0
- data/test/ric.ods +0 -0
- data/test/simple_spreadsheet.ods +0 -0
- data/test/simple_spreadsheet.xls +0 -0
- data/test/simple_spreadsheet.xlsx +0 -0
- data/test/simple_spreadsheet_from_italo.ods +0 -0
- data/test/simple_spreadsheet_from_italo.xls +0 -0
- data/test/skipped_tests.rb +789 -0
- data/test/style.ods +0 -0
- data/test/style.xls +0 -0
- data/test/style.xlsx +0 -0
- data/test/test_helper.rb +19 -0
- data/test/test_roo.rb +1834 -0
- data/test/time-test.csv +2 -0
- data/test/time-test.ods +0 -0
- data/test/time-test.xls +0 -0
- data/test/time-test.xlsx +0 -0
- data/test/whitespace.ods +0 -0
- data/test/whitespace.xls +0 -0
- data/test/whitespace.xlsx +0 -0
- metadata +185 -0
data/lib/roo/excel.rb
ADDED
@@ -0,0 +1,468 @@
|
|
1
|
+
require 'spreadsheet'
|
2
|
+
# True when the optional 'charguess' library could be loaded, false otherwise.
# normalize_string consults this flag before calling CharGuess.
CHARGUESS =
  begin
    require 'charguess'
    true
  rescue LoadError
    false
  end
|
8
|
+
|
9
|
+
# The Spreadsheet library has a bug in handling Excel
|
10
|
+
# base dates so if the file is a 1904 base date then
|
11
|
+
# dates are off by a day. 1900 base dates work fine
|
12
|
+
module Spreadsheet
|
13
|
+
module Excel
|
14
|
+
class Row < Spreadsheet::Row
|
15
|
+
# Converts a serial date number into a DateTime. # :nodoc:
#
# data - a DateTime (returned unchanged) or a numeric serial value whose
#        integer part counts days from @worksheet.date_base and whose
#        fractional part is the time of day.
#
# Returns a DateTime with whole-second resolution.
def _datetime(data)
  return data if data.is_a?(DateTime)

  epoch = @worksheet.date_base
  day = epoch + data.to_f

  # Split the fractional day into hours, minutes and (rounded) seconds.
  hours = (data % 1) * 24
  minutes = (hours % 1) * 60
  seconds = ((minutes % 1) * 60).round
  minutes = minutes.floor
  hours = hours.floor

  # Rounding can yield 60 seconds; carry the overflow upwards.
  if seconds > 59
    seconds = 0
    minutes += 1
  end
  if minutes > 59
    minutes = 0
    hours += 1
  end
  if hours > 23
    hours = 0
    day += 1
  end

  # NOTE(review): presumably compensates for the fictitious 1900-02-29 of
  # the 1900 date system - confirm against the definition of LEAP_ERROR.
  day -= 1 if LEAP_ERROR > epoch

  DateTime.new(day.year, day.month, day.day, hours, minutes, seconds)
end
|
41
|
+
public :_date
|
42
|
+
public :_datetime
|
43
|
+
end
|
44
|
+
# patch for ruby-spreadsheet parsing formulas
|
45
|
+
class Reader
|
46
|
+
# Parses a formula record and stores the resulting Formula in the worksheet.
#
# worksheet - worksheet that receives the cell (passed on to set_cell).
# addr      - unused here; kept for interface compatibility.
# work      - raw record bytes; unpacked with 'v3CxCx3v2'.
#
# The cached result is taken from the record itself when it is numeric,
# otherwise it is derived from the result-type byte (string, boolean, error).
def read_formula(worksheet, addr, work)
  row, column, xf, rtype, rval, rcheck, opts = work.unpack('v3CxCx3v2')
  formula = Formula.new
  formula.shared = (opts & 0x08) > 0
  formula.data = work[20..-1]

  if rcheck != 0xffff || rtype > 3
    # Numeric result: read it as a little-endian double.
    value, = work.unpack('x6E')
    # on architectures where sizeof(double) > 8
    value, = work.unpack('x6e') unless value
    formula.value = value
  else
    case rtype
    when 0
      _pos, op, _len, work = get_next_chunk
      # This seems to work but I don't know why :). It at least
      # seems to correct the case we saw but doubtful it's the right fix
      payload = op == :string ? work : work[10..-1]
      formula.value = client(read_string(payload, 2), @workbook.encoding)
    when 1
      formula.value = rval > 0
    when 2
      formula.value = Error.new(rval)
    end
    # any other result type: leave the Formula value blank
  end

  set_cell(worksheet, row, column, xf, formula)
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
|
81
|
+
# ruby-spreadsheet has a font object so we're extending it
|
82
|
+
# with our own functionality but still providing full access
|
83
|
+
# to the user for other font information
|
84
|
+
# Convenience predicates mixed into a font object. The host object is
# expected to respond to #weight, #italic and #underline.
module ExcelFontExtensions
  # True only when the font weight is exactly 700.
  # From ruby-spreadsheet doc: 100 <= weight <= 1000, bold => 700, normal => 400
  # Any arguments are accepted and ignored.
  def bold?(*args)
    700 == weight
  end

  # Returns the font's italic flag unchanged.
  def italic?
    italic
  end

  # True unless the underline style is :none.
  def underline?
    underline != :none
  end
end
|
104
|
+
|
105
|
+
# Class for handling Excel-Spreadsheets
|
106
|
+
class Excel < GenericSpreadsheet
|
107
|
+
|
108
|
+
EXCEL_NO_FORMULAS = 'formulas are not supported for excel spreadsheets'
|
109
|
+
|
110
|
+
# Creates a new Excel spreadsheet object.
|
111
|
+
# Parameter packed: :zip - File is a zip-file
|
112
|
+
# Opens an Excel workbook.
#
# filename     - path of the .xls file; a name starting with "http://" is
#                fetched via open_from_uri, one starting with "stream:" is
#                handed (without the prefix) to open_from_stream.
# packed       - pass :zip when the file is a zip archive to be unpacked.
# file_warning - stored in @file_warning (default :error).
#
# Raises IOError when the resulting file does not exist.
#
# A scratch directory "oo_<pid>" (below ENV['ROO_TMP'] when set) is created
# for the staging helpers and is always removed again, including when the
# download, unzip or open step fails.
def initialize(filename, packed = nil, file_warning = :error)
  super()
  @file_warning = file_warning
  @tmpdir = "oo_"+$$.to_s
  @tmpdir = File.join(ENV['ROO_TMP'], @tmpdir) if ENV['ROO_TMP']
  # File.exists? is deprecated and gone in Ruby 3.2; File.exist? works everywhere.
  FileUtils::mkdir(@tmpdir) unless File.exist?(@tmpdir)
  begin
    # Staging happens inside the begin block so the ensure clause cleans up
    # the scratch directory even if one of these helpers raises.
    filename = open_from_uri(filename) if filename[0,7] == "http://"
    filename = open_from_stream(filename[7..-1]) if filename[0,7] == "stream:"
    filename = unzip(filename) if packed == :zip
    file_type_check(filename,'.xls','an Excel')
    @filename = filename
    unless File.file?(@filename)
      raise IOError, "file #{@filename} does not exist"
    end
    @workbook = Spreadsheet.open(filename)
    @default_sheet = self.sheets.first
  ensure
    FileUtils::rm_r(@tmpdir)
  end
  # Per-sheet caches, filled lazily by read_cells.
  @cell = Hash.new
  @cell_type = Hash.new
  @formula = Hash.new
  @first_row = Hash.new
  @last_row = Hash.new
  @first_column = Hash.new
  @last_column = Hash.new
  @header_line = 1
  @cells_read = Hash.new
  @fonts = Hash.new
end
|
147
|
+
|
148
|
+
# returns an array of sheet names in the spreadsheet
|
149
|
+
# Returns the normalized names of all worksheets of the workbook, in
# workbook order.
def sheets
  @workbook.worksheets.map { |worksheet| normalize_string(worksheet.name) }
end
|
156
|
+
|
157
|
+
# returns the content of a cell. The upper left corner is (1,1) or ('A',1)
|
158
|
+
# Returns the content of a cell. The upper left corner is (1,1) or ('A',1).
#
# row, col - cell coordinates (passed through normalize).
# sheet    - sheet name; defaults to @default_sheet.
#
# Date cells are stored as 'yyyy-mm-dd' strings and come back as Date
# objects; string cells go through platform_specific_iconv; everything
# else is returned as stored.
#
# Raises ArgumentError when no sheet is given and no default sheet is set.
def cell(row, col, sheet = nil)
  sheet ||= @default_sheet
  raise ArgumentError unless sheet
  read_cells(sheet) unless @cells_read[sheet]
  raise "should be read" unless @cells_read[sheet]

  row, col = normalize(row, col)
  key = [row, col]
  case celltype(row, col, sheet)
  when :date
    year, month, day = @cell[sheet][key].split('-')
    Date.new(year.to_i, month.to_i, day.to_i)
  when :string
    platform_specific_iconv(@cell[sheet][key])
  else
    @cell[sheet][key]
  end
end
|
174
|
+
|
175
|
+
# returns the type of a cell:
|
176
|
+
# * :float
|
177
|
+
# * :string,
|
178
|
+
# * :date
|
179
|
+
# * :percentage
|
180
|
+
# * :formula
|
181
|
+
# * :time
|
182
|
+
# * :datetime
|
183
|
+
# Returns the type of a cell: :formula when a formula is recorded for the
# cell, otherwise whatever type was stored for it when the sheet was read
# (e.g. :float, :string, :date, :percentage, :time, :datetime).
#
# On any lookup error the cell position is printed and the error re-raised.
def celltype(row, col, sheet = nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row, col = normalize(row, col)
  begin
    return :formula if @formula[sheet][[row, col]]

    @cell_type[sheet][[row, col]]
  rescue
    puts "Error in sheet #{sheet}, row #{row}, col #{col}"
    raise
  end
end
|
198
|
+
|
199
|
+
# Formulas are not supported for Excel spreadsheets: always raises a
# RuntimeError carrying EXCEL_NO_FORMULAS. The arguments are ignored.
def formula(row, col, sheet = nil)
  raise RuntimeError, EXCEL_NO_FORMULAS
end
|
203
|
+
|
204
|
+
# Always raises a RuntimeError carrying EXCEL_NO_FORMULAS, because formulas
# are not supported for Excel files. The arguments are ignored.
def formula?(row, col, sheet = nil)
  raise RuntimeError, EXCEL_NO_FORMULAS
end
|
208
|
+
|
209
|
+
# Listing formulas is not supported for Excel spreadsheets: always raises a
# RuntimeError carrying EXCEL_NO_FORMULAS. The argument is ignored.
def formulas(sheet = nil)
  raise RuntimeError, EXCEL_NO_FORMULAS
end
|
213
|
+
|
214
|
+
# Given a cell, return the cell's font
|
215
|
+
# Given a cell, returns the font object recorded for it when the sheet was
# read (nil when nothing was recorded for that position).
#
# sheet defaults to @default_sheet; the sheet is read on first access.
def font(row, col, sheet = nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  key = normalize(row, col)
  @fonts[sheet][key]
end
|
221
|
+
|
222
|
+
# shows the internal representation of all cells
|
223
|
+
# mainly for debugging purposes
|
224
|
+
def to_s(sheet=nil)
|
225
|
+
sheet = @default_sheet unless sheet
|
226
|
+
read_cells(sheet) unless @cells_read[sheet]
|
227
|
+
@cell[sheet].inspect
|
228
|
+
end
|
229
|
+
|
230
|
+
private
|
231
|
+
|
232
|
+
# converts name of a sheet to index (0,1,2,..)
|
233
|
+
# Converts the name of a sheet to its zero-based index (0,1,2,..).
#
# name - either a 1-based sheet number (any Integer) or a sheet name that is
#        compared against the normalized worksheet names.
#
# Returns the zero-based index.
# Raises StandardError when no worksheet carries that name.
def sheet_no(name)
  # Integer instead of Fixnum: Fixnum was removed in Ruby 3.2 and Integer
  # matches the same values on every Ruby version.
  return name - 1 if name.kind_of?(Integer)

  position = @workbook.worksheets.index do |worksheet|
    name == normalize_string(worksheet.name)
  end
  raise StandardError, "sheet '#{name}' not found" if position.nil?

  position
end
|
242
|
+
|
243
|
+
# True when every element of the row is either nil or the empty string.
def empty_row?(row)
  row.compact.none? { |elem| elem != '' }
end
|
252
|
+
|
253
|
+
# True when every element of the column is either nil or the empty string.
def empty_column?(col)
  col.compact.none? { |elem| elem != '' }
end
|
262
|
+
|
263
|
+
# Normalizes a string read from the workbook (e.g. a sheet name).
#
# A value in which every second character is a NUL has those NULs stripped
# first. When the charguess library is available and guesses an encoding,
# the value is converted from that encoding to UTF-8; otherwise it goes
# through platform_specific_iconv.
#
# Returns the converted String.
def normalize_string(value)
  value = remove_every_second_null(value) if every_second_null?(value)
  if CHARGUESS && (encoding = CharGuess::guess(value))
    # Previously this branch returned the Iconv converter object itself
    # instead of the converted string.
    Iconv.new('utf-8', encoding).iconv(value)
  else
    platform_specific_iconv(value)
  end
end
|
271
|
+
|
272
|
+
# Runs the value through an Iconv conversion chosen by RUBY_PLATFORM:
# utf-8 -> utf-8 on darwin and solaris, iso-8859-1 -> utf-8 on mswin32, and
# no conversion at all elsewhere. If every second character of the result
# is a NUL, those NULs are removed before returning.
def platform_specific_iconv(value)
  converted =
    case RUBY_PLATFORM.downcase
    when /darwin/, /solaris/
      Iconv.new('utf-8','utf-8').iconv(value)
    when /mswin32/
      Iconv.new('utf-8','iso-8859-1').iconv(value)
    else
      value
    end
  converted = remove_every_second_null(converted) if every_second_null?(converted)
  converted
end
|
288
|
+
|
289
|
+
# True when the string has at least two characters and every character at
# an odd index (1, 3, 5, ...) is a NUL. A trailing unpaired character of an
# odd-length string is not examined.
def every_second_null?(str)
  return false if str.length < 2

  (0...str.length / 2).all? { |pair| str[pair * 2 + 1, 1] == "\000" }
end
|
302
|
+
|
303
|
+
# Returns a new string made of the characters at even indexes (0, 2, 4, ...)
# of each complete character pair; a trailing unpaired character is dropped.
def remove_every_second_null(str)
  (0...str.length / 2).inject('') { |kept, pair| kept + str[pair * 2, 1] }
end
|
311
|
+
|
312
|
+
# helper function to set the internal representation of cells
|
313
|
+
# Helper that records one cell in the internal per-sheet hashes.
#
# sheet    - sheet name used as the outer hash key.
# row, col - cell position; i is added to col to form the key [row, col+i].
# v        - raw value; vt - its type symbol.
# formula  - stored only when truthy.
# tr       - unused here.
# font     - font object stored for the cell.
#
# :float and :percentage values are stored via to_f, :datetime values are
# rebuilt as a DateTime from their date/time components, and every other
# type is stored unchanged.
def set_cell_values(sheet,row,col,i,v,vt,formula,tr,font)
  key = [row, col + i]
  @cell_type[sheet] ||= {}
  @formula[sheet] ||= {}
  @cell[sheet] ||= {}
  @fonts[sheet] ||= {}

  @cell_type[sheet][key] = vt
  @formula[sheet][key] = formula if formula
  @fonts[sheet][key] = font

  @cell[sheet][key] =
    case vt
    when :float, :percentage
      v.to_f
    when :datetime
      DateTime.new(v.year, v.month, v.day, v.hour, v.min, v.sec)
    else
      v
    end
end
|
341
|
+
|
342
|
+
# read all cells in the selected sheet
|
343
|
+
# Reads all cells of the selected sheet into the internal hashes.
#
# sheet - sheet name; defaults to @default_sheet.
#
# Raises ArgumentError when neither a sheet nor a default sheet is set,
# RangeError when the sheet is not part of the workbook, and RuntimeError
# when the sheet has already been read.
def read_cells(sheet=nil)
  sheet = @default_sheet unless sheet
  raise ArgumentError, "Error: sheet '#{sheet||'nil'}' not valid" if @default_sheet == nil and sheet==nil
  raise RangeError unless self.sheets.include? sheet
  raise "sheet #{sheet} already read" if @cells_read[sheet]

  worksheet = @workbook.worksheet(sheet_no(sheet))
  row_index = 1 # rows and columns are stored 1-based
  worksheet.each(0) do |row|
    # Exclusive range: the former inclusive 0..row.size also probed one
    # index past the end of the row.
    (0...row.size).each do |cell_index|
      cell = row.at(cell_index)
      next if cell.nil? # skip empty cells
      next if cell.class == Spreadsheet::Formula && cell.value.nil? # skip empty formula cells

      vt, v =
        if date_or_time?(row, cell_index)
          read_cell_date_or_time(row, cell_index)
        else
          read_cell(row, cell_index)
        end
      formula = tr = nil #TODO:???
      col_index = cell_index + 1
      font = row.format(cell_index).font
      font.extend(ExcelFontExtensions)
      set_cell_values(sheet,row_index,col_index,0,v,vt,formula,tr,font)
    end
    row_index += 1
  end
  @cells_read[sheet] = true
end
|
374
|
+
|
375
|
+
# Get the contents of a cell, accounting for the
|
376
|
+
# way formula stores the value
|
377
|
+
# Gets the contents of a cell, accounting for the way a formula stores its
# value: for a Spreadsheet::Formula cell the formula's value is returned,
# for anything else the cell object itself.
def read_cell_content(row, idx)
  content = row.at(idx)
  content.class == Spreadsheet::Formula ? content.value : content
end
|
382
|
+
|
383
|
+
# Test the cell to see if it's a valid date/time.
|
384
|
+
# Tests the cell to see if it's a valid date/time.
#
# Returns false when the cell's format is not a date/time format or when
# its content cannot be converted with Float(); true when the content is a
# number greater than zero; nil when it is a number that is not positive.
def date_or_time?(row, idx)
  return false unless row.format(idx).date_or_time?

  content = read_cell_content(row, idx)
  begin
    Float(content) > 0 ? true : nil
  rescue StandardError
    false
  end
end
private :date_or_time?
|
394
|
+
|
395
|
+
# Read the date-time cell and convert to,
|
396
|
+
# the date-time values for Roo
|
397
|
+
# Reads a date/time cell and converts it to the values Roo stores.
#
# Returns a two-element [value_type, value] pair:
# * [:time, seconds]   - numeric content below 1.0; seconds is the fraction
#                        of a day expressed in whole (rounded) seconds.
# * [:datetime, value] - the DateTime, when it has a non-zero time part.
# * [:date, string]    - otherwise, the date formatted as 'yyyy-mm-dd'.
#
# Formula cells are converted from their numeric result via
# row._datetime / row._date; other cells via row.datetime / row.date.
def read_cell_date_or_time(row, idx)
  serial = read_cell_content(row, idx).to_s.to_f

  # Splitting the rounded second count into h/m/s and summing it up again
  # yields the rounded second count itself.
  return :time, (serial * 24.0 * 60.0 * 60.0).round if serial < 1.0

  stamp =
    if row.at(idx).class == Spreadsheet::Formula
      row._datetime(serial)
    else
      row.datetime(idx)
    end
  has_time = stamp.hour != 0 || stamp.min != 0 || stamp.sec != 0
  return :datetime, stamp if has_time

  day =
    if row.at(idx).class == Spreadsheet::Formula
      row._date(serial)
    else
      row.date(idx)
    end
  return :date, sprintf("%04d-%02d-%02d", day.year, day.month, day.day)
end
private :read_cell_date_or_time
|
434
|
+
|
435
|
+
# Read the cell and based on the class,
|
436
|
+
# return the values for Roo
|
437
|
+
# Reads the cell and, based on the class of its content, returns the
# [value_type, value] pair Roo stores for it:
# * numbers          -> [:float, content.to_f]
# * strings/booleans -> [:string, content.to_s]
# * anything else    -> [downcased class name as a symbol, nil]
def read_cell(row, idx)
  cell = read_cell_content(row, idx)
  case cell
  # Integer already covers what Fixnum/Bignum matched; those two constants
  # no longer exist on Ruby 3.2+ and made every non-numeric cell fail.
  when Float, Integer
    value_type = :float
    value = cell.to_f
  when String, TrueClass, FalseClass
    value_type = :string
    value = cell.to_s
  else
    value_type = cell.class.to_s.downcase.to_sym
    value = nil
  end
  return value_type, value
end
private :read_cell
|
453
|
+
|
454
|
+
#TODO: testing only
|
455
|
+
# def inject_null_characters(str)
|
456
|
+
# if str.class != String
|
457
|
+
# return str
|
458
|
+
# end
|
459
|
+
# new_str=''
|
460
|
+
# 0.upto(str.size-1) do |i|
|
461
|
+
# new_str += str[i,1]
|
462
|
+
# new_str += "\000"
|
463
|
+
# end
|
464
|
+
# new_str
|
465
|
+
# end
|
466
|
+
#
|
467
|
+
|
468
|
+
end
|