donibuchanan-roo 1.3.12

Sign up to get free protection for your applications and to get access to all the features.
Files changed (78) hide show
  1. data/History.txt +225 -0
  2. data/README.markdown +55 -0
  3. data/examples/roo_soap_client.rb +53 -0
  4. data/examples/roo_soap_server.rb +29 -0
  5. data/examples/write_me.rb +33 -0
  6. data/lib/roo/excel.rb +468 -0
  7. data/lib/roo/excel2003xml.rb +411 -0
  8. data/lib/roo/excelx.rb +602 -0
  9. data/lib/roo/generic_spreadsheet.rb +628 -0
  10. data/lib/roo/google.rb +379 -0
  11. data/lib/roo/openoffice.rb +451 -0
  12. data/lib/roo/roo_rails_helper.rb +82 -0
  13. data/lib/roo/version.rb +9 -0
  14. data/lib/roo.rb +32 -0
  15. data/test/1900_base.xls +0 -0
  16. data/test/1904_base.xls +0 -0
  17. data/test/Bibelbund.csv +3741 -0
  18. data/test/Bibelbund.ods +0 -0
  19. data/test/Bibelbund.xls +0 -0
  20. data/test/Bibelbund.xlsx +0 -0
  21. data/test/Bibelbund1.ods +0 -0
  22. data/test/bad_excel_date.xls +0 -0
  23. data/test/bbu.ods +0 -0
  24. data/test/bbu.xls +0 -0
  25. data/test/bbu.xlsx +0 -0
  26. data/test/bode-v1.ods.zip +0 -0
  27. data/test/bode-v1.xls.zip +0 -0
  28. data/test/boolean.ods +0 -0
  29. data/test/boolean.xls +0 -0
  30. data/test/boolean.xlsx +0 -0
  31. data/test/borders.ods +0 -0
  32. data/test/borders.xls +0 -0
  33. data/test/borders.xlsx +0 -0
  34. data/test/bug-row-column-fixnum-float.xls +0 -0
  35. data/test/datetime.ods +0 -0
  36. data/test/datetime.xls +0 -0
  37. data/test/datetime.xlsx +0 -0
  38. data/test/datetime_floatconv.xls +0 -0
  39. data/test/emptysheets.ods +0 -0
  40. data/test/emptysheets.xls +0 -0
  41. data/test/excel2003.xml +21140 -0
  42. data/test/false_encoding.xls +0 -0
  43. data/test/formula.ods +0 -0
  44. data/test/formula.xls +0 -0
  45. data/test/formula.xlsx +0 -0
  46. data/test/formula_parse_error.xls +0 -0
  47. data/test/html-escape.ods +0 -0
  48. data/test/no_spreadsheet_file.txt +1 -0
  49. data/test/numbers1.csv +18 -0
  50. data/test/numbers1.ods +0 -0
  51. data/test/numbers1.xls +0 -0
  52. data/test/numbers1.xlsx +0 -0
  53. data/test/only_one_sheet.ods +0 -0
  54. data/test/only_one_sheet.xls +0 -0
  55. data/test/only_one_sheet.xlsx +0 -0
  56. data/test/paragraph.ods +0 -0
  57. data/test/paragraph.xls +0 -0
  58. data/test/paragraph.xlsx +0 -0
  59. data/test/ric.ods +0 -0
  60. data/test/simple_spreadsheet.ods +0 -0
  61. data/test/simple_spreadsheet.xls +0 -0
  62. data/test/simple_spreadsheet.xlsx +0 -0
  63. data/test/simple_spreadsheet_from_italo.ods +0 -0
  64. data/test/simple_spreadsheet_from_italo.xls +0 -0
  65. data/test/skipped_tests.rb +789 -0
  66. data/test/style.ods +0 -0
  67. data/test/style.xls +0 -0
  68. data/test/style.xlsx +0 -0
  69. data/test/test_helper.rb +19 -0
  70. data/test/test_roo.rb +1834 -0
  71. data/test/time-test.csv +2 -0
  72. data/test/time-test.ods +0 -0
  73. data/test/time-test.xls +0 -0
  74. data/test/time-test.xlsx +0 -0
  75. data/test/whitespace.ods +0 -0
  76. data/test/whitespace.xls +0 -0
  77. data/test/whitespace.xlsx +0 -0
  78. metadata +185 -0
data/lib/roo/excel.rb ADDED
@@ -0,0 +1,468 @@
1
+ require 'spreadsheet'
2
# charguess is an optional dependency. CHARGUESS is true when the library
# could be required and false when loading it raised LoadError.
CHARGUESS =
  begin
    require 'charguess'
    true
  rescue LoadError
    false
  end
8
+
9
+ # The Spreadsheet library has a bug in handling Excel
10
+ # base dates so if the file is a 1904 base date then
11
+ # dates are off by a day. 1900 base dates work fine
12
+ module Spreadsheet
13
+ module Excel
14
+ class Row < Spreadsheet::Row
15
# Converts a serial day number (integer part = days after the worksheet's
# date base, fractional part = time of day) into a DateTime.
#
# data - a Numeric serial value, or a DateTime which is returned unchanged.
#
# Returns a DateTime with whole-second resolution.
def _datetime(data) # :nodoc:
  return data if data.is_a?(DateTime)

  base = @worksheet.date_base
  date = base + data.to_f

  # Break the fractional day down into hours, minutes and (rounded) seconds.
  raw_hour = (data % 1) * 24
  raw_min  = (raw_hour % 1) * 60
  sec      = ((raw_min % 1) * 60).round
  min      = raw_min.floor
  hour     = raw_hour.floor

  # Rounding can yield 60 seconds; carry the overflow upwards.
  sec, min   = 0, min + 1  if sec > 59
  min, hour  = 0, hour + 1 if min > 59
  hour, date = 0, date + 1 if hour > 23

  # LEAP_ERROR is defined outside this file (presumably by the spreadsheet
  # gem); dates are shifted back one day when it is later than the base.
  date -= 1 if LEAP_ERROR > base

  DateTime.new(date.year, date.month, date.day, hour, min, sec)
end
41
# Expose both conversion helpers so code outside Row can call them.
public :_date, :_datetime
43
+ end
44
+ # patch for ruby-spreadsheet parsing formulas
45
+ class Reader
46
# Decodes one formula record from +work+ and stores the resulting Formula
# in +worksheet+ through set_cell.
#
# worksheet - the worksheet handed on to set_cell
# addr      - accepted for interface compatibility; not used in this method
# work      - the raw record bytes (String)
def read_formula(worksheet, addr, work)
  row, column, xf, rtype, rval, rcheck, opts = work.unpack 'v3CxCx3v2'
  formula = Formula.new
  formula.shared = (opts & 0x08) > 0
  formula.data = work[20..-1]

  if rcheck != 0xffff || rtype > 3
    # The result bytes hold a floating point number.
    number, = work.unpack 'x6E'
    # on architectures where sizeof(double) > 8
    number, = work.unpack 'x6e' unless number
    formula.value = number
  else
    case rtype
    when 0
      _pos, op, _len, chunk = get_next_chunk
      # For a non-:string chunk the text is read from offset 10. The original
      # author noted this "seems to work" for the case they saw but doubted
      # it is the right fix.
      source = op == :string ? chunk : chunk[10..-1]
      formula.value = client(read_string(source, 2), @workbook.encoding)
    when 1
      formula.value = rval > 0
    when 2
      formula.value = Error.new rval
    end
    # rtype 3: leave the Formula value blank
  end

  set_cell worksheet, row, column, xf, formula
end
76
+ end
77
+ end
78
+ end
79
+
80
+
81
+ # ruby-spreadsheet has a font object so we're extending it
82
+ # with our own functionality but still providing full access
83
+ # to the user for other font information
84
# Mixin for ruby-spreadsheet font objects. It adds predicate helpers while
# leaving the font's own accessors (weight, italic, underline) available.
module ExcelFontExtensions
  # True when the font weight is exactly 700.
  # From ruby-spreadsheet doc: 100 <= weight <= 1000, bold => 700, normal => 400
  # Any arguments are accepted and ignored.
  def bold?(*args)
    weight == 700
  end

  # Returns the font's +italic+ attribute unchanged.
  def italic?
    italic
  end

  # True unless the font's +underline+ attribute is :none.
  def underline?
    underline != :none
  end
end
104
+
105
+ # Class for handling Excel-Spreadsheets
106
+ class Excel < GenericSpreadsheet
107
+
108
# Message raised by the formula-related methods; frozen so the shared
# constant cannot be mutated by accident.
EXCEL_NO_FORMULAS = 'formulas are not supported for excel spreadsheets'.freeze
109
+
110
+ # Creates a new Excel spreadsheet object.
111
+ # Parameter packed: :zip - File is a zip-file
112
# Creates a new Excel spreadsheet object.
#
# filename     - path of the .xls file. A name starting with "http://" is
#                passed to open_from_uri, one starting with "stream:" to
#                open_from_stream (helpers defined outside this file).
# packed       - pass :zip if the file is a zip-file; it is then run
#                through unzip first.
# file_warning - stored in @file_warning (default :error).
#
# Raises IOError if the resolved file does not exist.
def initialize(filename, packed = nil, file_warning = :error)
  super()
  @file_warning = file_warning
  @tmpdir = "oo_"+$$.to_s
  @tmpdir = File.join(ENV['ROO_TMP'], @tmpdir) if ENV['ROO_TMP']
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  FileUtils::mkdir(@tmpdir) unless File.exist?(@tmpdir)
  begin
    # The fetch/unpack steps run inside the begin block so the temporary
    # directory is removed even when one of them raises.
    filename = open_from_uri(filename) if filename[0,7] == "http://"
    filename = open_from_stream(filename[7..-1]) if filename[0,7] == "stream:"
    filename = unzip(filename) if packed == :zip
    file_type_check(filename,'.xls','an Excel')
    @filename = filename
    unless File.file?(@filename)
      raise IOError, "file #{@filename} does not exist"
    end
    @workbook = Spreadsheet.open(filename)
    @default_sheet = self.sheets.first
  ensure
    FileUtils::rm_r(@tmpdir)
  end
  # Per-sheet caches, filled lazily by read_cells.
  @cell = Hash.new
  @cell_type = Hash.new
  @formula = Hash.new
  @first_row = Hash.new
  @last_row = Hash.new
  @first_column = Hash.new
  @last_column = Hash.new
  @header_line = 1
  @cells_read = Hash.new
  @fonts = Hash.new
end
147
+
148
+ # returns an array of sheet names in the spreadsheet
149
# Returns an Array with the name of every worksheet in the workbook, each
# passed through normalize_string, in workbook order.
def sheets
  names = []
  @workbook.worksheets.each { |worksheet| names << normalize_string(worksheet.name) }
  names
end
156
+
157
+ # returns the content of a cell. The upper left corner is (1,1) or ('A',1)
158
# Returns the content of the cell at (row, col) on +sheet+ (the default
# sheet when omitted). The sheet is read first if that has not happened yet.
#
# A :date cell is stored as a 'yyyy-mm-dd' string and returned as a Date;
# a :string cell goes through platform_specific_iconv; every other type
# is returned as stored.
#
# Raises ArgumentError when no sheet is given and no default sheet is set.
def cell(row,col,sheet=nil)
  sheet ||= @default_sheet
  raise ArgumentError unless sheet
  read_cells(sheet) unless @cells_read[sheet]
  raise "should be read" unless @cells_read[sheet]
  row, col = normalize(row, col)
  key = [row, col]

  case celltype(row, col, sheet)
  when :date
    year, month, day = @cell[sheet][key].split('-')
    Date.new(year.to_i, month.to_i, day.to_i)
  when :string
    platform_specific_iconv(@cell[sheet][key])
  else
    @cell[sheet][key]
  end
end
174
+
175
+ # returns the type of a cell:
176
+ # * :float
177
+ # * :string,
178
+ # * :date
179
+ # * :percentage
180
+ # * :formula
181
+ # * :time
182
+ # * :datetime
183
# Returns the type of the cell at (row, col) on +sheet+ (default sheet when
# omitted): :formula when a formula is recorded for the cell, otherwise the
# stored cell type (:float, :string, :date, :percentage, :time, :datetime).
# The sheet is read first if necessary.
#
# If the lookup itself fails, the location is printed and the error re-raised.
def celltype(row,col,sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row, col = normalize(row, col)
  key = [row, col]
  begin
    @formula[sheet][key] ? :formula : @cell_type[sheet][key]
  rescue
    puts "Error in sheet #{sheet}, row #{row}, col #{col}"
    raise
  end
end
198
+
199
+ # raises an exception because formulas are not supported for excel files
200
# Formulas are not supported for Excel files: always raises a RuntimeError
# carrying EXCEL_NO_FORMULAS, whatever the arguments.
def formula(row,col,sheet=nil)
  raise RuntimeError, EXCEL_NO_FORMULAS
end
203
+
204
+ # raises an exception because formulas are not supported for excel files
205
# Formulas are not supported for Excel files: always raises a RuntimeError
# carrying EXCEL_NO_FORMULAS instead of answering the question.
def formula?(row,col,sheet=nil)
  raise RuntimeError, EXCEL_NO_FORMULAS
end
208
+
209
+ # raises an exception because formulas are not supported for excel files
210
# Formulas are not supported for Excel files: always raises a RuntimeError
# carrying EXCEL_NO_FORMULAS rather than returning a list.
def formulas(sheet=nil)
  raise RuntimeError, EXCEL_NO_FORMULAS
end
213
+
214
+ # Given a cell, return the cell's font
215
# Returns the font object recorded for the cell at (row, col) on +sheet+
# (default sheet when omitted), reading the sheet first if necessary.
def font(row, col, sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  position = normalize(row, col)
  @fonts[sheet][[position[0], position[1]]]
end
221
+
222
+ # shows the internal representation of all cells
223
+ # mainly for debugging purposes
224
# Debugging aid: returns the inspect string of the internal cell hash of
# +sheet+ (default sheet when omitted), reading the sheet first if necessary.
def to_s(sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  cells = @cell[sheet]
  cells.inspect
end
229
+
230
+ private
231
+
232
+ # converts name of a sheet to index (0,1,2,..)
233
# Converts a sheet reference into a zero-based worksheet index.
#
# name - either a 1-based Integer position, or a sheet name that is compared
#        against the normalized name of every worksheet.
#
# Raises StandardError when no worksheet carries the given name.
def sheet_no(name)
  # Integer replaces Fixnum, which no longer exists on Ruby >= 3.2.
  return name - 1 if name.is_a?(Integer)

  @workbook.worksheets.each_with_index do |worksheet, index|
    return index if name == normalize_string(worksheet.name)
  end
  raise StandardError, "sheet '#{name}' not found"
end
242
+
243
# True when +row+ holds nothing but nil and '' entries (an empty collection
# counts as empty too).
def empty_row?(row)
  row.compact.none? { |entry| entry != '' }
end
252
+
253
# True when +col+ holds nothing but nil and '' entries (an empty collection
# counts as empty too).
def empty_column?(col)
  col.compact.none? { |entry| entry != '' }
end
262
+
263
# Normalizes a string read from the workbook.
#
# Interleaved null bytes are stripped first. When charguess is available and
# detects an encoding, the value is converted from that encoding to utf-8;
# otherwise platform_specific_iconv handles it.
#
# Returns the converted String.
def normalize_string(value)
  value = remove_every_second_null(value) if every_second_null?(value)
  if CHARGUESS && (encoding = CharGuess::guess(value))
    # Run the conversion; the converter object alone must not be returned.
    Iconv.new('utf-8', encoding).iconv(value)
  else
    platform_specific_iconv(value)
  end
end
271
+
272
# Converts +value+ depending on RUBY_PLATFORM: utf-8 to utf-8 on darwin and
# solaris, iso-8859-1 to utf-8 on mswin32, no conversion anywhere else.
# Interleaved null bytes are stripped from the result afterwards.
def platform_specific_iconv(value)
  converted =
    case RUBY_PLATFORM.downcase
    when /darwin/, /solaris/
      Iconv.new('utf-8','utf-8').iconv(value)
    when /mswin32/
      Iconv.new('utf-8','iso-8859-1').iconv(value)
    else
      value
    end
  every_second_null?(converted) ? remove_every_second_null(converted) : converted
end
288
+
289
# True when +str+ has at least two characters and the second character of
# every complete pair is a null byte. A trailing unpaired character is not
# examined.
def every_second_null?(str)
  return false if str.length < 2

  pair_count = str.length / 2
  (0...pair_count).all? { |pair| str[pair * 2 + 1, 1] == "\000" }
end
302
+
303
# Returns a new string made of the first character of every complete
# two-character pair in +str+; a trailing unpaired character is dropped.
def remove_every_second_null(str)
  pair_count = str.length / 2
  (0...pair_count).inject('') { |kept, pair| kept + str[pair * 2, 1] }
end
311
+
312
+ # helper function to set the internal representation of cells
313
# Helper that records one cell in the internal per-sheet hashes.
#
# sheet    - sheet key
# row, col - cell position; the stored key is [row, col + i]
# i        - column offset added to col
# v        - raw value
# vt       - value type; :float and :percentage are stored via to_f,
#            :datetime is rebuilt as a DateTime, all others stored as given
# formula  - stored in @formula only when truthy
# tr       - not used by this method
# font     - stored in @fonts
def set_cell_values(sheet,row,col,i,v,vt,formula,tr,font)
  key = [row, col + i]

  @cell_type[sheet] ||= {}
  @cell_type[sheet][key] = vt
  @formula[sheet] ||= {}
  @formula[sheet][key] = formula if formula
  @cell[sheet] ||= {}
  @fonts[sheet] ||= {}
  @fonts[sheet][key] = font

  @cell[sheet][key] =
    case vt
    when :float, :percentage
      v.to_f
    when :datetime
      DateTime.new(v.year, v.month, v.day, v.hour, v.min, v.sec)
    else
      # :string, :date, :time and unknown types are stored unchanged
      v
    end
end
341
+
342
+ # read all cells in the selected sheet
343
# Reads all cells of the selected sheet into the internal hashes.
#
# sheet - sheet name (default sheet when omitted)
#
# Raises ArgumentError when neither a sheet nor a default sheet is set,
# RangeError when the sheet is not part of the workbook, and a RuntimeError
# when the sheet has already been read.
def read_cells(sheet=nil)
  sheet ||= @default_sheet
  raise ArgumentError, "Error: sheet '#{sheet||'nil'}' not valid" if @default_sheet.nil? && sheet.nil?
  raise RangeError unless sheets.include?(sheet)
  raise "sheet #{sheet} already read" if @cells_read[sheet]

  worksheet = @workbook.worksheet(sheet_no(sheet))
  row_index = 1
  worksheet.each(0) do |row|
    # Exclusive range: valid cell indexes are 0 .. row.size - 1.
    (0...row.size).each do |cell_index|
      cell = row.at(cell_index)
      next if cell.nil? # skip empty cells
      next if cell.class == Spreadsheet::Formula && cell.value.nil? # skip empty formula cells

      vt, v =
        if date_or_time?(row, cell_index)
          read_cell_date_or_time(row, cell_index)
        else
          read_cell(row, cell_index)
        end
      font = row.format(cell_index).font
      font.extend(ExcelFontExtensions)
      # No formula text or tr value is determined here, so nil is passed
      # for both; columns are 1-based in the stored key.
      set_cell_values(sheet, row_index, cell_index + 1, 0, v, vt, nil, nil, font)
    end
    row_index += 1
  end
  @cells_read[sheet] = true
end
374
+
375
+ # Get the contents of a cell, accounting for the
376
+ # way formula stores the value
377
# Returns the content of the cell at +idx+ in +row+. A Spreadsheet::Formula
# cell is unwrapped to the value it carries; any other cell is returned as is.
def read_cell_content(row, idx)
  content = row.at(idx)
  content.class == Spreadsheet::Formula ? content.value : content
end
382
+
383
+ # Test the cell to see if it's a valid date/time.
384
# Tests whether the cell at +idx+ in +row+ holds a valid date/time: its
# format must be a date-or-time format and its content must convert to a
# number greater than zero.
#
# Always returns true or false.
def date_or_time?(row, idx)
  return false unless row.format(idx).date_or_time?

  content = read_cell_content(row, idx)
  begin
    Float(content) > 0
  rescue ArgumentError, TypeError
    # content cannot be converted to a number
    false
  end
end
private :date_or_time?
394
+
395
+ # Read the date-time cell and convert to,
396
+ # the date-time values for Roo
397
# Reads a date/time cell and converts it to the representation Roo stores.
#
# Returns [value_type, value]:
# * [:time, seconds]     for values below 1.0 (seconds as a rounded Integer)
# * [:datetime, value]   when the converted value has a time-of-day part
# * [:date, 'yyyy-mm-dd'] otherwise
def read_cell_date_or_time(row, idx)
  number = read_cell_content(row, idx).to_s.to_f

  if number < 1.0
    # Fraction of a day expressed in whole seconds. Splitting this into
    # hours/minutes/seconds and adding them up again gives the same number.
    return :time, (number * 24.0 * 60.0 * 60.0).round
  end

  # Formula cells are converted from their numeric value; other cells let
  # the row convert the cell at its index.
  stamp =
    if row.at(idx).class == Spreadsheet::Formula
      row._datetime(number)
    else
      row.datetime(idx)
    end

  midnight = stamp.hour == 0 && stamp.min == 0 && stamp.sec == 0
  return :datetime, stamp unless midnight

  day =
    if row.at(idx).class == Spreadsheet::Formula
      row._date(number)
    else
      row.date(idx)
    end
  return :date, sprintf("%04d-%02d-%02d", day.year, day.month, day.day)
end
private :read_cell_date_or_time
434
+
435
+ # Read the cell and based on the class,
436
+ # return the values for Roo
437
# Reads the cell at +idx+ in +row+ and maps it to Roo's representation
# based on the class of its content.
#
# Returns [value_type, value]:
# * [:float, content.to_f]  for Float and Integer content
# * [:string, content.to_s] for String, true and false
# * [<downcased class name as a Symbol>, nil] for anything else
def read_cell(row, idx)
  content = read_cell_content(row, idx)
  case content
  when Float, Integer
    # Integer covers what used to be Fixnum and Bignum; those constants no
    # longer exist on Ruby >= 3.2.
    return :float, content.to_f
  when String, TrueClass, FalseClass
    return :string, content.to_s
  else
    return content.class.to_s.downcase.to_sym, nil
  end
end
private :read_cell
453
+
454
+ #TODO: testing only
455
+ # def inject_null_characters(str)
456
+ # if str.class != String
457
+ # return str
458
+ # end
459
+ # new_str=''
460
+ # 0.upto(str.size-1) do |i|
461
+ # new_str += str[i,1]
462
+ # new_str += "\000"
463
+ # end
464
+ # new_str
465
+ # end
466
+ #
467
+
468
+ end