roo 0.9.4 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. data/History.txt +6 -0
  2. data/License.txt +0 -0
  3. data/Manifest.txt +8 -0
  4. data/README.txt +0 -0
  5. data/Rakefile +0 -0
  6. data/base64include.rb +0 -0
  7. data/examples/roo_soap_client.rb +0 -0
  8. data/examples/roo_soap_server.rb +0 -0
  9. data/examples/write_me.rb +0 -0
  10. data/lib/roo.rb +1 -0
  11. data/lib/roo/excel.rb +0 -0
  12. data/lib/roo/excelx.rb +426 -0
  13. data/lib/roo/generic_spreadsheet.rb +5 -5
  14. data/lib/roo/google.rb +0 -0
  15. data/lib/roo/openoffice.rb +0 -0
  16. data/lib/roo/roo_rails_helper.rb +0 -0
  17. data/lib/roo/version.rb +3 -3
  18. data/scripts/txt2html +0 -0
  19. data/setup.rb +0 -0
  20. data/test/Bibelbund.csv +0 -0
  21. data/test/Bibelbund.ods +0 -0
  22. data/test/Bibelbund.xls +0 -0
  23. data/test/Bibelbund1.ods +0 -0
  24. data/test/bbu.ods +0 -0
  25. data/test/bbu.xls +0 -0
  26. data/test/bbu.xlsx +0 -0
  27. data/test/bode-v1.ods.zip +0 -0
  28. data/test/bode-v1.xls.zip +0 -0
  29. data/test/borders.ods +0 -0
  30. data/test/borders.xls +0 -0
  31. data/test/borders.xlsx +0 -0
  32. data/test/formula.ods +0 -0
  33. data/test/formula.xls +0 -0
  34. data/test/formula.xlsx +0 -0
  35. data/test/no_spreadsheet_file.txt +0 -0
  36. data/test/numbers1.csv +0 -0
  37. data/test/numbers1.ods +0 -0
  38. data/test/numbers1.xls +0 -0
  39. data/test/numbers1.xlsx +0 -0
  40. data/test/numbers1_excel.csv +0 -0
  41. data/test/only_one_sheet.ods +0 -0
  42. data/test/only_one_sheet.xls +0 -0
  43. data/test/only_one_sheet.xlsx +0 -0
  44. data/test/ric.ods +0 -0
  45. data/test/simple_spreadsheet.ods +0 -0
  46. data/test/simple_spreadsheet.xls +0 -0
  47. data/test/simple_spreadsheet.xlsx +0 -0
  48. data/test/simple_spreadsheet_from_italo.ods +0 -0
  49. data/test/simple_spreadsheet_from_italo.xls +0 -0
  50. data/test/test_helper.rb +0 -0
  51. data/test/test_roo.rb +1252 -265
  52. data/test/time-test.csv +0 -0
  53. data/test/time-test.ods +0 -0
  54. data/test/time-test.xls +0 -0
  55. data/test/time-test.xlsx +0 -0
  56. data/website/index.html +9 -11
  57. data/website/index.txt +7 -12
  58. data/website/javascripts/rounded_corners_lite.inc.js +0 -0
  59. data/website/stylesheets/screen.css +0 -0
  60. data/website/template.rhtml +0 -0
  61. metadata +10 -2
data/History.txt CHANGED
@@ -1,3 +1,9 @@
1
+ == 1.0.0 2008-05-28
2
+ * 2 major enhancements
3
+ * support of Excel's new .xlsx file format
4
+ * method #to_xml for exporting a spreadsheet to an xml representation
5
+ * 1 bugfix
6
+ * fixed a bug with excel-spreadsheet character conversion under Macintosh Darwin
1
7
  == 0.9.4 2008-04-22
2
8
  * 1 bugfix
3
9
  * fixed a bug with excel-spreadsheet character conversion under Solaris
data/License.txt CHANGED
File without changes
data/Manifest.txt CHANGED
@@ -12,6 +12,7 @@ lib/roo/version.rb
12
12
  lib/roo/generic_spreadsheet.rb
13
13
  lib/roo/openoffice.rb
14
14
  lib/roo/excel.rb
15
+ lib/roo/excelx.rb
15
16
  lib/roo/google.rb
16
17
  lib/roo/roo_rails_helper.rb
17
18
  scripts/txt2html
@@ -22,27 +23,34 @@ test/Bibelbund.ods
22
23
  test/Bibelbund.xls
23
24
  test/Bibelbund.csv
24
25
  test/bbu.xls
26
+ test/bbu.xlsx
25
27
  test/bbu.ods
26
28
  test/no_spreadsheet_file.txt
27
29
  test/simple_spreadsheet.ods
28
30
  test/simple_spreadsheet.xls
31
+ test/simple_spreadsheet.xlsx
29
32
  test/simple_spreadsheet_from_italo.ods
30
33
  test/simple_spreadsheet_from_italo.xls
31
34
  test/test_helper.rb
32
35
  test/test_roo.rb
33
36
  test/time-test.ods
34
37
  test/time-test.xls
38
+ test/time-test.xlsx
35
39
  test/time-test.csv
36
40
  test/numbers1.csv
37
41
  test/numbers1_excel.csv
38
42
  test/numbers1.ods
39
43
  test/numbers1.xls
44
+ test/numbers1.xlsx
40
45
  test/borders.ods
41
46
  test/borders.xls
47
+ test/borders.xlsx
42
48
  test/formula.ods
43
49
  test/formula.xls
50
+ test/formula.xlsx
44
51
  test/only_one_sheet.ods
45
52
  test/only_one_sheet.xls
53
+ test/only_one_sheet.xlsx
46
54
  test/bode-v1.xls.zip
47
55
  test/bode-v1.ods.zip
48
56
  test/ric.ods
data/README.txt CHANGED
File without changes
data/Rakefile CHANGED
File without changes
data/base64include.rb CHANGED
File without changes
File without changes
File without changes
data/examples/write_me.rb CHANGED
File without changes
data/lib/roo.rb CHANGED
@@ -6,5 +6,6 @@ require 'roo/version'
6
6
  require 'roo/generic_spreadsheet'
7
7
  require 'roo/openoffice'
8
8
  require 'roo/excel'
9
+ require 'roo/excelx'
9
10
  require 'roo/google'
10
11
  require 'roo/roo_rails_helper'
data/lib/roo/excel.rb CHANGED
File without changes
data/lib/roo/excelx.rb ADDED
@@ -0,0 +1,426 @@
1
+
2
+ require 'rubygems'
3
+ require 'rexml/document'
4
+ require 'fileutils'
5
+ require 'zip/zipfilesystem'
6
+ require 'date'
7
+ #require 'base64'
8
+
9
# Backport of String#end_with? for Ruby versions that lack it.
# The method is only added when String does not already provide it, so the
# built-in implementation (which also accepts several suffixes at once) is
# never replaced by this single-argument version.
class String
  unless method_defined?(:end_with?)
    # Returns true if the string ends with +str+.
    def end_with?(str)
      self[-str.length, str.length] == str
    end
  end
end
14
+
15
+ class Excelx < GenericSpreadsheet
16
+
17
+ @@nr = 0
18
+
19
# Initialization and opening of a spreadsheet file.
#
# filename:: path of the .xlsx file; a name starting with "http://" is
#            handed to open_from_uri first
# packed::   :zip if the file has to be unpacked with unzip first
#
# The XML parts of the workbook are extracted into the temporary directory
# "oo_<pid>" (relative to the current directory), parsed with REXML, and the
# directory is removed again before the method returns.
#
# Raises IOError if the file does not exist.
def initialize(filename, packed=nil) #, create = false)
  @tmpdir = "oo_"+$$.to_s
  # File.exists? is gone in Ruby 3.2+, File.exist? works on every version
  FileUtils::mkdir(@tmpdir) unless File.exist?(@tmpdir)
  begin
    # fetching/unpacking happens inside the begin block so that the
    # temporary directory is removed even if one of these steps fails
    filename = open_from_uri(filename) if filename[0,7] == "http://"
    filename = unzip(filename) if packed == :zip
    if File.extname(filename) != ".xlsx"
      warn "are you sure, this is an Excel-xlsx file?"
    end
    @cells_read = Hash.new
    @filename = filename
    unless File.file?(@filename)
      raise IOError, "file #{@filename} does not exist"
    end
    # every opened file gets its own number, used as prefix of the
    # extracted file names
    @@nr += 1
    @file_nr = @@nr
    extract_content(@filename)
    # block form closes the file even if parsing raises
    File.open(File.join(@tmpdir, @file_nr.to_s+"_roo_workbook.xml")) do |file|
      @workbook_doc = REXML::Document.new file
    end
    @shared_table = []
    shared_strings = File.join(@tmpdir, @file_nr.to_s+'_roo_sharedStrings.xml')
    # the shared strings part is only parsed if it was extracted
    if File.exist?(shared_strings)
      File.open(shared_strings) do |file|
        @sharedstring_doc = REXML::Document.new file
      end
      read_shared_strings(@sharedstring_doc)
    end
    @sheet_doc = []
    @sheet_files.each_with_index do |item, i|
      File.open(item) do |file|
        @sheet_doc[i] = REXML::Document.new file
      end
    end
  ensure
    #if ENV["roo_local"] != "thomas-p"
    FileUtils::rm_r(@tmpdir)
    #end
  end
  @default_sheet = nil
  # no need to set default_sheet if there is only one sheet in the document
  if sheets.size == 1
    @default_sheet = sheets.first
  end
  @cell = Hash.new
  @cell_type = Hash.new
  @formula = Hash.new
  @first_row = Hash.new
  @last_row = Hash.new
  @first_column = Hash.new
  @last_column = Hash.new
  @header_line = 1
end
75
+
76
# Returns the content of a spreadsheet cell.
# (1,1) is the upper left corner.
# (1,1), (1,'A'), ('A',1), ('a',1) all refer to the
# cell in the first row and first column.
# A cell of type :date is returned as a Date object, every other cell is
# returned the way it is stored internally.
def cell(row, col, sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row, col = normalize(row, col)
  key = "#{row},#{col}"
  return @cell[sheet][key] unless celltype(row, col, sheet) == :date
  # dates are kept as "yyyy-mm-dd" strings internally
  year, month, day = @cell[sheet][key].split('-')
  Date.new(year.to_i, month.to_i, day.to_i)
end
90
+
91
# Returns the formula at (row,col).
# Returns nil if there is no formula (also for a sheet in which no cell
# was read at all).
# The method #formula? checks if there is a formula.
def formula(row,col,sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row, col = normalize(row, col)
  sheet_formulas = @formula[sheet]
  # the per-sheet table is only created when the first cell of the sheet is
  # stored, so it may be missing for an empty sheet
  sheet_formulas && sheet_formulas["#{row},#{col}"]
end
104
+
105
# true, if there is a formula at (row,col) of the given sheet
# (the default sheet if +sheet+ is omitted)
def formula?(row,col,sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row, col = normalize(row, col)
  # the sheet has to be passed on, otherwise the default sheet is inspected
  !formula(row, col, sheet).nil?
end
112
+
113
# set a cell to a certain value
# (this will not be saved back to the spreadsheet file!)
#
# Integer and Float values get the cell type :float, String values the
# type :string. Any other value raises an ArgumentError (the value itself
# has already been stored at that point).
def set(row,col,value,sheet=nil) #:nodoc:
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row, col = normalize(row, col)
  set_value(row, col, value, sheet)
  # Integer instead of Fixnum: Fixnum no longer exists in current Rubies
  case value
  when Integer, Float
    set_type(row, col, :float, sheet)
  when String
    set_type(row, col, :string, sheet)
  else
    raise ArgumentError, "Type for "+value.to_s+" not set"
  end
end
130
+
131
# returns the type of a cell:
# * :float
# * :string,
# * :date
# * :percentage
# * :formula (whenever a formula is stored for the cell)
# Returns nil if nothing is stored for the cell.
def celltype(row,col,sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row, col = normalize(row, col)
  key = "#{row},#{col}"
  # the per-sheet tables only exist once a cell of the sheet was stored,
  # so fall back to an empty table for sheets without any cell
  return :formula if (@formula[sheet] || {})[key]
  (@cell_type[sheet] || {})[key]
end
146
+
147
# returns an array of sheet names in the spreadsheet
# (the name attribute of every child of the "sheets" element of the
# workbook document, in document order)
def sheets
  names = []
  @workbook_doc.each_element do |workbook|
    workbook.each_element do |child|
      next unless child.name == "sheets"
      child.each_element { |sheet| names << sheet.attributes['name'] }
    end
  end
  names
end
161
+
162
# shows the internal representation of all cells of a sheet
# (the inspect output of the cell table), for debugging purposes
def to_s(sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  cells_of_sheet = @cell[sheet]
  cells_of_sheet.inspect
end
169
+
170
# returns all values in this row as an array
# row numbers are 1,2,3,... like in the spreadsheet
#
# The array starts with column 1; cells without content inside the row are
# nil and trailing empty cells are cut off. A row (or sheet) without any
# cell yields an empty array.
def row(rownumber,sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  values = []
  # the cell table of a sheet is missing if the sheet has no cells at all
  (@cell[sheet] || {}).each_pair do |key, value|
    y, x = key.split(',').map { |part| part.to_i }
    values[x - 1] = value if y == rownumber && x >= 1
  end
  # stop at an empty array, otherwise a row of nils would loop forever
  values.pop while !values.empty? && values.last.nil?
  values
end
191
+
192
# returns all values in this column as an array
# column numbers are 1,2,3,... like in the spreadsheet
# (a column letter given as String is converted to its number first);
# the array covers the rows from first_row to last_row of the sheet
def column(columnnumber,sheet=nil)
  columnnumber = GenericSpreadsheet.letter_to_number(columnnumber) if columnnumber.class == String
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  first_row(sheet).upto(last_row(sheet)).map do |rownumber|
    cell(rownumber, columnnumber, sheet)
  end
end
206
+
207
# returns each formula in the selected sheet as an array of elements
# [row, col, formula]
# (the cells are visited row by row between the first and last
# row/column of the sheet)
def formulas(sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  found = []
  first_row(sheet).upto(last_row(sheet)) do |row|
    first_column(sheet).upto(last_column(sheet)) do |col|
      found << [row, col, formula(row, col, sheet)] if formula?(row, col, sheet)
    end
  end
  found
end
223
+
224
+ private
225
+
226
# helper function to set the internal representation of cells
#
# The cell is stored under the key "y,x+i". +vt+ is its type, +formula+ its
# formula (only stored if given). The stored value depends on the type:
# +v+ converted to a float for :float and :percentage, +str_v+ for :string,
# a "yyyy-mm-dd" string (+v+ days after 1899-12-30) for :date, +v+ times the
# number of seconds of a day for :time and +v+ itself otherwise.
# +tr+ is not used.
def set_cell_values(sheet,x,y,i,v,vt,formula,tr,str_v)
  key = "#{y},#{x+i}"
  @cell_type[sheet] ||= {}
  @cell_type[sheet][key] = vt
  @formula[sheet] ||= {}
  @formula[sheet][key] = formula if formula
  @cell[sheet] ||= {}
  @cell[sheet][key] =
    case vt
    when :float, :percentage
      v.to_f
    when :string
      str_v
    when :date
      (Date.new(1899,12,30)+v.to_i).strftime("%Y-%m-%d")
    when :time
      v.to_f*(24*60*60)
    else
      v
    end
end
251
+
252
# Splits a cell reference like "AB12" into its leading letters and the
# number that follows them, e.g. ["AB", 12]. Anything after the digits is
# ignored.
# Raises ArgumentError if the letters are missing or the number is 0.
def split_coord(s)
  letter = s[/\A[A-Za-z]*/]
  number = s[letter.length..-1][/\A[0-9]*/].to_i
  raise ArgumentError if letter.empty? || number.zero?
  [letter, number]
end
269
+
270
# Converts a cell reference like "B3" into numeric coordinates.
# Returns [x, y]: x is the column number computed from the letters by
# GenericSpreadsheet.letter_to_number, y is the row number.
def split_coordinate(str)
  letter, y = split_coord(str)
  [GenericSpreadsheet.letter_to_number(letter), y]
end
276
+
277
# read all cells in the selected sheet
#
# sheet:: name of the sheet, the default sheet if omitted
#
# The type of a cell is derived from its attributes: t="s" marks a shared
# string (the value is an index into the shared string table), s="2" a date
# and s="1" a formula - or a time, if no <f> element precedes the value of
# the cell. Every other cell is read as a float.
# NOTE(review): the s attribute is compared with the fixed values '1' and
# '2'; presumably these are style indices - confirm that this holds for
# arbitrary workbooks.
#
# Raises ArgumentError if neither a sheet nor a default sheet is given and
# RangeError if the workbook has no sheet with this name.
def read_cells(sheet=nil)
  sheet = @default_sheet unless sheet
  raise ArgumentError, "Error: sheet '#{sheet||'nil'}' not valid" if @default_sheet == nil and sheet==nil
  n = sheets.index(sheet)
  # without this check an unknown sheet ends in @sheet_doc[nil] (TypeError)
  raise RangeError, "sheet '#{sheet}' not found" if n.nil?
  @sheet_doc[n].each_element do |worksheet|
    worksheet.each_element do |elem|
      next unless elem.name == 'sheetData'
      elem.each_element do |row|
        next unless row.name == 'row'
        row.each_element do |c|
          next unless c.name == 'c'
          tmp_type = nil
          tmp_type = :shared if c.attributes['t'] == 's'
          # the style attribute takes precedence over t="s"
          if c.attributes['s'] == '2'
            tmp_type = :date
          elsif c.attributes['s'] == '1'
            tmp_type = :formula
          end
          formula = nil
          f_element_found = false
          c.each_element do |child|
            if child.name == 'f'
              f_element_found = true
              formula = child.text
            end
            next unless child.name == 'v'
            # style '1' without a formula element in front of the value
            # is read as a time
            tmp_type = :time if tmp_type == :formula and !f_element_found
            v = nil
            str_v = nil
            case tmp_type
            when :shared
              vt = :string
              str_v = @shared_table[child.text.to_i]
            when :date
              vt = :date
              v = child.text
            when :time
              vt = :time
              v = child.text
            when :formula
              vt = :formula
              v = child.text.to_f #TODO: !!!!
            else
              vt = :float
              v = child.text
            end
            x, y = split_coordinate(c.attributes['r'])
            set_cell_values(sheet, x, y, 0, v, vt, formula, nil, str_v)
          end
        end
      end
    end
  end
  @cells_read[sheet] = true
end
343
+
344
# Checks if the default_sheet exists in the workbook document.
# Raises an ArgumentError if no default sheet is set and a RangeError if
# the workbook has no sheet with this name.
def check_default_sheet
  raise ArgumentError, "Error: default_sheet not set" if @default_sheet == nil
  found = false
  @workbook_doc.each_element do |workbook|
    workbook.each_element do |el|
      next unless el.name == "sheets"
      el.each_element do |sheet|
        found = true if @default_sheet == sheet.attributes['name']
      end
    end
  end
  raise RangeError, "sheet '#{@default_sheet}' not found" unless found
end
364
+
365
+
366
# Writes the workbook, the shared strings and every worksheet of the archive
# into the temporary directory and remembers the worksheet files in
# @sheet_files (the file of sheetN.xml is stored at index N-1).
#
# zipfilename:: name of the archive; kept for compatibility, the entries
#               are taken from +zip+
# zip::         the opened archive, has to respond to entries and
#               read(entry)
# path::        not used
def process_zipfile(zipfilename, zip, path='')
  @sheet_files = []
  prefix = File.join(@tmpdir, @file_nr.to_s)
  # the archive is already open, there is no need to open it a second time
  zip.entries.each do |entry|
    name = entry.to_s
    if name.end_with?('workbook.xml')
      File.open(prefix+'_roo_workbook.xml', 'wb') {|f| f << zip.read(entry) }
    end
    if name.end_with?('sharedStrings.xml')
      File.open(prefix+'_roo_sharedStrings.xml', 'wb') {|f| f << zip.read(entry) }
    end
    # anchored at the end of the name: an entry like
    # "_rels/sheet1.xml.rels" must not overwrite the file of sheet 1
    if name =~ /sheet([0-9]+)\.xml\z/
      nr = $1
      sheet_file = prefix+"_roo_sheet#{nr}"
      File.open(sheet_file, 'wb') {|f| f << zip.read(entry) }
      @sheet_files[nr.to_i-1] = sheet_file
    end
  end
  return
end
393
+
394
# Opens the archive +zipfilename+ and hands it to process_zipfile, which
# extracts the parts of the workbook.
def extract_content(zipfilename)
  # open the file that was asked for instead of relying on @filename
  Zip::ZipFile.open(zipfilename) do |zip|
    process_zipfile(zipfilename, zip)
  end
end
399
+
400
# Stores +value+ as content of the cell (row,col) of the given sheet
# (the default sheet if +sheet+ is omitted).
def set_value(row,col,value,sheet=nil)
  # @default_sheet, not @default_value: the latter is never assigned
  sheet ||= @default_sheet
  # the cell table of a sheet does not exist before its first cell is stored
  @cell[sheet] ||= {}
  @cell[sheet]["#{row},#{col}"] = value
end
404
+
405
# Stores +type+ as cell type of the cell (row,col) of the given sheet
# (the default sheet if +sheet+ is omitted).
def set_type(row,col,type,sheet=nil)
  # @default_sheet, not @default_value: the latter is never assigned
  sheet ||= @default_sheet
  # the type table of a sheet does not exist before its first cell is stored
  @cell_type[sheet] ||= {}
  @cell_type[sheet]["#{row},#{col}"] = type
end
409
+
410
# Fills @shared_table with the strings of the shared strings document.
#
# Every <si> element adds exactly one entry, so that the position in the
# table matches the index used by the cells: the text of its <t> child or,
# for rich text, the joined texts of the <t> elements of its <r> children.
# An <si> element without any text adds nil.
def read_shared_strings(doc)
  doc.each_element do |sst|
    next unless sst.name == 'sst'
    sst.each_element do |si|
      next unless si.name == 'si'
      parts = []
      si.each_element do |elem|
        case elem.name
        when 't'
          parts << elem.text
        when 'r'
          # rich text: the string is split into several formatted runs
          elem.each_element {|run| parts << run.text if run.name == 't' }
        end
      end
      @shared_table << (parts.size > 1 ? parts.join : parts.first)
    end
  end
end
425
+
426
+ end # class