roo-immersion 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. data/History.txt +225 -0
  2. data/README.markdown +60 -0
  3. data/examples/roo_soap_client.rb +53 -0
  4. data/examples/roo_soap_server.rb +29 -0
  5. data/examples/write_me.rb +33 -0
  6. data/lib/roo.rb +32 -0
  7. data/lib/roo/excel.rb +468 -0
  8. data/lib/roo/excel2003xml.rb +394 -0
  9. data/lib/roo/excelx.rb +601 -0
  10. data/lib/roo/generic_spreadsheet.rb +628 -0
  11. data/lib/roo/google.rb +379 -0
  12. data/lib/roo/openoffice.rb +451 -0
  13. data/lib/roo/roo_rails_helper.rb +82 -0
  14. data/lib/roo/version.rb +9 -0
  15. data/test/1900_base.xls +0 -0
  16. data/test/1904_base.xls +0 -0
  17. data/test/Bibelbund.csv +3741 -0
  18. data/test/Bibelbund.ods +0 -0
  19. data/test/Bibelbund.xls +0 -0
  20. data/test/Bibelbund.xlsx +0 -0
  21. data/test/Bibelbund.xml +62518 -0
  22. data/test/Bibelbund1.ods +0 -0
  23. data/test/bad_excel_date.xls +0 -0
  24. data/test/bbu.ods +0 -0
  25. data/test/bbu.xls +0 -0
  26. data/test/bbu.xlsx +0 -0
  27. data/test/bbu.xml +152 -0
  28. data/test/bode-v1.ods.zip +0 -0
  29. data/test/bode-v1.xls.zip +0 -0
  30. data/test/boolean.ods +0 -0
  31. data/test/boolean.xls +0 -0
  32. data/test/boolean.xlsx +0 -0
  33. data/test/boolean.xml +112 -0
  34. data/test/borders.ods +0 -0
  35. data/test/borders.xls +0 -0
  36. data/test/borders.xlsx +0 -0
  37. data/test/borders.xml +144 -0
  38. data/test/bug-row-column-fixnum-float.xls +0 -0
  39. data/test/bug-row-column-fixnum-float.xml +127 -0
  40. data/test/datetime.ods +0 -0
  41. data/test/datetime.xls +0 -0
  42. data/test/datetime.xlsx +0 -0
  43. data/test/datetime.xml +142 -0
  44. data/test/datetime_floatconv.xls +0 -0
  45. data/test/datetime_floatconv.xml +148 -0
  46. data/test/emptysheets.ods +0 -0
  47. data/test/emptysheets.xls +0 -0
  48. data/test/emptysheets.xml +105 -0
  49. data/test/excel2003.xml +21140 -0
  50. data/test/false_encoding.xls +0 -0
  51. data/test/false_encoding.xml +132 -0
  52. data/test/formula.ods +0 -0
  53. data/test/formula.xls +0 -0
  54. data/test/formula.xlsx +0 -0
  55. data/test/formula.xml +134 -0
  56. data/test/formula_parse_error.xls +0 -0
  57. data/test/formula_parse_error.xml +1833 -0
  58. data/test/html-escape.ods +0 -0
  59. data/test/no_spreadsheet_file.txt +1 -0
  60. data/test/numbers1.csv +18 -0
  61. data/test/numbers1.ods +0 -0
  62. data/test/numbers1.xls +0 -0
  63. data/test/numbers1.xlsx +0 -0
  64. data/test/numbers1.xml +312 -0
  65. data/test/only_one_sheet.ods +0 -0
  66. data/test/only_one_sheet.xls +0 -0
  67. data/test/only_one_sheet.xlsx +0 -0
  68. data/test/only_one_sheet.xml +67 -0
  69. data/test/paragraph.ods +0 -0
  70. data/test/paragraph.xls +0 -0
  71. data/test/paragraph.xlsx +0 -0
  72. data/test/paragraph.xml +127 -0
  73. data/test/ric.ods +0 -0
  74. data/test/simple_spreadsheet.ods +0 -0
  75. data/test/simple_spreadsheet.xls +0 -0
  76. data/test/simple_spreadsheet.xlsx +0 -0
  77. data/test/simple_spreadsheet.xml +225 -0
  78. data/test/simple_spreadsheet_from_italo.ods +0 -0
  79. data/test/simple_spreadsheet_from_italo.xls +0 -0
  80. data/test/simple_spreadsheet_from_italo.xml +242 -0
  81. data/test/skipped_tests.rb +789 -0
  82. data/test/style.ods +0 -0
  83. data/test/style.xls +0 -0
  84. data/test/style.xlsx +0 -0
  85. data/test/style.xml +154 -0
  86. data/test/test_helper.rb +19 -0
  87. data/test/test_roo.rb +1834 -0
  88. data/test/time-test.csv +2 -0
  89. data/test/time-test.ods +0 -0
  90. data/test/time-test.xls +0 -0
  91. data/test/time-test.xlsx +0 -0
  92. data/test/time-test.xml +131 -0
  93. data/test/whitespace.ods +0 -0
  94. data/test/whitespace.xls +0 -0
  95. data/test/whitespace.xlsx +0 -0
  96. data/test/whitespace.xml +184 -0
  97. metadata +231 -0
@@ -0,0 +1,394 @@
1
+ require 'xml'
2
+ require 'fileutils'
3
+ require 'zip/zipfilesystem'
4
+ require 'date'
5
+ require 'base64'
6
+ require 'cgi'
7
+
8
+ class Excel2003XML < GenericSpreadsheet
9
+
10
+ @@nr = 0
11
+
12
# Opens an Excel 2003 XML spreadsheet and parses it into @doc.
#
# filename::     name of the '.xml' file; a name starting with "http://" is
#                first handed to open_from_uri
# packed::       :zip when the file has to go through unzip first
# file_warning:: stored in @file_warning (default :error)
#
# Raises IOError when the resulting file name is not a regular file.
def initialize(filename, packed=nil, file_warning=:error)
  @file_warning = file_warning
  super()
  @tmpdir = "oo_"+$$.to_s
  @tmpdir = File.join(ENV['ROO_TMP'], @tmpdir) if ENV['ROO_TMP']
  # File.exists? no longer exists in Ruby 3.2+; File.exist? works everywhere.
  FileUtils::mkdir(@tmpdir) unless File.exist?(@tmpdir)
  begin
    # Fetching and unpacking happen inside the begin block so that the
    # temporary directory is removed even when one of them raises.
    filename = open_from_uri(filename) if filename[0,7] == "http://"
    filename = unzip(filename) if packed == :zip
    file_type_check(filename,'.xml','an Excel 2003 XML')
    @cells_read = Hash.new
    @filename = filename
    unless File.file?(@filename)
      raise IOError, "file #{@filename} does not exist"
    end
    @doc = XML::Parser.file(@filename).parse
  ensure
    FileUtils::rm_r(@tmpdir)
  end
  @default_sheet = self.sheets.first
  # Per-sheet caches, filled lazily by read_cells.
  @cell = Hash.new
  @cell_type = Hash.new
  @formula = Hash.new
  @first_row = Hash.new
  @last_row = Hash.new
  @first_column = Hash.new
  @last_column = Hash.new
  @header_line = 1
  @style = Hash.new
  @style_defaults = Hash.new { |h,k| h[k] = [] }
  @style_definitions = Hash.new
  read_styles
end
49
+
50
# Returns the content of the cell at (row,col) of +sheet+ (default: the
# default sheet). (1,1) is the upper left corner; the coordinates are passed
# through normalize, so (1,1), (1,'A'), ('A',1) and ('a',1) all address the
# same cell.
# Cells of type :date are stored as 'yyyy-mm-dd' strings and returned as Date.
def cell(row, col, sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row, col = normalize(row, col)
  if celltype(row, col, sheet) == :date
    year, month, day = @cell[sheet][[row, col]].split('-')
    return Date.new(year.to_i, month.to_i, day.to_i)
  end
  @cell[sheet][[row, col]]
end
64
+
65
# Returns the formula stored for the cell at (row,col), or nil if there is
# none. Use #formula? to test for the presence of a formula.
# NOTE(review): the first "oooc:".length characters of the stored text are
# cut off; this looks carried over from the OpenOffice reader - confirm it is
# wanted for Excel 2003 XML formulas.
def formula(row,col,sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row,col = normalize(row,col)
  stored = @formula[sheet][[row,col]]
  return nil if stored == nil
  stored["oooc:".length..-1]
end
78
+
79
# Returns true if the cell at (row,col) of +sheet+ (default: the default
# sheet) has a formula, false otherwise.
def formula?(row,col,sheet=nil)
  sheet = @default_sheet unless sheet
  read_cells(sheet) unless @cells_read[sheet]
  row,col = normalize(row,col)
  # Hand the sheet on: without it the lookup always used the default sheet,
  # whatever sheet the caller asked for.
  formula(row,col,sheet) != nil
end
86
+
87
# Font attributes of one style. The values are stored exactly as assigned
# (read_styles assigns the raw attribute values of the style's Font node).
class Font
  attr_accessor :bold, :italic, :underline

  # true when the bold value is the string '1'
  def bold?
    bold == '1'
  end

  # true when the italic value is the string '1'
  def italic?
    italic == '1'
  end

  # true when any underline value has been set
  def underline?
    underline != nil
  end
end
102
+
103
# Returns the entry of @style_definitions that applies to the cell at
# (row,col). The style name is taken from the cell itself, then from the
# sheet's @style_defaults entry for the column, and finally 'Default'.
def font(row, col, sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row,col = normalize(row,col)
  style_name = @style[sheet][[row,col]]
  style_name ||= @style_defaults[sheet][col - 1]
  style_name ||= 'Default'
  @style_definitions[style_name]
end
111
+
112
# Sets the cell at (row,col) to +value+ in memory only (nothing is written
# back to the spreadsheet file) and records a matching cell type.
#
# Integer values get type :float, String and Float values get type :string.
# Raises ArgumentError for any other value; the value itself has already
# been stored at that point.
def set(row,col,value,sheet=nil) #:nodoc:
  sheet = @default_sheet unless sheet
  read_cells(sheet) unless @cells_read[sheet]
  row,col = normalize(row,col)
  set_value(row,col,value,sheet)
  case value
  when Integer
    # Integer instead of Fixnum: Fixnum is gone in Ruby 3.2+ and never
    # matched big integers.
    set_type(row,col,:float,sheet)
  when String, Float
    # NOTE(review): Float is typed :string exactly as before - confirm
    # whether :float was intended.
    set_type(row,col,:string,sheet)
  else
    raise ArgumentError, "Type for "+value.to_s+" not set"
  end
end
129
+
130
# Returns the type of the cell at (row,col):
# * :formula when a formula is stored for the cell,
# * otherwise the recorded cell type, e.g. :float, :string, :date,
#   :percentage, :time or :datetime.
def celltype(row,col,sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row,col = normalize(row,col)
  return :formula if @formula[sheet][[row,col]]
  @cell_type[sheet][[row,col]]
end
148
+
149
# Returns the 'Name' attribute of every ss:Worksheet node of the document,
# in document order.
def sheets
  names = []
  @doc.find("//ss:Worksheet").each { |worksheet| names.push(worksheet.attributes['Name']) }
  names
end
156
+
157
# Returns @officeversion after refreshing it through oo_version.
# NOTE(review): oo_version looks for 'document-content' elements, which reads
# like OpenOffice markup - confirm this ever yields a value for Excel 2003
# XML files.
def officeversion
  oo_version
  return @officeversion
end
163
+
164
# Returns the inspect string of the internal cell table of +sheet+
# (default: the default sheet); mainly useful for debugging.
def to_s(sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  cells = @cell[sheet]
  cells.inspect
end
171
+
172
# Placeholder for saving the spreadsheet: nothing is written, the method
# just returns 42.
def save #:nodoc:
  return 42
end
176
+
177
# Returns all formulas of +sheet+ (default: the default sheet) as an array
# of [row, col, formula] triples, scanned row by row over the range
# first_row..last_row and first_column..last_column.
def formulas(sheet=nil)
  found = Array.new
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  first_row(sheet).upto(last_row(sheet)) do |row|
    first_column(sheet).upto(last_column(sheet)) do |col|
      next unless formula?(row,col,sheet)
      found << [row, col, formula(row,col,sheet)]
    end
  end
  found
end
193
+
194
+ private
195
+
196
# Copies the 'version' attribute of every element whose local name is
# 'document-content' into @officeversion (the last one found wins).
def oo_version
  @doc.find("//*[local-name()='document-content']").each { |office|
    @officeversion = office.attributes['version']
  }
end
202
+
203
# Stores one cell in the internal tables of +sheet+ under the key [y, x+i].
#
# v::          raw value, converted according to +vt+
# vt::         cell type; :float becomes Float or Integer depending on whether
#              the text contains a '.', :string stores +str_v+, :datetime is
#              parsed with DateTime.parse, :percentage becomes a Float, any
#              other type stores +v+ unchanged
# formula::    recorded only when not nil/false
# table_cell:: not used
# style_name:: recorded in @style
def set_cell_values(sheet,x,y,i,v,vt,formula,table_cell,str_v,style_name)
  key = [y,x+i]
  @cell_type[sheet] ||= {}
  @cell_type[sheet][key] = vt
  @formula[sheet] ||= {}
  @formula[sheet][key] = formula if formula
  @cell[sheet] ||= {}
  @style[sheet] ||= {}
  @style[sheet][key] = style_name
  cells = @cell[sheet]
  case vt
  when :float
    cells[key] = (v.to_s.include?('.') ? v.to_f : v.to_i)
  when :string
    cells[key] = str_v
  when :datetime
    cells[key] = DateTime.parse(v)
    @cell_type[sheet][key] = :datetime
  when :percentage
    cells[key] = v.to_f
  else
    cells[key] = v
  end
end
230
+
231
# Reads every cell of +sheet+ (default: the default sheet) from the parsed
# document into the internal tables (via set_cell_values) and marks the
# sheet as read in @cells_read.
#
# Raises ArgumentError when neither +sheet+ nor a default sheet is set, and
# RangeError when the sheet is not listed by #sheets or no matching
# Worksheet node is found.
def read_cells(sheet=nil)
  sheet ||= @default_sheet
  raise ArgumentError, "Error: sheet '#{sheet||'nil'}' not valid" if @default_sheet == nil && sheet == nil
  raise RangeError unless self.sheets.include?(sheet)
  worksheet_seen = false
  @doc.find("ss:Worksheet[@ss:Name='#{sheet}']").each do |worksheet|
    worksheet_seen = true
    row_nr = 1
    col_nr = 1
    # StyleID of each Column node, keyed by its 1-based position as a String
    column_styles = {}
    position = 0
    worksheet.find('.//ss:Column').each do |column|
      position += 1
      column_styles[position.to_s] = column.attributes['StyleID']
    end
    worksheet.find('.//ss:Row').each do |row_node|
      # an 'Index' attribute greater than 0 jumps to that row number
      explicit_row = row_node.attributes['Index'].to_i
      row_nr = explicit_row if explicit_row > 0
      style_name = row_node.attributes['StyleID']
      row_node.each do |cell_node|
        next unless cell_node.name == 'Cell'
        explicit_col = cell_node.attributes['Index'].to_i
        col_nr = explicit_col if explicit_col > 0
        # NOTE(review): style_name is not reset between the cells of a row,
        # so a cell without its own StyleID keeps the previous cell's style -
        # confirm this is intended.
        if cell_node.attributes['StyleID']
          style_name = cell_node.attributes['StyleID']
        else
          style_name ||= column_styles[cell_node.attributes['Index']]
        end
        cell_node.each_element do |child|
          formula = v = vt = str_v = nil
          if child.name == 'Data'
            formula = child.attributes['Formula']
            vt = child.attributes['Type'].downcase.to_sym
            v = child.content
            str_v = v
            case vt
            when :number
              v = v.to_f
              vt = :float
            when :datetime
              # a value on 1899-12-31 is a pure time, one at midnight a pure date
              if v =~ /^1899-12-31T(\d{2}:\d{2}:\d{2})/
                v = $1
                vt = :time
              elsif v =~ /([^T]+)T00:00:00.000/
                v = $1
                vt = :date
              end
            when :boolean
              v = child.attributes['boolean-value']
            end
          end
          # Called for every child element of the Cell; for anything other
          # than 'Data' the value, type and string are nil.
          set_cell_values(sheet,col_nr,row_nr,0,v,vt,formula,child,str_v,style_name)
        end
        col_nr += 1
      end
      row_nr += 1
      col_nr = 1
    end
  end
  raise RangeError unless worksheet_seen
  @cells_read[sheet] = true
end
299
+
300
# Fills @style_definitions with one Excel2003XML::Font per ss:Style node,
# keyed by the style's 'ID' attribute. Bold, italic and underline are copied
# from the style's first ss:Font node when there is one.
def read_styles
  @doc.find("ss:Styles").each do |styles|
    styles.find('.//ss:Style').each do |style|
      style_id = style.attributes['ID']
      definition = Excel2003XML::Font.new
      @style_definitions[style_id] = definition
      font_node = style.find_first('.//ss:Font')
      next unless font_node
      definition.bold = font_node.attributes['Bold']
      definition.italic = font_node.attributes['Italic']
      definition.underline = font_node.attributes['Underline']
    end
  end
end
314
+
315
# Verifies that the default sheet is usable.
# Raises ArgumentError when no default sheet is set and RangeError when the
# default sheet is not among #sheets; returns nil otherwise.
def check_default_sheet
  raise ArgumentError, "Error: default_sheet not set" if @default_sheet == nil
  raise RangeError, "sheet '#{@default_sheet}' not found" unless sheets.include?(@default_sheet)
end
325
+
326
# Walks the zip archive recursively starting at +path+ and copies the entry
# named "content.xml" into the temporary directory as
# "<file_nr>_roo_content.xml".
def process_zipfile(zip, path='')
  if zip.file.file?(path)
    if path == "content.xml"
      target = File.join(@tmpdir, @file_nr.to_s+'_roo_content.xml')
      open(target,'wb') do |out|
        out << zip.read(path)
      end
    end
  else
    # directory: descend into every entry below it
    path += '/' unless path.empty?
    zip.dir.foreach(path) { |entry_name| process_zipfile(zip, path+entry_name) }
  end
end
342
+
343
# Opens @filename as a zip archive and hands it to process_zipfile.
def extract_content
  Zip::ZipFile.open(@filename) { |archive| process_zipfile(archive) }
end
348
+
349
# Stores +value+ for the cell at (row,col) of +sheet+ (default: the default
# sheet) in the internal cell table.
def set_value(row,col,value,sheet=nil)
  # @default_sheet, not the never-assigned @default_value that was used here
  sheet = @default_sheet unless sheet
  @cell[sheet][[row,col]] = value
end
353
+
354
# Stores +type+ for the cell at (row,col) of +sheet+ (default: the default
# sheet) in the internal cell type table.
def set_type(row,col,type,sheet=nil)
  # @default_sheet, not the never-assigned @default_value that was used here
  sheet = @default_sheet unless sheet
  @cell_type[sheet][[row,col]] = type
end
358
+
359
# Maps a type name given as a String to the corresponding roo type symbol.
A_ROO_TYPE = {
  "float"      => :float,
  "string"     => :string,
  "date"       => :date,
  "percentage" => :percentage,
  "time"       => :time,
}

# Looks +ootype+ up in A_ROO_TYPE; returns nil for unknown names.
# NOTE(review): this defines the method on Openoffice rather than on the
# class being defined here - looks like a copy/paste leftover, confirm before
# changing the receiver.
def Openoffice.oo_type_2_roo_type(ootype)
  A_ROO_TYPE[ootype]
end
370
+
371
# Concatenates the content of +children+ into one String.
# Text nodes and elements other than 's' contribute their content; an 's'
# element stands for a run of blanks whose length is its 'c' attribute
# (a missing or zero count means one blank).
def children_to_string(children)
  text = ''
  children.each do |child|
    if !child.text? && child.name == 's'
      blanks = child.attributes['c'].to_i
      blanks = 1 if blanks == 0
      text += " " * blanks
    else
      text += child.content
    end
  end
  text
end
393
+
394
+ end # class
@@ -0,0 +1,601 @@
1
+ require 'xml'
2
+ require 'fileutils'
3
+ require 'zip/zipfilesystem'
4
+ require 'date'
5
+
6
class String
  # Fallback for Ruby versions whose String has no end_with? yet: true when
  # the string ends with +str+.
  # Only defined when missing, so the built-in implementation (which also
  # accepts several suffixes) is no longer replaced for the whole process.
  unless method_defined?(:end_with?)
    def end_with?(str)
      self[-str.length,str.length] == str
    end
  end
end
11
+
12
+ class Excelx < GenericSpreadsheet
13
# Maps a number format code to the roo cell type used for it.
FORMATS = {
  'General'                  => :float,
  '0'                        => :float,
  '0.00'                     => :float,
  '#,##0'                    => :float,
  '#,##0.00'                 => :float,
  '0%'                       => :percentage,
  '0.00%'                    => :percentage,
  '0.00E+00'                 => :float,
  '# ?/?'                    => :float, #??? TODO:
  '# ??/??'                  => :float, #??? TODO:
  'mm-dd-yy'                 => :date,
  'd-mmm-yy'                 => :date,
  'd-mmm'                    => :date,
  'mmm-yy'                   => :date,
  'h:mm AM/PM'               => :date,
  'h:mm:ss AM/PM'            => :date,
  'h:mm'                     => :time,
  'h:mm:ss'                  => :time,
  'm/d/yy h:mm'              => :date,
  '#,##0 ;(#,##0)'           => :float,
  '#,##0 ;[Red](#,##0)'      => :float,
  '#,##0.00;(#,##0.00)'      => :float,
  '#,##0.00;[Red](#,##0.00)' => :float,
  'mm:ss'                    => :time,
  '[h]:mm:ss'                => :time,
  'mmss.0'                   => :time,
  '##0.0E+0'                 => :float,
  '@'                        => :float,
  #-- additional formats that are not defined by default:
  "yyyy\\-mm\\-dd"           => :date,
  'dd/mm/yy'                 => :date,
  'hh:mm:ss'                 => :time,
  "dd/mm/yy\\ hh:mm"         => :datetime,
}
# Maps a built-in numeric format id to its format code.
STANDARD_FORMATS = {
  0  => 'General',
  1  => '0',
  2  => '0.00',
  3  => '#,##0',
  4  => '#,##0.00',
  9  => '0%',
  10 => '0.00%',
  11 => '0.00E+00',
  12 => '# ?/?',
  13 => '# ??/??',
  14 => 'mm-dd-yy',
  15 => 'd-mmm-yy',
  16 => 'd-mmm',
  17 => 'mmm-yy',
  18 => 'h:mm AM/PM',
  19 => 'h:mm:ss AM/PM',
  20 => 'h:mm',
  21 => 'h:mm:ss',
  22 => 'm/d/yy h:mm',
  37 => '#,##0 ;(#,##0)',
  38 => '#,##0 ;[Red](#,##0)',
  39 => '#,##0.00;(#,##0.00)',
  40 => '#,##0.00;[Red](#,##0.00)',
  45 => 'mm:ss',
  46 => '[h]:mm:ss',
  47 => 'mmss.0',
  48 => '##0.0E+0',
  49 => '@',
}
78
+ @@nr = 0
79
+
80
# Opens an Excel-xlsx spreadsheet: the needed parts of the archive are
# extracted into a temporary directory, parsed, and the directory is removed
# again.
#
# filename::     name of the '.xlsx' file; a name starting with "http://" is
#                first handed to open_from_uri
# packed::       :zip when the file has to go through unzip first
# file_warning:: stored in @file_warning (default :error)
#
# Raises IOError when the resulting file name is not a regular file.
def initialize(filename, packed=nil, file_warning = :error) #, create = false)
  super()
  @file_warning = file_warning
  @tmpdir = "oo_"+$$.to_s
  @tmpdir = File.join(ENV['ROO_TMP'], @tmpdir) if ENV['ROO_TMP']
  # File.exists? no longer exists in Ruby 3.2+; File.exist? works everywhere.
  FileUtils::mkdir(@tmpdir) unless File.exist?(@tmpdir)
  begin
    # Fetching and unpacking happen inside the begin block so that the
    # temporary directory is removed even when one of them raises.
    filename = open_from_uri(filename) if filename[0,7] == "http://"
    filename = unzip(filename) if packed == :zip
    file_type_check(filename,'.xlsx','an Excel-xlsx')
    @cells_read = Hash.new
    @filename = filename
    unless File.file?(@filename)
      raise IOError, "file #{@filename} does not exist"
    end
    @@nr += 1
    @file_nr = @@nr
    extract_content(@filename)
    # The block form of File.open closes the file even when parsing raises.
    workbook_file = File.join(@tmpdir, @file_nr.to_s+"_roo_workbook.xml")
    @workbook_doc = File.open(workbook_file) { |file| XML::Parser.io(file).parse }
    @shared_table = []
    shared_strings_file = File.join(@tmpdir, @file_nr.to_s+'_roo_sharedStrings.xml')
    if File.exist?(shared_strings_file)
      @sharedstring_doc = File.open(shared_strings_file) { |file| XML::Parser.io(file).parse }
      read_shared_strings(@sharedstring_doc)
    end
    @styles_table = []
    # Plain array indexed by style number; the block formerly passed to
    # Array.new was never run and only suggested Hash-like defaults.
    @style_definitions = []
    styles_file = File.join(@tmpdir, @file_nr.to_s+'_roo_styles.xml')
    if File.exist?(styles_file)
      @styles_doc = File.open(styles_file) { |file| XML::Parser.io(file).parse }
      read_styles(@styles_doc)
    end
    @sheet_doc = []
    @sheet_files.each_with_index do |item, i|
      @sheet_doc[i] = File.open(item) { |file| XML::Parser.io(file).parse }
    end
  ensure
    #if ENV["roo_local"] != "thomas-p"
    FileUtils::rm_r(@tmpdir)
    #end
  end
  @default_sheet = self.sheets.first
  # Per-sheet caches, filled lazily by read_cells.
  @cell = Hash.new
  @cell_type = Hash.new
  @formula = Hash.new
  @first_row = Hash.new
  @last_row = Hash.new
  @first_column = Hash.new
  @last_column = Hash.new
  @header_line = 1
  @excelx_type = Hash.new
  @excelx_value = Hash.new
  @s_attribute = Hash.new # TODO: possibly remove again, only needed locally
end
144
+
145
# Returns the content of the cell at (row,col) of +sheet+ (default: the
# default sheet). (1,1) is the upper left corner; the coordinates are passed
# through normalize, so (1,1), (1,'A'), ('A',1) and ('a',1) all address the
# same cell.
# :date cells ('yyyy-mm-dd' strings) are returned as Date, :datetime cells
# ('yyyy-mm-dd hh:mm:ss' strings) as DateTime.
def cell(row, col, sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row,col = normalize(row,col)
  case celltype(row,col,sheet)
  when :date
    year, month, day = @cell[sheet][[row,col]].split('-')
    Date.new(year.to_i, month.to_i, day.to_i)
  when :datetime
    date_part, time_part = @cell[sheet][[row,col]].split(' ')
    year, month, day = date_part.split('-')
    hour, minute, second = time_part.split(':')
    DateTime.civil(year.to_i, month.to_i, day.to_i, hour.to_i, minute.to_i, second.to_i)
  else
    @cell[sheet][[row,col]]
  end
end
164
+
165
# Returns the formula stored for the cell at (row,col), or nil if there is
# none. Use #formula? to test for the presence of a formula.
def formula(row,col,sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row,col = normalize(row,col)
  @formula[sheet][[row,col]]
end
178
+
179
# Returns true if the cell at (row,col) of +sheet+ (default: the default
# sheet) has a formula, false otherwise.
def formula?(row,col,sheet=nil)
  sheet = @default_sheet unless sheet
  read_cells(sheet) unless @cells_read[sheet]
  row,col = normalize(row,col)
  # Hand the sheet on: without it the lookup always used the default sheet,
  # whatever sheet the caller asked for.
  formula(row,col,sheet) != nil
end
186
+
187
# Font attributes of one style; each flag counts as set only when its value
# is exactly true.
class Font
  attr_accessor :bold, :italic, :underline

  # true when bold has been set to true
  def bold?
    bold == true
  end

  # true when italic has been set to true
  def italic?
    italic == true
  end

  # true when underline has been set to true
  def underline?
    underline == true
  end
end
202
+
203
# Returns the entry of @style_definitions selected by the style index
# recorded for the cell at (row,col); a cell without a recorded index uses
# index 0.
def font(row, col, sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row,col = normalize(row,col)
  style_index = (@s_attribute[sheet][[row,col]] || 0).to_i
  @style_definitions[style_index]
end
213
+
214
# Sets the cell at (row,col) to +value+ in memory only (nothing is written
# back to the spreadsheet file) and records a matching cell type.
#
# Integer values get type :float, String and Float values get type :string.
# Raises ArgumentError for any other value; the value itself has already
# been stored at that point.
def set(row,col,value,sheet=nil) #:nodoc:
  sheet = @default_sheet unless sheet
  read_cells(sheet) unless @cells_read[sheet]
  row,col = normalize(row,col)
  set_value(row,col,value,sheet)
  case value
  when Integer
    # Integer instead of Fixnum: Fixnum is gone in Ruby 3.2+ and never
    # matched big integers.
    set_type(row,col,:float,sheet)
  when String, Float
    # NOTE(review): Float is typed :string exactly as before - confirm
    # whether :float was intended.
    set_type(row,col,:string,sheet)
  else
    raise ArgumentError, "Type for "+value.to_s+" not set"
  end
end
231
+
232
# Returns the type of the cell at (row,col):
# * :formula when a formula is stored for the cell,
# * otherwise the recorded cell type, e.g. :float, :string, :date,
#   :percentage, :time or :datetime.
def celltype(row,col,sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row,col = normalize(row,col)
  return :formula if @formula[sheet][[row,col]]
  @cell_type[sheet][[row,col]]
end
250
+
251
# Returns the internal excelx type recorded for the cell at (row,col):
# :string for shared-string cells, otherwise the pair
# [:numeric_or_formula, format].
# Note: only available within the Excelx class.
def excelx_type(row,col,sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row,col = normalize(row,col)
  @excelx_type[sheet][[row,col]]
end
261
+
262
# Returns the raw value recorded for the cell at (row,col) while reading
# the sheet.
# Note: only available within the Excelx class.
def excelx_value(row,col,sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row,col = normalize(row,col)
  @excelx_value[sheet][[row,col]]
end
270
+
271
# Returns the format code that attribute2format yields for the style index
# recorded for the cell at (row,col).
def excelx_format(row,col,sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row,col = normalize(row,col)
  attribute2format(@s_attribute[sheet][[row,col]])
end
280
+
281
# Returns the 'name' attribute of every 'sheet' element of the workbook
# document, in document order.
def sheets
  names = []
  @workbook_doc.find("//*[local-name()='sheet']").each { |node| names.push(node.attributes.to_h['name']) }
  names
end
289
# Returns the inspect string of the internal cell table of +sheet+
# (default: the default sheet); meant for debugging.
def to_s(sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  cells = @cell[sheet]
  cells.inspect
end
296
+
297
# Returns all formulas of +sheet+ (default: the default sheet) as an array
# of [row, col, formula] triples, scanned row by row over the range
# first_row..last_row and first_column..last_column.
def formulas(sheet=nil)
  found = Array.new
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  first_row(sheet).upto(last_row(sheet)) do |row|
    first_column(sheet).upto(last_column(sheet)) do |col|
      next unless formula?(row,col,sheet)
      found << [row, col, formula(row,col,sheet)]
    end
  end
  found
end
313
+
314
+ private
315
+
316
# Stores one cell in the internal tables of +sheet+ under the key [y, x+i].
#
# v::   raw value, converted according to +vt+: :float becomes Float or
#       Integer depending on whether the text contains a '.', :string stores
#       +str_v+, :date and :datetime are day counts added to 1899-12-30 and
#       stored as formatted strings, :percentage becomes a Float, :time is a
#       fraction of a day stored as seconds, anything else stores +v+ as is
# formula:: recorded only when not nil/false
# tr::  not used
# excelx_type, excelx_value, s_attribute:: recorded unchanged
def set_cell_values(sheet,x,y,i,v,vt,formula,tr,str_v,
                    excelx_type=nil,
                    excelx_value=nil,
                    s_attribute=nil)
  key = [y,x+i]
  @cell_type[sheet] ||= {}
  @cell_type[sheet][key] = vt
  @formula[sheet] ||= {}
  @formula[sheet][key] = formula if formula
  @cell[sheet] ||= {}
  @cell[sheet][key] =
    case vt
    when :float      then (v.to_s.include?('.') ? v.to_f : v.to_i)
    when :string     then str_v
    when :date       then (Date.new(1899,12,30)+v.to_i).strftime("%Y-%m-%d")
    when :datetime   then (DateTime.new(1899,12,30)+v.to_f).strftime("%Y-%m-%d %H:%M:%S")
    when :percentage then v.to_f
    when :time       then v.to_f*(24*60*60)
    else v
    end
  @excelx_type[sheet] ||= {}
  @excelx_type[sheet][key] = excelx_type
  @excelx_value[sheet] ||= {}
  @excelx_value[sheet][key] = excelx_value
  @s_attribute[sheet] ||= {}
  @s_attribute[sheet][key] = s_attribute
end
350
+
351
# Splits a coordinate like "AA12" into its leading letters ("AA", String)
# and the number that follows them (12, Integer). Anything after the digits
# is ignored.
# Raises ArgumentError when the letters are missing or the number is 0.
def split_coord(s)
  parts = s.match(/\A([A-Za-z]*)([0-9]*)/)
  letter = parts[1]
  number = parts[2].to_i
  raise ArgumentError if letter == "" || number == 0
  return letter, number
end
369
+
370
# Converts a coordinate like "AA12" into numeric [x, y]: x is the column
# obtained from GenericSpreadsheet.letter_to_number, y the row number.
def split_coordinate(str)
  letters, row_number = split_coord(str)
  return GenericSpreadsheet.letter_to_number(letters), row_number
end
376
+
377
# Returns the cell type registered in FORMATS for the format code +format+;
# unknown codes (including nil) are treated as :float.
def format2type(format)
  FORMATS.fetch(format, :float)
end
385
+
386
# Reads every cell of +sheet+ (default: the default sheet) from the parsed
# sheet document into the internal tables (via set_cell_values) and marks
# the sheet as read in @cells_read.
#
# Raises ArgumentError when neither +sheet+ nor a default sheet is set, and
# RangeError when the sheet is not listed by #sheets.
def read_cells(sheet=nil)
  sheet ||= @default_sheet
  raise ArgumentError, "Error: sheet '#{sheet||'nil'}' not valid" if @default_sheet == nil && sheet == nil
  raise RangeError unless self.sheets.include?(sheet)
  sheet_index = self.sheets.index(sheet)
  @sheet_doc[sheet_index].find("//*[local-name()='c']").each do |c|
    attrs = c.attributes.to_h
    s_attribute = attrs['s'].to_i
    # format stays nil for shared-string and boolean cells
    format = nil
    tmp_type =
      case attrs['t']
      when 's' then :shared
      when 'b' then :boolean
      else
        format = attribute2format(s_attribute)
        format2type(format)
      end
    formula = nil
    c.each_element do |child|
      formula = child.content if child.name == 'f'
      next unless child.name == 'v'
      raw = child.content
      if tmp_type == :time || tmp_type == :datetime
        # A value of one day or more carries a date: with a fractional part
        # it is a datetime, without one a plain date.
        serial = raw.to_f
        if serial >= 1.0
          tmp_type = (serial - serial.floor).abs > 0.000001 ? :datetime : :date
        end
      end
      excelx_type = [:numeric_or_formula,format]
      excelx_value = raw
      v = str_v = nil
      case tmp_type
      when :shared
        vt = :string
        str_v = @shared_table[raw.to_i]
        excelx_type = :string
      when :boolean
        vt = :boolean
        v = raw.to_i == 1 ? 'TRUE' : 'FALSE'
      when :date, :time, :datetime
        vt = tmp_type
        v = raw
      when :formula
        vt = :formula
        v = raw.to_f #TODO: !!!!
      else
        # NOTE(review): every other type, :percentage included, is stored
        # as :float here - confirm this is intended.
        vt = :float
        v = raw
      end
      x,y = split_coordinate(attrs['r'])
      set_cell_values(sheet,x,y,0,v,vt,formula,nil,str_v,excelx_type,excelx_value,s_attribute)
    end
  end
  @cells_read[sheet] = true
end
457
+
458
# Verifies that the default sheet is usable.
# Raises ArgumentError when no default sheet is set and RangeError when the
# default sheet is not among #sheets; returns nil otherwise.
def check_default_sheet
  raise ArgumentError, "Error: default_sheet not set" if @default_sheet == nil
  raise RangeError, "sheet '#{@default_sheet}' not found" unless sheets.include?(@default_sheet)
end
470
+
471
# Extracts the needed parts of the archive into the temporary directory:
# entries ending in 'workbook.xml', 'sharedStrings.xml' and 'styles.xml',
# plus every entry matching sheet<N>.xml. The extracted sheet files are
# remembered in @sheet_files at index N-1.
#
# zipfilename:: archive whose entry list is walked
# zip::         already opened archive the entry contents are read from
# path::        not used
def process_zipfile(zipfilename, zip, path='')
  @sheet_files = []
  prefix = @tmpdir+'/'+@file_nr.to_s
  Zip::ZipFile.open(zipfilename) do |zf|
    zf.entries.each do |entry|
      #entry.extract
      entry_name = entry.to_s
      ['workbook.xml', 'sharedStrings.xml', 'styles.xml'].each do |wanted|
        next unless entry_name.end_with?(wanted)
        open(prefix+'_roo_'+wanted,'wb') { |out| out << zip.read(entry) }
      end
      if entry_name =~ /sheet([0-9]+).xml$/
        nr = $1
        target = prefix+"_roo_sheet#{nr}"
        open(target,'wb') { |out| out << zip.read(entry) }
        @sheet_files[nr.to_i-1] = target
      end
    end
  end
  return
end
503
+
504
# Opens @filename as a zip archive and hands it, together with
# +zipfilename+, to process_zipfile.
def extract_content(zipfilename)
  Zip::ZipFile.open(@filename) { |archive| process_zipfile(zipfilename,archive) }
end
510
+
511
# Stores +value+ for the cell at (row,col) of +sheet+ (default: the default
# sheet) in the internal cell table.
def set_value(row,col,value,sheet=nil)
  # @default_sheet, not the never-assigned @default_value that was used here
  sheet = @default_sheet unless sheet
  @cell[sheet][[row,col]] = value
end
516
+
517
# Stores +type+ for the cell at (row,col) of +sheet+ (default: the default
# sheet) in the internal cell type table.
def set_type(row,col,type,sheet=nil)
  # @default_sheet, not the never-assigned @default_value that was used here
  sheet = @default_sheet unless sheet
  @cell_type[sheet][[row,col]] = type
end
522
+
523
# Appends one string per 'si' element of the shared strings document to
# @shared_table. A direct 't' child replaces the text collected so far; the
# 't' children of 'r' children are concatenated onto it.
def read_shared_strings(doc)
  doc.find("//*[local-name()='si']").each do |si|
    text = ''
    si.each_element do |elem|
      case elem.name
      when 'r'
        elem.each_element do |run_part|
          text << run_part.content if run_part.name == 't'
        end
      when 't'
        text = elem.content
      end
    end
    @shared_table << text
  end
end
542
+
543
# Reads the styles document:
# * @numFmts gets one [numFmtId, formatCode] pair per 'numFmt' element,
# * every 'font' child of a 'fonts' element becomes an Excelx::Font whose
#   bold/italic/underline flag is true when a 'b'/'i'/'u' child is present,
# * for each child of a 'cellXfs' element, [numFmtId] is appended to
#   @cellXfs and the font selected by its 'fontId' to @style_definitions.
def read_styles(doc)
  @numFmts = []
  @cellXfs = []
  fonts = []

  doc.find("//*[local-name()='numFmt']").each do |num_fmt|
    attrs = num_fmt.attributes.to_h
    @numFmts << [attrs['numFmtId'], attrs['formatCode']]
  end

  doc.find("//*[local-name()='fonts']").each do |fonts_el|
    fonts_el.each_element do |font_el|
      next unless font_el.name == 'font'
      font = Excelx::Font.new
      font_el.each_element do |flag|
        case flag.name
        when 'b' then font.bold = true
        when 'i' then font.italic = true
        when 'u' then font.underline = true
        end
      end
      fonts << font
    end
  end

  doc.find("//*[local-name()='cellXfs']").each do |xfs|
    xfs.each do |xf|
      attrs = xf.attributes.to_h
      @cellXfs << [attrs['numFmtId']]
      @style_definitions << fonts[attrs['fontId'].to_i]
    end
  end
end
582
+
583
# Converts the style index +s+ of a cell into a format code.
#
# The numFmtId of the @cellXfs entry at index +s+ is looked up first among
# the formats collected in @numFmts and then in STANDARD_FORMATS.
# Returns nil when no format code is known. This now also covers a missing
# styles table (@numFmts/@cellXfs never filled) and an index without a
# @cellXfs entry, which used to raise NoMethodError.
def attribute2format(s)
  xf = (@cellXfs || [])[s.to_i]
  return nil unless xf
  num_fmt_id = xf.first
  custom = (@numFmts || []).find { |entry| entry.first == num_fmt_id }
  result = custom && custom[1]
  # fall back to the built-in format table
  result ||= STANDARD_FORMATS[num_fmt_id.to_i]
  result
end
600
+
601
+ end # class