roo-immersion 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (97) hide show
  1. data/History.txt +225 -0
  2. data/README.markdown +60 -0
  3. data/examples/roo_soap_client.rb +53 -0
  4. data/examples/roo_soap_server.rb +29 -0
  5. data/examples/write_me.rb +33 -0
  6. data/lib/roo.rb +32 -0
  7. data/lib/roo/excel.rb +468 -0
  8. data/lib/roo/excel2003xml.rb +394 -0
  9. data/lib/roo/excelx.rb +601 -0
  10. data/lib/roo/generic_spreadsheet.rb +628 -0
  11. data/lib/roo/google.rb +379 -0
  12. data/lib/roo/openoffice.rb +451 -0
  13. data/lib/roo/roo_rails_helper.rb +82 -0
  14. data/lib/roo/version.rb +9 -0
  15. data/test/1900_base.xls +0 -0
  16. data/test/1904_base.xls +0 -0
  17. data/test/Bibelbund.csv +3741 -0
  18. data/test/Bibelbund.ods +0 -0
  19. data/test/Bibelbund.xls +0 -0
  20. data/test/Bibelbund.xlsx +0 -0
  21. data/test/Bibelbund.xml +62518 -0
  22. data/test/Bibelbund1.ods +0 -0
  23. data/test/bad_excel_date.xls +0 -0
  24. data/test/bbu.ods +0 -0
  25. data/test/bbu.xls +0 -0
  26. data/test/bbu.xlsx +0 -0
  27. data/test/bbu.xml +152 -0
  28. data/test/bode-v1.ods.zip +0 -0
  29. data/test/bode-v1.xls.zip +0 -0
  30. data/test/boolean.ods +0 -0
  31. data/test/boolean.xls +0 -0
  32. data/test/boolean.xlsx +0 -0
  33. data/test/boolean.xml +112 -0
  34. data/test/borders.ods +0 -0
  35. data/test/borders.xls +0 -0
  36. data/test/borders.xlsx +0 -0
  37. data/test/borders.xml +144 -0
  38. data/test/bug-row-column-fixnum-float.xls +0 -0
  39. data/test/bug-row-column-fixnum-float.xml +127 -0
  40. data/test/datetime.ods +0 -0
  41. data/test/datetime.xls +0 -0
  42. data/test/datetime.xlsx +0 -0
  43. data/test/datetime.xml +142 -0
  44. data/test/datetime_floatconv.xls +0 -0
  45. data/test/datetime_floatconv.xml +148 -0
  46. data/test/emptysheets.ods +0 -0
  47. data/test/emptysheets.xls +0 -0
  48. data/test/emptysheets.xml +105 -0
  49. data/test/excel2003.xml +21140 -0
  50. data/test/false_encoding.xls +0 -0
  51. data/test/false_encoding.xml +132 -0
  52. data/test/formula.ods +0 -0
  53. data/test/formula.xls +0 -0
  54. data/test/formula.xlsx +0 -0
  55. data/test/formula.xml +134 -0
  56. data/test/formula_parse_error.xls +0 -0
  57. data/test/formula_parse_error.xml +1833 -0
  58. data/test/html-escape.ods +0 -0
  59. data/test/no_spreadsheet_file.txt +1 -0
  60. data/test/numbers1.csv +18 -0
  61. data/test/numbers1.ods +0 -0
  62. data/test/numbers1.xls +0 -0
  63. data/test/numbers1.xlsx +0 -0
  64. data/test/numbers1.xml +312 -0
  65. data/test/only_one_sheet.ods +0 -0
  66. data/test/only_one_sheet.xls +0 -0
  67. data/test/only_one_sheet.xlsx +0 -0
  68. data/test/only_one_sheet.xml +67 -0
  69. data/test/paragraph.ods +0 -0
  70. data/test/paragraph.xls +0 -0
  71. data/test/paragraph.xlsx +0 -0
  72. data/test/paragraph.xml +127 -0
  73. data/test/ric.ods +0 -0
  74. data/test/simple_spreadsheet.ods +0 -0
  75. data/test/simple_spreadsheet.xls +0 -0
  76. data/test/simple_spreadsheet.xlsx +0 -0
  77. data/test/simple_spreadsheet.xml +225 -0
  78. data/test/simple_spreadsheet_from_italo.ods +0 -0
  79. data/test/simple_spreadsheet_from_italo.xls +0 -0
  80. data/test/simple_spreadsheet_from_italo.xml +242 -0
  81. data/test/skipped_tests.rb +789 -0
  82. data/test/style.ods +0 -0
  83. data/test/style.xls +0 -0
  84. data/test/style.xlsx +0 -0
  85. data/test/style.xml +154 -0
  86. data/test/test_helper.rb +19 -0
  87. data/test/test_roo.rb +1834 -0
  88. data/test/time-test.csv +2 -0
  89. data/test/time-test.ods +0 -0
  90. data/test/time-test.xls +0 -0
  91. data/test/time-test.xlsx +0 -0
  92. data/test/time-test.xml +131 -0
  93. data/test/whitespace.ods +0 -0
  94. data/test/whitespace.xls +0 -0
  95. data/test/whitespace.xlsx +0 -0
  96. data/test/whitespace.xml +184 -0
  97. metadata +231 -0
@@ -0,0 +1,394 @@
1
+ require 'xml'
2
+ require 'fileutils'
3
+ require 'zip/zipfilesystem'
4
+ require 'date'
5
+ require 'base64'
6
+ require 'cgi'
7
+
8
+ class Excel2003XML < GenericSpreadsheet
9
+
10
+ @@nr = 0
11
+
# Opens an Excel 2003 XML spreadsheet and parses it into memory.
#
# filename::     path of the .xml file; a name starting with "http://" is
#                first handed to open_from_uri
# packed::       :zip if +filename+ has to go through unzip first
# file_warning:: kept in @file_warning (default :error)
#
# A scratch directory ("oo_<pid>", placed below ENV['ROO_TMP'] when that is
# set) exists only for the duration of this call.
# Raises IOError when the resolved file does not exist.
def initialize(filename, packed=nil, file_warning=:error)
  @file_warning = file_warning
  super()
  @tmpdir = "oo_"+$$.to_s
  @tmpdir = File.join(ENV['ROO_TMP'], @tmpdir) if ENV['ROO_TMP']
  # File.exists? is gone from current Ruby releases; File.exist? works everywhere.
  FileUtils::mkdir(@tmpdir) unless File.exist?(@tmpdir)
  begin
    # fetching/unpacking happens inside the begin block so that the scratch
    # directory is removed even when one of these steps raises
    filename = open_from_uri(filename) if filename[0,7] == "http://"
    filename = unzip(filename) if packed and packed == :zip
    file_type_check(filename,'.xml','an Excel 2003 XML')
    @cells_read = Hash.new
    @filename = filename
    unless File.file?(@filename)
      raise IOError, "file #{@filename} does not exist"
    end
    @doc = XML::Parser.file(@filename).parse
  ensure
    FileUtils::rm_r(@tmpdir)
  end
  @default_sheet = self.sheets.first
  # per-sheet caches, filled lazily by read_cells
  @cell = Hash.new
  @cell_type = Hash.new
  @formula = Hash.new
  @first_row = Hash.new
  @last_row = Hash.new
  @first_column = Hash.new
  @last_column = Hash.new
  @header_line = 1
  @style = Hash.new
  @style_defaults = Hash.new { |h,k| h[k] = [] }
  @style_definitions = Hash.new
  read_styles
end
49
+
50
+ # Returns the content of a spreadsheet-cell.
51
+ # (1,1) is the upper left corner.
52
+ # (1,1), (1,'A'), ('A',1), ('a',1) all refer to the
53
+ # cell in the first row and first column.
54
+ def cell(row, col, sheet=nil)
55
+ sheet = @default_sheet unless sheet
56
+ read_cells(sheet) unless @cells_read[sheet]
57
+ row,col = normalize(row,col)
58
+ if celltype(row,col,sheet) == :date
59
+ yyyy,mm,dd = @cell[sheet][[row,col]].split('-')
60
+ return Date.new(yyyy.to_i,mm.to_i,dd.to_i)
61
+ end
62
+ @cell[sheet][[row,col]]
63
+ end
64
+
65
+ # Returns the formula at (row,col).
66
+ # Returns nil if there is no formula.
67
+ # The method #formula? checks if there is a formula.
68
+ def formula(row,col,sheet=nil)
69
+ sheet = @default_sheet unless sheet
70
+ read_cells(sheet) unless @cells_read[sheet]
71
+ row,col = normalize(row,col)
72
+ if @formula[sheet][[row,col]] == nil
73
+ return nil
74
+ else
75
+ return @formula[sheet][[row,col]]["oooc:".length..-1]
76
+ end
77
+ end
78
+
# Returns true if the cell at (row,col) holds a formula.
# +sheet+ selects the sheet to look at; it defaults to the default sheet.
def formula?(row,col,sheet=nil)
  sheet = @default_sheet unless sheet
  read_cells(sheet) unless @cells_read[sheet]
  row,col = normalize(row,col)
  # pass the sheet on; without it the lookup always hit the default sheet
  !formula(row,col,sheet).nil?
end
86
+
87
+ class Font
88
+ attr_accessor :bold, :italic, :underline
89
+
90
+ def bold?
91
+ @bold == '1'
92
+ end
93
+
94
+ def italic?
95
+ @italic == '1'
96
+ end
97
+
98
+ def underline?
99
+ @underline != nil
100
+ end
101
+ end
102
+
103
+ # Given a cell, return the cell's style
104
+ def font(row, col, sheet=nil)
105
+ sheet = @default_sheet unless sheet
106
+ read_cells(sheet) unless @cells_read[sheet]
107
+ row,col = normalize(row,col)
108
+ style_name = @style[sheet][[row,col]] || @style_defaults[sheet][col - 1] || 'Default'
109
+ @style_definitions[style_name]
110
+ end
111
+
# Sets the cell at (row,col) to +value+ in memory only; nothing is saved
# back to the spreadsheet file.
# The cell type is derived from the class of +value+:
# Integer => :float, String and Float => :string.
# Raises ArgumentError for any other kind of value (after the value itself
# has been stored).
def set(row,col,value,sheet=nil) #:nodoc:
  sheet = @default_sheet unless sheet
  read_cells(sheet) unless @cells_read[sheet]
  row,col = normalize(row,col)
  set_value(row,col,value,sheet)
  case value
  when Integer
    # Integer replaces the removed Fixnum constant and also covers big integers
    set_type(row,col,:float,sheet)
  when String, Float
    # NOTE(review): Float values are typed :string, as before - this looks
    # unintentional; confirm before changing it
    set_type(row,col,:string,sheet)
  else
    raise ArgumentError, "Type for "+value.to_s+" not set"
  end
end
129
+
130
+ # returns the type of a cell:
131
+ # * :float
132
+ # * :string
133
+ # * :date
134
+ # * :percentage
135
+ # * :formula
136
+ # * :time
137
+ # * :datetime
138
+ def celltype(row,col,sheet=nil)
139
+ sheet = @default_sheet unless sheet
140
+ read_cells(sheet) unless @cells_read[sheet]
141
+ row,col = normalize(row,col)
142
+ if @formula[sheet][[row,col]]
143
+ return :formula
144
+ else
145
+ @cell_type[sheet][[row,col]]
146
+ end
147
+ end
148
+
149
+ def sheets
150
+ return_sheets = []
151
+ @doc.find("//ss:Worksheet").each do |sheet|
152
+ return_sheets << sheet.attributes['Name']
153
+ end
154
+ return_sheets
155
+ end
156
+
157
+ # version of the openoffice document
158
+ # at 2007 this is always "1.0"
159
+ def officeversion
160
+ oo_version
161
+ @officeversion
162
+ end
163
+
164
+ # shows the internal representation of all cells
165
+ # mainly for debugging purposes
166
+ def to_s(sheet=nil)
167
+ sheet = @default_sheet unless sheet
168
+ read_cells(sheet) unless @cells_read[sheet]
169
+ @cell[sheet].inspect
170
+ end
171
+
172
+ # save spreadsheet
173
+ def save #:nodoc:
174
+ 42
175
+ end
176
+
# Collects every formula of the selected sheet.
# Returns an array of [row, col, formula] triples, scanned row by row
# from first_row/first_column to last_row/last_column.
def formulas(sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  found = []
  first_row(sheet).upto(last_row(sheet)) do |r|
    first_column(sheet).upto(last_column(sheet)) do |c|
      next unless formula?(r,c,sheet)
      found << [r, c, formula(r,c,sheet)]
    end
  end
  found
end
193
+
194
+ private
195
+
196
+ # read the version of the OO-Version
197
+ def oo_version
198
+ @doc.find("//*[local-name()='document-content']").each do |office|
199
+ @officeversion = office.attributes['version']
200
+ end
201
+ end
202
+
203
+ # helper function to set the internal representation of cells
204
+ def set_cell_values(sheet,x,y,i,v,vt,formula,table_cell,str_v,style_name)
205
+ key = [y,x+i]
206
+ @cell_type[sheet] = {} unless @cell_type[sheet]
207
+ @cell_type[sheet][key] = vt
208
+ @formula[sheet] = {} unless @formula[sheet]
209
+ @formula[sheet][key] = formula if formula
210
+ @cell[sheet] = {} unless @cell[sheet]
211
+ @style[sheet] = {} unless @style[sheet]
212
+ @style[sheet][key] = style_name
213
+ case @cell_type[sheet][key]
214
+ when :float
215
+ @cell[sheet][key] = (v.to_s.include?('.') ? v.to_f : v.to_i)
216
+ when :string
217
+ @cell[sheet][key] = str_v
218
+ when :datetime
219
+ @cell[sheet][key] = DateTime.parse(v)
220
+ @cell_type[sheet][key] = :datetime
221
+ when :percentage
222
+ @cell[sheet][key] = v.to_f
223
+ # when :time
224
+ # hms = v.split(':')
225
+ # @cell[sheet][key] = hms[0].to_i*3600 + hms[1].to_i*60 + hms[2].to_i
226
+ else
227
+ @cell[sheet][key] = v
228
+ end
229
+ end
230
+
231
+ # read all cells in the selected sheet
232
+ #--
233
+ # the following construct means '4 blanks'
234
+ # some content <text:s text:c="3"/>
235
+ #++
236
+ def read_cells(sheet=nil)
237
+ sheet = @default_sheet unless sheet
238
+ sheet_found = false
239
+ raise ArgumentError, "Error: sheet '#{sheet||'nil'}' not valid" if @default_sheet == nil and sheet==nil
240
+ raise RangeError unless self.sheets.include? sheet
241
+ @doc.find("ss:Worksheet[@ss:Name='#{sheet}']").each do |ws|
242
+ sheet_found = true
243
+ row = 1
244
+ col = 1
245
+ column_attributes = {}
246
+ idx = 0
247
+ ws.find('.//ss:Column').each do |c|
248
+ column_attributes[(idx += 1).to_s] = c.attributes['StyleID']
249
+ end
250
+ ws.find('.//ss:Row').each do |r|
251
+ skip_to_row = r.attributes['Index'].to_i
252
+ row = skip_to_row if skip_to_row > 0
253
+ style_name = r.attributes['StyleID'] if r.attributes['StyleID']
254
+ r.each do |c|
255
+ next unless c.name == 'Cell'
256
+ skip_to_col = c.attributes['Index'].to_i
257
+ col = skip_to_col if skip_to_col > 0
258
+ if c.attributes['StyleID']
259
+ style_name = c.attributes['StyleID']
260
+ elsif
261
+ style_name ||= column_attributes[c.attributes['Index']]
262
+ end
263
+ c.each_element do |cell|
264
+ formula = nil
265
+ if cell.name == 'Data'
266
+ formula = cell.attributes['Formula']
267
+ vt = cell.attributes['Type'].downcase.to_sym
268
+ v = cell.content
269
+ str_v = v
270
+ case vt
271
+ when :number
272
+ v = v.to_f
273
+ vt = :float
274
+ when :datetime
275
+ if v =~ /^1899-12-31T(\d{2}:\d{2}:\d{2})/
276
+ v = $1
277
+ vt = :time
278
+ elsif v =~ /([^T]+)T00:00:00.000/
279
+ v = $1
280
+ vt = :date
281
+ end
282
+ when :boolean
283
+ v = cell.attributes['boolean-value']
284
+ end
285
+ end
286
+ set_cell_values(sheet,col,row,0,v,vt,formula,cell,str_v,style_name)
287
+ end
288
+ col += 1
289
+ end
290
+ row += 1
291
+ col = 1
292
+ end
293
+ end
294
+ if !sheet_found
295
+ raise RangeError
296
+ end
297
+ @cells_read[sheet] = true
298
+ end
299
+
300
+ def read_styles
301
+ @doc.find("ss:Styles").each do |styles|
302
+ styles.find('.//ss:Style').each do |style|
303
+ style_id = style.attributes['ID']
304
+ @style_definitions[style_id] = Excel2003XML::Font.new
305
+ font = style.find_first('.//ss:Font')
306
+ if font
307
+ @style_definitions[style_id].bold = font.attributes['Bold']
308
+ @style_definitions[style_id].italic = font.attributes['Italic']
309
+ @style_definitions[style_id].underline = font.attributes['Underline']
310
+ end
311
+ end
312
+ end
313
+ end
314
+
# Verifies that the default sheet is usable.
# Raises ArgumentError if no default sheet is set and RangeError if the
# default sheet is not among the sheets of the document.
def check_default_sheet
  raise ArgumentError, "Error: default_sheet not set" if @default_sheet.nil?
  return if sheets.include?(@default_sheet)
  raise RangeError, "sheet '#{@default_sheet}' not found"
end
325
+
326
+ def process_zipfile(zip, path='')
327
+ if zip.file.file? path
328
+ if path == "content.xml"
329
+ open(File.join(@tmpdir, @file_nr.to_s+'_roo_content.xml'),'wb') {|f|
330
+ f << zip.read(path)
331
+ }
332
+ end
333
+ else
334
+ unless path.empty?
335
+ path += '/'
336
+ end
337
+ zip.dir.foreach(path) do |filename|
338
+ process_zipfile(zip, path+filename)
339
+ end
340
+ end
341
+ end
342
+
343
+ def extract_content
344
+ Zip::ZipFile.open(@filename) do |zip|
345
+ process_zipfile(zip)
346
+ end
347
+ end
348
+
# Stores +value+ for the cell at (row,col) in the in-memory cell table.
# +sheet+ defaults to the default sheet.
def set_value(row,col,value,sheet=nil)
  # was @default_value, an instance variable that is never assigned
  sheet = @default_sheet unless sheet
  @cell[sheet][[row,col]] = value
end
353
+
# Stores the cell type +type+ for the cell at (row,col).
# +sheet+ defaults to the default sheet.
def set_type(row,col,type,sheet=nil)
  # was @default_value, an instance variable that is never assigned
  sheet = @default_sheet unless sheet
  @cell_type[sheet][[row,col]] = type
end
358
+
359
+ A_ROO_TYPE = {
360
+ "float" => :float,
361
+ "string" => :string,
362
+ "date" => :date,
363
+ "percentage" => :percentage,
364
+ "time" => :time,
365
+ }
366
+
367
+ def Openoffice.oo_type_2_roo_type(ootype)
368
+ return A_ROO_TYPE[ootype]
369
+ end
370
+
# Flattens a list of child nodes into one string.
# Text nodes and ordinary elements contribute their content; an element
# named 's' contributes as many blanks as its 'c' attribute says
# (one blank when the attribute is missing or zero).
def children_to_string(children)
  text = ''
  children.each do |node|
    fragment =
      if !node.text? && node.name == 's'
        run_length = node.attributes['c'].to_i
        run_length = 1 if run_length == 0
        " " * run_length
      else
        node.content
      end
    text = text + fragment
  end
  text
end
393
+
394
+ end # class
@@ -0,0 +1,601 @@
1
+ require 'xml'
2
+ require 'fileutils'
3
+ require 'zip/zipfilesystem'
4
+ require 'date'
5
+
# Compatibility shim for Ruby versions whose String has no #end_with?.
# It is only installed when the method is missing, so the core
# implementation (which also accepts several suffixes) is never replaced.
class String
  unless method_defined?(:end_with?)
    # true if the string ends with +str+
    def end_with?(str)
      self[-str.length,str.length] == str
    end
  end
end
11
+
12
+ class Excelx < GenericSpreadsheet
13
+ FORMATS = {
14
+ 'General' => :float,
15
+ '0' => :float,
16
+ '0.00' => :float,
17
+ '#,##0' => :float,
18
+ '#,##0.00' => :float,
19
+ '0%' => :percentage,
20
+ '0.00%' => :percentage,
21
+ '0.00E+00' => :float,
22
+ '# ?/?' => :float, #??? TODO:
23
+ '# ??/??' => :float, #??? TODO:
24
+ 'mm-dd-yy' => :date,
25
+ 'd-mmm-yy' => :date,
26
+ 'd-mmm' => :date,
27
+ 'mmm-yy' => :date,
28
+ 'h:mm AM/PM' => :date,
29
+ 'h:mm:ss AM/PM' => :date,
30
+ 'h:mm' => :time,
31
+ 'h:mm:ss' => :time,
32
+ 'm/d/yy h:mm' => :date,
33
+ '#,##0 ;(#,##0)' => :float,
34
+ '#,##0 ;[Red](#,##0)' => :float,
35
+ '#,##0.00;(#,##0.00)' => :float,
36
+ '#,##0.00;[Red](#,##0.00)' => :float,
37
+ 'mm:ss' => :time,
38
+ '[h]:mm:ss' => :time,
39
+ 'mmss.0' => :time,
40
+ '##0.0E+0' => :float,
41
+ '@' => :float,
42
+ #-- additional formats that are not defined by default:
43
+ "yyyy\\-mm\\-dd" => :date,
44
+ 'dd/mm/yy' => :date,
45
+ 'hh:mm:ss' => :time,
46
+ "dd/mm/yy\\ hh:mm" => :datetime,
47
+ }
48
+ STANDARD_FORMATS = {
49
+ 0 => 'General',
50
+ 1 => '0',
51
+ 2 => '0.00',
52
+ 3 => '#,##0',
53
+ 4 => '#,##0.00',
54
+ 9 => '0%',
55
+ 10 => '0.00%',
56
+ 11 => '0.00E+00',
57
+ 12 => '# ?/?',
58
+ 13 => '# ??/??',
59
+ 14 => 'mm-dd-yy',
60
+ 15 => 'd-mmm-yy',
61
+ 16 => 'd-mmm',
62
+ 17 => 'mmm-yy',
63
+ 18 => 'h:mm AM/PM',
64
+ 19 => 'h:mm:ss AM/PM',
65
+ 20 => 'h:mm',
66
+ 21 => 'h:mm:ss',
67
+ 22 => 'm/d/yy h:mm',
68
+ 37 => '#,##0 ;(#,##0)',
69
+ 38 => '#,##0 ;[Red](#,##0)',
70
+ 39 => '#,##0.00;(#,##0.00)',
71
+ 40 => '#,##0.00;[Red](#,##0.00)',
72
+ 45 => 'mm:ss',
73
+ 46 => '[h]:mm:ss',
74
+ 47 => 'mmss.0',
75
+ 48 => '##0.0E+0',
76
+ 49 => '@',
77
+ }
78
+ @@nr = 0
79
+
# Opens an Excel .xlsx workbook and parses its XML parts into memory.
#
# filename::     path of the workbook; a name starting with "http://" is
#                first handed to open_from_uri
# packed::       :zip if +filename+ has to go through unzip first
# file_warning:: kept in @file_warning (default :error)
#
# The needed parts are extracted into a scratch directory ("oo_<pid>",
# placed below ENV['ROO_TMP'] when that is set), which is removed again
# before this method returns.
# Raises IOError when the resolved file does not exist.
def initialize(filename, packed=nil, file_warning = :error) #, create = false)
  super()
  @file_warning = file_warning
  @tmpdir = "oo_"+$$.to_s
  @tmpdir = File.join(ENV['ROO_TMP'], @tmpdir) if ENV['ROO_TMP']
  # File.exists? is gone from current Ruby releases; File.exist? works everywhere.
  FileUtils::mkdir(@tmpdir) unless File.exist?(@tmpdir)
  # the block form of File.open closes the file even when the parser raises
  parse_part = lambda { |path| File.open(path) { |io| XML::Parser.io(io).parse } }
  begin
    # fetching/unpacking happens inside the begin block so that the scratch
    # directory is removed even when one of these steps raises
    filename = open_from_uri(filename) if filename[0,7] == "http://"
    filename = unzip(filename) if packed and packed == :zip
    file_type_check(filename,'.xlsx','an Excel-xlsx')
    @cells_read = Hash.new
    @filename = filename
    unless File.file?(@filename)
      raise IOError, "file #{@filename} does not exist"
    end
    @@nr += 1
    @file_nr = @@nr
    extract_content(@filename)
    @workbook_doc = parse_part.call(File.join(@tmpdir, @file_nr.to_s+"_roo_workbook.xml"))
    @shared_table = []
    shared_strings_path = File.join(@tmpdir, @file_nr.to_s+'_roo_sharedStrings.xml')
    if File.exist?(shared_strings_path)
      @sharedstring_doc = parse_part.call(shared_strings_path)
      read_shared_strings(@sharedstring_doc)
    end
    @styles_table = []
    # plain list; Array.new silently ignored the default block it was given
    @style_definitions = []
    styles_path = File.join(@tmpdir, @file_nr.to_s+'_roo_styles.xml')
    if File.exist?(styles_path)
      @styles_doc = parse_part.call(styles_path)
      read_styles(@styles_doc)
    end
    @sheet_doc = []
    @sheet_files.each_with_index do |item, i|
      @sheet_doc[i] = parse_part.call(item)
    end
  ensure
    #if ENV["roo_local"] != "thomas-p"
    FileUtils::rm_r(@tmpdir)
    #end
  end
  @default_sheet = self.sheets.first
  # per-sheet caches, filled lazily by read_cells
  @cell = Hash.new
  @cell_type = Hash.new
  @formula = Hash.new
  @first_row = Hash.new
  @last_row = Hash.new
  @first_column = Hash.new
  @last_column = Hash.new
  @header_line = 1
  @excelx_type = Hash.new
  @excelx_value = Hash.new
  @s_attribute = Hash.new # TODO: possibly remove again, only needed locally
end
144
+
145
+ # Returns the content of a spreadsheet-cell.
146
+ # (1,1) is the upper left corner.
147
+ # (1,1), (1,'A'), ('A',1), ('a',1) all refer to the
148
+ # cell in the first row and first column.
149
+ def cell(row, col, sheet=nil)
150
+ sheet = @default_sheet unless sheet
151
+ read_cells(sheet) unless @cells_read[sheet]
152
+ row,col = normalize(row,col)
153
+ if celltype(row,col,sheet) == :date
154
+ yyyy,mm,dd = @cell[sheet][[row,col]].split('-')
155
+ return Date.new(yyyy.to_i,mm.to_i,dd.to_i)
156
+ elsif celltype(row,col,sheet) == :datetime
157
+ date_part,time_part = @cell[sheet][[row,col]].split(' ')
158
+ yyyy,mm,dd = date_part.split('-')
159
+ hh,mi,ss = time_part.split(':')
160
+ return DateTime.civil(yyyy.to_i,mm.to_i,dd.to_i,hh.to_i,mi.to_i,ss.to_i)
161
+ end
162
+ @cell[sheet][[row,col]]
163
+ end
164
+
165
+ # Returns the formula at (row,col).
166
+ # Returns nil if there is no formula.
167
+ # The method #formula? checks if there is a formula.
168
+ def formula(row,col,sheet=nil)
169
+ sheet = @default_sheet unless sheet
170
+ read_cells(sheet) unless @cells_read[sheet]
171
+ row,col = normalize(row,col)
172
+ if @formula[sheet][[row,col]] == nil
173
+ return nil
174
+ else
175
+ return @formula[sheet][[row,col]]
176
+ end
177
+ end
178
+
# Returns true if the cell at (row,col) holds a formula.
# +sheet+ selects the sheet to look at; it defaults to the default sheet.
def formula?(row,col,sheet=nil)
  sheet = @default_sheet unless sheet
  read_cells(sheet) unless @cells_read[sheet]
  row,col = normalize(row,col)
  # pass the sheet on; without it the lookup always hit the default sheet
  !formula(row,col,sheet).nil?
end
186
+
187
+ class Font
188
+ attr_accessor :bold, :italic, :underline
189
+
190
+ def bold?
191
+ @bold == true
192
+ end
193
+
194
+ def italic?
195
+ @italic == true
196
+ end
197
+
198
+ def underline?
199
+ @underline == true
200
+ end
201
+ end
202
+
203
+ # Given a cell, return the cell's style
204
+ def font(row, col, sheet=nil)
205
+ sheet = @default_sheet unless sheet
206
+ read_cells(sheet) unless @cells_read[sheet]
207
+ row,col = normalize(row,col)
208
+ s_attribute = @s_attribute[sheet][[row,col]]
209
+ s_attribute ||= 0
210
+ s_attribute = s_attribute.to_i
211
+ @style_definitions[s_attribute]
212
+ end
213
+
# Sets the cell at (row,col) to +value+ in memory only; nothing is saved
# back to the spreadsheet file.
# The cell type is derived from the class of +value+:
# Integer => :float, String and Float => :string.
# Raises ArgumentError for any other kind of value (after the value itself
# has been stored).
def set(row,col,value,sheet=nil) #:nodoc:
  sheet = @default_sheet unless sheet
  read_cells(sheet) unless @cells_read[sheet]
  row,col = normalize(row,col)
  set_value(row,col,value,sheet)
  case value
  when Integer
    # Integer replaces the removed Fixnum constant and also covers big integers
    set_type(row,col,:float,sheet)
  when String, Float
    # NOTE(review): Float values are typed :string, as before - this looks
    # unintentional; confirm before changing it
    set_type(row,col,:string,sheet)
  else
    raise ArgumentError, "Type for "+value.to_s+" not set"
  end
end
231
+
232
+ # returns the type of a cell:
233
+ # * :float
234
+ # * :string,
235
+ # * :date
236
+ # * :percentage
237
+ # * :formula
238
+ # * :time
239
+ # * :datetime
240
+ def celltype(row,col,sheet=nil)
241
+ sheet = @default_sheet unless sheet
242
+ read_cells(sheet) unless @cells_read[sheet]
243
+ row,col = normalize(row,col)
244
+ if @formula[sheet][[row,col]]
245
+ return :formula
246
+ else
247
+ @cell_type[sheet][[row,col]]
248
+ end
249
+ end
250
+
251
+ # returns the internal type of an excel cell
252
+ # * :numeric_or_formula
253
+ # * :string
254
+ # Note: this is only available within the Excelx class
255
+ def excelx_type(row,col,sheet=nil)
256
+ sheet = @default_sheet unless sheet
257
+ read_cells(sheet) unless @cells_read[sheet]
258
+ row,col = normalize(row,col)
259
+ return @excelx_type[sheet][[row,col]]
260
+ end
261
+
262
+ # returns the internal value of an excelx cell
263
+ # Note: this is only available within the Excelx class
264
+ def excelx_value(row,col,sheet=nil)
265
+ sheet = @default_sheet unless sheet
266
+ read_cells(sheet) unless @cells_read[sheet]
267
+ row,col = normalize(row,col)
268
+ return @excelx_value[sheet][[row,col]]
269
+ end
270
+
271
+ # returns the internal format of an excel cell
272
+ def excelx_format(row,col,sheet=nil)
273
+ sheet = @default_sheet unless sheet
274
+ read_cells(sheet) unless @cells_read[sheet]
275
+ row,col = normalize(row,col)
276
+ s = @s_attribute[sheet][[row,col]]
277
+ result = attribute2format(s)
278
+ result
279
+ end
280
+
281
+ # returns an array of sheet names in the spreadsheet
282
+ def sheets
283
+ return_sheets = []
284
+ @workbook_doc.find("//*[local-name()='sheet']").each do |sheet|
285
+ return_sheets << sheet.attributes.to_h['name']
286
+ end
287
+ return_sheets
288
+ end
289
+ # shows the internal representation of all cells
290
+ # for debugging purposes
291
+ def to_s(sheet=nil)
292
+ sheet = @default_sheet unless sheet
293
+ read_cells(sheet) unless @cells_read[sheet]
294
+ @cell[sheet].inspect
295
+ end
296
+
# Collects every formula of the selected sheet.
# Returns an array of [row, col, formula] triples, scanned row by row
# from first_row/first_column to last_row/last_column.
def formulas(sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  found = []
  first_row(sheet).upto(last_row(sheet)) do |r|
    first_column(sheet).upto(last_column(sheet)) do |c|
      next unless formula?(r,c,sheet)
      found << [r, c, formula(r,c,sheet)]
    end
  end
  found
end
313
+
314
+ private
315
+
316
+ # helper function to set the internal representation of cells
317
+ def set_cell_values(sheet,x,y,i,v,vt,formula,tr,str_v,
318
+ excelx_type=nil,
319
+ excelx_value=nil,
320
+ s_attribute=nil)
321
+ key = [y,x+i]
322
+ @cell_type[sheet] = {} unless @cell_type[sheet]
323
+ @cell_type[sheet][key] = vt
324
+ @formula[sheet] = {} unless @formula[sheet]
325
+ @formula[sheet][key] = formula if formula
326
+ @cell[sheet] = {} unless @cell[sheet]
327
+ case @cell_type[sheet][key]
328
+ when :float
329
+ @cell[sheet][key] = (v.to_s.include?('.') ? v.to_f : v.to_i)
330
+ when :string
331
+ @cell[sheet][key] = str_v
332
+ when :date
333
+ @cell[sheet][key] = (Date.new(1899,12,30)+v.to_i).strftime("%Y-%m-%d")
334
+ when :datetime
335
+ @cell[sheet][key] = (DateTime.new(1899,12,30)+v.to_f).strftime("%Y-%m-%d %H:%M:%S")
336
+ when :percentage
337
+ @cell[sheet][key] = v.to_f
338
+ when :time
339
+ @cell[sheet][key] = v.to_f*(24*60*60)
340
+ else
341
+ @cell[sheet][key] = v
342
+ end
343
+ @excelx_type[sheet] = {} unless @excelx_type[sheet]
344
+ @excelx_type[sheet][key] = excelx_type
345
+ @excelx_value[sheet] = {} unless @excelx_value[sheet]
346
+ @excelx_value[sheet][key] = excelx_value
347
+ @s_attribute[sheet] = {} unless @s_attribute[sheet]
348
+ @s_attribute[sheet][key] = s_attribute
349
+ end
350
+
# Splits a coordinate like "AA12" into its leading letters "AA" (String)
# and the number 12 that follows them; anything after the digits is ignored.
# Raises ArgumentError when the letters are missing or the number is 0.
def split_coord(s)
  parts = /\A([A-Za-z]*)([0-9]*)/.match(s)
  letter = parts[1]
  number = parts[2].to_i
  raise ArgumentError if letter == "" or number == 0
  return letter, number
end
369
+
370
+ def split_coordinate(str)
371
+ letter,number = split_coord(str)
372
+ x = GenericSpreadsheet.letter_to_number(letter)
373
+ y = number
374
+ return x,y
375
+ end
376
+
# Maps an Excel number-format string (a key of FORMATS, e.g. '0.00%')
# to the corresponding roo cell type.
# Unknown formats, including nil, are treated as :float.
def format2type(format)
  FORMATS.fetch(format, :float)
end
385
+
386
+ # read all cells in the selected sheet
387
+ def read_cells(sheet=nil)
388
+ sheet = @default_sheet unless sheet
389
+ sheet_found = false
390
+ raise ArgumentError, "Error: sheet '#{sheet||'nil'}' not valid" if @default_sheet == nil and sheet==nil
391
+ raise RangeError unless self.sheets.include? sheet
392
+ n = self.sheets.index(sheet)
393
+ @sheet_doc[n].find("//*[local-name()='c']").each do |c|
394
+ s_attribute = c.attributes.to_h['s'].to_i
395
+ if (c.attributes.to_h['t'] == 's')
396
+ tmp_type = :shared
397
+ elsif (c.attributes.to_h['t'] == 'b')
398
+ tmp_type = :boolean
399
+ else
400
+ format = attribute2format(s_attribute)
401
+ tmp_type = format2type(format)
402
+ end
403
+ formula = nil
404
+ c.each_element do |cell|
405
+ if cell.name == 'f'
406
+ formula = cell.content
407
+ end
408
+ if cell.name == 'v'
409
+ if tmp_type == :time or tmp_type == :datetime
410
+ if cell.content.to_f >= 1.0
411
+ if (cell.content.to_f - cell.content.to_f.floor).abs > 0.000001
412
+ tmp_type = :datetime
413
+ else
414
+ tmp_type = :date
415
+ end
416
+ else
417
+ end
418
+ end
419
+ excelx_type = [:numeric_or_formula,format]
420
+ excelx_value = cell.content
421
+ if tmp_type == :shared
422
+ vt = :string
423
+ str_v = @shared_table[cell.content.to_i]
424
+ excelx_type = :string
425
+ elsif tmp_type == :boolean
426
+ vt = :boolean
427
+ cell.content.to_i == 1 ? v = 'TRUE' : v = 'FALSE'
428
+ elsif tmp_type == :date
429
+ vt = :date
430
+ v = cell.content
431
+ elsif tmp_type == :time
432
+ vt = :time
433
+ v = cell.content
434
+ elsif tmp_type == :datetime
435
+ vt = :datetime
436
+ v = cell.content
437
+ elsif tmp_type == :formula
438
+ vt = :formula
439
+ v = cell.content.to_f #TODO: !!!!
440
+ else
441
+ vt = :float
442
+ v = cell.content
443
+ end
444
+ #puts "vt: #{vt}" if cell.text.include? "22606.5120"
445
+ x,y = split_coordinate(c.attributes.to_h['r'])
446
+ tr=nil #TODO: ???s
447
+ set_cell_values(sheet,x,y,0,v,vt,formula,tr,str_v,excelx_type,excelx_value,s_attribute)
448
+ end
449
+ end
450
+ end
451
+ sheet_found = true #TODO:
452
+ if !sheet_found
453
+ raise RangeError
454
+ end
455
+ @cells_read[sheet] = true
456
+ end
457
+
# Verifies that the default sheet is usable.
# Raises ArgumentError if no default sheet is set and RangeError if the
# default sheet is not among the sheets of the workbook.
def check_default_sheet
  raise ArgumentError, "Error: default_sheet not set" if @default_sheet.nil?
  return if sheets.include?(@default_sheet)
  raise RangeError, "sheet '#{@default_sheet}' not found"
end
470
+
# Copies the workbook parts that are needed later out of the zip archive
# into @tmpdir, each file name prefixed with this instance's @file_nr:
# workbook.xml, sharedStrings.xml, styles.xml and every sheet<N>.xml.
# The path of the copy of sheet N is stored in @sheet_files[N-1].
#
# zipfilename:: path of the archive whose entries are listed
# zip::         already opened archive the entry contents are read from
# path::        unused
#
# Returns nil.
def process_zipfile(zipfilename, zip, path='')
  @sheet_files = []
  single_parts = {
    'workbook.xml'      => '_roo_workbook.xml',
    'sharedStrings.xml' => '_roo_sharedStrings.xml',
    'styles.xml'        => '_roo_styles.xml',
  }
  Zip::ZipFile.open(zipfilename) do |zf|
    zf.entries.each do |entry|
      entry_name = entry.to_s
      single_parts.each do |suffix, target|
        next unless entry_name.end_with?(suffix)
        # File.open instead of Kernel#open: the path is only ever a file name
        File.open(File.join(@tmpdir, @file_nr.to_s+target),'wb') {|f|
          f << zip.read(entry)
        }
      end
      # the dot is escaped and the match anchored at the very end, so only
      # entries really named "sheet<N>.xml" are taken as worksheets
      if entry_name =~ /sheet([0-9]+)\.xml\z/
        nr = $1
        sheet_path = File.join(@tmpdir, @file_nr.to_s+"_roo_sheet#{nr}")
        File.open(sheet_path,'wb') {|f|
          f << zip.read(entry)
        }
        @sheet_files[nr.to_i-1] = sheet_path
      end
    end
  end
  nil
end
503
+
# Opens the zip archive +zipfilename+ and passes both the name and the
# opened archive to process_zipfile.
def extract_content(zipfilename)
  # open the archive that was asked for (the parameter used to be ignored
  # in favour of @filename)
  Zip::ZipFile.open(zipfilename) do |zip|
    process_zipfile(zipfilename,zip)
  end
end
510
+
# Stores +value+ for the cell at (row,col) in the in-memory cell table.
# +sheet+ defaults to the default sheet.
def set_value(row,col,value,sheet=nil)
  # was @default_value, an instance variable that is never assigned
  sheet = @default_sheet unless sheet
  @cell[sheet][[row,col]] = value
end
516
+
# Stores the cell type +type+ for the cell at (row,col).
# +sheet+ defaults to the default sheet.
def set_type(row,col,type,sheet=nil)
  # was @default_value, an instance variable that is never assigned
  sheet = @default_sheet unless sheet
  @cell_type[sheet][[row,col]] = type
end
522
+
523
+ # read the shared strings xml document
524
+ def read_shared_strings(doc)
525
+ doc.find("//*[local-name()='si']").each do |si|
526
+ shared_table_entry = ''
527
+ si.each_element do |elem|
528
+ if (elem.name == 'r')
529
+ elem.each_element do |r_elem|
530
+ if (r_elem.name == 't')
531
+ shared_table_entry << r_elem.content
532
+ end
533
+ end
534
+ end
535
+ if (elem.name == 't')
536
+ shared_table_entry = elem.content
537
+ end
538
+ end
539
+ @shared_table << shared_table_entry
540
+ end
541
+ end
542
+
543
+ # read the styles elements of an excelx document
544
+ def read_styles(doc)
545
+ @numFmts = []
546
+ @cellXfs = []
547
+ fonts = []
548
+
549
+ doc.find("//*[local-name()='numFmt']").each do |numFmt|
550
+ numFmtId = numFmt.attributes.to_h['numFmtId']
551
+ formatCode = numFmt.attributes.to_h['formatCode']
552
+ @numFmts << [numFmtId, formatCode]
553
+ end
554
+ doc.find("//*[local-name()='fonts']").each do |fonts_el|
555
+ fonts_el.each_element do |font_el|
556
+ if font_el.name == 'font'
557
+ font = Excelx::Font.new
558
+ font_el.each_element do |font_sub_el|
559
+ case font_sub_el.name
560
+ when 'b'
561
+ font.bold = true
562
+ when 'i'
563
+ font.italic = true
564
+ when 'u'
565
+ font.underline = true
566
+ end
567
+ end
568
+ fonts << font
569
+ end
570
+ end
571
+ end
572
+
573
+ doc.find("//*[local-name()='cellXfs']").each do |xfs|
574
+ xfs.each do |xf|
575
+ numFmtId = xf.attributes.to_h['numFmtId']
576
+ @cellXfs << [numFmtId]
577
+ fontId = xf.attributes.to_h['fontId'].to_i
578
+ @style_definitions << fonts[fontId]
579
+ end
580
+ end
581
+ end
582
+
# Converts a cell's style index (its "s" attribute) into a number-format
# string.
#
# The index selects an entry of @cellXfs, whose first element is a numFmtId.
# A format code registered for that id in @numFmts wins; otherwise the id is
# looked up in STANDARD_FORMATS.
# Returns nil when nothing matches, when the index is out of range, or when
# the style tables were never filled.
def attribute2format(s)
  xf = (@cellXfs || [])[s.to_i]
  return nil unless xf
  num_fmt_id = xf.first
  custom = (@numFmts || []).find { |nf| nf.first == num_fmt_id }
  return custom[1] if custom && custom[1]
  STANDARD_FORMATS[num_fmt_id.to_i]
end
600
+
601
+ end # class