aunderwo-roo 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (97) hide show
  1. data/History.txt +225 -0
  2. data/README.markdown +55 -0
  3. data/examples/roo_soap_client.rb +53 -0
  4. data/examples/roo_soap_server.rb +29 -0
  5. data/examples/write_me.rb +33 -0
  6. data/lib/roo.rb +32 -0
  7. data/lib/roo/excel.rb +468 -0
  8. data/lib/roo/excel2003xml.rb +411 -0
  9. data/lib/roo/excelx.rb +602 -0
  10. data/lib/roo/generic_spreadsheet.rb +628 -0
  11. data/lib/roo/google.rb +379 -0
  12. data/lib/roo/openoffice.rb +451 -0
  13. data/lib/roo/roo_rails_helper.rb +82 -0
  14. data/lib/roo/version.rb +9 -0
  15. data/test/1900_base.xls +0 -0
  16. data/test/1904_base.xls +0 -0
  17. data/test/Bibelbund.csv +3741 -0
  18. data/test/Bibelbund.ods +0 -0
  19. data/test/Bibelbund.xls +0 -0
  20. data/test/Bibelbund.xlsx +0 -0
  21. data/test/Bibelbund.xml +62518 -0
  22. data/test/Bibelbund1.ods +0 -0
  23. data/test/bad_excel_date.xls +0 -0
  24. data/test/bbu.ods +0 -0
  25. data/test/bbu.xls +0 -0
  26. data/test/bbu.xlsx +0 -0
  27. data/test/bbu.xml +152 -0
  28. data/test/bode-v1.ods.zip +0 -0
  29. data/test/bode-v1.xls.zip +0 -0
  30. data/test/boolean.ods +0 -0
  31. data/test/boolean.xls +0 -0
  32. data/test/boolean.xlsx +0 -0
  33. data/test/boolean.xml +112 -0
  34. data/test/borders.ods +0 -0
  35. data/test/borders.xls +0 -0
  36. data/test/borders.xlsx +0 -0
  37. data/test/borders.xml +144 -0
  38. data/test/bug-row-column-fixnum-float.xls +0 -0
  39. data/test/bug-row-column-fixnum-float.xml +127 -0
  40. data/test/datetime.ods +0 -0
  41. data/test/datetime.xls +0 -0
  42. data/test/datetime.xlsx +0 -0
  43. data/test/datetime.xml +142 -0
  44. data/test/datetime_floatconv.xls +0 -0
  45. data/test/datetime_floatconv.xml +148 -0
  46. data/test/emptysheets.ods +0 -0
  47. data/test/emptysheets.xls +0 -0
  48. data/test/emptysheets.xml +105 -0
  49. data/test/excel2003.xml +21140 -0
  50. data/test/false_encoding.xls +0 -0
  51. data/test/false_encoding.xml +132 -0
  52. data/test/formula.ods +0 -0
  53. data/test/formula.xls +0 -0
  54. data/test/formula.xlsx +0 -0
  55. data/test/formula.xml +134 -0
  56. data/test/formula_parse_error.xls +0 -0
  57. data/test/formula_parse_error.xml +1833 -0
  58. data/test/html-escape.ods +0 -0
  59. data/test/no_spreadsheet_file.txt +1 -0
  60. data/test/numbers1.csv +18 -0
  61. data/test/numbers1.ods +0 -0
  62. data/test/numbers1.xls +0 -0
  63. data/test/numbers1.xlsx +0 -0
  64. data/test/numbers1.xml +312 -0
  65. data/test/only_one_sheet.ods +0 -0
  66. data/test/only_one_sheet.xls +0 -0
  67. data/test/only_one_sheet.xlsx +0 -0
  68. data/test/only_one_sheet.xml +67 -0
  69. data/test/paragraph.ods +0 -0
  70. data/test/paragraph.xls +0 -0
  71. data/test/paragraph.xlsx +0 -0
  72. data/test/paragraph.xml +127 -0
  73. data/test/ric.ods +0 -0
  74. data/test/simple_spreadsheet.ods +0 -0
  75. data/test/simple_spreadsheet.xls +0 -0
  76. data/test/simple_spreadsheet.xlsx +0 -0
  77. data/test/simple_spreadsheet.xml +225 -0
  78. data/test/simple_spreadsheet_from_italo.ods +0 -0
  79. data/test/simple_spreadsheet_from_italo.xls +0 -0
  80. data/test/simple_spreadsheet_from_italo.xml +242 -0
  81. data/test/skipped_tests.rb +789 -0
  82. data/test/style.ods +0 -0
  83. data/test/style.xls +0 -0
  84. data/test/style.xlsx +0 -0
  85. data/test/style.xml +154 -0
  86. data/test/test_helper.rb +19 -0
  87. data/test/test_roo.rb +1834 -0
  88. data/test/time-test.csv +2 -0
  89. data/test/time-test.ods +0 -0
  90. data/test/time-test.xls +0 -0
  91. data/test/time-test.xlsx +0 -0
  92. data/test/time-test.xml +131 -0
  93. data/test/whitespace.ods +0 -0
  94. data/test/whitespace.xls +0 -0
  95. data/test/whitespace.xlsx +0 -0
  96. data/test/whitespace.xml +184 -0
  97. metadata +205 -0
@@ -0,0 +1,411 @@
1
+ require 'xml'
2
+ require 'fileutils'
3
+ require 'zip/zipfilesystem'
4
+ require 'date'
5
+ require 'base64'
6
+ require 'cgi'
7
+
8
+ class Excel2003XML < GenericSpreadsheet
9
+
10
+ @@nr = 0
11
+
12
+ # initialization and opening of a spreadsheet file
13
+ # values for packed: :zip
14
# Opens an Excel 2003 XML (SpreadsheetML) file and parses it into @doc.
#
# filename     - path of the .xml file; a name starting with "http://" is
#                first passed through open_from_uri
# packed       - pass :zip to have the file unpacked through unzip first
# file_warning - stored in @file_warning (default :error)
#
# Raises IOError when the resolved file does not exist.
def initialize(filename, packed=nil, file_warning=:error)
  @file_warning = file_warning
  super()
  @tmpdir = "oo_"+$$.to_s
  @tmpdir = File.join(ENV['ROO_TMP'], @tmpdir) if ENV['ROO_TMP']
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  FileUtils::mkdir(@tmpdir) unless File.exist?(@tmpdir)
  begin
    # Fetching/unpacking happens inside the begin block so that the scratch
    # directory is removed even when one of these steps raises.
    filename = open_from_uri(filename) if filename[0,7] == "http://"
    filename = unzip(filename) if packed and packed == :zip
    file_type_check(filename,'.xml','an Excel 2003 XML')
    @cells_read = Hash.new
    @filename = filename
    unless File.file?(@filename)
      raise IOError, "file #{@filename} does not exist"
    end
    @doc = XML::Parser.file(@filename).parse
  ensure
    FileUtils::rm_r(@tmpdir)
  end
  @default_sheet = self.sheets.first
  @cell = Hash.new
  @cell_type = Hash.new
  @formula = Hash.new
  @first_row = Hash.new
  @last_row = Hash.new
  @first_column = Hash.new
  @last_column = Hash.new
  @style = Hash.new
  # per-sheet list of default style names, created on first access
  @style_defaults = Hash.new { |h,k| h[k] = [] }
  @style_definitions = Hash.new
  @header_line = 1
end
48
+
49
+ # Returns the content of a spreadsheet-cell.
50
+ # (1,1) is the upper left corner.
51
+ # (1,1), (1,'A'), ('A',1), ('a',1) all refer to the
51
+ # cell in the first row and first column.
53
# Returns the content of the cell at (row, col) of +sheet+ (the default
# sheet when omitted), reading the sheet first if that has not happened yet.
# Cells whose celltype is :date are stored as "YYYY-MM-DD" strings and are
# converted to a Date here; every other value is returned as stored.
def cell(row, col, sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row, col = normalize(row, col)
  return @cell[sheet][[row, col]] unless celltype(row, col, sheet) == :date

  parts = @cell[sheet][[row, col]].split('-')
  Date.new(parts[0].to_i, parts[1].to_i, parts[2].to_i)
end
63
+
64
+ # Returns the formula at (row,col).
65
+ # Returns nil if there is no formula.
66
+ # The method #formula? checks if there is a formula.
67
# Returns the formula stored at (row, col) of +sheet+ (default sheet when
# omitted), or nil when the cell has none.
#
# An "oooc:" prefix is removed only when it is actually present. The
# original code always cut off the first five characters, which truncates
# any formula that does not carry that prefix.
# A sheet for which no cells were stored yields nil instead of raising.
def formula(row,col,sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row,col = normalize(row,col)
  text = (@formula[sheet] || {})[[row,col]]
  return nil if text.nil?
  prefix = "oooc:"
  text[0, prefix.length] == prefix ? text[prefix.length..-1] : text
end
77
+
78
+ # true, if there is a formula
79
# Returns true when the cell at (row, col) of +sheet+ (default sheet when
# omitted) holds a formula.
# The sheet is now forwarded to #formula; previously the lookup always went
# to the default sheet, whatever sheet the caller asked for.
def formula?(row,col,sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row,col = normalize(row,col)
  formula(row,col,sheet) != nil
end
85
+
86
# Font attributes of a cell style. The raw attribute values are stored as
# given; the predicate methods interpret them.
class Font
  attr_accessor :bold, :italic, :underline

  # true when the stored weight is exactly the string 'bold'
  def bold?
    bold == 'bold'
  end

  # true when the stored style is exactly the string 'italic'
  def italic?
    italic == 'italic'
  end

  # true when any underline value has been stored
  def underline?
    !(underline == nil)
  end
end
101
+
102
+ # Given a cell, return the cell's style
103
# Returns the style definition for the cell at (row, col) of +sheet+
# (default sheet when omitted). The style name is taken from the cell, then
# from the sheet's per-column defaults, then falls back to 'Default'.
def font(row, col, sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row, col = normalize(row, col)
  name = @style[sheet][[row, col]]
  name ||= @style_defaults[sheet][col - 1]
  name ||= 'Default'
  @style_definitions[name]
end
110
+
111
+ # set a cell to a certain value
112
+ # (this will not be saved back to the spreadsheet file!)
113
# Stores +value+ in the in-memory cell at (row, col) of +sheet+ (default
# sheet when omitted) and records a matching cell type. Nothing is written
# back to the spreadsheet file.
#
# Integer and Float values are typed :float, String values :string.
# (The original compared against Fixnum, which no longer exists in
# Ruby 3.2+, and typed Float values as :string.)
#
# Raises ArgumentError for any other kind of value; the value itself has
# already been stored at that point, as before.
def set(row,col,value,sheet=nil) #:nodoc:
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row,col = normalize(row,col)
  set_value(row,col,value,sheet)
  case value
  when Integer, Float
    set_type(row,col,:float,sheet)
  when String
    set_type(row,col,:string,sheet)
  else
    raise ArgumentError, "Type for "+value.to_s+" not set"
  end
end
128
+
129
+ # returns the type of a cell:
130
+ # * :float
131
+ # * :string
132
+ # * :date
133
+ # * :percentage
134
+ # * :formula
135
+ # * :time
136
+ # * :datetime
137
# Returns the type symbol of the cell at (row, col) of +sheet+ (default
# sheet when omitted). A cell with a stored formula is reported as :formula;
# otherwise the recorded cell type is returned (nil when nothing is stored).
def celltype(row,col,sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row, col = normalize(row, col)
  key = [row, col]
  @formula[sheet][key] ? :formula : @cell_type[sheet][key]
end
147
+
148
# Returns the 'Name' attribute of every ss:Worksheet element found in the
# parsed document, in document order.
def sheets
  @doc.find("//ss:Worksheet").map { |worksheet| worksheet.attributes['Name'] }
end
155
+
156
+ # version of the openoffice document
157
+ # at 2007 this is always "1.0"
158
# Refreshes @officeversion through oo_version and returns it.
# NOTE(review): oo_version searches for a 'document-content' element, which
# looks like an OpenOffice leftover; for SpreadsheetML input this
# presumably stays nil - confirm.
def officeversion
  oo_version
  return @officeversion
end
162
+
163
+ # shows the internal representation of all cells
164
+ # mainly for debugging purposes
165
+ def to_s(sheet=nil)
166
+ sheet = @default_sheet unless sheet
167
+ read_cells(sheet) unless @cells_read[sheet]
168
+ @cell[sheet].inspect
169
+ end
170
+
171
+ # save spreadsheet
172
# Placeholder: nothing is written anywhere; the method always returns 42.
def save #:nodoc:
  return 42
end
175
+
176
+ # returns each formula in the selected sheet as an array of elements
177
+ # [row, col, formula]
178
# Collects every formula of +sheet+ (default sheet when omitted) as an
# array of [row, col, formula] triples, scanning row by row between the
# first/last row and the first/last column of the sheet.
def formulas(sheet=nil)
  found = []
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  first_row(sheet).upto(last_row(sheet)) do |row|
    first_column(sheet).upto(last_column(sheet)) do |col|
      next unless formula?(row,col,sheet)
      found << [row, col, formula(row,col,sheet)]
    end
  end
  found
end
192
+
193
+ private
194
+
195
+ # read the version of the OO-Version
196
# Stores the 'version' attribute of every element whose local name is
# 'document-content' in @officeversion (the last match wins).
# NOTE(review): that element name looks OpenOffice-specific; confirm it is
# ever present in Excel 2003 XML input.
def oo_version
  @doc.find("//*[local-name()='document-content']").each do |content_node|
    @officeversion = content_node.attributes['version']
  end
end
201
+
202
+ # helper function to set the internal representation of cells
203
# Records one parsed cell in the internal per-sheet hashes.
#
# sheet      - sheet name used as the outer hash key
# x, y, i    - the cell is stored under the key [y, x+i]
# v          - raw value; converted with to_f for :float and :percentage,
#              parsed with DateTime.parse for :datetime, stored unchanged
#              for any other type except :string
# vt         - cell type symbol
# formula    - formula text, stored only when truthy
# table_cell - not used by this method
# str_v      - value stored for :string cells
# style_name - style identifier stored in @style
#
# The return value is not meaningful to callers.
def set_cell_values(sheet,x,y,i,v,vt,formula,table_cell,str_v,style_name)
  key = [y, x+i]
  (@cell_type[sheet] ||= {})[key] = vt
  @formula[sheet] ||= {}
  @formula[sheet][key] = formula if formula
  @cell[sheet] ||= {}
  (@style[sheet] ||= {})[key] = style_name
  @cell[sheet][key] =
    case vt
    when :float, :percentage then v.to_f
    when :string then str_v
    when :datetime then DateTime.parse(v)
    else v
    end
end
229
+
230
+ # read all cells in the selected sheet
231
+ #--
232
+ # the following construct means '4 blanks'
233
+ # some content <text:s text:c="3"/>
234
+ #++
235
# Reads every cell of +sheet+ (default sheet when omitted) from the parsed
# document into the internal hashes and marks the sheet as read.
#
# Rows and cells are numbered from 1; an 'Index' attribute on a Row or Cell
# element jumps to that position. Only 'Data' children of a Cell carry a
# value. Other children (comments, for instance) are now skipped; before,
# they reached set_cell_values with a nil type and raised NoMethodError.
#
# Raises ArgumentError when neither a sheet nor a default sheet is given,
# RangeError when the sheet is unknown or no matching worksheet is found.
def read_cells(sheet=nil)
  sheet ||= @default_sheet
  raise ArgumentError, "Error: sheet '#{sheet||'nil'}' not valid" if @default_sheet == nil and sheet==nil
  raise RangeError unless self.sheets.include? sheet
  sheet_found = false
  # NOTE(review): the sheet name is interpolated into the XPath unescaped;
  # a name containing a single quote would break the expression.
  @doc.find("ss:Worksheet[@ss:Name='#{sheet}']").each do |ws|
    sheet_found = true
    row = 1
    ws.find('.//ss:Row').each do |r|
      skip_to_row = r.attributes['Index'].to_i
      row = skip_to_row if skip_to_row > 0
      col = 1
      r.each do |c|
        next unless c.name == 'Cell'
        skip_to_col = c.attributes['Index'].to_i
        col = skip_to_col if skip_to_col > 0
        c.each_element do |cell|
          next unless cell.name == 'Data'
          # NOTE(review): StyleID and Formula are looked up on the Data
          # element, as in the original; in SpreadsheetML they presumably
          # live on the enclosing Cell - confirm before relying on them.
          style_name = cell.attributes['StyleID']
          formula = cell.attributes['Formula']
          vt = cell.attributes['Type'].downcase.to_sym
          v = cell.content
          str_v = v
          case vt
          when :number
            v = v.to_f
            vt = :float
          when :datetime
            if v =~ /^1899-12-31T(\d{2}:\d{2}:\d{2})/
              # a timestamp on the base date is a pure time of day
              v = $1
              vt = :time
            elsif v =~ /([^T]+)T00:00:00.000/
              # midnight timestamps are treated as plain dates
              v = $1
              vt = :date
            end
          when :boolean
            # NOTE(review): 'boolean-value' looks like an OpenOffice
            # attribute name; for Excel XML this is presumably nil.
            v = cell.attributes['boolean-value']
          end
          set_cell_values(sheet,col,row,0,v,vt,formula,cell,str_v,style_name)
        end
        col += 1
      end
      row += 1
    end
  end
  raise RangeError unless sheet_found
  @cells_read[sheet] = true
end
316
+
317
# Fills @style_definitions from +style_elements+.
#
# A 'Default' entry with an empty font is always created. For each element
# named 'style', a Font is built from the 'font-weight', 'font-style' and
# 'text-underline-style' attributes of its children and stored under the
# style's 'name' attribute; with several children the last one wins.
#
# The fonts are instances of Excel2003XML::Font. The original instantiated
# Openoffice::Font, a different class that need not be loaded.
def read_styles(style_elements)
  @style_definitions['Default'] = Excel2003XML::Font.new
  style_elements.each do |style|
    next unless style.name == 'style'
    style_name = style.attributes['name']
    style.each do |properties|
      font = Excel2003XML::Font.new
      font.bold = properties.attributes['font-weight']
      font.italic = properties.attributes['font-style']
      font.underline = properties.attributes['text-underline-style']
      @style_definitions[style_name] = font
    end
  end
end
331
+
332
+ # Checks if the default_sheet exists. If not an RangeError exception is
333
+ # raised
334
# Verifies that a default sheet is set and exists in the document.
# Raises ArgumentError when @default_sheet is nil and RangeError when it is
# not one of the names returned by sheets. Returns nil otherwise.
def check_default_sheet
  raise ArgumentError, "Error: default_sheet not set" if @default_sheet == nil
  return if sheets.include?(@default_sheet)
  raise RangeError, "sheet '#{@default_sheet}' not found"
end
342
+
343
# Walks the archive +zip+ recursively starting at +path+. When an entry
# named exactly "content.xml" is reached, its bytes are written to
# "<file_nr>_roo_content.xml" inside @tmpdir; all other files are ignored.
# NOTE(review): @file_nr is not assigned anywhere in the visible part of
# this class, and this looks like an OpenOffice leftover - confirm it is
# still used.
def process_zipfile(zip, path='')
  if zip.file.file? path
    return unless path == "content.xml"
    target = File.join(@tmpdir, @file_nr.to_s+'_roo_content.xml')
    open(target,'wb') { |out| out << zip.read(path) }
  else
    path += '/' unless path.empty?
    zip.dir.foreach(path) { |filename| process_zipfile(zip, path+filename) }
  end
end
359
+
360
# Opens @filename as a zip archive and hands it to process_zipfile.
def extract_content
  Zip::ZipFile.open(@filename) { |archive| process_zipfile(archive) }
end
365
+
366
# Stores +value+ for the cell [row, col] of +sheet+.
# Without a sheet the default sheet is used. The original read the
# never-assigned @default_value here, so the fallback was always nil.
# The per-sheet hash is created on demand so that a sheet without any
# cells can be written to.
def set_value(row,col,value,sheet=nil)
  sheet ||= @default_sheet
  @cell[sheet] ||= {}
  @cell[sheet][[row,col]] = value
end
370
+
371
# Records +type+ as the cell type of [row, col] of +sheet+.
# Without a sheet the default sheet is used. The original read the
# never-assigned @default_value here, so the fallback was always nil.
# The per-sheet hash is created on demand.
def set_type(row,col,type,sheet=nil)
  sheet ||= @default_sheet
  @cell_type[sheet] ||= {}
  @cell_type[sheet][[row,col]] = type
end
375
+
376
# Maps type names given as strings to the corresponding roo type symbols.
# Frozen so the shared lookup table cannot be modified by accident.
A_ROO_TYPE = {
  "float" => :float,
  "string" => :string,
  "date" => :date,
  "percentage" => :percentage,
  "time" => :time,
}.freeze
383
+
384
# Looks up +ootype+ in A_ROO_TYPE and returns the mapped symbol, or nil for
# an unknown type name.
# NOTE(review): this defines a singleton method on Openoffice from inside
# another class and requires the Openoffice constant to be loaded already -
# confirm this is intended.
def Openoffice.oo_type_2_roo_type(ootype)
  A_ROO_TYPE[ootype]
end
387
+
388
+ # helper method to convert compressed spaces and other elements within
389
+ # an text into a string
390
# Concatenates the textual content of +children+ into one string.
# Text nodes and ordinary elements contribute their content. An element
# named 's' stands for a run of blanks whose length is its 'c' attribute;
# a missing or zero count means one blank.
def children_to_string(children)
  text = ''
  children.each do |child|
    if !child.text? && child.name == 's'
      blanks = child.attributes['c'].to_i
      blanks = 1 if blanks == 0
      text = text + " "*blanks
    else
      text = text + child.content
    end
  end
  text
end
410
+
411
+ end # class
@@ -0,0 +1,602 @@
1
+ require 'xml'
2
+ require 'fileutils'
3
+ require 'zip/zipfilesystem'
4
+ require 'date'
5
+
6
# Backport of String#end_with? for interpreters that lack it.
# The method is defined only when String does not already provide it, so
# the core implementation is never replaced. The original unconditionally
# overrode it with a single-argument version, which breaks callers that
# pass several suffixes.
class String
  unless method_defined?(:end_with?)
    # Returns true when the string ends with any of +suffixes+.
    def end_with?(*suffixes)
      suffixes.any? { |suffix| self[-suffix.length, suffix.length] == suffix }
    end
  end
end
11
+
12
+ class Excelx < GenericSpreadsheet
13
# Maps Excel number format strings to the roo cell type used for cells
# carrying that format. Frozen so the shared table cannot be modified by
# accident.
FORMATS = {
  'General' => :float,
  '0' => :float,
  '0.00' => :float,
  '#,##0' => :float,
  '#,##0.00' => :float,
  '0%' => :percentage,
  '0.00%' => :percentage,
  '0.00E+00' => :float,
  '# ?/?' => :float, #??? TODO:
  '# ??/??' => :float, #??? TODO:
  'mm-dd-yy' => :date,
  'd-mmm-yy' => :date,
  'd-mmm' => :date,
  'mmm-yy' => :date,
  'h:mm AM/PM' => :date,
  'h:mm:ss AM/PM' => :date,
  'h:mm' => :time,
  'h:mm:ss' => :time,
  'm/d/yy h:mm' => :date,
  '#,##0 ;(#,##0)' => :float,
  '#,##0 ;[Red](#,##0)' => :float,
  '#,##0.00;(#,##0.00)' => :float,
  '#,##0.00;[Red](#,##0.00)' => :float,
  'mm:ss' => :time,
  '[h]:mm:ss' => :time,
  'mmss.0' => :time,
  '##0.0E+0' => :float,
  '@' => :float,
  #-- additional formats that are not defined by default:
  "yyyy\\-mm\\-dd" => :date,
  'dd/mm/yy' => :date,
  'hh:mm:ss' => :time,
  "dd/mm/yy\\ hh:mm" => :datetime,
}.freeze
48
# Maps numeric format ids to their format strings; used as the fallback
# when a workbook defines no custom format for an id. Frozen so the shared
# table cannot be modified by accident.
STANDARD_FORMATS = {
  0 => 'General',
  1 => '0',
  2 => '0.00',
  3 => '#,##0',
  4 => '#,##0.00',
  9 => '0%',
  10 => '0.00%',
  11 => '0.00E+00',
  12 => '# ?/?',
  13 => '# ??/??',
  14 => 'mm-dd-yy',
  15 => 'd-mmm-yy',
  16 => 'd-mmm',
  17 => 'mmm-yy',
  18 => 'h:mm AM/PM',
  19 => 'h:mm:ss AM/PM',
  20 => 'h:mm',
  21 => 'h:mm:ss',
  22 => 'm/d/yy h:mm',
  37 => '#,##0 ;(#,##0)',
  38 => '#,##0 ;[Red](#,##0)',
  39 => '#,##0.00;(#,##0.00)',
  40 => '#,##0.00;[Red](#,##0.00)',
  45 => 'mm:ss',
  46 => '[h]:mm:ss',
  47 => 'mmss.0',
  48 => '##0.0E+0',
  49 => '@',
}.freeze
78
+ @@nr = 0
79
+
80
+ # initialization and opening of a spreadsheet file
81
+ # values for packed: :zip
82
# Opens an .xlsx file: extracts the workbook, shared strings, styles and
# sheet XML parts into a scratch directory, parses them, and removes the
# scratch directory again.
#
# filename     - path of the .xlsx file; a name starting with "http://" is
#                first passed through open_from_uri
# packed       - pass :zip to have the file unpacked through unzip first
# file_warning - stored in @file_warning (default :error)
#
# Raises IOError when the resolved file does not exist.
def initialize(filename, packed=nil, file_warning = :error) #, create = false)
  super()
  @file_warning = file_warning
  @tmpdir = "oo_"+$$.to_s
  @tmpdir = File.join(ENV['ROO_TMP'], @tmpdir) if ENV['ROO_TMP']
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  FileUtils::mkdir(@tmpdir) unless File.exist?(@tmpdir)
  begin
    # Fetching/unpacking happens inside the begin block so that the scratch
    # directory is removed even when one of these steps raises.
    filename = open_from_uri(filename) if filename[0,7] == "http://"
    filename = unzip(filename) if packed and packed == :zip
    file_type_check(filename,'.xlsx','an Excel-xlsx')
    @cells_read = Hash.new
    @filename = filename
    unless File.file?(@filename)
      raise IOError, "file #{@filename} does not exist"
    end
    @@nr += 1
    @file_nr = @@nr
    extract_content(@filename)
    # Block form of File.open closes the handle even when parsing raises.
    workbook_path = File.join(@tmpdir, @file_nr.to_s+"_roo_workbook.xml")
    @workbook_doc = File.open(workbook_path) { |file| XML::Parser.io(file).parse }
    @shared_table = []
    shared_path = File.join(@tmpdir, @file_nr.to_s+'_roo_sharedStrings.xml')
    if File.exist?(shared_path)
      @sharedstring_doc = File.open(shared_path) { |file| XML::Parser.io(file).parse }
      read_shared_strings(@sharedstring_doc)
    end
    @styles_table = []
    # The original passed a block to Array.new without a size; such a
    # block is never invoked, so a plain empty array is equivalent.
    @style_definitions = []
    styles_path = File.join(@tmpdir, @file_nr.to_s+'_roo_styles.xml')
    if File.exist?(styles_path)
      @styles_doc = File.open(styles_path) { |file| XML::Parser.io(file).parse }
      read_styles(@styles_doc)
    end
    @sheet_doc = []
    @sheet_files.each_with_index do |item, i|
      @sheet_doc[i] = File.open(item) { |file| XML::Parser.io(file).parse }
    end
  ensure
    FileUtils::rm_r(@tmpdir)
  end
  @default_sheet = self.sheets.first
  @cell = Hash.new
  @cell_type = Hash.new
  @formula = Hash.new
  @first_row = Hash.new
  @last_row = Hash.new
  @first_column = Hash.new
  @last_column = Hash.new
  @header_line = 1
  @excelx_type = Hash.new
  @excelx_value = Hash.new
  @s_attribute = Hash.new # TODO: possibly remove again, only needed locally
end
144
+
145
+ # Returns the content of a spreadsheet-cell.
146
+ # (1,1) is the upper left corner.
147
+ # (1,1), (1,'A'), ('A',1), ('a',1) all refer to the
147
+ # cell in the first row and first column.
149
# Returns the content of the cell at (row, col) of +sheet+ (default sheet
# when omitted), reading the sheet first if needed.
# :date cells are stored as "YYYY-MM-DD" and returned as Date, :datetime
# cells are stored as "YYYY-MM-DD HH:MM:SS" and returned as DateTime; every
# other value is returned as stored.
def cell(row, col, sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row, col = normalize(row, col)
  case celltype(row, col, sheet)
  when :date
    ymd = @cell[sheet][[row, col]].split('-')
    Date.new(ymd[0].to_i, ymd[1].to_i, ymd[2].to_i)
  when :datetime
    date_part, time_part = @cell[sheet][[row, col]].split(' ')
    ymd = date_part.split('-')
    hms = time_part.split(':')
    DateTime.civil(ymd[0].to_i, ymd[1].to_i, ymd[2].to_i,
                   hms[0].to_i, hms[1].to_i, hms[2].to_i)
  else
    @cell[sheet][[row, col]]
  end
end
164
+
165
+ # Returns the formula at (row,col).
166
+ # Returns nil if there is no formula.
167
+ # The method #formula? checks if there is a formula.
168
# Returns the formula stored at (row, col) of +sheet+ (default sheet when
# omitted), or nil when the cell has none.
def formula(row,col,sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row, col = normalize(row, col)
  @formula[sheet][[row, col]]
end
178
+
179
+ # true, if there is a formula
180
# Returns true when the cell at (row, col) of +sheet+ (default sheet when
# omitted) holds a formula.
# The sheet is now forwarded to #formula; previously the lookup always went
# to the default sheet, whatever sheet the caller asked for.
def formula?(row,col,sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row,col = normalize(row,col)
  formula(row,col,sheet) != nil
end
186
+
187
# Font attributes of a cell style; each predicate is true only when the
# corresponding attribute was set to exactly true.
class Font
  attr_accessor :bold, :italic, :underline

  def bold?
    bold == true
  end

  def italic?
    italic == true
  end

  def underline?
    underline == true
  end
end
202
+
203
+ # Given a cell, return the cell's style
204
# Returns the style definition for the cell at (row, col) of +sheet+
# (default sheet when omitted): the cell's stored style attribute,
# converted to an integer (0 when absent), indexes @style_definitions.
def font(row, col, sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row, col = normalize(row, col)
  style_index = (@s_attribute[sheet][[row, col]] || 0).to_i
  @style_definitions[style_index]
end
213
+
214
+ # set a cell to a certain value
215
+ # (this will not be saved back to the spreadsheet file!)
216
# Stores +value+ in the in-memory cell at (row, col) of +sheet+ (default
# sheet when omitted) and records a matching cell type. Nothing is written
# back to the spreadsheet file.
#
# Integer and Float values are typed :float, String values :string.
# (The original compared against Fixnum, which no longer exists in
# Ruby 3.2+, and typed Float values as :string.)
#
# Raises ArgumentError for any other kind of value; the value itself has
# already been stored at that point, as before.
def set(row,col,value,sheet=nil) #:nodoc:
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row,col = normalize(row,col)
  set_value(row,col,value,sheet)
  case value
  when Integer, Float
    set_type(row,col,:float,sheet)
  when String
    set_type(row,col,:string,sheet)
  else
    raise ArgumentError, "Type for "+value.to_s+" not set"
  end
end
231
+
232
+ # returns the type of a cell:
233
+ # * :float
234
+ # * :string,
235
+ # * :date
236
+ # * :percentage
237
+ # * :formula
238
+ # * :time
239
+ # * :datetime
240
# Returns the type symbol of the cell at (row, col) of +sheet+ (default
# sheet when omitted). A cell with a stored formula is reported as :formula;
# otherwise the recorded cell type is returned (nil when nothing is stored).
def celltype(row,col,sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row, col = normalize(row, col)
  key = [row, col]
  @formula[sheet][key] ? :formula : @cell_type[sheet][key]
end
250
+
251
+ # returns the internal type of an excel cell
252
+ # * :numeric_or_formula
253
+ # * :string
254
+ # Note: this is only available within the Excelx class
255
# Returns the internal Excel type recorded for the cell at (row, col) of
# +sheet+ (default sheet when omitted). Only available in Excelx.
def excelx_type(row,col,sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row, col = normalize(row, col)
  @excelx_type[sheet][[row, col]]
end
261
+
262
+ # returns the internal value of an excelx cell
263
+ # Note: this is only available within the Excelx class
264
# Returns the raw value recorded for the cell at (row, col) of +sheet+
# (default sheet when omitted). Only available in Excelx.
def excelx_value(row,col,sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row, col = normalize(row, col)
  @excelx_value[sheet][[row, col]]
end
270
+
271
+ # returns the internal format of an excel cell
272
# Returns the number format of the cell at (row, col) of +sheet+ (default
# sheet when omitted), obtained by passing the cell's stored style
# attribute to attribute2format.
def excelx_format(row,col,sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row, col = normalize(row, col)
  attribute2format(@s_attribute[sheet][[row, col]])
end
280
+
281
+ # returns an array of sheet names in the spreadsheet
282
# Returns the 'name' attribute of every element whose local name is
# 'sheet' in the workbook document, in document order.
def sheets
  @workbook_doc.find("//*[local-name()='sheet']").map do |sheet_node|
    sheet_node.attributes.to_h['name']
  end
end
289
+ # shows the internal representation of all cells
290
+ # for debugging purposes
291
+ def to_s(sheet=nil)
292
+ sheet = @default_sheet unless sheet
293
+ read_cells(sheet) unless @cells_read[sheet]
294
+ @cell[sheet].inspect
295
+ end
296
+
297
+ # returns each formula in the selected sheet as an array of elements
298
+ # [row, col, formula]
299
# Collects every formula of +sheet+ (default sheet when omitted) as an
# array of [row, col, formula] triples, scanning row by row between the
# first/last row and the first/last column of the sheet.
def formulas(sheet=nil)
  found = []
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  first_row(sheet).upto(last_row(sheet)) do |row|
    first_column(sheet).upto(last_column(sheet)) do |col|
      next unless formula?(row,col,sheet)
      found << [row, col, formula(row,col,sheet)]
    end
  end
  found
end
313
+
314
+ private
315
+
316
+ # helper function to set the internal representation of cells
317
# Records one parsed cell in the internal per-sheet hashes.
#
# sheet        - sheet name used as the outer hash key
# x, y, i      - the cell is stored under the key [y, x+i]
# v            - raw value, converted according to +vt+:
#                :float/:percentage -> Float
#                :date     -> "YYYY-MM-DD", v as whole days after 1899-12-30
#                :datetime -> "YYYY-MM-DD HH:MM:SS", v as fractional days
#                             after 1899-12-30
#                :time     -> v multiplied by the seconds of a day
#                anything else except :string is stored unchanged
# vt           - cell type symbol
# formula      - formula text, stored only when truthy
# tr           - not used by this method
# str_v        - value stored for :string cells
# excelx_type, excelx_value, s_attribute - stored as given
def set_cell_values(sheet,x,y,i,v,vt,formula,tr,str_v,
                    excelx_type=nil,
                    excelx_value=nil,
                    s_attribute=nil)
  key = [y, x+i]
  (@cell_type[sheet] ||= {})[key] = vt
  @formula[sheet] ||= {}
  @formula[sheet][key] = formula if formula
  @cell[sheet] ||= {}
  @cell[sheet][key] =
    case vt
    when :float, :percentage then v.to_f
    when :string then str_v
    when :date then (Date.new(1899,12,30)+v.to_i).strftime("%Y-%m-%d")
    when :datetime then (DateTime.new(1899,12,30)+v.to_f).strftime("%Y-%m-%d %H:%M:%S")
    when :time then v.to_f*(24*60*60)
    else v
    end
  (@excelx_type[sheet] ||= {})[key] = excelx_type
  (@excelx_value[sheet] ||= {})[key] = excelx_value
  (@s_attribute[sheet] ||= {})[key] = s_attribute
end
350
+
351
+ # splits a coordinate like "AA12" into the parts "AA" (String) and 12 (Fixnum)
352
# Splits a coordinate such as "AA12" into its leading ASCII letters ("AA")
# and the following decimal digits as an Integer (12). Anything after the
# digits is ignored.
# Raises ArgumentError when the letter part is empty or the number is 0.
def split_coord(s)
  parts = /\A([A-Za-z]*)([0-9]*)/.match(s)
  letter = parts[1]
  number = parts[2].to_i
  raise ArgumentError if letter == "" or number == 0
  return letter, number
end
369
+
370
# Converts a coordinate string such as "B3" into [x, y]: x is the column
# number obtained from GenericSpreadsheet.letter_to_number, y is the row
# number.
def split_coordinate(str)
  letters, row_number = split_coord(str)
  return GenericSpreadsheet.letter_to_number(letters), row_number
end
376
+
377
+ # maps an Excel number format string to a roo cell type (:float when the format is unknown)
378
# Returns the roo cell type registered in FORMATS for the number format
# string +format+; formats not listed there are treated as :float.
def format2type(format)
  FORMATS.fetch(format, :float)
end
385
+
386
+ # read all cells in the selected sheet
387
# Reads every cell (element with local name 'c') of +sheet+ (default sheet
# when omitted) from its parsed sheet document into the internal hashes
# and marks the sheet as read.
#
# The 't' attribute of a cell selects how its value element 'v' is
# interpreted:
#   's'   - index into the shared string table
#   'b'   - boolean, stored as 'TRUE' / 'FALSE'
#   'str' - string result of a formula, stored as a string (before, such
#           cells fell through to the numeric branch and became 0.0)
#   other - typed by the number format of the style attribute 's'
# A value typed :time or :datetime by its format is reclassified from the
# number itself: >= 1.0 with a fractional part is :datetime, >= 1.0
# without one is :date, and anything below 1.0 keeps its format type.
#
# Raises ArgumentError when neither a sheet nor a default sheet is given,
# RangeError when the sheet is unknown.
def read_cells(sheet=nil)
  sheet ||= @default_sheet
  raise ArgumentError, "Error: sheet '#{sheet||'nil'}' not valid" if @default_sheet == nil and sheet==nil
  raise RangeError unless self.sheets.include? sheet
  n = self.sheets.index(sheet)
  @sheet_doc[n].find("//*[local-name()='c']").each do |c|
    attrs = c.attributes.to_h
    s_attribute = attrs['s'].to_i
    format = nil
    case attrs['t']
    when 's'
      tmp_type = :shared
    when 'b'
      tmp_type = :boolean
    when 'str'
      tmp_type = :string
    else
      format = attribute2format(s_attribute)
      tmp_type = format2type(format)
    end
    formula = nil
    c.each_element do |cell|
      formula = cell.content if cell.name == 'f'
      next unless cell.name == 'v'
      content = cell.content
      if tmp_type == :time or tmp_type == :datetime
        serial = content.to_f
        if serial >= 1.0
          tmp_type = (serial - serial.floor).abs > 0.000001 ? :datetime : :date
        end
      end
      excelx_type = [:numeric_or_formula,format]
      excelx_value = content
      v = nil
      str_v = nil
      case tmp_type
      when :shared
        vt = :string
        str_v = @shared_table[content.to_i]
        excelx_type = :string
      when :string
        vt = :string
        str_v = content
        excelx_type = :string
      when :boolean
        vt = :boolean
        v = content.to_i == 1 ? 'TRUE' : 'FALSE'
      when :date, :time, :datetime
        vt = tmp_type
        v = content
      when :formula
        vt = :formula
        v = content.to_f #TODO: !!!!
      else
        # includes :percentage, which is recorded as :float like before
        vt = :float
        v = content
      end
      x,y = split_coordinate(attrs['r'])
      set_cell_values(sheet,x,y,0,v,vt,formula,nil,str_v,excelx_type,excelx_value,s_attribute)
    end
  end
  @cells_read[sheet] = true
end
458
+
459
+ # Checks if the default_sheet exists. If not an RangeError exception is
460
+ # raised
461
# Verifies that a default sheet is set and exists in the workbook.
# Raises ArgumentError when @default_sheet is nil and RangeError when it is
# not one of the names returned by sheets. Returns nil otherwise.
def check_default_sheet
  raise ArgumentError, "Error: default_sheet not set" if @default_sheet == nil
  return if sheets.include?(@default_sheet)
  raise RangeError, "sheet '#{@default_sheet}' not found"
end
471
+
472
+ # extracts all needed files from the zip file
473
# Extracts the parts needed from the opened archive +zip+ into @tmpdir,
# each prefixed with @file_nr: workbook.xml, sharedStrings.xml, styles.xml
# and every sheetN.xml. The path of sheet N is remembered in
# @sheet_files[N-1].
#
# zipfilename - kept for interface compatibility; the entries are now read
#               from +zip+ directly instead of opening the archive a second
#               time
# zip         - the opened archive
# path        - not used by this method
#
# The '.' in the sheet file pattern is now escaped so that it only matches
# a literal dot. Returns nil.
def process_zipfile(zipfilename, zip, path='')
  @sheet_files = []
  prefix = @tmpdir+'/'+@file_nr.to_s
  fixed_parts = {
    'workbook.xml' => '_roo_workbook.xml',
    'sharedStrings.xml' => '_roo_sharedStrings.xml',
    'styles.xml' => '_roo_styles.xml',
  }
  zip.entries.each do |entry|
    entry_name = entry.to_s
    fixed_parts.each do |suffix, target|
      next unless entry_name.end_with?(suffix)
      File.open(prefix+target,'wb') { |f| f << zip.read(entry) }
    end
    if entry_name =~ /sheet([0-9]+)\.xml$/
      nr = $1
      sheet_path = prefix+"_roo_sheet#{nr}"
      File.open(sheet_path,'wb') { |f| f << zip.read(entry) }
      @sheet_files[nr.to_i-1] = sheet_path
    end
  end
  return
end
504
+
505
+ # extract files from the zip file
506
# Opens @filename as a zip archive and hands it, together with
# +zipfilename+, to process_zipfile.
def extract_content(zipfilename)
  Zip::ZipFile.open(@filename) { |archive| process_zipfile(zipfilename, archive) }
end
511
+
512
+ # sets the value of a cell
513
# Stores +value+ for the cell [row, col] of +sheet+.
# Without a sheet the default sheet is used. The original read the
# never-assigned @default_value here, so the fallback was always nil.
# The per-sheet hash is created on demand so that a sheet without any
# cells can be written to.
def set_value(row,col,value,sheet=nil)
  sheet ||= @default_sheet
  @cell[sheet] ||= {}
  @cell[sheet][[row,col]] = value
end
517
+
518
+ # sets the type of a cell
519
# Records +type+ as the cell type of [row, col] of +sheet+.
# Without a sheet the default sheet is used. The original read the
# never-assigned @default_value here, so the fallback was always nil.
# The per-sheet hash is created on demand.
def set_type(row,col,type,sheet=nil)
  sheet ||= @default_sheet
  @cell_type[sheet] ||= {}
  @cell_type[sheet][[row,col]] = type
end
523
+
524
+ # read the shared strings xml document
525
# Appends one entry to @shared_table for every 'si' element of +doc+.
# A direct 't' child replaces the text collected so far with its content;
# for 'r' children the contents of their nested 't' elements are appended.
# An 'si' element with neither yields an empty string.
def read_shared_strings(doc)
  doc.find("//*[local-name()='si']").each do |si|
    text = ''
    si.each_element do |elem|
      case elem.name
      when 'r'
        elem.each_element { |run_part| text << run_part.content if run_part.name == 't' }
      when 't'
        text = elem.content
      end
    end
    @shared_table << text
  end
end
543
+
544
+ # read the styles elements of an excelx document
545
# Reads the styles document +doc+:
#   @numFmts           - [numFmtId, formatCode] pairs of every 'numFmt'
#   @cellXfs           - one [numFmtId] entry per 'xf' inside 'cellXfs'
#   @style_definitions - gets, per 'xf', the font selected by its 'fontId'
# Fonts come from the 'font' children of 'fonts'; the presence of a 'b',
# 'i' or 'u' child switches on bold, italic or underline.
#
# Only 'xf' element children of 'cellXfs' are considered. The original
# iterated over all child nodes, so whitespace text nodes in a
# pretty-printed file shifted the style indexes.
def read_styles(doc)
  @numFmts = []
  @cellXfs = []
  fonts = []

  doc.find("//*[local-name()='numFmt']").each do |numFmt|
    numFmtId = numFmt.attributes.to_h['numFmtId']
    formatCode = numFmt.attributes.to_h['formatCode']
    @numFmts << [numFmtId, formatCode]
  end
  doc.find("//*[local-name()='fonts']").each do |fonts_el|
    fonts_el.each_element do |font_el|
      next unless font_el.name == 'font'
      font = Excelx::Font.new
      font_el.each_element do |font_sub_el|
        case font_sub_el.name
        when 'b'
          font.bold = true
        when 'i'
          font.italic = true
        when 'u'
          font.underline = true
        end
      end
      fonts << font
    end
  end

  doc.find("//*[local-name()='cellXfs']").each do |xfs|
    xfs.each_element do |xf|
      next unless xf.name == 'xf'
      numFmtId = xf.attributes.to_h['numFmtId']
      @cellXfs << [numFmtId]
      fontId = xf.attributes.to_h['fontId'].to_i
      @style_definitions << fonts[fontId]
    end
  end
end
583
+
584
+ # convert internal excelx attribute to a format
585
# Resolves the style attribute +s+ of a cell to a number format string.
#
# +s+ (converted with to_i) indexes @cellXfs to get a format id. A custom
# format code registered for that id in @numFmts wins; otherwise the id is
# looked up in STANDARD_FORMATS. Returns nil when nothing matches.
#
# A missing style table or an index without an entry now yields nil
# instead of raising NoMethodError, for example for workbooks without a
# styles part.
def attribute2format(s)
  xf = (@cellXfs || [])[s.to_i]
  return nil unless xf
  num_fmt_id = xf.first
  custom = (@numFmts || []).find { |nf| nf.first == num_fmt_id }
  code = custom && custom[1]
  code || STANDARD_FORMATS[num_fmt_id.to_i]
end
601
+
602
+ end # class