aunderwo-roo 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. data/History.txt +225 -0
  2. data/README.markdown +55 -0
  3. data/examples/roo_soap_client.rb +53 -0
  4. data/examples/roo_soap_server.rb +29 -0
  5. data/examples/write_me.rb +33 -0
  6. data/lib/roo.rb +32 -0
  7. data/lib/roo/excel.rb +468 -0
  8. data/lib/roo/excel2003xml.rb +411 -0
  9. data/lib/roo/excelx.rb +602 -0
  10. data/lib/roo/generic_spreadsheet.rb +628 -0
  11. data/lib/roo/google.rb +379 -0
  12. data/lib/roo/openoffice.rb +451 -0
  13. data/lib/roo/roo_rails_helper.rb +82 -0
  14. data/lib/roo/version.rb +9 -0
  15. data/test/1900_base.xls +0 -0
  16. data/test/1904_base.xls +0 -0
  17. data/test/Bibelbund.csv +3741 -0
  18. data/test/Bibelbund.ods +0 -0
  19. data/test/Bibelbund.xls +0 -0
  20. data/test/Bibelbund.xlsx +0 -0
  21. data/test/Bibelbund.xml +62518 -0
  22. data/test/Bibelbund1.ods +0 -0
  23. data/test/bad_excel_date.xls +0 -0
  24. data/test/bbu.ods +0 -0
  25. data/test/bbu.xls +0 -0
  26. data/test/bbu.xlsx +0 -0
  27. data/test/bbu.xml +152 -0
  28. data/test/bode-v1.ods.zip +0 -0
  29. data/test/bode-v1.xls.zip +0 -0
  30. data/test/boolean.ods +0 -0
  31. data/test/boolean.xls +0 -0
  32. data/test/boolean.xlsx +0 -0
  33. data/test/boolean.xml +112 -0
  34. data/test/borders.ods +0 -0
  35. data/test/borders.xls +0 -0
  36. data/test/borders.xlsx +0 -0
  37. data/test/borders.xml +144 -0
  38. data/test/bug-row-column-fixnum-float.xls +0 -0
  39. data/test/bug-row-column-fixnum-float.xml +127 -0
  40. data/test/datetime.ods +0 -0
  41. data/test/datetime.xls +0 -0
  42. data/test/datetime.xlsx +0 -0
  43. data/test/datetime.xml +142 -0
  44. data/test/datetime_floatconv.xls +0 -0
  45. data/test/datetime_floatconv.xml +148 -0
  46. data/test/emptysheets.ods +0 -0
  47. data/test/emptysheets.xls +0 -0
  48. data/test/emptysheets.xml +105 -0
  49. data/test/excel2003.xml +21140 -0
  50. data/test/false_encoding.xls +0 -0
  51. data/test/false_encoding.xml +132 -0
  52. data/test/formula.ods +0 -0
  53. data/test/formula.xls +0 -0
  54. data/test/formula.xlsx +0 -0
  55. data/test/formula.xml +134 -0
  56. data/test/formula_parse_error.xls +0 -0
  57. data/test/formula_parse_error.xml +1833 -0
  58. data/test/html-escape.ods +0 -0
  59. data/test/no_spreadsheet_file.txt +1 -0
  60. data/test/numbers1.csv +18 -0
  61. data/test/numbers1.ods +0 -0
  62. data/test/numbers1.xls +0 -0
  63. data/test/numbers1.xlsx +0 -0
  64. data/test/numbers1.xml +312 -0
  65. data/test/only_one_sheet.ods +0 -0
  66. data/test/only_one_sheet.xls +0 -0
  67. data/test/only_one_sheet.xlsx +0 -0
  68. data/test/only_one_sheet.xml +67 -0
  69. data/test/paragraph.ods +0 -0
  70. data/test/paragraph.xls +0 -0
  71. data/test/paragraph.xlsx +0 -0
  72. data/test/paragraph.xml +127 -0
  73. data/test/ric.ods +0 -0
  74. data/test/simple_spreadsheet.ods +0 -0
  75. data/test/simple_spreadsheet.xls +0 -0
  76. data/test/simple_spreadsheet.xlsx +0 -0
  77. data/test/simple_spreadsheet.xml +225 -0
  78. data/test/simple_spreadsheet_from_italo.ods +0 -0
  79. data/test/simple_spreadsheet_from_italo.xls +0 -0
  80. data/test/simple_spreadsheet_from_italo.xml +242 -0
  81. data/test/skipped_tests.rb +789 -0
  82. data/test/style.ods +0 -0
  83. data/test/style.xls +0 -0
  84. data/test/style.xlsx +0 -0
  85. data/test/style.xml +154 -0
  86. data/test/test_helper.rb +19 -0
  87. data/test/test_roo.rb +1834 -0
  88. data/test/time-test.csv +2 -0
  89. data/test/time-test.ods +0 -0
  90. data/test/time-test.xls +0 -0
  91. data/test/time-test.xlsx +0 -0
  92. data/test/time-test.xml +131 -0
  93. data/test/whitespace.ods +0 -0
  94. data/test/whitespace.xls +0 -0
  95. data/test/whitespace.xlsx +0 -0
  96. data/test/whitespace.xml +184 -0
  97. metadata +205 -0
@@ -0,0 +1,411 @@
1
+ require 'xml'
2
+ require 'fileutils'
3
+ require 'zip/zipfilesystem'
4
+ require 'date'
5
+ require 'base64'
6
+ require 'cgi'
7
+
8
+ class Excel2003XML < GenericSpreadsheet
9
+
10
+ @@nr = 0
11
+
12
# Opens an Excel 2003 XML file and parses it into @doc.
#
# filename     - path of the file; a name starting with "http://" is handed
#                to open_from_uri first
# packed       - pass :zip to have the file run through unzip first
# file_warning - stored in @file_warning before the file type check runs
#
# Raises IOError when the resolved path is not a regular file.
def initialize(filename, packed=nil, file_warning=:error)
  @file_warning = file_warning
  super()
  @tmpdir = "oo_"+$$.to_s
  @tmpdir = File.join(ENV['ROO_TMP'], @tmpdir) if ENV['ROO_TMP']
  # File.exists? has been removed from current Ruby; File.exist? is the
  # supported spelling.
  FileUtils::mkdir(@tmpdir) unless File.exist?(@tmpdir)
  begin
    # Download/unzip inside the begin block so the scratch directory is
    # removed even when one of these steps raises.
    filename = open_from_uri(filename) if filename[0,7] == "http://"
    filename = unzip(filename) if packed and packed == :zip
    file_type_check(filename,'.xml','an Excel 2003 XML')
    @cells_read = Hash.new
    @filename = filename
    unless File.file?(@filename)
      raise IOError, "file #{@filename} does not exist"
    end
    @doc = XML::Parser.file(@filename).parse
  ensure
    FileUtils::rm_r(@tmpdir)
  end
  @default_sheet = self.sheets.first
  @cell = Hash.new
  @cell_type = Hash.new
  @formula = Hash.new
  @first_row = Hash.new
  @last_row = Hash.new
  @first_column = Hash.new
  @last_column = Hash.new
  @style = Hash.new
  # per-sheet list of default style names, created on first access
  @style_defaults = Hash.new { |h,k| h[k] = [] }
  @style_definitions = Hash.new
  @header_line = 1
end
48
+
49
# Returns the content of the cell at (row, col); (1,1) is the upper left
# corner. The coordinates are passed through normalize before the lookup.
# A cell whose celltype is :date is returned as a Date built from the
# stored "yyyy-mm-dd" string; every other cell is returned as stored.
def cell(row, col, sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row, col = normalize(row, col)
  return @cell[sheet][[row, col]] unless celltype(row, col, sheet) == :date
  year, month, day = @cell[sheet][[row, col]].split('-')
  Date.new(year.to_i, month.to_i, day.to_i)
end
63
+
64
# Returns the formula stored for (row, col), or nil when there is none.
# The first five characters (the length of "oooc:") are cut off the stored
# text before it is returned.
# NOTE(review): the "oooc:" prefix looks inherited from the OpenOffice
# reader - confirm it applies to Excel 2003 XML formulas.
def formula(row,col,sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row, col = normalize(row, col)
  stored = @formula[sheet][[row, col]]
  stored.nil? ? nil : stored["oooc:".length..-1]
end
77
+
78
# True when the cell at (row, col) of the given sheet has a formula.
# The sheet is forwarded to #formula; previously it was dropped, so the
# answer always described the default sheet.
def formula?(row,col,sheet=nil)
  sheet = @default_sheet unless sheet
  read_cells(sheet) unless @cells_read[sheet]
  row,col = normalize(row,col)
  !formula(row,col,sheet).nil?
end
85
+
86
# Value holder for the font attributes of a style.
class Font
  attr_accessor :bold, :italic, :underline

  # true when the bold attribute is the string 'bold'
  def bold?
    bold == 'bold'
  end

  # true when the italic attribute is the string 'italic'
  def italic?
    italic == 'italic'
  end

  # true when any underline value has been set
  def underline?
    !underline.nil?
  end
end
101
+
102
# Looks up the style definition for a cell. The style name is taken from
# the cell itself, then from the sheet's default for that column, then
# falls back to 'Default'.
def font(row, col, sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row, col = normalize(row, col)
  style_name = @style[sheet][[row, col]]
  style_name ||= @style_defaults[sheet][col - 1]
  style_name ||= 'Default'
  @style_definitions[style_name]
end
110
+
111
# Sets a cell to a value in memory only (nothing is written back to the
# spreadsheet file) and records the matching cell type:
# Integer and Float values become :float, String values become :string.
#
# Raises ArgumentError for any other value class (after the value itself
# has been stored, as before).
def set(row,col,value,sheet=nil) #:nodoc:
  sheet = @default_sheet unless sheet
  read_cells(sheet) unless @cells_read[sheet]
  row,col = normalize(row,col)
  set_value(row,col,value,sheet)
  case value
  when Integer, Float
    # Float used to be tagged :string, and the Fixnum constant no longer
    # exists in current Ruby.
    set_type(row,col,:float,sheet)
  when String
    set_type(row,col,:string,sheet)
  else
    raise ArgumentError, "Type for "+value.to_s+" not set"
  end
end
128
+
129
# Returns the type of a cell. A cell with a stored formula is reported as
# :formula; otherwise the type recorded while reading the sheet is
# returned (:float, :string, :date, :percentage, :time, :datetime, ...).
def celltype(row,col,sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row, col = normalize(row, col)
  return :formula if @formula[sheet][[row, col]]
  @cell_type[sheet][[row, col]]
end
147
+
148
# Returns the 'Name' attribute of every ss:Worksheet node in the document,
# in document order.
def sheets
  @doc.find("//ss:Worksheet").map { |worksheet| worksheet.attributes['Name'] }
end
155
+
156
# Runs oo_version and returns whatever it left in @officeversion.
# NOTE(review): oo_version looks for an OpenOffice-style 'document-content'
# node - confirm this is meaningful for Excel 2003 XML files.
def officeversion
  oo_version
  @officeversion
end
162
+
163
+ # shows the internal representation of all cells
164
+ # mainly for debugging purposes
165
+ def to_s(sheet=nil)
166
+ sheet = @default_sheet unless sheet
167
+ read_cells(sheet) unless @cells_read[sheet]
168
+ @cell[sheet].inspect
169
+ end
170
+
171
# Placeholder for saving: writes nothing and always returns 42.
def save #:nodoc:
  42
end
175
+
176
# Collects every formula of the selected sheet as [row, col, formula]
# triples, scanning row by row between the sheet's first/last row and
# first/last column.
def formulas(sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  found = []
  first_row(sheet).upto(last_row(sheet)) do |row|
    first_column(sheet).upto(last_column(sheet)) do |col|
      found << [row, col, formula(row, col, sheet)] if formula?(row, col, sheet)
    end
  end
  found
end
192
+
193
+ private
194
+
195
# Stores the 'version' attribute of each 'document-content' node found in
# the document into @officeversion (the last match wins).
def oo_version
  @doc.find("//*[local-name()='document-content']").each { |office|
    @officeversion = office.attributes['version']
  }
end
201
+
202
# Stores one parsed cell in the per-sheet hashes under the key [y, x+i]:
# its type, its formula (only when one is given), its style name and its
# value. :float and :percentage values are converted with to_f, :string
# cells store str_v, :datetime values are parsed with DateTime.parse, and
# every other type stores v unchanged.
def set_cell_values(sheet,x,y,i,v,vt,formula,table_cell,str_v,style_name)
  key = [y, x + i]
  (@cell_type[sheet] ||= {})[key] = vt
  @formula[sheet] ||= {}
  @formula[sheet][key] = formula if formula
  @cell[sheet] ||= {}
  (@style[sheet] ||= {})[key] = style_name
  case vt
  when :float, :percentage
    @cell[sheet][key] = v.to_f
  when :string
    @cell[sheet][key] = str_v
  when :datetime
    @cell[sheet][key] = DateTime.parse(v)
    @cell_type[sheet][key] = :datetime
  else
    @cell[sheet][key] = v
  end
end
229
+
230
# Reads all cells of the selected sheet into the internal hashes.
#
# Rows and cells are numbered from 1; an 'Index' attribute on a Row or
# Cell jumps the running row/column counter to that position. Only 'Data'
# children of a Cell produce a value. Other children are skipped;
# previously they reached set_cell_values with an undefined type and
# raised NoMethodError on nil.
#
# Raises ArgumentError when neither a sheet nor a default sheet is set and
# RangeError when the sheet is unknown or no matching worksheet is found.
def read_cells(sheet=nil)
  sheet = @default_sheet unless sheet
  raise ArgumentError, "Error: sheet '#{sheet||'nil'}' not valid" if @default_sheet == nil and sheet==nil
  raise RangeError unless sheets.include? sheet
  sheet_found = false
  @doc.find("ss:Worksheet[@ss:Name='#{sheet}']").each do |ws|
    sheet_found = true
    row = 1
    col = 1
    ws.find('.//ss:Row').each do |r|
      skip_to_row = r.attributes['Index'].to_i
      row = skip_to_row if skip_to_row > 0
      r.each do |c|
        next unless c.name == 'Cell'
        skip_to_col = c.attributes['Index'].to_i
        col = skip_to_col if skip_to_col > 0
        c.each_element do |data|
          next unless data.name == 'Data'
          # The style id is looked up on the Data node first (as before)
          # and then on the enclosing Cell.
          style_name = data.attributes['StyleID'] || c.attributes['StyleID']
          formula = data.attributes['Formula']
          vt = data.attributes['Type'].downcase.to_sym
          v = data.content
          str_v = v
          case vt
          when :number
            v = v.to_f
            vt = :float
          when :datetime
            if v =~ /^1899-12-31T(\d{2}:\d{2}:\d{2})/
              # a timestamp on 1899-12-31 is stored as a pure time
              v = $1
              vt = :time
            elsif v =~ /([^T]+)T00:00:00.000/
              # a midnight timestamp is stored as a pure date
              v = $1
              vt = :date
            end
          when :boolean
            # NOTE(review): 'boolean-value' looks like an OpenOffice
            # attribute - confirm it exists in Excel 2003 XML.
            v = data.attributes['boolean-value']
          end
          set_cell_values(sheet,col,row,0,v,vt,formula,data,str_v,style_name)
        end
        col += 1
      end
      row += 1
      col = 1
    end
  end
  raise RangeError unless sheet_found
  @cells_read[sheet] = true
end
316
+
317
# Builds @style_definitions from a list of style nodes: a 'Default' entry
# plus one Font per child node of every node named 'style', keyed by the
# style's 'name' attribute (a later child overwrites an earlier one).
# Uses this class's own Font rather than Openoffice::Font, so the reader
# does not require the OpenOffice class to be loaded.
# NOTE(review): the node and attribute names look inherited from the
# OpenOffice reader - confirm against Excel 2003 XML before relying on it.
def read_styles(style_elements)
  @style_definitions['Default'] = Excel2003XML::Font.new
  style_elements.each do |style|
    next unless style.name == 'style'
    style_name = style.attributes['name']
    style.each do |properties|
      font = Excel2003XML::Font.new
      font.bold = properties.attributes['font-weight']
      font.italic = properties.attributes['font-style']
      font.underline = properties.attributes['text-underline-style']
      @style_definitions[style_name] = font
    end
  end
end
331
+
332
# Verifies that the default sheet is usable.
# Raises ArgumentError when no default sheet is set and RangeError when
# the default sheet is not among #sheets.
def check_default_sheet
  raise ArgumentError, "Error: default_sheet not set" if @default_sheet.nil?
  return if sheets.include?(@default_sheet)
  raise RangeError, "sheet '#{@default_sheet}' not found"
end
342
+
343
# Walks the zip archive recursively starting at path. Directories are
# descended into; the entry named exactly "content.xml" is copied into
# the scratch directory as "<file_nr>_roo_content.xml". Other files are
# ignored.
def process_zipfile(zip, path='')
  if !zip.file.file?(path)
    path += '/' unless path.empty?
    zip.dir.foreach(path) { |filename| process_zipfile(zip, path+filename) }
  elsif path == "content.xml"
    open(File.join(@tmpdir, @file_nr.to_s+'_roo_content.xml'),'wb') do |f|
      f << zip.read(path)
    end
  end
end
359
+
360
# Opens @filename as a zip archive and hands it to process_zipfile.
def extract_content
  Zip::ZipFile.open(@filename) { |zip| process_zipfile(zip) }
end
365
+
366
# Stores value for the cell (row, col) of the given sheet. Falls back to
# the default sheet (the old fallback read @default_value, which is never
# assigned) and creates the per-sheet hash when it does not exist yet.
def set_value(row,col,value,sheet=nil)
  sheet = @default_sheet unless sheet
  @cell[sheet] ||= {}
  @cell[sheet][[row,col]] = value
end
370
+
371
# Records type for the cell (row, col) of the given sheet. Falls back to
# the default sheet (the old fallback read @default_value, which is never
# assigned) and creates the per-sheet hash when it does not exist yet.
def set_type(row,col,type,sheet=nil)
  sheet = @default_sheet unless sheet
  @cell_type[sheet] ||= {}
  @cell_type[sheet][[row,col]] = type
end
375
+
376
+ A_ROO_TYPE = {
377
+ "float" => :float,
378
+ "string" => :string,
379
+ "date" => :date,
380
+ "percentage" => :percentage,
381
+ "time" => :time,
382
+ }
383
+
384
# Maps a type name string to the roo type symbol via A_ROO_TYPE; unknown
# names give nil.
# NOTE(review): this defines a method on Openoffice from inside another
# class - confirm that is intended.
def Openoffice.oo_type_2_roo_type(ootype)
  A_ROO_TYPE[ootype]
end
387
+
388
# Flattens a list of child nodes into one string. Text nodes and ordinary
# elements contribute their content; an 's' element stands for a run of
# blanks whose length is its 'c' attribute (a missing or zero count means
# one blank).
def children_to_string(children)
  pieces = children.map do |node|
    if !node.text? && node.name == 's'
      blanks = node.attributes['c'].to_i
      blanks = 1 if blanks == 0
      " " * blanks
    else
      node.content
    end
  end
  pieces.inject('') { |text, piece| text + piece }
end
410
+
411
+ end # class
@@ -0,0 +1,602 @@
1
+ require 'xml'
2
+ require 'fileutils'
3
+ require 'zip/zipfilesystem'
4
+ require 'date'
5
+
6
class String
  # Fallback for Ruby versions without a native String#end_with?. The
  # native method is left alone when present, because it also accepts
  # several suffixes, which this one-argument fallback does not.
  unless method_defined?(:end_with?)
    def end_with?(str)
      self[-str.length,str.length] == str
    end
  end
end
11
+
12
+ class Excelx < GenericSpreadsheet
13
+ FORMATS = {
14
+ 'General' => :float,
15
+ '0' => :float,
16
+ '0.00' => :float,
17
+ '#,##0' => :float,
18
+ '#,##0.00' => :float,
19
+ '0%' => :percentage,
20
+ '0.00%' => :percentage,
21
+ '0.00E+00' => :float,
22
+ '# ?/?' => :float, #??? TODO:
23
+ '# ??/??' => :float, #??? TODO:
24
+ 'mm-dd-yy' => :date,
25
+ 'd-mmm-yy' => :date,
26
+ 'd-mmm' => :date,
27
+ 'mmm-yy' => :date,
28
+ 'h:mm AM/PM' => :date,
29
+ 'h:mm:ss AM/PM' => :date,
30
+ 'h:mm' => :time,
31
+ 'h:mm:ss' => :time,
32
+ 'm/d/yy h:mm' => :date,
33
+ '#,##0 ;(#,##0)' => :float,
34
+ '#,##0 ;[Red](#,##0)' => :float,
35
+ '#,##0.00;(#,##0.00)' => :float,
36
+ '#,##0.00;[Red](#,##0.00)' => :float,
37
+ 'mm:ss' => :time,
38
+ '[h]:mm:ss' => :time,
39
+ 'mmss.0' => :time,
40
+ '##0.0E+0' => :float,
41
+ '@' => :float,
42
+ #-- zusaetzliche Formate, die nicht standardmaessig definiert sind:
43
+ "yyyy\\-mm\\-dd" => :date,
44
+ 'dd/mm/yy' => :date,
45
+ 'hh:mm:ss' => :time,
46
+ "dd/mm/yy\\ hh:mm" => :datetime,
47
+ }
48
+ STANDARD_FORMATS = {
49
+ 0 => 'General',
50
+ 1 => '0',
51
+ 2 => '0.00',
52
+ 3 => '#,##0',
53
+ 4 => '#,##0.00',
54
+ 9 => '0%',
55
+ 10 => '0.00%',
56
+ 11 => '0.00E+00',
57
+ 12 => '# ?/?',
58
+ 13 => '# ??/??',
59
+ 14 => 'mm-dd-yy',
60
+ 15 => 'd-mmm-yy',
61
+ 16 => 'd-mmm',
62
+ 17 => 'mmm-yy',
63
+ 18 => 'h:mm AM/PM',
64
+ 19 => 'h:mm:ss AM/PM',
65
+ 20 => 'h:mm',
66
+ 21 => 'h:mm:ss',
67
+ 22 => 'm/d/yy h:mm',
68
+ 37 => '#,##0 ;(#,##0)',
69
+ 38 => '#,##0 ;[Red](#,##0)',
70
+ 39 => '#,##0.00;(#,##0.00)',
71
+ 40 => '#,##0.00;[Red](#,##0.00)',
72
+ 45 => 'mm:ss',
73
+ 46 => '[h]:mm:ss',
74
+ 47 => 'mmss.0',
75
+ 48 => '##0.0E+0',
76
+ 49 => '@',
77
+ }
78
+ @@nr = 0
79
+
80
# Opens an .xlsx file: extracts the workbook, shared strings, styles and
# sheet XML parts into a scratch directory, parses them, and removes the
# scratch directory again.
#
# filename     - path of the file; a name starting with "http://" is handed
#                to open_from_uri first
# packed       - pass :zip to have the file run through unzip first
# file_warning - stored in @file_warning before the file type check runs
#
# Raises IOError when the resolved path is not a regular file.
def initialize(filename, packed=nil, file_warning = :error) #, create = false)
  super()
  @file_warning = file_warning
  @tmpdir = "oo_"+$$.to_s
  @tmpdir = File.join(ENV['ROO_TMP'], @tmpdir) if ENV['ROO_TMP']
  # File.exists? has been removed from current Ruby; File.exist? is the
  # supported spelling.
  FileUtils::mkdir(@tmpdir) unless File.exist?(@tmpdir)
  # The block form closes the file even when parsing raises.
  parse_xml = lambda { |path| File.open(path) { |io| XML::Parser.io(io).parse } }
  begin
    # Download/unzip inside the begin block so the scratch directory is
    # removed even when one of these steps raises.
    filename = open_from_uri(filename) if filename[0,7] == "http://"
    filename = unzip(filename) if packed and packed == :zip
    file_type_check(filename,'.xlsx','an Excel-xlsx')
    @cells_read = Hash.new
    @filename = filename
    unless File.file?(@filename)
      raise IOError, "file #{@filename} does not exist"
    end
    @@nr += 1
    @file_nr = @@nr
    extract_content(@filename)
    @workbook_doc = parse_xml.call(File.join(@tmpdir, @file_nr.to_s+"_roo_workbook.xml"))
    @shared_table = []
    shared_strings_path = File.join(@tmpdir, @file_nr.to_s+'_roo_sharedStrings.xml')
    if File.exist?(shared_strings_path)
      @sharedstring_doc = parse_xml.call(shared_strings_path)
      read_shared_strings(@sharedstring_doc)
    end
    @styles_table = []
    # Array.new ignores a block when no size is given, so this was always
    # just an empty array.
    @style_definitions = []
    styles_path = File.join(@tmpdir, @file_nr.to_s+'_roo_styles.xml')
    if File.exist?(styles_path)
      @styles_doc = parse_xml.call(styles_path)
      read_styles(@styles_doc)
    end
    @sheet_doc = []
    @sheet_files.each_with_index do |item, i|
      @sheet_doc[i] = parse_xml.call(item)
    end
  ensure
    FileUtils::rm_r(@tmpdir)
  end
  @default_sheet = self.sheets.first
  @cell = Hash.new
  @cell_type = Hash.new
  @formula = Hash.new
  @first_row = Hash.new
  @last_row = Hash.new
  @first_column = Hash.new
  @last_column = Hash.new
  @header_line = 1
  @excelx_type = Hash.new
  @excelx_value = Hash.new
  @s_attribute = Hash.new # TODO: possibly remove again, only needed locally
end
144
+
145
# Returns the content of the cell at (row, col); (1,1) is the upper left
# corner. The coordinates are passed through normalize before the lookup.
# :date cells are returned as Date and :datetime cells as DateTime, both
# rebuilt from the stored string; every other cell is returned as stored.
def cell(row, col, sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row, col = normalize(row, col)
  case celltype(row, col, sheet)
  when :date
    year, month, day = @cell[sheet][[row, col]].split('-')
    Date.new(year.to_i, month.to_i, day.to_i)
  when :datetime
    date_part, time_part = @cell[sheet][[row, col]].split(' ')
    year, month, day = date_part.split('-')
    hour, minute, second = time_part.split(':')
    DateTime.civil(year.to_i, month.to_i, day.to_i, hour.to_i, minute.to_i, second.to_i)
  else
    @cell[sheet][[row, col]]
  end
end
164
+
165
# Returns the formula stored for (row, col), or nil when the cell has
# none. #formula? answers the yes/no question.
def formula(row,col,sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row, col = normalize(row, col)
  @formula[sheet][[row, col]]
end
178
+
179
# True when the cell at (row, col) of the given sheet has a formula.
# The sheet is forwarded to #formula; previously it was dropped, so the
# answer always described the default sheet.
def formula?(row,col,sheet=nil)
  sheet = @default_sheet unless sheet
  read_cells(sheet) unless @cells_read[sheet]
  row,col = normalize(row,col)
  !formula(row,col,sheet).nil?
end
186
+
187
# Value holder for the font flags of a style; a flag counts as set only
# when it is exactly true.
class Font
  attr_accessor :bold, :italic, :underline

  # true when the bold flag is true
  def bold?
    bold == true
  end

  # true when the italic flag is true
  def italic?
    italic == true
  end

  # true when the underline flag is true
  def underline?
    underline == true
  end
end
202
+
203
# Looks up the style definition for a cell using the cell's stored 's'
# attribute as an index; a cell without one uses index 0.
def font(row, col, sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row, col = normalize(row, col)
  style_index = (@s_attribute[sheet][[row, col]] || 0).to_i
  @style_definitions[style_index]
end
213
+
214
# Sets a cell to a value in memory only (nothing is written back to the
# spreadsheet file) and records the matching cell type:
# Integer and Float values become :float, String values become :string.
#
# Raises ArgumentError for any other value class (after the value itself
# has been stored, as before).
def set(row,col,value,sheet=nil) #:nodoc:
  sheet = @default_sheet unless sheet
  read_cells(sheet) unless @cells_read[sheet]
  row,col = normalize(row,col)
  set_value(row,col,value,sheet)
  case value
  when Integer, Float
    # Float used to be tagged :string, and the Fixnum constant no longer
    # exists in current Ruby.
    set_type(row,col,:float,sheet)
  when String
    set_type(row,col,:string,sheet)
  else
    raise ArgumentError, "Type for "+value.to_s+" not set"
  end
end
231
+
232
# Returns the type of a cell. A cell with a stored formula is reported as
# :formula; otherwise the type recorded while reading the sheet is
# returned (:float, :string, :date, :percentage, :time, :datetime, ...).
def celltype(row,col,sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row, col = normalize(row, col)
  return :formula if @formula[sheet][[row, col]]
  @cell_type[sheet][[row, col]]
end
250
+
251
# Returns the internal Excelx type recorded for a cell while reading the
# sheet (for example :string, or a [:numeric_or_formula, format] pair).
# Only available in the Excelx class.
def excelx_type(row,col,sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row, col = normalize(row, col)
  @excelx_type[sheet][[row, col]]
end
261
+
262
# Returns the raw value recorded for a cell while reading the sheet.
# Only available in the Excelx class.
def excelx_value(row,col,sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row, col = normalize(row, col)
  @excelx_value[sheet][[row, col]]
end
270
+
271
# Returns the number format of a cell, resolved from the cell's stored
# 's' attribute through attribute2format.
def excelx_format(row,col,sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row, col = normalize(row, col)
  attribute2format(@s_attribute[sheet][[row, col]])
end
280
+
281
# Returns the 'name' attribute of every 'sheet' node of the workbook
# document, in document order.
def sheets
  @workbook_doc.find("//*[local-name()='sheet']").map do |sheet_node|
    sheet_node.attributes.to_h['name']
  end
end
289
+ # shows the internal representation of all cells
290
+ # for debugging purposes
291
+ def to_s(sheet=nil)
292
+ sheet = @default_sheet unless sheet
293
+ read_cells(sheet) unless @cells_read[sheet]
294
+ @cell[sheet].inspect
295
+ end
296
+
297
# Collects every formula of the selected sheet as [row, col, formula]
# triples, scanning row by row between the sheet's first/last row and
# first/last column.
def formulas(sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  found = []
  first_row(sheet).upto(last_row(sheet)) do |row|
    first_column(sheet).upto(last_column(sheet)) do |col|
      found << [row, col, formula(row, col, sheet)] if formula?(row, col, sheet)
    end
  end
  found
end
313
+
314
+ private
315
+
316
# Stores one parsed cell in the per-sheet hashes under the key [y, x+i]:
# its type, its formula (only when one is given), its converted value and
# the Excelx-specific type, raw value and 's' attribute.
# Conversions: :float/:percentage via to_f; :string takes str_v; :date and
# :datetime are day offsets from 1899-12-30 rendered as text; :time is the
# day fraction multiplied by 24*60*60; anything else stores v unchanged.
def set_cell_values(sheet,x,y,i,v,vt,formula,tr,str_v,
                    excelx_type=nil,
                    excelx_value=nil,
                    s_attribute=nil)
  key = [y, x + i]
  (@cell_type[sheet] ||= {})[key] = vt
  @formula[sheet] ||= {}
  @formula[sheet][key] = formula if formula
  @cell[sheet] ||= {}
  @cell[sheet][key] =
    case vt
    when :float, :percentage then v.to_f
    when :string then str_v
    when :date then (Date.new(1899,12,30)+v.to_i).strftime("%Y-%m-%d")
    when :datetime then (DateTime.new(1899,12,30)+v.to_f).strftime("%Y-%m-%d %H:%M:%S")
    when :time then v.to_f*(24*60*60)
    else v
    end
  (@excelx_type[sheet] ||= {})[key] = excelx_type
  (@excelx_value[sheet] ||= {})[key] = excelx_value
  (@s_attribute[sheet] ||= {})[key] = s_attribute
end
350
+
351
# Splits a coordinate such as "AA12" into its leading ASCII letters ("AA")
# and the integer formed by the digits that follow (12). Anything after
# the digits is ignored.
# Raises ArgumentError when there are no leading letters or when the
# number is missing or zero.
def split_coord(s)
  letter = s[/\A[A-Za-z]*/]
  number = s[letter.length..-1][/\A[0-9]*/].to_i
  raise ArgumentError if letter.empty? || number.zero?
  [letter, number]
end
369
+
370
# Converts a coordinate string such as "B3" into numeric [x, y]: x is the
# column letters run through GenericSpreadsheet.letter_to_number, y is the
# row number.
def split_coordinate(str)
  letter, number = split_coord(str)
  [GenericSpreadsheet.letter_to_number(letter), number]
end
376
+
377
# Maps a number format string to a roo cell type via FORMATS; formats
# that are not listed there are treated as :float.
def format2type(format)
  FORMATS.fetch(format, :float)
end
385
+
386
# Reads all cells of the selected sheet into the internal hashes.
#
# Each 'c' node is classified from its 't' attribute ('s' = shared string,
# 'b' = boolean); otherwise the type comes from the number format behind
# its 's' attribute. An 'f' child supplies the formula and every 'v' child
# stores one value via set_cell_values.
#
# Raises ArgumentError when neither a sheet nor a default sheet is set and
# RangeError when the sheet is unknown.
def read_cells(sheet=nil)
  sheet ||= @default_sheet
  raise ArgumentError, "Error: sheet '#{sheet||'nil'}' not valid" if @default_sheet == nil and sheet==nil
  raise RangeError unless sheets.include? sheet
  n = sheets.index(sheet)
  @sheet_doc[n].find("//*[local-name()='c']").each do |c|
    s_attribute = c.attributes.to_h['s'].to_i
    format = nil
    tmp_type =
      case c.attributes.to_h['t']
      when 's' then :shared
      when 'b' then :boolean
      else
        format = attribute2format(s_attribute)
        format2type(format)
      end
    formula = nil
    c.each_element do |cell|
      formula = cell.content if cell.name == 'f'
      next unless cell.name == 'v'
      if tmp_type == :time or tmp_type == :datetime
        serial = cell.content.to_f
        # a serial of one day or more carries a date part: with a day
        # fraction it is a datetime, without one a plain date
        if serial >= 1.0
          tmp_type = (serial - serial.floor).abs > 0.000001 ? :datetime : :date
        end
      end
      excelx_type = [:numeric_or_formula,format]
      excelx_value = cell.content
      v = nil
      str_v = nil
      case tmp_type
      when :shared
        vt = :string
        str_v = @shared_table[cell.content.to_i]
        excelx_type = :string
      when :boolean
        vt = :boolean
        v = cell.content.to_i == 1 ? 'TRUE' : 'FALSE'
      when :date, :time, :datetime
        vt = tmp_type
        v = cell.content
      when :formula
        vt = :formula
        v = cell.content.to_f #TODO: !!!!
      else
        vt = :float
        v = cell.content
      end
      x,y = split_coordinate(c.attributes.to_h['r'])
      set_cell_values(sheet,x,y,0,v,vt,formula,nil,str_v,excelx_type,excelx_value,s_attribute)
    end
  end
  @cells_read[sheet] = true
end
458
+
459
# Verifies that the default sheet is usable.
# Raises ArgumentError when no default sheet is set and RangeError when
# the default sheet is not among #sheets.
def check_default_sheet
  raise ArgumentError, "Error: default_sheet not set" if @default_sheet.nil?
  return if sheets.include?(@default_sheet)
  raise RangeError, "sheet '#{@default_sheet}' not found"
end
471
+
472
# Copies the needed parts of the archive into the scratch directory:
# entries ending in workbook.xml, sharedStrings.xml and styles.xml, plus
# every entry matching sheet<N>.xml. Sheet files are remembered in
# @sheet_files at index N-1. Entries are listed from zipfilename and their
# content is read through the already opened zip handle.
def process_zipfile(zipfilename, zip, path='')
  @sheet_files = []
  prefix = @tmpdir+'/'+@file_nr.to_s
  Zip::ZipFile.open(zipfilename) do |zf|
    zf.entries.each do |entry|
      entry_name = entry.to_s
      %w[workbook sharedStrings styles].each do |part|
        next unless entry_name.end_with?("#{part}.xml")
        open("#{prefix}_roo_#{part}.xml",'wb') { |f| f << zip.read(entry) }
      end
      if entry_name =~ /sheet([0-9]+).xml$/
        nr = $1
        target = "#{prefix}_roo_sheet#{nr}"
        open(target,'wb') { |f| f << zip.read(entry) }
        @sheet_files[nr.to_i-1] = target
      end
    end
  end
  return
end
504
+
505
# Opens @filename as a zip archive and passes it, together with
# zipfilename, to process_zipfile.
def extract_content(zipfilename)
  Zip::ZipFile.open(@filename) { |zip| process_zipfile(zipfilename, zip) }
end
511
+
512
# Stores value for the cell (row, col) of the given sheet. Falls back to
# the default sheet (the old fallback read @default_value, which is never
# assigned) and creates the per-sheet hash when it does not exist yet.
def set_value(row,col,value,sheet=nil)
  sheet = @default_sheet unless sheet
  @cell[sheet] ||= {}
  @cell[sheet][[row,col]] = value
end
517
+
518
# Records type for the cell (row, col) of the given sheet. Falls back to
# the default sheet (the old fallback read @default_value, which is never
# assigned) and creates the per-sheet hash when it does not exist yet.
def set_type(row,col,type,sheet=nil)
  sheet = @default_sheet unless sheet
  @cell_type[sheet] ||= {}
  @cell_type[sheet][[row,col]] = type
end
523
+
524
# Fills @shared_table with one string per 'si' node of the shared strings
# document. The 't' children of 'r' elements are appended to the entry;
# a direct 't' child replaces it.
def read_shared_strings(doc)
  doc.find("//*[local-name()='si']").each do |si|
    text = ''
    si.each_element do |elem|
      case elem.name
      when 'r'
        elem.each_element { |run_part| text << run_part.content if run_part.name == 't' }
      when 't'
        text = elem.content
      end
    end
    @shared_table << text
  end
end
543
+
544
# Reads the styles document:
# * @numFmts gets one [numFmtId, formatCode] pair per 'numFmt' node
# * @cellXfs gets one [numFmtId] entry per 'xf' element inside 'cellXfs'
# * @style_definitions gets, per such 'xf', the Font selected by its
#   'fontId' attribute
# Only 'xf' child elements are visited. The previous plain #each also
# walked non-element children, which added bogus entries and shifted the
# style indexes.
def read_styles(doc)
  @numFmts = []
  @cellXfs = []
  fonts = []

  doc.find("//*[local-name()='numFmt']").each do |numFmt|
    numFmtId = numFmt.attributes.to_h['numFmtId']
    formatCode = numFmt.attributes.to_h['formatCode']
    @numFmts << [numFmtId, formatCode]
  end
  doc.find("//*[local-name()='fonts']").each do |fonts_el|
    fonts_el.each_element do |font_el|
      next unless font_el.name == 'font'
      font = Excelx::Font.new
      font_el.each_element do |font_sub_el|
        case font_sub_el.name
        when 'b'
          font.bold = true
        when 'i'
          font.italic = true
        when 'u'
          font.underline = true
        end
      end
      fonts << font
    end
  end

  doc.find("//*[local-name()='cellXfs']").each do |xfs|
    xfs.each_element do |xf|
      next unless xf.name == 'xf'
      numFmtId = xf.attributes.to_h['numFmtId']
      @cellXfs << [numFmtId]
      fontId = xf.attributes.to_h['fontId'].to_i
      @style_definitions << fonts[fontId]
    end
  end
end
583
+
584
# Resolves a cell's 's' attribute (an index into @cellXfs) to a number
# format string. A custom format from @numFmts with the same numFmtId
# wins; otherwise the id is looked up in STANDARD_FORMATS.
# Returns nil when the index has no cellXfs entry or the id is unknown.
# Missing style tables are treated as empty, so this no longer raises
# NoMethodError on nil in those cases.
def attribute2format(s)
  xf = (@cellXfs || [])[s.to_i]
  return nil unless xf
  num_fmt_id = xf.first
  custom = (@numFmts || []).find { |nf| nf.first == num_fmt_id }
  code = custom && custom[1]
  return code if code
  STANDARD_FORMATS[num_fmt_id.to_i]
end
601
+
602
+ end # class