donibuchanan-roo 1.3.12

Sign up to get free protection for your applications and to get access to all the features.
Files changed (78) hide show
  1. data/History.txt +225 -0
  2. data/README.markdown +55 -0
  3. data/examples/roo_soap_client.rb +53 -0
  4. data/examples/roo_soap_server.rb +29 -0
  5. data/examples/write_me.rb +33 -0
  6. data/lib/roo/excel.rb +468 -0
  7. data/lib/roo/excel2003xml.rb +411 -0
  8. data/lib/roo/excelx.rb +602 -0
  9. data/lib/roo/generic_spreadsheet.rb +628 -0
  10. data/lib/roo/google.rb +379 -0
  11. data/lib/roo/openoffice.rb +451 -0
  12. data/lib/roo/roo_rails_helper.rb +82 -0
  13. data/lib/roo/version.rb +9 -0
  14. data/lib/roo.rb +32 -0
  15. data/test/1900_base.xls +0 -0
  16. data/test/1904_base.xls +0 -0
  17. data/test/Bibelbund.csv +3741 -0
  18. data/test/Bibelbund.ods +0 -0
  19. data/test/Bibelbund.xls +0 -0
  20. data/test/Bibelbund.xlsx +0 -0
  21. data/test/Bibelbund1.ods +0 -0
  22. data/test/bad_excel_date.xls +0 -0
  23. data/test/bbu.ods +0 -0
  24. data/test/bbu.xls +0 -0
  25. data/test/bbu.xlsx +0 -0
  26. data/test/bode-v1.ods.zip +0 -0
  27. data/test/bode-v1.xls.zip +0 -0
  28. data/test/boolean.ods +0 -0
  29. data/test/boolean.xls +0 -0
  30. data/test/boolean.xlsx +0 -0
  31. data/test/borders.ods +0 -0
  32. data/test/borders.xls +0 -0
  33. data/test/borders.xlsx +0 -0
  34. data/test/bug-row-column-fixnum-float.xls +0 -0
  35. data/test/datetime.ods +0 -0
  36. data/test/datetime.xls +0 -0
  37. data/test/datetime.xlsx +0 -0
  38. data/test/datetime_floatconv.xls +0 -0
  39. data/test/emptysheets.ods +0 -0
  40. data/test/emptysheets.xls +0 -0
  41. data/test/excel2003.xml +21140 -0
  42. data/test/false_encoding.xls +0 -0
  43. data/test/formula.ods +0 -0
  44. data/test/formula.xls +0 -0
  45. data/test/formula.xlsx +0 -0
  46. data/test/formula_parse_error.xls +0 -0
  47. data/test/html-escape.ods +0 -0
  48. data/test/no_spreadsheet_file.txt +1 -0
  49. data/test/numbers1.csv +18 -0
  50. data/test/numbers1.ods +0 -0
  51. data/test/numbers1.xls +0 -0
  52. data/test/numbers1.xlsx +0 -0
  53. data/test/only_one_sheet.ods +0 -0
  54. data/test/only_one_sheet.xls +0 -0
  55. data/test/only_one_sheet.xlsx +0 -0
  56. data/test/paragraph.ods +0 -0
  57. data/test/paragraph.xls +0 -0
  58. data/test/paragraph.xlsx +0 -0
  59. data/test/ric.ods +0 -0
  60. data/test/simple_spreadsheet.ods +0 -0
  61. data/test/simple_spreadsheet.xls +0 -0
  62. data/test/simple_spreadsheet.xlsx +0 -0
  63. data/test/simple_spreadsheet_from_italo.ods +0 -0
  64. data/test/simple_spreadsheet_from_italo.xls +0 -0
  65. data/test/skipped_tests.rb +789 -0
  66. data/test/style.ods +0 -0
  67. data/test/style.xls +0 -0
  68. data/test/style.xlsx +0 -0
  69. data/test/test_helper.rb +19 -0
  70. data/test/test_roo.rb +1834 -0
  71. data/test/time-test.csv +2 -0
  72. data/test/time-test.ods +0 -0
  73. data/test/time-test.xls +0 -0
  74. data/test/time-test.xlsx +0 -0
  75. data/test/whitespace.ods +0 -0
  76. data/test/whitespace.xls +0 -0
  77. data/test/whitespace.xlsx +0 -0
  78. metadata +185 -0
@@ -0,0 +1,411 @@
1
+ require 'xml'
2
+ require 'fileutils'
3
+ require 'zip/zipfilesystem'
4
+ require 'date'
5
+ require 'base64'
6
+ require 'cgi'
7
+
8
+ class Excel2003XML < GenericSpreadsheet
9
+
10
+ @@nr = 0
11
+
12
# Opens an Excel 2003 XML spreadsheet file and parses it into @doc.
#
# filename     - path of the .xml file; a name starting with "http://" is
#                first passed through open_from_uri
# packed       - pass :zip to have the file run through unzip first
# file_warning - stored in @file_warning before file_type_check runs
#                (presumably consulted there -- confirm in GenericSpreadsheet)
#
# Raises IOError when the resulting file does not exist.
def initialize(filename, packed=nil, file_warning=:error)
  @file_warning = file_warning
  super()
  @tmpdir = "oo_"+$$.to_s
  @tmpdir = File.join(ENV['ROO_TMP'], @tmpdir) if ENV['ROO_TMP']
  # File.exist? instead of File.exists?: the latter was removed in Ruby 3.2
  FileUtils::mkdir(@tmpdir) unless File.exist?(@tmpdir)
  begin
    # Fetching/unpacking happens inside the begin block so that the scratch
    # directory is removed by the ensure clause even when one of them raises.
    filename = open_from_uri(filename) if filename[0,7] == "http://"
    filename = unzip(filename) if packed == :zip
    file_type_check(filename,'.xml','an Excel 2003 XML')
    @cells_read = Hash.new
    @filename = filename
    unless File.file?(@filename)
      raise IOError, "file #{@filename} does not exist"
    end
    @doc = XML::Parser.file(@filename).parse
  ensure
    FileUtils::rm_r(@tmpdir)
  end
  @default_sheet = self.sheets.first
  # per-sheet caches, filled by read_cells/set_cell_values
  @cell = Hash.new
  @cell_type = Hash.new
  @formula = Hash.new
  @first_row = Hash.new
  @last_row = Hash.new
  @first_column = Hash.new
  @last_column = Hash.new
  @style = Hash.new
  # block form so every sheet gets its own array instead of a shared default
  @style_defaults = Hash.new { |h,k| h[k] = [] }
  @style_definitions = Hash.new
  @header_line = 1
end
48
+
49
# Returns the value stored in a cell of the given (or default) sheet.
# row and col are passed through normalize before the lookup.
# A cell whose celltype is :date is stored as a 'yyyy-mm-dd' string and is
# converted to a Date object here; every other value is returned as stored.
def cell(row, col, sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row, col = normalize(row, col)
  unless celltype(row, col, sheet) == :date
    return @cell[sheet][[row, col]]
  end
  year, month, day = @cell[sheet][[row, col]].split('-')
  Date.new(year.to_i, month.to_i, day.to_i)
end
63
+
64
# Returns the formula stored at (row,col), or nil if there is none.
# The method #formula? checks if there is a formula.
#
# A leading "oooc:" marker is removed when present. The formula is otherwise
# returned unchanged (the previous code always dropped the first five
# characters, which mangled formulas that do not carry that prefix).
def formula(row,col,sheet=nil)
  sheet = @default_sheet unless sheet
  read_cells(sheet) unless @cells_read[sheet]
  row,col = normalize(row,col)
  # a sheet for which no cell was ever stored has no formula hash at all
  raw = (@formula[sheet] || {})[[row,col]]
  return nil if raw.nil?
  raw.sub(/\Aoooc:/, '')
end
77
+
78
# Returns true if the cell at (row,col) of the given sheet (default sheet
# when sheet is nil) has a formula.
def formula?(row,col,sheet=nil)
  sheet = @default_sheet unless sheet
  read_cells(sheet) unless @cells_read[sheet]
  row,col = normalize(row,col)
  # pass the sheet on: without it the lookup always hit the default sheet
  !formula(row,col,sheet).nil?
end
85
+
86
# Holds the font attributes of a cell style as raw attribute values.
class Font
  attr_accessor :bold, :italic, :underline

  # true only when the bold attribute is the string 'bold'
  def bold?
    bold == 'bold'
  end

  # true only when the italic attribute is the string 'italic'
  def italic?
    italic == 'italic'
  end

  # true as soon as any underline value is set
  def underline?
    underline != nil
  end
end
101
+
102
# Looks up the style definition for a cell.
# The style name is taken from the cell itself, then from the per-column
# entry in @style_defaults, and finally falls back to 'Default'.
# Returns whatever @style_definitions holds under that name.
def font(row, col, sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row, col = normalize(row, col)
  style_name = @style[sheet][[row, col]]
  style_name ||= @style_defaults[sheet][col - 1]
  style_name ||= 'Default'
  @style_definitions[style_name]
end
110
+
111
# Sets a cell to a certain value
# (this will not be saved back to the spreadsheet file!).
#
# The cell type is derived from the value: Integer and Float values become
# :float, String values become :string.
# Raises ArgumentError for any other kind of value; as before, the value
# itself has already been stored at that point.
def set(row,col,value,sheet=nil) #:nodoc:
  sheet = @default_sheet unless sheet
  read_cells(sheet) unless @cells_read[sheet]
  row,col = normalize(row,col)
  set_value(row,col,value,sheet)
  case value
  when Integer, Float
    # Integer replaces the removed Fixnum; Float used to be typed :string
    set_type(row,col,:float,sheet)
  when String
    set_type(row,col,:string,sheet)
  else
    raise ArgumentError, "Type for "+value.to_s+" not set"
  end
end
128
+
129
# Returns the type of a cell as a symbol.
# A cell that has a formula stored is always reported as :formula;
# otherwise the type recorded while reading the sheet is returned
# (:float, :string, :date, :percentage, :time or :datetime).
def celltype(row,col,sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  row, col = normalize(row, col)
  key = [row, col]
  return :formula if @formula[sheet][key]
  @cell_type[sheet][key]
end
147
+
148
# Returns the Name attribute of every ss:Worksheet element found in the
# document, in document order.
def sheets
  names = []
  @doc.find("//ss:Worksheet").each { |worksheet| names.push(worksheet.attributes['Name']) }
  names
end
155
+
156
# Returns the document version found by oo_version.
# NOTE(review): oo_version looks for a 'document-content' element, which
# appears to be an OpenOffice leftover -- confirm it ever matches here.
def officeversion
  oo_version
  return @officeversion
end
162
+
163
# Returns the inspect string of the internal cell hash of a sheet
# (default sheet when none is given); mainly for debugging purposes.
def to_s(sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  cells = @cell[sheet]
  cells.inspect
end
170
+
171
# Placeholder for saving a spreadsheet: nothing is written here,
# the method just returns the constant 42.
def save #:nodoc:
  return 42
end
175
+
176
# Collects all formulas of the selected sheet.
# Walks every row from first_row to last_row and every column from
# first_column to last_column and returns an array of
# [row, col, formula] triples for the cells that have a formula.
def formulas(sheet=nil)
  sheet ||= @default_sheet
  read_cells(sheet) unless @cells_read[sheet]
  found = []
  first_row(sheet).upto(last_row(sheet)) do |row|
    first_column(sheet).upto(last_column(sheet)) do |col|
      next unless formula?(row,col,sheet)
      found << [row, col, formula(row,col,sheet)]
    end
  end
  found
end
192
+
193
+ private
194
+
195
# Stores the 'version' attribute of each element whose local name is
# 'document-content' in @officeversion (the last match wins).
def oo_version
  xpath = "//*[local-name()='document-content']"
  @doc.find(xpath).each { |office| @officeversion = office.attributes['version'] }
end
201
+
202
# Stores one cell in the internal per-sheet hashes.
# The cell key is [y, x+i] (row first, then column).
# Type, style name and (when given) formula are recorded as passed in.
# The stored value depends on the type: :float and :percentage use
# v.to_f, :string uses str_v, :datetime is parsed with DateTime.parse,
# anything else (e.g. :time, :date) keeps v unchanged.
# table_cell is accepted but not used here.
def set_cell_values(sheet,x,y,i,v,vt,formula,table_cell,str_v,style_name)
  key = [y, x + i]
  (@cell_type[sheet] ||= {})[key] = vt
  @formula[sheet] ||= {}
  @formula[sheet][key] = formula if formula
  @cell[sheet] ||= {}
  (@style[sheet] ||= {})[key] = style_name
  cells = @cell[sheet]
  case vt
  when :float, :percentage
    cells[key] = v.to_f
  when :string
    cells[key] = str_v
  when :datetime
    cells[key] = DateTime.parse(v)
    @cell_type[sheet][key] = :datetime
  else
    cells[key] = v
  end
end
229
+
230
# Reads all cells of the selected sheet into the internal per-sheet hashes
# (through set_cell_values) and marks the sheet as read in @cells_read.
#
# sheet - worksheet name; @default_sheet is used when nil
#
# Raises ArgumentError when neither sheet nor @default_sheet is set, and
# RangeError when the workbook has no worksheet of that name.
def read_cells(sheet=nil)
  sheet = @default_sheet unless sheet
  sheet_found = false
  raise ArgumentError, "Error: sheet '#{sheet||'nil'}' not valid" if @default_sheet == nil and sheet==nil
  raise RangeError unless sheets.include? sheet
  # The name is compared in Ruby instead of being interpolated into the
  # XPath expression, so sheet names containing a quote still match.
  @doc.find("ss:Worksheet").each do |ws|
    next unless ws.attributes['Name'] == sheet
    sheet_found = true
    row = 1
    col = 1
    ws.find('.//ss:Row').each do |r|
      # an Index attribute greater than zero repositions the row counter
      skip_to_row = r.attributes['Index'].to_i
      row = skip_to_row if skip_to_row > 0
      r.each do |c|
        next unless c.name == 'Cell'
        # same for columns: Index repositions the column counter
        skip_to_col = c.attributes['Index'].to_i
        col = skip_to_col if skip_to_col > 0
        c.each_element do |cell|
          # Only Data children carry a value and a type. Any other child
          # element used to reach set_cell_values with a nil type and
          # crashed on nil.to_sym.
          next unless cell.name == 'Data'
          # NOTE(review): SpreadsheetML normally carries StyleID and Formula
          # on the Cell element, not on Data -- confirm whether these two
          # should be read from c instead.
          style_name = cell.attributes['StyleID']
          formula = cell.attributes['Formula']
          vt = cell.attributes['Type'].downcase.to_sym
          v = cell.content
          str_v = v
          case vt
          when :number
            v = v.to_f
            vt = :float
          when :datetime
            if v =~ /^1899-12-31T(\d{2}:\d{2}:\d{2})/
              # a value on 1899-12-31 is treated as a pure time of day
              v = $1
              vt = :time
            elsif v =~ /([^T]+)T00:00:00\.000/
              # midnight timestamps are treated as pure dates
              v = $1
              vt = :date
            end
          when :boolean
            # NOTE(review): 'boolean-value' looks like an OpenDocument
            # attribute name -- confirm that it exists on Data elements.
            v = cell.attributes['boolean-value']
          end
          set_cell_values(sheet,col,row,0,v,vt,formula,cell,str_v,style_name)
        end
        col += 1
      end
      row += 1
      col = 1
    end
  end
  raise RangeError unless sheet_found
  @cells_read[sheet] = true
end
316
+
317
# Fills @style_definitions from a list of style elements.
# An empty Font is always registered under 'Default'. For every element
# named 'style', each child yields a Font built from its 'font-weight',
# 'font-style' and 'text-underline-style' attributes, stored under the
# style's 'name' attribute (a later child overwrites an earlier one).
#
# Uses this class's own Font instead of Openoffice::Font, so the method no
# longer depends on the Openoffice class being loaded.
# NOTE(review): no caller is visible in this file and the element/attribute
# names look like OpenDocument ones -- confirm this is still needed.
def read_styles(style_elements)
  @style_definitions['Default'] = Font.new
  style_elements.each do |style|
    next unless style.name == 'style'
    style_name = style.attributes['name']
    style.each do |properties|
      font = Font.new
      font.bold = properties.attributes['font-weight']
      font.italic = properties.attributes['font-style']
      font.underline = properties.attributes['text-underline-style']
      @style_definitions[style_name] = font
    end
  end
end
331
+
332
# Verifies that a default sheet is set and exists in the workbook.
# Raises ArgumentError when @default_sheet is nil and RangeError when
# its name is not among sheets; returns nil otherwise.
def check_default_sheet
  raise ArgumentError, "Error: default_sheet not set" if @default_sheet.nil?
  return if sheets.include?(@default_sheet)
  raise RangeError, "sheet '#{@default_sheet}' not found"
end
342
+
343
# Walks the entries of a zip archive recursively, starting at path.
# Directories are descended into; of the files only "content.xml" is
# handled: it is written to <@tmpdir>/<@file_nr>_roo_content.xml.
def process_zipfile(zip, path='')
  if !zip.file.file?(path)
    path += '/' unless path.empty?
    zip.dir.foreach(path) { |entry| process_zipfile(zip, path+entry) }
  elsif path == "content.xml"
    target = File.join(@tmpdir, @file_nr.to_s+'_roo_content.xml')
    open(target,'wb') { |f| f << zip.read(path) }
  end
end
359
+
360
# Opens @filename as a zip archive and hands it to process_zipfile.
def extract_content
  Zip::ZipFile.open(@filename) { |zip| process_zipfile(zip) }
end
365
+
366
# Stores value as the content of the cell at (row,col).
# sheet defaults to @default_sheet (the code used to read the never
# assigned @default_value, which always produced a nil sheet).
def set_value(row,col,value,sheet=nil)
  sheet = @default_sheet unless sheet
  # a sheet without any stored cell has no hash yet
  @cell[sheet] ||= {}
  @cell[sheet][[row,col]] = value
end
370
+
371
# Records type as the cell type of the cell at (row,col).
# sheet defaults to @default_sheet (the code used to read the never
# assigned @default_value, which always produced a nil sheet).
def set_type(row,col,type,sheet=nil)
  sheet = @default_sheet unless sheet
  # a sheet without any stored cell has no hash yet
  @cell_type[sheet] ||= {}
  @cell_type[sheet][[row,col]] = type
end
375
+
376
# Maps a type name given as a string to the roo cell type symbol of the
# same name.
A_ROO_TYPE = Hash[
  %w[float string date percentage time].map { |type_name| [type_name, type_name.to_sym] }
]
383
+
384
# Looks up the roo type symbol for a type name in A_ROO_TYPE; returns nil
# for unknown names.
# NOTE(review): this defines a singleton method on the Openoffice class,
# not on this class -- looks like a leftover from copying openoffice.rb;
# confirm whether it should live here at all.
def Openoffice.oo_type_2_roo_type(ootype)
  A_ROO_TYPE[ootype]
end
387
+
388
# Concatenates a list of child nodes into one string.
# Text nodes and all elements other than 's' contribute their content.
# An 's' element stands for a run of blanks whose length is its 'c'
# attribute (a missing or zero count means one blank).
def children_to_string(children)
  result = ''
  children.each do |child|
    if child.text? || child.name != 's'
      result += child.content
    else
      blanks = child.attributes['c'].to_i
      blanks = 1 if blanks == 0
      result += " " * blanks
    end
  end
  result
end
410
+
411
+ end # class