roo-immersion 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +225 -0
- data/README.markdown +60 -0
- data/examples/roo_soap_client.rb +53 -0
- data/examples/roo_soap_server.rb +29 -0
- data/examples/write_me.rb +33 -0
- data/lib/roo.rb +32 -0
- data/lib/roo/excel.rb +468 -0
- data/lib/roo/excel2003xml.rb +394 -0
- data/lib/roo/excelx.rb +601 -0
- data/lib/roo/generic_spreadsheet.rb +628 -0
- data/lib/roo/google.rb +379 -0
- data/lib/roo/openoffice.rb +451 -0
- data/lib/roo/roo_rails_helper.rb +82 -0
- data/lib/roo/version.rb +9 -0
- data/test/1900_base.xls +0 -0
- data/test/1904_base.xls +0 -0
- data/test/Bibelbund.csv +3741 -0
- data/test/Bibelbund.ods +0 -0
- data/test/Bibelbund.xls +0 -0
- data/test/Bibelbund.xlsx +0 -0
- data/test/Bibelbund.xml +62518 -0
- data/test/Bibelbund1.ods +0 -0
- data/test/bad_excel_date.xls +0 -0
- data/test/bbu.ods +0 -0
- data/test/bbu.xls +0 -0
- data/test/bbu.xlsx +0 -0
- data/test/bbu.xml +152 -0
- data/test/bode-v1.ods.zip +0 -0
- data/test/bode-v1.xls.zip +0 -0
- data/test/boolean.ods +0 -0
- data/test/boolean.xls +0 -0
- data/test/boolean.xlsx +0 -0
- data/test/boolean.xml +112 -0
- data/test/borders.ods +0 -0
- data/test/borders.xls +0 -0
- data/test/borders.xlsx +0 -0
- data/test/borders.xml +144 -0
- data/test/bug-row-column-fixnum-float.xls +0 -0
- data/test/bug-row-column-fixnum-float.xml +127 -0
- data/test/datetime.ods +0 -0
- data/test/datetime.xls +0 -0
- data/test/datetime.xlsx +0 -0
- data/test/datetime.xml +142 -0
- data/test/datetime_floatconv.xls +0 -0
- data/test/datetime_floatconv.xml +148 -0
- data/test/emptysheets.ods +0 -0
- data/test/emptysheets.xls +0 -0
- data/test/emptysheets.xml +105 -0
- data/test/excel2003.xml +21140 -0
- data/test/false_encoding.xls +0 -0
- data/test/false_encoding.xml +132 -0
- data/test/formula.ods +0 -0
- data/test/formula.xls +0 -0
- data/test/formula.xlsx +0 -0
- data/test/formula.xml +134 -0
- data/test/formula_parse_error.xls +0 -0
- data/test/formula_parse_error.xml +1833 -0
- data/test/html-escape.ods +0 -0
- data/test/no_spreadsheet_file.txt +1 -0
- data/test/numbers1.csv +18 -0
- data/test/numbers1.ods +0 -0
- data/test/numbers1.xls +0 -0
- data/test/numbers1.xlsx +0 -0
- data/test/numbers1.xml +312 -0
- data/test/only_one_sheet.ods +0 -0
- data/test/only_one_sheet.xls +0 -0
- data/test/only_one_sheet.xlsx +0 -0
- data/test/only_one_sheet.xml +67 -0
- data/test/paragraph.ods +0 -0
- data/test/paragraph.xls +0 -0
- data/test/paragraph.xlsx +0 -0
- data/test/paragraph.xml +127 -0
- data/test/ric.ods +0 -0
- data/test/simple_spreadsheet.ods +0 -0
- data/test/simple_spreadsheet.xls +0 -0
- data/test/simple_spreadsheet.xlsx +0 -0
- data/test/simple_spreadsheet.xml +225 -0
- data/test/simple_spreadsheet_from_italo.ods +0 -0
- data/test/simple_spreadsheet_from_italo.xls +0 -0
- data/test/simple_spreadsheet_from_italo.xml +242 -0
- data/test/skipped_tests.rb +789 -0
- data/test/style.ods +0 -0
- data/test/style.xls +0 -0
- data/test/style.xlsx +0 -0
- data/test/style.xml +154 -0
- data/test/test_helper.rb +19 -0
- data/test/test_roo.rb +1834 -0
- data/test/time-test.csv +2 -0
- data/test/time-test.ods +0 -0
- data/test/time-test.xls +0 -0
- data/test/time-test.xlsx +0 -0
- data/test/time-test.xml +131 -0
- data/test/whitespace.ods +0 -0
- data/test/whitespace.xls +0 -0
- data/test/whitespace.xlsx +0 -0
- data/test/whitespace.xml +184 -0
- metadata +231 -0
data/lib/roo/google.rb
ADDED
@@ -0,0 +1,379 @@
|
|
1
|
+
require 'gdata/spreadsheet'
|
2
|
+
require 'xml'
|
3
|
+
|
4
|
+
class GoogleHTTPError < RuntimeError; end
|
5
|
+
class GoogleReadError < RuntimeError; end
|
6
|
+
class GoogleWriteError < RuntimeError; end
|
7
|
+
|
8
|
+
# overwrite some methods from the gdata-gem:
|
9
|
+
module GData
|
10
|
+
class Spreadsheet < GData::Base
|
11
|
+
|
12
|
+
def visibility
|
13
|
+
@headers ? "private" : "public"
|
14
|
+
end
|
15
|
+
|
16
|
+
def projection
|
17
|
+
@headers ? "full" : "values"
|
18
|
+
end
|
19
|
+
|
20
|
+
# Fetches a single cell from the cells feed and returns the inner HTML of
# the <content> element(s) found in the response.
#
# cell::     cell address appended to the feed path (must not be nil/false)
# sheet_no:: worksheet number used in the feed path (positive Integer,
#            defaults to 1)
#
# Raises ArgumentError when +cell+ is missing or +sheet_no+ is not a
# positive Integer.
#-- modified
def evaluate_cell(cell, sheet_no=1)
  raise ArgumentError, "invalid cell: #{cell}" unless cell
  # Check the type first so that nil or a String is reported as an invalid
  # argument instead of failing inside the comparison. Integer is used
  # because Fixnum no longer exists on current Ruby versions.
  unless sheet_no.is_a?(Integer) && sheet_no > 0
    raise ArgumentError, "invalid sheet_no: #{sheet_no}"
  end
  path = "/feeds/cells/#{@spreadsheet_id}/#{sheet_no}/#{visibility}/#{projection}/#{cell}"
  doc = Hpricot(request(path))
  (doc/"content").inner_html
end
|
28
|
+
|
29
|
+
#-- new
|
30
|
+
def sheetlist
|
31
|
+
path = "/feeds/worksheets/#{@spreadsheet_id}/#{visibility}/#{projection}"
|
32
|
+
doc = Hpricot(request(path))
|
33
|
+
result = []
|
34
|
+
(doc/"content").each { |elem|
|
35
|
+
result << elem.inner_html
|
36
|
+
}
|
37
|
+
if result.size == 0
|
38
|
+
if (doc/"h2").inner_html =~ /Error/
|
39
|
+
raise GoogleHTTPError, "#{(doc/'h2').inner_html}: #{(doc/'title').inner_html} [key '#{@spreadsheet_id}']"
|
40
|
+
else
|
41
|
+
raise GoogleReadError, "#{doc} [key '#{@spreadsheet_id}']"
|
42
|
+
end
|
43
|
+
end
|
44
|
+
result
|
45
|
+
end
|
46
|
+
|
47
|
+
#-- new
|
48
|
+
#@@ added sheet_no to definition
|
49
|
+
def save_entry_roo(entry, sheet_no)
|
50
|
+
raise GoogleWriteError, "unable to write to public spreadsheets" if visibility == 'public'
|
51
|
+
path = "/feeds/cells/#{@spreadsheet_id}/#{sheet_no}/#{visibility}/#{projection}"
|
52
|
+
post(path, entry)
|
53
|
+
end
|
54
|
+
|
55
|
+
# Builds the Atom <entry> XML used to write one cell.
#
# formula:: the value or formula to store; converted with to_s
# row, col:: cell coordinates written into the gs:cell element
#
# Returns the XML document as a String.
#-- new
def entry_roo(formula, row=1, col=1)
  # The value is placed inside a single-quoted XML attribute, so the XML
  # special characters must be escaped; otherwise a value containing
  # ', & or < yields a malformed entry. '&' has to be replaced first.
  input_value = formula.to_s.
    gsub('&', '&amp;').
    gsub('<', '&lt;').
    gsub('>', '&gt;').
    gsub("'", '&apos;').
    gsub('"', '&quot;')
  <<-XML
<entry xmlns='http://www.w3.org/2005/Atom' xmlns:gs='http://schemas.google.com/spreadsheets/2006'>
<gs:cell row='#{row}' col='#{col}' inputValue='#{input_value}' />
</entry>
  XML
end
|
63
|
+
|
64
|
+
#-- new
|
65
|
+
#@@ added sheet_no to definition
|
66
|
+
def add_to_cell_roo(row,col,value, sheet_no=1)
|
67
|
+
save_entry_roo(entry_roo(value,row,col), sheet_no)
|
68
|
+
end
|
69
|
+
|
70
|
+
#-- new
|
71
|
+
def get_one_sheet
|
72
|
+
path = "/feeds/cells/#{@spreadsheet_id}/1/#{visibility}/#{projection}"
|
73
|
+
doc = Hpricot(request(path))
|
74
|
+
end
|
75
|
+
|
76
|
+
#new
|
77
|
+
def oben_unten_links_rechts(sheet_no)
|
78
|
+
path = "/feeds/cells/#{@spreadsheet_id}/#{sheet_no}/#{visibility}/#{projection}"
|
79
|
+
doc = Hpricot(request(path))
|
80
|
+
rows = []
|
81
|
+
cols = []
|
82
|
+
(doc/"gs:cell").each {|item|
|
83
|
+
rows.push item['row'].to_i
|
84
|
+
cols.push item['col'].to_i
|
85
|
+
}
|
86
|
+
return rows.min, rows.max, cols.min, cols.max
|
87
|
+
end
|
88
|
+
|
89
|
+
def fulldoc(sheet_no)
|
90
|
+
path = "/feeds/cells/#{@spreadsheet_id}/#{sheet_no}/#{visibility}/#{projection}"
|
91
|
+
doc = Hpricot(request(path))
|
92
|
+
return doc
|
93
|
+
end
|
94
|
+
|
95
|
+
end # class
|
96
|
+
end # module
|
97
|
+
|
98
|
+
class Google < GenericSpreadsheet
|
99
|
+
attr_accessor :date_format, :datetime_format
|
100
|
+
|
101
|
+
# Creates a new Google spreadsheet object.
#
# spreadsheetkey:: key of the spreadsheet; also stored as @filename
# user, password:: credentials; when omitted they are taken from
#                  ENV['GOOGLE_MAIL'] and ENV['GOOGLE_PASSWORD']
#
# Authentication is attempted only when both a user and a password are
# available and non-empty. The sheet list is fetched immediately and the
# first sheet becomes the default sheet.
def initialize(spreadsheetkey,user=nil,password=nil)
  @filename = spreadsheetkey
  @spreadsheetkey = spreadsheetkey
  @user = user
  @password = password
  user ||= ENV['GOOGLE_MAIL']
  password ||= ENV['GOOGLE_PASSWORD']
  @cell = Hash.new {|h,k| h[k]=Hash.new}
  @cell_type = Hash.new {|h,k| h[k]=Hash.new}
  @formula = Hash.new
  @first_row = Hash.new
  @last_row = Hash.new
  @first_column = Hash.new
  @last_column = Hash.new
  @cells_read = Hash.new
  @header_line = 1
  @date_format = '%d/%m/%Y'
  @datetime_format = '%d/%m/%Y %H:%M:%S'
  @time_format = '%H:%M:%S'
  @gs = GData::Spreadsheet.new(spreadsheetkey)
  # user/password are nil when neither an argument nor the environment
  # variable is set; guard against that before calling empty?.
  if user && password && !user.empty? && !password.empty?
    @gs.authenticate(user, password)
  end
  @sheetlist = @gs.sheetlist
  @default_sheet = self.sheets.first
end
|
130
|
+
|
131
|
+
# returns an array of sheet names in the spreadsheet
|
132
|
+
def sheets
|
133
|
+
@sheetlist
|
134
|
+
end
|
135
|
+
|
136
|
+
def date?(string)
|
137
|
+
begin
|
138
|
+
Date.strptime(string, @date_format)
|
139
|
+
true
|
140
|
+
rescue
|
141
|
+
false
|
142
|
+
end
|
143
|
+
end
|
144
|
+
|
145
|
+
# is String a time with format HH:MM:SS?
|
146
|
+
def time?(string)
|
147
|
+
begin
|
148
|
+
DateTime.strptime(string, @time_format)
|
149
|
+
true
|
150
|
+
rescue
|
151
|
+
false
|
152
|
+
end
|
153
|
+
end
|
154
|
+
|
155
|
+
def datetime?(string)
|
156
|
+
begin
|
157
|
+
DateTime.strptime(string, @datetime_format)
|
158
|
+
true
|
159
|
+
rescue
|
160
|
+
false
|
161
|
+
end
|
162
|
+
end
|
163
|
+
|
164
|
+
# Tells whether +string+ is an unsigned decimal number: one or more digits,
# optionally followed by a single '.' and further digits.
#
# Returns 0 (truthy) on a match and nil otherwise; nil input yields nil.
def numeric?(string)
  # \A and \z anchor the whole string (^ and $ would match any single line
  # of a multi-line value), and at most one decimal point is accepted.
  string =~ /\A[0-9]+\.?[0-9]*\z/
end
|
167
|
+
|
168
|
+
def timestring_to_seconds(value)
|
169
|
+
hms = value.split(':')
|
170
|
+
hms[0].to_i*3600 + hms[1].to_i*60 + hms[2].to_i
|
171
|
+
end
|
172
|
+
|
173
|
+
# Returns the content of a spreadsheet cell.
# (1,1) is the upper left corner.
# (1,1), (1,'A'), ('A',1), ('a',1) all refer to the
# cell in the first row and first column.
#
# Cells whose type is :date or :datetime are parsed with @date_format /
# @datetime_format and returned as Date / DateTime objects; every other
# cell is returned as stored.
#
# Raises RuntimeError when the stored text of a date or datetime cell does
# not match the configured format.
def cell(row, col, sheet=nil)
  sheet = @default_sheet unless sheet
  check_default_sheet #TODO: 2007-12-16
  read_cells(sheet) unless @cells_read[sheet]
  row,col = normalize(row,col)
  value = @cell[sheet]["#{row},#{col}"]
  case celltype(row,col,sheet)
  when :date
    begin
      Date.strptime(value, @date_format)
    rescue ArgumentError
      # The format is interpolated so the message shows what was expected.
      raise "Invalid Date #{sheet}[#{row},#{col}] #{value} using format '#{@date_format}'"
    end
  when :datetime
    begin
      DateTime.strptime(value, @datetime_format)
    rescue ArgumentError
      raise "Invalid DateTime #{sheet}[#{row},#{col}] #{value} using format '#{@datetime_format}'"
    end
  else
    value
  end
end
|
198
|
+
|
199
|
+
# returns the type of a cell:
# * :float
# * :string
# * :date
# * :percentage
# * :formula
# * :time
# * :datetime
#
# A cell that has a formula recorded for it is reported as :formula,
# otherwise the stored cell type is returned (nil for an unknown cell).
def celltype(row, col, sheet=nil)
  sheet = @default_sheet unless sheet
  read_cells(sheet) unless @cells_read[sheet]
  row,col = normalize(row,col)
  key = "#{row},#{col}"
  # @formula only has an entry for sheets in which at least one cell was
  # read, so the per-sheet hash may be missing.
  sheet_formulas = @formula[sheet]
  return :formula if sheet_formulas && sheet_formulas[key]
  @cell_type[sheet][key]
end
|
217
|
+
|
218
|
+
# Returns the formula at (row,col).
# Returns nil if there is no formula.
# The method #formula? checks if there is a formula.
def formula(row,col,sheet=nil)
  sheet = @default_sheet unless sheet
  read_cells(sheet) unless @cells_read[sheet]
  row,col = normalize(row,col)
  # A sheet in which no cell was read has no entry in @formula at all;
  # treat that the same as "no formula" instead of calling [] on nil.
  sheet_formulas = @formula[sheet]
  sheet_formulas && sheet_formulas["#{row},#{col}"]
end
|
231
|
+
|
232
|
+
# true, if there is a formula at (row,col) of the given sheet
# (the default sheet when +sheet+ is omitted)
def formula?(row,col,sheet=nil)
  sheet = @default_sheet unless sheet
  read_cells(sheet) unless @cells_read[sheet]
  row,col = normalize(row,col)
  # Pass the sheet on; without it the lookup always used the default sheet.
  !formula(row,col,sheet).nil?
end
|
239
|
+
|
240
|
+
# returns each formula in the selected sheet as an array of elements
|
241
|
+
# [row, col, formula]
|
242
|
+
def formulas(sheet=nil)
|
243
|
+
theformulas = Array.new
|
244
|
+
sheet = @default_sheet unless sheet
|
245
|
+
read_cells(sheet) unless @cells_read[sheet]
|
246
|
+
first_row(sheet).upto(last_row(sheet)) {|row|
|
247
|
+
first_column(sheet).upto(last_column(sheet)) {|col|
|
248
|
+
if formula?(row,col,sheet)
|
249
|
+
f = [row, col, formula(row,col,sheet)]
|
250
|
+
theformulas << f
|
251
|
+
end
|
252
|
+
}
|
253
|
+
}
|
254
|
+
theformulas
|
255
|
+
end
|
256
|
+
|
257
|
+
# true, if the cell is empty
|
258
|
+
def empty?(row, col, sheet=nil)
|
259
|
+
value = cell(row, col, sheet)
|
260
|
+
return true unless value
|
261
|
+
return false if value.class == Date # a date is never empty
|
262
|
+
return false if value.class == Float
|
263
|
+
return false if celltype(row,col,sheet) == :time
|
264
|
+
value.empty?
|
265
|
+
end
|
266
|
+
|
267
|
+
# sets the cell to the content of 'value'
|
268
|
+
# a formula can be set in the form of '=SUM(...)'
|
269
|
+
def set_value(row,col,value,sheet=nil)
|
270
|
+
sheet = @default_sheet unless sheet
|
271
|
+
raise RangeError, "sheet not set" unless sheet
|
272
|
+
#@@ Set and pass sheet_no
|
273
|
+
begin
|
274
|
+
sheet_no = sheets.index(sheet)+1
|
275
|
+
rescue
|
276
|
+
raise RangeError, "invalid sheet '"+sheet.to_s+"'"
|
277
|
+
end
|
278
|
+
row,col = normalize(row,col)
|
279
|
+
@gs.add_to_cell_roo(row,col,value,sheet_no)
|
280
|
+
# re-read the portion of the document that has changed
|
281
|
+
if @cells_read[sheet]
|
282
|
+
key = "#{row},#{col}"
|
283
|
+
(value, value_type) = determine_datatype(value.to_s)
|
284
|
+
@cell[sheet][key] = value
|
285
|
+
@cell_type[sheet][key] = value_type
|
286
|
+
end
|
287
|
+
end
|
288
|
+
|
289
|
+
# returns the first non-empty row in a sheet
|
290
|
+
def first_row(sheet=nil)
|
291
|
+
sheet = @default_sheet unless sheet
|
292
|
+
unless @first_row[sheet]
|
293
|
+
sheet_no = sheets.index(sheet) + 1
|
294
|
+
@first_row[sheet], @last_row[sheet], @first_column[sheet], @last_column[sheet] = @gs.oben_unten_links_rechts(sheet_no)
|
295
|
+
end
|
296
|
+
return @first_row[sheet]
|
297
|
+
end
|
298
|
+
|
299
|
+
# returns the last non-empty row in a sheet
|
300
|
+
def last_row(sheet=nil)
|
301
|
+
sheet = @default_sheet unless sheet
|
302
|
+
unless @last_row[sheet]
|
303
|
+
sheet_no = sheets.index(sheet) + 1
|
304
|
+
@first_row[sheet], @last_row[sheet], @first_column[sheet], @last_column[sheet] = @gs.oben_unten_links_rechts(sheet_no)
|
305
|
+
end
|
306
|
+
return @last_row[sheet]
|
307
|
+
end
|
308
|
+
|
309
|
+
# returns the first non-empty column in a sheet
|
310
|
+
def first_column(sheet=nil)
|
311
|
+
sheet = @default_sheet unless sheet
|
312
|
+
unless @first_column[sheet]
|
313
|
+
sheet_no = sheets.index(sheet) + 1
|
314
|
+
@first_row[sheet], @last_row[sheet], @first_column[sheet], @last_column[sheet] = @gs.oben_unten_links_rechts(sheet_no)
|
315
|
+
end
|
316
|
+
return @first_column[sheet]
|
317
|
+
end
|
318
|
+
|
319
|
+
# returns the last non-empty column in a sheet
|
320
|
+
def last_column(sheet=nil)
|
321
|
+
sheet = @default_sheet unless sheet
|
322
|
+
unless @last_column[sheet]
|
323
|
+
sheet_no = sheets.index(sheet) + 1
|
324
|
+
@first_row[sheet], @last_row[sheet], @first_column[sheet], @last_column[sheet] = @gs.oben_unten_links_rechts(sheet_no)
|
325
|
+
end
|
326
|
+
return @last_column[sheet]
|
327
|
+
end
|
328
|
+
|
329
|
+
private
|
330
|
+
|
331
|
+
# read all cells in a sheet.
|
332
|
+
def read_cells(sheet=nil)
|
333
|
+
sheet = @default_sheet unless sheet
|
334
|
+
raise RangeError, "illegal sheet <#{sheet}>" unless sheets.index(sheet)
|
335
|
+
sheet_no = sheets.index(sheet)+1
|
336
|
+
xml = @gs.fulldoc(sheet_no).to_s
|
337
|
+
doc = XML::Parser.string(xml).parse
|
338
|
+
doc.find("//*[local-name()='cell']").each do |item|
|
339
|
+
row = item['row']
|
340
|
+
col = item['col']
|
341
|
+
key = "#{row},#{col}"
|
342
|
+
string_value = item['inputvalue'] || item['inputValue']
|
343
|
+
numeric_value = item['numericvalue'] || item['numericValue']
|
344
|
+
(value, value_type) = determine_datatype(string_value, numeric_value)
|
345
|
+
@cell[sheet][key] = value unless value == "" or value == nil
|
346
|
+
@cell_type[sheet][key] = value_type
|
347
|
+
@formula[sheet] = {} unless @formula[sheet]
|
348
|
+
@formula[sheet][key] = string_value if value_type == :formula
|
349
|
+
end
|
350
|
+
@cells_read[sheet] = true
|
351
|
+
end
|
352
|
+
|
353
|
+
def determine_datatype(val, numval=nil)
|
354
|
+
if val.nil? || val[0,1] == '='
|
355
|
+
ty = :formula
|
356
|
+
if numeric?(numval)
|
357
|
+
val = numval.to_f
|
358
|
+
else
|
359
|
+
val = numval
|
360
|
+
end
|
361
|
+
else
|
362
|
+
if datetime?(val)
|
363
|
+
ty = :datetime
|
364
|
+
elsif date?(val)
|
365
|
+
ty = :date
|
366
|
+
elsif numeric?(val)
|
367
|
+
ty = :float
|
368
|
+
val = val.to_f
|
369
|
+
elsif time?(val)
|
370
|
+
ty = :time
|
371
|
+
val = timestring_to_seconds(val)
|
372
|
+
else
|
373
|
+
ty = :string
|
374
|
+
end
|
375
|
+
end
|
376
|
+
return val, ty
|
377
|
+
end
|
378
|
+
|
379
|
+
end # class
|
@@ -0,0 +1,451 @@
|
|
1
|
+
require 'xml'
|
2
|
+
require 'fileutils'
|
3
|
+
require 'zip/zipfilesystem'
|
4
|
+
require 'date'
|
5
|
+
require 'base64'
|
6
|
+
require 'cgi'
|
7
|
+
|
8
|
+
class Openoffice < GenericSpreadsheet
|
9
|
+
|
10
|
+
@@nr = 0
|
11
|
+
|
12
|
+
# initialization and opening of a spreadsheet file
# values for packed: :zip
#
# filename::     path of the .ods file; a name starting with "http://" is
#                first passed to open_from_uri
# packed::       :zip if the file has to be unzipped first
# file_warning:: stored in @file_warning before the file type check runs
#
# Raises IOError if the resulting file does not exist.
def initialize(filename, packed=nil, file_warning=:error) #, create = false)
  @file_warning = file_warning
  super()
  @tmpdir = "oo_"+$$.to_s
  @tmpdir = File.join(ENV['ROO_TMP'], @tmpdir) if ENV['ROO_TMP']
  # File.exist? is the supported spelling (File.exists? was removed).
  unless File.exist?(@tmpdir)
    FileUtils::mkdir(@tmpdir)
  end
  begin
    # Download/unzip inside the begin block so the temporary directory is
    # removed even when one of these steps fails.
    filename = open_from_uri(filename) if filename[0,7] == "http://"
    filename = unzip(filename) if packed and packed == :zip
    file_type_check(filename,'.ods','an openoffice')
    #if create and ! File.exists?(filename)
    #  self.create_openoffice(filename)
    #end
    @cells_read = Hash.new
    #TODO: @cells_read[:default] = false
    @filename = filename
    unless File.file?(@filename)
      raise IOError, "file #{@filename} does not exist"
    end
    @@nr += 1
    @file_nr = @@nr
    extract_content
    content_path = File.join(@tmpdir, @file_nr.to_s+"_roo_content.xml")
    # The block form closes the file even if parsing raises.
    File.open(content_path) do |file|
      @doc = XML::Parser.io(file).parse
    end
  ensure
    #if ENV["roo_local"] != "thomas-p"
    FileUtils::rm_r(@tmpdir)
    #end
  end
  @default_sheet = self.sheets.first
  @cell = Hash.new
  @cell_type = Hash.new
  @formula = Hash.new
  @first_row = Hash.new
  @last_row = Hash.new
  @first_column = Hash.new
  @last_column = Hash.new
  @style = Hash.new
  @style_defaults = Hash.new { |h,k| h[k] = [] }
  @style_definitions = Hash.new
  @header_line = 1
end
|
59
|
+
|
60
|
+
# creates a new empty openoffice-spreadsheet file
|
61
|
+
def create_openoffice(filename) #:nodoc:
|
62
|
+
#TODO: a better way for creating the file contents
|
63
|
+
# now you have to call mkbase64...rb to create an include file with all
|
64
|
+
# the empty files in an openoffice zip-file
|
65
|
+
load 'base64include.rb'
|
66
|
+
# puts @@empty_spreadsheet
|
67
|
+
f = File.open(filename,'wb')
|
68
|
+
f.print(Base64.decode64(@@empty_spreadsheet))
|
69
|
+
f.close
|
70
|
+
end
|
71
|
+
|
72
|
+
# Returns the content of a spreadsheet-cell.
|
73
|
+
# (1,1) is the upper left corner.
|
74
|
+
# (1,1), (1,'A'), ('A',1), ('a',1) all refer to the
|
75
|
+
# cell in the first row and first column.
|
76
|
+
def cell(row, col, sheet=nil)
|
77
|
+
sheet = @default_sheet unless sheet
|
78
|
+
read_cells(sheet) unless @cells_read[sheet]
|
79
|
+
row,col = normalize(row,col)
|
80
|
+
if celltype(row,col,sheet) == :date
|
81
|
+
yyyy,mm,dd = @cell[sheet][[row,col]].split('-')
|
82
|
+
return Date.new(yyyy.to_i,mm.to_i,dd.to_i)
|
83
|
+
end
|
84
|
+
@cell[sheet][[row,col]]
|
85
|
+
end
|
86
|
+
|
87
|
+
# Returns the formula at (row,col).
|
88
|
+
# Returns nil if there is no formula.
|
89
|
+
# The method #formula? checks if there is a formula.
|
90
|
+
def formula(row,col,sheet=nil)
|
91
|
+
sheet = @default_sheet unless sheet
|
92
|
+
read_cells(sheet) unless @cells_read[sheet]
|
93
|
+
row,col = normalize(row,col)
|
94
|
+
if @formula[sheet][[row,col]] == nil
|
95
|
+
return nil
|
96
|
+
else
|
97
|
+
return @formula[sheet][[row,col]]["oooc:".length..-1]
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
# true, if there is a formula at (row,col) of the given sheet
# (the default sheet when +sheet+ is omitted)
def formula?(row,col,sheet=nil)
  sheet = @default_sheet unless sheet
  read_cells(sheet) unless @cells_read[sheet]
  row,col = normalize(row,col)
  # Pass the sheet on; without it the lookup always used the default sheet.
  !formula(row,col,sheet).nil?
end
|
108
|
+
|
109
|
+
class Font
|
110
|
+
attr_accessor :bold, :italic, :underline
|
111
|
+
|
112
|
+
def bold?
|
113
|
+
@bold == 'bold'
|
114
|
+
end
|
115
|
+
|
116
|
+
def italic?
|
117
|
+
@italic == 'italic'
|
118
|
+
end
|
119
|
+
|
120
|
+
def underline?
|
121
|
+
@underline != nil
|
122
|
+
end
|
123
|
+
end
|
124
|
+
|
125
|
+
# Given a cell, return the cell's style
|
126
|
+
def font(row, col, sheet=nil)
|
127
|
+
sheet = @default_sheet unless sheet
|
128
|
+
read_cells(sheet) unless @cells_read[sheet]
|
129
|
+
row,col = normalize(row,col)
|
130
|
+
style_name = @style[sheet][[row,col]] || @style_defaults[sheet][col - 1] || 'Default'
|
131
|
+
@style_definitions[style_name]
|
132
|
+
end
|
133
|
+
|
134
|
+
# set a cell to a certain value
# (this will not be saved back to the spreadsheet file!)
#
# The cell type is derived from the Ruby class of +value+:
# Integer => :float, String => :string, Float => :string.
# Raises ArgumentError for any other class (the value itself has already
# been stored at that point).
def set(row,col,value,sheet=nil) #:nodoc:
  sheet = @default_sheet unless sheet
  read_cells(sheet) unless @cells_read[sheet]
  row,col = normalize(row,col)
  set_value(row,col,value,sheet)
  # Integer instead of Fixnum: Fixnum no longer exists on current Ruby,
  # and Integer matches on older versions as well.
  type = case value
         when Integer then :float
         # NOTE(review): Float is mapped to :string exactly as before;
         # confirm whether :float was intended.
         when String, Float then :string
         else raise ArgumentError, "Type for "+value.to_s+" not set"
         end
  set_type(row,col,type,sheet)
end
|
151
|
+
|
152
|
+
# returns the type of a cell:
|
153
|
+
# * :float
|
154
|
+
# * :string
|
155
|
+
# * :date
|
156
|
+
# * :percentage
|
157
|
+
# * :formula
|
158
|
+
# * :time
|
159
|
+
# * :datetime
|
160
|
+
def celltype(row,col,sheet=nil)
|
161
|
+
sheet = @default_sheet unless sheet
|
162
|
+
read_cells(sheet) unless @cells_read[sheet]
|
163
|
+
row,col = normalize(row,col)
|
164
|
+
if @formula[sheet][[row,col]]
|
165
|
+
return :formula
|
166
|
+
else
|
167
|
+
@cell_type[sheet][[row,col]]
|
168
|
+
end
|
169
|
+
end
|
170
|
+
|
171
|
+
def sheets
|
172
|
+
return_sheets = []
|
173
|
+
@doc.find("//*[local-name()='table']").each do |sheet|
|
174
|
+
return_sheets << sheet.attributes['name']
|
175
|
+
end
|
176
|
+
return_sheets
|
177
|
+
end
|
178
|
+
|
179
|
+
# version of the openoffice document
|
180
|
+
# as of 2007 this is always "1.0"
|
181
|
+
def officeversion
|
182
|
+
oo_version
|
183
|
+
@officeversion
|
184
|
+
end
|
185
|
+
|
186
|
+
# shows the internal representation of all cells
|
187
|
+
# mainly for debugging purposes
|
188
|
+
def to_s(sheet=nil)
|
189
|
+
sheet = @default_sheet unless sheet
|
190
|
+
read_cells(sheet) unless @cells_read[sheet]
|
191
|
+
@cell[sheet].inspect
|
192
|
+
end
|
193
|
+
|
194
|
+
# save spreadsheet
|
195
|
+
def save #:nodoc:
|
196
|
+
42
|
197
|
+
end
|
198
|
+
|
199
|
+
# returns each formula in the selected sheet as an array of elements
|
200
|
+
# [row, col, formula]
|
201
|
+
def formulas(sheet=nil)
|
202
|
+
theformulas = Array.new
|
203
|
+
sheet = @default_sheet unless sheet
|
204
|
+
read_cells(sheet) unless @cells_read[sheet]
|
205
|
+
first_row(sheet).upto(last_row(sheet)) {|row|
|
206
|
+
first_column(sheet).upto(last_column(sheet)) {|col|
|
207
|
+
if formula?(row,col,sheet)
|
208
|
+
f = [row, col, formula(row,col,sheet)]
|
209
|
+
theformulas << f
|
210
|
+
end
|
211
|
+
}
|
212
|
+
}
|
213
|
+
theformulas
|
214
|
+
end
|
215
|
+
|
216
|
+
private
|
217
|
+
|
218
|
+
# read the version of the OO-Version
|
219
|
+
def oo_version
|
220
|
+
@doc.find("//*[local-name()='document-content']").each do |office|
|
221
|
+
@officeversion = office.attributes['version']
|
222
|
+
end
|
223
|
+
end
|
224
|
+
|
225
|
+
# helper function to set the internal representation of cells
|
226
|
+
def set_cell_values(sheet,x,y,i,v,vt,formula,table_cell,str_v,style_name)
|
227
|
+
key = [y,x+i]
|
228
|
+
@cell_type[sheet] = {} unless @cell_type[sheet]
|
229
|
+
@cell_type[sheet][key] = Openoffice.oo_type_2_roo_type(vt)
|
230
|
+
@formula[sheet] = {} unless @formula[sheet]
|
231
|
+
@formula[sheet][key] = formula if formula
|
232
|
+
@cell[sheet] = {} unless @cell[sheet]
|
233
|
+
@style[sheet] = {} unless @style[sheet]
|
234
|
+
@style[sheet][key] = style_name
|
235
|
+
case @cell_type[sheet][key]
|
236
|
+
when :float
|
237
|
+
@cell[sheet][key] = (v.to_s.include?('.') ? v.to_f : v.to_i)
|
238
|
+
when :string
|
239
|
+
@cell[sheet][key] = str_v
|
240
|
+
when :date
|
241
|
+
if table_cell.attributes['date-value'].size != "XXXX-XX-XX".size
|
242
|
+
#-- dann ist noch eine Uhrzeit vorhanden
|
243
|
+
#-- "1961-11-21T12:17:18"
|
244
|
+
@cell[sheet][key] = DateTime.parse(table_cell.attributes['date-value'])
|
245
|
+
@cell_type[sheet][key] = :datetime
|
246
|
+
else
|
247
|
+
@cell[sheet][key] = table_cell.attributes['date-value']
|
248
|
+
end
|
249
|
+
when :percentage
|
250
|
+
@cell[sheet][key] = v.to_f
|
251
|
+
when :time
|
252
|
+
hms = v.split(':')
|
253
|
+
@cell[sheet][key] = hms[0].to_i*3600 + hms[1].to_i*60 + hms[2].to_i
|
254
|
+
else
|
255
|
+
@cell[sheet][key] = v
|
256
|
+
end
|
257
|
+
end
|
258
|
+
|
259
|
+
# read all cells in the selected sheet
#
# Walks the table element whose name equals +sheet+, records value, type,
# formula and style of every cell through set_cell_values, then reads the
# automatic styles and marks the sheet as read.
#
# Raises ArgumentError if neither +sheet+ nor a default sheet is set and
# RangeError if the sheet does not exist in the document.
#--
# the following construct means '4 blanks'
# some content <text:s text:c="3"/>
#++
def read_cells(sheet=nil)
  sheet = @default_sheet unless sheet
  sheet_found = false
  raise ArgumentError, "Error: sheet '#{sheet||'nil'}' not valid" if @default_sheet.nil? && sheet.nil?
  raise RangeError unless self.sheets.include? sheet

  @doc.find("//*[local-name()='table']").each do |ws|
    next unless sheet == ws.attributes['name']
    sheet_found = true
    col = 1
    row = 1
    ws.each_element do |table_element|
      case table_element.name
      when 'table-column'
        @style_defaults[sheet] << table_element.attributes['default-cell-style-name']
      when 'table-row'
        if table_element.attributes['number-rows-repeated']
          skip_row = table_element.attributes['number-rows-repeated'].to_i
          row = row + skip_row - 1
        end
        table_element.each_element do |cell|
          skip_col = cell.attributes['number-columns-repeated']
          formula = cell.attributes['formula']
          vt = cell.attributes['value-type']
          v = cell.attributes['value']
          style_name = cell.attributes['style-name']
          # str_v is only built for string cells; it stays nil otherwise.
          str_v = nil
          case vt
          when 'string'
            str_v = ''
            # insert \n if there is more than one paragraph
            para_count = 0
            cell.each_element do |str|
              next unless str.name == 'p'
              v = str.content
              str_v += "\n" if para_count > 0
              para_count += 1
              if str.children.size > 1
                str_v += children_to_string(str.children)
              else
                str.children.each do |child|
                  str_v += child.content #.text
                end
              end
              # special case not supported by unescapeHTML; the pattern has
              # to be the entity itself, not a literal apostrophe
              str_v.gsub!(/&apos;/,"'")
              str_v = CGI.unescapeHTML(str_v)
            end
          when 'time'
            cell.each_element do |str|
              v = str.content if str.name == 'p'
            end
          when 'boolean'
            v = cell.attributes['boolean-value']
          else
            # '', nil, 'date', 'percentage', 'float' and unknown types need
            # no extra handling here
            # raise "unknown type #{vt}"
          end
          if skip_col
            if !v.nil? || cell.attributes['date-value']
              0.upto(skip_col.to_i-1) do |i|
                set_cell_values(sheet,col,row,i,v,vt,formula,cell,str_v,style_name)
              end
            end
            col += (skip_col.to_i - 1)
          end # if skip
          set_cell_values(sheet,col,row,0,v,vt,formula,cell,str_v,style_name)
          col += 1
        end
        row += 1
        col = 1
      end
    end
  end

  @doc.find("//*[local-name()='automatic-styles']").each do |style|
    read_styles(style)
  end
  raise RangeError unless sheet_found
  @cells_read[sheet] = true
end
|
356
|
+
|
357
|
+
def read_styles(style_elements)
|
358
|
+
@style_definitions['Default'] = Openoffice::Font.new
|
359
|
+
style_elements.each do |style|
|
360
|
+
next unless style.name == 'style'
|
361
|
+
style_name = style.attributes['name']
|
362
|
+
style.each do |properties|
|
363
|
+
font = Openoffice::Font.new
|
364
|
+
font.bold = properties.attributes['font-weight']
|
365
|
+
font.italic = properties.attributes['font-style']
|
366
|
+
font.underline = properties.attributes['text-underline-style']
|
367
|
+
@style_definitions[style_name] = font
|
368
|
+
end
|
369
|
+
end
|
370
|
+
end
|
371
|
+
|
372
|
+
# Checks if the default_sheet exists. If not, a RangeError exception is
|
373
|
+
# raised
|
374
|
+
def check_default_sheet
|
375
|
+
sheet_found = false
|
376
|
+
raise ArgumentError, "Error: default_sheet not set" if @default_sheet == nil
|
377
|
+
sheet_found = true if sheets.include?(@default_sheet)
|
378
|
+
if ! sheet_found
|
379
|
+
raise RangeError, "sheet '#{@default_sheet}' not found"
|
380
|
+
end
|
381
|
+
end
|
382
|
+
|
383
|
+
def process_zipfile(zip, path='')
|
384
|
+
if zip.file.file? path
|
385
|
+
if path == "content.xml"
|
386
|
+
open(File.join(@tmpdir, @file_nr.to_s+'_roo_content.xml'),'wb') {|f|
|
387
|
+
f << zip.read(path)
|
388
|
+
}
|
389
|
+
end
|
390
|
+
else
|
391
|
+
unless path.empty?
|
392
|
+
path += '/'
|
393
|
+
end
|
394
|
+
zip.dir.foreach(path) do |filename|
|
395
|
+
process_zipfile(zip, path+filename)
|
396
|
+
end
|
397
|
+
end
|
398
|
+
end
|
399
|
+
|
400
|
+
def extract_content
|
401
|
+
Zip::ZipFile.open(@filename) do |zip|
|
402
|
+
process_zipfile(zip)
|
403
|
+
end
|
404
|
+
end
|
405
|
+
|
406
|
+
# Stores +value+ for the cell (row,col) in the in-memory cell table of
# +sheet+ (the default sheet when +sheet+ is omitted). Nothing is written
# back to the spreadsheet file.
def set_value(row,col,value,sheet=nil)
  # Fall back to @default_sheet; @default_value is never assigned, so the
  # old fallback always produced nil.
  sheet = @default_sheet unless sheet
  # Create the per-sheet hash on demand (it is missing for sheets in which
  # no cell has been stored yet).
  (@cell[sheet] ||= {})[[row,col]] = value
end
|
410
|
+
|
411
|
+
# Stores the cell type +type+ for the cell (row,col) in the in-memory type
# table of +sheet+ (the default sheet when +sheet+ is omitted).
def set_type(row,col,type,sheet=nil)
  # Fall back to @default_sheet; @default_value is never assigned, so the
  # old fallback always produced nil.
  sheet = @default_sheet unless sheet
  # Create the per-sheet hash on demand (it is missing for sheets in which
  # no cell type has been stored yet).
  (@cell_type[sheet] ||= {})[[row,col]] = type
end
|
415
|
+
|
416
|
+
A_ROO_TYPE = {
|
417
|
+
"float" => :float,
|
418
|
+
"string" => :string,
|
419
|
+
"date" => :date,
|
420
|
+
"percentage" => :percentage,
|
421
|
+
"time" => :time,
|
422
|
+
}
|
423
|
+
|
424
|
+
def Openoffice.oo_type_2_roo_type(ootype)
|
425
|
+
return A_ROO_TYPE[ootype]
|
426
|
+
end
|
427
|
+
|
428
|
+
# helper method to convert compressed spaces and other elements within
# a text into a string
#
# children:: the child nodes of a paragraph element; each must respond to
#            text?, name, content and attributes
#
# Text nodes and all elements other than 's' contribute their content;
# an 's' element contributes as many blanks as its 'c' attribute says.
def children_to_string(children)
  # Append in place instead of building a new String for every child.
  result = ''.dup
  children.each do |child|
    if !child.text? && child.name == 's'
      compressed_spaces = child.attributes['c'].to_i
      # no explicit number means a count of 1:
      compressed_spaces = 1 if compressed_spaces == 0
      result << " "*compressed_spaces
    else
      result << child.content
    end
  end
  result
end
|
450
|
+
|
451
|
+
end # class
|