ruh-roo 3.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +677 -0
- data/Gemfile +24 -0
- data/LICENSE +24 -0
- data/README.md +315 -0
- data/lib/roo/base.rb +607 -0
- data/lib/roo/constants.rb +7 -0
- data/lib/roo/csv.rb +141 -0
- data/lib/roo/errors.rb +11 -0
- data/lib/roo/excelx/cell/base.rb +108 -0
- data/lib/roo/excelx/cell/boolean.rb +30 -0
- data/lib/roo/excelx/cell/date.rb +28 -0
- data/lib/roo/excelx/cell/datetime.rb +107 -0
- data/lib/roo/excelx/cell/empty.rb +20 -0
- data/lib/roo/excelx/cell/number.rb +89 -0
- data/lib/roo/excelx/cell/string.rb +19 -0
- data/lib/roo/excelx/cell/time.rb +44 -0
- data/lib/roo/excelx/cell.rb +110 -0
- data/lib/roo/excelx/comments.rb +55 -0
- data/lib/roo/excelx/coordinate.rb +19 -0
- data/lib/roo/excelx/extractor.rb +39 -0
- data/lib/roo/excelx/format.rb +71 -0
- data/lib/roo/excelx/images.rb +26 -0
- data/lib/roo/excelx/relationships.rb +33 -0
- data/lib/roo/excelx/shared.rb +39 -0
- data/lib/roo/excelx/shared_strings.rb +151 -0
- data/lib/roo/excelx/sheet.rb +151 -0
- data/lib/roo/excelx/sheet_doc.rb +248 -0
- data/lib/roo/excelx/styles.rb +64 -0
- data/lib/roo/excelx/workbook.rb +63 -0
- data/lib/roo/excelx.rb +480 -0
- data/lib/roo/font.rb +17 -0
- data/lib/roo/formatters/base.rb +15 -0
- data/lib/roo/formatters/csv.rb +84 -0
- data/lib/roo/formatters/matrix.rb +23 -0
- data/lib/roo/formatters/xml.rb +31 -0
- data/lib/roo/formatters/yaml.rb +40 -0
- data/lib/roo/helpers/default_attr_reader.rb +20 -0
- data/lib/roo/helpers/weak_instance_cache.rb +41 -0
- data/lib/roo/libre_office.rb +4 -0
- data/lib/roo/link.rb +34 -0
- data/lib/roo/open_office.rb +628 -0
- data/lib/roo/spreadsheet.rb +39 -0
- data/lib/roo/tempdir.rb +21 -0
- data/lib/roo/utils.rb +128 -0
- data/lib/roo/version.rb +3 -0
- data/lib/roo.rb +36 -0
- data/roo.gemspec +28 -0
- metadata +189 -0
data/lib/roo/excelx.rb
ADDED
@@ -0,0 +1,480 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'zip/filesystem'
|
3
|
+
require 'roo/link'
|
4
|
+
require 'roo/tempdir'
|
5
|
+
require 'roo/utils'
|
6
|
+
require 'forwardable'
|
7
|
+
require 'set'
|
8
|
+
|
9
|
+
module Roo
|
10
|
+
class Excelx < Roo::Base
|
11
|
+
extend Roo::Tempdir
|
12
|
+
extend Forwardable
|
13
|
+
|
14
|
+
ERROR_VALUES = %w(#N/A #REF! #NAME? #DIV/0! #NULL! #VALUE! #NUM!).to_set
|
15
|
+
|
16
|
+
require 'roo/excelx/shared'
|
17
|
+
require 'roo/excelx/workbook'
|
18
|
+
require 'roo/excelx/shared_strings'
|
19
|
+
require 'roo/excelx/styles'
|
20
|
+
require 'roo/excelx/cell'
|
21
|
+
require 'roo/excelx/sheet'
|
22
|
+
require 'roo/excelx/relationships'
|
23
|
+
require 'roo/excelx/comments'
|
24
|
+
require 'roo/excelx/sheet_doc'
|
25
|
+
require 'roo/excelx/coordinate'
|
26
|
+
require 'roo/excelx/format'
|
27
|
+
require 'roo/excelx/images'
|
28
|
+
|
29
|
+
delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files, :image_rels, :image_files] => :@shared
|
30
|
+
ExceedsMaxError = Class.new(StandardError)
|
31
|
+
|
32
|
+
# Opens an Excel 2007+ (.xlsx / .xlsm) workbook and prepares its sheets.
#
# The archive is unpacked into a temporary directory that is cleaned up by
# a finalizer when this object is garbage collected, or right away when
# anything in here raises.
#
# filename_or_stream - file name or stream of the workbook.
# options            - Hash; the keys read here are:
#   :packed               - handed to file_type_check and local_filename
#                           (e.g. :zip)
#   :file_warning         - handed to file_type_check (default: :error)
#   :cell_max             - Integer; abort early with ExceedsMaxError when
#                           the dimensions of the checked sheet hold more
#                           cells than this
#   :sheet                - sheet checked against :cell_max (the default
#                           sheet when absent)
#   :expand_merged_ranges, :no_hyperlinks, :empty_cell
#                         - handed to every Sheet (default: false)
#   :disable_html_wrapper - handed to Shared (default: false)
#   :only_visible_sheets  - skip sheets whose state is 'hidden'
#   :tmpdir_root          - handed to make_tempdir
#
# Raises ExceedsMaxError when :cell_max is exceeded.
def initialize(filename_or_stream, options = {})
  # :cell_max and :sheet are removed before the options reach the
  # superclass; work on a copy so the caller's Hash is left untouched.
  options = options.dup

  packed = options[:packed]
  file_warning = options.fetch(:file_warning, :error)
  cell_max = options.delete(:cell_max)
  sheet_options = {}
  sheet_options[:expand_merged_ranges] = (options[:expand_merged_ranges] || false)
  sheet_options[:no_hyperlinks] = (options[:no_hyperlinks] || false)
  sheet_options[:empty_cell] = (options[:empty_cell] || false)
  shared_options = {}

  shared_options[:disable_html_wrapper] = (options[:disable_html_wrapper] || false)
  # Only real files get an extension check and a basename; for streams
  # basename stays nil.
  unless is_stream?(filename_or_stream)
    file_type_check(filename_or_stream, %w[.xlsx .xlsm], 'an Excel 2007', file_warning, packed)
    basename = find_basename(filename_or_stream)
  end

  # NOTE: Create temp directory and allow Ruby to cleanup the temp directory
  #       when the object is garbage collected. Initially, the finalizer was
  #       created in the Roo::Tempdir module, but that led to a segfault
  #       when testing in Ruby 2.4.0.
  @tmpdir = self.class.make_tempdir(self, basename, options[:tmpdir_root])
  ObjectSpace.define_finalizer(self, self.class.finalize(object_id))

  @shared = Shared.new(@tmpdir, shared_options)
  @filename = local_filename(filename_or_stream, @tmpdir, packed)
  process_zipfile(@filename || filename_or_stream)

  @sheet_names = []
  @sheets = []
  @sheets_by_name = {}

  workbook.sheets.each_with_index do |sheet, index|
    next if options[:only_visible_sheets] && sheet['state'] == 'hidden'

    sheet_name = sheet['name']
    @sheet_names << sheet_name
    # Every sheet is reachable both by name and by its workbook index.
    @sheets_by_name[sheet_name] = @sheets[index] = Sheet.new(sheet_name, @shared, index, sheet_options)
  end

  if cell_max
    cell_count = ::Roo::Utils.num_cells_in_range(sheet_for(options.delete(:sheet)).dimensions)
    raise ExceedsMaxError.new("Excel file exceeds cell maximum: #{cell_count} > #{cell_max}") if cell_count > cell_max
  end

  super
rescue
  # Setup failed: remove the temporary files now instead of waiting for
  # the finalizer, then let the error propagate.
  self.class.finalize_tempdirs(object_id)
  raise
end
|
85
|
+
|
86
|
+
# Resolves a call named after one of the workbook's defined names to the
# value of the cell that name points at. Every other call is handed to the
# superclass.
def method_missing(method, *args)
  named_cell = workbook.defined_names[method.to_s]
  # not a defined name: call super for methods like #a1
  return super unless named_cell

  safe_send(sheet_for(named_cell.sheet).cells[named_cell.key], :value)
end
|
94
|
+
|
95
|
+
# Returns the sheet names collected while the workbook was opened.
def sheets
  @sheet_names
end
|
98
|
+
|
99
|
+
# Looks up the Sheet object for a sheet given by name or by index.
#
# sheet - sheet name or index; the default sheet is used when nil.
#
# The argument is checked with validate_sheet! before the lookup.
def sheet_for(sheet)
  wanted = sheet || default_sheet
  validate_sheet!(wanted)

  by_name = @sheets_by_name[wanted]
  by_name || @sheets[wanted]
end
|
104
|
+
|
105
|
+
# Returns, for every image of the sheet, the first extracted image file
# whose path contains the image's name (nil when none does).
#
# sheet - sheet name or index (default sheet when nil).
def images(sheet = nil)
  sheet_for(sheet).images.map do |image|
    wanted_name = image.last
    image_files.find { |file| file[wanted_name] }
  end
end
|
109
|
+
|
110
|
+
# Returns the content of a spreadsheet cell.
# (1,1) is the upper left corner.
# (1,1), (1,'A'), ('A',1), ('a',1) all refer to the
# cell in the first row and first column.
#
# Returns nil when there is no cell at that position.
def cell(row, col, sheet = nil)
  coordinates = normalize(row, col)
  found = sheet_for(sheet).cells[coordinates]
  safe_send(found, :value)
end
|
118
|
+
|
119
|
+
# Returns the given row of the sheet, as produced by the sheet's own #row.
#
# rownumber - number of the row.
# sheet     - sheet name or index (default sheet when nil).
def row(rownumber, sheet = nil)
  target_sheet = sheet_for(sheet)
  target_sheet.row(rownumber)
end
|
122
|
+
|
123
|
+
# Returns all values in this column as an array.
# Column numbers are 1,2,3,... like in the spreadsheet; a column letter
# (String) is converted to its number first.
def column(column_number, sheet = nil)
  index =
    if column_number.is_a?(::String)
      ::Roo::Utils.letter_to_number(column_number)
    else
      column_number
    end

  sheet_for(sheet).column(index)
end
|
131
|
+
|
132
|
+
# Returns the number of the first non-empty row of the sheet.
def first_row(sheet = nil)
  target_sheet = sheet_for(sheet)
  target_sheet.first_row
end
|
136
|
+
|
137
|
+
# Returns the number of the last non-empty row of the sheet.
def last_row(sheet = nil)
  target_sheet = sheet_for(sheet)
  target_sheet.last_row
end
|
141
|
+
|
142
|
+
# Returns the number of the first non-empty column of the sheet.
def first_column(sheet = nil)
  target_sheet = sheet_for(sheet)
  target_sheet.first_column
end
|
146
|
+
|
147
|
+
# Returns the number of the last non-empty column of the sheet.
def last_column(sheet = nil)
  target_sheet = sheet_for(sheet)
  target_sheet.last_column
end
|
151
|
+
|
152
|
+
# Sets a cell to a certain value.
# (This will not be saved back to the spreadsheet file!)
#
# row, col - position of the cell, in any form normalize accepts.
# value    - the new content; its cell type comes from cell_type_by_value.
# sheet    - sheet name or index (default sheet when nil).
#
# Returns the newly created Cell.
def set(row, col, value, sheet = nil) #:nodoc:
  key = normalize(row, col)
  cell_type = cell_type_by_value(value)
  # Build the coordinate from the normalized key so it always agrees with
  # the position the cell is stored under, whatever form row/col came in.
  row_index, column_index = key
  sheet_for(sheet).cells[key] = Cell.new(value, cell_type, nil, cell_type, value, nil, nil, nil, Coordinate.new(row_index, column_index))
end
|
159
|
+
|
160
|
+
# Returns the formula at (row,col).
# Returns nil if there is no formula.
# The method #formula? checks if there is a formula.
def formula(row, col, sheet = nil)
  coordinates = normalize(row, col)
  found = sheet_for(sheet).cells[coordinates]
  safe_send(found, :formula)
end
|
167
|
+
|
168
|
+
# True when the cell has a formula, false otherwise.
# Takes the same arguments as #formula. As a predicate it always answers
# with a real boolean, never with the formula itself or nil.
def formula?(*args)
  formula(*args) ? true : false
end
|
174
|
+
|
175
|
+
# Returns each formula in the selected sheet as an array of tuples in the
# following format:
#   [[row, col, formula], [row, col, formula], ...]
def formulas(sheet = nil)
  sheet_for(sheet).cells.each_with_object([]) do |((x, y), cell), found|
    found << [x, y, cell.formula] if cell.formula
  end
end
|
182
|
+
|
183
|
+
# Given a cell, returns the style definition that belongs to it.
# Returns nil when the cell has no style index.
def font(row, col, sheet = nil)
  coordinates = normalize(row, col)
  style_index = safe_send(sheet_for(sheet).cells[coordinates], :style)
  return unless style_index

  styles.definitions[style_index]
end
|
189
|
+
|
190
|
+
# Returns the type of a cell:
# * :float
# * :string,
# * :date
# * :percentage
# * :formula
# * :time
# * :datetime
def celltype(row, col, sheet = nil)
  coordinates = normalize(row, col)
  found = sheet_for(sheet).cells[coordinates]
  safe_send(found, :type)
end
|
202
|
+
|
203
|
+
# Returns the internal type of an excel cell:
# * :numeric_or_formula
# * :string
# Note: this is only available within the Excelx class
def excelx_type(row, col, sheet = nil)
  coordinates = normalize(row, col)
  found = sheet_for(sheet).cells[coordinates]
  safe_send(found, :cell_type)
end
|
211
|
+
|
212
|
+
# Returns the internal value of an excelx cell.
# Note: this is only available within the Excelx class
def excelx_value(row, col, sheet = nil)
  coordinates = normalize(row, col)
  found = sheet_for(sheet).cells[coordinates]
  safe_send(found, :cell_value)
end
|
218
|
+
|
219
|
+
# Returns the formatted value of an excelx cell, i.e. whatever the cell
# answers to #formatted_value (nil when there is no cell).
# Note: this is only available within the Excelx class
def formatted_value(row, col, sheet = nil)
  coordinates = normalize(row, col)
  found = sheet_for(sheet).cells[coordinates]
  safe_send(found, :formatted_value)
end
|
225
|
+
|
226
|
+
# Returns the internal format of an excel cell, as reported by the sheet.
def excelx_format(row, col, sheet = nil)
  coordinates = normalize(row, col)
  target_sheet = sheet_for(sheet)
  target_sheet.excelx_format(coordinates)
end
|
231
|
+
|
232
|
+
# True when there is no cell at (row,col), when the cell reports itself as
# empty, or when the position lies outside the sheet's used range.
#
# row, col - position of the cell, in any form normalize accepts.
# sheet    - sheet name or index (default sheet when nil).
def empty?(row, col, sheet = nil)
  sheet = sheet_for(sheet)
  key = normalize(row, col)
  cell = sheet.cells[key]
  # Compare the normalized indices: the raw arguments may be column letters
  # or given in ('A', 1) order, which cannot be compared with the bounds.
  row_index, column_index = key
  !cell || cell.empty? ||
    (row_index < sheet.first_row || row_index > sheet.last_row ||
     column_index < sheet.first_column || column_index > sheet.last_column)
end
|
239
|
+
|
240
|
+
# Shows the internal representation of all cells of a sheet,
# for debugging purposes.
def to_s(sheet = nil)
  all_cells = sheet_for(sheet).cells
  all_cells.inspect
end
|
245
|
+
|
246
|
+
# Returns [row, col, sheet] of the labelled cell,
# or [nil, nil, nil] if the label is not defined.
def label(name)
  names = workbook.defined_names
  known = !names.empty? && names.key?(name)
  return [nil, nil, nil] unless known

  entry = names[name]
  [entry.row, entry.col, entry.sheet]
end
|
254
|
+
|
255
|
+
# Returns an array with all labels. Each element is an array of the form
# [labelname, [row, col, sheetname]]. The list is built once and reused.
def labels
  @labels ||= workbook.defined_names.map { |name, entry| [name, [entry.row, entry.col, entry.sheet]] }
end
|
265
|
+
|
266
|
+
# True when there is a hyperlink at (row,col), false otherwise.
def hyperlink?(row, col, sheet = nil)
  hyperlink(row, col, sheet) ? true : false
end
|
269
|
+
|
270
|
+
# Returns the hyperlink at (row/col),
# nil if there is no hyperlink.
def hyperlink(row, col, sheet = nil)
  coordinates = normalize(row, col)
  links = sheet_for(sheet).hyperlinks
  links[coordinates]
end
|
276
|
+
|
277
|
+
# Returns the comment at (row/col),
# nil if there is no comment.
def comment(row, col, sheet = nil)
  coordinates = normalize(row, col)
  sheet_comments = sheet_for(sheet).comments
  sheet_comments[coordinates]
end
|
283
|
+
|
284
|
+
# True if there is a comment at (row,col), false otherwise.
def comment?(row, col, sheet = nil)
  comment(row, col, sheet) ? true : false
end
|
288
|
+
|
289
|
+
# Returns every comment of the sheet as an array of tuples:
#   [[row, col, comment], [row, col, comment], ...]
def comments(sheet = nil)
  sheet_for(sheet).comments.map { |(row, col), text| [row, col, text] }
end
|
294
|
+
|
295
|
+
# Yields each row of a sheet as an array of Excelx::Cell.
# Takes options for sheet, pad_cells, and max_rows; :sheet is removed from
# the options Hash and the remaining options are handed to the sheet's
# each_row.
#
# Returns an Enumerator over the rows when no block is given.
def each_row_streaming(options = {})
  target_sheet = sheet_for(options.delete(:sheet))
  return target_sheet.to_enum(:each_row, options) unless block_given?

  target_sheet.each_row(options) { |row| yield row }
end
|
305
|
+
|
306
|
+
private
|
307
|
+
|
308
|
+
# Runs every String cell value of the named sheet through sanitize_value
# and records the sheet as cleaned.
def clean_sheet(sheet)
  @sheets_by_name[sheet].cells.each_pair do |coord, cell|
    content = cell.value
    next unless content.is_a?(::String)

    @sheets_by_name[sheet].cells[coord].value = sanitize_value(content)
  end

  @cleaned[sheet] = true
end
|
317
|
+
|
318
|
+
# Internal: Extracts the worksheet ids from the workbook.xml file. xlsx
# documents require a workbook.xml file, so if the file is missing the
# document is not a valid xlsx file. In that case an ArgumentError is
# raised.
#
# entries - the entries of the zip archive; the first one whose name ends
#           in workbook.xml is used.
# path    - A String with the destination path the entry is extracted to.
#
# Examples
#
#   extract_worksheet_ids(entries, 'tmpdir/roo_workbook.xml')
#   # => ["rId1", "rId2", "rId3"]
#
# Returns an Array of Strings.
# Raises ArgumentError if the archive has no workbook.xml entry.
def extract_worksheet_ids(entries, path)
  wb = entries.find { |e| e.name[/workbook.xml$/] }
  # `fail` takes the class and the message as two arguments; without the
  # comma this called a method named ArgumentError and raised NoMethodError.
  fail ArgumentError, 'missing required workbook file' if wb.nil?

  wb.extract(path)
  workbook_doc = Roo::Utils.load_xml(path).remove_namespaces!
  workbook_doc.xpath('//sheet').map { |s| s['id'] }
end
|
340
|
+
|
341
|
+
# Internal: Extracts the worksheet relationships from the
# workbook.xml.rels file.
#
# entries - the entries of the zip archive; the first one whose name ends
#           in workbook.xml.rels is used.
# path    - A String with the destination path the entry is extracted to.
#
# Examples
#
#   extract_worksheet_rels(entries, 'tmpdir/roo_workbook.xml.rels')
#   # => {
#        "rId1"=>"worksheets/sheet1.xml",
#        "rId2"=>"worksheets/sheet2.xml",
#        "rId3"=>"worksheets/sheet3.xml"
#      }
#
# Returns a Hash mapping relationship id to target, limited to
# relationships whose Type is one of worksheet_types.
# Raises ArgumentError if the archive has no workbook.xml.rels entry.
def extract_worksheet_rels(entries, path)
  wb_rels = entries.find { |e| e.name[/workbook.xml.rels$/] }
  # `fail` takes the class and the message as two arguments; without the
  # comma this called a method named ArgumentError and raised NoMethodError.
  fail ArgumentError, 'missing required workbook file' if wb_rels.nil?

  wb_rels.extract(path)
  rels_doc = Roo::Utils.load_xml(path).remove_namespaces!

  relationships = rels_doc.xpath('//Relationship').select do |relationship|
    worksheet_types.include? relationship['Type']
  end

  relationships.each_with_object({}) do |relationship, hash|
    hash[relationship['Id']] = relationship['Target']
  end
end
|
371
|
+
|
372
|
+
# Extracts the sheets in order, but it will ignore sheets that are not
# worksheets (ids that have no entry in +sheets+).
#
# entries   - the entries of the zip archive.
# sheet_ids - relationship ids, in the order the sheets should get.
# sheets    - Hash mapping relationship id to the worksheet's file name.
# tmpdir    - directory the sheet files are extracted to.
def extract_sheets_in_order(entries, sheet_ids, sheets, tmpdir)
  worksheet_ids = sheet_ids & sheets.keys

  worksheet_ids.each_with_index do |rel_id, position|
    target_name = sheets[rel_id]
    source = entries.find { |candidate| "/#{candidate.name}" =~ /#{target_name}$/ }
    destination = "#{tmpdir}/roo_sheet#{position + 1}"
    # The path is recorded twice: in the sheet_files list and in this
    # object's own @sheet_files.
    sheet_files << destination
    @sheet_files << destination
    source.extract(destination)
  end
end
|
384
|
+
|
385
|
+
# Extracts every media image of the archive and records the extracted
# paths in image_files.
#
# entries - the entries of the zip archive.
# tmpdir  - not used by the body; the files go below @tmpdir.
#
# Returns the Array of image entries.
def extract_images(entries, tmpdir)
  pictures = entries.select { |e| e.name[/media\/image([0-9]+)/] }

  pictures.each do |picture|
    flattened_name = picture.name.gsub(/xl\/|\//, "_")
    destination = "#{@tmpdir}/roo#{flattened_name}"
    image_files << destination
    picture.extract(destination)
  end
end
|
393
|
+
|
394
|
+
# Extracts all needed files from the zip file.
#
# zipfilename_or_stream - file name of the archive, or a stream holding it.
#
# Resets @sheet_files and hands the archive's entries, sorted by name, to
# process_zipfile_entries.
def process_zipfile(zipfilename_or_stream)
  @sheet_files = []

  zip_file =
    if is_stream?(zipfilename_or_stream)
      Zip::CentralDirectory.new.tap do |directory|
        directory.read_from_stream(zipfilename_or_stream)
      end
    else
      Zip::File.open(zipfilename_or_stream)
    end

  sorted_entries = zip_file.to_a.sort_by(&:name)
  process_zipfile_entries(sorted_entries)
end
|
407
|
+
|
408
|
+
# Extracts the workbook, the worksheets, the images and the supporting
# files (shared strings, styles, comments, relationships) from the
# archive's entries into @tmpdir.
#
# entries - the entries of the zip archive.
def process_zipfile_entries(entries)
  # NOTE: When Google or Numbers 3.1 exports to xlsx, the worksheet filenames
  #       are not in order. With Numbers 3.1, the first sheet is always
  #       sheet.xml, not sheet1.xml. With Google, the order of the worksheets is
  #       independent of a worksheet's filename (i.e. sheet6.xml can be the
  #       first worksheet).
  #
  #       workbook.xml lists the correct order of worksheets and
  #       workbook.xml.rels lists the filenames for those worksheets.
  #
  #       workbook.xml:
  #         <sheet state="visible" name="IS" sheetId="1" r:id="rId3"/>
  #         <sheet state="visible" name="BS" sheetId="2" r:id="rId4"/>
  #       workbook.xml.rel:
  #         <Relationship Id="rId4" Target="worksheets/sheet5.xml" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"/>
  #         <Relationship Id="rId3" Target="worksheets/sheet4.xml" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"/>
  worksheet_ids = extract_worksheet_ids(entries, "#{@tmpdir}/roo_workbook.xml")
  worksheet_rels = extract_worksheet_rels(entries, "#{@tmpdir}/roo_workbook.xml.rels")
  extract_sheets_in_order(entries, worksheet_ids, worksheet_rels, @tmpdir)
  extract_images(entries, @tmpdir)

  entries.each do |entry|
    # The first matching pattern wins, so the order of the branches matters.
    target =
      case entry.name.downcase
      when /richdata/
        # FIXME: Ignore richData as parsing is not implemented yet and can cause
        # Zip::DestinationFileExistsError when including a second "styles.xml" entry
        # see http://schemas.microsoft.com/office/spreadsheetml/2017/richdata2
        nil
      when /sharedstrings.xml$/
        "#{@tmpdir}/roo_sharedStrings.xml"
      when /styles.xml$/
        "#{@tmpdir}/roo_styles.xml"
      when /comments([0-9]+).xml$/
        # FIXME: Most of the time, the order of the comment files is the same
        #        as the sheet order, i.e. sheet1.xml's comments are in
        #        comments1.xml. In some situations, this isn't true. The true
        #        location of a sheet's comment file is in the sheet1.xml.rels
        #        file. SEE ECMA-376 12.3.3 in "Ecma Office Open XML Part 1".
        number = Regexp.last_match(1).to_i
        comments_files[number - 1] = "#{@tmpdir}/roo_comments#{number}"
      when %r{chartsheets/_rels/sheet([0-9]+).xml.rels$}
        # NOTE: Chart sheet relationship files were interfering with
        #       worksheets.
        nil
      when /sheet([0-9]+).xml.rels$/
        # FIXME: Roo seems to use sheet[\d].xml.rels for hyperlinks only, but
        #        it also stores the location for sharedStrings, comments,
        #        drawings, etc.
        number = Regexp.last_match(1).to_i
        rels_files[number - 1] = "#{@tmpdir}/roo_rels#{number}"
      when /drawing([0-9]+).xml.rels$/
        # Extracting drawing relationships to make images lists for each sheet
        number = Regexp.last_match(1).to_i
        image_rels[number - 1] = "#{@tmpdir}/roo_image_rels#{number}"
      end

    next unless target

    entry.extract(target)
  end
end
|
468
|
+
|
469
|
+
# Calls +method+ on +object+ with the given arguments, but only when the
# object is not nil and responds to that method; otherwise returns nil.
def safe_send(object, method, *args)
  callable = object&.respond_to?(method)
  return unless callable

  object.send(method, *args)
end
|
472
|
+
|
473
|
+
# Relationship types that mark a relationship as a worksheet:
# the first is OOXML Transitional, the second OOXML Strict.
def worksheet_types
  %w[
    http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet
    http://purl.oclc.org/ooxml/officeDocument/relationships/worksheet
  ]
end
|
479
|
+
end
|
480
|
+
end
|
data/lib/roo/font.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
module Roo
  module Formatters
    module Base
      # Converts an integer number of seconds into a time string
      # like '02:05:06'.
      #
      # content - the seconds to convert.
      #
      # Returns the String "hh:mm:ss".
      def integer_to_timestring(content)
        hours = (content / 3600.0).floor
        remaining = content - hours * 3600
        minutes = (remaining / 60.0).floor
        seconds = remaining - minutes * 60
        Kernel.format("%02d:%02d:%02d", hours, minutes, seconds)
      end
    end
  end
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
module Roo
  module Formatters
    module CSV
      # Writes a sheet as CSV.
      #
      # filename  - file to write to; when nil the CSV text is returned.
      # separator - String placed between the cells of a row.
      # sheet     - sheet to export (the default sheet when omitted).
      #
      # Returns true after writing a file, otherwise the CSV as a String.
      def to_csv(filename = nil, separator = ",", sheet = default_sheet)
        unless filename
          buffer = ::StringIO.new
          write_csv_content(buffer, sheet, separator)
          buffer.rewind
          return buffer.read
        end

        File.open(filename, "w") { |io| write_csv_content(io, sheet, separator) }
        true
      end

      private

      # Writes all cells of the sheet to +file+, an object that responds to
      # #print. If the file argument is nil the output goes to STDOUT.
      # Nothing is written for an empty sheet.
      def write_csv_content(file = nil, sheet = nil, separator = ",")
        file ||= STDOUT
        return unless first_row(sheet) # The sheet is empty

        1.upto(last_row(sheet)) do |row|
          1.upto(last_column(sheet)) do |col|
            # TODO: use CSV.generate_line
            file.print(separator) unless col == 1
            file.print cell_to_csv(row, col, sheet)
          end
          file.print("\n")
        end
      end

      # The content of a cell in the csv output.
      # Empty cells and empty strings become "".
      def cell_to_csv(row, col, sheet)
        return "" if empty?(row, col, sheet)

        content = cell(row, col, sheet)
        type = celltype(row, col, sheet)

        text =
          case type
          when :string
            quote_for_csv(content) unless content.empty?
          when :boolean
            # TODO: this only works for excelx
            formatted = sheet_for(sheet).cells[[row, col]].formatted_value
            quote_for_csv(formatted).downcase
          when :float, :percentage
            number_to_csv(content)
          when :formula
            formula_result_to_csv(content)
          when :date, :datetime
            content.to_s
          when :time
            integer_to_timestring(content)
          when :link
            quote_for_csv(content.url)
          else
            fail "unhandled celltype #{type}"
          end

        text || ""
      end

      # CSV text for the result of a formula cell, chosen by the class of
      # the result.
      def formula_result_to_csv(content)
        case content
        when String
          quote_for_csv(content) unless content.empty?
        when Integer
          content.to_s
        when Float
          number_to_csv(content)
        when Date, DateTime, TrueClass, FalseClass
          content.to_s
        else
          fail "unhandled onecell-class #{content.class}"
        end
      end

      # Wraps text in double quotes, doubling any quote it contains.
      def quote_for_csv(text)
        %("#{text.gsub('"', '""')}")
      end

      # Numbers without a fractional part are written as integers.
      def number_to_csv(number)
        whole = number.to_i
        number == whole ? whole.to_s : number.to_s
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Roo
  module Formatters
    module Matrix
      # Returns a matrix object from the whole sheet or a rectangular area
      # of a sheet.
      #
      # from_row, from_column, to_row, to_column
      #       - bounds of the area; each defaults to the matching
      #         first/last row or column of the sheet.
      # sheet - sheet to read (the default sheet when omitted).
      #
      # Returns an empty Matrix when the sheet has no first row.
      def to_matrix(from_row = nil, from_column = nil, to_row = nil, to_column = nil, sheet = default_sheet)
        require 'matrix'

        # The emptiness check must look at the requested sheet, not at the
        # default one.
        return ::Matrix.empty unless first_row(sheet)

        from_row ||= first_row(sheet)
        to_row ||= last_row(sheet)
        from_column ||= first_column(sheet)
        to_column ||= last_column(sheet)

        ::Matrix.rows(from_row.upto(to_row).map do |row|
          from_column.upto(to_column).map do |col|
            cell(row, col, sheet)
          end
        end)
      end
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# Returns an XML representation of all sheets of a spreadsheet file.
module Roo
  module Formatters
    module XML
      # Builds a <spreadsheet> document with one <sheet> element per sheet
      # and one <cell> element per non-empty cell, carrying the cell's row,
      # column and type as attributes.
      #
      # NOTE: default_sheet is reassigned to every sheet in turn and is not
      # restored afterwards.
      def to_xml
        builder = Nokogiri::XML::Builder.new do |xml|
          xml.spreadsheet do
            sheets.each do |sheet|
              self.default_sheet = sheet
              xml.sheet(name: sheet) do |x|
                # otherwise empty sheets cause errors
                next unless first_row && last_row && first_column && last_column

                first_row.upto(last_row) do |row|
                  first_column.upto(last_column) do |col|
                    next if empty?(row, col)

                    x.cell(cell(row, col), row: row, column: col, type: celltype(row, col))
                  end
                end
              end
            end
          end
        end

        builder.to_xml
      end
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module Roo
  module Formatters
    module YAML
      # Returns a rectangular area (default: all cells) as yaml-output.
      # You can add additional attributes with the prefix parameter like:
      # oo.to_yaml({"file"=>"flightdata_2007-06-26", "sheet" => "1"})
      #
      # prefix - Hash of extra key/value pairs written into every cell entry.
      # from_row, from_column, to_row, to_column
      #        - bounds of the area; each defaults to the matching
      #          first/last row or column of the sheet.
      # sheet  - sheet to read (the default sheet when omitted).
      #
      # Returns the yaml text; an empty String when the sheet is empty.
      def to_yaml(prefix = {}, from_row = nil, from_column = nil, to_row = nil, to_column = nil, sheet = default_sheet)
        # Return an empty string if there is no first_row, i.e. the sheet is
        # empty. The check must look at the requested sheet, not at the
        # default one.
        return "" unless first_row(sheet)

        from_row ||= first_row(sheet)
        to_row ||= last_row(sheet)
        from_column ||= first_column(sheet)
        to_column ||= last_column(sheet)

        # Unary plus: the buffer is appended to below, so it must not be frozen.
        result = +"--- \n"
        from_row.upto(to_row) do |row|
          from_column.upto(to_column) do |col|
            next if empty?(row, col, sheet)

            result << "cell_#{row}_#{col}: \n"
            prefix.each do |k, v|
              result << " #{k}: #{v} \n"
            end
            result << " row: #{row} \n"
            result << " col: #{col} \n"
            result << " celltype: #{celltype(row, col, sheet)} \n"
            value = cell(row, col, sheet)
            # Times are stored as seconds; write them as hh:mm:ss.
            if celltype(row, col, sheet) == :time
              value = integer_to_timestring(value)
            end
            result << " value: #{value} \n"
          end
        end

        result
      end
    end
  end
end
|