ruh-roo 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +677 -0
- data/Gemfile +24 -0
- data/LICENSE +24 -0
- data/README.md +315 -0
- data/lib/roo/base.rb +607 -0
- data/lib/roo/constants.rb +7 -0
- data/lib/roo/csv.rb +141 -0
- data/lib/roo/errors.rb +11 -0
- data/lib/roo/excelx/cell/base.rb +108 -0
- data/lib/roo/excelx/cell/boolean.rb +30 -0
- data/lib/roo/excelx/cell/date.rb +28 -0
- data/lib/roo/excelx/cell/datetime.rb +107 -0
- data/lib/roo/excelx/cell/empty.rb +20 -0
- data/lib/roo/excelx/cell/number.rb +89 -0
- data/lib/roo/excelx/cell/string.rb +19 -0
- data/lib/roo/excelx/cell/time.rb +44 -0
- data/lib/roo/excelx/cell.rb +110 -0
- data/lib/roo/excelx/comments.rb +55 -0
- data/lib/roo/excelx/coordinate.rb +19 -0
- data/lib/roo/excelx/extractor.rb +39 -0
- data/lib/roo/excelx/format.rb +71 -0
- data/lib/roo/excelx/images.rb +26 -0
- data/lib/roo/excelx/relationships.rb +33 -0
- data/lib/roo/excelx/shared.rb +39 -0
- data/lib/roo/excelx/shared_strings.rb +151 -0
- data/lib/roo/excelx/sheet.rb +151 -0
- data/lib/roo/excelx/sheet_doc.rb +248 -0
- data/lib/roo/excelx/styles.rb +64 -0
- data/lib/roo/excelx/workbook.rb +63 -0
- data/lib/roo/excelx.rb +480 -0
- data/lib/roo/font.rb +17 -0
- data/lib/roo/formatters/base.rb +15 -0
- data/lib/roo/formatters/csv.rb +84 -0
- data/lib/roo/formatters/matrix.rb +23 -0
- data/lib/roo/formatters/xml.rb +31 -0
- data/lib/roo/formatters/yaml.rb +40 -0
- data/lib/roo/helpers/default_attr_reader.rb +20 -0
- data/lib/roo/helpers/weak_instance_cache.rb +41 -0
- data/lib/roo/libre_office.rb +4 -0
- data/lib/roo/link.rb +34 -0
- data/lib/roo/open_office.rb +628 -0
- data/lib/roo/spreadsheet.rb +39 -0
- data/lib/roo/tempdir.rb +21 -0
- data/lib/roo/utils.rb +128 -0
- data/lib/roo/version.rb +3 -0
- data/lib/roo.rb +36 -0
- data/roo.gemspec +28 -0
- metadata +189 -0
data/lib/roo/excelx.rb
ADDED
@@ -0,0 +1,480 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'zip/filesystem'
|
3
|
+
require 'roo/link'
|
4
|
+
require 'roo/tempdir'
|
5
|
+
require 'roo/utils'
|
6
|
+
require 'forwardable'
|
7
|
+
require 'set'
|
8
|
+
|
9
|
+
module Roo
|
10
|
+
class Excelx < Roo::Base
|
11
|
+
extend Roo::Tempdir
|
12
|
+
extend Forwardable
|
13
|
+
|
14
|
+
ERROR_VALUES = %w(#N/A #REF! #NAME? #DIV/0! #NULL! #VALUE! #NUM!).to_set
|
15
|
+
|
16
|
+
require 'roo/excelx/shared'
|
17
|
+
require 'roo/excelx/workbook'
|
18
|
+
require 'roo/excelx/shared_strings'
|
19
|
+
require 'roo/excelx/styles'
|
20
|
+
require 'roo/excelx/cell'
|
21
|
+
require 'roo/excelx/sheet'
|
22
|
+
require 'roo/excelx/relationships'
|
23
|
+
require 'roo/excelx/comments'
|
24
|
+
require 'roo/excelx/sheet_doc'
|
25
|
+
require 'roo/excelx/coordinate'
|
26
|
+
require 'roo/excelx/format'
|
27
|
+
require 'roo/excelx/images'
|
28
|
+
|
29
|
+
delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files, :image_rels, :image_files] => :@shared
|
30
|
+
ExceedsMaxError = Class.new(StandardError)
|
31
|
+
|
32
|
+
# Opens an xlsx/xlsm workbook and prepares its sheets for reading.
#
# filename_or_stream - path of the spreadsheet, or a stream (as decided by
#                      is_stream?).
# options            - Hash of options. Keys read here:
#                      :packed               - forwarded to file_type_check and
#                                              local_filename (documented value: :zip)
#                      :file_warning         - forwarded to file_type_check
#                                              (default :error)
#                      :cell_max             - Integer; abort early when the
#                                              selected sheet has more cells
#                      :sheet                - sheet measured against :cell_max
#                      :expand_merged_ranges,
#                      :no_hyperlinks,
#                      :empty_cell           - passed to every Sheet (default false)
#                      :disable_html_wrapper - passed to Shared (default false)
#                      :only_visible_sheets  - skip sheets whose state is 'hidden'
#                      :tmpdir_root          - forwarded to make_tempdir
#
# NOTE: :cell_max is removed from the caller's options hash, and so is :sheet
# when :cell_max was given.
#
# Raises ExceedsMaxError when the cell count exceeds :cell_max. Any
# StandardError raised during setup triggers cleanup of the temp directories
# and is then re-raised.
def initialize(filename_or_stream, options = {})
  packed = options[:packed]
  file_warning = options.fetch(:file_warning, :error)
  cell_max = options.delete(:cell_max)

  sheet_options = {
    expand_merged_ranges: options[:expand_merged_ranges] || false,
    no_hyperlinks: options[:no_hyperlinks] || false,
    empty_cell: options[:empty_cell] || false
  }
  shared_options = {
    disable_html_wrapper: options[:disable_html_wrapper] || false
  }

  unless is_stream?(filename_or_stream)
    file_type_check(filename_or_stream, %w[.xlsx .xlsm], 'an Excel 2007', file_warning, packed)
    basename = find_basename(filename_or_stream)
  end

  # NOTE: Create temp directory and allow Ruby to cleanup the temp directory
  #       when the object is garbage collected. Initially, the finalizer was
  #       created in the Roo::Tempdir module, but that led to a segfault
  #       when testing in Ruby 2.4.0.
  @tmpdir = self.class.make_tempdir(self, basename, options[:tmpdir_root])
  ObjectSpace.define_finalizer(self, self.class.finalize(object_id))

  @shared = Shared.new(@tmpdir, shared_options)
  @filename = local_filename(filename_or_stream, @tmpdir, packed)
  process_zipfile(@filename || filename_or_stream)

  @sheet_names = []
  @sheets = []
  @sheets_by_name = {}

  workbook.sheets.each_with_index do |sheet_node, position|
    next if options[:only_visible_sheets] && sheet_node['state'] == 'hidden'

    sheet_name = sheet_node['name']
    @sheet_names << sheet_name
    # Skipped (hidden) sheets leave a gap in @sheets so that positions keep
    # matching the workbook order.
    worksheet = Sheet.new(sheet_name, @shared, position, sheet_options)
    @sheets[position] = worksheet
    @sheets_by_name[sheet_name] = worksheet
  end

  if cell_max
    cell_count = ::Roo::Utils.num_cells_in_range(sheet_for(options.delete(:sheet)).dimensions)
    if cell_count > cell_max
      raise ExceedsMaxError, "Excel file exceeds cell maximum: #{cell_count} > #{cell_max}"
    end
  end

  # Bare super forwards filename_or_stream and options as they are now.
  super
rescue
  self.class.finalize_tempdirs(object_id)
  raise
end
|
85
|
+
|
86
|
+
# Resolves calls named after a workbook defined name to the value of the
# cell that name points at. Anything else is handed to the superclass
# (which handles methods like #a1).
def method_missing(method, *args)
  label = workbook.defined_names[method.to_s]
  # call super for methods like #a1
  return super unless label

  safe_send(sheet_for(label.sheet).cells[label.key], :value)
end
|
94
|
+
|
95
|
+
# Returns the Array of sheet names collected while the workbook was opened,
# in workbook order.
def sheets
  @sheet_names
end
|
98
|
+
|
99
|
+
# Resolves a sheet reference to its Sheet object.
#
# sheet - a sheet name or a position in the internal sheet list; nil (or
#         false) selects default_sheet.
#
# The reference is passed through validate_sheet! first (defined outside this
# file; presumably raises for unknown sheets -- confirm there).
def sheet_for(sheet)
  sheet ||= default_sheet
  validate_sheet!(sheet)

  by_name = @sheets_by_name[sheet]
  by_name || @sheets[sheet]
end
|
104
|
+
|
105
|
+
# Returns, for each image recorded on the sheet, the first entry of
# image_files that contains the image's name (nil when none does).
def images(sheet = nil)
  sheet_for(sheet).images.map(&:last).map do |image_name|
    image_files.find { |file_path| file_path[image_name] }
  end
end
|
109
|
+
|
110
|
+
# Returns the value of a spreadsheet cell.
# (1,1) is the upper left corner.
# (1,1), (1,'A'), ('A',1) and ('a',1) all refer to the cell in the first
# row and first column.
# Returns nil when no cell object exists at that position.
def cell(row, col, sheet = nil)
  coordinates = normalize(row, col)
  target = sheet_for(sheet).cells[coordinates]
  safe_send(target, :value)
end
|
118
|
+
|
119
|
+
# Returns row number +rownumber+ of the selected sheet, as produced by the
# sheet's own #row.
def row(rownumber, sheet = nil)
  worksheet = sheet_for(sheet)
  worksheet.row(rownumber)
end
|
122
|
+
|
123
|
+
# Returns all values in the given column as an array.
# Column numbers are 1,2,3,... like in the spreadsheet; a String is treated
# as a column letter and converted with Roo::Utils.letter_to_number first.
def column(column_number, sheet = nil)
  column_number = ::Roo::Utils.letter_to_number(column_number) if column_number.is_a?(::String)

  worksheet = sheet_for(sheet)
  worksheet.column(column_number)
end
|
131
|
+
|
132
|
+
# Returns the number of the first non-empty row of the selected sheet
# (the default sheet when +sheet+ is nil).
def first_row(sheet = nil)
  worksheet = sheet_for(sheet)
  worksheet.first_row
end
|
136
|
+
|
137
|
+
# Returns the number of the last non-empty row of the selected sheet
# (the default sheet when +sheet+ is nil).
def last_row(sheet = nil)
  worksheet = sheet_for(sheet)
  worksheet.last_row
end
|
141
|
+
|
142
|
+
# Returns the number of the first non-empty column of the selected sheet
# (the default sheet when +sheet+ is nil).
def first_column(sheet = nil)
  worksheet = sheet_for(sheet)
  worksheet.first_column
end
|
146
|
+
|
147
|
+
# Returns the number of the last non-empty column of the selected sheet
# (the default sheet when +sheet+ is nil).
def last_column(sheet = nil)
  worksheet = sheet_for(sheet)
  worksheet.last_column
end
|
151
|
+
|
152
|
+
# Sets a cell to a certain value by storing a freshly built Cell in the
# sheet's in-memory cell table.
# (this will not be saved back to the spreadsheet file!)
def set(row, col, value, sheet = nil) #:nodoc:
  coordinates = normalize(row, col)
  type = cell_type_by_value(value)

  cells = sheet_for(sheet).cells
  cells[coordinates] = Cell.new(value, type, nil, type, value, nil, nil, nil, Coordinate.new(row, col))
end
|
159
|
+
|
160
|
+
# Returns the formula at (row,col).
# Returns nil if there is no formula (or no cell at that position).
# Use #formula? to merely check whether a formula is present.
def formula(row, col, sheet = nil)
  coordinates = normalize(row, col)
  target = sheet_for(sheet).cells[coordinates]
  safe_send(target, :formula)
end
|
167
|
+
|
168
|
+
# Returns true when #formula yields a truthy value for the given
# arguments, false otherwise.
# Predicate methods really should return a boolean value. Hopefully no one
# was relying on the fact that this previously returned either nil/formula.
def formula?(*args)
  formula(*args) ? true : false
end
|
174
|
+
|
175
|
+
# Returns each formula in the selected sheet as an array of tuples in the
# following format:
#   [[row, col, formula], [row, col, formula],...]
# Cells whose formula is nil or false are left out.
def formulas(sheet = nil)
  sheet_for(sheet).cells.each_with_object([]) do |((x, y), cell), found|
    found << [x, y, cell.formula] if cell.formula
  end
end
|
182
|
+
|
183
|
+
# Given a cell position, returns the style definition registered for that
# cell's style index, or nil when the cell has no style index.
def font(row, col, sheet = nil)
  coordinates = normalize(row, col)
  definition_index = safe_send(sheet_for(sheet).cells[coordinates], :style)
  return unless definition_index

  styles.definitions[definition_index]
end
|
189
|
+
|
190
|
+
# Returns the type of a cell (nil when there is no cell at that position):
# * :float
# * :string,
# * :date
# * :percentage
# * :formula
# * :time
# * :datetime
def celltype(row, col, sheet = nil)
  coordinates = normalize(row, col)
  target = sheet_for(sheet).cells[coordinates]
  safe_send(target, :type)
end
|
202
|
+
|
203
|
+
# Returns the internal type of an excel cell (the cell's #cell_type):
# * :numeric_or_formula
# * :string
# Note: this is only available within the Excelx class
def excelx_type(row, col, sheet = nil)
  coordinates = normalize(row, col)
  target = sheet_for(sheet).cells[coordinates]
  safe_send(target, :cell_type)
end
|
211
|
+
|
212
|
+
# Returns the internal value of an excelx cell (the cell's #cell_value).
# Note: this is only available within the Excelx class
def excelx_value(row, col, sheet = nil)
  coordinates = normalize(row, col)
  target = sheet_for(sheet).cells[coordinates]
  safe_send(target, :cell_value)
end
|
218
|
+
|
219
|
+
# Returns the formatted value of an excelx cell (the cell's
# #formatted_value), or nil when there is no cell at that position.
# Note: this is only available within the Excelx class
def formatted_value(row, col, sheet = nil)
  coordinates = normalize(row, col)
  target = sheet_for(sheet).cells[coordinates]
  safe_send(target, :formatted_value)
end
|
225
|
+
|
226
|
+
# Returns the internal format of an excel cell, as reported by the sheet's
# #excelx_format for the normalized position.
def excelx_format(row, col, sheet = nil)
  coordinates = normalize(row, col)
  worksheet = sheet_for(sheet)
  worksheet.excelx_format(coordinates)
end
|
231
|
+
|
232
|
+
# Tells whether the cell at (row,col) counts as empty: there is no cell
# object, the cell itself reports empty?, or the position lies outside the
# sheet's first/last row and column bounds.
def empty?(row, col, sheet = nil)
  worksheet = sheet_for(sheet)
  coordinates = normalize(row, col)
  target = worksheet.cells[coordinates]
  return true unless target

  target.empty? ||
    (row < worksheet.first_row || row > worksheet.last_row ||
     col < worksheet.first_column || col > worksheet.last_column)
end
|
239
|
+
|
240
|
+
# shows the internal representation of all cells
|
241
|
+
# for debugging purposes
|
242
|
+
def to_s(sheet = nil)
|
243
|
+
sheet_for(sheet).cells.inspect
|
244
|
+
end
|
245
|
+
|
246
|
+
# Returns [row, col, sheet] of the labelled cell, taken from the workbook's
# defined names.
# Returns [nil, nil, nil] if the label is not defined.
def label(name)
  names = workbook.defined_names

  if !names.empty? && names.key?(name)
    entry = names[name]
    [entry.row, entry.col, entry.sheet]
  else
    [nil, nil, nil]
  end
end
|
254
|
+
|
255
|
+
# Returns an array with all labels. Each element is an array of the form
# [labelname, [row,col,sheetname]]
# The result is computed once and memoized in @labels.
def labels
  @labels ||= workbook.defined_names.map { |name, entry| [name, [entry.row, entry.col, entry.sheet]] }
end
|
265
|
+
|
266
|
+
# true, if there is a hyperlink at (row/col)
def hyperlink?(row, col, sheet = nil)
  hyperlink(row, col, sheet) ? true : false
end
|
269
|
+
|
270
|
+
# Returns the hyperlink at (row/col), looked up in the sheet's hyperlink
# table; nil if there is no hyperlink.
def hyperlink(row, col, sheet = nil)
  coordinates = normalize(row, col)
  link_table = sheet_for(sheet).hyperlinks
  link_table[coordinates]
end
|
276
|
+
|
277
|
+
# Returns the comment at (row/col), looked up in the sheet's comment table;
# nil if there is no comment.
def comment(row, col, sheet = nil)
  coordinates = normalize(row, col)
  comment_table = sheet_for(sheet).comments
  comment_table[coordinates]
end
|
283
|
+
|
284
|
+
# true, if there is a comment at (row/col)
def comment?(row, col, sheet = nil)
  comment(row, col, sheet) ? true : false
end
|
288
|
+
|
289
|
+
# Returns every comment of the selected sheet as an array of
# [row, col, comment] tuples.
def comments(sheet = nil)
  sheet_for(sheet).comments.map { |(x, y), text| [x, y, text] }
end
|
294
|
+
|
295
|
+
# Yields each row of the selected sheet (an array of Excelx::Cell), or
# returns an Enumerator over the rows when no block is given.
#
# options - :sheet selects the sheet and is removed from the hash; the
#           remaining options are handed to the sheet's #each_row (the
#           original documentation lists pad_cells and max_rows).
def each_row_streaming(options = {})
  sheet = sheet_for(options.delete(:sheet))
  return sheet.to_enum(:each_row, options) unless block_given?

  sheet.each_row(options) { |row| yield row }
end
|
305
|
+
|
306
|
+
private
|
307
|
+
|
308
|
+
# Runs sanitize_value over every String cell value of the named sheet and
# records the sheet as cleaned in @cleaned.
def clean_sheet(sheet)
  worksheet = @sheets_by_name[sheet]

  worksheet.cells.each_pair do |coord, cell|
    # Only string values are sanitized; other value types stay untouched.
    next unless cell.value.is_a?(::String)

    worksheet.cells[coord].value = sanitize_value(cell.value)
  end

  @cleaned[sheet] = true
end
|
317
|
+
|
318
|
+
# Internal: extracts the worksheet ids from the workbook.xml file. xlsx
# documents require a workbook.xml file, so if the file is missing
# it is not a valid xlsx file. In these cases, an ArgumentError is
# raised.
#
# entries - the zip entries of the document; the first one whose name
#           matches workbook.xml is used.
# path    - A String for the entry's destination path.
#
# Examples
#
#   extract_worksheet_ids(entries, 'tmpdir/roo_workbook.xml')
#   # => ["rId1", "rId2", "rId3"]
#
# Returns an Array of Strings.
# Raises ArgumentError when no workbook.xml entry is present.
def extract_worksheet_ids(entries, path)
  wb = entries.find { |e| e.name[/workbook.xml$/] }
  # The comma matters: without it Ruby parses `ArgumentError '...'` as a
  # call to a method named ArgumentError and raises NoMethodError instead.
  fail ArgumentError, 'missing required workbook file' if wb.nil?

  wb.extract(path)
  workbook_doc = Roo::Utils.load_xml(path).remove_namespaces!
  workbook_doc.xpath('//sheet').map { |s| s['id'] }
end
|
340
|
+
|
341
|
+
# Internal: maps worksheet relationship ids to their target files, as
# listed in workbook.xml.rels. Only relationships whose Type is one of
# worksheet_types are kept.
#
# entries - the zip entries of the document; the first one whose name
#           matches workbook.xml.rels is used.
# path    - A String for the entry's destination path.
#
# Examples
#
#   extract_worksheet_rels(entries, 'tmpdir/roo_workbook.xml.rels')
#   # => {
#         "rId1"=>"worksheets/sheet1.xml",
#         "rId2"=>"worksheets/sheet2.xml",
#         "rId3"=>"worksheets/sheet3.xml"
#        }
#
# Returns a Hash.
# Raises ArgumentError when no workbook.xml.rels entry is present.
def extract_worksheet_rels(entries, path)
  wb_rels = entries.find { |e| e.name[/workbook.xml.rels$/] }
  # The comma matters: without it Ruby parses `ArgumentError '...'` as a
  # call to a method named ArgumentError and raises NoMethodError instead.
  fail ArgumentError, 'missing required workbook file' if wb_rels.nil?

  wb_rels.extract(path)
  rels_doc = Roo::Utils.load_xml(path).remove_namespaces!

  relationships = rels_doc.xpath('//Relationship').select do |relationship|
    worksheet_types.include? relationship['Type']
  end

  relationships.each_with_object({}) do |relationship, hash|
    hash[relationship['Id']] = relationship['Target']
  end
end
|
371
|
+
|
372
|
+
# Extracts the sheets in workbook order, ignoring sheets that are not
# worksheets (ids without an entry in +sheets+).
#
# entries   - the zip entries of the document.
# sheet_ids - relationship ids in workbook order.
# sheets    - Hash of relationship id => worksheet target path.
# tmpdir    - directory the sheets are extracted into, as roo_sheet1,
#             roo_sheet2, ...
#
# Each destination path is appended both to the shared sheet_files list and
# to @sheet_files.
def extract_sheets_in_order(entries, sheet_ids, sheets, tmpdir)
  (sheet_ids & sheets.keys).each_with_index do |id, i|
    name = sheets[id]
    # Compare literally: the target is a file path, not a pattern, so
    # characters such as ".", "(" or "+" must not act as regex syntax.
    entry = entries.find { |e| "/#{e.name}".end_with?(name) }
    path = "#{tmpdir}/roo_sheet#{i + 1}"
    sheet_files << path
    @sheet_files << path
    entry.extract(path)
  end
end
|
384
|
+
|
385
|
+
# Extracts every media/imageN entry into the instance temp directory and
# appends each destination path to image_files.
#
# NOTE(review): the +tmpdir+ argument is not used; destinations are built
# from @tmpdir.
def extract_images(entries, tmpdir)
  entries.select { |e| e.name[/media\/image([0-9]+)/] }.each do |image_entry|
    # "xl/media/image1.png" becomes "<tmpdir>/roo_media_image1.png"
    flattened_name = image_entry.name.gsub(/xl\/|\//, "_")
    path = "#{@tmpdir}/roo#{flattened_name}"
    image_files << path
    image_entry.extract(path)
  end
end
|
393
|
+
|
394
|
+
# Extracts all needed files from the zip file.
#
# zipfilename_or_stream - a path opened with Zip::File, or a stream read
#                         through Zip::CentralDirectory.
#
# Resets @sheet_files, then hands the entries, sorted by name, to
# process_zipfile_entries.
def process_zipfile(zipfilename_or_stream)
  @sheet_files = []

  zip_file =
    if is_stream?(zipfilename_or_stream)
      Zip::CentralDirectory.new.tap do |directory|
        directory.read_from_stream zipfilename_or_stream
      end
    else
      Zip::File.open(zipfilename_or_stream)
    end

  process_zipfile_entries zip_file.to_a.sort_by(&:name)
end
|
407
|
+
|
408
|
+
# Extracts worksheets, images and the supporting XML parts of the document
# into the temp directory, recording the destinations of comment,
# relationship and drawing-relationship files in the shared lists.
def process_zipfile_entries(entries)
  # NOTE: When Google or Numbers 3.1 exports to xlsx, the worksheet filenames
  #       are not in order. With Numbers 3.1, the first sheet is always
  #       sheet.xml, not sheet1.xml. With Google, the order of the worksheets is
  #       independent of a worksheet's filename (i.e. sheet6.xml can be the
  #       first worksheet).
  #
  #       workbook.xml lists the correct order of worksheets and
  #       workbook.xml.rels lists the filenames for those worksheets.
  #
  #       workbook.xml:
  #         <sheet state="visible" name="IS" sheetId="1" r:id="rId3"/>
  #         <sheet state="visible" name="BS" sheetId="2" r:id="rId4"/>
  #       workbook.xml.rel:
  #         <Relationship Id="rId4" Target="worksheets/sheet5.xml" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"/>
  #         <Relationship Id="rId3" Target="worksheets/sheet4.xml" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"/>
  sheet_ids = extract_worksheet_ids(entries, "#{@tmpdir}/roo_workbook.xml")
  sheets = extract_worksheet_rels(entries, "#{@tmpdir}/roo_workbook.xml.rels")
  extract_sheets_in_order(entries, sheet_ids, sheets, @tmpdir)
  extract_images(entries, @tmpdir)

  entries.each do |entry|
    # The first matching pattern wins, so the order of the branches matters.
    destination =
      case entry.name.downcase
      when /richdata/
        # FIXME: Ignore richData as parsing is not implemented yet and can cause
        # Zip::DestinationFileExistsError when including a second "styles.xml" entry
        # see http://schemas.microsoft.com/office/spreadsheetml/2017/richdata2
        nil
      when /sharedstrings.xml$/
        "#{@tmpdir}/roo_sharedStrings.xml"
      when /styles.xml$/
        "#{@tmpdir}/roo_styles.xml"
      when /comments([0-9]+).xml$/
        # FIXME: Most of the time, The order of the comment files are the same
        #        the sheet order, i.e. sheet1.xml's comments are in comments1.xml.
        #        In some situations, this isn't true. The true location of a
        #        sheet's comment file is in the sheet1.xml.rels file. SEE
        #        ECMA-376 12.3.3 in "Ecma Office Open XML Part 1".
        number = Regexp.last_match(1).to_i
        comments_files[number - 1] = "#{@tmpdir}/roo_comments#{number}"
      when %r{chartsheets/_rels/sheet([0-9]+).xml.rels$}
        # NOTE: Chart sheet relationship files were interfering with
        #       worksheets.
        nil
      when /sheet([0-9]+).xml.rels$/
        # FIXME: Roo seems to use sheet[\d].xml.rels for hyperlinks only, but
        #        it also stores the location for sharedStrings, comments,
        #        drawings, etc.
        number = Regexp.last_match(1).to_i
        rels_files[number - 1] = "#{@tmpdir}/roo_rels#{number}"
      when /drawing([0-9]+).xml.rels$/
        # Extracting drawing relationships to make images lists for each sheet
        number = Regexp.last_match(1).to_i
        image_rels[number - 1] = "#{@tmpdir}/roo_image_rels#{number}"
      end

    # Entries that matched no branch (or an ignored one) are not extracted.
    entry.extract(destination) if destination
  end
end
|
468
|
+
|
469
|
+
# Calls +method+ on +object+ with +args+ when the object responds to it;
# returns nil otherwise (including when +object+ is nil).
def safe_send(object, method, *args)
  return nil unless object&.respond_to?(method)

  object.send(method, *args)
end
|
472
|
+
|
473
|
+
# Relationship Type URIs that identify a worksheet, in this order:
# OOXML Transitional first, OOXML Strict second.
def worksheet_types
  transitional = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet'
  strict = 'http://purl.oclc.org/ooxml/officeDocument/relationships/worksheet'

  [transitional, strict]
end
|
479
|
+
end
|
480
|
+
end
|
data/lib/roo/font.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
module Roo
  module Formatters
    # Helpers shared by the output formatters.
    module Base
      # Converts a number of seconds to a time string like '02:05:06'.
      #
      # content - the number of seconds.
      #
      # Returns a String of zero-padded hours, minutes and seconds.
      def integer_to_timestring(content)
        hours = (content / 3600.0).floor
        remainder = content - hours * 3600
        minutes = (remainder / 60.0).floor
        seconds = remainder - minutes * 60

        Kernel.format("%02d:%02d:%02d", hours, minutes, seconds)
      end
    end
  end
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
module Roo
  module Formatters
    # CSV export for spreadsheet objects.
    module CSV
      # Renders a sheet as CSV.
      #
      # filename  - when given, the CSV is written to that file and true is
      #             returned; otherwise the CSV text is returned as a String.
      # separator - the field separator (default ",").
      # sheet     - the sheet to export (default: default_sheet).
      def to_csv(filename = nil, separator = ",", sheet = default_sheet)
        unless filename
          buffer = ::StringIO.new
          write_csv_content(buffer, sheet, separator)
          buffer.rewind
          return buffer.read
        end

        File.open(filename, "w") { |file| write_csv_content(file, sheet, separator) }
        true
      end

      private

      # Writes all cells of the sheet to +file+, an IO-like object that
      # responds to #print. When +file+ is nil the output goes to STDOUT.
      # Nothing is written for an empty sheet.
      def write_csv_content(file = nil, sheet = nil, separator = ",")
        file ||= STDOUT
        return unless first_row(sheet) # The sheet is empty

        1.upto(last_row(sheet)) do |row_number|
          1.upto(last_column(sheet)) do |column_number|
            # TODO: use CSV.generate_line
            file.print(separator) if column_number > 1
            file.print cell_to_csv(row_number, column_number, sheet)
          end
          file.print("\n")
        end
      end

      # The content of a cell in the csv output. Strings are wrapped in
      # double quotes with embedded quotes doubled; empty cells and empty
      # strings give "". Fails for cell types it does not know.
      def cell_to_csv(row, col, sheet)
        return "" if empty?(row, col, sheet)

        onecell = cell(row, col, sheet)
        kind = celltype(row, col, sheet)

        rendered =
          case kind
          when :string
            %("#{onecell.gsub('"', '""')}") unless onecell.empty?
          when :boolean
            # TODO: this only works for excelx
            onecell = self.sheet_for(sheet).cells[[row, col]].formatted_value
            %("#{onecell.gsub('"', '""').downcase}")
          when :float, :percentage
            # Whole numbers are printed without a fractional part.
            onecell == onecell.to_i ? onecell.to_i.to_s : onecell.to_s
          when :formula
            case onecell
            when String
              %("#{onecell.gsub('"', '""')}") unless onecell.empty?
            when Integer
              onecell.to_s
            when Float
              onecell == onecell.to_i ? onecell.to_i.to_s : onecell.to_s
            when Date, DateTime, TrueClass, FalseClass
              onecell.to_s
            else
              fail "unhandled onecell-class #{onecell.class}"
            end
          when :date, :datetime
            onecell.to_s
          when :time
            integer_to_timestring(onecell)
          when :link
            %("#{onecell.url.gsub('"', '""')}")
          else
            fail "unhandled celltype #{kind}"
          end

        rendered || ""
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Roo
  module Formatters
    # Matrix export for spreadsheet objects.
    module Matrix
      # Returns a ::Matrix built from the whole sheet or from a rectangular
      # area of it.
      #
      # from_row, from_column, to_row, to_column - bounds of the area; each
      #   one defaults to the corresponding first/last row or column of
      #   +sheet+.
      # sheet - the sheet to read (default: default_sheet).
      #
      # Returns an empty matrix when +sheet+ has no first row.
      def to_matrix(from_row = nil, from_column = nil, to_row = nil, to_column = nil, sheet = default_sheet)
        # Loaded lazily so that 'matrix' is only required when it is used.
        require 'matrix'

        # Check the requested sheet; a bare first_row would look at the
        # default sheet even when another sheet was asked for.
        return ::Matrix.empty unless first_row(sheet)

        from_row ||= first_row(sheet)
        to_row ||= last_row(sheet)
        from_column ||= first_column(sheet)
        to_column ||= last_column(sheet)

        ::Matrix.rows(from_row.upto(to_row).map do |row|
          from_column.upto(to_column).map do |col|
            cell(row, col, sheet)
          end
        end)
      end
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# returns an XML representation of all sheets of a spreadsheet file
|
2
|
+
module Roo
  module Formatters
    # XML export for spreadsheet objects.
    module XML
      # Returns an XML representation of all sheets of a spreadsheet file:
      # a <spreadsheet> root with one <sheet name="..."> per sheet and one
      # <cell row column type> element per non-empty cell.
      #
      # NOTE: each sheet is made the default sheet while it is rendered, and
      # the default sheet is not reset afterwards.
      def to_xml
        builder = Nokogiri::XML::Builder.new do |xml|
          xml.spreadsheet do
            sheets.each do |sheet_name|
              self.default_sheet = sheet_name
              xml.sheet(name: sheet_name) do |x|
                # Without this guard empty sheets would cause errors.
                if first_row && last_row && first_column && last_column
                  first_row.upto(last_row) do |row|
                    first_column.upto(last_column) do |col|
                      unless empty?(row, col)
                        x.cell(cell(row, col), row: row, column: col, type: celltype(row, col))
                      end
                    end
                  end
                end
              end
            end
          end
        end

        builder.to_xml
      end
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module Roo
  module Formatters
    # YAML export for spreadsheet objects.
    module YAML
      # Returns a rectangular area (default: all cells) as yaml-output.
      # You can add additional attributes with the prefix parameter like:
      #   oo.to_yaml({"file"=>"flightdata_2007-06-26", "sheet" => "1"})
      #
      # prefix - Hash of extra key/value lines written for every cell.
      # from_row, from_column, to_row, to_column - bounds of the area; each
      #   one defaults to the corresponding first/last row or column of
      #   +sheet+.
      # sheet - the sheet to read (default: default_sheet).
      #
      # Returns a String; "" when +sheet+ has no first row. Empty cells are
      # left out, and :time cells are written as time strings.
      def to_yaml(prefix = {}, from_row = nil, from_column = nil, to_row = nil, to_column = nil, sheet = default_sheet)
        # Check the requested sheet; a bare first_row would look at the
        # default sheet even when another sheet was asked for.
        return "" unless first_row(sheet)

        from_row ||= first_row(sheet)
        to_row ||= last_row(sheet)
        from_column ||= first_column(sheet)
        to_column ||= last_column(sheet)

        result = +"--- \n"
        from_row.upto(to_row) do |row|
          from_column.upto(to_column) do |col|
            next if empty?(row, col, sheet)

            result << "cell_#{row}_#{col}: \n"
            prefix.each do |k, v|
              result << " #{k}: #{v} \n"
            end
            result << " row: #{row} \n"
            result << " col: #{col} \n"
            result << " celltype: #{celltype(row, col, sheet)} \n"
            value = cell(row, col, sheet)
            if celltype(row, col, sheet) == :time
              value = integer_to_timestring(value)
            end
            result << " value: #{value} \n"
          end
        end

        result
      end
    end
  end
end
|