rbxl 1.2.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +55 -1
- data/README.md +126 -12
- data/Rakefile +19 -0
- data/lib/rbxl/editable_cell.rb +176 -0
- data/lib/rbxl/editable_workbook.rb +315 -0
- data/lib/rbxl/editable_worksheet.rb +216 -0
- data/lib/rbxl/errors.rb +12 -0
- data/lib/rbxl/read_only_workbook.rb +78 -88
- data/lib/rbxl/shared_strings_loader.rb +100 -0
- data/lib/rbxl/version.rb +1 -1
- data/lib/rbxl.rb +61 -10
- data/sig/rbxl.rbs +70 -2
- metadata +6 -2
|
@@ -0,0 +1,315 @@
|
|
|
1
|
+
module Rbxl
|
|
2
|
+
# Read-modify-save workbook for surgical edits to an existing +.xlsx+.
|
|
3
|
+
#
|
|
4
|
+
# The design promise mirrors +rbpptx+: <em>what we don't understand, we
|
|
5
|
+
# don't touch</em>. The package is opened as a ZIP, each part you mutate is
|
|
6
|
+
# re-serialized, and every other entry — styles, drawings, charts, comments,
|
|
7
|
+
# pivot caches, custom XML, untouched worksheets — round-trips byte-for-byte
|
|
8
|
+
# via {Zip::OutputStream#copy_raw_entry}. Inside a worksheet you do edit, only the
|
|
9
|
+
# specific +<c>+ element you target is rewritten; surrounding cells, the
|
|
10
|
+
# row's other attributes, +<mergeCells>+, +<conditionalFormatting>+,
|
|
11
|
+
# +<dataValidations>+, and any unknown OOXML extensions remain in place.
|
|
12
|
+
# The cell's existing +s+ (style index) attribute is preserved, so template
|
|
13
|
+
# number formats, fonts, and fills carry through to the new value.
|
|
14
|
+
#
|
|
15
|
+
# The editable mode is the right tool for template-style fill-ins: open a
|
|
16
|
+
# template with named cells, write a handful of values, save back. It is
|
|
17
|
+
# explicitly <em>not</em> the right tool for rewriting the data area of a
|
|
18
|
+
# large worksheet — the touched sheet is parsed as a Nokogiri DOM, so peak
|
|
19
|
+
# memory scales with that sheet's on-disk size. Use the write-only mode
|
|
20
|
+
# (+Rbxl.new+) for that case instead.
|
|
21
|
+
#
|
|
22
|
+
# == Out of scope (1.4.0)
|
|
23
|
+
#
|
|
24
|
+
# * inserting / deleting / reordering / duplicating sheets
|
|
25
|
+
# * editing styles, formulas, named ranges, drawings, or shared strings
|
|
26
|
+
# * +Date+ / +Time+ / +DateTime+ values (raise {EditableCellTypeError};
|
|
27
|
+
# convert to a numeric serial yourself if you need a date cell)
|
|
28
|
+
# * recomputing the worksheet +<dimension>+ when a write expands the bounds
|
|
29
|
+
#
|
|
30
|
+
# == Strings on write
|
|
31
|
+
#
|
|
32
|
+
# Cells written through this mode become inline strings
|
|
33
|
+
# (+t="inlineStr"+), so +xl/sharedStrings.xml+ is never mutated. Existing
|
|
34
|
+
# +t="s"+ cells you don't touch keep resolving through the SST as usual;
|
|
35
|
+
# only cells you actually overwrite drop their SST reference.
|
|
36
|
+
class EditableWorkbook
|
|
37
|
+
# Namespace for the main SpreadsheetML schema.
|
|
38
|
+
MAIN_NS = "http://schemas.openxmlformats.org/spreadsheetml/2006/main".freeze
|
|
39
|
+
|
|
40
|
+
# Namespace used for document-level relationships.
|
|
41
|
+
REL_NS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships".freeze
|
|
42
|
+
|
|
43
|
+
# Namespace used by the OPC package relationships layer.
|
|
44
|
+
PACKAGE_REL_NS = "http://schemas.openxmlformats.org/package/2006/relationships".freeze
|
|
45
|
+
|
|
46
|
+
# Relationship type identifying the workbook part inside +_rels/.rels+.
|
|
47
|
+
OFFICE_DOC_REL_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument".freeze
|
|
48
|
+
|
|
49
|
+
OLE_CFB_MAGIC = "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1".b.freeze
|
|
50
|
+
private_constant :OLE_CFB_MAGIC
|
|
51
|
+
|
|
52
|
+
ZIP_LOCAL_MAGIC = "PK\x03\x04".b.freeze
|
|
53
|
+
private_constant :ZIP_LOCAL_MAGIC
|
|
54
|
+
|
|
55
|
+
# @return [String] filesystem path the workbook was opened from
|
|
56
|
+
attr_reader :path
|
|
57
|
+
|
|
58
|
+
# @return [Array<String>] visible sheet names in workbook order
|
|
59
|
+
attr_reader :sheet_names
|
|
60
|
+
|
|
61
|
+
# Convenience constructor equivalent to +new(path)+. When a block is
|
|
62
|
+
# given, the workbook is yielded and {#close} is called automatically
|
|
63
|
+
# when the block returns or raises.
|
|
64
|
+
#
|
|
65
|
+
# @param path [String, #to_path]
|
|
66
|
+
# @yieldparam book [Rbxl::EditableWorkbook]
|
|
67
|
+
# @return [Rbxl::EditableWorkbook, Object] the workbook when no block is
|
|
68
|
+
# given, otherwise the block's return value
|
|
69
|
+
def self.open(path)
  workbook = new(path)

  if block_given?
    # Block form: hand the workbook to the caller and always release it,
    # whether the block returns normally or raises.
    begin
      yield workbook
    ensure
      workbook.close
    end
  else
    workbook
  end
end
|
|
79
|
+
|
|
80
|
+
# Opens the package, validates the format, and indexes worksheet parts
|
|
81
|
+
# by visible sheet name. Worksheet XML is not parsed until the caller
|
|
82
|
+
# touches that sheet via {#sheet}.
|
|
83
|
+
#
|
|
84
|
+
# @param path [String, #to_path] path to the +.xlsx+ file
|
|
85
|
+
# @raise [Rbxl::UnsupportedFormatError] if the file is not a valid
|
|
86
|
+
# +.xlsx+ container (e.g. a legacy +.xls+, or non-ZIP bytes)
|
|
87
|
+
# @raise [Rbxl::WorkbookFormatError] if +xl/workbook.xml+ or its rels are
|
|
88
|
+
# missing, malformed, or internally inconsistent
|
|
89
|
+
def initialize(path)
  @path = path.to_s
  # Check the magic bytes before handing the file to the ZIP reader so the
  # caller gets a descriptive UnsupportedFormatError.
  ensure_xlsx_format!(@path)

  @zip = Zip::File.open(@path)
  @closed = false

  # Resolve the workbook part and the sheet-name => ZIP-entry mapping.
  @workbook_part = locate_workbook_part
  @workbook_dir = File.dirname(@workbook_part)
  @sheet_entries = load_sheet_entries
  @sheet_names = @sheet_entries.keys.freeze

  # Both are populated on demand: the shared strings table on the first
  # #sheet call, worksheet objects as they are requested.
  @shared_strings = nil
  @sheets_by_name = {}
end
|
|
101
|
+
|
|
102
|
+
# Returns the editable worksheet for +name_or_index+. Repeated calls for
|
|
103
|
+
# the same sheet return the same in-memory object so edits accumulate
|
|
104
|
+
# across calls before {#save}.
|
|
105
|
+
#
|
|
106
|
+
# @param name_or_index [String, Integer] visible sheet name as listed in
|
|
107
|
+
# {#sheet_names}, or an integer index (negatives count from the end)
|
|
108
|
+
# @return [Rbxl::EditableWorksheet]
|
|
109
|
+
# @raise [Rbxl::SheetNotFoundError] if +name_or_index+ does not resolve
|
|
110
|
+
# @raise [Rbxl::ClosedWorkbookError] if the workbook has been closed
|
|
111
|
+
def sheet(name_or_index)
  ensure_open!

  name = resolve_sheet_name(name_or_index)

  # Reuse the worksheet handed out earlier so edits accumulate on one object.
  memoized = @sheets_by_name[name]
  return memoized if memoized

  entry_path = @sheet_entries.fetch(name) do
    raise SheetNotFoundError, "sheet not found: #{name}"
  end

  @sheets_by_name[name] = EditableWorksheet.new(
    zip: @zip,
    entry_path: entry_path,
    workbook_path: @path,
    shared_strings: shared_strings,
    name: name
  )
end
|
|
125
|
+
|
|
126
|
+
# Iterates worksheets in workbook order. Worksheets are constructed on
|
|
127
|
+
# demand and memoized, so iterating then editing is consistent with
|
|
128
|
+
# {#sheet}.
|
|
129
|
+
#
|
|
130
|
+
# @yieldparam worksheet [Rbxl::EditableWorksheet]
|
|
131
|
+
# @return [Enumerator<Rbxl::EditableWorksheet>] when no block is given
|
|
132
|
+
# @raise [Rbxl::ClosedWorkbookError] if the workbook has been closed
|
|
133
|
+
def sheets
  ensure_open!
  return to_enum(:sheets) unless block_given?

  # Go through #sheet so iteration shares the memoized worksheet objects.
  @sheet_names.each do |sheet_name|
    yield sheet(sheet_name)
  end
end
|
|
139
|
+
|
|
140
|
+
# Writes the workbook out, preserving every part that has not been
|
|
141
|
+
# mutated byte-for-byte. Worksheets whose cells have been edited are
|
|
142
|
+
# re-serialized from their in-memory Nokogiri document; all other
|
|
143
|
+
# entries (styles, sharedStrings, drawings, charts, pivot caches,
|
|
144
|
+
# custom XML, rels) are streamed straight from the source ZIP without
|
|
145
|
+
# re-parsing.
|
|
146
|
+
#
|
|
147
|
+
# +path+ defaults to the original load path; passing +nil+ or omitting
|
|
148
|
+
# it saves in place. The new file is written to a temp file in the same
|
|
149
|
+
# directory and atomically renamed into place, so a crash mid-write
|
|
150
|
+
# never leaves a half-written workbook. On success, dirty flags on each
|
|
151
|
+
# touched worksheet are cleared, so the object is reusable for further
|
|
152
|
+
# edits and another {#save}.
|
|
153
|
+
#
|
|
154
|
+
# @param path [String, #to_path, nil] destination path; defaults to the
|
|
155
|
+
# path the workbook was opened from
|
|
156
|
+
# @return [String] the path that was written
|
|
157
|
+
# @raise [Rbxl::ClosedWorkbookError] if the workbook has been closed
|
|
158
|
+
def save(path = nil)
  ensure_open!
  out_path = (path || @path).to_s

  # Remember every worksheet that has ever been edited. Dirty flags are
  # cleared after a successful save, but unedited entries are still copied
  # raw from the archive opened at load time. A sheet edited before an
  # earlier save must therefore keep being re-serialized from its DOM, or
  # that edit would silently disappear from every later save.
  @edited_sheets ||= {}
  @sheets_by_name.each_value do |worksheet|
    @edited_sheets[worksheet.entry_path] = worksheet if worksheet.dirty?
  end
  overrides = @edited_sheets.transform_values(&:to_xml)

  # NOTE(review): after an in-place save, @zip still describes the pre-save
  # archive layout. Confirm that rubyzip keeps reading valid data for later
  # saves and lazy sheet loads once the file at @path has been replaced.
  tmp_path = "#{out_path}.rbxl-tmp.#{Process.pid}.#{rand(1 << 32).to_s(16)}"
  renamed = false
  begin
    Zip::OutputStream.open(tmp_path) do |out|
      @zip.each do |entry|
        next if entry.directory?

        if (override_xml = overrides[entry.name])
          # Edited worksheet: write the re-serialized XML.
          out.put_next_entry(entry.name)
          out.write(override_xml)
        else
          # Everything else is copied without re-parsing.
          out.copy_raw_entry(entry)
        end
      end
    end
    File.rename(tmp_path, out_path)
    renamed = true
  ensure
    # Runs on every exit path, including Interrupt and other
    # non-StandardError exceptions, so no stray temp file is left behind.
    File.unlink(tmp_path) if !renamed && File.exist?(tmp_path)
  end

  @sheets_by_name.each_value(&:clear_dirty!)
  out_path
end
|
|
186
|
+
|
|
187
|
+
# Releases the underlying ZIP file. Idempotent.
|
|
188
|
+
#
|
|
189
|
+
# @return [Boolean] +true+ on the first call, +false+ on subsequent calls
|
|
190
|
+
def close
  if @closed
    false
  else
    @zip&.close
    @zip = nil
    # The assignment's value (true) doubles as the return value.
    @closed = true
  end
end

# @return [Boolean] whether {#close} has already been called
def closed?
  @closed
end
|
|
203
|
+
|
|
204
|
+
private
|
|
205
|
+
|
|
206
|
+
# Guard for the public API: refuses to operate on a closed workbook.
def ensure_open!
  return unless @closed

  raise ClosedWorkbookError, "workbook has been closed"
end
|
|
209
|
+
|
|
210
|
+
# Maps a sheet key to a sheet name. Non-integer keys pass through untouched;
# integers index into @sheet_names, with negatives counting from the end.
def resolve_sheet_name(key)
  case key
  when Integer
    @sheet_names[key] ||
      raise(SheetNotFoundError, "sheet index out of range: #{key} (#{@sheet_names.length} sheet(s))")
  else
    key
  end
end
|
|
218
|
+
|
|
219
|
+
# Sniffs the first eight bytes of +path+ and raises UnsupportedFormatError
# unless they start with the ZIP local-file signature. Legacy OLE/CFB
# files get a dedicated message with a conversion hint.
def ensure_xlsx_format!(path)
  header =
    begin
      File.binread(path, 8)
    rescue Errno::ENOENT, Errno::EISDIR, Errno::EACCES => e
      raise UnsupportedFormatError, "#{path}: #{e.message}"
    end

  if header.nil? || header.empty?
    raise UnsupportedFormatError, "#{path}: file is empty or unreadable"
  end
  return if header.start_with?(ZIP_LOCAL_MAGIC)

  message =
    if header.start_with?(OLE_CFB_MAGIC)
      "#{path} looks like a legacy .xls (BIFF/CFB). " \
        "rbxl supports .xlsx (OOXML) only; convert first, e.g. " \
        "`libreoffice --headless --convert-to xlsx #{File.basename(path.to_s)}`."
    else
      "#{path} is not a valid .xlsx (no ZIP signature at offset 0)."
    end

  raise UnsupportedFormatError, message
end
|
|
239
|
+
|
|
240
|
+
# Reads +_rels/.rels+ and returns the ZIP entry name of the workbook part,
# taken from the officeDocument relationship with any leading "/" removed.
def locate_workbook_part
  root_rels = parse_xml("_rels/.rels")
  relationship = root_rels.at_xpath(
    "/pkg:Relationships/pkg:Relationship[@Type=$type]",
    { "pkg" => PACKAGE_REL_NS },
    { "type" => OFFICE_DOC_REL_TYPE }
  )
  unless relationship
    raise WorkbookFormatError, "#{@path}: officeDocument relationship missing from _rels/.rels"
  end

  target = relationship["Target"]
  raise WorkbookFormatError, "#{@path}: officeDocument relationship has no Target" unless target

  # ZIP entry names carry no leading slash.
  target.delete_prefix("/")
end
|
|
252
|
+
|
|
253
|
+
# Builds the sheet-name => ZIP-entry-path map from the workbook part and
# its relationships, in the order the <sheet> elements appear.
def load_sheet_entries
  relationships = parse_rels(rels_path_for(@workbook_part))
  workbook_doc = parse_xml(@workbook_part)
  sheet_nodes = workbook_doc.xpath("/main:workbook/main:sheets/main:sheet", "main" => MAIN_NS)

  sheet_nodes.each_with_object({}) do |node, entries|
    sheet_name = node["name"]
    rel_id = node.attribute_with_ns("id", REL_NS)&.value
    # Skip <sheet> elements that lack a name or a relationship id.
    next unless sheet_name && rel_id

    target = relationships.fetch(rel_id) do
      raise WorkbookFormatError,
            "workbook #{@path} references missing relationship #{rel_id.inspect} for sheet #{sheet_name.inspect}"
    end
    entries[sheet_name] = resolve_relative(@workbook_dir, target)
  end
end
|
|
272
|
+
|
|
273
|
+
# Shared strings table, loaded from the archive on first use and memoized.
def shared_strings
  return @shared_strings if @shared_strings

  @shared_strings = SharedStringsLoader.load(@zip)
end
|
|
276
|
+
|
|
277
|
+
# Returns a hash of ZIP entry path => serialized XML for every worksheet
# that is currently dirty.
def collect_overrides
  overrides = {}
  @sheets_by_name.each_value do |worksheet|
    next unless worksheet.dirty?

    overrides[worksheet.entry_path] = worksheet.to_xml
  end
  overrides
end
|
|
282
|
+
|
|
283
|
+
# Parses the named package part with Nokogiri. Raises WorkbookFormatError
# when the part is absent or the parser reports any error.
def parse_xml(part_name)
  entry = @zip.find_entry(part_name)
  raise WorkbookFormatError, "#{@path}: missing part #{part_name}" unless entry

  Nokogiri::XML(entry.get_input_stream.read).tap do |doc|
    first_error = doc.errors.first
    raise WorkbookFormatError, "#{@path}: #{part_name}: #{first_error}" if first_error
  end
end
|
|
292
|
+
|
|
293
|
+
# Loads a +.rels+ part into a hash of relationship Id => Target.
#
# Returns an empty hash when the part is absent. Raises WorkbookFormatError
# when the part exists but is not well-formed XML, matching #parse_xml;
# otherwise a broken rels file would show up later as a misleading
# "references missing relationship" error.
def parse_rels(rels_part)
  entry = @zip.find_entry(rels_part)
  return {} unless entry

  doc = Nokogiri::XML(entry.get_input_stream.read)
  raise WorkbookFormatError, "#{@path}: #{rels_part}: #{doc.errors.first}" unless doc.errors.empty?

  doc.xpath("/pkg:Relationships/pkg:Relationship", "pkg" => PACKAGE_REL_NS).each_with_object({}) do |relationship, targets|
    targets[relationship["Id"]] = relationship["Target"]
  end
end
|
|
302
|
+
|
|
303
|
+
# Returns the relationships part that accompanies +part_name+:
# "<dir>/_rels/<file>.rels", or "_rels/<file>.rels" for a root-level part.
def rels_path_for(part_name)
  directory, file_name = File.split(part_name)
  rels_name = "_rels/#{file_name}.rels"
  return rels_name if directory == "."

  "#{directory}/#{rels_name}"
end
|
|
308
|
+
|
|
309
|
+
# Resolves a relationship +target+ against +base_dir+ and returns a ZIP
# entry name without a leading slash.
#
# Targets that start with "/" are package-absolute and only lose that slash.
# Relative targets are resolved lexically: "." and empty segments are
# dropped and ".." removes the previous segment, never climbing above the
# package root. This deliberately avoids File.expand_path, which consults
# the host filesystem rules (it expands a leading "~" and prepends a drive
# letter on Windows) and would corrupt entry names.
def resolve_relative(base_dir, target)
  return target.sub(%r{\A/}, "") if target.start_with?("/")

  segments = []
  "#{base_dir}/#{target}".split("/").each do |segment|
    case segment
    when "", "."
      next
    when ".."
      segments.pop
    else
      segments << segment
    end
  end
  segments.join("/")
end
|
|
314
|
+
end
|
|
315
|
+
end
|
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
module Rbxl
|
|
2
|
+
# A single worksheet inside an {EditableWorkbook}.
|
|
3
|
+
#
|
|
4
|
+
# The worksheet's XML payload is parsed lazily — calling {#cell} for the
|
|
5
|
+
# first time triggers a single Nokogiri DOM parse of the sheet entry, and
|
|
6
|
+
# subsequent edits mutate that in-memory tree. Worksheets that are never
|
|
7
|
+
# touched are never parsed; on save they pass through the ZIP unchanged.
|
|
8
|
+
#
|
|
9
|
+
# Cell access is openpyxl-style:
|
|
10
|
+
#
|
|
11
|
+
# sheet["B5"].value = "company name"
|
|
12
|
+
# sheet.cell("B5").value # => "company name"
|
|
13
|
+
#
|
|
14
|
+
# See {EditableWorkbook} for the design contract these edits live inside.
|
|
15
|
+
class EditableWorksheet
|
|
16
|
+
# Namespace for the main SpreadsheetML schema.
|
|
17
|
+
MAIN_NS = "http://schemas.openxmlformats.org/spreadsheetml/2006/main".freeze
|
|
18
|
+
|
|
19
|
+
# @return [String] visible sheet name
|
|
20
|
+
attr_reader :name
|
|
21
|
+
|
|
22
|
+
# @return [String] ZIP entry path of the worksheet's XML part
|
|
23
|
+
attr_reader :entry_path
|
|
24
|
+
|
|
25
|
+
# @param zip [Zip::File] open archive shared with the workbook
|
|
26
|
+
# @param entry_path [String] ZIP entry path for this sheet's XML
|
|
27
|
+
# @param workbook_path [String] filesystem path the workbook was opened from
|
|
28
|
+
# @param shared_strings [Array<String>] pre-decoded shared strings table
|
|
29
|
+
# @param name [String] visible sheet name
|
|
30
|
+
def initialize(zip:, entry_path:, workbook_path:, shared_strings:, name:)
  # Collaborators and identity supplied by the owning workbook.
  @zip = zip
  @entry_path = entry_path
  @workbook_path = workbook_path
  @shared_strings = shared_strings
  @name = name

  # Parsed state: stays nil until ensure_doc_loaded! reads the sheet XML.
  @doc = nil
  @sheet_data = nil
  @row_index = nil

  @dirty = false
end
|
|
41
|
+
|
|
42
|
+
# Returns the {EditableCell} view for +coordinate+. Cells not present in
|
|
43
|
+
# the sheet's XML are addressable too — reading their value yields +nil+,
|
|
44
|
+
# writing creates the +<c>+ (and its enclosing +<row>+ if needed) in
|
|
45
|
+
# column-sorted position. Repeated calls for the same coordinate may
|
|
46
|
+
# return different {EditableCell} objects but the underlying XML is the
|
|
47
|
+
# same, so reads are consistent.
|
|
48
|
+
#
|
|
49
|
+
# @param coordinate [String] Excel-style coordinate (e.g. +"A1"+, +"B5"+)
|
|
50
|
+
# @return [Rbxl::EditableCell]
|
|
51
|
+
# @raise [ArgumentError] if +coordinate+ is not a valid +A1+-style ref
|
|
52
|
+
def cell(coordinate)
  # Validate and upcase first, so an invalid reference raises before a cell is built.
  normalized = normalize_coordinate(coordinate)
  EditableCell.new(worksheet: self, coordinate: normalized)
end
|
|
55
|
+
|
|
56
|
+
alias [] cell
|
|
57
|
+
|
|
58
|
+
# @return [Boolean] whether any cell on this sheet has been mutated since
|
|
59
|
+
# load (or since the last successful save)
|
|
60
|
+
def dirty?
  @dirty
end

# Flags the sheet as modified. Called by {EditableCell#value=}; not part
# of the public API.
#
# @api private
def mark_dirty!
  @dirty = true
end

# Resets the modified flag.
#
# @api private
def clear_dirty!
  @dirty = false
end
|
|
76
|
+
|
|
77
|
+
# @return [String] the worksheet's XML, reflecting any in-memory edits.
|
|
78
|
+
# The XML declaration and original namespace bindings are preserved.
|
|
79
|
+
def to_xml
  # #document loads the sheet XML on first access and returns the DOM.
  document.to_xml
end
|
|
83
|
+
|
|
84
|
+
# @api private
|
|
85
|
+
# Resolves a shared-string index against the table loaded from
|
|
86
|
+
# +xl/sharedStrings.xml+. Used by {EditableCell} when decoding +t="s"+
|
|
87
|
+
# cells.
|
|
88
|
+
def shared_string_at(index)
  # Plain lookup in the table handed over by the workbook.
  @shared_strings[index]
end
|
|
91
|
+
|
|
92
|
+
# @api private
|
|
93
|
+
# Locates the +<c>+ node for +coordinate+. With +create: true+ the
|
|
94
|
+
# node — and its enclosing +<row>+ — are inserted in sorted position
|
|
95
|
+
# when missing. Returns +nil+ when +create+ is false and the cell does
|
|
96
|
+
# not exist.
|
|
97
|
+
def find_or_create_cell_node(coordinate, create:)
  ensure_doc_loaded!

  col, row = parse_coordinate(coordinate)
  raise ArgumentError, "invalid coordinate: #{coordinate.inspect}" if col.nil? || row.nil?

  row_node = find_or_create_row(row, create: create)
  return nil if row_node.nil?

  # Cells are matched on their "r" attribute.
  cell_node = row_node.element_children.find { |child| child["r"] == coordinate }
  if cell_node
    cell_node
  elsif create
    insert_cell_in_order(row_node, coordinate, col)
  end
end
|
|
111
|
+
|
|
112
|
+
# @api private
|
|
113
|
+
# Returns the document for in-place mutation. Loads the XML on first
|
|
114
|
+
# access.
|
|
115
|
+
def document
  # Parse on first access; later calls reuse the cached DOM.
  ensure_doc_loaded!
  @doc
end
|
|
119
|
+
|
|
120
|
+
private
|
|
121
|
+
|
|
122
|
+
# Parses the worksheet entry once and caches the document, its <sheetData>
# element, and a row-number => <row> index. Raises WorksheetFormatError
# when the entry is missing, fails to parse, or has no <sheetData>.
def ensure_doc_loaded!
  return if @doc

  entry = @zip.find_entry(@entry_path)
  unless entry
    raise WorksheetFormatError,
          "worksheet #{@name.inspect} is missing XML entry #{@entry_path.inspect} in #{@workbook_path}"
  end

  parsed = Nokogiri::XML(entry.get_input_stream.read)
  if parsed.errors.any?
    raise WorksheetFormatError,
          "invalid worksheet XML for sheet #{@name.inspect} in #{@workbook_path}: #{parsed.errors.first}"
  end

  sheet_data = parsed.at_xpath("/main:worksheet/main:sheetData", "main" => MAIN_NS)
  if sheet_data.nil?
    raise WorksheetFormatError,
          "worksheet #{@name.inspect} in #{@workbook_path} is missing <sheetData>"
  end

  @doc = parsed
  @sheet_data = sheet_data

  # Index rows by their "r" attribute; rows without one are left out.
  rows_by_number = {}
  sheet_data.xpath("./main:row", "main" => MAIN_NS).each do |row_node|
    number = row_node["r"]
    rows_by_number[number.to_i] = row_node if number
  end
  @row_index = rows_by_number
end
|
|
150
|
+
|
|
151
|
+
# Returns the <row> node for +row_num+ from the row index. When it is
# absent, inserts and indexes a new row if +create+ is truthy, otherwise
# returns nil.
def find_or_create_row(row_num, create:)
  indexed = @row_index[row_num]
  return indexed if indexed
  return nil unless create

  @row_index[row_num] = insert_row_in_order(@sheet_data, row_num)
end
|
|
160
|
+
|
|
161
|
+
# Insertion is done by parsing an XML fragment in the parent's context
|
|
162
|
+
# so the new element inherits the SpreadsheetML default namespace
|
|
163
|
+
# binding from its surroundings rather than landing in +xmlns=""+ jail.
|
|
164
|
+
def insert_row_in_order(parent, row_num)
  # First existing <row> with a larger number; a missing "r" counts as 0.
  successor = parent.element_children.find do |child|
    child.name == "row" && child["r"].to_i > row_num
  end

  fragment = %(<row r="#{row_num}"/>)
  inserted =
    if successor
      successor.add_previous_sibling(fragment)
    else
      parent.add_child(fragment)
    end
  first_node(inserted)
end
|
|
172
|
+
|
|
173
|
+
# Inserts an empty <c> for +coordinate+ into +parent+ before the first
# existing cell in a higher column, or appends it when there is none.
def insert_cell_in_order(parent, coordinate, col_index)
  successor = parent.element_children.find do |child|
    if child.name == "c"
      sibling_col = parse_coordinate(child["r"]).first
      sibling_col && sibling_col > col_index
    end
  end

  fragment = %(<c r="#{coordinate}"/>)
  inserted =
    if successor
      successor.add_previous_sibling(fragment)
    else
      parent.add_child(fragment)
    end
  first_node(inserted)
end
|
|
184
|
+
|
|
185
|
+
# Returns the first element when given a NodeSet, and the argument itself
# for anything else.
def first_node(result)
  case result
  when Nokogiri::XML::NodeSet then result.first
  else result
  end
end
|
|
188
|
+
|
|
189
|
+
COORDINATE_RE = /\A([A-Z]+)([1-9]\d*)\z/.freeze
|
|
190
|
+
private_constant :COORDINATE_RE
|
|
191
|
+
|
|
192
|
+
# Upcases +coordinate+ and checks it against COORDINATE_RE. Returns the
# normalized string; raises ArgumentError for nil or a non-matching value.
def normalize_coordinate(coordinate)
  raise ArgumentError, "coordinate cannot be nil" if coordinate.nil?

  coordinate.to_s.upcase.tap do |reference|
    raise ArgumentError, "invalid coordinate: #{coordinate.inspect}" unless COORDINATE_RE.match?(reference)
  end
end
|
|
200
|
+
|
|
201
|
+
# Splits a coordinate into [column_index, row_number]. Returns [nil, nil]
# when +coordinate+ is falsy or does not match COORDINATE_RE.
def parse_coordinate(coordinate)
  match = coordinate && coordinate.match(COORDINATE_RE)
  return [nil, nil] unless match

  letters, digits = match.captures
  [column_index(letters), digits.to_i]
end
|
|
209
|
+
|
|
210
|
+
# Converts a column label of uppercase letters to its 1-based index by
# reading it as a base-26 number ("A" => 1, "Z" => 26, "AA" => 27).
def column_index(label)
  label.each_byte.inject(0) { |index, byte| (index * 26) + (byte - 64) }
end
|
|
215
|
+
end
|
|
216
|
+
end
|
data/lib/rbxl/errors.rb
CHANGED
|
@@ -34,6 +34,12 @@ module Rbxl
|
|
|
34
34
|
# worksheets are stopped mid-inflate rather than after the fact.
|
|
35
35
|
class WorksheetTooLargeError < Error; end
|
|
36
36
|
|
|
37
|
+
# Raised by {Rbxl.open} when the file is not a valid +.xlsx+ container.
|
|
38
|
+
# Most commonly fires on legacy +.xls+ (BIFF/CFB) files — the message
|
|
39
|
+
# names the detected format and suggests a conversion path rather than
|
|
40
|
+
# letting the underlying ZIP parser surface an opaque error.
|
|
41
|
+
class UnsupportedFormatError < Error; end
|
|
42
|
+
|
|
37
43
|
# Raised when workbook-level XML is malformed or internally inconsistent,
|
|
38
44
|
# for example when +xl/workbook.xml+ cannot be parsed or references a
|
|
39
45
|
# missing relationship target.
|
|
@@ -46,4 +52,10 @@ module Rbxl
|
|
|
46
52
|
# workbook path, sheet name, and cell coordinate to make bad inputs easy
|
|
47
53
|
# to locate.
|
|
48
54
|
class CellValueError < WorksheetFormatError; end
|
|
55
|
+
|
|
56
|
+
# Raised by {Rbxl::EditableCell#value=} when the assigned Ruby object is
|
|
57
|
+
# not one of the supported types (+nil+, +String+, +Integer+, +Float+,
|
|
58
|
+
# +true+, +false+). +Date+/+Time+ values raise this error too — see
|
|
59
|
+
# {Rbxl::EditableCell} for the rationale.
|
|
60
|
+
class EditableCellTypeError < Error; end
|
|
49
61
|
end
|