rbxl 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
# Load the compiled rbxl_native extension.
#
# The installed location is tried first. If that raises LoadError, the ext/
# build directory (development checkout) is searched for a compiled library;
# when nothing is found a LoadError with build instructions is raised.
begin
  require "rbxl_native/rbxl_native"
rescue LoadError
  build_dir = File.expand_path("../../ext/rbxl_native", __dir__)
  pattern = File.join(build_dir, "**", "rbxl_native.{so,bundle,dll}")
  compiled = Dir.glob(pattern).first

  unless compiled
    raise LoadError,
          "rbxl_native C extension not found. " \
          "Ensure libxml2 development headers are installed and run: " \
          "cd ext/rbxl_native && ruby extconf.rb && make"
  end

  require compiled
end
@@ -0,0 +1,3 @@
1
module Rbxl
  # Immutable value object for a single cell returned by the read-only API.
  #
  # Members, in order: +coordinate+ and +value+. Like every Data class it can
  # be built positionally or with keywords.
  ReadOnlyCell = Data.define(*%i[coordinate value])
end
@@ -0,0 +1,153 @@
1
+ module Rbxl
2
+ class ReadOnlyWorkbook
3
+ MAIN_NS = "http://schemas.openxmlformats.org/spreadsheetml/2006/main"
4
+ REL_NS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships"
5
+ PACKAGE_REL_NS = "http://schemas.openxmlformats.org/package/2006/relationships"
6
+
7
+ attr_reader :path, :sheet_names
8
+
9
# Opens the workbook at +path+.
#
# Without a block the new workbook is returned and the caller is responsible
# for calling #close. With a block the workbook is yielded, closed when the
# block exits (normally or via an exception), and the block's value is
# returned.
#
# @param path [String] location of the .xlsx file
# @yieldparam workbook [ReadOnlyWorkbook] the opened workbook
# @return [ReadOnlyWorkbook, Object] the workbook, or the block's result
def self.open(path)
  workbook = new(path)
  return workbook unless block_given?

  begin
    yield workbook
  ensure
    workbook.close
  end
end
12
+
13
# Opens the zip archive at +path+ and eagerly loads the shared-string table
# and the sheet-name => archive-entry mapping.
#
# If either load step raises, the archive handle is closed before the error
# is re-raised so a malformed workbook does not leak an open file.
#
# @param path [String] location of the .xlsx file
def initialize(path)
  @path = path
  @closed = false
  @zip = Zip::File.open(path)

  begin
    @shared_strings = load_shared_strings
    @sheet_entries = load_sheet_entries
    @sheet_names = @sheet_entries.keys.freeze
  rescue StandardError
    # Construction failed: nobody will ever get a reference to call #close on.
    @zip.close
    @closed = true
    raise
  end
end
21
+
22
# Returns a ReadOnlyWorksheet for the sheet called +name+.
#
# Raises through ensure_open! when the workbook has been closed, and
# SheetNotFoundError when +name+ is not a known sheet.
def sheet(name)
  ensure_open!

  unless @sheet_entries.key?(name)
    raise SheetNotFoundError, "sheet not found: #{name}"
  end

  ReadOnlyWorksheet.new(
    zip: @zip,
    entry_path: @sheet_entries[name],
    shared_strings: @shared_strings,
    name: name
  )
end
31
+
32
# Closes the underlying zip archive and marks the workbook closed.
# Calling it again on an already-closed workbook does nothing.
def close
  unless closed?
    @zip.close
    @closed = true
  end
end
38
+
39
# Whether #close has already run on this workbook (reads the @closed flag).
def closed? = @closed
42
+
43
+ private
44
+
45
# Guard used before touching the archive: raises ClosedWorkbookError once the
# workbook has been closed, otherwise returns nil.
def ensure_open!
  return unless closed?

  raise ClosedWorkbookError, "workbook has been closed"
end
48
+
49
# Streams xl/sharedStrings.xml and returns the shared-string table.
#
# Each <si> element contributes exactly one frozen String, built by joining
# the text of every <t> inside it (plain or inside rich-text runs). Text
# inside <rPh> (phonetic) elements is skipped. Returns [] when the archive has
# no shared-strings part.
#
# @return [Array<String>] strings indexed by their position in the table
def load_shared_strings
  entry = @zip.find_entry("xl/sharedStrings.xml")
  return [] unless entry

  strings = []
  io = entry.get_input_stream
  reader = Nokogiri::XML::Reader(io)

  in_si = false
  in_phonetic = false
  collecting_text = false
  buffer = +""
  current_fragments = []

  reader.each do |node|
    case node.node_type
    when Nokogiri::XML::Reader::TYPE_ELEMENT
      # Self-closing elements never produce an END_ELEMENT event, so they must
      # not switch on state that only an end tag would switch off again.
      self_closing = node.empty_element?

      case node.local_name
      when "si"
        if self_closing
          # <si/> still occupies an index in the table; keep indexes aligned.
          strings << "".freeze
        else
          in_si = true
          current_fragments = []
        end
      when "rPh"
        in_phonetic = true if in_si && !self_closing
      when "t"
        next unless in_si && !in_phonetic
        next if self_closing

        collecting_text = true
        buffer.clear
      end
    when Nokogiri::XML::Reader::TYPE_TEXT,
         Nokogiri::XML::Reader::TYPE_CDATA,
         Nokogiri::XML::Reader::TYPE_WHITESPACE,
         Nokogiri::XML::Reader::TYPE_SIGNIFICANT_WHITESPACE
      # The reader reports whitespace-only text with the whitespace node types
      # rather than TYPE_TEXT. Inside <t> that whitespace is real content
      # (e.g. the single space between two rich-text runs), so keep it.
      buffer << node.value if collecting_text
    when Nokogiri::XML::Reader::TYPE_END_ELEMENT
      case node.local_name
      when "t"
        if collecting_text
          current_fragments << buffer.dup
          collecting_text = false
        end
      when "rPh"
        in_phonetic = false
      when "si"
        strings << current_fragments.join.freeze
        in_si = false
        in_phonetic = false
        collecting_text = false
      end
    end
  end

  strings
ensure
  io&.close
end
108
+
109
# Builds the mapping from sheet name to the archive entry holding that sheet.
#
# Sheet names and relationship ids come from the <sheet> elements of
# xl/workbook.xml; each id is resolved through xl/_rels/workbook.xml.rels.
# A relative target is resolved against "xl/", while a target starting with
# "/" is already rooted at the package and is used as given (minus the
# leading slash, since archive entry names have none).
#
# @return [Hash{String => String}] sheet name => archive entry path
# @raise [KeyError] when a sheet references an unknown relationship id
def load_sheet_entries
  relationships = load_relationship_targets("xl/_rels/workbook.xml.rels")
  sheets = {}

  each_xml_node("xl/workbook.xml") do |node|
    next unless node.node_type == Nokogiri::XML::Reader::TYPE_ELEMENT
    next unless node.local_name == "sheet"

    name = node.attribute("name")
    rid = node.attribute("r:id")
    next unless name && rid

    target = relationships.fetch(rid)
    entry_path = target.start_with?("/") ? target : "xl/#{target}"
    sheets[name] = entry_path.gsub(%r{/+}, "/").delete_prefix("/")
  end

  sheets
end
127
+
128
# Reads the relationships part at +entry_path+ and returns a Hash mapping each
# <Relationship> element's "Id" attribute to its "Target" attribute.
# Elements missing either attribute are ignored.
def load_relationship_targets(entry_path)
  targets_by_id = {}

  each_xml_node(entry_path) do |xml_node|
    is_relationship =
      xml_node.node_type == Nokogiri::XML::Reader::TYPE_ELEMENT &&
      xml_node.local_name == "Relationship"
    next unless is_relationship

    rel_id = xml_node.attribute("Id")
    rel_target = xml_node.attribute("Target")
    targets_by_id[rel_id] = rel_target if rel_id && rel_target
  end

  targets_by_id
end
144
+
145
# Yields every reader node of the archive entry at +entry_path+, in document
# order, and closes the entry's input stream afterwards even if the block
# raises.
def each_xml_node(entry_path)
  stream = @zip.get_entry(entry_path).get_input_stream
  Nokogiri::XML::Reader(stream).each do |xml_node|
    yield xml_node
  end
ensure
  stream&.close
end
152
+ end
153
+ end
@@ -0,0 +1,501 @@
1
+ module Rbxl
2
+ class ReadOnlyWorksheet
3
+ ELEMENT_NODE = Nokogiri::XML::Reader::TYPE_ELEMENT
4
+ TEXT_NODE = Nokogiri::XML::Reader::TYPE_TEXT
5
+ CDATA_NODE = Nokogiri::XML::Reader::TYPE_CDATA
6
+ END_ELEMENT_NODE = Nokogiri::XML::Reader::TYPE_END_ELEMENT
7
+
8
+ attr_reader :name, :dimensions
9
+
10
# Stores the collaborators for one sheet and reads its declared dimensions.
#
# zip            - the opened archive the sheet entry is read from
# entry_path     - archive path of the sheet's XML part
# shared_strings - shared-string table used to resolve t="s" cells
# name           - sheet name exposed through #name
def initialize(zip:, entry_path:, shared_strings:, name:)
  @name = name
  @shared_strings = shared_strings
  @zip = zip
  @entry_path = entry_path
  @merge_anchor_values = {}
  @merge_ranges_by_row = nil
  # Must come after @zip/@entry_path: it streams the sheet entry.
  @dimensions = extract_dimensions
end
19
+
20
# Iterates the sheet's rows.
#
# Without a block an Enumerator over the same options is returned. When only
# +values_only+ is requested the dedicated values-only reader is used; every
# other combination goes through the full reader.
def each_row(pad_cells: false, values_only: false, expand_merged: false, &block)
  options = { pad_cells: pad_cells, values_only: values_only, expand_merged: expand_merged }
  return enum_for(:each_row, **options) if block.nil?

  fast_path = values_only && !(pad_cells || expand_merged)
  return each_row_values_only(&block) if fast_path

  each_row_full(**options, &block)
end
29
+
30
# Block-less form of #each_row: forwards the three options and returns
# whatever #each_row returns when called without a block.
def rows(values_only: false, pad_cells: false, expand_merged: false)
  forwarded = { values_only: values_only, pad_cells: pad_cells, expand_merged: expand_merged }
  each_row(**forwarded)
end
33
+
34
# The :max_col entry of the known dimensions, or nil while the sheet's
# dimensions are unknown.
def max_column
  dimensions ? dimensions[:max_col] : nil
end
39
+
40
# The :max_row entry of the known dimensions, or nil while the sheet's
# dimensions are unknown.
def max_row
  dimensions ? dimensions[:max_row] : nil
end
45
+
46
# Forgets the stored dimensions, leaving the worksheet "unsized" until
# #calculate_dimension is called with force: true. Returns nil.
def reset_dimensions
  @dimensions = nil
end
49
+
50
# Returns the sheet's dimension reference string.
#
# Known dimensions are returned as-is. For an unsized worksheet the sheet is
# only scanned when +force+ is true; otherwise UnsizedWorksheetError is
# raised. A scan that finds no cells reports "A1:A1".
def calculate_dimension(force: false)
  known = dimensions
  return known[:ref] if known

  unless force
    raise UnsizedWorksheetError, "worksheet is unsized, use force: true"
  end

  @dimensions = scan_dimensions
  scanned = dimensions
  return "A1:A1" unless scanned

  scanned[:ref]
end
60
+
61
+ private
62
+
63
# Yields one frozen Array of coerced cell values per <row> element.
#
# When Rbxl::Native is defined (and @disable_native is not set) the whole
# sheet XML is read into memory and handed to the native parser. Otherwise the
# sheet is streamed with the XML reader:
#
# * <c> records the cell's "t" attribute; text of <v> replaces the raw value,
#   text of <t> (inline strings) is appended to it;
# * the end tag one level below the row finishes a cell and appends its
#   coerced value;
# * the row's own end tag yields the collected values.
#
# Only cells that produce an end tag are collected; no padding is applied.
def each_row_values_only(&block)
  if defined?(Rbxl::Native) && !@disable_native
    io = @zip.get_entry(@entry_path).get_input_stream
    xml =
      begin
        io.read
      ensure
        # Release the entry stream as soon as the XML is in memory.
        io.close
      end
    Rbxl::Native.parse_sheet(xml, @shared_strings, &block)
    return
  end

  cell_type = nil
  collecting_value = false
  in_v = false
  raw_value = nil
  value_buffer = +""
  current_values = nil
  row_depth = nil

  with_sheet_reader do |reader|
    reader.each do |node|
      case node.node_type
      when ELEMENT_NODE
        case node.local_name
        when "row"
          current_values = []
          row_depth = node.depth
        when "c"
          cell_type = node.attribute("t")
          raw_value = nil
        when "v"
          collecting_value = true
          in_v = true
          value_buffer.clear
        when "t"
          collecting_value = true
          value_buffer.clear
        end
      when TEXT_NODE, CDATA_NODE
        value_buffer << node.value if collecting_value
      when END_ELEMENT_NODE
        if collecting_value
          if in_v
            raw_value = value_buffer.dup
            collecting_value = false
            in_v = false
          else
            raw_value = raw_value ? raw_value << value_buffer : value_buffer.dup
            collecting_value = false
          end
        elsif current_values.nil?
          # No row is open. Closing tags of elements that follow the rows can
          # sit at the same depth as a row and must not be reported as rows.
          next
        elsif node.depth == row_depth && node.local_name == "row"
          yield current_values.freeze
          current_values = nil
        elsif node.depth == row_depth + 1
          current_values << coerce_value(raw_value, cell_type)
          cell_type = nil
          raw_value = nil
        end
      end
    end
  end
end
121
+
122
# Yields every row either as a Row of cells or, with +values_only+, as a
# frozen Array of values, optionally padded and/or with merged ranges filled.
#
# The native parser is used only when it is available and none of the three
# options is set. Otherwise the sheet is streamed with the XML reader:
#
# * <row> takes its index from the "r" attribute, falling back to the
#   previous row's index + 1;
# * <c> takes its column from the "r" attribute, falling back to the previous
#   column + 1 (the coordinate is then synthesised);
# * text of <v> replaces the raw value, text of <t> is appended to it;
# * the end tag one level below the row finishes a cell;
# * the row's own end tag applies padding / merge expansion and yields.
def each_row_full(pad_cells:, values_only:, expand_merged:, &block)
  if defined?(Rbxl::Native) && !@disable_native && !pad_cells && !expand_merged && !values_only
    io = @zip.get_entry(@entry_path).get_input_stream
    xml =
      begin
        io.read
      ensure
        # Release the entry stream as soon as the XML is in memory.
        io.close
      end
    Rbxl::Native.parse_sheet_full(xml, @shared_strings, &block)
    return
  end

  current_row_index = nil
  last_row_index = 0
  current_cells = nil
  cell_ref = nil
  cell_type = nil
  current_col_index = 0
  collecting_value = false
  in_v = false
  raw_value = nil
  value_buffer = +""
  row_depth = nil

  with_sheet_reader do |reader|
    reader.each do |node|
      case node.node_type
      when ELEMENT_NODE
        case node.local_name
        when "row"
          current_row_index = attribute_int(node, "r") || (last_row_index + 1)
          current_col_index = 0
          current_cells = []
          row_depth = node.depth
        when "c"
          cell_ref = node.attribute("r")
          if cell_ref
            current_col_index = split_col_index(cell_ref)
          else
            current_col_index += 1
            cell_ref = "#{column_name(current_col_index)}#{current_row_index}"
          end
          cell_type = node.attribute("t")
          raw_value = nil
        when "v"
          collecting_value = true
          in_v = true
          value_buffer.clear
        when "t"
          collecting_value = true
          value_buffer.clear
        end
      when TEXT_NODE, CDATA_NODE
        value_buffer << node.value if collecting_value
      when END_ELEMENT_NODE
        if collecting_value
          if in_v
            raw_value = value_buffer.dup
            collecting_value = false
            in_v = false
          else
            raw_value = raw_value ? raw_value << value_buffer : value_buffer.dup
            collecting_value = false
          end
        elsif current_cells.nil?
          # No row is open. Closing tags of elements that follow the rows can
          # sit at the same depth as a row and must not be reported as rows.
          next
        elsif node.depth == row_depth && node.local_name == "row"
          current_cells = pad_row(current_cells, current_row_index, values_only: values_only) if pad_cells
          current_cells = expand_merged_cells(current_cells, current_row_index, values_only: values_only) if expand_merged
          yield values_only ? extract_values(current_cells).freeze : Row.new(index: current_row_index, cells: current_cells)
          last_row_index = current_row_index
          current_row_index = nil
          current_cells = nil
        elsif node.depth == row_depth + 1
          current_cells << build_row_entry(cell_ref, coerce_value(raw_value, cell_type), values_only)
          cell_ref = nil
          cell_type = nil
          raw_value = nil
        end
      end
    end
  end
end
198
+
199
# Opens the sheet's archive entry, yields an XML reader over it and returns
# the block's result. The entry stream is closed when the block finishes,
# whether normally, by an early return or by an exception.
def with_sheet_reader
  stream = @zip.get_entry(@entry_path).get_input_stream
  yield Nokogiri::XML::Reader(stream)
ensure
  stream&.close
end
206
+
207
# Looks for the first <dimension> element in the sheet and returns its "ref"
# attribute parsed by parse_range. Reading stops at that element. Returns nil
# when the sheet has no <dimension> element.
def extract_dimensions
  with_sheet_reader do |xml|
    xml.each do |element|
      is_dimension = element.node_type == ELEMENT_NODE && element.local_name == "dimension"
      return parse_range(element.attribute("ref")) if is_dimension
    end
  end

  nil
end
218
+
219
# Scans the sheet for <mergeCell> elements and indexes the parsed ranges by
# every row number they cover.
#
# The returned Hash creates (and stores) an empty Array for any row that is
# looked up but has no merged range.
def extract_merge_ranges_by_row
  index = Hash.new { |rows, row_number| rows[row_number] = [] }

  with_sheet_reader do |xml|
    xml.each do |element|
      next if element.node_type != ELEMENT_NODE || element.local_name != "mergeCell"

      merged = parse_merge_range(element.attribute("ref"))
      next unless merged

      merged[:start_row].upto(merged[:end_row]) do |row_number|
        index[row_number] << merged
      end
    end
  end

  index
end
237
+
238
# Computes the sheet's extent by visiting every <c> element.
#
# A cell's position comes from its "r" attribute. Cells without that
# attribute are positioned the same way the row reader does it: the column
# after the previous cell, in the current row (the row's "r" attribute, or
# the previous row + 1). Cells whose "r" attribute cannot be parsed are
# ignored.
#
# @return [Hash, nil] { ref:, max_col:, max_row: } with ref anchored at A1,
#   or nil when the sheet contains no usable cell
def scan_dimensions
  max_col = nil
  max_row = nil
  row_index = 0
  col_index = 0

  with_sheet_reader do |reader|
    reader.each do |node|
      next unless node.node_type == ELEMENT_NODE

      case node.local_name
      when "row"
        row_index = attribute_int(node, "r") || (row_index + 1)
        col_index = 0
      when "c"
        coordinate = node.attribute("r")
        if coordinate
          col, row = split_coordinate(coordinate)
          next unless col && row

          col_index = col
        else
          # No explicit reference: position is implied by document order.
          col_index += 1
          col = col_index
          row = row_index
        end

        max_col = col if max_col.nil? || col > max_col
        max_row = row if max_row.nil? || row > max_row
      end
    end
  end

  return nil unless max_col && max_row

  { ref: "A1:#{column_name(max_col)}#{max_row}", max_col: max_col, max_row: max_row }
end
259
+
260
# Turns a range reference such as "A1:C3" (or a single cell such as "B2")
# into { ref:, max_col:, max_row: }, where the maxima are the column/row of
# the range's second corner. Returns nil for a nil or empty reference.
def parse_range(reference)
  return nil if reference.nil? || reference.empty?

  first_corner, last_corner = reference.split(":", 2)
  bounds = range_bounds(first_corner, last_corner || first_corner)

  { ref: reference, max_col: bounds[2], max_row: bounds[3] }
end
268
+
269
# Parses a merged-range reference into its four numeric bounds:
# { start_col:, start_row:, end_col:, end_row: }. A single-cell reference is
# treated as a range whose two corners coincide. Returns nil for a nil/empty
# reference or when any bound could not be determined.
def parse_merge_range(reference)
  return nil if reference.nil? || reference.empty?

  first_corner, last_corner = reference.split(":", 2)
  start_col, start_row, end_col, end_row = range_bounds(first_corner, last_corner || first_corner)
  return nil unless [start_col, start_row, end_col, end_row].all?

  { start_col: start_col, start_row: start_row, end_col: end_col, end_row: end_row }
end
284
+
285
# Splits both corner references with split_coordinate and returns
# [start_col, start_row, finish_col, finish_row].
def range_bounds(start_ref, finish_ref)
  [start_ref, finish_ref].flat_map { |corner| split_coordinate(corner).first(2) }
end
290
+
291
# Splits a cell reference such as "AB12" into [column_index, row_number]
# (both 1-based; "A" is column 1).
#
# Returns [nil, nil] when the reference is nil, empty, or not made of one or
# more upper-case letters followed by one or more digits. A nil reference
# occurs for cells that carry no "r" attribute.
#
# @param reference [String, nil]
# @return [Array(Integer, Integer), Array(nil, nil)]
def split_coordinate(reference)
  return [nil, nil] if reference.nil? || reference.empty?

  col = 0
  i = 0
  len = reference.bytesize

  # Leading letters: bijective base-26 column label.
  while i < len
    byte = reference.getbyte(i)
    break unless byte >= 65 && byte <= 90 # A-Z

    col = (col * 26) + (byte - 64)
    i += 1
  end

  # Need at least one letter and at least one character after the letters.
  return [nil, nil] if i == 0 || i == len

  row = 0
  while i < len
    byte = reference.getbyte(i)
    return [nil, nil] unless byte >= 48 && byte <= 57 # 0-9

    row = (row * 10) + (byte - 48)
    i += 1
  end

  [col, row]
end
317
+
318
# Converts a column label to its 1-based index by folding the label's bytes
# as bijective base-26 digits ("A" => 1, "Z" => 26, "AA" => 27).
# The label is not validated; callers pass upper-case letters only.
def column_index(label)
  label.length.times.reduce(0) do |total, position|
    (total * 26) + (label.getbyte(position) - 64)
  end
end
328
+
329
# Returns the 1-based column index encoded by the leading upper-case letters
# of a cell reference ("AB12" => 28). Everything from the first non-letter on
# is ignored; a reference without leading letters gives 0.
def split_col_index(reference)
  reference.each_byte.reduce(0) do |total, byte|
    break total unless byte.between?(65, 90) # stop at the first non A-Z byte

    (total * 26) + (byte - 64)
  end
end
344
+
345
# Expands a row to one entry per column from 1 to dimensions[:max_col].
#
# Existing entries are placed by the column letters of their coordinate
# (cell.coordinate, or the first element of a [coordinate, value] pair when
# +values_only+). Missing columns get [nil, nil] in values-only mode and an
# EmptyCell carrying the synthesised coordinate otherwise. The row is
# returned untouched when the sheet's max column is unknown.
def pad_row(cells, row_index, values_only:)
  width = dimensions && dimensions[:max_col]
  return cells unless width

  cells_by_column = {}
  cells.each do |entry|
    ref =
      if entry.respond_to?(:coordinate)
        entry.coordinate
      elsif values_only
        entry[0]
      end
    cells_by_column[column_index(ref[/\A[A-Z]+/])] = entry if ref
  end

  1.upto(width).map do |column|
    cells_by_column.fetch(column) do
      values_only ? [nil, nil] : EmptyCell.new(coordinate: "#{column_name(column)}#{row_index}")
    end
  end
end
364
+
365
# Copies the anchor (top-left) value of every merged range touching
# +row_index+ into the other cells of that range on this row.
#
# The anchor value is captured into @merge_anchor_values when the range's
# first row is processed and reused for the range's later rows. Ranges whose
# anchor value is nil are left alone. Works on a copy of +cells+; entries are
# addressed positionally (column n lives at index n - 1).
def expand_merged_cells(cells, row_index, values_only:)
  row_ranges = merge_ranges_by_row[row_index]
  return cells if row_ranges.empty?

  filled = cells.dup

  row_ranges.each do |span|
    on_anchor_row = row_index == span[:start_row]
    if on_anchor_row
      @merge_anchor_values[span] = value_at(filled, span[:start_col], values_only: values_only)
    end

    anchor = @merge_anchor_values[span]
    next if anchor.nil?

    span[:start_col].upto(span[:end_col]) do |column|
      # The anchor cell itself already holds the value.
      next if on_anchor_row && column == span[:start_col]

      filled = set_value_at(filled, row_index, column, anchor, values_only: values_only)
    end
  end

  filled
end
388
+
389
# Reads the value stored at 1-based column +col_index+ of a positional row.
#
# Returns nil when nothing is stored there or the entry is an EmptyCell. In
# values-only mode entries are [coordinate, value] pairs and the second
# element is returned; otherwise the entry's #value is.
def value_at(cells, col_index, values_only:)
  entry = cells[col_index - 1]
  return unless entry
  return entry[1] if values_only

  entry.is_a?(EmptyCell) ? nil : entry.value
end
401
+
402
# Overwrites the entry at 1-based column +col_index+ with +value+, using a
# freshly built coordinate for that column and +row_index+. Stores a
# [coordinate, value] pair in values-only mode and a ReadOnlyCell otherwise.
# Mutates and returns +cells+.
def set_value_at(cells, row_index, col_index, value, values_only:)
  ref = "#{column_name(col_index)}#{row_index}"
  cells[col_index - 1] = values_only ? [ref, value] : ReadOnlyCell.new(ref, value)
  cells
end
413
+
414
# Memoised row => merged-ranges index; the sheet is scanned for merged ranges
# on first use only.
def merge_ranges_by_row
  return @merge_ranges_by_row if @merge_ranges_by_row

  @merge_ranges_by_row = extract_merge_ranges_by_row
end
417
+
418
# Converts a cell's raw text into a Ruby value according to the cell's "t"
# attribute.
#
# "s"                - index into the shared-string table; a missing or empty
#                      index yields nil instead of silently resolving to
#                      entry 0
# "inlineStr", "str" - the raw text unchanged
# "b"                - true when the raw text is "1", false otherwise
# anything else      - delegated to infer_scalar
#
# @param raw_value [String, nil] collected text of the cell
# @param type [String, nil] the cell's "t" attribute
def coerce_value(raw_value, type)
  case type
  when "s"
    # nil.to_i and "".to_i are both 0, which would return the first string.
    return nil if raw_value.nil? || raw_value.empty?

    @shared_strings[raw_value.to_i]
  when "inlineStr", "str"
    raw_value
  when "b"
    raw_value == "1"
  else
    infer_scalar(raw_value)
  end
end
430
+
431
# Converts untyped cell text: nil/empty becomes nil, text classified as
# :integer or :float by detect_numeric_kind becomes an Integer or Float, and
# anything else is returned unchanged.
def infer_scalar(raw_value)
  return nil if raw_value.nil? || raw_value.empty?

  case detect_numeric_kind(raw_value)
  when :integer then raw_value.to_i
  when :float then raw_value.to_f
  else raw_value
  end
end
440
+
441
# Classifies +value+ as :integer, :float or nil (not numeric) with a single
# byte scan and no allocations.
#
# Accepted: an optional leading "-", digits with at most one ".", and an
# optional exponent ("E" or "e", optional sign, one or more digits)
# directly after a digit. Anything with an exponent is a :float, so values
# written in scientific notation such as "1.5E-3" are recognised as numbers.
#
# @param value [String]
# @return [Symbol, nil]
def detect_numeric_kind(value)
  index = 0
  length = value.length
  saw_digit = false
  saw_dot = false

  if value.getbyte(0) == 45 # "-"
    index = 1
    return nil if length == 1
  end

  while index < length
    byte = value.getbyte(index)

    if byte >= 48 && byte <= 57 # 0-9
      saw_digit = true
    elsif byte == 46 # "."
      return nil if saw_dot

      saw_dot = true
    elsif byte == 69 || byte == 101 # "E" / "e"
      # The exponent must directly follow a mantissa digit ("1.E5" is refused
      # because String#to_f would stop at the dot).
      return nil unless saw_digit

      previous = value.getbyte(index - 1)
      return nil unless previous >= 48 && previous <= 57

      index += 1
      sign = value.getbyte(index)
      index += 1 if sign == 43 || sign == 45 # "+" / "-"
      return nil if index >= length # exponent needs at least one digit

      while index < length
        digit = value.getbyte(index)
        return nil unless digit >= 48 && digit <= 57

        index += 1
      end

      return :float
    else
      return nil
    end

    index += 1
  end

  return nil unless saw_digit

  saw_dot ? :float : :integer
end
472
+
473
# Converts a 1-based column index to its letter label (1 => "A", 26 => "Z",
# 27 => "AA"). Indexes that are not positive give an empty string.
def column_name(index)
  reversed = +""
  remaining = index

  while remaining > 0
    # Bijective base-26: shift to 0-based before taking the digit.
    remaining, offset = (remaining - 1).divmod(26)
    reversed << (65 + offset).chr
  end

  reversed.reverse
end
485
+
486
# Reads attribute +key+ from +node+ and converts it with #to_i.
# Returns nil when the attribute is absent.
def attribute_int(node, key)
  raw = node.attribute(key)
  raw.nil? ? nil : raw.to_i
end
490
+
491
# Builds the per-cell entry collected while reading a row: a
# [coordinate, value] pair in values-only mode, a ReadOnlyCell otherwise.
def build_row_entry(coordinate, value, values_only)
  values_only ? [coordinate, value] : ReadOnlyCell.new(coordinate, value)
end
496
+
497
# Reduces row entries to plain values: [coordinate, value] pairs contribute
# their second element, any other entry is passed through unchanged.
def extract_values(cells)
  cells.map do |entry|
    next entry unless entry.is_a?(Array)

    entry[1]
  end
end
500
+ end
501
+ end
data/lib/rbxl/row.rb ADDED
@@ -0,0 +1,23 @@
1
module Rbxl
  # One worksheet row: its index plus the cells read for it.
  #
  # The cells array handed to the constructor is frozen in place (not copied).
  class Row
    attr_reader :index, :cells

    # index - the row's index as supplied by the reader
    # cells - array of cell objects, each responding to #value
    def initialize(index:, cells:)
      @values = nil
      @cells = cells.freeze
      @index = index
    end

    # Delegates to the cells array's own #[] with the same argument.
    def [](offset)
      cells[offset]
    end

    # Number of cells in the row.
    def size
      cells.size
    end

    # Frozen array with each cell's #value; computed once and cached.
    def values
      return @values if @values

      @values = cells.map(&:value).freeze
    end
  end
end
@@ -0,0 +1,3 @@
1
module Rbxl
  # Version of the gem. Frozen so the shared constant cannot be mutated in
  # place by callers.
  VERSION = "1.0.0".freeze
end
@@ -0,0 +1,10 @@
1
module Rbxl
  # A value paired with an optional style id, both exposed read-only.
  class WriteOnlyCell
    attr_reader :value, :style_id

    # value    - the cell's value
    # style_id - optional style identifier (defaults to nil)
    def initialize(value, style_id: nil)
      @style_id = style_id
      @value = value
    end
  end
end