rbxl 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/README.md +61 -20
- data/Rakefile +6 -0
- data/ext/rbxl_native/native.c +127 -7
- data/lib/rbxl/cell.rb +15 -0
- data/lib/rbxl/empty_cell.rb +15 -0
- data/lib/rbxl/errors.rb +29 -0
- data/lib/rbxl/native.rb +14 -1
- data/lib/rbxl/read_only_cell.rb +10 -0
- data/lib/rbxl/read_only_workbook.rb +83 -6
- data/lib/rbxl/read_only_worksheet.rb +119 -7
- data/lib/rbxl/row.rb +34 -1
- data/lib/rbxl/version.rb +2 -1
- data/lib/rbxl/write_only_cell.rb +19 -1
- data/lib/rbxl/write_only_workbook.rb +42 -1
- data/lib/rbxl/write_only_worksheet.rb +41 -0
- data/lib/rbxl.rb +96 -2
- data/sig/rbxl.rbs +128 -0
- metadata +6 -3
|
@@ -1,22 +1,86 @@
|
|
|
1
1
|
module Rbxl
|
|
2
|
+
# Row-by-row worksheet reader for a single sheet of a read-only workbook.
|
|
3
|
+
#
|
|
4
|
+
# Instances are produced by {Rbxl::ReadOnlyWorkbook#sheet} and must not be
|
|
5
|
+
# constructed directly; their lifecycle is bound to the workbook's ZIP
|
|
6
|
+
# handle. Rows can be consumed as {Rbxl::Row} objects or as plain value
|
|
7
|
+
# arrays depending on the iteration options.
|
|
8
|
+
#
|
|
9
|
+
# == Iteration modes
|
|
10
|
+
#
|
|
11
|
+
# # Default: yield Rbxl::Row with cell wrappers.
|
|
12
|
+
# sheet.each_row { |row| row.values }
|
|
13
|
+
#
|
|
14
|
+
# # Fast path: yield plain Array<Object> of decoded values.
|
|
15
|
+
# sheet.each_row(values_only: true) { |values| ... }
|
|
16
|
+
#
|
|
17
|
+
# # Pad missing cells in sparse rows up to max_column.
|
|
18
|
+
# sheet.each_row(pad_cells: true) { |row| ... }
|
|
19
|
+
#
|
|
20
|
+
# # Replicate anchor values across merged ranges.
|
|
21
|
+
# sheet.each_row(expand_merged: true) { |row| ... }
|
|
22
|
+
#
|
|
23
|
+
# Iteration without a block returns an +Enumerator+.
|
|
24
|
+
#
|
|
25
|
+
# == Dimensions
|
|
26
|
+
#
|
|
27
|
+
# The worksheet dimension (the <tt>A1:C10</tt>-style range) is read from
|
|
28
|
+
# the sheet's +<dimension>+ element when present. When absent or when the
|
|
29
|
+
# caller wants to recompute it, {#calculate_dimension} with
|
|
30
|
+
# <tt>force: true</tt> scans the sheet for actual cell coordinates.
|
|
2
31
|
class ReadOnlyWorksheet
|
|
32
|
+
# @private Nokogiri reader node-type shortcuts.
|
|
3
33
|
ELEMENT_NODE = Nokogiri::XML::Reader::TYPE_ELEMENT
|
|
34
|
+
# @private
|
|
4
35
|
TEXT_NODE = Nokogiri::XML::Reader::TYPE_TEXT
|
|
36
|
+
# @private
|
|
5
37
|
CDATA_NODE = Nokogiri::XML::Reader::TYPE_CDATA
|
|
38
|
+
# @private
|
|
6
39
|
END_ELEMENT_NODE = Nokogiri::XML::Reader::TYPE_END_ELEMENT
|
|
7
40
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
41
|
+
# @return [String] visible sheet name
|
|
42
|
+
attr_reader :name
|
|
43
|
+
|
|
44
|
+
# Parsed dimension metadata, +nil+ when the sheet has no +<dimension>+
|
|
45
|
+
# element and no scan has been forced. When present the hash has keys
|
|
46
|
+
# +:ref+, +:max_col+, and +:max_row+.
|
|
47
|
+
#
|
|
48
|
+
# @return [Hash{Symbol => Object}, nil]
|
|
49
|
+
attr_reader :dimensions
|
|
50
|
+
|
|
51
|
+
# @param zip [Zip::File] open archive shared with the workbook
|
|
52
|
+
# @param entry_path [String] ZIP entry path for this sheet's XML
|
|
53
|
+
# @param shared_strings [Array<String>] pre-decoded shared strings table
|
|
54
|
+
# @param name [String] visible sheet name
|
|
55
|
+
# @param streaming [Boolean] when the native extension is loaded, feed
|
|
56
|
+
# worksheet XML to the parser in chunks instead of reading the entry
|
|
57
|
+
# into memory first
|
|
58
|
+
def initialize(zip:, entry_path:, shared_strings:, name:, streaming: false)
|
|
11
59
|
@zip = zip
|
|
12
60
|
@entry_path = entry_path
|
|
13
61
|
@shared_strings = shared_strings
|
|
14
62
|
@name = name
|
|
63
|
+
@streaming = streaming
|
|
15
64
|
@dimensions = extract_dimensions
|
|
16
65
|
@merge_ranges_by_row = nil
|
|
17
66
|
@merge_anchor_values = {}
|
|
18
67
|
end
|
|
19
68
|
|
|
69
|
+
# Iterates rows in worksheet order.
|
|
70
|
+
#
|
|
71
|
+
# With +values_only+ and neither +pad_cells+ nor +expand_merged+ set,
|
|
72
|
+
# iteration takes a tighter path that yields frozen +Array<Object>+
|
|
73
|
+
# rows and skips allocating cell wrappers.
|
|
74
|
+
#
|
|
75
|
+
# @param pad_cells [Boolean] pad sparse rows with {Rbxl::EmptyCell} (or
|
|
76
|
+
# <tt>[coordinate, nil]</tt> pairs in +values_only+ mode) up to the
|
|
77
|
+
# worksheet's +max_column+
|
|
78
|
+
# @param values_only [Boolean] yield plain value arrays instead of
|
|
79
|
+
# {Rbxl::Row} instances
|
|
80
|
+
# @param expand_merged [Boolean] propagate the anchor value of every
|
|
81
|
+
# merged range across the range's cells
|
|
82
|
+
# @yieldparam row [Rbxl::Row, Array<Object>]
|
|
83
|
+
# @return [Enumerator, void] enumerator when called without a block
|
|
20
84
|
def each_row(pad_cells: false, values_only: false, expand_merged: false, &block)
|
|
21
85
|
return enum_for(:each_row, pad_cells: pad_cells, values_only: values_only, expand_merged: expand_merged) unless block
|
|
22
86
|
|
|
@@ -27,26 +91,54 @@ module Rbxl
|
|
|
27
91
|
end
|
|
28
92
|
end
|
|
29
93
|
|
|
94
|
+
# Enumerator-returning alias for {#each_row} that reads more naturally
|
|
95
|
+
# when the call site chains further enumerable operations.
|
|
96
|
+
#
|
|
97
|
+
# sheet.rows(values_only: true).take(10)
|
|
98
|
+
#
|
|
99
|
+
# @param values_only [Boolean] see {#each_row}
|
|
100
|
+
# @param pad_cells [Boolean] see {#each_row}
|
|
101
|
+
# @param expand_merged [Boolean] see {#each_row}
|
|
102
|
+
# @return [Enumerator]
|
|
30
103
|
def rows(values_only: false, pad_cells: false, expand_merged: false)
|
|
31
104
|
each_row(values_only: values_only, pad_cells: pad_cells, expand_merged: expand_merged)
|
|
32
105
|
end
|
|
33
106
|
|
|
107
|
+
# @return [Integer, nil] rightmost column index (1-based) from the
|
|
108
|
+
# worksheet dimension, or +nil+ when dimensions are unknown
|
|
34
109
|
def max_column
|
|
35
110
|
return nil unless dimensions
|
|
36
111
|
|
|
37
112
|
dimensions[:max_col]
|
|
38
113
|
end
|
|
39
114
|
|
|
115
|
+
# @return [Integer, nil] bottom row index (1-based) from the worksheet
|
|
116
|
+
# dimension, or +nil+ when dimensions are unknown
|
|
40
117
|
def max_row
|
|
41
118
|
return nil unless dimensions
|
|
42
119
|
|
|
43
120
|
dimensions[:max_row]
|
|
44
121
|
end
|
|
45
122
|
|
|
123
|
+
# Clears cached dimension metadata so that the next call to
|
|
124
|
+
# {#calculate_dimension} with <tt>force: true</tt> recomputes it.
|
|
125
|
+
#
|
|
126
|
+
# @return [nil]
|
|
46
127
|
def reset_dimensions
|
|
47
128
|
@dimensions = nil
|
|
48
129
|
end
|
|
49
130
|
|
|
131
|
+
# Returns the worksheet dimension reference (e.g. <tt>"A1:C10"</tt>).
|
|
132
|
+
#
|
|
133
|
+
# When the sheet lacks a +<dimension>+ element the default is to raise
|
|
134
|
+
# {Rbxl::UnsizedWorksheetError}. Passing <tt>force: true</tt> scans the
|
|
135
|
+
# sheet for the actual cell bounds instead; a sheet with no cells at
|
|
136
|
+
# all falls back to <tt>"A1:A1"</tt>.
|
|
137
|
+
#
|
|
138
|
+
# @param force [Boolean] scan the sheet when no stored dimension exists
|
|
139
|
+
# @return [String] Excel-style range reference
|
|
140
|
+
# @raise [Rbxl::UnsizedWorksheetError] if the sheet is unsized and
|
|
141
|
+
# +force+ is +false+
|
|
50
142
|
def calculate_dimension(force: false)
|
|
51
143
|
if dimensions
|
|
52
144
|
return dimensions[:ref]
|
|
@@ -62,8 +154,12 @@ module Rbxl
|
|
|
62
154
|
|
|
63
155
|
def each_row_values_only(&block)
|
|
64
156
|
if defined?(Rbxl::Native) && !@disable_native
|
|
65
|
-
|
|
66
|
-
|
|
157
|
+
if @streaming
|
|
158
|
+
native_parse_streaming(:parse_sheet_io, &block)
|
|
159
|
+
else
|
|
160
|
+
xml = @zip.get_entry(@entry_path).get_input_stream.read
|
|
161
|
+
Rbxl::Native.parse_sheet(xml, @shared_strings, &block)
|
|
162
|
+
end
|
|
67
163
|
return
|
|
68
164
|
end
|
|
69
165
|
|
|
@@ -121,8 +217,12 @@ module Rbxl
|
|
|
121
217
|
|
|
122
218
|
def each_row_full(pad_cells:, values_only:, expand_merged:, &block)
|
|
123
219
|
if defined?(Rbxl::Native) && !@disable_native && !pad_cells && !expand_merged && !values_only
|
|
124
|
-
|
|
125
|
-
|
|
220
|
+
if @streaming
|
|
221
|
+
native_parse_streaming(:parse_sheet_full_io, &block)
|
|
222
|
+
else
|
|
223
|
+
xml = @zip.get_entry(@entry_path).get_input_stream.read
|
|
224
|
+
Rbxl::Native.parse_sheet_full(xml, @shared_strings, &block)
|
|
225
|
+
end
|
|
126
226
|
return
|
|
127
227
|
end
|
|
128
228
|
|
|
@@ -204,6 +304,18 @@ module Rbxl
|
|
|
204
304
|
io&.close
|
|
205
305
|
end
|
|
206
306
|
|
|
307
|
+
def native_parse_streaming(method_name, &block)
|
|
308
|
+
io = @zip.get_entry(@entry_path).get_input_stream
|
|
309
|
+
max_bytes = Rbxl.max_worksheet_bytes
|
|
310
|
+
Rbxl::Native.public_send(method_name, io, @shared_strings, max_bytes, &block)
|
|
311
|
+
rescue RuntimeError => e
|
|
312
|
+
raise WorksheetTooLargeError, e.message if e.message&.include?("worksheet bytes exceed limit")
|
|
313
|
+
|
|
314
|
+
raise
|
|
315
|
+
ensure
|
|
316
|
+
io&.close
|
|
317
|
+
end
|
|
318
|
+
|
|
207
319
|
def extract_dimensions
|
|
208
320
|
with_sheet_reader do |reader|
|
|
209
321
|
reader.each do |node|
|
data/lib/rbxl/row.rb
CHANGED
|
@@ -1,21 +1,54 @@
|
|
|
1
1
|
module Rbxl
|
|
2
|
+
# Immutable row wrapper yielded by {Rbxl::ReadOnlyWorksheet#each_row}.
|
|
3
|
+
#
|
|
4
|
+
# A row holds its 1-based worksheet index and a frozen array of cell
|
|
5
|
+
# objects. The cell array may contain {Rbxl::Cell}, {Rbxl::ReadOnlyCell},
|
|
6
|
+
# or {Rbxl::EmptyCell} instances depending on the iteration options
|
|
7
|
+
# (+pad_cells+, +expand_merged+) and the parser backend in use.
|
|
8
|
+
#
|
|
9
|
+
# sheet.each_row do |row|
|
|
10
|
+
# row.index # => 2
|
|
11
|
+
# row.size # => 3
|
|
12
|
+
# row.values # => ["alice", 100, true]
|
|
13
|
+
# row[0].value # => "alice"
|
|
14
|
+
# end
|
|
2
15
|
class Row
|
|
3
|
-
|
|
16
|
+
# @return [Integer] 1-based worksheet row number
|
|
17
|
+
attr_reader :index
|
|
4
18
|
|
|
19
|
+
# @return [Array<Rbxl::Cell, Rbxl::ReadOnlyCell, Rbxl::EmptyCell>]
|
|
20
|
+
# frozen array of cell objects
|
|
21
|
+
attr_reader :cells
|
|
22
|
+
|
|
23
|
+
# @param index [Integer] 1-based worksheet row number
|
|
24
|
+
# @param cells [Array<Rbxl::Cell, Rbxl::ReadOnlyCell, Rbxl::EmptyCell>]
|
|
25
|
+
# cell objects in column order; the array is frozen in place
|
|
5
26
|
def initialize(index:, cells:)
|
|
6
27
|
@index = index
|
|
7
28
|
@cells = cells.freeze
|
|
8
29
|
@values = nil
|
|
9
30
|
end
|
|
10
31
|
|
|
32
|
+
# Returns the cell at a zero-based offset within the row.
|
|
33
|
+
#
|
|
34
|
+
# No bounds checking is performed beyond Array semantics: an offset
|
|
35
|
+
# outside the cell range simply returns +nil+.
|
|
36
|
+
#
|
|
37
|
+
# @param offset [Integer] zero-based position within the row
|
|
38
|
+
# @return [Rbxl::Cell, Rbxl::ReadOnlyCell, Rbxl::EmptyCell, nil]
|
|
11
39
|
def [](offset)
|
|
12
40
|
cells[offset]
|
|
13
41
|
end
|
|
14
42
|
|
|
43
|
+
# Returns the row as plain Ruby values, memoized and frozen so that
|
|
44
|
+
# repeated calls are allocation-free.
|
|
45
|
+
#
|
|
46
|
+
# @return [Array<Object>] decoded cell values in column order
|
|
15
47
|
def values
|
|
16
48
|
@values ||= cells.map(&:value).freeze
|
|
17
49
|
end
|
|
18
50
|
|
|
51
|
+
# @return [Integer] number of cells in the row
|
|
19
52
|
def size
|
|
20
53
|
cells.size
|
|
21
54
|
end
|
data/lib/rbxl/version.rb
CHANGED
data/lib/rbxl/write_only_cell.rb
CHANGED
|
@@ -1,7 +1,25 @@
|
|
|
1
1
|
module Rbxl
|
|
2
|
+
# Wraps a write-side cell value so that a style id can be associated with
|
|
3
|
+
# it without widening every call site to a Hash or Array.
|
|
4
|
+
#
|
|
5
|
+
# Instances are passed transparently to {Rbxl::WriteOnlyWorksheet#append}
|
|
6
|
+
# (or +<<+) in place of a plain value:
|
|
7
|
+
#
|
|
8
|
+
# cell = Rbxl::WriteOnlyCell.new(42, style_id: 1)
|
|
9
|
+
# sheet << ["id", cell]
|
|
10
|
+
#
|
|
11
|
+
# The value is serialized using the same type rules as a bare value; the
|
|
12
|
+
# +style_id+, when present, is emitted as the cell's +s+ attribute.
|
|
2
13
|
class WriteOnlyCell
|
|
3
|
-
|
|
14
|
+
# @return [Object] underlying Ruby value (String, Numeric, Boolean,
|
|
15
|
+
# Date/DateTime/Time, or +nil+)
|
|
16
|
+
attr_reader :value
|
|
4
17
|
|
|
18
|
+
# @return [Integer, nil] style index into the workbook's +cellXfs+ table
|
|
19
|
+
attr_reader :style_id
|
|
20
|
+
|
|
21
|
+
# @param value [Object] Ruby value to serialize into the cell
|
|
22
|
+
# @param style_id [Integer, nil] optional style index
|
|
5
23
|
def initialize(value, style_id: nil)
|
|
6
24
|
@value = value
|
|
7
25
|
@style_id = style_id
|
|
@@ -1,13 +1,38 @@
|
|
|
1
1
|
module Rbxl
|
|
2
|
+
# Write-only workbook for single-pass XLSX generation.
|
|
3
|
+
#
|
|
4
|
+
# The workbook accumulates rows per worksheet and emits the full
|
|
5
|
+
# <tt>.xlsx</tt> package in a single pass when {#save} is called. By
|
|
6
|
+
# design a write-only workbook can only be saved once: {#save} calls
|
|
7
|
+
# {#close} on success, and any subsequent call raises
|
|
8
|
+
# {Rbxl::WorkbookAlreadySavedError}.
|
|
9
|
+
#
|
|
10
|
+
# book = Rbxl.new(write_only: true)
|
|
11
|
+
# sheet = book.add_sheet("Report")
|
|
12
|
+
# sheet.append(["id", "name"])
|
|
13
|
+
# sheet.append([1, "alice"])
|
|
14
|
+
# book.save("report.xlsx")
|
|
15
|
+
#
|
|
16
|
+
# Style output is intentionally minimal: a single default style entry is
|
|
17
|
+
# emitted so that authored +style_id+ references resolve, but arbitrary
|
|
18
|
+
# workbook styling is out of scope for the MVP API.
|
|
2
19
|
class WriteOnlyWorkbook
|
|
20
|
+
# @return [Array<Rbxl::WriteOnlyWorksheet>] worksheets in insertion order
|
|
3
21
|
attr_reader :worksheets
|
|
4
22
|
|
|
23
|
+
# Creates an empty write-only workbook with no worksheets.
|
|
5
24
|
def initialize
|
|
6
25
|
@worksheets = []
|
|
7
26
|
@closed = false
|
|
8
27
|
@saved = false
|
|
9
28
|
end
|
|
10
29
|
|
|
30
|
+
# Creates and returns a new worksheet appended to this workbook.
|
|
31
|
+
#
|
|
32
|
+
# @param name [String] visible sheet name
|
|
33
|
+
# @return [Rbxl::WriteOnlyWorksheet]
|
|
34
|
+
# @raise [Rbxl::ClosedWorkbookError] if the workbook has been closed
|
|
35
|
+
# @raise [Rbxl::WorkbookAlreadySavedError] if {#save} has already succeeded
|
|
11
36
|
def add_sheet(name)
|
|
12
37
|
ensure_writable!
|
|
13
38
|
|
|
@@ -16,6 +41,16 @@ module Rbxl
|
|
|
16
41
|
sheet
|
|
17
42
|
end
|
|
18
43
|
|
|
44
|
+
# Serializes the workbook to an <tt>.xlsx</tt> file at +path+.
|
|
45
|
+
#
|
|
46
|
+
# On success the workbook is closed automatically; the method returns
|
|
47
|
+
# the path that was written, suitable for chaining.
|
|
48
|
+
#
|
|
49
|
+
# @param path [String, #to_path] destination filesystem path
|
|
50
|
+
# @return [String] the saved path
|
|
51
|
+
# @raise [Rbxl::Error] if no worksheets have been added
|
|
52
|
+
# @raise [Rbxl::ClosedWorkbookError] if the workbook is already closed
|
|
53
|
+
# @raise [Rbxl::WorkbookAlreadySavedError] if {#save} was already called
|
|
19
54
|
def save(path)
|
|
20
55
|
ensure_writable!
|
|
21
56
|
raise Error, "at least one worksheet is required" if worksheets.empty?
|
|
@@ -44,10 +79,16 @@ module Rbxl
|
|
|
44
79
|
path
|
|
45
80
|
end
|
|
46
81
|
|
|
82
|
+
# Marks the workbook as closed. Further mutating operations raise
|
|
83
|
+
# {Rbxl::ClosedWorkbookError}. This is called automatically by a
|
|
84
|
+
# successful {#save}.
|
|
85
|
+
#
|
|
86
|
+
# @return [Boolean] the new closed state (always +true+)
|
|
47
87
|
def close
|
|
48
88
|
@closed = true
|
|
49
89
|
end
|
|
50
90
|
|
|
91
|
+
# @return [Boolean] whether the workbook has been closed
|
|
51
92
|
def closed?
|
|
52
93
|
@closed
|
|
53
94
|
end
|
|
@@ -55,8 +96,8 @@ module Rbxl
|
|
|
55
96
|
private
|
|
56
97
|
|
|
57
98
|
def ensure_writable!
|
|
58
|
-
raise ClosedWorkbookError, "workbook has been closed" if closed?
|
|
59
99
|
raise WorkbookAlreadySavedError, "write-only workbook can only be saved once" if @saved
|
|
100
|
+
raise ClosedWorkbookError, "workbook has been closed" if closed?
|
|
60
101
|
end
|
|
61
102
|
|
|
62
103
|
def write_entry(zip, name, content)
|
|
@@ -1,17 +1,50 @@
|
|
|
1
1
|
module Rbxl
|
|
2
|
+
# Worksheet builder used by {Rbxl::WriteOnlyWorkbook}.
|
|
3
|
+
#
|
|
4
|
+
# Rows are appended in order and later serialized as SpreadsheetML by
|
|
5
|
+
# {#to_xml} when the workbook is saved. The builder never rewrites a
|
|
6
|
+
# previously appended row, so the worksheet's in-memory footprint scales
|
|
7
|
+
# linearly with the number of appended rows.
|
|
8
|
+
#
|
|
9
|
+
# == Row values
|
|
10
|
+
#
|
|
11
|
+
# Each element of an appended row may be one of:
|
|
12
|
+
#
|
|
13
|
+
# * +nil+ — serialized as an empty cell
|
|
14
|
+
# * +true+ / +false+ — serialized as a boolean cell
|
|
15
|
+
# * +Integer+ / +Numeric+ — serialized as a numeric cell
|
|
16
|
+
# * +Date+ / +DateTime+ / +Time+ — serialized as an ISO-8601 inline string
|
|
17
|
+
# * any other object — serialized as +value.to_s+ in an inline string
|
|
18
|
+
# * {Rbxl::WriteOnlyCell} — same as the above, but with an optional style id
|
|
2
19
|
class WriteOnlyWorksheet
|
|
20
|
+
# @return [String] visible sheet name
|
|
3
21
|
attr_reader :name
|
|
4
22
|
|
|
23
|
+
# @param name [String] visible sheet name
|
|
5
24
|
def initialize(name:)
|
|
6
25
|
@name = name
|
|
7
26
|
@rows = []
|
|
8
27
|
@column_name_cache = []
|
|
9
28
|
end
|
|
10
29
|
|
|
30
|
+
# Appends a row of values. Equivalent to {#append} so that the shovel
|
|
31
|
+
# operator reads naturally at the call site:
|
|
32
|
+
#
|
|
33
|
+
# sheet << ["id", "name"]
|
|
34
|
+
#
|
|
35
|
+
# @param values [Array, Enumerator] row values
|
|
36
|
+
# @return [Rbxl::WriteOnlyWorksheet] +self+ for chaining
|
|
37
|
+
# @raise [TypeError] if +values+ is not Array-like
|
|
11
38
|
def <<(values)
|
|
12
39
|
append(values)
|
|
13
40
|
end
|
|
14
41
|
|
|
42
|
+
# Appends a row of values.
|
|
43
|
+
#
|
|
44
|
+
# @param values [Array, Enumerator] row values; each element is serialized
|
|
45
|
+
# according to the rules documented on the class
|
|
46
|
+
# @return [Rbxl::WriteOnlyWorksheet] +self+ for chaining
|
|
47
|
+
# @raise [TypeError] if +values+ is neither an Array nor an Enumerator
|
|
15
48
|
def append(values)
|
|
16
49
|
unless values.is_a?(Array) || values.is_a?(Enumerator)
|
|
17
50
|
raise TypeError, "row must be an Array or Enumerator, got #{values.class}"
|
|
@@ -21,6 +54,14 @@ module Rbxl
|
|
|
21
54
|
self
|
|
22
55
|
end
|
|
23
56
|
|
|
57
|
+
# Serializes the worksheet to SpreadsheetML.
|
|
58
|
+
#
|
|
59
|
+
# When <tt>require "rbxl/native"</tt> has been loaded the native
|
|
60
|
+
# extension handles serialization for a significant speedup; otherwise
|
|
61
|
+
# a pure-Ruby implementation is used. Both paths produce equivalent
|
|
62
|
+
# output.
|
|
63
|
+
#
|
|
64
|
+
# @return [String] worksheet XML
|
|
24
65
|
def to_xml
|
|
25
66
|
if defined?(Rbxl::Native)
|
|
26
67
|
return Rbxl::Native.generate_sheet(@rows)
|
data/lib/rbxl.rb
CHANGED
|
@@ -16,14 +16,108 @@ require_relative "rbxl/write_only_cell"
|
|
|
16
16
|
require_relative "rbxl/write_only_workbook"
|
|
17
17
|
require_relative "rbxl/write_only_worksheet"
|
|
18
18
|
|
|
19
|
+
# Minimal, memory-friendly XLSX reader/writer inspired by +openpyxl+.
|
|
20
|
+
#
|
|
21
|
+
# Rbxl exposes two explicit, non-overlapping modes:
|
|
22
|
+
#
|
|
23
|
+
# * {Rbxl.open} returns a {Rbxl::ReadOnlyWorkbook} for row-by-row reads
|
|
24
|
+
# * {Rbxl.new} returns a {Rbxl::WriteOnlyWorkbook} for one-shot writes
|
|
25
|
+
#
|
|
26
|
+
# The API is intentionally narrow so that memory usage stays predictable
|
|
27
|
+
# for large workbooks. Neither mode materializes the full workbook in
|
|
28
|
+
# memory; reads pull rows from the underlying XML one at a time, and writes
|
|
29
|
+
# accumulate only the rows added before {Rbxl::WriteOnlyWorkbook#save}.
|
|
30
|
+
#
|
|
31
|
+
# == Reading
|
|
32
|
+
#
|
|
33
|
+
# require "rbxl"
|
|
34
|
+
#
|
|
35
|
+
# book = Rbxl.open("report.xlsx", read_only: true)
|
|
36
|
+
# sheet = book.sheet("Report")
|
|
37
|
+
# sheet.each_row(values_only: true) { |values| p values }
|
|
38
|
+
# book.close
|
|
39
|
+
#
|
|
40
|
+
# == Writing
|
|
41
|
+
#
|
|
42
|
+
# require "rbxl"
|
|
43
|
+
#
|
|
44
|
+
# book = Rbxl.new(write_only: true)
|
|
45
|
+
# sheet = book.add_sheet("Report")
|
|
46
|
+
# sheet << ["id", "name", "score"]
|
|
47
|
+
# sheet << [1, "alice", 100]
|
|
48
|
+
# book.save("report.xlsx")
|
|
49
|
+
#
|
|
50
|
+
# == Native extension
|
|
51
|
+
#
|
|
52
|
+
# Requiring <tt>"rbxl/native"</tt> after <tt>"rbxl"</tt> swaps the hot
|
|
53
|
+
# worksheet XML paths for a libxml2-backed C implementation with the same
|
|
54
|
+
# observable behavior. See the README for build requirements.
|
|
19
55
|
module Rbxl
|
|
56
|
+
# Maximum number of shared strings accepted from a workbook's
|
|
57
|
+
# +xl/sharedStrings.xml+ entry. Defaults to 10 million, which comfortably
|
|
58
|
+
# covers real-world enterprise workbooks while rejecting files crafted to
|
|
59
|
+
# exhaust memory before any row is read. Set to +nil+ to disable.
|
|
60
|
+
@max_shared_strings = 10_000_000
|
|
61
|
+
|
|
62
|
+
# Maximum total byte size of the shared strings table once decoded.
|
|
63
|
+
# Defaults to 512 MiB. Applied both to the ZIP entry's declared
|
|
64
|
+
# uncompressed size (cheap early rejection of zip bombs) and to the
|
|
65
|
+
# running sum while parsing. Set to +nil+ to disable.
|
|
66
|
+
@max_shared_string_bytes = 512 * 1024 * 1024
|
|
67
|
+
|
|
68
|
+
# Maximum uncompressed byte size accepted from a single worksheet's XML
|
|
69
|
+
# entry while iterating in +streaming: true+ mode. Default is +nil+
|
|
70
|
+
# (unbounded) because legitimate worksheets can be arbitrarily large.
|
|
71
|
+
# Set a positive integer to bound peak inflation and stop high-compression
|
|
72
|
+
# zip-bomb worksheets mid-inflate.
|
|
73
|
+
@max_worksheet_bytes = nil
|
|
74
|
+
|
|
20
75
|
class << self
|
|
21
|
-
|
|
76
|
+
# @return [Integer, nil] configured shared-strings count cap
|
|
77
|
+
attr_accessor :max_shared_strings
|
|
78
|
+
|
|
79
|
+
# @return [Integer, nil] configured shared-strings byte cap
|
|
80
|
+
attr_accessor :max_shared_string_bytes
|
|
81
|
+
|
|
82
|
+
# @return [Integer, nil] per-worksheet streaming byte cap
|
|
83
|
+
attr_accessor :max_worksheet_bytes
|
|
84
|
+
|
|
85
|
+
# Opens an existing workbook in read-only row-by-row mode.
|
|
86
|
+
#
|
|
87
|
+
# The +read_only+ keyword is required and must be +true+. It exists to
|
|
88
|
+
# mark the intent explicitly and to leave room for a future read/write
|
|
89
|
+
# mode without changing the default behavior of {.open}.
|
|
90
|
+
#
|
|
91
|
+
# With <tt>streaming: true</tt>, the native backend (when loaded) feeds
|
|
92
|
+
# worksheet XML to the parser in chunks pulled from the ZIP input stream
|
|
93
|
+
# instead of materializing the entire worksheet as one Ruby string. This
|
|
94
|
+
# keeps peak memory roughly independent of worksheet size and lets
|
|
95
|
+
# {Rbxl.max_worksheet_bytes} bound how much is inflated. Streaming mode
|
|
96
|
+
# is the same API and output shape — only the inflation strategy
|
|
97
|
+
# differs — and typically gives up a few percent of throughput on small
|
|
98
|
+
# sheets in exchange for the flat memory profile.
|
|
99
|
+
#
|
|
100
|
+
# @param path [String, #to_path] filesystem path to an <tt>.xlsx</tt> file
|
|
101
|
+
# @param read_only [Boolean] must be +true+ for the current API
|
|
102
|
+
# @param streaming [Boolean] feed worksheet XML to the native parser in
|
|
103
|
+
# chunks instead of fully inflating the entry in advance. Ignored when
|
|
104
|
+
# the native extension is not loaded.
|
|
105
|
+
# @return [Rbxl::ReadOnlyWorkbook]
|
|
106
|
+
# @raise [ArgumentError] if +read_only+ is not +true+
|
|
107
|
+
def open(path, read_only: false, streaming: false)
|
|
22
108
|
raise ArgumentError, "read_only: true is required for this MVP" unless read_only
|
|
23
109
|
|
|
24
|
-
ReadOnlyWorkbook.open(path)
|
|
110
|
+
ReadOnlyWorkbook.open(path, streaming: streaming)
|
|
25
111
|
end
|
|
26
112
|
|
|
113
|
+
# Creates a new workbook in write-only mode.
|
|
114
|
+
#
|
|
115
|
+
# The +write_only+ keyword is required and must be +true+ to make the
|
|
116
|
+
# save-once, append-only contract obvious at the call site.
|
|
117
|
+
#
|
|
118
|
+
# @param write_only [Boolean] must be +true+ for the current API
|
|
119
|
+
# @return [Rbxl::WriteOnlyWorkbook]
|
|
120
|
+
# @raise [ArgumentError] if +write_only+ is not +true+
|
|
27
121
|
def new(write_only: false)
|
|
28
122
|
raise ArgumentError, "write_only: true is required for this MVP" unless write_only
|
|
29
123
|
|
data/sig/rbxl.rbs
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
module Rbxl
|
|
2
|
+
VERSION: String
|
|
3
|
+
|
|
4
|
+
type cell_value = String | Integer | Float | bool | nil
|
|
5
|
+
type pathish = String | Pathname
|
|
6
|
+
type row_input = Array[untyped] | Enumerator[untyped, untyped]
|
|
7
|
+
type row_values = Array[cell_value]
|
|
8
|
+
type row_cell = Cell | ReadOnlyCell | EmptyCell
|
|
9
|
+
type row_cells = Array[row_cell]
|
|
10
|
+
type dimensions = { ref: String, max_col: Integer, max_row: Integer }
|
|
11
|
+
|
|
12
|
+
def self.open: (pathish path, ?read_only: bool, ?streaming: bool) -> ReadOnlyWorkbook
|
|
13
|
+
def self.new: (?write_only: bool) -> WriteOnlyWorkbook
|
|
14
|
+
|
|
15
|
+
attr_accessor self.max_shared_strings: Integer?
|
|
16
|
+
attr_accessor self.max_shared_string_bytes: Integer?
|
|
17
|
+
attr_accessor self.max_worksheet_bytes: Integer?
|
|
18
|
+
|
|
19
|
+
class Error < StandardError
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
class SheetNotFoundError < Error
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
class ClosedWorkbookError < Error
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
class WorkbookAlreadySavedError < Error
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
class UnsizedWorksheetError < Error
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
class SharedStringsTooLargeError < Error
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
class WorksheetTooLargeError < Error
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
class Cell
|
|
41
|
+
attr_accessor value: cell_value
|
|
42
|
+
attr_accessor coordinate: String?
|
|
43
|
+
|
|
44
|
+
def initialize: (?value: cell_value, ?coordinate: String?) -> void
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
class ReadOnlyCell
|
|
48
|
+
attr_reader coordinate: String
|
|
49
|
+
attr_reader value: cell_value
|
|
50
|
+
|
|
51
|
+
def initialize: (String coordinate, cell_value value) -> void
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
class EmptyCell
|
|
55
|
+
attr_reader coordinate: String
|
|
56
|
+
|
|
57
|
+
def initialize: (coordinate: String) -> void
|
|
58
|
+
def value: () -> nil
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
class WriteOnlyCell
|
|
62
|
+
attr_reader value: untyped
|
|
63
|
+
attr_reader style_id: Integer?
|
|
64
|
+
|
|
65
|
+
def initialize: (untyped value, ?style_id: Integer?) -> void
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
class Row
|
|
69
|
+
attr_reader index: Integer
|
|
70
|
+
attr_reader cells: row_cells
|
|
71
|
+
|
|
72
|
+
def initialize: (index: Integer, cells: row_cells) -> void
|
|
73
|
+
def []: (Integer offset) -> row_cell?
|
|
74
|
+
def values: () -> row_values
|
|
75
|
+
def size: () -> Integer
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
class ReadOnlyWorkbook
|
|
79
|
+
MAIN_NS: String
|
|
80
|
+
REL_NS: String
|
|
81
|
+
PACKAGE_REL_NS: String
|
|
82
|
+
|
|
83
|
+
attr_reader path: String
|
|
84
|
+
attr_reader sheet_names: Array[String]
|
|
85
|
+
|
|
86
|
+
def self.open: (pathish path, ?streaming: bool) -> ReadOnlyWorkbook
|
|
87
|
+
def initialize: (pathish path, ?streaming: bool) -> void
|
|
88
|
+
def sheet: (String name) -> ReadOnlyWorksheet
|
|
89
|
+
def close: () -> void
|
|
90
|
+
def closed?: () -> bool
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
class ReadOnlyWorksheet
|
|
94
|
+
attr_reader name: String
|
|
95
|
+
attr_reader dimensions: dimensions?
|
|
96
|
+
|
|
97
|
+
def initialize: (zip: untyped, entry_path: String, shared_strings: Array[String], name: String, ?streaming: bool) -> void
|
|
98
|
+
|
|
99
|
+
def each_row: (?pad_cells: bool, ?values_only: bool, ?expand_merged: bool) { (Row | row_values) -> void } -> void
|
|
100
|
+
| (?pad_cells: bool, ?values_only: bool, ?expand_merged: bool) -> Enumerator[Row | row_values, void]
|
|
101
|
+
|
|
102
|
+
def rows: (?values_only: bool, ?pad_cells: bool, ?expand_merged: bool) -> Enumerator[Row | row_values, void]
|
|
103
|
+
|
|
104
|
+
def max_column: () -> Integer?
|
|
105
|
+
def max_row: () -> Integer?
|
|
106
|
+
def reset_dimensions: () -> nil
|
|
107
|
+
def calculate_dimension: (?force: bool) -> String
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
class WriteOnlyWorkbook
|
|
111
|
+
attr_reader worksheets: Array[WriteOnlyWorksheet]
|
|
112
|
+
|
|
113
|
+
def initialize: () -> void
|
|
114
|
+
def add_sheet: (String name) -> WriteOnlyWorksheet
|
|
115
|
+
def save: (pathish path) -> String
|
|
116
|
+
def close: () -> bool
|
|
117
|
+
def closed?: () -> bool
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
class WriteOnlyWorksheet
|
|
121
|
+
attr_reader name: String
|
|
122
|
+
|
|
123
|
+
def initialize: (name: String) -> void
|
|
124
|
+
def <<: (row_input values) -> WriteOnlyWorksheet
|
|
125
|
+
def append: (row_input values) -> WriteOnlyWorksheet
|
|
126
|
+
def to_xml: () -> String
|
|
127
|
+
end
|
|
128
|
+
end
|