rbxl 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +19 -0
- data/README.md +85 -24
- data/Rakefile +6 -0
- data/ext/rbxl_native/extconf.rb +23 -34
- data/ext/rbxl_native/native.c +127 -7
- data/lib/rbxl/cell.rb +15 -0
- data/lib/rbxl/empty_cell.rb +15 -0
- data/lib/rbxl/errors.rb +29 -0
- data/lib/rbxl/native.rb +16 -1
- data/lib/rbxl/read_only_cell.rb +10 -0
- data/lib/rbxl/read_only_workbook.rb +83 -6
- data/lib/rbxl/read_only_worksheet.rb +119 -7
- data/lib/rbxl/row.rb +34 -1
- data/lib/rbxl/version.rb +2 -1
- data/lib/rbxl/write_only_cell.rb +19 -1
- data/lib/rbxl/write_only_workbook.rb +57 -9
- data/lib/rbxl/write_only_worksheet.rb +41 -0
- data/lib/rbxl.rb +96 -2
- data/sig/rbxl.rbs +128 -0
- metadata +7 -3
|
@@ -1,24 +1,75 @@
|
|
|
1
1
|
module Rbxl
|
|
2
|
+
# Read-only workbook backed by a ZIP archive.
|
|
3
|
+
#
|
|
4
|
+
# The workbook opens the underlying <tt>.xlsx</tt> once and keeps a single
|
|
5
|
+
# +Zip::File+ handle open for the lifetime of the object. Worksheets are
|
|
6
|
+
# opened lazily via {#sheet}, so callers can process very large sheets
|
|
7
|
+
# without materializing the full workbook in memory.
|
|
8
|
+
#
|
|
9
|
+
# Typical use:
|
|
10
|
+
#
|
|
11
|
+
# book = Rbxl.open("big.xlsx", read_only: true)
|
|
12
|
+
# begin
|
|
13
|
+
# book.sheet_names # => ["Data"]
|
|
14
|
+
# book.sheet("Data").each_row do |row|
|
|
15
|
+
# process(row.values)
|
|
16
|
+
# end
|
|
17
|
+
# ensure
|
|
18
|
+
# book.close
|
|
19
|
+
# end
|
|
20
|
+
#
|
|
21
|
+
# After {#close} every subsequent {#sheet} call raises
|
|
22
|
+
# {Rbxl::ClosedWorkbookError}.
|
|
2
23
|
class ReadOnlyWorkbook
|
|
24
|
+
# Namespace for the main SpreadsheetML schema.
|
|
3
25
|
MAIN_NS = "http://schemas.openxmlformats.org/spreadsheetml/2006/main"
|
|
26
|
+
|
|
27
|
+
# Namespace used for document-level relationships.
|
|
4
28
|
REL_NS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships"
|
|
29
|
+
|
|
30
|
+
# Namespace used by the OPC package relationships layer.
|
|
5
31
|
PACKAGE_REL_NS = "http://schemas.openxmlformats.org/package/2006/relationships"
|
|
6
32
|
|
|
7
|
-
|
|
33
|
+
# @return [String, #to_path] path the workbook was opened from, exactly as passed to the constructor
|
|
34
|
+
attr_reader :path
|
|
8
35
|
|
|
9
|
-
|
|
10
|
-
|
|
36
|
+
# @return [Array<String>] visible sheet names in workbook order
|
|
37
|
+
attr_reader :sheet_names
|
|
38
|
+
|
|
39
|
+
# Convenience constructor equivalent to <tt>new(path, streaming:)</tt>.
|
|
40
|
+
#
|
|
41
|
+
# @param path [String, #to_path] path to the <tt>.xlsx</tt> file
|
|
42
|
+
# @param streaming [Boolean] feed worksheet XML to the native parser in
|
|
43
|
+
# chunks (see {Rbxl.open})
|
|
44
|
+
# @return [Rbxl::ReadOnlyWorkbook]
|
|
45
|
+
def self.open(path, streaming: false)
|
|
46
|
+
new(path, streaming: streaming)
|
|
11
47
|
end
|
|
12
48
|
|
|
13
|
-
|
|
49
|
+
# Opens the ZIP archive, pre-loads shared strings, and indexes the
|
|
50
|
+
# worksheet entries keyed by visible sheet name.
|
|
51
|
+
#
|
|
52
|
+
# @param path [String, #to_path] path to the <tt>.xlsx</tt> file
|
|
53
|
+
# @param streaming [Boolean] forwarded to produced worksheets
|
|
54
|
+
def initialize(path, streaming: false)
|
|
14
55
|
@path = path
|
|
15
56
|
@zip = Zip::File.open(path)
|
|
57
|
+
@streaming = streaming
|
|
16
58
|
@shared_strings = load_shared_strings
|
|
17
59
|
@sheet_entries = load_sheet_entries
|
|
18
60
|
@sheet_names = @sheet_entries.keys.freeze
|
|
19
61
|
@closed = false
|
|
20
62
|
end
|
|
21
63
|
|
|
64
|
+
# Returns a row-by-row worksheet by visible sheet name.
|
|
65
|
+
#
|
|
66
|
+
# The returned object shares the workbook's ZIP handle. Closing the
|
|
67
|
+
# workbook invalidates any worksheets produced by prior calls.
|
|
68
|
+
#
|
|
69
|
+
# @param name [String] visible sheet name as listed in {#sheet_names}
|
|
70
|
+
# @return [Rbxl::ReadOnlyWorksheet]
|
|
71
|
+
# @raise [Rbxl::SheetNotFoundError] if +name+ is not present
|
|
72
|
+
# @raise [Rbxl::ClosedWorkbookError] if the workbook has been closed
|
|
22
73
|
def sheet(name)
|
|
23
74
|
ensure_open!
|
|
24
75
|
|
|
@@ -26,9 +77,13 @@ module Rbxl
|
|
|
26
77
|
raise SheetNotFoundError, "sheet not found: #{name}"
|
|
27
78
|
end
|
|
28
79
|
|
|
29
|
-
ReadOnlyWorksheet.new(zip: @zip, entry_path: entry_path, shared_strings: @shared_strings, name: name)
|
|
80
|
+
ReadOnlyWorksheet.new(zip: @zip, entry_path: entry_path, shared_strings: @shared_strings, name: name, streaming: @streaming)
|
|
30
81
|
end
|
|
31
82
|
|
|
83
|
+
# Releases the underlying ZIP file handle. Idempotent; subsequent calls
|
|
84
|
+
# are no-ops.
|
|
85
|
+
#
|
|
86
|
+
# @return [void]
|
|
32
87
|
def close
|
|
33
88
|
return if closed?
|
|
34
89
|
|
|
@@ -36,6 +91,7 @@ module Rbxl
|
|
|
36
91
|
@closed = true
|
|
37
92
|
end
|
|
38
93
|
|
|
94
|
+
# @return [Boolean] whether {#close} has been called
|
|
39
95
|
def closed?
|
|
40
96
|
@closed
|
|
41
97
|
end
|
|
@@ -50,7 +106,18 @@ module Rbxl
|
|
|
50
106
|
entry = @zip.find_entry("xl/sharedStrings.xml")
|
|
51
107
|
return [] unless entry
|
|
52
108
|
|
|
109
|
+
max_count = Rbxl.max_shared_strings
|
|
110
|
+
max_bytes = Rbxl.max_shared_string_bytes
|
|
111
|
+
|
|
112
|
+
# Reject zip-bomb style entries up front using the ZIP directory's
|
|
113
|
+
# declared uncompressed size, before allocating any decompression buffer.
|
|
114
|
+
if max_bytes && entry.size && entry.size > max_bytes
|
|
115
|
+
raise SharedStringsTooLargeError,
|
|
116
|
+
"shared strings uncompressed size #{entry.size} exceeds limit #{max_bytes}"
|
|
117
|
+
end
|
|
118
|
+
|
|
53
119
|
strings = []
|
|
120
|
+
total_bytes = 0
|
|
54
121
|
io = entry.get_input_stream
|
|
55
122
|
reader = Nokogiri::XML::Reader(io)
|
|
56
123
|
|
|
@@ -92,7 +159,17 @@ module Rbxl
|
|
|
92
159
|
when "rPh"
|
|
93
160
|
in_phonetic = false
|
|
94
161
|
when "si"
|
|
95
|
-
|
|
162
|
+
value = current_fragments.join.freeze
|
|
163
|
+
total_bytes += value.bytesize
|
|
164
|
+
if max_bytes && total_bytes > max_bytes
|
|
165
|
+
raise SharedStringsTooLargeError,
|
|
166
|
+
"shared strings total size exceeds limit #{max_bytes}"
|
|
167
|
+
end
|
|
168
|
+
strings << value
|
|
169
|
+
if max_count && strings.size > max_count
|
|
170
|
+
raise SharedStringsTooLargeError,
|
|
171
|
+
"shared strings count exceeds limit #{max_count}"
|
|
172
|
+
end
|
|
96
173
|
in_si = false
|
|
97
174
|
in_run = false
|
|
98
175
|
in_phonetic = false
|
|
@@ -1,22 +1,86 @@
|
|
|
1
1
|
module Rbxl
|
|
2
|
+
# Row-by-row worksheet reader for a single sheet of a read-only workbook.
|
|
3
|
+
#
|
|
4
|
+
# Instances are produced by {Rbxl::ReadOnlyWorkbook#sheet} and must not be
|
|
5
|
+
# constructed directly; their lifecycle is bound to the workbook's ZIP
|
|
6
|
+
# handle. Rows can be consumed as {Rbxl::Row} objects or as plain value
|
|
7
|
+
# arrays depending on the iteration options.
|
|
8
|
+
#
|
|
9
|
+
# == Iteration modes
|
|
10
|
+
#
|
|
11
|
+
# # Default: yield Rbxl::Row with cell wrappers.
|
|
12
|
+
# sheet.each_row { |row| row.values }
|
|
13
|
+
#
|
|
14
|
+
# # Fast path: yield plain Array<Object> of decoded values.
|
|
15
|
+
# sheet.each_row(values_only: true) { |values| ... }
|
|
16
|
+
#
|
|
17
|
+
# # Pad missing cells in sparse rows up to max_column.
|
|
18
|
+
# sheet.each_row(pad_cells: true) { |row| ... }
|
|
19
|
+
#
|
|
20
|
+
# # Replicate anchor values across merged ranges.
|
|
21
|
+
# sheet.each_row(expand_merged: true) { |row| ... }
|
|
22
|
+
#
|
|
23
|
+
# Iteration without a block returns an +Enumerator+.
|
|
24
|
+
#
|
|
25
|
+
# == Dimensions
|
|
26
|
+
#
|
|
27
|
+
# The worksheet dimension (the <tt>A1:C10</tt>-style range) is read from
|
|
28
|
+
# the sheet's +<dimension>+ element when present. When absent or when the
|
|
29
|
+
# caller wants to recompute it, {#calculate_dimension} with
|
|
30
|
+
# <tt>force: true</tt> scans the sheet for actual cell coordinates.
|
|
2
31
|
class ReadOnlyWorksheet
|
|
32
|
+
# @private Nokogiri reader node-type shortcuts.
|
|
3
33
|
ELEMENT_NODE = Nokogiri::XML::Reader::TYPE_ELEMENT
|
|
34
|
+
# @private
|
|
4
35
|
TEXT_NODE = Nokogiri::XML::Reader::TYPE_TEXT
|
|
36
|
+
# @private
|
|
5
37
|
CDATA_NODE = Nokogiri::XML::Reader::TYPE_CDATA
|
|
38
|
+
# @private
|
|
6
39
|
END_ELEMENT_NODE = Nokogiri::XML::Reader::TYPE_END_ELEMENT
|
|
7
40
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
41
|
+
# @return [String] visible sheet name
|
|
42
|
+
attr_reader :name
|
|
43
|
+
|
|
44
|
+
# Parsed dimension metadata, +nil+ when the sheet has no +<dimension>+
|
|
45
|
+
# element and no scan has been forced. When present the hash has keys
|
|
46
|
+
# +:ref+, +:max_col+, and +:max_row+.
|
|
47
|
+
#
|
|
48
|
+
# @return [Hash{Symbol => Object}, nil]
|
|
49
|
+
attr_reader :dimensions
|
|
50
|
+
|
|
51
|
+
# @param zip [Zip::File] open archive shared with the workbook
|
|
52
|
+
# @param entry_path [String] ZIP entry path for this sheet's XML
|
|
53
|
+
# @param shared_strings [Array<String>] pre-decoded shared strings table
|
|
54
|
+
# @param name [String] visible sheet name
|
|
55
|
+
# @param streaming [Boolean] when the native extension is loaded, feed
|
|
56
|
+
# worksheet XML to the parser in chunks instead of reading the entry
|
|
57
|
+
# into memory first
|
|
58
|
+
def initialize(zip:, entry_path:, shared_strings:, name:, streaming: false)
|
|
11
59
|
@zip = zip
|
|
12
60
|
@entry_path = entry_path
|
|
13
61
|
@shared_strings = shared_strings
|
|
14
62
|
@name = name
|
|
63
|
+
@streaming = streaming
|
|
15
64
|
@dimensions = extract_dimensions
|
|
16
65
|
@merge_ranges_by_row = nil
|
|
17
66
|
@merge_anchor_values = {}
|
|
18
67
|
end
|
|
19
68
|
|
|
69
|
+
# Iterates rows in worksheet order.
|
|
70
|
+
#
|
|
71
|
+
# With +values_only+ and neither +pad_cells+ nor +expand_merged+ set,
|
|
72
|
+
# iteration takes a tighter path that yields frozen +Array<Object>+
|
|
73
|
+
# rows and skips allocating cell wrappers.
|
|
74
|
+
#
|
|
75
|
+
# @param pad_cells [Boolean] pad sparse rows with {Rbxl::EmptyCell} (or
|
|
76
|
+
# <tt>[coordinate, nil]</tt> pairs in +values_only+ mode) up to the
|
|
77
|
+
# worksheet's +max_column+
|
|
78
|
+
# @param values_only [Boolean] yield plain value arrays instead of
|
|
79
|
+
# {Rbxl::Row} instances
|
|
80
|
+
# @param expand_merged [Boolean] propagate the anchor value of every
|
|
81
|
+
# merged range across the range's cells
|
|
82
|
+
# @yieldparam row [Rbxl::Row, Array<Object>]
|
|
83
|
+
# @return [Enumerator, void] enumerator when called without a block
|
|
20
84
|
def each_row(pad_cells: false, values_only: false, expand_merged: false, &block)
|
|
21
85
|
return enum_for(:each_row, pad_cells: pad_cells, values_only: values_only, expand_merged: expand_merged) unless block
|
|
22
86
|
|
|
@@ -27,26 +91,54 @@ module Rbxl
|
|
|
27
91
|
end
|
|
28
92
|
end
|
|
29
93
|
|
|
94
|
+
# Enumerator-returning alias for {#each_row} that reads more naturally
|
|
95
|
+
# when the call site chains further enumerable operations.
|
|
96
|
+
#
|
|
97
|
+
# sheet.rows(values_only: true).take(10)
|
|
98
|
+
#
|
|
99
|
+
# @param values_only [Boolean] see {#each_row}
|
|
100
|
+
# @param pad_cells [Boolean] see {#each_row}
|
|
101
|
+
# @param expand_merged [Boolean] see {#each_row}
|
|
102
|
+
# @return [Enumerator]
|
|
30
103
|
def rows(values_only: false, pad_cells: false, expand_merged: false)
|
|
31
104
|
each_row(values_only: values_only, pad_cells: pad_cells, expand_merged: expand_merged)
|
|
32
105
|
end
|
|
33
106
|
|
|
107
|
+
# @return [Integer, nil] rightmost column index (1-based) from the
|
|
108
|
+
# worksheet dimension, or +nil+ when dimensions are unknown
|
|
34
109
|
def max_column
|
|
35
110
|
return nil unless dimensions
|
|
36
111
|
|
|
37
112
|
dimensions[:max_col]
|
|
38
113
|
end
|
|
39
114
|
|
|
115
|
+
# @return [Integer, nil] bottom row index (1-based) from the worksheet
|
|
116
|
+
# dimension, or +nil+ when dimensions are unknown
|
|
40
117
|
def max_row
|
|
41
118
|
return nil unless dimensions
|
|
42
119
|
|
|
43
120
|
dimensions[:max_row]
|
|
44
121
|
end
|
|
45
122
|
|
|
123
|
+
# Clears cached dimension metadata so that the next call to
|
|
124
|
+
# {#calculate_dimension} recomputes it.
|
|
125
|
+
#
|
|
126
|
+
# @return [nil]
|
|
46
127
|
def reset_dimensions
|
|
47
128
|
@dimensions = nil
|
|
48
129
|
end
|
|
49
130
|
|
|
131
|
+
# Returns the worksheet dimension reference (e.g. <tt>"A1:C10"</tt>).
|
|
132
|
+
#
|
|
133
|
+
# When the sheet lacks a +<dimension>+ element the default is to raise
|
|
134
|
+
# {Rbxl::UnsizedWorksheetError}. Passing <tt>force: true</tt> scans the
|
|
135
|
+
# sheet for the actual cell bounds instead; a sheet with no cells at
|
|
136
|
+
# all falls back to <tt>"A1:A1"</tt>.
|
|
137
|
+
#
|
|
138
|
+
# @param force [Boolean] scan the sheet when no stored dimension exists
|
|
139
|
+
# @return [String] Excel-style range reference
|
|
140
|
+
# @raise [Rbxl::UnsizedWorksheetError] if the sheet is unsized and
|
|
141
|
+
# +force+ is +false+
|
|
50
142
|
def calculate_dimension(force: false)
|
|
51
143
|
if dimensions
|
|
52
144
|
return dimensions[:ref]
|
|
@@ -62,8 +154,12 @@ module Rbxl
|
|
|
62
154
|
|
|
63
155
|
def each_row_values_only(&block)
|
|
64
156
|
if defined?(Rbxl::Native) && !@disable_native
|
|
65
|
-
|
|
66
|
-
|
|
157
|
+
if @streaming
|
|
158
|
+
native_parse_streaming(:parse_sheet_io, &block)
|
|
159
|
+
else
|
|
160
|
+
xml = @zip.get_entry(@entry_path).get_input_stream.read
|
|
161
|
+
Rbxl::Native.parse_sheet(xml, @shared_strings, &block)
|
|
162
|
+
end
|
|
67
163
|
return
|
|
68
164
|
end
|
|
69
165
|
|
|
@@ -121,8 +217,12 @@ module Rbxl
|
|
|
121
217
|
|
|
122
218
|
def each_row_full(pad_cells:, values_only:, expand_merged:, &block)
|
|
123
219
|
if defined?(Rbxl::Native) && !@disable_native && !pad_cells && !expand_merged && !values_only
|
|
124
|
-
|
|
125
|
-
|
|
220
|
+
if @streaming
|
|
221
|
+
native_parse_streaming(:parse_sheet_full_io, &block)
|
|
222
|
+
else
|
|
223
|
+
xml = @zip.get_entry(@entry_path).get_input_stream.read
|
|
224
|
+
Rbxl::Native.parse_sheet_full(xml, @shared_strings, &block)
|
|
225
|
+
end
|
|
126
226
|
return
|
|
127
227
|
end
|
|
128
228
|
|
|
@@ -204,6 +304,18 @@ module Rbxl
|
|
|
204
304
|
io&.close
|
|
205
305
|
end
|
|
206
306
|
|
|
307
|
+
def native_parse_streaming(method_name, &block)
|
|
308
|
+
io = @zip.get_entry(@entry_path).get_input_stream
|
|
309
|
+
max_bytes = Rbxl.max_worksheet_bytes
|
|
310
|
+
Rbxl::Native.public_send(method_name, io, @shared_strings, max_bytes, &block)
|
|
311
|
+
rescue RuntimeError => e
|
|
312
|
+
raise WorksheetTooLargeError, e.message if e.message&.include?("worksheet bytes exceed limit")
|
|
313
|
+
|
|
314
|
+
raise
|
|
315
|
+
ensure
|
|
316
|
+
io&.close
|
|
317
|
+
end
|
|
318
|
+
|
|
207
319
|
def extract_dimensions
|
|
208
320
|
with_sheet_reader do |reader|
|
|
209
321
|
reader.each do |node|
|
data/lib/rbxl/row.rb
CHANGED
|
@@ -1,21 +1,54 @@
|
|
|
1
1
|
module Rbxl
|
|
2
|
+
# Immutable row wrapper yielded by {Rbxl::ReadOnlyWorksheet#each_row}.
|
|
3
|
+
#
|
|
4
|
+
# A row holds its 1-based worksheet index and a frozen array of cell
|
|
5
|
+
# objects. The cell array may contain {Rbxl::Cell}, {Rbxl::ReadOnlyCell},
|
|
6
|
+
# or {Rbxl::EmptyCell} instances depending on the iteration options
|
|
7
|
+
# (+pad_cells+, +expand_merged+) and the parser backend in use.
|
|
8
|
+
#
|
|
9
|
+
# sheet.each_row do |row|
|
|
10
|
+
# row.index # => 2
|
|
11
|
+
# row.size # => 3
|
|
12
|
+
# row.values # => ["alice", 100, true]
|
|
13
|
+
# row[0].value # => "alice"
|
|
14
|
+
# end
|
|
2
15
|
class Row
|
|
3
|
-
|
|
16
|
+
# @return [Integer] 1-based worksheet row number
|
|
17
|
+
attr_reader :index
|
|
4
18
|
|
|
19
|
+
# @return [Array<Rbxl::Cell, Rbxl::ReadOnlyCell, Rbxl::EmptyCell>]
|
|
20
|
+
# frozen array of cell objects
|
|
21
|
+
attr_reader :cells
|
|
22
|
+
|
|
23
|
+
# @param index [Integer] 1-based worksheet row number
|
|
24
|
+
# @param cells [Array<Rbxl::Cell, Rbxl::ReadOnlyCell, Rbxl::EmptyCell>]
|
|
25
|
+
# cell objects in column order; the array is frozen in place
|
|
5
26
|
def initialize(index:, cells:)
|
|
6
27
|
@index = index
|
|
7
28
|
@cells = cells.freeze
|
|
8
29
|
@values = nil
|
|
9
30
|
end
|
|
10
31
|
|
|
32
|
+
# Returns the cell at a zero-based offset within the row.
|
|
33
|
+
#
|
|
34
|
+
# No bounds checking is performed beyond Array semantics: an offset
|
|
35
|
+
# outside the cell range simply returns +nil+.
|
|
36
|
+
#
|
|
37
|
+
# @param offset [Integer] zero-based position within the row
|
|
38
|
+
# @return [Rbxl::Cell, Rbxl::ReadOnlyCell, Rbxl::EmptyCell, nil]
|
|
11
39
|
def [](offset)
|
|
12
40
|
cells[offset]
|
|
13
41
|
end
|
|
14
42
|
|
|
43
|
+
# Returns the row as plain Ruby values, memoized and frozen so that
|
|
44
|
+
# repeated calls are allocation-free.
|
|
45
|
+
#
|
|
46
|
+
# @return [Array<Object>] decoded cell values in column order
|
|
15
47
|
def values
|
|
16
48
|
@values ||= cells.map(&:value).freeze
|
|
17
49
|
end
|
|
18
50
|
|
|
51
|
+
# @return [Integer] number of cells in the row
|
|
19
52
|
def size
|
|
20
53
|
cells.size
|
|
21
54
|
end
|
data/lib/rbxl/version.rb
CHANGED
data/lib/rbxl/write_only_cell.rb
CHANGED
|
@@ -1,7 +1,25 @@
|
|
|
1
1
|
module Rbxl
|
|
2
|
+
# Wraps a write-side cell value so that a style id can be associated with
|
|
3
|
+
# it without widening every call site to a Hash or Array.
|
|
4
|
+
#
|
|
5
|
+
# Instances are passed transparently to {Rbxl::WriteOnlyWorksheet#append}
|
|
6
|
+
# (or +<<+) in place of a plain value:
|
|
7
|
+
#
|
|
8
|
+
# cell = Rbxl::WriteOnlyCell.new(42, style_id: 1)
|
|
9
|
+
# sheet << ["id", cell]
|
|
10
|
+
#
|
|
11
|
+
# The value is serialized using the same type rules as a bare value; the
|
|
12
|
+
# +style_id+, when present, is emitted as the cell's +s+ attribute.
|
|
2
13
|
class WriteOnlyCell
|
|
3
|
-
|
|
14
|
+
# @return [Object] underlying Ruby value (String, Numeric, Boolean,
|
|
15
|
+
# Date/DateTime/Time, or +nil+)
|
|
16
|
+
attr_reader :value
|
|
4
17
|
|
|
18
|
+
# @return [Integer, nil] style index into the workbook's +cellXfs+ table
|
|
19
|
+
attr_reader :style_id
|
|
20
|
+
|
|
21
|
+
# @param value [Object] Ruby value to serialize into the cell
|
|
22
|
+
# @param style_id [Integer, nil] optional style index
|
|
5
23
|
def initialize(value, style_id: nil)
|
|
6
24
|
@value = value
|
|
7
25
|
@style_id = style_id
|
|
@@ -1,13 +1,38 @@
|
|
|
1
1
|
module Rbxl
|
|
2
|
+
# Write-only workbook for single-pass XLSX generation.
|
|
3
|
+
#
|
|
4
|
+
# The workbook accumulates rows per worksheet and emits the full
|
|
5
|
+
# <tt>.xlsx</tt> package in a single pass when {#save} is called. By
|
|
6
|
+
# design a write-only workbook can only be saved once: {#save} calls
|
|
7
|
+
# {#close} on success, and any subsequent call raises
|
|
8
|
+
# {Rbxl::WorkbookAlreadySavedError}.
|
|
9
|
+
#
|
|
10
|
+
# book = Rbxl.new(write_only: true)
|
|
11
|
+
# sheet = book.add_sheet("Report")
|
|
12
|
+
# sheet.append(["id", "name"])
|
|
13
|
+
# sheet.append([1, "alice"])
|
|
14
|
+
# book.save("report.xlsx")
|
|
15
|
+
#
|
|
16
|
+
# Style output is intentionally minimal: a single default style entry is
|
|
17
|
+
# emitted so that authored +style_id+ references resolve, but arbitrary
|
|
18
|
+
# workbook styling is out of scope for the MVP API.
|
|
2
19
|
class WriteOnlyWorkbook
|
|
20
|
+
# @return [Array<Rbxl::WriteOnlyWorksheet>] worksheets in insertion order
|
|
3
21
|
attr_reader :worksheets
|
|
4
22
|
|
|
23
|
+
# Creates an empty write-only workbook with no worksheets.
|
|
5
24
|
def initialize
|
|
6
25
|
@worksheets = []
|
|
7
26
|
@closed = false
|
|
8
27
|
@saved = false
|
|
9
28
|
end
|
|
10
29
|
|
|
30
|
+
# Creates and returns a new worksheet appended to this workbook.
|
|
31
|
+
#
|
|
32
|
+
# @param name [String] visible sheet name
|
|
33
|
+
# @return [Rbxl::WriteOnlyWorksheet]
|
|
34
|
+
# @raise [Rbxl::ClosedWorkbookError] if the workbook has been closed
|
|
35
|
+
# @raise [Rbxl::WorkbookAlreadySavedError] if {#save} has already succeeded
|
|
11
36
|
def add_sheet(name)
|
|
12
37
|
ensure_writable!
|
|
13
38
|
|
|
@@ -16,20 +41,37 @@ module Rbxl
|
|
|
16
41
|
sheet
|
|
17
42
|
end
|
|
18
43
|
|
|
44
|
+
# Serializes the workbook to an <tt>.xlsx</tt> file at +path+.
|
|
45
|
+
#
|
|
46
|
+
# On success the workbook is closed automatically; the method returns
|
|
47
|
+
# the path that was written, suitable for chaining.
|
|
48
|
+
#
|
|
49
|
+
# @param path [String, #to_path] destination filesystem path
|
|
50
|
+
# @return [String, #to_path] the +path+ argument, returned unchanged
|
|
51
|
+
# @raise [Rbxl::Error] if no worksheets have been added
|
|
52
|
+
# @raise [Rbxl::ClosedWorkbookError] if the workbook is already closed
|
|
53
|
+
# @raise [Rbxl::WorkbookAlreadySavedError] if {#save} was already called
|
|
19
54
|
def save(path)
|
|
20
55
|
ensure_writable!
|
|
21
56
|
raise Error, "at least one worksheet is required" if worksheets.empty?
|
|
22
57
|
|
|
23
|
-
Zip
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
write_entry(zip, "xl/workbook.xml", workbook_xml)
|
|
27
|
-
write_entry(zip, "xl/_rels/workbook.xml.rels", workbook_rels_xml)
|
|
28
|
-
write_entry(zip, "xl/styles.xml", styles_xml)
|
|
58
|
+
previous_zip64 = Zip.write_zip64_support
|
|
59
|
+
begin
|
|
60
|
+
Zip.write_zip64_support = false
|
|
29
61
|
|
|
30
|
-
|
|
31
|
-
write_entry(zip, "
|
|
62
|
+
Zip::OutputStream.open(path) do |zip|
|
|
63
|
+
write_entry(zip, "[Content_Types].xml", content_types_xml)
|
|
64
|
+
write_entry(zip, "_rels/.rels", root_rels_xml)
|
|
65
|
+
write_entry(zip, "xl/workbook.xml", workbook_xml)
|
|
66
|
+
write_entry(zip, "xl/_rels/workbook.xml.rels", workbook_rels_xml)
|
|
67
|
+
write_entry(zip, "xl/styles.xml", styles_xml)
|
|
68
|
+
|
|
69
|
+
worksheets.each_with_index do |sheet, index|
|
|
70
|
+
write_entry(zip, "xl/worksheets/sheet#{index + 1}.xml", sheet.to_xml)
|
|
71
|
+
end
|
|
32
72
|
end
|
|
73
|
+
ensure
|
|
74
|
+
Zip.write_zip64_support = previous_zip64
|
|
33
75
|
end
|
|
34
76
|
|
|
35
77
|
@saved = true
|
|
@@ -37,10 +79,16 @@ module Rbxl
|
|
|
37
79
|
path
|
|
38
80
|
end
|
|
39
81
|
|
|
82
|
+
# Marks the workbook as closed. Further mutating operations raise
|
|
83
|
+
# {Rbxl::ClosedWorkbookError}. This is called automatically by a
|
|
84
|
+
# successful {#save}.
|
|
85
|
+
#
|
|
86
|
+
# @return [Boolean] the new closed state (always +true+)
|
|
40
87
|
def close
|
|
41
88
|
@closed = true
|
|
42
89
|
end
|
|
43
90
|
|
|
91
|
+
# @return [Boolean] whether the workbook has been closed
|
|
44
92
|
def closed?
|
|
45
93
|
@closed
|
|
46
94
|
end
|
|
@@ -48,8 +96,8 @@ module Rbxl
|
|
|
48
96
|
private
|
|
49
97
|
|
|
50
98
|
def ensure_writable!
|
|
51
|
-
raise ClosedWorkbookError, "workbook has been closed" if closed?
|
|
52
99
|
raise WorkbookAlreadySavedError, "write-only workbook can only be saved once" if @saved
|
|
100
|
+
raise ClosedWorkbookError, "workbook has been closed" if closed?
|
|
53
101
|
end
|
|
54
102
|
|
|
55
103
|
def write_entry(zip, name, content)
|
|
@@ -1,17 +1,50 @@
|
|
|
1
1
|
module Rbxl
|
|
2
|
+
# Worksheet builder used by {Rbxl::WriteOnlyWorkbook}.
|
|
3
|
+
#
|
|
4
|
+
# Rows are appended in order and later serialized as SpreadsheetML by
|
|
5
|
+
# {#to_xml} when the workbook is saved. The builder never rewrites a
|
|
6
|
+
# previously appended row, so the worksheet's in-memory footprint scales
|
|
7
|
+
# linearly with the number of appended rows.
|
|
8
|
+
#
|
|
9
|
+
# == Row values
|
|
10
|
+
#
|
|
11
|
+
# Each element of an appended row may be one of:
|
|
12
|
+
#
|
|
13
|
+
# * +nil+ — serialized as an empty cell
|
|
14
|
+
# * +true+ / +false+ — serialized as a boolean cell
|
|
15
|
+
# * +Integer+ / +Numeric+ — serialized as a numeric cell
|
|
16
|
+
# * +Date+ / +DateTime+ / +Time+ — serialized as an ISO-8601 inline string
|
|
17
|
+
# * any other object — serialized as +value.to_s+ in an inline string
|
|
18
|
+
# * {Rbxl::WriteOnlyCell} — same as the above, but with an optional style id
|
|
2
19
|
class WriteOnlyWorksheet
|
|
20
|
+
# @return [String] visible sheet name
|
|
3
21
|
attr_reader :name
|
|
4
22
|
|
|
23
|
+
# @param name [String] visible sheet name
|
|
5
24
|
def initialize(name:)
|
|
6
25
|
@name = name
|
|
7
26
|
@rows = []
|
|
8
27
|
@column_name_cache = []
|
|
9
28
|
end
|
|
10
29
|
|
|
30
|
+
# Appends a row of values. Equivalent to {#append} so that the shovel
|
|
31
|
+
# operator reads naturally at the call site:
|
|
32
|
+
#
|
|
33
|
+
# sheet << ["id", "name"]
|
|
34
|
+
#
|
|
35
|
+
# @param values [Array, Enumerator] row values
|
|
36
|
+
# @return [Rbxl::WriteOnlyWorksheet] +self+ for chaining
|
|
37
|
+
# @raise [TypeError] if +values+ is neither an Array nor an Enumerator
|
|
11
38
|
def <<(values)
|
|
12
39
|
append(values)
|
|
13
40
|
end
|
|
14
41
|
|
|
42
|
+
# Appends a row of values.
|
|
43
|
+
#
|
|
44
|
+
# @param values [Array, Enumerator] row values; each element is serialized
|
|
45
|
+
# according to the rules documented on the class
|
|
46
|
+
# @return [Rbxl::WriteOnlyWorksheet] +self+ for chaining
|
|
47
|
+
# @raise [TypeError] if +values+ is neither an Array nor an Enumerator
|
|
15
48
|
def append(values)
|
|
16
49
|
unless values.is_a?(Array) || values.is_a?(Enumerator)
|
|
17
50
|
raise TypeError, "row must be an Array or Enumerator, got #{values.class}"
|
|
@@ -21,6 +54,14 @@ module Rbxl
|
|
|
21
54
|
self
|
|
22
55
|
end
|
|
23
56
|
|
|
57
|
+
# Serializes the worksheet to SpreadsheetML.
|
|
58
|
+
#
|
|
59
|
+
# When <tt>require "rbxl/native"</tt> has been loaded the native
|
|
60
|
+
# extension handles serialization for a significant speedup; otherwise
|
|
61
|
+
# a pure-Ruby implementation is used. Both paths produce equivalent
|
|
62
|
+
# output.
|
|
63
|
+
#
|
|
64
|
+
# @return [String] worksheet XML
|
|
24
65
|
def to_xml
|
|
25
66
|
if defined?(Rbxl::Native)
|
|
26
67
|
return Rbxl::Native.generate_sheet(@rows)
|