rbxl 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +21 -1
- data/README.md +22 -0
- data/Rakefile +19 -0
- data/lib/rbxl/errors.rb +6 -0
- data/lib/rbxl/read_only_workbook.rb +77 -6
- data/lib/rbxl/version.rb +1 -1
- data/lib/rbxl.rb +14 -3
- data/sig/rbxl.rbs +8 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 2b16579845423af49cff940ed7557164236d66b2e3e7f92e8bcece2a69c486e0
|
|
4
|
+
data.tar.gz: 3976e30db04742ea0b22b5d1edf95bccaf269e0ee6b6d209af3fe14bbaace7f0
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 2dcba5f510b571dd546ca36b8c86dd607dd406298691e71bf01575e124c15bac6f526bcdb8aa7183f4980fb06c16e7dfe8b4d0f7ce3743c57b214f5e90d6c1c4
|
|
7
|
+
data.tar.gz: 2cd7f062d75b0af0ae1fdbe13606dcbe5b1c6df0d2aa48be94e7ef2b06bf178ef8ae315d8c9c1c11629b265d9f2829beef60f4127294ac7a91b4de088f4d2a09
|
data/CHANGELOG.md
CHANGED
|
@@ -4,7 +4,27 @@ All notable changes to this project are documented here. The format is based
|
|
|
4
4
|
on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project
|
|
5
5
|
follows [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
6
6
|
|
|
7
|
-
## [Unreleased]
|
|
7
|
+
## [1.3.0] - 2026-04-27
|
|
8
|
+
|
|
9
|
+
### Added
|
|
10
|
+
|
|
11
|
+
- `Rbxl.open` (and `Rbxl::ReadOnlyWorkbook.open`) now accept a block. The
|
|
12
|
+
workbook is yielded and closed automatically when the block returns or
|
|
13
|
+
raises, matching the `File.open` / `Zip::File.open` idiom. Previously the
|
|
14
|
+
block was silently ignored.
|
|
15
|
+
- `Rbxl::UnsupportedFormatError` raised by `Rbxl.open` when the file is not
|
|
16
|
+
a `.xlsx` container. Legacy `.xls` (BIFF/CFB) inputs are detected by the
|
|
17
|
+
OLE compound-file magic and reported with a conversion hint, instead of
|
|
18
|
+
surfacing an opaque `Zip::Error` from rubyzip five frames deep.
|
|
19
|
+
- `Rbxl::ReadOnlyWorkbook#sheet` now accepts an integer index into
|
|
20
|
+
`sheet_names` (including negatives, so `sheet(-1)` returns the last
|
|
21
|
+
sheet), for the common single-sheet case where `book.sheet(0)` reads
|
|
22
|
+
cleaner than `book.sheet(book.sheet_names.first)`.
|
|
23
|
+
- `Rbxl::ReadOnlyWorkbook#sheets` iterator over worksheets in workbook
|
|
24
|
+
order. Returns an `Enumerator` when called without a block, so
|
|
25
|
+
`book.sheets.first` and `book.sheets.map(&:name)` compose naturally.
|
|
26
|
+
Worksheet objects are constructed on demand — no eager parse of sibling
|
|
27
|
+
sheets.
|
|
8
28
|
|
|
9
29
|
## [1.2.0] - 2026-04-23
|
|
10
30
|
|
data/README.md
CHANGED
|
@@ -25,6 +25,12 @@ Out of scope:
|
|
|
25
25
|
read-only and generates new workbooks write-only, with no read-modify-save
|
|
26
26
|
path. If you need to open a file, tweak a handful of cells, and write it
|
|
27
27
|
back preserving everything else, use a full-object-model library instead.
|
|
28
|
+
- legacy `.xls` (BIFF/CFB) input — rbxl reads OOXML `.xlsx` only. Convert
|
|
29
|
+
first, e.g. `libreoffice --headless --convert-to xlsx file.xls` or
|
|
30
|
+
`ssconvert file.xls file.xlsx` (Gnumeric). `Rbxl.open` detects the OLE
|
|
31
|
+
compound-file magic on open and raises `Rbxl::UnsupportedFormatError`
|
|
32
|
+
with the conversion hint rather than surfacing an opaque ZIP parse
|
|
33
|
+
error from rubyzip.
|
|
28
34
|
- preserving arbitrary workbook structure on save
|
|
29
35
|
- rich style round-tripping
|
|
30
36
|
- formulas, images, charts, comments
|
|
@@ -142,6 +148,22 @@ book.sheet("Sparse").each_row(pad_cells: true, values_only: true).first
|
|
|
142
148
|
# => ["left", nil, "right"]
|
|
143
149
|
```
|
|
144
150
|
|
|
151
|
+
**Leading empty columns aren't padded.** Both default and `pad_cells: true`
|
|
152
|
+
rows align to the first populated column, not to column A. On a sheet
|
|
153
|
+
whose dimension is `B1:N100`, every row has 13 entries (columns B–N), not
|
|
154
|
+
14. `max_column` still reports `14` (column N, 1-based) — the gap is on
|
|
155
|
+
the left, not the right. If you need column-A alignment, inspect
|
|
156
|
+
`calculate_dimension` and prepend the missing `nil`s yourself:
|
|
157
|
+
|
|
158
|
+
```ruby
|
|
159
|
+
sheet = book.sheet("LeftOffset")
|
|
160
|
+
sheet.calculate_dimension # => "B1:N100"
|
|
161
|
+
leading_pad = Array.new(1, nil) # B starts at column 2, so 1 nil
|
|
162
|
+
sheet.each_row(values_only: true, pad_cells: true) do |row|
|
|
163
|
+
aligned = leading_pad + row # => [nil, "first B-value", ...]
|
|
164
|
+
end
|
|
165
|
+
```
|
|
166
|
+
|
|
145
167
|
**Expand merged cells.** Excel leaves the anchor cell populated and the
|
|
146
168
|
rest of the merge range empty. Pass `expand_merged: true` to propagate
|
|
147
169
|
the anchor value across the full range; combine with `pad_cells: true`
|
data/Rakefile
CHANGED
|
@@ -1,11 +1,30 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "bundler/gem_helper"
|
|
4
|
+
require "rake/testtask"
|
|
4
5
|
require "rdoc/task"
|
|
5
6
|
|
|
6
7
|
Bundler::GemHelper.install_tasks
|
|
7
8
|
|
|
9
|
+
Rake::TestTask.new(:test) do |t|
|
|
10
|
+
t.libs << "test"
|
|
11
|
+
t.libs << "lib"
|
|
12
|
+
t.test_files = FileList["test/**/*_test.rb"]
|
|
13
|
+
t.warning = false
|
|
14
|
+
end
|
|
15
|
+
|
|
8
16
|
RDoc::Task.new(:rdoc) do |rdoc|
|
|
9
17
|
rdoc.main = "README.md"
|
|
10
18
|
rdoc.rdoc_files.include("README.md", "lib/**/*.rb")
|
|
11
19
|
end
|
|
20
|
+
|
|
21
|
+
desc "Build the rbxl_native C extension in place"
|
|
22
|
+
task :compile do
|
|
23
|
+
ext_dir = File.expand_path("ext/rbxl_native", __dir__)
|
|
24
|
+
Dir.chdir(ext_dir) do
|
|
25
|
+
ruby "extconf.rb"
|
|
26
|
+
sh "make"
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
task test: :compile
|
data/lib/rbxl/errors.rb
CHANGED
|
@@ -34,6 +34,12 @@ module Rbxl
|
|
|
34
34
|
# worksheets are stopped mid-inflate rather than after the fact.
|
|
35
35
|
class WorksheetTooLargeError < Error; end
|
|
36
36
|
|
|
37
|
+
# Raised by {Rbxl.open} when the file is not a valid +.xlsx+ container.
|
|
38
|
+
# Most commonly fires on legacy +.xls+ (BIFF/CFB) files — the message
|
|
39
|
+
# names the detected format and suggests a conversion path rather than
|
|
40
|
+
# letting the underlying ZIP parser surface an opaque error.
|
|
41
|
+
class UnsupportedFormatError < Error; end
|
|
42
|
+
|
|
37
43
|
# Raised when workbook-level XML is malformed or internally inconsistent,
|
|
38
44
|
# for example when +xl/workbook.xml+ cannot be parsed or references a
|
|
39
45
|
# missing relationship target.
|
data/lib/rbxl/read_only_workbook.rb
CHANGED
|
@@ -30,6 +30,16 @@ module Rbxl
|
|
|
30
30
|
# Namespace used by the OPC package relationships layer.
|
|
31
31
|
PACKAGE_REL_NS = "http://schemas.openxmlformats.org/package/2006/relationships"
|
|
32
32
|
|
|
33
|
+
# First 8 bytes of the OLE Compound File Binary format (legacy .xls,
|
|
34
|
+
# .doc, .ppt). Sniffed to short-circuit into a typed error before
|
|
35
|
+
# rubyzip bubbles up an opaque "end of central directory" failure.
|
|
36
|
+
OLE_CFB_MAGIC = "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1".b.freeze
|
|
37
|
+
private_constant :OLE_CFB_MAGIC
|
|
38
|
+
|
|
39
|
+
# ZIP local file header signature — the first bytes of every .xlsx.
|
|
40
|
+
ZIP_LOCAL_MAGIC = "PK\x03\x04".b.freeze
|
|
41
|
+
private_constant :ZIP_LOCAL_MAGIC
|
|
42
|
+
|
|
33
43
|
# @return [String] filesystem path the workbook was opened from
|
|
34
44
|
attr_reader :path
|
|
35
45
|
|
|
@@ -39,14 +49,28 @@ module Rbxl
|
|
|
39
49
|
# Convenience constructor equivalent to
|
|
40
50
|
# <tt>new(path, streaming:, date_conversion:)</tt>.
|
|
41
51
|
#
|
|
52
|
+
# When a block is given, the workbook is yielded to the block and
|
|
53
|
+
# {#close} is called automatically when the block returns (or raises).
|
|
54
|
+
# The block's return value is returned to the caller, matching the
|
|
55
|
+
# +File.open+ / +Zip::File.open+ idiom.
|
|
56
|
+
#
|
|
42
57
|
# @param path [String, #to_path] path to the <tt>.xlsx</tt> file
|
|
43
58
|
# @param streaming [Boolean] feed worksheet XML to the native parser in
|
|
44
59
|
# chunks (see {Rbxl.open})
|
|
45
60
|
# @param date_conversion [Boolean] convert numeric cells backed by a
|
|
46
61
|
# date/time +numFmt+ to Ruby date/time objects (see {Rbxl.open})
|
|
47
|
-
# @
|
|
62
|
+
# @yieldparam book [Rbxl::ReadOnlyWorkbook] the opened workbook
|
|
63
|
+
# @return [Rbxl::ReadOnlyWorkbook, Object] the workbook when no block is
|
|
64
|
+
# given, otherwise the block's return value
|
|
48
65
|
def self.open(path, streaming: false, date_conversion: false)
|
|
49
|
-
new(path, streaming: streaming, date_conversion: date_conversion)
|
|
66
|
+
book = new(path, streaming: streaming, date_conversion: date_conversion)
|
|
67
|
+
return book unless block_given?
|
|
68
|
+
|
|
69
|
+
begin
|
|
70
|
+
yield book
|
|
71
|
+
ensure
|
|
72
|
+
book.close
|
|
73
|
+
end
|
|
50
74
|
end
|
|
51
75
|
|
|
52
76
|
# Opens the ZIP archive, pre-loads shared strings, and indexes the
|
|
@@ -58,6 +82,7 @@ module Rbxl
|
|
|
58
82
|
# date-style lookup table to produced worksheets
|
|
59
83
|
def initialize(path, streaming: false, date_conversion: false)
|
|
60
84
|
@path = path
|
|
85
|
+
ensure_xlsx_format!(path)
|
|
61
86
|
@zip = Zip::File.open(path)
|
|
62
87
|
@streaming = streaming
|
|
63
88
|
@date_conversion = date_conversion
|
|
@@ -69,18 +94,23 @@ module Rbxl
|
|
|
69
94
|
@closed = false
|
|
70
95
|
end
|
|
71
96
|
|
|
72
|
-
# Returns a row-by-row worksheet by visible sheet name
|
|
97
|
+
# Returns a row-by-row worksheet by visible sheet name or by 0-based
|
|
98
|
+
# index into {#sheet_names}. Negative indexes count from the end, so
|
|
99
|
+
# <tt>sheet(-1)</tt> returns the last sheet.
|
|
73
100
|
#
|
|
74
101
|
# The returned object shares the workbook's ZIP handle. Closing the
|
|
75
102
|
# workbook invalidates any worksheets produced by prior calls.
|
|
76
103
|
#
|
|
77
|
-
# @param
|
|
104
|
+
# @param name_or_index [String, Integer] visible sheet name as listed in
|
|
105
|
+
# {#sheet_names}, or an integer index into that list
|
|
78
106
|
# @return [Rbxl::ReadOnlyWorksheet]
|
|
79
|
-
# @raise [Rbxl::SheetNotFoundError] if +
|
|
107
|
+
# @raise [Rbxl::SheetNotFoundError] if +name_or_index+ does not resolve
|
|
108
|
+
# to a sheet
|
|
80
109
|
# @raise [Rbxl::ClosedWorkbookError] if the workbook has been closed
|
|
81
|
-
def sheet(name)
|
|
110
|
+
def sheet(name_or_index)
|
|
82
111
|
ensure_open!
|
|
83
112
|
|
|
113
|
+
name = resolve_sheet_name(name_or_index)
|
|
84
114
|
entry_path = @sheet_entries.fetch(name) do
|
|
85
115
|
raise SheetNotFoundError, "sheet not found: #{name}"
|
|
86
116
|
end
|
|
@@ -97,6 +127,23 @@ module Rbxl
|
|
|
97
127
|
)
|
|
98
128
|
end
|
|
99
129
|
|
|
130
|
+
# Iterates the workbook's sheets in workbook order. Each worksheet is
|
|
131
|
+
# constructed on demand, so <tt>sheets.first</tt> allocates only the
|
|
132
|
+
# first sheet and <tt>sheets.lazy.find { ... }</tt> stops as soon as a
|
|
133
|
+
# match is found. Returned objects share the same ZIP handle and
|
|
134
|
+
# cached shared-strings / date-style tables as {#sheet}.
|
|
135
|
+
#
|
|
136
|
+
# @yieldparam worksheet [Rbxl::ReadOnlyWorksheet]
|
|
137
|
+
# @return [Enumerator<Rbxl::ReadOnlyWorksheet>] when no block is given
|
|
138
|
+
# @return [void] when a block is given
|
|
139
|
+
# @raise [Rbxl::ClosedWorkbookError] if the workbook has been closed
|
|
140
|
+
def sheets
|
|
141
|
+
ensure_open!
|
|
142
|
+
return enum_for(:sheets) unless block_given?
|
|
143
|
+
|
|
144
|
+
@sheet_names.each { |name| yield sheet(name) }
|
|
145
|
+
end
|
|
146
|
+
|
|
100
147
|
# Releases the underlying ZIP file handle. Idempotent; subsequent calls
|
|
101
148
|
# are no-ops.
|
|
102
149
|
#
|
|
@@ -119,6 +166,30 @@ module Rbxl
|
|
|
119
166
|
raise ClosedWorkbookError, "workbook has been closed" if closed?
|
|
120
167
|
end
|
|
121
168
|
|
|
169
|
+
def resolve_sheet_name(key)
|
|
170
|
+
return key unless key.is_a?(Integer)
|
|
171
|
+
|
|
172
|
+
name = @sheet_names[key]
|
|
173
|
+
return name if name
|
|
174
|
+
|
|
175
|
+
raise SheetNotFoundError, "sheet index out of range: #{key} (#{@sheet_names.length} sheet(s))"
|
|
176
|
+
end
|
|
177
|
+
|
|
178
|
+
def ensure_xlsx_format!(path)
|
|
179
|
+
header = File.binread(path, 8)
|
|
180
|
+
return if header.start_with?(ZIP_LOCAL_MAGIC)
|
|
181
|
+
|
|
182
|
+
if header.start_with?(OLE_CFB_MAGIC)
|
|
183
|
+
raise UnsupportedFormatError,
|
|
184
|
+
"#{path} looks like a legacy .xls (BIFF/CFB). " \
|
|
185
|
+
"rbxl supports .xlsx (OOXML) only; convert first, e.g. " \
|
|
186
|
+
"`libreoffice --headless --convert-to xlsx #{File.basename(path.to_s)}`."
|
|
187
|
+
end
|
|
188
|
+
|
|
189
|
+
raise UnsupportedFormatError,
|
|
190
|
+
"#{path} is not a valid .xlsx (no ZIP signature at offset 0)."
|
|
191
|
+
end
|
|
192
|
+
|
|
122
193
|
# Built-in numFmtId values that Excel resolves to date/time formats.
|
|
123
194
|
# Ids outside this set are dates only when the workbook provides a
|
|
124
195
|
# matching custom +<numFmt>+ entry whose format code contains date
|
data/lib/rbxl/version.rb
CHANGED
data/lib/rbxl.rb
CHANGED
|
@@ -92,6 +92,14 @@ module Rbxl
|
|
|
92
92
|
# intent explicitly at the call site. Passing +read_only: false+ raises
|
|
93
93
|
# {NotImplementedError}; a read/write mode is not available.
|
|
94
94
|
#
|
|
95
|
+
# When a block is given, the workbook is yielded and automatically
|
|
96
|
+
# closed when the block returns (or raises), mirroring the +File.open+
|
|
97
|
+
# and +Zip::File.open+ idiom:
|
|
98
|
+
#
|
|
99
|
+
# Rbxl.open("report.xlsx") do |book|
|
|
100
|
+
# book.sheet("Report").each_row(values_only: true) { |row| p row }
|
|
101
|
+
# end
|
|
102
|
+
#
|
|
95
103
|
# With <tt>streaming: true</tt>, the native backend (when loaded) feeds
|
|
96
104
|
# worksheet XML to the parser in chunks pulled from the ZIP input stream
|
|
97
105
|
# instead of materializing the entire worksheet as one Ruby string. This
|
|
@@ -117,12 +125,15 @@ module Rbxl
|
|
|
117
125
|
# the native extension is not loaded.
|
|
118
126
|
# @param date_conversion [Boolean] convert numeric cells backed by a
|
|
119
127
|
# date/time +numFmt+ to +Date+ / +Time+ / +DateTime+
|
|
120
|
-
# @
|
|
128
|
+
# @yieldparam book [Rbxl::ReadOnlyWorkbook] opened workbook; auto-closed
|
|
129
|
+
# when the block returns
|
|
130
|
+
# @return [Rbxl::ReadOnlyWorkbook, Object] the workbook when no block is
|
|
131
|
+
# given, otherwise the block's return value
|
|
121
132
|
# @raise [NotImplementedError] if +read_only+ is not +true+
|
|
122
|
-
def open(path, read_only: true, streaming: false, date_conversion: false)
|
|
133
|
+
def open(path, read_only: true, streaming: false, date_conversion: false, &block)
|
|
123
134
|
raise NotImplementedError, "read/write mode is not supported; pass read_only: true" unless read_only
|
|
124
135
|
|
|
125
|
-
ReadOnlyWorkbook.open(path, streaming: streaming, date_conversion: date_conversion)
|
|
136
|
+
ReadOnlyWorkbook.open(path, streaming: streaming, date_conversion: date_conversion, &block)
|
|
126
137
|
end
|
|
127
138
|
|
|
128
139
|
# Creates a new workbook in write-only mode.
|
data/sig/rbxl.rbs
CHANGED
|
@@ -10,6 +10,7 @@ module Rbxl
|
|
|
10
10
|
type dimensions = { ref: String, max_col: Integer, max_row: Integer }
|
|
11
11
|
|
|
12
12
|
def self.open: (pathish path, ?read_only: bool, ?streaming: bool, ?date_conversion: bool) -> ReadOnlyWorkbook
|
|
13
|
+
| [T] (pathish path, ?read_only: bool, ?streaming: bool, ?date_conversion: bool) { (ReadOnlyWorkbook) -> T } -> T
|
|
13
14
|
def self.new: (?write_only: bool) -> WriteOnlyWorkbook
|
|
14
15
|
|
|
15
16
|
attr_accessor self.max_shared_strings: Integer?
|
|
@@ -37,6 +38,9 @@ module Rbxl
|
|
|
37
38
|
class WorksheetTooLargeError < Error
|
|
38
39
|
end
|
|
39
40
|
|
|
41
|
+
class UnsupportedFormatError < Error
|
|
42
|
+
end
|
|
43
|
+
|
|
40
44
|
class Cell
|
|
41
45
|
attr_accessor value: cell_value
|
|
42
46
|
attr_accessor coordinate: String?
|
|
@@ -84,8 +88,11 @@ module Rbxl
|
|
|
84
88
|
attr_reader sheet_names: Array[String]
|
|
85
89
|
|
|
86
90
|
def self.open: (pathish path, ?streaming: bool, ?date_conversion: bool) -> ReadOnlyWorkbook
|
|
91
|
+
| [T] (pathish path, ?streaming: bool, ?date_conversion: bool) { (ReadOnlyWorkbook) -> T } -> T
|
|
87
92
|
def initialize: (pathish path, ?streaming: bool, ?date_conversion: bool) -> void
|
|
88
|
-
def sheet: (String name) -> ReadOnlyWorksheet
|
|
93
|
+
def sheet: (String | Integer name_or_index) -> ReadOnlyWorksheet
|
|
94
|
+
def sheets: () { (ReadOnlyWorksheet) -> void } -> void
|
|
95
|
+
| () -> Enumerator[ReadOnlyWorksheet, void]
|
|
89
96
|
def close: () -> void
|
|
90
97
|
def closed?: () -> bool
|
|
91
98
|
end
|