rbxl 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b7d99201ddbfd10ac1f5173052e0ef0d0bfea0e7e0143bc5e214d28d5cbea335
4
- data.tar.gz: 513ec07aea3c8888bafd1b60c20f6e508e6ce87a2380c5dbcb536523b09ceab3
3
+ metadata.gz: 2b16579845423af49cff940ed7557164236d66b2e3e7f92e8bcece2a69c486e0
4
+ data.tar.gz: 3976e30db04742ea0b22b5d1edf95bccaf269e0ee6b6d209af3fe14bbaace7f0
5
5
  SHA512:
6
- metadata.gz: 1dd2f6856dd7c9452d63f132e52f4336958a8bda63e304b353766ba573ed429b196c76dfa067468dcf0d85f5926de5b002f8f498b4907486fe717096ab20dbb2
7
- data.tar.gz: 298fc80d0760d5468a7b2c95ae32751eb5c0e6070cd546d77d806ef7aabb057674f98a371d0c6c0320fd9784d89633e9fb278d03bdec4219426d89997a5540cb
6
+ metadata.gz: 2dcba5f510b571dd546ca36b8c86dd607dd406298691e71bf01575e124c15bac6f526bcdb8aa7183f4980fb06c16e7dfe8b4d0f7ce3743c57b214f5e90d6c1c4
7
+ data.tar.gz: 2cd7f062d75b0af0ae1fdbe13606dcbe5b1c6df0d2aa48be94e7ef2b06bf178ef8ae315d8c9c1c11629b265d9f2829beef60f4127294ac7a91b4de088f4d2a09
data/CHANGELOG.md CHANGED
@@ -4,7 +4,27 @@ All notable changes to this project are documented here. The format is based
4
4
  on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project
5
5
  follows [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
6
6
 
7
- ## [Unreleased]
7
+ ## [1.3.0] - 2026-04-27
8
+
9
+ ### Added
10
+
11
+ - `Rbxl.open` (and `Rbxl::ReadOnlyWorkbook.open`) now accept a block. The
12
+ workbook is yielded and closed automatically when the block returns or
13
+ raises, matching the `File.open` / `Zip::File.open` idiom. Previously the
14
+ block was silently ignored.
15
+ - `Rbxl::UnsupportedFormatError` raised by `Rbxl.open` when the file is not
16
+ a `.xlsx` container. Legacy `.xls` (BIFF/CFB) inputs are detected by the
17
+ OLE compound-file magic and reported with a conversion hint, instead of
18
+ surfacing an opaque `Zip::Error` from rubyzip five frames deep.
19
+ - `Rbxl::ReadOnlyWorkbook#sheet` now accepts an integer index into
20
+ `sheet_names` (including negatives, so `sheet(-1)` returns the last
21
+ sheet), for the common single-sheet case where `book.sheet(0)` reads
22
+ cleaner than `book.sheet(book.sheet_names.first)`.
23
+ - `Rbxl::ReadOnlyWorkbook#sheets` iterator over worksheets in workbook
24
+ order. Returns an `Enumerator` when called without a block, so
25
+ `book.sheets.first` and `book.sheets.map(&:name)` compose naturally.
26
+ Worksheet objects are constructed on demand — no eager parse of sibling
27
+ sheets.
8
28
 
9
29
  ## [1.2.0] - 2026-04-23
10
30
 
data/README.md CHANGED
@@ -25,6 +25,12 @@ Out of scope:
25
25
  read-only and generates new workbooks write-only, with no read-modify-save
26
26
  path. If you need to open a file, tweak a handful of cells, and write it
27
27
  back preserving everything else, use a full-object-model library instead.
28
+ - legacy `.xls` (BIFF/CFB) input — rbxl reads OOXML `.xlsx` only. Convert
29
+ first, e.g. `libreoffice --headless --convert-to xlsx file.xls` or
30
+ `ssconvert file.xls file.xlsx` (Gnumeric). `Rbxl.open` detects the OLE
31
+ compound-file magic on open and raises `Rbxl::UnsupportedFormatError`
32
+ with the conversion hint rather than surfacing an opaque ZIP parse
33
+ error from rubyzip.
28
34
  - preserving arbitrary workbook structure on save
29
35
  - rich style round-tripping
30
36
  - formulas, images, charts, comments
@@ -142,6 +148,22 @@ book.sheet("Sparse").each_row(pad_cells: true, values_only: true).first
142
148
  # => ["left", nil, "right"]
143
149
  ```
144
150
 
151
+ **Leading empty columns aren't padded.** Both default and `pad_cells: true`
152
+ rows align to the first populated column, not to column A. On a sheet
153
+ whose dimension is `B1:N100`, every row has 13 entries (columns B–N), not
154
+ 14. `max_column` still reports `14` (column N, 1-based) — the gap is on
155
+ the left, not the right. If you need column-A alignment, inspect
156
+ `calculate_dimension` and prepend the missing `nil`s yourself:
157
+
158
+ ```ruby
159
+ sheet = book.sheet("LeftOffset")
160
+ sheet.calculate_dimension # => "B1:N100"
161
+ leading_pad = Array.new(1, nil) # B starts at column 2, so 1 nil
162
+ sheet.each_row(values_only: true, pad_cells: true) do |row|
163
+ aligned = leading_pad + row # => [nil, "first B-value", ...]
164
+ end
165
+ ```
166
+
145
167
  **Expand merged cells.** Excel leaves the anchor cell populated and the
146
168
  rest of the merge range empty. Pass `expand_merged: true` to propagate
147
169
  the anchor value across the full range; combine with `pad_cells: true`
data/Rakefile CHANGED
@@ -1,11 +1,30 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "bundler/gem_helper"
4
+ require "rake/testtask"
4
5
  require "rdoc/task"
5
6
 
6
7
  Bundler::GemHelper.install_tasks
7
8
 
9
# Defines the `test` task: puts test/ and lib/ on the load path and runs
# every *_test.rb file under test/ with Ruby warnings switched off.
Rake::TestTask.new(:test) do |test_task|
  test_task.libs.push("test", "lib")
  test_task.test_files = FileList["test/**/*_test.rb"]
  test_task.warning = false
end
15
+
8
16
  RDoc::Task.new(:rdoc) do |rdoc|
9
17
  rdoc.main = "README.md"
10
18
  rdoc.rdoc_files.include("README.md", "lib/**/*.rb")
11
19
  end
20
+
21
desc "Build the rbxl_native C extension in place"
task :compile do
  # Run extconf.rb and make from inside the extension directory, which is
  # resolved relative to this Rakefile.
  native_ext_dir = File.expand_path("ext/rbxl_native", __dir__)
  Dir.chdir(native_ext_dir) do
    ruby "extconf.rb"
    sh "make"
  end
end

# The test task depends on :compile, so the extension is built first.
task test: [:compile]
data/lib/rbxl/errors.rb CHANGED
@@ -34,6 +34,12 @@ module Rbxl
34
34
  # worksheets are stopped mid-inflate rather than after the fact.
35
35
  class WorksheetTooLargeError < Error; end
36
36
 
37
+ # Raised by {Rbxl.open} when the file is not a valid +.xlsx+ container.
38
+ # Most commonly fires on legacy +.xls+ (BIFF/CFB) files — the message
39
+ # names the detected format and suggests a conversion path rather than
40
+ # letting the underlying ZIP parser surface an opaque error.
41
+ class UnsupportedFormatError < Error; end
42
+
37
43
  # Raised when workbook-level XML is malformed or internally inconsistent,
38
44
  # for example when +xl/workbook.xml+ cannot be parsed or references a
39
45
  # missing relationship target.
@@ -30,6 +30,16 @@ module Rbxl
30
30
  # Namespace used by the OPC package relationships layer.
31
31
  PACKAGE_REL_NS = "http://schemas.openxmlformats.org/package/2006/relationships"
32
32
 
33
+ # First 8 bytes of the OLE Compound File Binary format (legacy .xls,
34
+ # .doc, .ppt). Sniffed to short-circuit into a typed error before
35
+ # rubyzip bubbles up an opaque "end of central directory" failure.
36
+ OLE_CFB_MAGIC = "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1".b.freeze
37
+ private_constant :OLE_CFB_MAGIC
38
+
39
+ # ZIP local file header signature — the first bytes of every .xlsx.
40
+ ZIP_LOCAL_MAGIC = "PK\x03\x04".b.freeze
41
+ private_constant :ZIP_LOCAL_MAGIC
42
+
33
43
  # @return [String] filesystem path the workbook was opened from
34
44
  attr_reader :path
35
45
 
@@ -39,14 +49,28 @@ module Rbxl
39
49
  # Convenience constructor equivalent to
40
50
  # <tt>new(path, streaming:, date_conversion:)</tt>.
41
51
  #
52
+ # When a block is given, the workbook is yielded to the block and
53
+ # {#close} is called automatically when the block returns (or raises).
54
+ # The block's return value is returned to the caller, matching the
55
+ # +File.open+ / +Zip::File.open+ idiom.
56
+ #
42
57
  # @param path [String, #to_path] path to the <tt>.xlsx</tt> file
43
58
  # @param streaming [Boolean] feed worksheet XML to the native parser in
44
59
  # chunks (see {Rbxl.open})
45
60
  # @param date_conversion [Boolean] convert numeric cells backed by a
46
61
  # date/time +numFmt+ to Ruby date/time objects (see {Rbxl.open})
47
- # @return [Rbxl::ReadOnlyWorkbook]
62
+ # @yieldparam book [Rbxl::ReadOnlyWorkbook] the opened workbook
63
+ # @return [Rbxl::ReadOnlyWorkbook, Object] the workbook when no block is
64
+ # given, otherwise the block's return value
48
65
  def self.open(path, streaming: false, date_conversion: false)
49
- new(path, streaming: streaming, date_conversion: date_conversion)
66
+ book = new(path, streaming: streaming, date_conversion: date_conversion)
67
+ return book unless block_given?
68
+
69
+ begin
70
+ yield book
71
+ ensure
72
+ book.close
73
+ end
50
74
  end
51
75
 
52
76
  # Opens the ZIP archive, pre-loads shared strings, and indexes the
@@ -58,6 +82,7 @@ module Rbxl
58
82
  # date-style lookup table to produced worksheets
59
83
  def initialize(path, streaming: false, date_conversion: false)
60
84
  @path = path
85
+ ensure_xlsx_format!(path)
61
86
  @zip = Zip::File.open(path)
62
87
  @streaming = streaming
63
88
  @date_conversion = date_conversion
@@ -69,18 +94,23 @@ module Rbxl
69
94
  @closed = false
70
95
  end
71
96
 
72
- # Returns a row-by-row worksheet by visible sheet name.
97
+ # Returns a row-by-row worksheet by visible sheet name or by 0-based
98
+ # index into {#sheet_names}. Negative indexes count from the end, so
99
+ # <tt>sheet(-1)</tt> returns the last sheet.
73
100
  #
74
101
  # The returned object shares the workbook's ZIP handle. Closing the
75
102
  # workbook invalidates any worksheets produced by prior calls.
76
103
  #
77
- # @param name [String] visible sheet name as listed in {#sheet_names}
104
+ # @param name_or_index [String, Integer] visible sheet name as listed in
105
+ # {#sheet_names}, or an integer index into that list
78
106
  # @return [Rbxl::ReadOnlyWorksheet]
79
- # @raise [Rbxl::SheetNotFoundError] if +name+ is not present
107
+ # @raise [Rbxl::SheetNotFoundError] if +name_or_index+ does not resolve
108
+ # to a sheet
80
109
  # @raise [Rbxl::ClosedWorkbookError] if the workbook has been closed
81
- def sheet(name)
110
+ def sheet(name_or_index)
82
111
  ensure_open!
83
112
 
113
+ name = resolve_sheet_name(name_or_index)
84
114
  entry_path = @sheet_entries.fetch(name) do
85
115
  raise SheetNotFoundError, "sheet not found: #{name}"
86
116
  end
@@ -97,6 +127,23 @@ module Rbxl
97
127
  )
98
128
  end
99
129
 
130
+ # Iterates the workbook's sheets in workbook order. Each worksheet is
131
+ # constructed on demand, so <tt>sheets.first</tt> allocates only the
132
+ # first sheet and <tt>sheets.lazy.find { ... }</tt> stops as soon as a
133
+ # match is found. Returned objects share the same ZIP handle and
134
+ # cached shared-strings / date-style tables as {#sheet}.
135
+ #
136
+ # @yieldparam worksheet [Rbxl::ReadOnlyWorksheet]
137
+ # @return [Enumerator<Rbxl::ReadOnlyWorksheet>] when no block is given
138
+ # @return [void] when a block is given
139
+ # @raise [Rbxl::ClosedWorkbookError] if the workbook has been closed
140
def sheets
  # Checked up front so a closed workbook raises even when the caller only
  # asks for an Enumerator.
  ensure_open!

  if block_given?
    # Each worksheet is built through #sheet at the moment it is yielded.
    @sheet_names.each do |sheet_name|
      yield sheet(sheet_name)
    end
  else
    enum_for(:sheets)
  end
end
146
+
100
147
  # Releases the underlying ZIP file handle. Idempotent; subsequent calls
101
148
  # are no-ops.
102
149
  #
@@ -119,6 +166,30 @@ module Rbxl
119
166
  raise ClosedWorkbookError, "workbook has been closed" if closed?
120
167
  end
121
168
 
169
# Turns the argument given to #sheet into a sheet name.
#
# Non-Integer keys are returned unchanged. Integer keys are looked up in
# @sheet_names; an index with no entry raises SheetNotFoundError.
def resolve_sheet_name(key)
  if key.is_a?(Integer)
    @sheet_names[key] ||
      raise(SheetNotFoundError, "sheet index out of range: #{key} (#{@sheet_names.length} sheet(s))")
  else
    key
  end
end
177
+
178
# Sniffs the first 8 bytes of +path+ and raises a typed error unless they
# begin with the ZIP local-file-header signature.
#
# @param path [String, #to_path] file to inspect
# @return [nil] when the ZIP signature is present
# @raise [UnsupportedFormatError] when the file starts with the OLE
#   compound-file magic (legacy .xls), or has no ZIP signature at offset 0
#   (this includes empty and truncated files)
def ensure_xlsx_format!(path)
  # File.binread with a length returns nil for a zero-byte file. Normalize
  # that to an empty binary string so an empty file gets the typed error
  # below rather than a NoMethodError on nil.
  header = File.binread(path, 8) || "".b
  return if header.start_with?(ZIP_LOCAL_MAGIC)

  if header.start_with?(OLE_CFB_MAGIC)
    raise UnsupportedFormatError,
          "#{path} looks like a legacy .xls (BIFF/CFB). " \
          "rbxl supports .xlsx (OOXML) only; convert first, e.g. " \
          "`libreoffice --headless --convert-to xlsx #{File.basename(path.to_s)}`."
  end

  raise UnsupportedFormatError,
        "#{path} is not a valid .xlsx (no ZIP signature at offset 0)."
end
192
+
122
193
  # Built-in numFmtId values that Excel resolves to date/time formats.
123
194
  # Ids outside this set are dates only when the workbook provides a
124
195
  # matching custom +<numFmt>+ entry whose format code contains date
data/lib/rbxl/version.rb CHANGED
@@ -1,4 +1,4 @@
1
1
  module Rbxl
2
2
  # Gem version string, tracked with semantic versioning.
3
- VERSION = "1.2.0"
3
+ VERSION = "1.3.0"
4
4
  end
data/lib/rbxl.rb CHANGED
@@ -92,6 +92,14 @@ module Rbxl
92
92
  # intent explicitly at the call site. Passing +read_only: false+ raises
93
93
  # {NotImplementedError}; a read/write mode is not available.
94
94
  #
95
+ # When a block is given, the workbook is yielded and automatically
96
+ # closed when the block returns (or raises), mirroring the +File.open+
97
+ # and +Zip::File.open+ idiom:
98
+ #
99
+ # Rbxl.open("report.xlsx") do |book|
100
+ # book.sheet("Report").each_row(values_only: true) { |row| p row }
101
+ # end
102
+ #
95
103
  # With <tt>streaming: true</tt>, the native backend (when loaded) feeds
96
104
  # worksheet XML to the parser in chunks pulled from the ZIP input stream
97
105
  # instead of materializing the entire worksheet as one Ruby string. This
@@ -117,12 +125,15 @@ module Rbxl
117
125
  # the native extension is not loaded.
118
126
  # @param date_conversion [Boolean] convert numeric cells backed by a
119
127
  # date/time +numFmt+ to +Date+ / +Time+ / +DateTime+
120
- # @return [Rbxl::ReadOnlyWorkbook]
128
+ # @yieldparam book [Rbxl::ReadOnlyWorkbook] opened workbook; auto-closed
129
+ # when the block returns
130
+ # @return [Rbxl::ReadOnlyWorkbook, Object] the workbook when no block is
131
+ # given, otherwise the block's return value
121
132
  # @raise [NotImplementedError] if +read_only+ is not +true+
122
- def open(path, read_only: true, streaming: false, date_conversion: false)
133
+ def open(path, read_only: true, streaming: false, date_conversion: false, &block)
123
134
  raise NotImplementedError, "read/write mode is not supported; pass read_only: true" unless read_only
124
135
 
125
- ReadOnlyWorkbook.open(path, streaming: streaming, date_conversion: date_conversion)
136
+ ReadOnlyWorkbook.open(path, streaming: streaming, date_conversion: date_conversion, &block)
126
137
  end
127
138
 
128
139
  # Creates a new workbook in write-only mode.
data/sig/rbxl.rbs CHANGED
@@ -10,6 +10,7 @@ module Rbxl
10
10
  type dimensions = { ref: String, max_col: Integer, max_row: Integer }
11
11
 
12
12
  def self.open: (pathish path, ?read_only: bool, ?streaming: bool, ?date_conversion: bool) -> ReadOnlyWorkbook
13
+ | [T] (pathish path, ?read_only: bool, ?streaming: bool, ?date_conversion: bool) { (ReadOnlyWorkbook) -> T } -> T
13
14
  def self.new: (?write_only: bool) -> WriteOnlyWorkbook
14
15
 
15
16
  attr_accessor self.max_shared_strings: Integer?
@@ -37,6 +38,9 @@ module Rbxl
37
38
  class WorksheetTooLargeError < Error
38
39
  end
39
40
 
41
+ class UnsupportedFormatError < Error
42
+ end
43
+
40
44
  class Cell
41
45
  attr_accessor value: cell_value
42
46
  attr_accessor coordinate: String?
@@ -84,8 +88,11 @@ module Rbxl
84
88
  attr_reader sheet_names: Array[String]
85
89
 
86
90
  def self.open: (pathish path, ?streaming: bool, ?date_conversion: bool) -> ReadOnlyWorkbook
91
+ | [T] (pathish path, ?streaming: bool, ?date_conversion: bool) { (ReadOnlyWorkbook) -> T } -> T
87
92
  def initialize: (pathish path, ?streaming: bool, ?date_conversion: bool) -> void
88
- def sheet: (String name) -> ReadOnlyWorksheet
93
+ def sheet: (String | Integer name_or_index) -> ReadOnlyWorksheet
94
+ def sheets: () { (ReadOnlyWorksheet) -> void } -> void
95
+ | () -> Enumerator[ReadOnlyWorksheet, void]
89
96
  def close: () -> void
90
97
  def closed?: () -> bool
91
98
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbxl
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0
4
+ version: 1.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Taro KOBAYASHI