rbxl 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +154 -0
- data/Rakefile +5 -0
- data/ext/rbxl_native/extconf.rb +51 -0
- data/ext/rbxl_native/native.c +677 -0
- data/lib/rbxl/cell.rb +3 -0
- data/lib/rbxl/empty_cell.rb +13 -0
- data/lib/rbxl/errors.rb +7 -0
- data/lib/rbxl/native.rb +15 -0
- data/lib/rbxl/read_only_cell.rb +3 -0
- data/lib/rbxl/read_only_workbook.rb +153 -0
- data/lib/rbxl/read_only_worksheet.rb +501 -0
- data/lib/rbxl/row.rb +23 -0
- data/lib/rbxl/version.rb +3 -0
- data/lib/rbxl/write_only_cell.rb +10 -0
- data/lib/rbxl/write_only_workbook.rb +143 -0
- data/lib/rbxl/write_only_worksheet.rb +180 -0
- data/lib/rbxl.rb +33 -0
- metadata +97 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 7dfc04eae51753bfa17b28f87476e1da9904efd4748a6a503ef999b58316d419
|
|
4
|
+
data.tar.gz: dcad9a70d574b225be56d5c942995c5634ef06ac2a882ff030a69d9d1fde1ecb
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: c0c65e0501a613c690795274aee90ee71b26332e916f416e62f7df398d610a67992e230651ffec87a9017243adfb56c517da6498d076e54ed11fe61a8f6dc74d
|
|
7
|
+
data.tar.gz: fd952f51da370eb1a9a433d661f0c6018a7460e43560b72eb9682cf938a833edda949774ebfcd942add2eabea1d87e37199d993f0b5172153710b0f264d890a9
|
data/LICENSE.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Taro KOBAYASHI
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
# rbxl
|
|
2
|
+
|
|
3
|
+
`openpyxl`-inspired Ruby gem for large-ish `.xlsx` files.
|
|
4
|
+
|
|
5
|
+
Current scope is intentionally small:
|
|
6
|
+
|
|
7
|
+
- `write_only` workbook generation
|
|
8
|
+
- `read_only` row streaming
|
|
9
|
+
- `close()` for read-only workbooks
|
|
10
|
+
- minimal `openpyxl`-like API
|
|
11
|
+
- optional C extension (`rbxl/native`) for maximum performance
|
|
12
|
+
|
|
13
|
+
Out of scope for this MVP:
|
|
14
|
+
|
|
15
|
+
- preserving arbitrary workbook structure on save
|
|
16
|
+
- rich style round-tripping
|
|
17
|
+
- formulas, images, charts, comments
|
|
18
|
+
|
|
19
|
+
## Usage
|
|
20
|
+
|
|
21
|
+
```ruby
|
|
22
|
+
require "rbxl"
|
|
23
|
+
|
|
24
|
+
book = Rbxl.new(write_only: true)
|
|
25
|
+
sheet = book.add_sheet("Report")
|
|
26
|
+
sheet.append(["id", "name", "score"])
|
|
27
|
+
sheet.append([1, "alice", 100])
|
|
28
|
+
sheet.append([2, "bob", 95.5])
|
|
29
|
+
book.save("report.xlsx")
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
```ruby
|
|
33
|
+
require "rbxl"
|
|
34
|
+
|
|
35
|
+
book = Rbxl.open("report.xlsx", read_only: true)
|
|
36
|
+
sheet = book.sheet("Report")
|
|
37
|
+
|
|
38
|
+
sheet.each_row do |row|
|
|
39
|
+
p row.values
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
p sheet.calculate_dimension
|
|
43
|
+
|
|
44
|
+
book.close
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
`write_only` workbooks are save-once by design. This matches the optimized
|
|
48
|
+
mode tradeoff: low flexibility in exchange for simpler memory behavior.
|
|
49
|
+
|
|
50
|
+
## Native C Extension
|
|
51
|
+
|
|
52
|
+
Add a single `require` to opt in to the libxml2-based C extension for
|
|
53
|
+
significantly faster read and write performance:
|
|
54
|
+
|
|
55
|
+
```ruby
|
|
56
|
+
require "rbxl"
|
|
57
|
+
require "rbxl/native" # opt-in
|
|
58
|
+
|
|
59
|
+
# Same API, backed by C extension
|
|
60
|
+
book = Rbxl.open("large.xlsx", read_only: true)
|
|
61
|
+
book.sheet("Data").rows(values_only: true).each { |row| process(row) }
|
|
62
|
+
book.close
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
The C extension is **opt-in by design**:
|
|
66
|
+
|
|
67
|
+
- **Portability first**: `require "rbxl"` alone works everywhere Ruby and
|
|
68
|
+
Nokogiri run, with zero native compilation required. This is the default.
|
|
69
|
+
- **Performance when you need it**: `require "rbxl/native"` activates the
|
|
70
|
+
libxml2 SAX2 backend for read/write hot paths. If the `.so` was not built
|
|
71
|
+
(e.g. libxml2 headers missing at install time), you get a clear `LoadError`
|
|
72
|
+
rather than a silent degradation.
|
|
73
|
+
- **Same API, same output**: switching between the two paths changes nothing
|
|
74
|
+
about behavior or output format. The test suite runs both paths and
|
|
75
|
+
compares results cell-by-cell to guarantee parity.
|
|
76
|
+
- **Fallback is automatic at build time**: `gem install rbxl` attempts to
|
|
77
|
+
compile the C extension. If libxml2 is not found, compilation is silently
|
|
78
|
+
skipped and the gem installs successfully without it. You only notice when
|
|
79
|
+
you try `require "rbxl/native"`.
|
|
80
|
+
- **Current boundary cost is explicit**: worksheet ZIP entries are still
|
|
81
|
+
inflated into a Ruby string before crossing into C. The extension removes
|
|
82
|
+
XML parse overhead, but not ZIP I/O or that intermediate buffer.
|
|
83
|
+
|
|
84
|
+
Requirements for the C extension:
|
|
85
|
+
|
|
86
|
+
- libxml2 development headers (`libxml2-dev` / `libxml2-devel`), or
|
|
87
|
+
- Nokogiri with bundled libxml2 (headers are detected automatically)
|
|
88
|
+
|
|
89
|
+
## Design Notes
|
|
90
|
+
|
|
91
|
+
- Writer avoids a full workbook object graph and streams rows into sheet XML.
|
|
92
|
+
- Reader uses a pull parser for worksheet XML so it can iterate rows without building the full DOM.
|
|
93
|
+
- Strings written by the MVP use `inlineStr` to avoid shared string bookkeeping during generation.
|
|
94
|
+
- Reader supports both shared strings and inline strings.
|
|
95
|
+
- The native extension uses libxml2 SAX2 directly, bypassing Nokogiri's per-node Ruby object allocation overhead.
|
|
96
|
+
|
|
97
|
+
## Development
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
bundle install
|
|
101
|
+
|
|
102
|
+
# Run tests (pure Ruby)
|
|
103
|
+
ruby -Ilib -Itest test/rbxl_test.rb
|
|
104
|
+
|
|
105
|
+
# Run tests (with native extension)
|
|
106
|
+
cd ext/rbxl_native && ruby extconf.rb && make && cd ../..
|
|
107
|
+
ruby -Ilib -Itest -r rbxl/native test/rbxl_test.rb
|
|
108
|
+
ruby -Ilib -Itest test/fast_ext_test.rb
|
|
109
|
+
|
|
110
|
+
# Benchmarks
|
|
111
|
+
ruby -Ilib benchmark/compare.rb # pure Ruby
|
|
112
|
+
ruby -Ilib -r rbxl/native benchmark/compare.rb # with native
|
|
113
|
+
RBXL_BENCH_WARMUP=1 RBXL_BENCH_ITERATIONS=5 ruby -Ilib benchmark/read_modes.rb
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
## Benchmarks
|
|
117
|
+
|
|
118
|
+
5000 rows x 10 columns, Ruby 3.4 / Python 3.13:
|
|
119
|
+
|
|
120
|
+

|
|
121
|
+
|
|
122
|
+
### Pure Ruby (Nokogiri Reader)
|
|
123
|
+
|
|
124
|
+
| benchmark | real (s) |
|
|
125
|
+
|---|---|
|
|
126
|
+
| rbxl write | 0.09 |
|
|
127
|
+
| rbxl read | 0.30 |
|
|
128
|
+
| rbxl read values | 0.22 |
|
|
129
|
+
| openpyxl write | 0.36 |
|
|
130
|
+
| openpyxl read | 0.28 |
|
|
131
|
+
| openpyxl read values | 0.26 |
|
|
132
|
+
|
|
133
|
+
### With `rbxl/native`
|
|
134
|
+
|
|
135
|
+
| benchmark | real (s) | vs openpyxl |
|
|
136
|
+
|---|---|---|
|
|
137
|
+
| rbxl write | **0.04** | 9x faster |
|
|
138
|
+
| rbxl read | **0.08** | 3.5x faster |
|
|
139
|
+
| rbxl read values | **0.03** | 9x faster |
|
|
140
|
+
|
|
141
|
+
The comparison script uses the following libraries when available (the list appears after the benchmark notes below):
|
|
142
|
+
|
|
143
|
+
Benchmark notes:
|
|
144
|
+
|
|
145
|
+
- `RBXL_BENCH_WARMUP` and `RBXL_BENCH_ITERATIONS` control warmup and repeated runs.
|
|
146
|
+
- Read comparisons use the same `rbxl.xlsx` fixture for `rbxl`, `roo`, `rubyXL`, and `openpyxl`.
|
|
147
|
+
- Write comparisons still measure each library producing its own workbook.
|
|
148
|
+
- `rss_delta_kb` is best-effort process RSS on Linux and should be treated as directional.
|
|
149
|
+
|
|
150
|
+
- `rbxl` for write/read
|
|
151
|
+
- `caxlsx` for write
|
|
152
|
+
- `roo` for read streaming
|
|
153
|
+
- `rubyXL` for full workbook read
|
|
154
|
+
- `openpyxl` as a Python reference point when `openpyxl` or `uv` is available
|
data/ext/rbxl_native/extconf.rb
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
require "mkmf"
|
|
2
|
+
|
|
3
|
+
# Locate libxml2 headers and library for the rbxl_native C extension.
|
|
4
|
+
# Priority (each later step runs only if no earlier step set `found`):
|
|
5
|
+
# 1. Nokogiri's bundled libxml2 (avoids version mismatch warnings)
|
|
6
|
+
# 2. System pkg-config
|
|
7
|
+
# 3. Common system paths
|
|
8
|
+
#
|
|
9
|
+
# If libxml2 is not available at all, write a no-op Makefile and exit 0 so
|
|
10
|
+
# that `gem install rbxl` never fails — the C extension is optional.
|
|
11
|
+
|
|
12
|
+
found = false
|
|
13
|
+
|
|
14
|
+
# 1. Try Nokogiri's bundled libxml2 (headers expected under <nokogiri gem>/ext/nokogiri/include/libxml2)
|
|
15
|
+
begin
|
|
16
|
+
nokogiri_spec = Gem::Specification.find_by_name("nokogiri")
|
|
17
|
+
nokogiri_include = File.join(nokogiri_spec.full_gem_path, "ext", "nokogiri", "include", "libxml2")
|
|
18
|
+
nokogiri_lib = File.join(nokogiri_spec.full_gem_path, "ext", "nokogiri")
|
|
19
|
+
|
|
20
|
+
if File.directory?(nokogiri_include) && find_header("libxml/parser.h", nokogiri_include)
|
|
21
|
+
# Link against Nokogiri's bundled libxml2: put the directory of the first nokogiri.so / nokogiri.bundle found on the -L search path and the rpath
|
|
22
|
+
nokogiri_so = Dir.glob(File.join(nokogiri_lib, "**", "nokogiri.{so,bundle}")).first
|
|
23
|
+
if nokogiri_so
|
|
24
|
+
so_dir = File.dirname(nokogiri_so)
|
|
25
|
+
$LDFLAGS << " -L#{so_dir} -Wl,-rpath,#{so_dir}"
|
|
26
|
+
end
|
|
27
|
+
found = have_library("xml2") || true # `|| true` keeps found set even if the xml2 link check fails: headers were found via Nokogiri, may link at runtime
|
|
28
|
+
end
|
|
29
|
+
rescue Gem::MissingSpecError
|
|
30
|
+
# Nokogiri not installed — fall through to the system lookups below
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
# 2. System pkg-config (consulted only when step 1 did not set found)
|
|
34
|
+
found ||= pkg_config("libxml-2.0")
|
|
35
|
+
|
|
36
|
+
# 3. Common system paths: the default header search path first, then /usr/include/libxml2
|
|
37
|
+
found ||= (have_header("libxml/parser.h") && have_library("xml2"))
|
|
38
|
+
found ||= (find_header("libxml/parser.h", "/usr/include/libxml2") && have_library("xml2"))
|
|
39
|
+
|
|
40
|
+
unless found
|
|
41
|
+
warn "rbxl_native: libxml2 not found — skipping C extension build"
|
|
42
|
+
File.write("Makefile", "all install clean:\n\t@:\n")
|
|
43
|
+
exit 0
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
# Warning and hardening flags: _FORTIFY_SOURCE is added only if $CFLAGS does not already mention it, -fstack-protector-strong only if try_cflags accepts it
|
|
47
|
+
$CFLAGS << " -Wall -Wextra -Werror=format-security"
|
|
48
|
+
$CFLAGS << " -D_FORTIFY_SOURCE=2" unless $CFLAGS.include?("_FORTIFY_SOURCE")
|
|
49
|
+
$CFLAGS << " -fstack-protector-strong" if try_cflags("-fstack-protector-strong")
|
|
50
|
+
|
|
51
|
+
create_makefile("rbxl_native/rbxl_native")
|