openexcel-c 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openexcel_c-0.1.1/.github/workflows/publish.yml +76 -0
- openexcel_c-0.1.1/.gitignore +16 -0
- openexcel_c-0.1.1/CMakeLists.txt +50 -0
- openexcel_c-0.1.1/LICENSE +21 -0
- openexcel_c-0.1.1/PKG-INFO +193 -0
- openexcel_c-0.1.1/README.md +165 -0
- openexcel_c-0.1.1/pyproject.toml +58 -0
- openexcel_c-0.1.1/src/_openexcel/cell.c +103 -0
- openexcel_c-0.1.1/src/_openexcel/cell.h +45 -0
- openexcel_c-0.1.1/src/_openexcel/module.c +485 -0
- openexcel_c-0.1.1/src/_openexcel/reader/reader.c +87 -0
- openexcel_c-0.1.1/src/_openexcel/reader/reader.h +7 -0
- openexcel_c-0.1.1/src/_openexcel/reader/xml_sheet.c +244 -0
- openexcel_c-0.1.1/src/_openexcel/reader/xml_sheet.h +8 -0
- openexcel_c-0.1.1/src/_openexcel/reader/xml_sst.c +115 -0
- openexcel_c-0.1.1/src/_openexcel/reader/xml_sst.h +7 -0
- openexcel_c-0.1.1/src/_openexcel/reader/xml_styles.c +88 -0
- openexcel_c-0.1.1/src/_openexcel/reader/xml_styles.h +7 -0
- openexcel_c-0.1.1/src/_openexcel/reader/xml_workbook.c +129 -0
- openexcel_c-0.1.1/src/_openexcel/reader/xml_workbook.h +11 -0
- openexcel_c-0.1.1/src/_openexcel/reader/zip_reader.c +56 -0
- openexcel_c-0.1.1/src/_openexcel/reader/zip_reader.h +21 -0
- openexcel_c-0.1.1/src/_openexcel/string_table.c +65 -0
- openexcel_c-0.1.1/src/_openexcel/string_table.h +21 -0
- openexcel_c-0.1.1/src/_openexcel/styles.c +74 -0
- openexcel_c-0.1.1/src/_openexcel/styles.h +24 -0
- openexcel_c-0.1.1/src/_openexcel/third_party/khash.h +627 -0
- openexcel_c-0.1.1/src/_openexcel/third_party/miniz.c +646 -0
- openexcel_c-0.1.1/src/_openexcel/third_party/miniz.h +615 -0
- openexcel_c-0.1.1/src/_openexcel/third_party/miniz_common.h +89 -0
- openexcel_c-0.1.1/src/_openexcel/third_party/miniz_export.h +3 -0
- openexcel_c-0.1.1/src/_openexcel/third_party/miniz_tdef.c +1602 -0
- openexcel_c-0.1.1/src/_openexcel/third_party/miniz_tdef.h +199 -0
- openexcel_c-0.1.1/src/_openexcel/third_party/miniz_tinfl.c +778 -0
- openexcel_c-0.1.1/src/_openexcel/third_party/miniz_tinfl.h +150 -0
- openexcel_c-0.1.1/src/_openexcel/third_party/miniz_zip.c +4895 -0
- openexcel_c-0.1.1/src/_openexcel/third_party/miniz_zip.h +454 -0
- openexcel_c-0.1.1/src/_openexcel/workbook.c +46 -0
- openexcel_c-0.1.1/src/_openexcel/workbook.h +22 -0
- openexcel_c-0.1.1/src/_openexcel/worksheet.c +36 -0
- openexcel_c-0.1.1/src/_openexcel/worksheet.h +19 -0
- openexcel_c-0.1.1/src/_openexcel/writer/sheet_writer.c +82 -0
- openexcel_c-0.1.1/src/_openexcel/writer/sheet_writer.h +8 -0
- openexcel_c-0.1.1/src/_openexcel/writer/sst_writer.c +19 -0
- openexcel_c-0.1.1/src/_openexcel/writer/sst_writer.h +6 -0
- openexcel_c-0.1.1/src/_openexcel/writer/writer.c +183 -0
- openexcel_c-0.1.1/src/_openexcel/writer/writer.h +8 -0
- openexcel_c-0.1.1/src/_openexcel/writer/xml_gen.c +101 -0
- openexcel_c-0.1.1/src/_openexcel/writer/xml_gen.h +37 -0
- openexcel_c-0.1.1/src/_openexcel/writer/zip_writer.c +38 -0
- openexcel_c-0.1.1/src/_openexcel/writer/zip_writer.h +12 -0
- openexcel_c-0.1.1/src/openexcel/__init__.py +3 -0
- openexcel_c-0.1.1/tests/conftest.py +9 -0
- openexcel_c-0.1.1/tests/fixtures/dates.xlsx +0 -0
- openexcel_c-0.1.1/tests/fixtures/mixed.xlsx +0 -0
- openexcel_c-0.1.1/tests/fixtures/simple.xlsx +0 -0
- openexcel_c-0.1.1/tests/fixtures/strings.xlsx +0 -0
- openexcel_c-0.1.1/tests/make_fixtures.py +46 -0
- openexcel_c-0.1.1/tests/test_read_basic.py +45 -0
- openexcel_c-0.1.1/tests/test_read_dates.py +25 -0
- openexcel_c-0.1.1/tests/test_read_strings.py +20 -0
- openexcel_c-0.1.1/tests/test_roundtrip.py +52 -0
- openexcel_c-0.1.1/tests/test_write_basic.py +44 -0
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
name: Build and publish
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- "v*"
|
|
7
|
+
pull_request: # also build (but don't publish) on every PR to catch compile errors early
|
|
8
|
+
workflow_dispatch: # allow manual trigger from GitHub UI
|
|
9
|
+
|
|
10
|
+
jobs:
|
|
11
|
+
build_wheels:
|
|
12
|
+
name: Build wheels on ${{ matrix.os }}
|
|
13
|
+
runs-on: ${{ matrix.os }}
|
|
14
|
+
strategy:
|
|
15
|
+
fail-fast: false
|
|
16
|
+
matrix:
|
|
17
|
+
os: [ubuntu-latest, macos-latest]
|
|
18
|
+
|
|
19
|
+
steps:
|
|
20
|
+
- uses: actions/checkout@v4
|
|
21
|
+
|
|
22
|
+
- name: Set up QEMU
|
|
23
|
+
if: runner.os == 'Linux'
|
|
24
|
+
uses: docker/setup-qemu-action@v3
|
|
25
|
+
with:
|
|
26
|
+
platforms: arm64
|
|
27
|
+
|
|
28
|
+
- name: Build wheels
|
|
29
|
+
uses: pypa/cibuildwheel@v2.21.3
|
|
30
|
+
|
|
31
|
+
- uses: actions/upload-artifact@v4
|
|
32
|
+
with:
|
|
33
|
+
name: wheels-${{ matrix.os }}
|
|
34
|
+
path: ./wheelhouse/*.whl
|
|
35
|
+
|
|
36
|
+
build_sdist:
|
|
37
|
+
name: Build source distribution
|
|
38
|
+
runs-on: ubuntu-latest
|
|
39
|
+
steps:
|
|
40
|
+
- uses: actions/checkout@v4
|
|
41
|
+
|
|
42
|
+
- name: Install build
|
|
43
|
+
run: pip install build
|
|
44
|
+
|
|
45
|
+
- name: Build sdist
|
|
46
|
+
run: python -m build --sdist
|
|
47
|
+
|
|
48
|
+
- uses: actions/upload-artifact@v4
|
|
49
|
+
with:
|
|
50
|
+
name: sdist
|
|
51
|
+
path: dist/*.tar.gz
|
|
52
|
+
|
|
53
|
+
publish:
|
|
54
|
+
name: Publish to PyPI
|
|
55
|
+
needs: [build_wheels, build_sdist]
|
|
56
|
+
runs-on: ubuntu-latest
|
|
57
|
+
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
|
|
58
|
+
environment:
|
|
59
|
+
name: pypi
|
|
60
|
+
url: https://pypi.org/p/openexcel-c
|
|
61
|
+
permissions:
|
|
62
|
+
id-token: write # required for Trusted Publisher (OIDC)
|
|
63
|
+
|
|
64
|
+
steps:
|
|
65
|
+
- uses: actions/download-artifact@v4
|
|
66
|
+
with:
|
|
67
|
+
pattern: wheels-*
|
|
68
|
+
merge-multiple: true
|
|
69
|
+
path: dist/
|
|
70
|
+
|
|
71
|
+
- uses: actions/download-artifact@v4
|
|
72
|
+
with:
|
|
73
|
+
name: sdist
|
|
74
|
+
path: dist/
|
|
75
|
+
|
|
76
|
+
- uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
cmake_minimum_required(VERSION 3.20)
|
|
2
|
+
project(openexcel C)
|
|
3
|
+
|
|
4
|
+
find_package(Python REQUIRED COMPONENTS Interpreter Development.Module)
|
|
5
|
+
find_package(EXPAT REQUIRED)
|
|
6
|
+
|
|
7
|
+
set(OPENEXCEL_SOURCES
|
|
8
|
+
src/_openexcel/module.c
|
|
9
|
+
src/_openexcel/workbook.c
|
|
10
|
+
src/_openexcel/worksheet.c
|
|
11
|
+
src/_openexcel/cell.c
|
|
12
|
+
src/_openexcel/string_table.c
|
|
13
|
+
src/_openexcel/styles.c
|
|
14
|
+
src/_openexcel/reader/reader.c
|
|
15
|
+
src/_openexcel/reader/zip_reader.c
|
|
16
|
+
src/_openexcel/reader/xml_workbook.c
|
|
17
|
+
src/_openexcel/reader/xml_sst.c
|
|
18
|
+
src/_openexcel/reader/xml_styles.c
|
|
19
|
+
src/_openexcel/reader/xml_sheet.c
|
|
20
|
+
src/_openexcel/writer/writer.c
|
|
21
|
+
src/_openexcel/writer/zip_writer.c
|
|
22
|
+
src/_openexcel/writer/xml_gen.c
|
|
23
|
+
src/_openexcel/writer/sheet_writer.c
|
|
24
|
+
src/_openexcel/writer/sst_writer.c
|
|
25
|
+
src/_openexcel/third_party/miniz.c
|
|
26
|
+
src/_openexcel/third_party/miniz_tdef.c
|
|
27
|
+
src/_openexcel/third_party/miniz_tinfl.c
|
|
28
|
+
src/_openexcel/third_party/miniz_zip.c
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
python_add_library(_openexcel MODULE WITH_SOABI ${OPENEXCEL_SOURCES})
|
|
32
|
+
|
|
33
|
+
target_include_directories(_openexcel PRIVATE
|
|
34
|
+
src/_openexcel
|
|
35
|
+
src/_openexcel/third_party
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
target_link_libraries(_openexcel PRIVATE EXPAT::EXPAT)
|
|
39
|
+
|
|
40
|
+
target_compile_options(_openexcel PRIVATE
|
|
41
|
+
-O3
|
|
42
|
+
-Wall
|
|
43
|
+
-Wextra
|
|
44
|
+
-Wno-unused-parameter
|
|
45
|
+
-Wno-cast-function-type
|
|
46
|
+
-Wno-missing-field-initializers
|
|
47
|
+
$<$<C_COMPILER_ID:GNU,Clang>:-fvisibility=hidden>
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
install(TARGETS _openexcel DESTINATION openexcel)
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Karun Gopal
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
|
+
Name: openexcel-c
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: High-performance .xlsx reader/writer (C extension)
|
|
5
|
+
Keywords: xlsx,excel,spreadsheet,openpyxl,performance
|
|
6
|
+
Author-Email: Karun Gopal <karun.gopal@gmail.com>
|
|
7
|
+
License: MIT
|
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Programming Language :: C
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Classifier: Topic :: Office/Business :: Financial :: Spreadsheet
|
|
18
|
+
Classifier: Operating System :: OS Independent
|
|
19
|
+
Project-URL: Homepage, https://github.com/karungop/openexCel
|
|
20
|
+
Project-URL: Repository, https://github.com/karungop/openexCel
|
|
21
|
+
Project-URL: Issues, https://github.com/karungop/openexCel/issues
|
|
22
|
+
Requires-Python: >=3.10
|
|
23
|
+
Provides-Extra: dev
|
|
24
|
+
Requires-Dist: pytest; extra == "dev"
|
|
25
|
+
Requires-Dist: pytest-benchmark; extra == "dev"
|
|
26
|
+
Requires-Dist: openpyxl; extra == "dev"
|
|
27
|
+
Description-Content-Type: text/markdown
|
|
28
|
+
|
|
29
|
+
# openexcel
|
|
30
|
+
|
|
31
|
+
A high-performance Python library for reading and writing `.xlsx` files, implemented as a C extension. Designed as a drop-in accelerator for workloads where [openpyxl](https://openpyxl.readthedocs.io/) is too slow.
|
|
32
|
+
|
|
33
|
+
## Why
|
|
34
|
+
|
|
35
|
+
openpyxl is pure Python. For large files (100k+ rows), the bottlenecks are ZIP extraction, XML parsing, and per-cell object allocation — all done in the Python interpreter. openexcel moves these to C:
|
|
36
|
+
|
|
37
|
+
- **Streaming SAX parsing** via libexpat — never loads the full XML into memory
|
|
38
|
+
- **Flat sorted cell array** — O(1) append during parse, O(1) sequential iteration
|
|
39
|
+
- **GIL released** during read and write — other threads run freely
|
|
40
|
+
- **Vendored miniz** — no external ZIP dependency
|
|
41
|
+
|
|
42
|
+
Typical speedups: **10–30× faster reads**, **5–15× faster writes** on 100k-row sheets.
|
|
43
|
+
|
|
44
|
+
## Installation
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
pip install openexcel-c
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
Pre-built wheels are available for macOS (arm64, x86_64) and Linux (x86_64, aarch64) for Python 3.10–3.13.
|
|
51
|
+
|
|
52
|
+
### Building from source
|
|
53
|
+
|
|
54
|
+
You need CMake ≥ 3.20, a C11 compiler, and libexpat development headers.
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
# macOS
|
|
58
|
+
brew install expat cmake
|
|
59
|
+
|
|
60
|
+
# Ubuntu/Debian
|
|
61
|
+
sudo apt-get install libexpat1-dev cmake
|
|
62
|
+
|
|
63
|
+
pip install openexcel-c --no-binary openexcel-c
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## Usage
|
|
67
|
+
|
|
68
|
+
### Reading
|
|
69
|
+
|
|
70
|
+
```python
|
|
71
|
+
import openexcel
|
|
72
|
+
|
|
73
|
+
wb = openexcel.load_workbook("data.xlsx")
|
|
74
|
+
ws = wb.active # first sheet
|
|
75
|
+
|
|
76
|
+
# Iterate rows — returns tuples of Python values
|
|
77
|
+
for row in ws.iter_rows():
|
|
78
|
+
print(row) # e.g. (1, "hello", 3.14, True, datetime.date(2024, 6, 1))
|
|
79
|
+
|
|
80
|
+
# Slice a range
|
|
81
|
+
for row in ws.iter_rows(min_row=2, max_row=100, min_col=1, max_col=5):
|
|
82
|
+
print(row)
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
Cell values map to Python types:
|
|
86
|
+
|
|
87
|
+
| Excel type | Python type |
|
|
88
|
+
|---|---|
|
|
89
|
+
| Number | `float` |
|
|
90
|
+
| String | `str` |
|
|
91
|
+
| Boolean | `bool` |
|
|
92
|
+
| Date / datetime | `datetime.date` / `datetime.datetime` |
|
|
93
|
+
| Empty | `None` |
|
|
94
|
+
| Error | `str` (e.g. `"#DIV/0!"`) |
|
|
95
|
+
|
|
96
|
+
### Writing
|
|
97
|
+
|
|
98
|
+
```python
|
|
99
|
+
import openexcel
|
|
100
|
+
import datetime
|
|
101
|
+
|
|
102
|
+
wb = openexcel.Workbook()
|
|
103
|
+
ws = wb.create_sheet("Sheet1")
|
|
104
|
+
|
|
105
|
+
ws.append(["Name", "Score", "Date"])
|
|
106
|
+
ws.append(["Alice", 98.5, datetime.date(2024, 6, 1)])
|
|
107
|
+
ws.append(["Bob", 72.0, datetime.date(2024, 6, 2)])
|
|
108
|
+
|
|
109
|
+
wb.save("output.xlsx")
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### Multiple sheets
|
|
113
|
+
|
|
114
|
+
```python
|
|
115
|
+
wb = openexcel.load_workbook("multi.xlsx")
|
|
116
|
+
|
|
117
|
+
# By index
|
|
118
|
+
ws = wb[0]
|
|
119
|
+
|
|
120
|
+
# By name
|
|
121
|
+
ws = wb["Summary"]
|
|
122
|
+
|
|
123
|
+
# Iterate all sheets
|
|
124
|
+
for ws in wb:
|
|
125
|
+
print(ws.title, ws.max_row, ws.max_column)
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
### Context manager
|
|
129
|
+
|
|
130
|
+
```python
|
|
131
|
+
with openexcel.load_workbook("data.xlsx") as wb:
|
|
132
|
+
ws = wb.active
|
|
133
|
+
data = [row for row in ws.iter_rows()]
|
|
134
|
+
# file resources released on exit
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
## API reference
|
|
138
|
+
|
|
139
|
+
### `openexcel.load_workbook(path: str) -> Workbook`
|
|
140
|
+
|
|
141
|
+
Open an existing `.xlsx` file for reading. Parses the entire file on load; subsequent access is from memory.
|
|
142
|
+
|
|
143
|
+
### `openexcel.Workbook()`
|
|
144
|
+
|
|
145
|
+
Create a new empty workbook.
|
|
146
|
+
|
|
147
|
+
### `Workbook.create_sheet(name: str) -> Worksheet`
|
|
148
|
+
|
|
149
|
+
Add a new sheet and return it.
|
|
150
|
+
|
|
151
|
+
### `Workbook.save(path: str)`
|
|
152
|
+
|
|
153
|
+
Write the workbook to an `.xlsx` file. GIL is released during write.
|
|
154
|
+
|
|
155
|
+
### `Workbook.active -> Worksheet`
|
|
156
|
+
|
|
157
|
+
Returns the first sheet.
|
|
158
|
+
|
|
159
|
+
### `Worksheet.iter_rows(min_row=None, max_row=None, min_col=None, max_col=None)`
|
|
160
|
+
|
|
161
|
+
Yields one tuple per row. Indices are 1-based and inclusive, matching openpyxl's convention.
|
|
162
|
+
|
|
163
|
+
### `Worksheet.append(row: list | tuple)`
|
|
164
|
+
|
|
165
|
+
Append a row of values. Accepts `int`, `float`, `str`, `bool`, `None`, `datetime.date`, `datetime.datetime`.
|
|
166
|
+
|
|
167
|
+
### `Worksheet.max_row -> int`
|
|
168
|
+
|
|
169
|
+
### `Worksheet.max_column -> int`
|
|
170
|
+
|
|
171
|
+
### `Worksheet.title -> str`
|
|
172
|
+
|
|
173
|
+
## Differences from openpyxl
|
|
174
|
+
|
|
175
|
+
| Feature | openexcel | openpyxl |
|
|
176
|
+
|---|---|---|
|
|
177
|
+
| Read speed | **Fast** (C, SAX) | Slow (pure Python) |
|
|
178
|
+
| Write speed | **Fast** (C, arena buffer) | Slow (pure Python) |
|
|
179
|
+
| Cell objects | Not exposed (values only) | Full `Cell` with font/fill/border |
|
|
180
|
+
| Formulas | Read result value only | Read formula string |
|
|
181
|
+
| Styles / formatting | Date detection only | Full style API |
|
|
182
|
+
| Merged cells | Not supported | Supported |
|
|
183
|
+
| Charts / images | Not supported | Supported |
|
|
184
|
+
|
|
185
|
+
openexcel is optimized for **data workloads** — reading and writing large tables of values. If you need full formatting control or chart support, use openpyxl.
|
|
186
|
+
|
|
187
|
+
## License
|
|
188
|
+
|
|
189
|
+
MIT — see [LICENSE](LICENSE).
|
|
190
|
+
|
|
191
|
+
Vendored dependencies:
|
|
192
|
+
- [miniz](https://github.com/richgel999/miniz) — public domain
|
|
193
|
+
- [khash](https://github.com/attractivechaos/klib) — MIT
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
# openexcel
|
|
2
|
+
|
|
3
|
+
A high-performance Python library for reading and writing `.xlsx` files, implemented as a C extension. Designed as a drop-in accelerator for workloads where [openpyxl](https://openpyxl.readthedocs.io/) is too slow.
|
|
4
|
+
|
|
5
|
+
## Why
|
|
6
|
+
|
|
7
|
+
openpyxl is pure Python. For large files (100k+ rows), the bottlenecks are ZIP extraction, XML parsing, and per-cell object allocation — all done in the Python interpreter. openexcel moves these to C:
|
|
8
|
+
|
|
9
|
+
- **Streaming SAX parsing** via libexpat — never loads the full XML into memory
|
|
10
|
+
- **Flat sorted cell array** — O(1) append during parse, O(1) sequential iteration
|
|
11
|
+
- **GIL released** during read and write — other threads run freely
|
|
12
|
+
- **Vendored miniz** — no external ZIP dependency
|
|
13
|
+
|
|
14
|
+
Typical speedups: **10–30× faster reads**, **5–15× faster writes** on 100k-row sheets.
|
|
15
|
+
|
|
16
|
+
## Installation
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
pip install openexcel-c
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
Pre-built wheels are available for macOS (arm64, x86_64) and Linux (x86_64, aarch64) for Python 3.10–3.13.
|
|
23
|
+
|
|
24
|
+
### Building from source
|
|
25
|
+
|
|
26
|
+
You need CMake ≥ 3.20, a C11 compiler, and libexpat development headers.
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
# macOS
|
|
30
|
+
brew install expat cmake
|
|
31
|
+
|
|
32
|
+
# Ubuntu/Debian
|
|
33
|
+
sudo apt-get install libexpat1-dev cmake
|
|
34
|
+
|
|
35
|
+
pip install openexcel-c --no-binary openexcel-c
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Usage
|
|
39
|
+
|
|
40
|
+
### Reading
|
|
41
|
+
|
|
42
|
+
```python
|
|
43
|
+
import openexcel
|
|
44
|
+
|
|
45
|
+
wb = openexcel.load_workbook("data.xlsx")
|
|
46
|
+
ws = wb.active # first sheet
|
|
47
|
+
|
|
48
|
+
# Iterate rows — returns tuples of Python values
|
|
49
|
+
for row in ws.iter_rows():
|
|
50
|
+
print(row) # e.g. (1, "hello", 3.14, True, datetime.date(2024, 6, 1))
|
|
51
|
+
|
|
52
|
+
# Slice a range
|
|
53
|
+
for row in ws.iter_rows(min_row=2, max_row=100, min_col=1, max_col=5):
|
|
54
|
+
print(row)
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
Cell values map to Python types:
|
|
58
|
+
|
|
59
|
+
| Excel type | Python type |
|
|
60
|
+
|---|---|
|
|
61
|
+
| Number | `float` |
|
|
62
|
+
| String | `str` |
|
|
63
|
+
| Boolean | `bool` |
|
|
64
|
+
| Date / datetime | `datetime.date` / `datetime.datetime` |
|
|
65
|
+
| Empty | `None` |
|
|
66
|
+
| Error | `str` (e.g. `"#DIV/0!"`) |
|
|
67
|
+
|
|
68
|
+
### Writing
|
|
69
|
+
|
|
70
|
+
```python
|
|
71
|
+
import openexcel
|
|
72
|
+
import datetime
|
|
73
|
+
|
|
74
|
+
wb = openexcel.Workbook()
|
|
75
|
+
ws = wb.create_sheet("Sheet1")
|
|
76
|
+
|
|
77
|
+
ws.append(["Name", "Score", "Date"])
|
|
78
|
+
ws.append(["Alice", 98.5, datetime.date(2024, 6, 1)])
|
|
79
|
+
ws.append(["Bob", 72.0, datetime.date(2024, 6, 2)])
|
|
80
|
+
|
|
81
|
+
wb.save("output.xlsx")
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
### Multiple sheets
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
wb = openexcel.load_workbook("multi.xlsx")
|
|
88
|
+
|
|
89
|
+
# By index
|
|
90
|
+
ws = wb[0]
|
|
91
|
+
|
|
92
|
+
# By name
|
|
93
|
+
ws = wb["Summary"]
|
|
94
|
+
|
|
95
|
+
# Iterate all sheets
|
|
96
|
+
for ws in wb:
|
|
97
|
+
print(ws.title, ws.max_row, ws.max_column)
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### Context manager
|
|
101
|
+
|
|
102
|
+
```python
|
|
103
|
+
with openexcel.load_workbook("data.xlsx") as wb:
|
|
104
|
+
ws = wb.active
|
|
105
|
+
data = [row for row in ws.iter_rows()]
|
|
106
|
+
# file resources released on exit
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
## API reference
|
|
110
|
+
|
|
111
|
+
### `openexcel.load_workbook(path: str) -> Workbook`
|
|
112
|
+
|
|
113
|
+
Open an existing `.xlsx` file for reading. Parses the entire file on load; subsequent access is from memory.
|
|
114
|
+
|
|
115
|
+
### `openexcel.Workbook()`
|
|
116
|
+
|
|
117
|
+
Create a new empty workbook.
|
|
118
|
+
|
|
119
|
+
### `Workbook.create_sheet(name: str) -> Worksheet`
|
|
120
|
+
|
|
121
|
+
Add a new sheet and return it.
|
|
122
|
+
|
|
123
|
+
### `Workbook.save(path: str)`
|
|
124
|
+
|
|
125
|
+
Write the workbook to an `.xlsx` file. GIL is released during write.
|
|
126
|
+
|
|
127
|
+
### `Workbook.active -> Worksheet`
|
|
128
|
+
|
|
129
|
+
Returns the first sheet.
|
|
130
|
+
|
|
131
|
+
### `Worksheet.iter_rows(min_row=None, max_row=None, min_col=None, max_col=None)`
|
|
132
|
+
|
|
133
|
+
Yields one tuple per row. Indices are 1-based and inclusive, matching openpyxl's convention.
|
|
134
|
+
|
|
135
|
+
### `Worksheet.append(row: list | tuple)`
|
|
136
|
+
|
|
137
|
+
Append a row of values. Accepts `int`, `float`, `str`, `bool`, `None`, `datetime.date`, `datetime.datetime`.
|
|
138
|
+
|
|
139
|
+
### `Worksheet.max_row -> int`
|
|
140
|
+
|
|
141
|
+
### `Worksheet.max_column -> int`
|
|
142
|
+
|
|
143
|
+
### `Worksheet.title -> str`
|
|
144
|
+
|
|
145
|
+
## Differences from openpyxl
|
|
146
|
+
|
|
147
|
+
| Feature | openexcel | openpyxl |
|
|
148
|
+
|---|---|---|
|
|
149
|
+
| Read speed | **Fast** (C, SAX) | Slow (pure Python) |
|
|
150
|
+
| Write speed | **Fast** (C, arena buffer) | Slow (pure Python) |
|
|
151
|
+
| Cell objects | Not exposed (values only) | Full `Cell` with font/fill/border |
|
|
152
|
+
| Formulas | Read result value only | Read formula string |
|
|
153
|
+
| Styles / formatting | Date detection only | Full style API |
|
|
154
|
+
| Merged cells | Not supported | Supported |
|
|
155
|
+
| Charts / images | Not supported | Supported |
|
|
156
|
+
|
|
157
|
+
openexcel is optimized for **data workloads** — reading and writing large tables of values. If you need full formatting control or chart support, use openpyxl.
|
|
158
|
+
|
|
159
|
+
## License
|
|
160
|
+
|
|
161
|
+
MIT — see [LICENSE](LICENSE).
|
|
162
|
+
|
|
163
|
+
Vendored dependencies:
|
|
164
|
+
- [miniz](https://github.com/richgel999/miniz) — public domain
|
|
165
|
+
- [khash](https://github.com/attractivechaos/klib) — MIT
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["scikit-build-core>=0.9"]
|
|
3
|
+
build-backend = "scikit_build_core.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "openexcel-c"
|
|
7
|
+
version = "0.1.1"
|
|
8
|
+
description = "High-performance .xlsx reader/writer (C extension)"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { text = "MIT" }
|
|
11
|
+
requires-python = ">=3.10"
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "Karun Gopal", email = "karun.gopal@gmail.com" }
|
|
14
|
+
]
|
|
15
|
+
keywords = ["xlsx", "excel", "spreadsheet", "openpyxl", "performance"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 3 - Alpha",
|
|
18
|
+
"Intended Audience :: Developers",
|
|
19
|
+
"License :: OSI Approved :: MIT License",
|
|
20
|
+
"Programming Language :: C",
|
|
21
|
+
"Programming Language :: Python :: 3",
|
|
22
|
+
"Programming Language :: Python :: 3.10",
|
|
23
|
+
"Programming Language :: Python :: 3.11",
|
|
24
|
+
"Programming Language :: Python :: 3.12",
|
|
25
|
+
"Programming Language :: Python :: 3.13",
|
|
26
|
+
"Topic :: Office/Business :: Financial :: Spreadsheet",
|
|
27
|
+
"Operating System :: OS Independent",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
[project.urls]
|
|
31
|
+
Homepage = "https://github.com/karungop/openexCel"
|
|
32
|
+
Repository = "https://github.com/karungop/openexCel"
|
|
33
|
+
Issues = "https://github.com/karungop/openexCel/issues"
|
|
34
|
+
|
|
35
|
+
[project.optional-dependencies]
|
|
36
|
+
dev = ["pytest", "pytest-benchmark", "openpyxl"]
|
|
37
|
+
|
|
38
|
+
[tool.scikit-build]
|
|
39
|
+
cmake.build-type = "Release"
|
|
40
|
+
wheel.packages = ["src/openexcel"]
|
|
41
|
+
|
|
42
|
+
[tool.cibuildwheel]
|
|
43
|
+
# Build CPython 3.10-3.13 only (no PyPy — C extension relies on CPython internals)
|
|
44
|
+
build = "cp310-* cp311-* cp312-* cp313-*"
|
|
45
|
+
# Skip 32-bit builds and musl libc (Alpine) — not worth the effort for v0.1
|
|
46
|
+
skip = "*-manylinux_i686 *-musllinux*"
|
|
47
|
+
|
|
48
|
+
[tool.cibuildwheel.linux]
|
|
49
|
+
# expat-devel is available in the manylinux2014 image via yum
|
|
50
|
+
before-build = "yum install -y expat-devel || (apt-get update && apt-get install -y libexpat1-dev)"
|
|
51
|
+
# Build for both x86_64 and arm64 using QEMU emulation
|
|
52
|
+
archs = ["x86_64", "aarch64"]
|
|
53
|
+
|
|
54
|
+
[tool.cibuildwheel.macos]
|
|
55
|
+
# Homebrew expat is installed in the macos-latest runner image
|
|
56
|
+
before-build = "brew install expat || true"
|
|
57
|
+
# Build native arm64 and cross-compile x86_64
|
|
58
|
+
archs = ["arm64", "x86_64"]
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
#include "cell.h"
|
|
2
|
+
#include <stdlib.h>
|
|
3
|
+
#include <string.h>
|
|
4
|
+
|
|
5
|
+
/* Excel's 1900 date system has a deliberate bug: it treats 1900 as a leap year.
|
|
6
|
+
Day 1 = 1900-01-01, Day 60 = 1900-02-29 (doesn't exist), Day 61 = 1900-03-01. */
|
|
7
|
+
|
|
8
|
+
static int is_leap(int y) {
|
|
9
|
+
return (y % 4 == 0 && y % 100 != 0) || (y % 400 == 0);
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
static int days_in_month(int m, int y) {
|
|
13
|
+
static const int dim[] = {31,28,31,30,31,30,31,31,30,31,30,31};
|
|
14
|
+
if (m == 2 && is_leap(y)) return 29;
|
|
15
|
+
return dim[m - 1];
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
OxlDate oxl_serial_to_date(double serial, int date1904) {
|
|
19
|
+
OxlDate d = {0};
|
|
20
|
+
if (serial < 0.0) return d;
|
|
21
|
+
|
|
22
|
+
long day_part;
|
|
23
|
+
double frac;
|
|
24
|
+
|
|
25
|
+
if (date1904) {
|
|
26
|
+
/* 1904 system: day 0 = 1904-01-01 */
|
|
27
|
+
day_part = (long)serial;
|
|
28
|
+
frac = serial - day_part;
|
|
29
|
+
int year = 1904, month = 1, day = 1;
|
|
30
|
+
while (day_part > 0) {
|
|
31
|
+
int dim = days_in_month(month, year);
|
|
32
|
+
if (day_part <= dim) { day = (int)day_part; break; }
|
|
33
|
+
day_part -= dim;
|
|
34
|
+
month++;
|
|
35
|
+
if (month > 12) { month = 1; year++; }
|
|
36
|
+
}
|
|
37
|
+
d.year = (int16_t)year;
|
|
38
|
+
d.month = (uint8_t)month;
|
|
39
|
+
d.day = (uint8_t)day;
|
|
40
|
+
} else {
|
|
41
|
+
/* 1900 system: skip the phantom day 60 (1900-02-29) */
|
|
42
|
+
if (serial >= 60.0) serial -= 1.0;
|
|
43
|
+
day_part = (long)serial;
|
|
44
|
+
frac = serial - (double)(long)serial;
|
|
45
|
+
/* day 1 = 1899-12-31; so day_part days after that */
|
|
46
|
+
int year = 1899, month = 12, day = 31;
|
|
47
|
+
while (day_part > 0) {
|
|
48
|
+
int dim = days_in_month(month, year);
|
|
49
|
+
int remaining = dim - day;
|
|
50
|
+
if (day_part <= remaining) { day += (int)day_part; day_part = 0; }
|
|
51
|
+
else {
|
|
52
|
+
day_part -= remaining + 1;
|
|
53
|
+
day = 1;
|
|
54
|
+
month++;
|
|
55
|
+
if (month > 12) { month = 1; year++; }
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
d.year = (int16_t)year;
|
|
59
|
+
d.month = (uint8_t)month;
|
|
60
|
+
d.day = (uint8_t)day;
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
/* time from fractional part */
|
|
64
|
+
long total_sec = (long)(frac * 86400.0 + 0.5);
|
|
65
|
+
d.hour = (uint8_t)(total_sec / 3600);
|
|
66
|
+
d.min = (uint8_t)((total_sec % 3600) / 60);
|
|
67
|
+
d.sec = (uint8_t)(total_sec % 60);
|
|
68
|
+
return d;
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
double oxl_date_to_serial(OxlDate d, int date1904) {
|
|
72
|
+
/* Count days from epoch to d.year-d.month-d.day */
|
|
73
|
+
long days = 0;
|
|
74
|
+
int base_year, base_month, base_day;
|
|
75
|
+
|
|
76
|
+
if (date1904) {
|
|
77
|
+
base_year = 1904; base_month = 1; base_day = 1;
|
|
78
|
+
} else {
|
|
79
|
+
base_year = 1899; base_month = 12; base_day = 31;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
int y = base_year, m = base_month, day = base_day;
|
|
83
|
+
while (y < d.year || (y == d.year && m < d.month) ||
|
|
84
|
+
(y == d.year && m == d.month && day < d.day)) {
|
|
85
|
+
days++;
|
|
86
|
+
day++;
|
|
87
|
+
if (day > days_in_month(m, y)) { day = 1; m++; }
|
|
88
|
+
if (m > 12) { m = 1; y++; }
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
if (!date1904 && days >= 59) days++; /* re-insert the phantom day 60 */
|
|
92
|
+
|
|
93
|
+
double frac = d.hour / 24.0 + d.min / 1440.0 + d.sec / 86400.0;
|
|
94
|
+
return (double)days + frac;
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
void oxl_cell_free(OxlCell *c) {
|
|
98
|
+
if (!c) return;
|
|
99
|
+
if (c->type == OXL_CELL_INLINE_STR || c->type == OXL_CELL_ERROR) {
|
|
100
|
+
free(c->v.s_inline);
|
|
101
|
+
c->v.s_inline = NULL;
|
|
102
|
+
}
|
|
103
|
+
}
|