hilbertsfc 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hilbertsfc-0.1.0/LICENSE +21 -0
- hilbertsfc-0.1.0/PKG-INFO +253 -0
- hilbertsfc-0.1.0/README.md +222 -0
- hilbertsfc-0.1.0/pyproject.toml +129 -0
- hilbertsfc-0.1.0/src/hilbertsfc/__init__.py +39 -0
- hilbertsfc-0.1.0/src/hilbertsfc/_cache.py +81 -0
- hilbertsfc-0.1.0/src/hilbertsfc/_data/__init__.py +5 -0
- hilbertsfc-0.1.0/src/hilbertsfc/_data/lut_2d4b_b_qs_u64.npy +0 -0
- hilbertsfc-0.1.0/src/hilbertsfc/_data/lut_2d4b_q_bs_u64.npy +0 -0
- hilbertsfc-0.1.0/src/hilbertsfc/_data/lut_3d2b_sb_so_u16.npy +0 -0
- hilbertsfc-0.1.0/src/hilbertsfc/_data/lut_3d2b_so_sb_u16.npy +0 -0
- hilbertsfc-0.1.0/src/hilbertsfc/_dispatch.py +57 -0
- hilbertsfc-0.1.0/src/hilbertsfc/_dtype.py +109 -0
- hilbertsfc-0.1.0/src/hilbertsfc/_flatten.py +40 -0
- hilbertsfc-0.1.0/src/hilbertsfc/_input_checks.py +28 -0
- hilbertsfc-0.1.0/src/hilbertsfc/_kernels/__init__.py +6 -0
- hilbertsfc-0.1.0/src/hilbertsfc/_kernels/hilbert2d_decode.py +69 -0
- hilbertsfc-0.1.0/src/hilbertsfc/_kernels/hilbert2d_encode.py +68 -0
- hilbertsfc-0.1.0/src/hilbertsfc/_kernels/hilbert3d_decode.py +76 -0
- hilbertsfc-0.1.0/src/hilbertsfc/_kernels/hilbert3d_encode.py +78 -0
- hilbertsfc-0.1.0/src/hilbertsfc/_luts.py +91 -0
- hilbertsfc-0.1.0/src/hilbertsfc/_nbits.py +17 -0
- hilbertsfc-0.1.0/src/hilbertsfc/_typing.py +59 -0
- hilbertsfc-0.1.0/src/hilbertsfc/hilbert2d.py +431 -0
- hilbertsfc-0.1.0/src/hilbertsfc/hilbert2d.pyi +237 -0
- hilbertsfc-0.1.0/src/hilbertsfc/hilbert3d.py +486 -0
- hilbertsfc-0.1.0/src/hilbertsfc/hilbert3d.pyi +247 -0
- hilbertsfc-0.1.0/src/hilbertsfc/py.typed +0 -0
hilbertsfc-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Remco Leijenaar
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: hilbertsfc
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Ultra-fast 2D and 3D Hilbert space-filling curve encode/decode kernels (NumPy + Numba).
|
|
5
|
+
Keywords: hilbert,space-filling-curve,sfc,numba,numpy,indexing
|
|
6
|
+
Author: Remco Leijenaar
|
|
7
|
+
License-Expression: MIT
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Intended Audience :: Science/Research
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Operating System :: OS Independent
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
19
|
+
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering
|
|
21
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
22
|
+
Classifier: Typing :: Typed
|
|
23
|
+
Requires-Dist: numba>=0.59.0
|
|
24
|
+
Requires-Dist: numpy>=1.26.0
|
|
25
|
+
Requires-Python: >=3.12
|
|
26
|
+
Project-URL: Homepage, https://remcofl.github.io/HilbertSFC/
|
|
27
|
+
Project-URL: Source, https://github.com/remcofl/HilbertSFC
|
|
28
|
+
Project-URL: Documentation, https://remcofl.github.io/HilbertSFC/
|
|
29
|
+
Project-URL: Issues, https://github.com/remcofl/HilbertSFC/issues
|
|
30
|
+
Description-Content-Type: text/markdown
|
|
31
|
+
|
|
32
|
+
<!-- markdownlint-disable MD033 MD041 -->
|
|
33
|
+
---
|
|
34
|
+
<h1 align="center">
|
|
35
|
+
HilbertSFC
|
|
36
|
+
</h1>
|
|
37
|
+
|
|
38
|
+
<p align="center">
|
|
39
|
+
<a href="https://github.com/remcofl/HilbertSFC/blob/main/LICENSE">
|
|
40
|
+
<img src="https://img.shields.io/badge/License-MIT-97ca00?style=flat-square" alt="License">
|
|
41
|
+
</a>
|
|
42
|
+
<a href="https://remcofl.github.io/HilbertSFC/">
|
|
43
|
+
<img src="https://img.shields.io/badge/Docs-API%20%26%20Guide-0A7F8E?style=flat-square" alt="Documentation">
|
|
44
|
+
</a>
|
|
45
|
+
<a href="https://pypi.org/project/hilbertsfc/">
|
|
46
|
+
<img src="https://img.shields.io/pypi/v/hilbertsfc?label=PyPI&style=flat-square" alt="PyPI">
|
|
47
|
+
</a>
|
|
48
|
+
<a href="https://pypi.org/project/hilbertsfc/">
|
|
49
|
+
<img src="https://img.shields.io/pypi/pyversions/hilbertsfc?label=Python&style=flat-square" alt="Python versions">
|
|
50
|
+
</a>
|
|
51
|
+
<a href="https://github.com/remcofl/HilbertSFC/actions/workflows/ci.yml">
|
|
52
|
+
<img src="https://img.shields.io/github/actions/workflow/status/remcofl/HilbertSFC/ci.yml?branch=main&label=CI&style=flat-square" alt="CI">
|
|
53
|
+
</a>
|
|
54
|
+
</p>
|
|
55
|
+
|
|
56
|
+
<p align="center">
|
|
57
|
+
<strong>Ultra-fast 2D & 3D Hilbert space-filling curve encode/decode kernels for Python.</strong>
|
|
58
|
+
</p>
|
|
59
|
+
|
|
60
|
+
<p align="center">
|
|
61
|
+
<img src="https://raw.githubusercontent.com/remcofl/HilbertSFC/refs/heads/main/docs/img/hilbert2d_grid.png" width="420" align="middle" alt="2D Hilbert curves for nbits 1..5" />
|
|
62
|
+
<img src="https://raw.githubusercontent.com/remcofl/HilbertSFC/refs/heads/main/docs/img/hilbert3d_grid.webp" width="340" align="middle" hspace="5" alt="3D Hilbert curves animation grid for nbits 1..4" />
|
|
63
|
+
</p>
|
|
64
|
+
|
|
65
|
+
<p align="center">
|
|
66
|
+
<sub>2D Hilbert curves (nbits 1..5) and 3D Hilbert curves (nbits 1..4, animated).</sub>
|
|
67
|
+
</p>
|
|
68
|
+
|
|
69
|
+
---
|
|
70
|
+
|
|
71
|
+
This project is **performance-first** and **implemented entirely in Python**. The hot kernels are JIT-compiled with Numba and tuned for:
|
|
72
|
+
|
|
73
|
+
- **Branchless, fully unrolled inner loops**
|
|
74
|
+
- **SIMD via LLVM vector intrinsics**
|
|
75
|
+
- **Small, L1-cache-friendly lookup tables (LUTs)**
|
|
76
|
+
- **Reduced dependency chains for better ILP and MLP**
|
|
77
|
+
- **Optional multi-threading for batch operations**
|
|
78
|
+
|
|
79
|
+
It provides both convenient Python APIs and *kernel accessors* designed to be embedded into other Numba kernels.
|
|
80
|
+
|
|
81
|
+
## Performance
|
|
82
|
+
|
|
83
|
+
**HilbertSFC** is orders of magnitude faster than existing Python implementations. It also outperforms the **Fast Hilbert** implementation in Rust by a factor of ~7x. In fact, **HilbertSFC** takes only ~8 CPU cycles per point for 2D encode/decode of 32-bit coordinates.
|
|
84
|
+
|
|
85
|
+
#### 2D Points - Random, `nbits=32`, `n=5,000,000`
|
|
86
|
+
|
|
87
|
+
| Implementation | ns/pt (enc) | ns/pt (dec) | Mpts/s (enc) | Mpts/s (dec) |
|
|
88
|
+
| --- | ---: | ---: | ---: | ---: |
|
|
89
|
+
| 🔥**hilbertsfc (multi-threaded)** | 0.53 | 0.57 | 1883.52 | 1742.08 |
|
|
90
|
+
| 🔥**hilbertsfc (Python)** | 1.84 | 1.88 | 543.60 | 532.77 |
|
|
91
|
+
| [fast_hilbert (Rust)](https://crates.io/crates/fast_hilbert) | 13.71 | 13.47 | 72.92 | 74.23 |
|
|
92
|
+
| [hilbert_2d (Rust)](https://crates.io/crates/hilbert_2d) | 121.23 | 101.34 | 8.25 | 9.87 |
|
|
93
|
+
| [hilbert-bytes (Python)](https://pypi.org/project/hilbert-bytes/) | 2997.51 | 2642.86 | 0.334 | 0.378 |
|
|
94
|
+
| [numpy-hilbert-curve (Python)](https://pypi.org/project/numpy-hilbert-curve/) | 7606.88 | 5075.58 | 0.131 | 0.197 |
|
|
95
|
+
| [hilbertcurve (Python)](https://pypi.org/project/hilbertcurve/) | 14355.76 | 10411.20 | 0.0697 | 0.0961 |
|
|
96
|
+
|
|
97
|
+
> **System info:** Intel Core Ultra 7 258v, Ubuntu 24.04.4, Python 3.12.12, Numba 0.63.1
|
|
98
|
+
|
|
99
|
+
Additional benchmarks and details are available in [benchmark.md](https://github.com/remcofl/HilbertSFC/blob/main/benchmark.md).
|
|
100
|
+
|
|
101
|
+
For a deep dive into how the HilbertSFC kernels are derived and why the implementation maps well to modern CPUs (FSM/LUT formulation, dependency chains, ILP/MLP, unrolling, constant folding, vectorization, gathers), see the [performance deep dive notebook](https://github.com/remcofl/HilbertSFC/blob/main/notebooks/hilbertsfc_performance_deep_dive.ipynb).
|
|
102
|
+
|
|
103
|
+
## Quickstart
|
|
104
|
+
|
|
105
|
+
### Installation
|
|
106
|
+
|
|
107
|
+
With pip:
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
pip install hilbertsfc
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
Or with uv:
|
|
114
|
+
|
|
115
|
+
```bash
|
|
116
|
+
uv add hilbertsfc
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
### Usage
|
|
120
|
+
|
|
121
|
+
Hilbert curves map multi-dimensional integer coordinates onto a single scalar index while preserving spatial locality. `hilbertsfc` provides an encode and decode API for 2D and 3D coordinates that supports both scalar values and vectorized array inputs.
|
|
122
|
+
|
|
123
|
+
The `nbits` parameter specifies the number of bits per coordinate, defining the grid domain as `[0, 2**nbits)`. If omitted, it's inferred from the input array dtype (for arrays) or defaults to the maximum (32 for 2D, 21 for 3D).
|
|
124
|
+
|
|
125
|
+
#### Scalar 2D
|
|
126
|
+
|
|
127
|
+
Encode a single `(x, y)` coordinate into a Hilbert index, and decode it back:
|
|
128
|
+
|
|
129
|
+
```python
|
|
130
|
+
from hilbertsfc import hilbert_decode_2d, hilbert_encode_2d
|
|
131
|
+
|
|
132
|
+
index = hilbert_encode_2d(17, 23, nbits=10) # index = 534
|
|
133
|
+
x, y = hilbert_decode_2d(index, nbits=10) # x, y = (17, 23)
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
#### Batch 2D
|
|
137
|
+
|
|
138
|
+
The same functions operate elementwise on NumPy arrays, preserving shape and avoiding Python loops:
|
|
139
|
+
|
|
140
|
+
```python
|
|
141
|
+
import numpy as np
|
|
142
|
+
from hilbertsfc import hilbert_decode_2d, hilbert_encode_2d
|
|
143
|
+
|
|
144
|
+
xs = np.arange(1024, dtype=np.uint16)
|
|
145
|
+
ys = xs[::-1]
|
|
146
|
+
|
|
147
|
+
indices = hilbert_encode_2d(xs, ys, nbits=10) # shape (1024,), dtype uint32
|
|
148
|
+
xs2, ys2 = hilbert_decode_2d(indices, nbits=10) # xs2 = xs, ys2 = ys
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
This is the preferred use for high-throughput workloads. It can be further accelerated with `parallel=True`.
|
|
152
|
+
|
|
153
|
+
#### Batch 3D
|
|
154
|
+
|
|
155
|
+
3D works identically, mapping `(x, y, z)` coordinates to a single Hilbert index:
|
|
156
|
+
|
|
157
|
+
```python
|
|
158
|
+
import numpy as np
|
|
159
|
+
from hilbertsfc import hilbert_decode_3d, hilbert_encode_3d
|
|
160
|
+
|
|
161
|
+
nbits = 10
|
|
162
|
+
n = 10_000
|
|
163
|
+
rng = np.random.default_rng(0)
|
|
164
|
+
|
|
165
|
+
xs = rng.integers(0, 2**nbits, size=n, dtype=np.uint32)
|
|
166
|
+
ys = rng.integers(0, 2**nbits, size=n, dtype=np.uint32)
|
|
167
|
+
zs = rng.integers(0, 2**nbits, size=n, dtype=np.uint32)
|
|
168
|
+
|
|
169
|
+
indices = hilbert_encode_3d(xs, ys, zs, nbits=nbits) # shape (10000,), dtype uint32
|
|
170
|
+
xs2, ys2, zs2 = hilbert_decode_3d(indices, nbits=nbits) # xs2 = xs, ys2 = ys, zs2 = zs
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
This can be useful for applications like 3D spatial indexing, volumetric data processing, compression, and more.
|
|
174
|
+
|
|
175
|
+
#### Embedding kernels in your own Numba code
|
|
176
|
+
|
|
177
|
+
While the main API is designed for ease of use, the package also provides *kernel accessors* that expose the scalar encode/decode kernels. This allows you to embed the Hilbert curve logic directly into your own Numba kernels, enabling further optimizations like loop fusion and reduced Python call overhead.
|
|
178
|
+
|
|
179
|
+
Example embedding the 2D encode kernel:
|
|
180
|
+
|
|
181
|
+
```python
|
|
182
|
+
import numpy as np
|
|
183
|
+
import numba as nb
|
|
184
|
+
|
|
185
|
+
from hilbertsfc import get_hilbert_encode_2d_kernel
|
|
186
|
+
|
|
187
|
+
encode_2d_10 = get_hilbert_encode_2d_kernel(nbits=10)
|
|
188
|
+
|
|
189
|
+
@nb.njit
|
|
190
|
+
def encode_many(xs: np.ndarray, ys: np.ndarray) -> np.ndarray:
|
|
191
|
+
out = np.empty(xs.shape, dtype=np.uint32)
|
|
192
|
+
for i in range(xs.size):
|
|
193
|
+
out[i] = encode_2d_10(xs[i], ys[i])
|
|
194
|
+
return out
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
The same pattern works for decode and for 3D kernels.
|
|
198
|
+
|
|
199
|
+
### Demo Notebook
|
|
200
|
+
|
|
201
|
+
For more examples, see the [demo notebook](https://github.com/remcofl/HilbertSFC/blob/main/notebooks/hilbertsfc_demo.ipynb) which includes visualizations of the curves and embedding the kernels into custom Numba code.
|
|
202
|
+
|
|
203
|
+
## API notes
|
|
204
|
+
|
|
205
|
+
- `nbits` specifies the number of bits per coordinate. Coordinates must be in `[0, 2**nbits)`. A tighter `nbits` improves performance and reduces output dtypes. Excess bits are ignored.
|
|
206
|
+
- Hilbert indices obtained with a certain `nbits` are compatible with those from another `nbits`, given that the coordinates are within the valid range. This is because the kernels resolve the starting state parity to ensure compatibility.
|
|
207
|
+
- The batched API accepts arbitrary shapes and preserves the input shape. The requirement is that inputs/outputs support a *zero-copy* 1D view. Most strided views are supported but they can reduce performance since the kernels are close to memory-bandwidth bound.
|
|
208
|
+
- You can pass `out=...` buffers for batch encode, and `out_xs/out_ys/out_zs` for batch decode. This can for example be useful to write into memory-mapped arrays or to reuse buffers across multiple calls.
|
|
209
|
+
- `parallel=True` dispatches the parallel version of the kernel (when available). The number of threads can be controlled with the environment variable `NUMBA_NUM_THREADS` or during runtime with `numba.set_num_threads()`.
|
|
210
|
+
|
|
211
|
+
## Documentation
|
|
212
|
+
|
|
213
|
+
[Documentation](https://remcofl.github.io/HilbertSFC/) is hosted online. It includes a quick start guide and an API reference.
|
|
214
|
+
|
|
215
|
+
To serve the docs locally:
|
|
216
|
+
|
|
217
|
+
```bash
|
|
218
|
+
uv run --no-dev --group docs mkdocs serve
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
Build a static site into `site/`:
|
|
222
|
+
|
|
223
|
+
```bash
|
|
224
|
+
uv run --no-dev --group docs mkdocs build
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
## Development
|
|
228
|
+
|
|
229
|
+
The repo uses `uv` for environment management. CI and local development workflows (lint, tests, type checking, docs) are automated with `nox`.
|
|
230
|
+
|
|
231
|
+
Sync a local environment with dev dependencies:
|
|
232
|
+
|
|
233
|
+
```bash
|
|
234
|
+
uv sync
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
Run the full `nox` suite:
|
|
238
|
+
|
|
239
|
+
```bash
|
|
240
|
+
uvx nox
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
More details are in [CONTRIBUTING.md](https://github.com/remcofl/HilbertSFC/blob/main/CONTRIBUTING.md).
|
|
244
|
+
|
|
245
|
+
## Cache control
|
|
246
|
+
|
|
247
|
+
If you want to clear cached kernels and lookup tables (e.g., for benchmarking or testing), you can use the `clear_all_caches()` function:
|
|
248
|
+
|
|
249
|
+
```python
|
|
250
|
+
from hilbertsfc import clear_all_caches
|
|
251
|
+
|
|
252
|
+
clear_all_caches()
|
|
253
|
+
```
|
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
<!-- markdownlint-disable MD033 MD041 -->
|
|
2
|
+
---
|
|
3
|
+
<h1 align="center">
|
|
4
|
+
HilbertSFC
|
|
5
|
+
</h1>
|
|
6
|
+
|
|
7
|
+
<p align="center">
|
|
8
|
+
<a href="https://github.com/remcofl/HilbertSFC/blob/main/LICENSE">
|
|
9
|
+
<img src="https://img.shields.io/badge/License-MIT-97ca00?style=flat-square" alt="License">
|
|
10
|
+
</a>
|
|
11
|
+
<a href="https://remcofl.github.io/HilbertSFC/">
|
|
12
|
+
<img src="https://img.shields.io/badge/Docs-API%20%26%20Guide-0A7F8E?style=flat-square" alt="Documentation">
|
|
13
|
+
</a>
|
|
14
|
+
<a href="https://pypi.org/project/hilbertsfc/">
|
|
15
|
+
<img src="https://img.shields.io/pypi/v/hilbertsfc?label=PyPI&style=flat-square" alt="PyPI">
|
|
16
|
+
</a>
|
|
17
|
+
<a href="https://pypi.org/project/hilbertsfc/">
|
|
18
|
+
<img src="https://img.shields.io/pypi/pyversions/hilbertsfc?label=Python&style=flat-square" alt="Python versions">
|
|
19
|
+
</a>
|
|
20
|
+
<a href="https://github.com/remcofl/HilbertSFC/actions/workflows/ci.yml">
|
|
21
|
+
<img src="https://img.shields.io/github/actions/workflow/status/remcofl/HilbertSFC/ci.yml?branch=main&label=CI&style=flat-square" alt="CI">
|
|
22
|
+
</a>
|
|
23
|
+
</p>
|
|
24
|
+
|
|
25
|
+
<p align="center">
|
|
26
|
+
<strong>Ultra-fast 2D & 3D Hilbert space-filling curve encode/decode kernels for Python.</strong>
|
|
27
|
+
</p>
|
|
28
|
+
|
|
29
|
+
<p align="center">
|
|
30
|
+
<img src="https://raw.githubusercontent.com/remcofl/HilbertSFC/refs/heads/main/docs/img/hilbert2d_grid.png" width="420" align="middle" alt="2D Hilbert curves for nbits 1..5" />
|
|
31
|
+
<img src="https://raw.githubusercontent.com/remcofl/HilbertSFC/refs/heads/main/docs/img/hilbert3d_grid.webp" width="340" align="middle" hspace="5" alt="3D Hilbert curves animation grid for nbits 1..4" />
|
|
32
|
+
</p>
|
|
33
|
+
|
|
34
|
+
<p align="center">
|
|
35
|
+
<sub>2D Hilbert curves (nbits 1..5) and 3D Hilbert curves (nbits 1..4, animated).</sub>
|
|
36
|
+
</p>
|
|
37
|
+
|
|
38
|
+
---
|
|
39
|
+
|
|
40
|
+
This project is **performance-first** and **implemented entirely in Python**. The hot kernels are JIT-compiled with Numba and tuned for:
|
|
41
|
+
|
|
42
|
+
- **Branchless, fully unrolled inner loops**
|
|
43
|
+
- **SIMD via LLVM vector intrinsics**
|
|
44
|
+
- **Small, L1-cache-friendly lookup tables (LUTs)**
|
|
45
|
+
- **Reduced dependency chains for better ILP and MLP**
|
|
46
|
+
- **Optional multi-threading for batch operations**
|
|
47
|
+
|
|
48
|
+
It provides both convenient Python APIs and *kernel accessors* designed to be embedded into other Numba kernels.
|
|
49
|
+
|
|
50
|
+
## Performance
|
|
51
|
+
|
|
52
|
+
**HilbertSFC** is orders of magnitude faster than existing Python implementations. It also outperforms the **Fast Hilbert** implementation in Rust by a factor of ~7x. In fact, **HilbertSFC** takes only ~8 CPU cycles per point for 2D encode/decode of 32-bit coordinates.
|
|
53
|
+
|
|
54
|
+
#### 2D Points - Random, `nbits=32`, `n=5,000,000`
|
|
55
|
+
|
|
56
|
+
| Implementation | ns/pt (enc) | ns/pt (dec) | Mpts/s (enc) | Mpts/s (dec) |
|
|
57
|
+
| --- | ---: | ---: | ---: | ---: |
|
|
58
|
+
| 🔥**hilbertsfc (multi-threaded)** | 0.53 | 0.57 | 1883.52 | 1742.08 |
|
|
59
|
+
| 🔥**hilbertsfc (Python)** | 1.84 | 1.88 | 543.60 | 532.77 |
|
|
60
|
+
| [fast_hilbert (Rust)](https://crates.io/crates/fast_hilbert) | 13.71 | 13.47 | 72.92 | 74.23 |
|
|
61
|
+
| [hilbert_2d (Rust)](https://crates.io/crates/hilbert_2d) | 121.23 | 101.34 | 8.25 | 9.87 |
|
|
62
|
+
| [hilbert-bytes (Python)](https://pypi.org/project/hilbert-bytes/) | 2997.51 | 2642.86 | 0.334 | 0.378 |
|
|
63
|
+
| [numpy-hilbert-curve (Python)](https://pypi.org/project/numpy-hilbert-curve/) | 7606.88 | 5075.58 | 0.131 | 0.197 |
|
|
64
|
+
| [hilbertcurve (Python)](https://pypi.org/project/hilbertcurve/) | 14355.76 | 10411.20 | 0.0697 | 0.0961 |
|
|
65
|
+
|
|
66
|
+
> **System info:** Intel Core Ultra 7 258v, Ubuntu 24.04.4, Python 3.12.12, Numba 0.63.1
|
|
67
|
+
|
|
68
|
+
Additional benchmarks and details are available in [benchmark.md](https://github.com/remcofl/HilbertSFC/blob/main/benchmark.md).
|
|
69
|
+
|
|
70
|
+
For a deep dive into how the HilbertSFC kernels are derived and why the implementation maps well to modern CPUs (FSM/LUT formulation, dependency chains, ILP/MLP, unrolling, constant folding, vectorization, gathers), see the [performance deep dive notebook](https://github.com/remcofl/HilbertSFC/blob/main/notebooks/hilbertsfc_performance_deep_dive.ipynb).
|
|
71
|
+
|
|
72
|
+
## Quickstart
|
|
73
|
+
|
|
74
|
+
### Installation
|
|
75
|
+
|
|
76
|
+
With pip:
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
pip install hilbertsfc
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
Or with uv:
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
uv add hilbertsfc
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### Usage
|
|
89
|
+
|
|
90
|
+
Hilbert curves map multi-dimensional integer coordinates onto a single scalar index while preserving spatial locality. `hilbertsfc` provides an encode and decode API for 2D and 3D coordinates that supports both scalar values and vectorized array inputs.
|
|
91
|
+
|
|
92
|
+
The `nbits` parameter specifies the number of bits per coordinate, defining the grid domain as `[0, 2**nbits)`. If omitted, it's inferred from the input array dtype (for arrays) or defaults to the maximum (32 for 2D, 21 for 3D).
|
|
93
|
+
|
|
94
|
+
#### Scalar 2D
|
|
95
|
+
|
|
96
|
+
Encode a single `(x, y)` coordinate into a Hilbert index, and decode it back:
|
|
97
|
+
|
|
98
|
+
```python
|
|
99
|
+
from hilbertsfc import hilbert_decode_2d, hilbert_encode_2d
|
|
100
|
+
|
|
101
|
+
index = hilbert_encode_2d(17, 23, nbits=10) # index = 534
|
|
102
|
+
x, y = hilbert_decode_2d(index, nbits=10) # x, y = (17, 23)
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
#### Batch 2D
|
|
106
|
+
|
|
107
|
+
The same functions operate elementwise on NumPy arrays, preserving shape and avoiding Python loops:
|
|
108
|
+
|
|
109
|
+
```python
|
|
110
|
+
import numpy as np
|
|
111
|
+
from hilbertsfc import hilbert_decode_2d, hilbert_encode_2d
|
|
112
|
+
|
|
113
|
+
xs = np.arange(1024, dtype=np.uint16)
|
|
114
|
+
ys = xs[::-1]
|
|
115
|
+
|
|
116
|
+
indices = hilbert_encode_2d(xs, ys, nbits=10) # shape (1024,), dtype uint32
|
|
117
|
+
xs2, ys2 = hilbert_decode_2d(indices, nbits=10) # xs2 = xs, ys2 = ys
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
This is the preferred use for high-throughput workloads. It can be further accelerated with `parallel=True`.
|
|
121
|
+
|
|
122
|
+
#### Batch 3D
|
|
123
|
+
|
|
124
|
+
3D works identically, mapping `(x, y, z)` coordinates to a single Hilbert index:
|
|
125
|
+
|
|
126
|
+
```python
|
|
127
|
+
import numpy as np
|
|
128
|
+
from hilbertsfc import hilbert_decode_3d, hilbert_encode_3d
|
|
129
|
+
|
|
130
|
+
nbits = 10
|
|
131
|
+
n = 10_000
|
|
132
|
+
rng = np.random.default_rng(0)
|
|
133
|
+
|
|
134
|
+
xs = rng.integers(0, 2**nbits, size=n, dtype=np.uint32)
|
|
135
|
+
ys = rng.integers(0, 2**nbits, size=n, dtype=np.uint32)
|
|
136
|
+
zs = rng.integers(0, 2**nbits, size=n, dtype=np.uint32)
|
|
137
|
+
|
|
138
|
+
indices = hilbert_encode_3d(xs, ys, zs, nbits=nbits) # shape (10000,), dtype uint32
|
|
139
|
+
xs2, ys2, zs2 = hilbert_decode_3d(indices, nbits=nbits) # xs2 = xs, ys2 = ys, zs2 = zs
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
This can be useful for applications like 3D spatial indexing, volumetric data processing, compression, and more.
|
|
143
|
+
|
|
144
|
+
#### Embedding kernels in your own Numba code
|
|
145
|
+
|
|
146
|
+
While the main API is designed for ease of use, the package also provides *kernel accessors* that expose the scalar encode/decode kernels. This allows you to embed the Hilbert curve logic directly into your own Numba kernels, enabling further optimizations like loop fusion and reduced Python call overhead.
|
|
147
|
+
|
|
148
|
+
Example embedding the 2D encode kernel:
|
|
149
|
+
|
|
150
|
+
```python
|
|
151
|
+
import numpy as np
|
|
152
|
+
import numba as nb
|
|
153
|
+
|
|
154
|
+
from hilbertsfc import get_hilbert_encode_2d_kernel
|
|
155
|
+
|
|
156
|
+
encode_2d_10 = get_hilbert_encode_2d_kernel(nbits=10)
|
|
157
|
+
|
|
158
|
+
@nb.njit
|
|
159
|
+
def encode_many(xs: np.ndarray, ys: np.ndarray) -> np.ndarray:
|
|
160
|
+
out = np.empty(xs.shape, dtype=np.uint32)
|
|
161
|
+
for i in range(xs.size):
|
|
162
|
+
out[i] = encode_2d_10(xs[i], ys[i])
|
|
163
|
+
return out
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
The same pattern works for decode and for 3D kernels.
|
|
167
|
+
|
|
168
|
+
### Demo Notebook
|
|
169
|
+
|
|
170
|
+
For more examples, see the [demo notebook](https://github.com/remcofl/HilbertSFC/blob/main/notebooks/hilbertsfc_demo.ipynb) which includes visualizations of the curves and embedding the kernels into custom Numba code.
|
|
171
|
+
|
|
172
|
+
## API notes
|
|
173
|
+
|
|
174
|
+
- `nbits` specifies the number of bits per coordinate. Coordinates must be in `[0, 2**nbits)`. A tighter `nbits` improves performance and reduces output dtypes. Excess bits are ignored.
|
|
175
|
+
- Hilbert indices obtained with a certain `nbits` are compatible with those from another `nbits`, given that the coordinates are within the valid range. This is because the kernels resolve the starting state parity to ensure compatibility.
|
|
176
|
+
- The batched API accepts arbitrary shapes and preserves the input shape. The requirement is that inputs/outputs support a *zero-copy* 1D view. Most strided views are supported but they can reduce performance since the kernels are close to memory-bandwidth bound.
|
|
177
|
+
- You can pass `out=...` buffers for batch encode, and `out_xs/out_ys/out_zs` for batch decode. This can for example be useful to write into memory-mapped arrays or to reuse buffers across multiple calls.
|
|
178
|
+
- `parallel=True` dispatches the parallel version of the kernel (when available). The number of threads can be controlled with the environment variable `NUMBA_NUM_THREADS` or during runtime with `numba.set_num_threads()`.
|
|
179
|
+
|
|
180
|
+
## Documentation
|
|
181
|
+
|
|
182
|
+
[Documentation](https://remcofl.github.io/HilbertSFC/) is hosted online. It includes a quick start guide and an API reference.
|
|
183
|
+
|
|
184
|
+
To serve the docs locally:
|
|
185
|
+
|
|
186
|
+
```bash
|
|
187
|
+
uv run --no-dev --group docs mkdocs serve
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
Build a static site into `site/`:
|
|
191
|
+
|
|
192
|
+
```bash
|
|
193
|
+
uv run --no-dev --group docs mkdocs build
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
## Development
|
|
197
|
+
|
|
198
|
+
The repo uses `uv` for environment management. CI and local development workflows (lint, tests, type checking, docs) are automated with `nox`.
|
|
199
|
+
|
|
200
|
+
Sync a local environment with dev dependencies:
|
|
201
|
+
|
|
202
|
+
```bash
|
|
203
|
+
uv sync
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
Run the full `nox` suite:
|
|
207
|
+
|
|
208
|
+
```bash
|
|
209
|
+
uvx nox
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
More details are in [CONTRIBUTING.md](https://github.com/remcofl/HilbertSFC/blob/main/CONTRIBUTING.md).
|
|
213
|
+
|
|
214
|
+
## Cache control
|
|
215
|
+
|
|
216
|
+
If you want to clear cached kernels and lookup tables (e.g., for benchmarking or testing), you can use the `clear_all_caches()` function:
|
|
217
|
+
|
|
218
|
+
```python
|
|
219
|
+
from hilbertsfc import clear_all_caches
|
|
220
|
+
|
|
221
|
+
clear_all_caches()
|
|
222
|
+
```
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "hilbertsfc"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Ultra-fast 2D and 3D Hilbert space-filling curve encode/decode kernels (NumPy + Numba)."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
authors = [{ name = "Remco Leijenaar" }]
|
|
7
|
+
requires-python = ">=3.12"
|
|
8
|
+
dependencies = ["numba>=0.59.0", "numpy>=1.26.0"]
|
|
9
|
+
license = "MIT"
|
|
10
|
+
license-files = ["LICENSE"]
|
|
11
|
+
|
|
12
|
+
keywords = [
|
|
13
|
+
"hilbert",
|
|
14
|
+
"space-filling-curve",
|
|
15
|
+
"sfc",
|
|
16
|
+
"numba",
|
|
17
|
+
"numpy",
|
|
18
|
+
"indexing",
|
|
19
|
+
]
|
|
20
|
+
|
|
21
|
+
classifiers = [
|
|
22
|
+
"Development Status :: 5 - Production/Stable",
|
|
23
|
+
"Intended Audience :: Developers",
|
|
24
|
+
"Intended Audience :: Science/Research",
|
|
25
|
+
"License :: OSI Approved :: MIT License",
|
|
26
|
+
"Operating System :: OS Independent",
|
|
27
|
+
"Programming Language :: Python :: 3",
|
|
28
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
29
|
+
"Programming Language :: Python :: 3.12",
|
|
30
|
+
"Programming Language :: Python :: 3.13",
|
|
31
|
+
"Programming Language :: Python :: 3.14",
|
|
32
|
+
"Programming Language :: Python :: Implementation :: CPython",
|
|
33
|
+
"Topic :: Scientific/Engineering",
|
|
34
|
+
"Topic :: Software Development :: Libraries",
|
|
35
|
+
"Typing :: Typed",
|
|
36
|
+
]
|
|
37
|
+
|
|
38
|
+
[project.urls]
|
|
39
|
+
Homepage = "https://remcofl.github.io/HilbertSFC/"
|
|
40
|
+
Source = "https://github.com/remcofl/HilbertSFC"
|
|
41
|
+
Documentation = "https://remcofl.github.io/HilbertSFC/"
|
|
42
|
+
Issues = "https://github.com/remcofl/HilbertSFC/issues"
|
|
43
|
+
|
|
44
|
+
[build-system]
|
|
45
|
+
requires = ["uv_build>=0.9.18,<0.10.0"]
|
|
46
|
+
build-backend = "uv_build"
|
|
47
|
+
|
|
48
|
+
[dependency-groups]
|
|
49
|
+
bench = [
|
|
50
|
+
"hilbert-bytes>=0.6.0",
|
|
51
|
+
"hilbertcurve>=2.0.5",
|
|
52
|
+
"numpy-hilbert-curve>=1.0.1",
|
|
53
|
+
]
|
|
54
|
+
dev = [
|
|
55
|
+
{ include-group = "test" },
|
|
56
|
+
{ include-group = "typecheck" },
|
|
57
|
+
"prek>=0.3.1",
|
|
58
|
+
"ruff>=0.15.0",
|
|
59
|
+
"nox>=2025.11.12",
|
|
60
|
+
]
|
|
61
|
+
docs = [
|
|
62
|
+
"mkdocs>=1.6.1,<2",
|
|
63
|
+
"mkdocs-material>=9.6.0",
|
|
64
|
+
"mkdocstrings[python]>=0.30.0",
|
|
65
|
+
"pymdown-extensions>=10.14.0",
|
|
66
|
+
"ruff>=0.15.0",
|
|
67
|
+
]
|
|
68
|
+
notebooks = [
|
|
69
|
+
"ipykernel>=7.2.0",
|
|
70
|
+
"matplotlib>=3.10.8",
|
|
71
|
+
"rich>=14.3.2",
|
|
72
|
+
"numpy>=2.3.5",
|
|
73
|
+
"pandas>=3.0.1",
|
|
74
|
+
]
|
|
75
|
+
scripts = [
|
|
76
|
+
"matplotlib>=3.10.8",
|
|
77
|
+
"numpy>=2.3.5",
|
|
78
|
+
"pillow>=12.1.0",
|
|
79
|
+
"rich>=14.3.2",
|
|
80
|
+
]
|
|
81
|
+
test = ["pytest>=9.0.2"]
|
|
82
|
+
typecheck = ["pyright>=1.1.408", "pyrefly>=0.51.0", "ty>=0.0.15"]
|
|
83
|
+
|
|
84
|
+
[tool.ruff]
|
|
85
|
+
line-length = 88
|
|
86
|
+
indent-width = 4
|
|
87
|
+
target-version = "py312"
|
|
88
|
+
|
|
89
|
+
[tool.ruff.lint]
|
|
90
|
+
select = ["E4", "E7", "E9", "F", "I", "N", "NPY", "UP"]
|
|
91
|
+
ignore = []
|
|
92
|
+
|
|
93
|
+
# Allow unused variables when underscore-prefixed.
|
|
94
|
+
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
|
|
95
|
+
|
|
96
|
+
[tool.ruff.format]
|
|
97
|
+
quote-style = "double"
|
|
98
|
+
indent-style = "space"
|
|
99
|
+
skip-magic-trailing-comma = false
|
|
100
|
+
line-ending = "auto"
|
|
101
|
+
|
|
102
|
+
docstring-code-format = true
|
|
103
|
+
docstring-code-line-length = "dynamic"
|
|
104
|
+
|
|
105
|
+
[tool.ty]
|
|
106
|
+
src.include = ["src", "tests", "scripts", "notebooks"]
|
|
107
|
+
|
|
108
|
+
[tool.pyright]
|
|
109
|
+
include = ["src", "tests", "scripts", "notebooks"]
|
|
110
|
+
pythonVersion = "3.12"
|
|
111
|
+
typeCheckingMode = "basic"
|
|
112
|
+
|
|
113
|
+
[tool.pyrefly]
|
|
114
|
+
project-includes = ["src", "tests", "scripts", "notebooks"]
|
|
115
|
+
python-version = "3.12.0"
|
|
116
|
+
infer-with-first-use = false
|
|
117
|
+
|
|
118
|
+
[tool.pytest.ini_options]
|
|
119
|
+
addopts = "-ra"
|
|
120
|
+
testpaths = ["tests"]
|
|
121
|
+
markers = [
|
|
122
|
+
"slow: exhaustive tests that may take longer (deselect with '-m \"not slow\"')",
|
|
123
|
+
]
|
|
124
|
+
|
|
125
|
+
[[tool.uv.index]]
|
|
126
|
+
name = "testpypi"
|
|
127
|
+
url = "https://test.pypi.org/simple/"
|
|
128
|
+
publish-url = "https://test.pypi.org/legacy/"
|
|
129
|
+
explicit = true
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""hilbertsfc package.
|
|
2
|
+
|
|
3
|
+
This package is intended to host Hilbert space-filling curve kernels and their
|
|
4
|
+
lookup tables as lazily-loaded package resources.
|
|
5
|
+
|
|
6
|
+
Public API lives in:
|
|
7
|
+
- ``hilbertsfc.hilbert2d``
|
|
8
|
+
- ``hilbertsfc.hilbert3d``
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from ._cache import clear_all_caches, clear_kernel_caches, clear_lut_caches
|
|
14
|
+
from .hilbert2d import (
|
|
15
|
+
get_hilbert_decode_2d_kernel,
|
|
16
|
+
get_hilbert_encode_2d_kernel,
|
|
17
|
+
hilbert_decode_2d,
|
|
18
|
+
hilbert_encode_2d,
|
|
19
|
+
)
|
|
20
|
+
from .hilbert3d import (
|
|
21
|
+
get_hilbert_decode_3d_kernel,
|
|
22
|
+
get_hilbert_encode_3d_kernel,
|
|
23
|
+
hilbert_decode_3d,
|
|
24
|
+
hilbert_encode_3d,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
__all__ = [
|
|
28
|
+
"clear_all_caches",
|
|
29
|
+
"clear_kernel_caches",
|
|
30
|
+
"clear_lut_caches",
|
|
31
|
+
"get_hilbert_decode_2d_kernel",
|
|
32
|
+
"get_hilbert_encode_2d_kernel",
|
|
33
|
+
"get_hilbert_decode_3d_kernel",
|
|
34
|
+
"get_hilbert_encode_3d_kernel",
|
|
35
|
+
"hilbert_decode_2d",
|
|
36
|
+
"hilbert_decode_3d",
|
|
37
|
+
"hilbert_encode_2d",
|
|
38
|
+
"hilbert_encode_3d",
|
|
39
|
+
]
|