floatbungler 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- floatbungler-0.1.0/.github/workflows/CI.yml +185 -0
- floatbungler-0.1.0/.github/workflows/bench.yml +37 -0
- floatbungler-0.1.0/.gitignore +83 -0
- floatbungler-0.1.0/CHANGELOG.md +68 -0
- floatbungler-0.1.0/CLAUDE.md +133 -0
- floatbungler-0.1.0/Cargo.lock +171 -0
- floatbungler-0.1.0/Cargo.toml +18 -0
- floatbungler-0.1.0/Justfile +23 -0
- floatbungler-0.1.0/LICENSE +13 -0
- floatbungler-0.1.0/LICENSES/CC0-1.0.txt +121 -0
- floatbungler-0.1.0/LICENSES/ISC.txt +8 -0
- floatbungler-0.1.0/PKG-INFO +136 -0
- floatbungler-0.1.0/README.md +110 -0
- floatbungler-0.1.0/REUSE.toml +6 -0
- floatbungler-0.1.0/bench/conftest.py +44 -0
- floatbungler-0.1.0/bench/test_bench.py +34 -0
- floatbungler-0.1.0/cliff.toml +109 -0
- floatbungler-0.1.0/pyproject.toml +57 -0
- floatbungler-0.1.0/python/floatbungler/__init__.py +9 -0
- floatbungler-0.1.0/python/floatbungler/chimp.pyi +10 -0
- floatbungler-0.1.0/python/floatbungler/chimp128.pyi +10 -0
- floatbungler-0.1.0/python/floatbungler/gorilla.pyi +10 -0
- floatbungler-0.1.0/python/floatbungler/patas.pyi +10 -0
- floatbungler-0.1.0/src/bits.rs +125 -0
- floatbungler-0.1.0/src/chimp.rs +146 -0
- floatbungler-0.1.0/src/chimp128.rs +166 -0
- floatbungler-0.1.0/src/chimp_utils.rs +32 -0
- floatbungler-0.1.0/src/gorilla.rs +122 -0
- floatbungler-0.1.0/src/lib.rs +54 -0
- floatbungler-0.1.0/src/patas.rs +136 -0
- floatbungler-0.1.0/test/__init__.py +3 -0
- floatbungler-0.1.0/test/test_chimp.py +27 -0
- floatbungler-0.1.0/test/test_chimp128.py +27 -0
- floatbungler-0.1.0/test/test_gorilla.py +27 -0
- floatbungler-0.1.0/test/test_patas.py +30 -0
- floatbungler-0.1.0/test/vector_cases.py +141 -0
- floatbungler-0.1.0/uv.lock +380 -0
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
# This file is autogenerated by maturin v1.12.6
|
|
2
|
+
# To update, run
|
|
3
|
+
#
|
|
4
|
+
# maturin generate-ci github
|
|
5
|
+
#
|
|
6
|
+
name: CI
|
|
7
|
+
|
|
8
|
+
on:
|
|
9
|
+
push:
|
|
10
|
+
branches:
|
|
11
|
+
- main
|
|
12
|
+
- master
|
|
13
|
+
tags:
|
|
14
|
+
- "*"
|
|
15
|
+
pull_request:
|
|
16
|
+
workflow_dispatch:
|
|
17
|
+
|
|
18
|
+
permissions:
|
|
19
|
+
contents: read
|
|
20
|
+
|
|
21
|
+
jobs:
|
|
22
|
+
linux:
|
|
23
|
+
runs-on: ${{ matrix.platform.runner }}
|
|
24
|
+
strategy:
|
|
25
|
+
matrix:
|
|
26
|
+
platform:
|
|
27
|
+
- runner: ubuntu-22.04
|
|
28
|
+
target: x86_64
|
|
29
|
+
- runner: ubuntu-22.04
|
|
30
|
+
target: x86
|
|
31
|
+
- runner: ubuntu-22.04
|
|
32
|
+
target: aarch64
|
|
33
|
+
- runner: ubuntu-22.04
|
|
34
|
+
target: armv7
|
|
35
|
+
- runner: ubuntu-22.04
|
|
36
|
+
target: s390x
|
|
37
|
+
- runner: ubuntu-22.04
|
|
38
|
+
target: ppc64le
|
|
39
|
+
steps:
|
|
40
|
+
- uses: actions/checkout@v6
|
|
41
|
+
- uses: actions/setup-python@v6
|
|
42
|
+
with:
|
|
43
|
+
python-version: 3.x
|
|
44
|
+
- name: Build wheels
|
|
45
|
+
uses: PyO3/maturin-action@v1
|
|
46
|
+
with:
|
|
47
|
+
target: ${{ matrix.platform.target }}
|
|
48
|
+
args: --release --out dist --find-interpreter
|
|
49
|
+
sccache: ${{ !startsWith(github.ref, 'refs/tags/') }}
|
|
50
|
+
manylinux: auto
|
|
51
|
+
- name: Upload wheels
|
|
52
|
+
uses: actions/upload-artifact@v6
|
|
53
|
+
with:
|
|
54
|
+
name: wheels-linux-${{ matrix.platform.target }}
|
|
55
|
+
path: dist
|
|
56
|
+
|
|
57
|
+
musllinux:
|
|
58
|
+
runs-on: ${{ matrix.platform.runner }}
|
|
59
|
+
strategy:
|
|
60
|
+
matrix:
|
|
61
|
+
platform:
|
|
62
|
+
- runner: ubuntu-22.04
|
|
63
|
+
target: x86_64
|
|
64
|
+
- runner: ubuntu-22.04
|
|
65
|
+
target: x86
|
|
66
|
+
- runner: ubuntu-22.04
|
|
67
|
+
target: aarch64
|
|
68
|
+
- runner: ubuntu-22.04
|
|
69
|
+
target: armv7
|
|
70
|
+
steps:
|
|
71
|
+
- uses: actions/checkout@v6
|
|
72
|
+
- uses: actions/setup-python@v6
|
|
73
|
+
with:
|
|
74
|
+
python-version: 3.x
|
|
75
|
+
- name: Build wheels
|
|
76
|
+
uses: PyO3/maturin-action@v1
|
|
77
|
+
with:
|
|
78
|
+
target: ${{ matrix.platform.target }}
|
|
79
|
+
args: --release --out dist --find-interpreter
|
|
80
|
+
sccache: ${{ !startsWith(github.ref, 'refs/tags/') }}
|
|
81
|
+
manylinux: musllinux_1_2
|
|
82
|
+
- name: Upload wheels
|
|
83
|
+
uses: actions/upload-artifact@v6
|
|
84
|
+
with:
|
|
85
|
+
name: wheels-musllinux-${{ matrix.platform.target }}
|
|
86
|
+
path: dist
|
|
87
|
+
|
|
88
|
+
windows:
|
|
89
|
+
runs-on: ${{ matrix.platform.runner }}
|
|
90
|
+
strategy:
|
|
91
|
+
matrix:
|
|
92
|
+
platform:
|
|
93
|
+
- runner: windows-latest
|
|
94
|
+
target: x64
|
|
95
|
+
python_arch: x64
|
|
96
|
+
- runner: windows-latest
|
|
97
|
+
target: x86
|
|
98
|
+
python_arch: x86
|
|
99
|
+
- runner: windows-11-arm
|
|
100
|
+
target: aarch64
|
|
101
|
+
python_arch: arm64
|
|
102
|
+
steps:
|
|
103
|
+
- uses: actions/checkout@v6
|
|
104
|
+
- uses: actions/setup-python@v6
|
|
105
|
+
with:
|
|
106
|
+
python-version: 3.13
|
|
107
|
+
architecture: ${{ matrix.platform.python_arch }}
|
|
108
|
+
- name: Build wheels
|
|
109
|
+
uses: PyO3/maturin-action@v1
|
|
110
|
+
with:
|
|
111
|
+
target: ${{ matrix.platform.target }}
|
|
112
|
+
args: --release --out dist --find-interpreter
|
|
113
|
+
sccache: ${{ !startsWith(github.ref, 'refs/tags/') }}
|
|
114
|
+
- name: Upload wheels
|
|
115
|
+
uses: actions/upload-artifact@v6
|
|
116
|
+
with:
|
|
117
|
+
name: wheels-windows-${{ matrix.platform.target }}
|
|
118
|
+
path: dist
|
|
119
|
+
|
|
120
|
+
macos:
|
|
121
|
+
runs-on: ${{ matrix.platform.runner }}
|
|
122
|
+
strategy:
|
|
123
|
+
matrix:
|
|
124
|
+
platform:
|
|
125
|
+
- runner: macos-15-intel
|
|
126
|
+
target: x86_64
|
|
127
|
+
- runner: macos-latest
|
|
128
|
+
target: aarch64
|
|
129
|
+
steps:
|
|
130
|
+
- uses: actions/checkout@v6
|
|
131
|
+
- uses: actions/setup-python@v6
|
|
132
|
+
with:
|
|
133
|
+
python-version: 3.x
|
|
134
|
+
- name: Build wheels
|
|
135
|
+
uses: PyO3/maturin-action@v1
|
|
136
|
+
with:
|
|
137
|
+
target: ${{ matrix.platform.target }}
|
|
138
|
+
args: --release --out dist --find-interpreter
|
|
139
|
+
sccache: ${{ !startsWith(github.ref, 'refs/tags/') }}
|
|
140
|
+
- name: Upload wheels
|
|
141
|
+
uses: actions/upload-artifact@v6
|
|
142
|
+
with:
|
|
143
|
+
name: wheels-macos-${{ matrix.platform.target }}
|
|
144
|
+
path: dist
|
|
145
|
+
|
|
146
|
+
sdist:
|
|
147
|
+
runs-on: ubuntu-latest
|
|
148
|
+
steps:
|
|
149
|
+
- uses: actions/checkout@v6
|
|
150
|
+
- name: Build sdist
|
|
151
|
+
uses: PyO3/maturin-action@v1
|
|
152
|
+
with:
|
|
153
|
+
command: sdist
|
|
154
|
+
args: --out dist
|
|
155
|
+
- name: Upload sdist
|
|
156
|
+
uses: actions/upload-artifact@v6
|
|
157
|
+
with:
|
|
158
|
+
name: wheels-sdist
|
|
159
|
+
path: dist
|
|
160
|
+
|
|
161
|
+
release:
|
|
162
|
+
name: Release
|
|
163
|
+
environment: pypi
|
|
164
|
+
runs-on: ubuntu-latest
|
|
165
|
+
if: ${{ startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch' }}
|
|
166
|
+
needs: [linux, musllinux, windows, macos, sdist]
|
|
167
|
+
permissions:
|
|
168
|
+
# Used to sign the release artifacts
|
|
169
|
+
id-token: write
|
|
170
|
+
# Used to upload release artifacts
|
|
171
|
+
contents: write
|
|
172
|
+
# Used to generate artifact attestation
|
|
173
|
+
attestations: write
|
|
174
|
+
steps:
|
|
175
|
+
- uses: actions/download-artifact@v7
|
|
176
|
+
- name: Generate artifact attestation
|
|
177
|
+
uses: actions/attest-build-provenance@v3
|
|
178
|
+
with:
|
|
179
|
+
subject-path: "wheels-*/*"
|
|
180
|
+
- name: Install uv
|
|
181
|
+
if: ${{ startsWith(github.ref, 'refs/tags/') }}
|
|
182
|
+
uses: astral-sh/setup-uv@v7
|
|
183
|
+
- name: Publish to PyPI
|
|
184
|
+
if: ${{ startsWith(github.ref, 'refs/tags/') }}
|
|
185
|
+
run: uv publish 'wheels-*/*'
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2026 Miikka Koskinen
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: ISC
|
|
4
|
+
|
|
5
|
+
name: Benchmark
|
|
6
|
+
|
|
7
|
+
on:
|
|
8
|
+
push:
|
|
9
|
+
branches:
|
|
10
|
+
- main
|
|
11
|
+
|
|
12
|
+
permissions:
|
|
13
|
+
contents: write
|
|
14
|
+
deployments: write
|
|
15
|
+
|
|
16
|
+
jobs:
|
|
17
|
+
benchmark:
|
|
18
|
+
name: Benchmark
|
|
19
|
+
runs-on: ubuntu-latest
|
|
20
|
+
steps:
|
|
21
|
+
- uses: actions/checkout@v6
|
|
22
|
+
- uses: actions/setup-python@v6
|
|
23
|
+
with:
|
|
24
|
+
python-version: 3.x
|
|
25
|
+
- uses: astral-sh/setup-uv@v7
|
|
26
|
+
|
|
27
|
+
- name: Run benchmark
|
|
28
|
+
run: |
|
|
29
|
+
uv run pytest bench/ --benchmark-enable --benchmark-json benchmark.json
|
|
30
|
+
|
|
31
|
+
- name: Store benchmark result
|
|
32
|
+
uses: benchmark-action/github-action-benchmark@v1
|
|
33
|
+
with:
|
|
34
|
+
tool: "pytest"
|
|
35
|
+
output-file-path: benchmark.json
|
|
36
|
+
github-token: ${{ secrets.GITHUB_TOKEN }}
|
|
37
|
+
auto-push: true
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2025 Miikka Koskinen
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: ISC
|
|
4
|
+
|
|
5
|
+
/target
|
|
6
|
+
|
|
7
|
+
# Byte-compiled / optimized / DLL files
|
|
8
|
+
__pycache__/
|
|
9
|
+
.pytest_cache/
|
|
10
|
+
*.py[cod]
|
|
11
|
+
|
|
12
|
+
# C extensions
|
|
13
|
+
*.so
|
|
14
|
+
*.dSYM
|
|
15
|
+
|
|
16
|
+
# Distribution / packaging
|
|
17
|
+
.Python
|
|
18
|
+
.venv/
|
|
19
|
+
env/
|
|
20
|
+
bin/
|
|
21
|
+
build/
|
|
22
|
+
develop-eggs/
|
|
23
|
+
dist/
|
|
24
|
+
eggs/
|
|
25
|
+
lib/
|
|
26
|
+
lib64/
|
|
27
|
+
parts/
|
|
28
|
+
sdist/
|
|
29
|
+
var/
|
|
30
|
+
include/
|
|
31
|
+
man/
|
|
32
|
+
venv/
|
|
33
|
+
*.egg-info/
|
|
34
|
+
.installed.cfg
|
|
35
|
+
*.egg
|
|
36
|
+
|
|
37
|
+
# Installer logs
|
|
38
|
+
pip-log.txt
|
|
39
|
+
pip-delete-this-directory.txt
|
|
40
|
+
pip-selfcheck.json
|
|
41
|
+
|
|
42
|
+
# Unit test / coverage reports
|
|
43
|
+
htmlcov/
|
|
44
|
+
.tox/
|
|
45
|
+
.coverage
|
|
46
|
+
.cache
|
|
47
|
+
nosetests.xml
|
|
48
|
+
coverage.xml
|
|
49
|
+
|
|
50
|
+
# Translations
|
|
51
|
+
*.mo
|
|
52
|
+
|
|
53
|
+
# Mr Developer
|
|
54
|
+
.mr.developer.cfg
|
|
55
|
+
.project
|
|
56
|
+
.pydevproject
|
|
57
|
+
|
|
58
|
+
# Rope
|
|
59
|
+
.ropeproject
|
|
60
|
+
|
|
61
|
+
# Django stuff:
|
|
62
|
+
*.log
|
|
63
|
+
*.pot
|
|
64
|
+
|
|
65
|
+
.DS_Store
|
|
66
|
+
|
|
67
|
+
# Sphinx documentation
|
|
68
|
+
docs/_build/
|
|
69
|
+
|
|
70
|
+
# PyCharm
|
|
71
|
+
.idea/
|
|
72
|
+
|
|
73
|
+
# VSCode
|
|
74
|
+
.vscode/
|
|
75
|
+
|
|
76
|
+
# Pyenv
|
|
77
|
+
.python-version
|
|
78
|
+
|
|
79
|
+
# Hypothesis
|
|
80
|
+
.hypothesis/
|
|
81
|
+
|
|
82
|
+
# Claude
|
|
83
|
+
.claude/
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
<!--
|
|
2
|
+
SPDX-FileCopyrightText: 2025 Miikka Koskinen
|
|
3
|
+
|
|
4
|
+
SPDX-License-Identifier: ISC
|
|
5
|
+
-->
|
|
6
|
+
|
|
7
|
+
# Changelog
|
|
8
|
+
|
|
9
|
+
All notable changes to this project will be documented in this file.
|
|
10
|
+
|
|
11
|
+
## [0.1.1] - 2026-03-14
|
|
12
|
+
|
|
13
|
+
### 🐛 Bug Fixes
|
|
14
|
+
|
|
15
|
+
- Fix patas for long inputs
|
|
16
|
+
|
|
17
|
+
### 🚜 Refactor
|
|
18
|
+
|
|
19
|
+
- Replace `count_leading` with a built-in
|
|
20
|
+
- Replace `count_trailing` with a built-in
|
|
21
|
+
- *(patas)* Simplify diff128 logic
|
|
22
|
+
- *(patas)* Pre-allocate the result vector
|
|
23
|
+
- *(patas)* Simplify code for meaningful bytes
|
|
24
|
+
- *(patas)* Copy the float in the loop
|
|
25
|
+
- *(patas)* Pre-allocate the encoding buffer
|
|
26
|
+
|
|
27
|
+
### ⚙️ Miscellaneous Tasks
|
|
28
|
+
|
|
29
|
+
- Update the CI workflow
|
|
30
|
+
- Set up a basic benchmark
|
|
31
|
+
- Keep the benchmark cache available
|
|
32
|
+
- Push benchmark results to GH Pages
|
|
33
|
+
- Add benchmark site to README
|
|
34
|
+
- Add test vectors
|
|
35
|
+
- Run cargo fmt
|
|
36
|
+
- `just fix`
|
|
37
|
+
|
|
38
|
+
## [0.1.0] - 2026-03-14
|
|
39
|
+
|
|
40
|
+
### 🚀 Features
|
|
41
|
+
|
|
42
|
+
- Initial commit
|
|
43
|
+
- Add typestubs for floatbungler.gorilla
|
|
44
|
+
- Add a Chimp128 implementation
|
|
45
|
+
- Add a Chimp implementation
|
|
46
|
+
- Add a Patas implementation
|
|
47
|
+
- Add type stubs for all algorithms
|
|
48
|
+
|
|
49
|
+
### 🚜 Refactor
|
|
50
|
+
|
|
51
|
+
- Remove debug prints
|
|
52
|
+
- Deduplicate helper functions
|
|
53
|
+
- Deduplicate the module registration
|
|
54
|
+
|
|
55
|
+
### 📚 Documentation
|
|
56
|
+
|
|
57
|
+
- Add a README.md
|
|
58
|
+
- Add license information
|
|
59
|
+
- Generate CHANGELOG.md
|
|
60
|
+
- Update motivation for the project
|
|
61
|
+
|
|
62
|
+
### ⚙️ Miscellaneous Tasks
|
|
63
|
+
|
|
64
|
+
- Add project metadata for the package
|
|
65
|
+
- Host the package on GitHub after all
|
|
66
|
+
- Release v0.1.0
|
|
67
|
+
|
|
68
|
+
<!-- generated by git-cliff -->
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
<!--
|
|
2
|
+
SPDX-FileCopyrightText: 2025 Miikka Koskinen
|
|
3
|
+
|
|
4
|
+
SPDX-License-Identifier: ISC
|
|
5
|
+
-->
|
|
6
|
+
|
|
7
|
+
# CLAUDE.md
|
|
8
|
+
|
|
9
|
+
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
|
10
|
+
|
|
11
|
+
## Project Overview
|
|
12
|
+
|
|
13
|
+
floatbungler is a Python extension module written in Rust using PyO3 that implements floating-point compression algorithms. The project provides four main compression algorithms:
|
|
14
|
+
- **Gorilla**: Facebook's time-series compression algorithm
|
|
15
|
+
- **Chimp**: Standard Chimp compression algorithm with bin-encoded leading zeros
|
|
16
|
+
- **Chimp128**: Advanced Chimp variant with 128-entry ring buffer and 16384-entry lookup table
|
|
17
|
+
- **Patas**: Byte-aligned compression algorithm with 128-entry ring buffer and lookup table
|
|
18
|
+
|
|
19
|
+
## Build System
|
|
20
|
+
|
|
21
|
+
This project uses Maturin to build Python extensions from Rust code. The toolchain uses `uv` for Python package management.
|
|
22
|
+
|
|
23
|
+
### Build Commands
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
# Build the extension module (development mode)
|
|
27
|
+
maturin develop
|
|
28
|
+
|
|
29
|
+
# Build release version
|
|
30
|
+
maturin build --release
|
|
31
|
+
|
|
32
|
+
# Run Python tests
|
|
33
|
+
uv run pytest
|
|
34
|
+
|
|
35
|
+
# Or using just
|
|
36
|
+
just test
|
|
37
|
+
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## Architecture
|
|
41
|
+
|
|
42
|
+
### Rust Core (`src/`)
|
|
43
|
+
|
|
44
|
+
The Rust implementation is organized into modules:
|
|
45
|
+
|
|
46
|
+
- **`lib.rs`**: PyO3 module definition. Creates the `floatbungler` Python module with submodules. Uses the sys.modules workaround to enable direct imports like `from floatbungler import gorilla`.
|
|
47
|
+
|
|
48
|
+
- **`gorilla.rs`**: Gorilla compression algorithm implementation
|
|
49
|
+
- `encode_plain()`/`encode()`: Compresses f64 arrays using XOR-based delta encoding with leading/trailing zero optimization
|
|
50
|
+
- `decode_plain()`/`decode()`: Decompresses back to f64 arrays
|
|
51
|
+
- Tracks previous leading/trailing zeros for efficient encoding
|
|
52
|
+
- Uses helper functions from `bit_utils.rs`
|
|
53
|
+
|
|
54
|
+
- **`chimp.rs`**: Standard Chimp compression algorithm
|
|
55
|
+
- Similar to Gorilla but uses bin-encoded leading zeros (3 bits instead of 5 bits)
|
|
56
|
+
- Uses trailing zero threshold of 6 bits to decide between control codes
|
|
57
|
+
- Simpler than Chimp128 - no ring buffer, just XOR with previous value
|
|
58
|
+
|
|
59
|
+
- **`chimp128.rs`**: Advanced Chimp128 compression algorithm
|
|
60
|
+
- Uses a 128-entry ring buffer and 16384-entry lookup table
|
|
61
|
+
- Selects best reference value from ring buffer based on trailing zeros
|
|
62
|
+
- Uses trailing zero threshold of 13 bits (log2(128) + log2(64))
|
|
63
|
+
- Encodes using bin-encoded leading zeros (3 bits) rather than raw values
|
|
64
|
+
|
|
65
|
+
- **`patas.rs`**: Patas compression algorithm (byte-aligned variant)
|
|
66
|
+
- Uses a 128-entry ring buffer and 16384-entry lookup table (similar to Chimp128)
|
|
67
|
+
- Byte-aligned encoding: uses 16-bit header + variable number of bytes for meaningful bits
|
|
68
|
+
- Header format: 7 bits for reference index, 3 bits for meaningful bytes count, 6 bits for trailing zeros
|
|
69
|
+
- Stores meaningful bits as whole bytes rather than individual bits
|
|
70
|
+
- Uses `bytes` crate for efficient buffer management
|
|
71
|
+
|
|
72
|
+
- **`bits.rs`**: Low-level bit-oriented I/O
|
|
73
|
+
- `Bitwrite`: Writes individual bits to a byte buffer
|
|
74
|
+
- `Bitread`: Reads individual bits from a byte slice
|
|
75
|
+
- Both support f64 and arbitrary-length u64 operations
|
|
76
|
+
|
|
77
|
+
- **`chimp_utils.rs`**: Utility functions for bit manipulation
|
|
78
|
+
- `bin_count()` / `bin_encode()` / `bin_decode()`: Bin-based encoding for leading zeros (bins: 0, 8, 12, 16, 18, 20, 22, 24)
|
|
79
|
+
|
|
80
|
+
### Python Interface (`python/floatbungler/`)
|
|
81
|
+
|
|
82
|
+
- **`__init__.py`**: Re-exports everything from the compiled Rust extension module
|
|
83
|
+
- **Type stubs (`.pyi` files)**: Type stubs are provided for all compression algorithms (`gorilla.pyi`, `chimp.pyi`, `chimp128.pyi`, `patas.pyi`) to enable IDE autocomplete and static type checking
|
|
84
|
+
|
|
85
|
+
### Testing
|
|
86
|
+
|
|
87
|
+
- **Python tests**: `test/test_gorilla.py`, `test/test_chimp.py`, `test/test_chimp128.py`, and `test/test_patas.py` use Hypothesis for property-based testing of the compression codecs. Tests use `numpy.testing.assert_equal` to handle NaN comparisons correctly.
|
|
88
|
+
|
|
89
|
+
## Key Implementation Details
|
|
90
|
+
|
|
91
|
+
### PyO3 Module Structure
|
|
92
|
+
|
|
93
|
+
The project uses PyO3's submodule pattern. All four compression algorithm submodules (`gorilla`, `chimp`, `chimp128`, `patas`) are created as separate PyModules and added to sys.modules to support direct imports (`from floatbungler import gorilla`, `from floatbungler import chimp`, `from floatbungler import chimp128`, `from floatbungler import patas`). This is a workaround for PyO3 issue #759.
|
|
94
|
+
|
|
95
|
+
### Compression Algorithms
|
|
96
|
+
|
|
97
|
+
All four algorithms use XOR-based compression:
|
|
98
|
+
1. First value is stored uncompressed
|
|
99
|
+
2. Subsequent values are XORed with a reference (previous value for Gorilla/Chimp, ring buffer entry for Chimp128/Patas)
|
|
100
|
+
3. The XOR result is encoded with leading/trailing zero compression
|
|
101
|
+
|
|
102
|
+
**Gorilla** uses:
|
|
103
|
+
- 5 bits for leading zero count
|
|
104
|
+
- Direct XOR with previous value
|
|
105
|
+
- Bit-level encoding
|
|
106
|
+
|
|
107
|
+
**Chimp** uses:
|
|
108
|
+
- Bin-encoded leading zeros (3 bits, covering bins: 0, 8, 12, 16, 18, 20, 22, 24)
|
|
109
|
+
- Trailing zero threshold of 6 bits to select control codes
|
|
110
|
+
- Direct XOR with previous value
|
|
111
|
+
- Bit-level encoding
|
|
112
|
+
|
|
113
|
+
**Chimp128** uses:
|
|
114
|
+
- A 128-entry ring buffer to find better reference values (more trailing zeros = better compression)
|
|
115
|
+
- A lookup table indexed by the low 14 bits for fast ring buffer lookups
|
|
116
|
+
- Trailing zero threshold of 13 bits (log2(128) + log2(64))
|
|
117
|
+
- Bin-encoded leading zeros (same as Chimp)
|
|
118
|
+
- Bit-level encoding
|
|
119
|
+
|
|
120
|
+
**Patas** uses:
|
|
121
|
+
- A 128-entry ring buffer and 16384-entry lookup table (similar to Chimp128)
|
|
122
|
+
- Byte-aligned encoding with 16-bit headers
|
|
123
|
+
- Header encodes: 7 bits reference index + 3 bits meaningful bytes + 6 bits trailing zeros
|
|
124
|
+
- Stores meaningful bits rounded up to whole bytes
|
|
125
|
+
- Uses `bytes` crate for buffer management instead of bit-level operations
|
|
126
|
+
|
|
127
|
+
## Development Notes
|
|
128
|
+
|
|
129
|
+
- The project requires Python 3.10+
|
|
130
|
+
- Maturin is configured to use `python-source = "python"` to find the Python package
|
|
131
|
+
- All compression algorithms handle NaN values correctly
|
|
132
|
+
- To add new dependencies, use `uv add` instead of editing `pyproject.toml`
|
|
133
|
+
- Helper functions for bit manipulation are centralized in `bit_utils.rs` and shared across all algorithms
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
# This file is automatically @generated by Cargo.
|
|
2
|
+
# It is not intended for manual editing.
|
|
3
|
+
version = 4
|
|
4
|
+
|
|
5
|
+
[[package]]
|
|
6
|
+
name = "autocfg"
|
|
7
|
+
version = "1.5.0"
|
|
8
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
9
|
+
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
|
|
10
|
+
|
|
11
|
+
[[package]]
|
|
12
|
+
name = "bytes"
|
|
13
|
+
version = "1.10.1"
|
|
14
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
15
|
+
checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a"
|
|
16
|
+
|
|
17
|
+
[[package]]
|
|
18
|
+
name = "floatbungler"
|
|
19
|
+
version = "0.1.0"
|
|
20
|
+
dependencies = [
|
|
21
|
+
"bytes",
|
|
22
|
+
"pyo3",
|
|
23
|
+
]
|
|
24
|
+
|
|
25
|
+
[[package]]
|
|
26
|
+
name = "heck"
|
|
27
|
+
version = "0.5.0"
|
|
28
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
29
|
+
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
|
30
|
+
|
|
31
|
+
[[package]]
|
|
32
|
+
name = "indoc"
|
|
33
|
+
version = "2.0.6"
|
|
34
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
35
|
+
checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd"
|
|
36
|
+
|
|
37
|
+
[[package]]
|
|
38
|
+
name = "libc"
|
|
39
|
+
version = "0.2.175"
|
|
40
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
41
|
+
checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543"
|
|
42
|
+
|
|
43
|
+
[[package]]
|
|
44
|
+
name = "memoffset"
|
|
45
|
+
version = "0.9.1"
|
|
46
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
47
|
+
checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a"
|
|
48
|
+
dependencies = [
|
|
49
|
+
"autocfg",
|
|
50
|
+
]
|
|
51
|
+
|
|
52
|
+
[[package]]
|
|
53
|
+
name = "once_cell"
|
|
54
|
+
version = "1.21.3"
|
|
55
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
56
|
+
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
|
|
57
|
+
|
|
58
|
+
[[package]]
|
|
59
|
+
name = "portable-atomic"
|
|
60
|
+
version = "1.11.1"
|
|
61
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
62
|
+
checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483"
|
|
63
|
+
|
|
64
|
+
[[package]]
|
|
65
|
+
name = "proc-macro2"
|
|
66
|
+
version = "1.0.101"
|
|
67
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
68
|
+
checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de"
|
|
69
|
+
dependencies = [
|
|
70
|
+
"unicode-ident",
|
|
71
|
+
]
|
|
72
|
+
|
|
73
|
+
[[package]]
|
|
74
|
+
name = "pyo3"
|
|
75
|
+
version = "0.25.1"
|
|
76
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
77
|
+
checksum = "8970a78afe0628a3e3430376fc5fd76b6b45c4d43360ffd6cdd40bdde72b682a"
|
|
78
|
+
dependencies = [
|
|
79
|
+
"indoc",
|
|
80
|
+
"libc",
|
|
81
|
+
"memoffset",
|
|
82
|
+
"once_cell",
|
|
83
|
+
"portable-atomic",
|
|
84
|
+
"pyo3-build-config",
|
|
85
|
+
"pyo3-ffi",
|
|
86
|
+
"pyo3-macros",
|
|
87
|
+
"unindent",
|
|
88
|
+
]
|
|
89
|
+
|
|
90
|
+
[[package]]
|
|
91
|
+
name = "pyo3-build-config"
|
|
92
|
+
version = "0.25.1"
|
|
93
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
94
|
+
checksum = "458eb0c55e7ece017adeba38f2248ff3ac615e53660d7c71a238d7d2a01c7598"
|
|
95
|
+
dependencies = [
|
|
96
|
+
"once_cell",
|
|
97
|
+
"target-lexicon",
|
|
98
|
+
]
|
|
99
|
+
|
|
100
|
+
[[package]]
|
|
101
|
+
name = "pyo3-ffi"
|
|
102
|
+
version = "0.25.1"
|
|
103
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
104
|
+
checksum = "7114fe5457c61b276ab77c5055f206295b812608083644a5c5b2640c3102565c"
|
|
105
|
+
dependencies = [
|
|
106
|
+
"libc",
|
|
107
|
+
"pyo3-build-config",
|
|
108
|
+
]
|
|
109
|
+
|
|
110
|
+
[[package]]
|
|
111
|
+
name = "pyo3-macros"
|
|
112
|
+
version = "0.25.1"
|
|
113
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
114
|
+
checksum = "a8725c0a622b374d6cb051d11a0983786448f7785336139c3c94f5aa6bef7e50"
|
|
115
|
+
dependencies = [
|
|
116
|
+
"proc-macro2",
|
|
117
|
+
"pyo3-macros-backend",
|
|
118
|
+
"quote",
|
|
119
|
+
"syn",
|
|
120
|
+
]
|
|
121
|
+
|
|
122
|
+
[[package]]
|
|
123
|
+
name = "pyo3-macros-backend"
|
|
124
|
+
version = "0.25.1"
|
|
125
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
126
|
+
checksum = "4109984c22491085343c05b0dbc54ddc405c3cf7b4374fc533f5c3313a572ccc"
|
|
127
|
+
dependencies = [
|
|
128
|
+
"heck",
|
|
129
|
+
"proc-macro2",
|
|
130
|
+
"pyo3-build-config",
|
|
131
|
+
"quote",
|
|
132
|
+
"syn",
|
|
133
|
+
]
|
|
134
|
+
|
|
135
|
+
[[package]]
|
|
136
|
+
name = "quote"
|
|
137
|
+
version = "1.0.40"
|
|
138
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
139
|
+
checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
|
|
140
|
+
dependencies = [
|
|
141
|
+
"proc-macro2",
|
|
142
|
+
]
|
|
143
|
+
|
|
144
|
+
[[package]]
|
|
145
|
+
name = "syn"
|
|
146
|
+
version = "2.0.106"
|
|
147
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
148
|
+
checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6"
|
|
149
|
+
dependencies = [
|
|
150
|
+
"proc-macro2",
|
|
151
|
+
"quote",
|
|
152
|
+
"unicode-ident",
|
|
153
|
+
]
|
|
154
|
+
|
|
155
|
+
[[package]]
|
|
156
|
+
name = "target-lexicon"
|
|
157
|
+
version = "0.13.2"
|
|
158
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
159
|
+
checksum = "e502f78cdbb8ba4718f566c418c52bc729126ffd16baee5baa718cf25dd5a69a"
|
|
160
|
+
|
|
161
|
+
[[package]]
|
|
162
|
+
name = "unicode-ident"
|
|
163
|
+
version = "1.0.18"
|
|
164
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
165
|
+
checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
|
|
166
|
+
|
|
167
|
+
[[package]]
|
|
168
|
+
name = "unindent"
|
|
169
|
+
version = "0.2.4"
|
|
170
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
171
|
+
checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3"
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2025 Miikka Koskinen
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: ISC
|
|
4
|
+
|
|
5
|
+
[package]
|
|
6
|
+
name = "floatbungler"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
edition = "2021"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
|
|
11
|
+
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
|
12
|
+
[lib]
|
|
13
|
+
name = "floatbungler"
|
|
14
|
+
crate-type = ["cdylib"]
|
|
15
|
+
|
|
16
|
+
[dependencies]
|
|
17
|
+
bytes = "1.10.1"
|
|
18
|
+
pyo3 = "0.25.0"
|