lexindex 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,94 @@
1
+ name: CI
2
+
3
+ on:  # triggers: pushes to main/master, and pull requests
4
+   push:
5
+     branches: [main, master]
6
+   pull_request:
7
+
8
+ permissions:  # workflow-wide GITHUB_TOKEN scope
9
+   contents: read
10
+
11
+ concurrency:  # one active run per ref; a newer run cancels the one in progress
12
+   group: ci-${{ github.ref }}
13
+   cancel-in-progress: true
14
+
15
+ env:  # workflow-level, so visible to every job
16
+   CARGO_TERM_COLOR: always
17
+
18
+ jobs:
19
+   rust:  # formatting, lints, tests and dependency audit for the Rust crate
20
+     name: rust (fmt · clippy · test · audit)
21
+     runs-on: ubuntu-latest
22
+     steps:
23
+       - uses: actions/checkout@v4
24
+       - uses: dtolnay/rust-toolchain@stable
25
+         with:
26
+           components: rustfmt, clippy
27
+       - uses: Swatinem/rust-cache@v2
28
+       - uses: taiki-e/install-action@v2  # provides the cargo-audit binary used by the last step
29
+         with:
30
+           tool: cargo-audit
31
+       - name: fmt
32
+         run: cargo fmt --all --check  # --check reports unformatted code without rewriting files
33
+       - name: clippy (default = with mph)
34
+         run: cargo clippy --all-targets -- -D warnings  # -D warnings: any lint warning fails the step
35
+       - name: clippy (no-default-features = fst only)
36
+         run: cargo clippy --no-default-features --all-targets -- -D warnings
37
+       - name: test (default)
38
+         run: cargo test
39
+       - name: test (no-default-features)
40
+         run: cargo test --no-default-features
41
+       - name: cargo audit
42
+         run: cargo audit
43
+
44
+   python-build:  # lints the Python side and builds the wheel that python-test installs
45
+     name: python (ruff · clippy · build)
46
+     runs-on: ubuntu-latest
47
+     env:
48
+       PYO3_USE_ABI3_FORWARD_COMPATIBILITY: "1"  # NOTE(review): PyO3 build setting, presumably for abi3 on Pythons newer than PyO3 knows; confirm in PyO3 docs
49
+     steps:
50
+       - uses: actions/checkout@v4
51
+       - uses: dtolnay/rust-toolchain@stable
52
+         with:
53
+           components: clippy
54
+       - uses: Swatinem/rust-cache@v2
55
+       - uses: astral-sh/setup-uv@v5
56
+       - name: ruff check
57
+         run: uvx ruff check python/ tests/test_python.py
58
+       - name: ruff format --check
59
+         run: uvx ruff format --check python/ tests/test_python.py
60
+       - name: clippy (python bindings)
61
+         run: cargo clippy --features python -- -D warnings
62
+       - name: build abi3 wheel
63
+         run: uv run --with maturin maturin build --release --out dist  # wheel is written to dist/
64
+       - uses: actions/upload-artifact@v4  # hands the wheel to python-test
65
+         with:
66
+           name: wheel
67
+           path: dist/*.whl
68
+
69
+   python-test:  # installs the wheel built by python-build on each interpreter and tests it
70
+     name: python (pytest · stubtest · py${{ matrix.python-version }})
71
+     needs: python-build
72
+     runs-on: ubuntu-latest
73
+     strategy:
74
+       fail-fast: false  # let every interpreter report, even if one fails
75
+       matrix:
76
+         python-version: ["3.11", "3.12", "3.13", "3.14"]
77
+     steps:
78
+       - uses: actions/checkout@v4
79
+       - uses: astral-sh/setup-uv@v5
80
+       - uses: actions/download-artifact@v4
81
+         with:
82
+           name: wheel
83
+           path: dist
84
+       # Install-only: the single abi3 wheel must import and pass on every supported interpreter.
85
+       - name: pytest  # --no-project: uv must not sync/build the checkout, only the wheel is under test
86
+         run: |
87
+           wheel=$(ls dist/*.whl | head -1)
88
+           uv run --no-project --python ${{ matrix.python-version }} --with pytest --with "$wheel" \
89
+             pytest tests/test_python.py -q
90
+       - name: stubtest  # same isolation: stubs are checked against the installed wheel
91
+         run: |
92
+           wheel=$(ls dist/*.whl | head -1)
93
+           uv run --no-project --python ${{ matrix.python-version }} --with mypy --with "$wheel" \
94
+             python -m mypy.stubtest lexindex
@@ -0,0 +1,100 @@
1
+ name: Release
2
+
3
+ # Build redistributable wheels for every platform and (on a version tag) publish to PyPI.
4
+ # Publishing uses PyPI Trusted Publishing (OIDC) — add a pending publisher for this repo at
5
+ # https://pypi.org/manage/account/publishing/ (workflow `release.yml`, environment `pypi`) before the
6
+ # first `v*` tag, and create a `pypi` environment in the repo settings.
7
+
8
+ on:
9
+   push:
10
+     tags: ["v*"]  # any pushed tag whose name starts with "v"
11
+   workflow_dispatch:  # manual run: builds artifacts only, the publish/github-release jobs require a tag ref
12
+
13
+ permissions:  # workflow default; publish and github-release declare their own job-level permissions
14
+   contents: read
15
+
16
+ jobs:
17
+   wheels:  # builds one wheel set per (runner, target) matrix entry
18
+     name: wheels ${{ matrix.platform.runner }} ${{ matrix.platform.target }}
19
+     runs-on: ${{ matrix.platform.runner }}
20
+     strategy:
21
+       fail-fast: false  # one failing platform does not cancel the rest
22
+       matrix:
23
+         platform:
24
+           - { runner: ubuntu-latest, target: x86_64 }
25
+           - { runner: ubuntu-latest, target: aarch64 }
26
+           # macOS x86_64 is cross-built on the arm64 macos-14 runner: dedicated Intel (macos-13)
27
+           # runners are scarce/deprecated. abi3 needs no interpreter at build time, so this is sound.
28
+           - { runner: macos-14, target: x86_64 }
29
+           - { runner: macos-14, target: aarch64 }
30
+           - { runner: windows-latest, target: x64 }
31
+     steps:
32
+       - uses: actions/checkout@v4
33
+       - uses: actions/setup-python@v5
34
+         with:
35
+           python-version: "3.x"
36
+       - name: Build wheels
37
+         uses: PyO3/maturin-action@v1
38
+         with:
39
+           target: ${{ matrix.platform.target }}
40
+           args: --release --out dist
41
+           manylinux: auto
42
+           sccache: "true"
43
+       - uses: actions/upload-artifact@v4
44
+         with:
45
+           name: wheels-${{ matrix.platform.runner }}-${{ matrix.platform.target }}  # unique per entry; matches the publish job's `wheels-*` pattern
46
+           path: dist
47
+
48
+   sdist:  # builds the source distribution alongside the wheels
49
+     name: sdist
50
+     runs-on: ubuntu-latest
51
+     steps:
52
+       - uses: actions/checkout@v4
53
+       - name: Build sdist
54
+         uses: PyO3/maturin-action@v1
55
+         with:
56
+           command: sdist
57
+           args: --out dist
58
+       - uses: actions/upload-artifact@v4
59
+         with:
60
+           name: wheels-sdist  # "wheels-" prefix so the publish job's `wheels-*` pattern also collects the sdist
61
+           path: dist
62
+
63
+   publish:  # uploads every built artifact to PyPI; runs only when the ref is a tag
64
+     name: publish to PyPI
65
+     runs-on: ubuntu-latest
66
+     needs: [wheels, sdist]
67
+     if: startsWith(github.ref, 'refs/tags/')  # skipped unless the run's ref is a tag
68
+     environment: pypi
69
+     permissions:
70
+       id-token: write # OIDC token for PyPI Trusted Publishing
71
+     steps:
72
+       - uses: actions/download-artifact@v4
73
+         with:
74
+           pattern: wheels-*  # all wheels-<runner>-<target> artifacts plus wheels-sdist
75
+           merge-multiple: true  # flatten them into the single dist/ directory
76
+           path: dist
77
+       - uses: pypa/gh-action-pypi-publish@release/v1
78
+
79
+   github-release:  # creates the GitHub Release for a tag, using that version's CHANGELOG section as the body
80
+     name: GitHub Release
81
+     runs-on: ubuntu-latest
82
+     needs: [publish]
83
+     if: startsWith(github.ref, 'refs/tags/')
84
+     permissions:
85
+       contents: write # create the GitHub Release
86
+     steps:
87
+       - uses: actions/checkout@v4
88
+       - name: Extract this version's CHANGELOG section
89
+         run: |
90
+           ver="${GITHUB_REF_NAME#v}"  # tag v0.1.0 -> 0.1.0
91
+           awk -v v="$ver" '
92
+             index($0, "## [" v "]") == 1 { found = 1; next }  # literal heading match; skip the heading itself
93
+             found { if (/^## \[/) exit; print }               # copy lines until the next version heading
94
+           ' CHANGELOG.md > release-notes.md
95
+           [ -s release-notes.md ] || echo "::warning::CHANGELOG.md has no section for $ver; the release body will be empty"
96
+           echo "----- release notes -----"; cat release-notes.md
97
+       - uses: softprops/action-gh-release@v2
98
+         with:
99
+           name: lexindex ${{ github.ref_name }}
100
+           body_path: release-notes.md
@@ -0,0 +1,8 @@
1
+ /target
2
+ /dist
3
+ /.venv
4
+ __pycache__/
5
+ *.py[cod]
6
+ *.so
7
+ .pytest_cache/
8
+ .ruff_cache/
@@ -0,0 +1,28 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project are documented here. The format follows
4
+ [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) and the project adheres to
5
+ [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
6
+
7
+ ## [0.1.0] — 2026-06-28
8
+
9
+ First public release — compact, immutable string<->id indexes for huge catalogs; a standalone Rust +
10
+ Python library that also pairs with `betula-cluster` (map string ids to cluster ids and back).
11
+
12
+ ### Added
13
+
14
+ - **`StringIndex`** — ordered, FST-backed index: exact `string <-> id`, plus prefix, range, fuzzy
15
+ (bounded Levenshtein edit distance), and subsequence iteration — all automaton-driven over the FST,
16
+ never a full scan. Serialises to a flat, relocatable blob (`save` / `load` / `to_bytes` /
17
+ `from_bytes`) with fully length- and offset-validated parsing (safe on untrusted input).
18
+ - **`PerfectHashIndex`** — minimal-perfect-hash dictionary (`ptr_hash`): verified-membership `id`,
19
+   a faster `id_unchecked` for closed vocabularies (~1.25× faster than `std::collections::HashMap` on point lookup),
20
+ reverse lookup, and persistence (`save` / `load`) via `epserde`, keyed on a version-stable hash
21
+ (FNV-1a + splitmix64) so a serialised MPH reloads and queries identically on any build.
22
+ - **Python bindings** (PyO3 abi3 extension, CPython 3.11+): `pip install lexindex`, zero runtime
23
+ dependencies, typed (`py.typed` + stubs).
24
+ - **Feature gating** — `mph` (default) provides `PerfectHashIndex` (pulls `ptr_hash` + `epserde`);
25
+ `--no-default-features` is an `fst`-only build, free of the informational RustSec advisories on the
26
+ `ptr_hash` dependency tree. `fst`'s `levenshtein` is always on for fuzzy search.
27
+ - **Benchmark** — `cargo run --release --example bench` compares both indexes against
28
+   `std::collections::HashMap` / `BTreeMap` (build time, lookup latency, serialised size).