pylibstats 0.1.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pylibstats-0.1.5/.github/workflows/ci.yml +33 -0
- pylibstats-0.1.5/.github/workflows/wheels.yml +104 -0
- pylibstats-0.1.5/.gitignore +29 -0
- pylibstats-0.1.5/CMakeLists.txt +79 -0
- pylibstats-0.1.5/PKG-INFO +111 -0
- pylibstats-0.1.5/README.md +99 -0
- pylibstats-0.1.5/WARP.md +50 -0
- pylibstats-0.1.5/examples/basic_usage.py +78 -0
- pylibstats-0.1.5/examples/benchmark.py +110 -0
- pylibstats-0.1.5/examples/scipy_comparison.py +155 -0
- pylibstats-0.1.5/pyproject.toml +23 -0
- pylibstats-0.1.5/src/pylibstats/__init__.py +396 -0
- pylibstats-0.1.5/src/pylibstats/__init__.pyi +132 -0
- pylibstats-0.1.5/src/pylibstats/_common.h +158 -0
- pylibstats-0.1.5/src/pylibstats/_core.cpp +204 -0
- pylibstats-0.1.5/src/pylibstats/_core.pyi +366 -0
- pylibstats-0.1.5/src/pylibstats/py.typed +1 -0
- pylibstats-0.1.5/tests/conftest.py +66 -0
- pylibstats-0.1.5/tests/test_beta.py +80 -0
- pylibstats-0.1.5/tests/test_chi_squared.py +78 -0
- pylibstats-0.1.5/tests/test_discrete_uniform.py +70 -0
- pylibstats-0.1.5/tests/test_exponential.py +90 -0
- pylibstats-0.1.5/tests/test_gamma.py +84 -0
- pylibstats-0.1.5/tests/test_gaussian.py +134 -0
- pylibstats-0.1.5/tests/test_poisson.py +75 -0
- pylibstats-0.1.5/tests/test_student_t.py +89 -0
- pylibstats-0.1.5/tests/test_uniform.py +81 -0
- pylibstats-0.1.5/tests/test_validation.py +196 -0
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
concurrency:
|
|
10
|
+
group: ci-${{ github.ref }}
|
|
11
|
+
cancel-in-progress: true
|
|
12
|
+
|
|
13
|
+
jobs:
|
|
14
|
+
build-and-test:
|
|
15
|
+
name: ${{ matrix.os }} / Python ${{ matrix.python }}
|
|
16
|
+
runs-on: ${{ matrix.os }}
|
|
17
|
+
strategy:
|
|
18
|
+
fail-fast: false
|
|
19
|
+
matrix:
|
|
20
|
+
os: [ubuntu-latest, macos-latest, windows-latest]
|
|
21
|
+
python: ["3.11", "3.12", "3.13"]
|
|
22
|
+
steps:
|
|
23
|
+
- uses: actions/checkout@v4
|
|
24
|
+
|
|
25
|
+
- uses: actions/setup-python@v5
|
|
26
|
+
with:
|
|
27
|
+
python-version: ${{ matrix.python }}
|
|
28
|
+
|
|
29
|
+
- name: Build and install
|
|
30
|
+
run: pip install ".[test]" -v
|
|
31
|
+
|
|
32
|
+
- name: Run tests
|
|
33
|
+
run: pytest tests -v --tb=short
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
name: Build wheels
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags: ["v*"]
|
|
6
|
+
workflow_dispatch:
|
|
7
|
+
|
|
8
|
+
concurrency:
|
|
9
|
+
group: wheels-${{ github.ref }}
|
|
10
|
+
cancel-in-progress: true
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
build-wheels:
|
|
14
|
+
name: Wheels / ${{ matrix.name }}
|
|
15
|
+
runs-on: ${{ matrix.os }}
|
|
16
|
+
strategy:
|
|
17
|
+
fail-fast: false
|
|
18
|
+
matrix:
|
|
19
|
+
include:
|
|
20
|
+
- name: linux-x86_64
|
|
21
|
+
os: ubuntu-latest
|
|
22
|
+
cibw_archs_linux: x86_64
|
|
23
|
+
- name: linux-aarch64
|
|
24
|
+
os: ubuntu-24.04-arm
|
|
25
|
+
cibw_archs_linux: aarch64
|
|
26
|
+
- name: macos
|
|
27
|
+
os: macos-latest
|
|
28
|
+
cibw_archs_linux: ""
|
|
29
|
+
- name: windows
|
|
30
|
+
os: windows-latest
|
|
31
|
+
cibw_archs_linux: ""
|
|
32
|
+
|
|
33
|
+
steps:
|
|
34
|
+
- uses: actions/checkout@v4
|
|
35
|
+
|
|
36
|
+
- uses: actions/setup-python@v5
|
|
37
|
+
with:
|
|
38
|
+
python-version: "3.12"
|
|
39
|
+
|
|
40
|
+
- name: Install cibuildwheel
|
|
41
|
+
run: pip install cibuildwheel
|
|
42
|
+
|
|
43
|
+
- name: Build wheels
|
|
44
|
+
run: cibuildwheel --output-dir wheelhouse
|
|
45
|
+
env:
|
|
46
|
+
# macOS: build separate x86_64 and arm64 wheels (not a single fused universal2 wheel)
|
|
47
|
+
CIBW_ARCHS_MACOS: "x86_64 arm64"
|
|
48
|
+
# nanobind uses aligned deallocation, which requires macOS 10.13+
|
|
49
|
+
CIBW_ENVIRONMENT_MACOS: "MACOSX_DEPLOYMENT_TARGET=10.13"
|
|
50
|
+
# Linux: set per-matrix (x86_64 on x64 runner, aarch64 on native ARM runner)
|
|
51
|
+
CIBW_ARCHS_LINUX: ${{ matrix.cibw_archs_linux }}
|
|
52
|
+
# Windows: x86_64 only
|
|
53
|
+
CIBW_ARCHS_WINDOWS: "AMD64"
|
|
54
|
+
# Skip musllinux and older CPythons (cp39, cp310); note this pattern does not match PyPy (pp*) builds
|
|
55
|
+
CIBW_SKIP: "*-musllinux_* cp39-* cp310-*"
|
|
56
|
+
# Build frontend: use pip to build the wheels
|
|
57
|
+
CIBW_BUILD_FRONTEND: "pip"
|
|
58
|
+
# Test the built wheel
|
|
59
|
+
CIBW_TEST_REQUIRES: "pytest scipy"
|
|
60
|
+
CIBW_TEST_COMMAND: "pytest {project}/tests -q"
|
|
61
|
+
# Ensure CMake is available in manylinux images
|
|
62
|
+
CIBW_BEFORE_ALL_LINUX: "yum install -y cmake || apt-get install -y cmake || true"
|
|
63
|
+
|
|
64
|
+
- uses: actions/upload-artifact@v4
|
|
65
|
+
with:
|
|
66
|
+
name: wheels-${{ matrix.name }}
|
|
67
|
+
path: wheelhouse/*.whl
|
|
68
|
+
|
|
69
|
+
build-sdist:
|
|
70
|
+
name: Source distribution
|
|
71
|
+
runs-on: ubuntu-latest
|
|
72
|
+
steps:
|
|
73
|
+
- uses: actions/checkout@v4
|
|
74
|
+
|
|
75
|
+
- uses: actions/setup-python@v5
|
|
76
|
+
with:
|
|
77
|
+
python-version: "3.12"
|
|
78
|
+
|
|
79
|
+
- name: Build sdist
|
|
80
|
+
run: pip install build && python -m build --sdist
|
|
81
|
+
|
|
82
|
+
- uses: actions/upload-artifact@v4
|
|
83
|
+
with:
|
|
84
|
+
name: sdist
|
|
85
|
+
path: dist/*.tar.gz
|
|
86
|
+
|
|
87
|
+
publish:
|
|
88
|
+
name: Publish to PyPI
|
|
89
|
+
needs: [build-wheels, build-sdist]
|
|
90
|
+
runs-on: ubuntu-latest
|
|
91
|
+
if: startsWith(github.ref, 'refs/tags/v')
|
|
92
|
+
environment:
|
|
93
|
+
name: pypi
|
|
94
|
+
permissions:
|
|
95
|
+
id-token: write
|
|
96
|
+
steps:
|
|
97
|
+
- uses: actions/download-artifact@v4
|
|
98
|
+
with:
|
|
99
|
+
path: dist
|
|
100
|
+
merge-multiple: true
|
|
101
|
+
|
|
102
|
+
- uses: pypa/gh-action-pypi-publish@release/v1
|
|
103
|
+
with:
|
|
104
|
+
packages-dir: dist
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# Build artifacts
|
|
2
|
+
build/
|
|
3
|
+
_skbuild/
|
|
4
|
+
dist/
|
|
5
|
+
*.egg-info/
|
|
6
|
+
*.so
|
|
7
|
+
*.pyd
|
|
8
|
+
*.dll
|
|
9
|
+
|
|
10
|
+
# Python
|
|
11
|
+
__pycache__/
|
|
12
|
+
*.pyc
|
|
13
|
+
.venv/
|
|
14
|
+
venv/
|
|
15
|
+
|
|
16
|
+
# CMake
|
|
17
|
+
CMakeCache.txt
|
|
18
|
+
CMakeFiles/
|
|
19
|
+
cmake_install.cmake
|
|
20
|
+
Makefile
|
|
21
|
+
|
|
22
|
+
# IDE
|
|
23
|
+
.vscode/
|
|
24
|
+
.idea/
|
|
25
|
+
*.swp
|
|
26
|
+
|
|
27
|
+
# OS
|
|
28
|
+
.DS_Store
|
|
29
|
+
Thumbs.db
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
cmake_minimum_required(VERSION 3.20)
|
|
2
|
+
project(pylibstats LANGUAGES CXX)
|
|
3
|
+
|
|
4
|
+
set(CMAKE_CXX_STANDARD 20)
|
|
5
|
+
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
|
6
|
+
set(CMAKE_CXX_EXTENSIONS OFF)
|
|
7
|
+
|
|
8
|
+
# ---------------------------------------------------------------------------
|
|
9
|
+
# Python + nanobind
|
|
10
|
+
# ---------------------------------------------------------------------------
|
|
11
|
+
find_package(Python REQUIRED COMPONENTS Interpreter Development.Module)
|
|
12
|
+
|
|
13
|
+
# scikit-build-core adds site-packages to CMAKE_PREFIX_PATH automatically.
|
|
14
|
+
# For direct CMake invocation, discover nanobind via Python as a fallback.
|
|
15
|
+
if(NOT nanobind_ROOT AND NOT nanobind_DIR)
|
|
16
|
+
execute_process(
|
|
17
|
+
COMMAND "${Python_EXECUTABLE}" -m nanobind --cmake_dir
|
|
18
|
+
OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE nanobind_ROOT
|
|
19
|
+
RESULT_VARIABLE _nanobind_rc)
|
|
20
|
+
if(NOT _nanobind_rc EQUAL 0)
|
|
21
|
+
set(nanobind_ROOT "")
|
|
22
|
+
endif()
|
|
23
|
+
endif()
|
|
24
|
+
|
|
25
|
+
find_package(nanobind CONFIG REQUIRED)
|
|
26
|
+
|
|
27
|
+
# ---------------------------------------------------------------------------
|
|
28
|
+
# libstats — prefer installed package, fall back to FetchContent
|
|
29
|
+
# ---------------------------------------------------------------------------
|
|
30
|
+
find_package(libstats 1.1.5 QUIET)
|
|
31
|
+
|
|
32
|
+
if(NOT libstats_FOUND)
|
|
33
|
+
message(STATUS "libstats not found via find_package — using FetchContent")
|
|
34
|
+
include(FetchContent)
|
|
35
|
+
FetchContent_Declare(
|
|
36
|
+
libstats
|
|
37
|
+
GIT_REPOSITORY https://github.com/OldCrow/libstats.git
|
|
38
|
+
GIT_TAG v1.1.5
|
|
39
|
+
)
|
|
40
|
+
# Disable libstats tests/tools/examples when building as a subdirectory.
|
|
41
|
+
set(LIBSTATS_BUILD_TESTS OFF CACHE BOOL "" FORCE)
|
|
42
|
+
set(LIBSTATS_BUILD_TOOLS OFF CACHE BOOL "" FORCE)
|
|
43
|
+
set(LIBSTATS_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE)
|
|
44
|
+
set(BUILD_DOCS OFF CACHE BOOL "" FORCE)
|
|
45
|
+
FetchContent_MakeAvailable(libstats)
|
|
46
|
+
set(LIBSTATS_TARGET libstats_static)
|
|
47
|
+
else()
|
|
48
|
+
message(STATUS "Using installed libstats ${LIBSTATS_VERSION}")
|
|
49
|
+
# Installed exports use libstats::libstats_static (not libstats::static)
|
|
50
|
+
if(TARGET libstats::libstats_static)
|
|
51
|
+
set(LIBSTATS_TARGET libstats::libstats_static)
|
|
52
|
+
elseif(TARGET libstats::static)
|
|
53
|
+
set(LIBSTATS_TARGET libstats::static)
|
|
54
|
+
elseif(DEFINED LIBSTATS_LIBRARIES)
|
|
55
|
+
set(LIBSTATS_TARGET ${LIBSTATS_LIBRARIES})
|
|
56
|
+
else()
|
|
57
|
+
message(FATAL_ERROR "libstats found but no usable static target")
|
|
58
|
+
endif()
|
|
59
|
+
endif()
|
|
60
|
+
|
|
61
|
+
# ---------------------------------------------------------------------------
|
|
62
|
+
# Extension module
|
|
63
|
+
# ---------------------------------------------------------------------------
|
|
64
|
+
nanobind_add_module(
|
|
65
|
+
_core
|
|
66
|
+
STABLE_ABI
|
|
67
|
+
NB_STATIC
|
|
68
|
+
src/pylibstats/_core.cpp
|
|
69
|
+
)
|
|
70
|
+
|
|
71
|
+
target_link_libraries(_core PRIVATE ${LIBSTATS_TARGET})
|
|
72
|
+
|
|
73
|
+
# Windows: NOMINMAX prevents the min/max macros from <windows.h> colliding with std::min/max; _USE_MATH_DEFINES exposes M_PI and related constants from <cmath> under MSVC
|
|
74
|
+
if(WIN32)
|
|
75
|
+
target_compile_definitions(_core PRIVATE NOMINMAX _USE_MATH_DEFINES)
|
|
76
|
+
endif()
|
|
77
|
+
|
|
78
|
+
# Install the extension module into the pylibstats package directory (scikit-build-core places it inside the wheel)
|
|
79
|
+
install(TARGETS _core LIBRARY DESTINATION pylibstats)
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
|
+
Name: pylibstats
|
|
3
|
+
Version: 0.1.5
|
|
4
|
+
Summary: Python bindings for libstats — SIMD-accelerated statistical distributions via NumPy
|
|
5
|
+
License: MIT
|
|
6
|
+
Requires-Python: >=3.11
|
|
7
|
+
Requires-Dist: numpy>=1.20
|
|
8
|
+
Provides-Extra: test
|
|
9
|
+
Requires-Dist: pytest>=7.0; extra == "test"
|
|
10
|
+
Requires-Dist: scipy; extra == "test"
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
|
|
13
|
+
# pylibstats
|
|
14
|
+
|
|
15
|
+
Python bindings for [libstats](https://github.com/OldCrow/libstats) — a C++20 statistical distributions library with SIMD batch operations.
|
|
16
|
+
|
|
17
|
+
## Features
|
|
18
|
+
|
|
19
|
+
- **9 distributions**: Gaussian, Exponential, Uniform, Poisson, Discrete Uniform, Gamma, Beta, Chi-Squared, Student's t
|
|
20
|
+
- **NumPy integration**: pass arrays directly to `pdf()`, `cdf()`, `log_pdf()` — the SIMD/parallel batch path runs automatically
|
|
21
|
+
- **GIL-releasing**: batch operations release the Python GIL for concurrent workloads
|
|
22
|
+
- **SciPy-compatible naming**: `pdf`, `cdf`, `ppf`, `fit`, `sample`
|
|
23
|
+
- **Input validation**: all constructor, setter, and `fit()` parameters are validated in Python with clear `ValueError` messages
|
|
24
|
+
|
|
25
|
+
## Quick start
|
|
26
|
+
|
|
27
|
+
```python
|
|
28
|
+
import numpy as np
|
|
29
|
+
import pylibstats
|
|
30
|
+
|
|
31
|
+
dist = pylibstats.Gaussian(mu=0.0, sigma=1.0)
|
|
32
|
+
|
|
33
|
+
# Scalar
|
|
34
|
+
dist.pdf(1.0)
|
|
35
|
+
dist.cdf(0.0) # 0.5
|
|
36
|
+
dist.ppf(0.975) # ~1.96
|
|
37
|
+
|
|
38
|
+
# Batch (SIMD-accelerated)
|
|
39
|
+
x = np.linspace(-4, 4, 100_000)
|
|
40
|
+
densities = dist.pdf(x)
|
|
41
|
+
|
|
42
|
+
# Sampling
|
|
43
|
+
samples = dist.sample(n=10_000, seed=42)
|
|
44
|
+
|
|
45
|
+
# Fitting
|
|
46
|
+
dist.fit(samples)
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Building from source
|
|
50
|
+
|
|
51
|
+
Requires Python ≥3.11, CMake ≥3.20, and a C++20 compiler.
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
pip install .
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
This fetches libstats v1.1.5 via CMake FetchContent if not already installed.
|
|
58
|
+
|
|
59
|
+
### Building against a local libstats
|
|
60
|
+
|
|
61
|
+
To link against a locally built libstats (e.g. a development branch), install
|
|
62
|
+
libstats to a prefix and point `pip` at it:
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
# In the libstats repo
|
|
66
|
+
cmake --install build --prefix /path/to/libstats/install
|
|
67
|
+
|
|
68
|
+
# In this repo — use libstats_DIR, not CMAKE_PREFIX_PATH
|
|
69
|
+
# (overriding CMAKE_PREFIX_PATH breaks nanobind discovery)
|
|
70
|
+
pip install --no-build-isolation -ve . \
|
|
71
|
+
-Ccmake.define.libstats_DIR=/path/to/libstats/install/lib/cmake/libstats
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
`--no-build-isolation` requires build deps in the active environment:
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
pip install "scikit-build-core>=0.10" "nanobind>=2.0"
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
## Running tests
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
pip install ".[test]"
|
|
84
|
+
pytest
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## Examples
|
|
88
|
+
|
|
89
|
+
See the `examples/` directory:
|
|
90
|
+
|
|
91
|
+
- `basic_usage.py` — scalar/batch operations, sampling, and fitting
|
|
92
|
+
- `benchmark.py` — wall-clock comparison against SciPy (PDF and CDF)
|
|
93
|
+
- `scipy_comparison.py` — numerical accuracy verification across all 9 distributions
|
|
94
|
+
|
|
95
|
+
## Known limitations
|
|
96
|
+
|
|
97
|
+
- **Beta CDF performance**: the regularised incomplete beta function in libstats is slower than SciPy's implementation (a SciPy-time / pylibstats-time ratio of ~0.5×, i.e. roughly twice as slow). All other distribution/operation combinations are faster.
|
|
98
|
+
|
|
99
|
+
## Contributing
|
|
100
|
+
|
|
101
|
+
### macOS ABI note
|
|
102
|
+
|
|
103
|
+
On macOS, libstats may be compiled with Homebrew LLVM while Python extensions use Apple clang. These ship different `libc++` versions whose exception-handling ABIs are incompatible — C++ exceptions thrown from libstats segfault during stack unwinding instead of propagating normally.
|
|
104
|
+
|
|
105
|
+
pylibstats works around this by validating all parameters in pure Python (in `__init__.py`) *before* calling into the C++ layer, so the error path never crosses the ABI boundary. If you add new parameters or distribution classes, follow the same pattern: validate in Python, then delegate to `_core`.
|
|
106
|
+
|
|
107
|
+
See `libstats/include/core/error_handling.h` for the upstream discussion.
|
|
108
|
+
|
|
109
|
+
## License
|
|
110
|
+
|
|
111
|
+
MIT
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
# pylibstats
|
|
2
|
+
|
|
3
|
+
Python bindings for [libstats](https://github.com/OldCrow/libstats) — a C++20 statistical distributions library with SIMD batch operations.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- **9 distributions**: Gaussian, Exponential, Uniform, Poisson, Discrete Uniform, Gamma, Beta, Chi-Squared, Student's t
|
|
8
|
+
- **NumPy integration**: pass arrays directly to `pdf()`, `cdf()`, `log_pdf()` — the SIMD/parallel batch path runs automatically
|
|
9
|
+
- **GIL-releasing**: batch operations release the Python GIL for concurrent workloads
|
|
10
|
+
- **SciPy-compatible naming**: `pdf`, `cdf`, `ppf`, `fit`, `sample`
|
|
11
|
+
- **Input validation**: all constructor, setter, and `fit()` parameters are validated in Python with clear `ValueError` messages
|
|
12
|
+
|
|
13
|
+
## Quick start
|
|
14
|
+
|
|
15
|
+
```python
|
|
16
|
+
import numpy as np
|
|
17
|
+
import pylibstats
|
|
18
|
+
|
|
19
|
+
dist = pylibstats.Gaussian(mu=0.0, sigma=1.0)
|
|
20
|
+
|
|
21
|
+
# Scalar
|
|
22
|
+
dist.pdf(1.0)
|
|
23
|
+
dist.cdf(0.0) # 0.5
|
|
24
|
+
dist.ppf(0.975) # ~1.96
|
|
25
|
+
|
|
26
|
+
# Batch (SIMD-accelerated)
|
|
27
|
+
x = np.linspace(-4, 4, 100_000)
|
|
28
|
+
densities = dist.pdf(x)
|
|
29
|
+
|
|
30
|
+
# Sampling
|
|
31
|
+
samples = dist.sample(n=10_000, seed=42)
|
|
32
|
+
|
|
33
|
+
# Fitting
|
|
34
|
+
dist.fit(samples)
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## Building from source
|
|
38
|
+
|
|
39
|
+
Requires Python ≥3.11, CMake ≥3.20, and a C++20 compiler.
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
pip install .
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
This fetches libstats v1.1.5 via CMake FetchContent if not already installed.
|
|
46
|
+
|
|
47
|
+
### Building against a local libstats
|
|
48
|
+
|
|
49
|
+
To link against a locally built libstats (e.g. a development branch), install
|
|
50
|
+
libstats to a prefix and point `pip` at it:
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
# In the libstats repo
|
|
54
|
+
cmake --install build --prefix /path/to/libstats/install
|
|
55
|
+
|
|
56
|
+
# In this repo — use libstats_DIR, not CMAKE_PREFIX_PATH
|
|
57
|
+
# (overriding CMAKE_PREFIX_PATH breaks nanobind discovery)
|
|
58
|
+
pip install --no-build-isolation -ve . \
|
|
59
|
+
-Ccmake.define.libstats_DIR=/path/to/libstats/install/lib/cmake/libstats
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
`--no-build-isolation` requires build deps in the active environment:
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
pip install "scikit-build-core>=0.10" "nanobind>=2.0"
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Running tests
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
pip install ".[test]"
|
|
72
|
+
pytest
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Examples
|
|
76
|
+
|
|
77
|
+
See the `examples/` directory:
|
|
78
|
+
|
|
79
|
+
- `basic_usage.py` — scalar/batch operations, sampling, and fitting
|
|
80
|
+
- `benchmark.py` — wall-clock comparison against SciPy (PDF and CDF)
|
|
81
|
+
- `scipy_comparison.py` — numerical accuracy verification across all 9 distributions
|
|
82
|
+
|
|
83
|
+
## Known limitations
|
|
84
|
+
|
|
85
|
+
- **Beta CDF performance**: the regularised incomplete beta function in libstats is slower than SciPy's implementation (a SciPy-time / pylibstats-time ratio of ~0.5×, i.e. roughly twice as slow). All other distribution/operation combinations are faster.
|
|
86
|
+
|
|
87
|
+
## Contributing
|
|
88
|
+
|
|
89
|
+
### macOS ABI note
|
|
90
|
+
|
|
91
|
+
On macOS, libstats may be compiled with Homebrew LLVM while Python extensions use Apple clang. These ship different `libc++` versions whose exception-handling ABIs are incompatible — C++ exceptions thrown from libstats segfault during stack unwinding instead of propagating normally.
|
|
92
|
+
|
|
93
|
+
pylibstats works around this by validating all parameters in pure Python (in `__init__.py`) *before* calling into the C++ layer, so the error path never crosses the ABI boundary. If you add new parameters or distribution classes, follow the same pattern: validate in Python, then delegate to `_core`.
|
|
94
|
+
|
|
95
|
+
See `libstats/include/core/error_handling.h` for the upstream discussion.
|
|
96
|
+
|
|
97
|
+
## License
|
|
98
|
+
|
|
99
|
+
MIT
|
pylibstats-0.1.5/WARP.md
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# WARP.md
|
|
2
|
+
|
|
3
|
+
Guidance for working in `pylibstats`.
|
|
4
|
+
|
|
5
|
+
## Project purpose
|
|
6
|
+
|
|
7
|
+
`pylibstats` provides Python bindings for `libstats` via `nanobind` and `scikit-build-core`.
|
|
8
|
+
|
|
9
|
+
Core goals:
|
|
10
|
+
|
|
11
|
+
- expose the `libstats` distribution API in Python
|
|
12
|
+
- keep NumPy-based batch paths fast and simple
|
|
13
|
+
- keep Python wrappers, native bindings, and stubs synchronized
|
|
14
|
+
|
|
15
|
+
## Key files
|
|
16
|
+
|
|
17
|
+
- `CMakeLists.txt` — native extension build and `libstats` dependency wiring
|
|
18
|
+
- `pyproject.toml` — package metadata and build backend config
|
|
19
|
+
- `src/pylibstats/_core.cpp` — nanobind bindings
|
|
20
|
+
- `src/pylibstats/_common.h` — NumPy conversion helpers
|
|
21
|
+
- `src/pylibstats/__init__.py` — Python wrappers and validation
|
|
22
|
+
- `src/pylibstats/__init__.pyi`, `src/pylibstats/_core.pyi` — typing stubs
|
|
23
|
+
- `tests/` — pytest suite
|
|
24
|
+
|
|
25
|
+
## Dependency notes
|
|
26
|
+
|
|
27
|
+
- Build first tries `find_package(libstats)`.
|
|
28
|
+
- If not found, CMake fetches `libstats` from GitHub tag `v1.1.5`.
|
|
29
|
+
- For local development against a custom `libstats` install, pass `libstats_DIR` (do not override `CMAKE_PREFIX_PATH`, which can break nanobind discovery).
|
|
30
|
+
|
|
31
|
+
## Common commands
|
|
32
|
+
|
|
33
|
+
```powershell
|
|
34
|
+
python -m pip install -e C:\Users\gdwol\Development\pylibstats
|
|
35
|
+
python -m pip install ".[test]"
|
|
36
|
+
python -m pytest C:\Users\gdwol\Development\pylibstats\tests -q
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
Local `libstats` build override example:
|
|
40
|
+
|
|
41
|
+
```powershell
|
|
42
|
+
python -m pip install --no-build-isolation -ve C:\Users\gdwol\Development\pylibstats `
|
|
43
|
+
-Ccmake.define.libstats_DIR=C:\path\to\libstats\install\lib\cmake\libstats
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Editing rules
|
|
47
|
+
|
|
48
|
+
1. Keep `_core.cpp`, `__init__.py`, and `.pyi` stubs consistent.
|
|
49
|
+
2. Add or update tests for any behavior/API change.
|
|
50
|
+
3. Keep docs concise and accurate.
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""Basic pylibstats usage examples."""
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
|
|
5
|
+
import pylibstats
|
|
6
|
+
|
|
7
|
+
# ── Construction ─────────────────────────────────────────────────────────────
|
|
8
|
+
|
|
9
|
+
gaussian = pylibstats.Gaussian(mu=0.0, sigma=1.0)
|
|
10
|
+
gamma = pylibstats.Gamma(alpha=2.0, beta=0.5)
|
|
11
|
+
beta = pylibstats.Beta(alpha=2.0, beta=5.0)
|
|
12
|
+
poisson = pylibstats.Poisson(lam=4.0)
|
|
13
|
+
|
|
14
|
+
# Normal is an alias for Gaussian
|
|
15
|
+
normal = pylibstats.Normal(mu=100.0, sigma=15.0)
|
|
16
|
+
|
|
17
|
+
print("── Distributions ──")
|
|
18
|
+
print(f" {gaussian!r}")
|
|
19
|
+
print(f" {gamma!r}")
|
|
20
|
+
print(f" {beta!r}")
|
|
21
|
+
print(f" {poisson!r}")
|
|
22
|
+
print(f" {normal!r}")
|
|
23
|
+
|
|
24
|
+
# ── Scalar operations ────────────────────────────────────────────────────────
|
|
25
|
+
|
|
26
|
+
print("\n── Scalar PDF / CDF / PPF ──")
|
|
27
|
+
print(f" Gaussian PDF(0) = {gaussian.pdf(0.0):.6f}")
|
|
28
|
+
print(f" Gaussian CDF(0) = {gaussian.cdf(0.0):.6f}")
|
|
29
|
+
print(f" Gaussian PPF(0.975) = {gaussian.ppf(0.975):.4f}")
|
|
30
|
+
print(f" Gamma PDF(2) = {gamma.pdf(2.0):.6f}")
|
|
31
|
+
print(f" Beta CDF(0.3) = {beta.cdf(0.3):.6f}")
|
|
32
|
+
print(f" Poisson PMF(4) = {poisson.pdf(4.0):.6f}")
|
|
33
|
+
|
|
34
|
+
# ── Batch operations (SIMD-accelerated) ──────────────────────────────────────
|
|
35
|
+
|
|
36
|
+
x = np.linspace(-4, 4, 10)
|
|
37
|
+
pdf_values = gaussian.pdf(x)
|
|
38
|
+
cdf_values = gaussian.cdf(x)
|
|
39
|
+
|
|
40
|
+
print("\n── Batch operations (10 points) ──")
|
|
41
|
+
print(f" x = {x}")
|
|
42
|
+
print(f" pdf = {pdf_values}")
|
|
43
|
+
print(f" cdf = {cdf_values}")
|
|
44
|
+
|
|
45
|
+
# ── Moment properties ────────────────────────────────────────────────────────
|
|
46
|
+
|
|
47
|
+
print("\n── Moment properties ──")
|
|
48
|
+
for name, dist in [("Gaussian(0,1)", gaussian), ("Gamma(2,0.5)", gamma),
|
|
49
|
+
("Beta(2,5)", beta), ("Poisson(4)", poisson)]:
|
|
50
|
+
print(f" {name:16s} mean={dist.mean:.4f} var={dist.variance:.4f} "
|
|
51
|
+
f"skew={dist.skewness:.4f} kurt={dist.kurtosis:.4f}")
|
|
52
|
+
|
|
53
|
+
# ── Sampling ─────────────────────────────────────────────────────────────────
|
|
54
|
+
|
|
55
|
+
samples = gaussian.sample(n=10_000, seed=42)
|
|
56
|
+
print(f"\n── Sampling ──")
|
|
57
|
+
print(f" 10,000 Gaussian samples: mean={samples.mean():.4f}, std={samples.std():.4f}")
|
|
58
|
+
|
|
59
|
+
beta_samples = beta.sample(n=10_000, seed=42)
|
|
60
|
+
print(f" 10,000 Beta(2,5) samples: mean={beta_samples.mean():.4f}, "
|
|
61
|
+
f"min={beta_samples.min():.4f}, max={beta_samples.max():.4f}")
|
|
62
|
+
|
|
63
|
+
# ── Fitting ──────────────────────────────────────────────────────────────────
|
|
64
|
+
|
|
65
|
+
data = np.random.default_rng(42).normal(loc=5.0, scale=2.0, size=5000)
|
|
66
|
+
fitted = pylibstats.Gaussian()
|
|
67
|
+
fitted.fit(data)
|
|
68
|
+
print(f"\n── Fitting ──")
|
|
69
|
+
print(f" Fitted Gaussian to 5,000 samples from N(5, 2):")
|
|
70
|
+
print(f" mu={fitted.mu:.4f} sigma={fitted.sigma:.4f}")
|
|
71
|
+
|
|
72
|
+
# ── Parameter modification ───────────────────────────────────────────────────
|
|
73
|
+
|
|
74
|
+
print(f"\n── Parameter modification ──")
|
|
75
|
+
dist = pylibstats.Exponential(lam=1.0)
|
|
76
|
+
print(f" Before: lam={dist.lam}, mean={dist.mean:.4f}")
|
|
77
|
+
dist.lam = 3.0
|
|
78
|
+
print(f" After: lam={dist.lam}, mean={dist.mean:.4f}")
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
"""Benchmark pylibstats batch operations against SciPy.
|
|
2
|
+
|
|
3
|
+
Measures wall-clock time for PDF and CDF evaluation on arrays of 100k and 1M
|
|
4
|
+
elements across all continuous distributions. Prints a formatted table with
|
|
5
|
+
per-distribution speedup ratios.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
python examples/benchmark.py
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import time
|
|
12
|
+
|
|
13
|
+
import numpy as np
|
|
14
|
+
from scipy import stats as sp
|
|
15
|
+
|
|
16
|
+
import pylibstats
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def bench(fn, *, warmup: int = 2, repeats: int = 5) -> float:
|
|
20
|
+
"""Return median wall-clock seconds for fn()."""
|
|
21
|
+
for _ in range(warmup):
|
|
22
|
+
fn()
|
|
23
|
+
times = []
|
|
24
|
+
for _ in range(repeats):
|
|
25
|
+
t0 = time.perf_counter()
|
|
26
|
+
fn()
|
|
27
|
+
times.append(time.perf_counter() - t0)
|
|
28
|
+
return float(np.median(times))
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# ── Distribution configs ─────────────────────────────────────────────────────
|
|
32
|
+
# Each entry: (name, pylibstats instance, scipy frozen instance, x_range)
|
|
33
|
+
|
|
34
|
+
DISTRIBUTIONS = [
|
|
35
|
+
("Gaussian(0,1)",
|
|
36
|
+
pylibstats.Gaussian(0.0, 1.0),
|
|
37
|
+
sp.norm(0.0, 1.0),
|
|
38
|
+
(-4.0, 4.0)),
|
|
39
|
+
|
|
40
|
+
("Exponential(1)",
|
|
41
|
+
pylibstats.Exponential(1.0),
|
|
42
|
+
sp.expon(scale=1.0),
|
|
43
|
+
(0.01, 10.0)),
|
|
44
|
+
|
|
45
|
+
("Uniform(0,1)",
|
|
46
|
+
pylibstats.Uniform(0.0, 1.0),
|
|
47
|
+
sp.uniform(0.0, 1.0),
|
|
48
|
+
(0.0, 1.0)),
|
|
49
|
+
|
|
50
|
+
("Gamma(2,1)",
|
|
51
|
+
pylibstats.Gamma(2.0, 1.0),
|
|
52
|
+
sp.gamma(a=2.0, scale=1.0),
|
|
53
|
+
(0.01, 10.0)),
|
|
54
|
+
|
|
55
|
+
("Beta(2,5)",
|
|
56
|
+
pylibstats.Beta(2.0, 5.0),
|
|
57
|
+
sp.beta(a=2.0, b=5.0),
|
|
58
|
+
(0.01, 0.99)),
|
|
59
|
+
|
|
60
|
+
("ChiSquared(5)",
|
|
61
|
+
pylibstats.ChiSquared(5.0),
|
|
62
|
+
sp.chi2(df=5),
|
|
63
|
+
(0.1, 20.0)),
|
|
64
|
+
|
|
65
|
+
("StudentT(10)",
|
|
66
|
+
pylibstats.StudentT(10.0),
|
|
67
|
+
sp.t(df=10),
|
|
68
|
+
(-5.0, 5.0)),
|
|
69
|
+
]
|
|
70
|
+
|
|
71
|
+
SIZES = [100_000, 1_000_000]
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def run_benchmarks() -> None:
|
|
75
|
+
header = (f"{'Distribution':>20s} {'N':>10s} {'Op':>4s} "
|
|
76
|
+
f"{'pylibstats (ms)':>15s} {'SciPy (ms)':>12s} {'Speedup':>8s}")
|
|
77
|
+
sep = "─" * len(header)
|
|
78
|
+
|
|
79
|
+
print()
|
|
80
|
+
print(header)
|
|
81
|
+
print(sep)
|
|
82
|
+
|
|
83
|
+
for name, pl_dist, sc_dist, (lo, hi) in DISTRIBUTIONS:
|
|
84
|
+
for n in SIZES:
|
|
85
|
+
x = np.linspace(lo, hi, n)
|
|
86
|
+
|
|
87
|
+
# PDF — bind loop vars via default args to avoid late-binding
|
|
88
|
+
t_pl = bench(lambda d=pl_dist, arr=x: d.pdf(arr)) * 1000
|
|
89
|
+
t_sc = bench(lambda d=sc_dist, arr=x: d.pdf(arr)) * 1000
|
|
90
|
+
ratio = t_sc / t_pl if t_pl > 0 else float("inf")
|
|
91
|
+
print(f"{name:>20s} {n:>10,d} {'PDF':>4s} "
|
|
92
|
+
f"{t_pl:>15.2f} {t_sc:>12.2f} {ratio:>7.1f}x")
|
|
93
|
+
|
|
94
|
+
# CDF
|
|
95
|
+
t_pl = bench(lambda d=pl_dist, arr=x: d.cdf(arr)) * 1000
|
|
96
|
+
t_sc = bench(lambda d=sc_dist, arr=x: d.cdf(arr)) * 1000
|
|
97
|
+
ratio = t_sc / t_pl if t_pl > 0 else float("inf")
|
|
98
|
+
print(f"{'':>20s} {'':>10s} {'CDF':>4s} "
|
|
99
|
+
f"{t_pl:>15.2f} {t_sc:>12.2f} {ratio:>7.1f}x")
|
|
100
|
+
|
|
101
|
+
print(sep)
|
|
102
|
+
print()
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
if __name__ == "__main__":
|
|
106
|
+
import scipy
|
|
107
|
+
print("pylibstats vs SciPy batch performance benchmark")
|
|
108
|
+
print(f"NumPy {np.__version__}, SciPy {scipy.__version__}")
|
|
109
|
+
print(f"Repeats: 5 (median), Warmup: 2")
|
|
110
|
+
run_benchmarks()
|