bayesian-sparse-gmm 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bayesian_sparse_gmm-0.1.0/.flake8 +10 -0
- bayesian_sparse_gmm-0.1.0/.github/workflows/release.yml +72 -0
- bayesian_sparse_gmm-0.1.0/.gitignore +145 -0
- bayesian_sparse_gmm-0.1.0/PKG-INFO +108 -0
- bayesian_sparse_gmm-0.1.0/README.md +85 -0
- bayesian_sparse_gmm-0.1.0/evaluate.py +410 -0
- bayesian_sparse_gmm-0.1.0/pyproject.toml +41 -0
- bayesian_sparse_gmm-0.1.0/src/bayesian_sparse_gmm/__init__.py +5 -0
- bayesian_sparse_gmm-0.1.0/src/bayesian_sparse_gmm/backends/__init__.py +52 -0
- bayesian_sparse_gmm-0.1.0/src/bayesian_sparse_gmm/backends/_base.py +46 -0
- bayesian_sparse_gmm-0.1.0/src/bayesian_sparse_gmm/backends/_cuda.py +106 -0
- bayesian_sparse_gmm-0.1.0/src/bayesian_sparse_gmm/backends/_numba.py +274 -0
- bayesian_sparse_gmm-0.1.0/src/bayesian_sparse_gmm/backends/_numpy.py +53 -0
- bayesian_sparse_gmm-0.1.0/src/bayesian_sparse_gmm/config.py +29 -0
- bayesian_sparse_gmm-0.1.0/src/bayesian_sparse_gmm/diagnostics.py +111 -0
- bayesian_sparse_gmm-0.1.0/src/bayesian_sparse_gmm/model.py +203 -0
- bayesian_sparse_gmm-0.1.0/src/bayesian_sparse_gmm/postprocessing.py +46 -0
- bayesian_sparse_gmm-0.1.0/src/bayesian_sparse_gmm/sampler.py +167 -0
- bayesian_sparse_gmm-0.1.0/src/bayesian_sparse_gmm/state.py +17 -0
- bayesian_sparse_gmm-0.1.0/src/bayesian_sparse_gmm/utils.py +30 -0
- bayesian_sparse_gmm-0.1.0/tests/__init__.py +1 -0
- bayesian_sparse_gmm-0.1.0/tests/test_backends.py +78 -0
- bayesian_sparse_gmm-0.1.0/tests/test_cuda.py +38 -0
- bayesian_sparse_gmm-0.1.0/tests/test_diagnostics.py +54 -0
- bayesian_sparse_gmm-0.1.0/tests/test_model.py +67 -0
- bayesian_sparse_gmm-0.1.0/tests/test_numba.py +62 -0
- bayesian_sparse_gmm-0.1.0/tests/test_phase1.py +57 -0
- bayesian_sparse_gmm-0.1.0/tests/test_sampler.py +195 -0
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
name: CI/CD Release
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches:
|
|
6
|
+
- main
|
|
7
|
+
pull_request:
|
|
8
|
+
branches:
|
|
9
|
+
- main
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
test:
|
|
13
|
+
name: Test and Lint
|
|
14
|
+
runs-on: ubuntu-latest
|
|
15
|
+
strategy:
|
|
16
|
+
matrix:
|
|
17
|
+
python-version: ["3.9", "3.10", "3.11", "3.12"]
|
|
18
|
+
steps:
|
|
19
|
+
- name: Checkout Source Code
|
|
20
|
+
uses: actions/checkout@v4
|
|
21
|
+
|
|
22
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
23
|
+
uses: actions/setup-python@v5
|
|
24
|
+
with:
|
|
25
|
+
python-version: ${{ matrix.python-version }}
|
|
26
|
+
cache: "pip"
|
|
27
|
+
|
|
28
|
+
- name: Install Development Dependencies
|
|
29
|
+
run: |
|
|
30
|
+
python -m pip install --upgrade pip
|
|
31
|
+
pip install .[dev]
|
|
32
|
+
|
|
33
|
+
- name: Run Lint Checks
|
|
34
|
+
run: |
|
|
35
|
+
black --check src tests
|
|
36
|
+
isort --check-only src tests
|
|
37
|
+
flake8 src tests
|
|
38
|
+
|
|
39
|
+
- name: Run Unit Tests
|
|
40
|
+
run: pytest
|
|
41
|
+
|
|
42
|
+
publish:
|
|
43
|
+
name: Publish to PyPI
|
|
44
|
+
needs: test
|
|
45
|
+
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
|
46
|
+
runs-on: ubuntu-latest
|
|
47
|
+
permissions:
|
|
48
|
+
id-token: write
|
|
49
|
+
steps:
|
|
50
|
+
- name: Checkout Source Code
|
|
51
|
+
uses: actions/checkout@v4
|
|
52
|
+
|
|
53
|
+
- name: Set up Python
|
|
54
|
+
uses: actions/setup-python@v5
|
|
55
|
+
with:
|
|
56
|
+
python-version: "3.10"
|
|
57
|
+
|
|
58
|
+
- name: Install Build Tools
|
|
59
|
+
run: |
|
|
60
|
+
python -m pip install --upgrade pip
|
|
61
|
+
pip install build twine
|
|
62
|
+
|
|
63
|
+
- name: Build Source and Wheel Distributions
|
|
64
|
+
run: python -m build
|
|
65
|
+
|
|
66
|
+
- name: Validate Package Metadata
|
|
67
|
+
run: twine check dist/*
|
|
68
|
+
|
|
69
|
+
- name: Upload to PyPI
|
|
70
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
71
|
+
with:
|
|
72
|
+
skip-existing: true
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Distribution / packaging
|
|
10
|
+
bin/
|
|
11
|
+
build/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
dist/
|
|
14
|
+
downloads/
|
|
15
|
+
eggs/
|
|
16
|
+
.eggs/
|
|
17
|
+
lib/
|
|
18
|
+
lib64/
|
|
19
|
+
parts/
|
|
20
|
+
sdist/
|
|
21
|
+
var/
|
|
22
|
+
wheels/
|
|
23
|
+
share/python-wheels/
|
|
24
|
+
*.egg-info/
|
|
25
|
+
.installed.cfg
|
|
26
|
+
*.egg
|
|
27
|
+
MANIFEST
|
|
28
|
+
|
|
29
|
+
# PyInstaller
|
|
30
|
+
# Usually these files are written by a python script containing a list of
|
|
31
|
+
# files to include or exclude inside the final binary
|
|
32
|
+
*.manifest
|
|
33
|
+
*.spec
|
|
34
|
+
|
|
35
|
+
# Installer logs
|
|
36
|
+
pip-log.txt
|
|
37
|
+
pip-delete-this-directory.txt
|
|
38
|
+
|
|
39
|
+
# Unit test / coverage reports
|
|
40
|
+
htmlcov/
|
|
41
|
+
.tox/
|
|
42
|
+
.nox/
|
|
43
|
+
.coverage
|
|
44
|
+
.coverage.*
|
|
45
|
+
.cache
|
|
46
|
+
nosetests.xml
|
|
47
|
+
coverage.xml
|
|
48
|
+
*.cover
|
|
49
|
+
*.py,cover
|
|
50
|
+
.hypothesis/
|
|
51
|
+
.pytest_cache/
|
|
52
|
+
cover/
|
|
53
|
+
|
|
54
|
+
# Translations
|
|
55
|
+
*.mo
|
|
56
|
+
*.pot
|
|
57
|
+
|
|
58
|
+
# Django stuff:
|
|
59
|
+
*.log
|
|
60
|
+
local_settings.py
|
|
61
|
+
db.sqlite3
|
|
62
|
+
db.sqlite3-journal
|
|
63
|
+
|
|
64
|
+
# Sphinx documentation
|
|
65
|
+
docs/_build/
|
|
66
|
+
|
|
67
|
+
# PyBuilder
|
|
68
|
+
.pybuilder/
|
|
69
|
+
target/
|
|
70
|
+
|
|
71
|
+
# Jupyter Notebook
|
|
72
|
+
.ipynb_checkpoints
|
|
73
|
+
|
|
74
|
+
# IPython
|
|
75
|
+
.ipython/
|
|
76
|
+
|
|
77
|
+
# pyenv
|
|
78
|
+
# For a library or app, you might want to share your .python-version if you
|
|
79
|
+
# use pyenv. Uncomment if needed.
|
|
80
|
+
#.python-version
|
|
81
|
+
|
|
82
|
+
# pipenv
|
|
83
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
84
|
+
# However, in case of collaboration, if the platform is not identical, it may cause dependency resolution issues.
|
|
85
|
+
#Pipfile.lock
|
|
86
|
+
|
|
87
|
+
# poetry
|
|
88
|
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
|
89
|
+
#poetry.lock
|
|
90
|
+
|
|
91
|
+
# pdm
|
|
92
|
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
|
93
|
+
#.pdm.toml
|
|
94
|
+
#.pdm-plugins/
|
|
95
|
+
# pdm build uses __pdm__
|
|
96
|
+
__pdm__
|
|
97
|
+
|
|
98
|
+
# PEP 582; used by e.g. github.com/lincolnloop/layman and pdm
|
|
99
|
+
__pypackages__/
|
|
100
|
+
|
|
101
|
+
# Celery stuff
|
|
102
|
+
celerybeat-schedule
|
|
103
|
+
celerybeat.pid
|
|
104
|
+
|
|
105
|
+
# SageMath parsed files
|
|
106
|
+
*.sage.py
|
|
107
|
+
|
|
108
|
+
# Environments
|
|
109
|
+
.env
|
|
110
|
+
.venv
|
|
111
|
+
env/
|
|
112
|
+
venv/
|
|
113
|
+
ENV/
|
|
114
|
+
env.bak/
|
|
115
|
+
venv.bak/
|
|
116
|
+
|
|
117
|
+
# Spyder project settings
|
|
118
|
+
.spyderproject
|
|
119
|
+
.spyproject
|
|
120
|
+
|
|
121
|
+
# Rope project settings
|
|
122
|
+
.ropeproject
|
|
123
|
+
|
|
124
|
+
# mkdocs documentation
|
|
125
|
+
/site/
|
|
126
|
+
|
|
127
|
+
# mypy
|
|
128
|
+
.mypy_cache/
|
|
129
|
+
.dmypy.json
|
|
130
|
+
dmypy.json
|
|
131
|
+
|
|
132
|
+
# Pyre type checker
|
|
133
|
+
.pyre/
|
|
134
|
+
|
|
135
|
+
# pytype static analyzer
|
|
136
|
+
.pytype/
|
|
137
|
+
|
|
138
|
+
# Cython debug symbols
|
|
139
|
+
cython_debug/
|
|
140
|
+
|
|
141
|
+
# Projects
|
|
142
|
+
/visualize/
|
|
143
|
+
# /tests/
|
|
144
|
+
/docs/
|
|
145
|
+
/.benchmarks/
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: bayesian-sparse-gmm
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Bayesian Sparse Gaussian Mixture Model implementation in Python
|
|
5
|
+
Author-email: Nam Nam <nampvh4436@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
8
|
+
Classifier: Operating System :: OS Independent
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
11
|
+
Requires-Python: >=3.9
|
|
12
|
+
Requires-Dist: numpy>=1.20.0
|
|
13
|
+
Requires-Dist: scikit-learn>=1.0.0
|
|
14
|
+
Requires-Dist: scipy>=1.7.0
|
|
15
|
+
Requires-Dist: tqdm>=4.60.0
|
|
16
|
+
Provides-Extra: dev
|
|
17
|
+
Requires-Dist: black>=22.0.0; extra == 'dev'
|
|
18
|
+
Requires-Dist: flake8>=4.0.0; extra == 'dev'
|
|
19
|
+
Requires-Dist: isort>=5.10.0; extra == 'dev'
|
|
20
|
+
Requires-Dist: matplotlib>=3.5.0; extra == 'dev'
|
|
21
|
+
Requires-Dist: pytest>=7.0.0; extra == 'dev'
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
|
|
24
|
+
# Bayesian Sparse GMM
|
|
25
|
+
|
|
26
|
+
Bayesian Sparse Gaussian Mixture Model (GMM) implementation in Python.
|
|
27
|
+
|
|
28
|
+
This model places a sparsity-inducing prior (e.g., a Dirichlet distribution with concentration parameter $\alpha_0 < 1$) over the mixture component weights, so that superfluous components are pruned and the number of active components is determined automatically.
|
|
29
|
+
|
|
30
|
+
## Installation
|
|
31
|
+
|
|
32
|
+
To install the latest release:
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
pip install bayesian-sparse-gmm
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
Or for development (editable mode):
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
git clone https://github.com/Coalyx/bayesian-sparse-gmm.git
|
|
42
|
+
cd bayesian-sparse-gmm
|
|
43
|
+
pip install -e .
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Quick Start
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
import numpy as np
|
|
50
|
+
from sklearn.datasets import make_blobs
|
|
51
|
+
from sklearn.preprocessing import StandardScaler
|
|
52
|
+
from bayesian_sparse_gmm import BayesianSparseGMM
|
|
53
|
+
|
|
54
|
+
# Append 8 pure-noise features to the 2 informative (clustered) features so we can check that the model's feature selection separates them.
|
|
55
|
+
rng = np.random.default_rng(42)
|
|
56
|
+
X_clean, _ = make_blobs(n_samples=200, centers=3, n_features=2, cluster_std=0.5, random_state=42)
|
|
57
|
+
X_noise = rng.normal(loc=0.0, scale=1.0, size=(200, 8))
|
|
58
|
+
X = np.hstack([X_clean, X_noise])
|
|
59
|
+
|
|
60
|
+
# Standardize features to satisfy the zero-mean assumptions in the prior structure.
|
|
61
|
+
X = StandardScaler().fit_transform(X)
|
|
62
|
+
|
|
63
|
+
model = BayesianSparseGMM(
|
|
64
|
+
K_max=5,
|
|
65
|
+
n_iter=300,
|
|
66
|
+
burn_in=100,
|
|
67
|
+
lambda_0=10.0,
|
|
68
|
+
lambda_1=0.05,
|
|
69
|
+
random_state=42,
|
|
70
|
+
verbose=0
|
|
71
|
+
)
|
|
72
|
+
model.fit(X)
|
|
73
|
+
|
|
74
|
+
print(f"Number of active clusters: {model.n_clusters_}")
|
|
75
|
+
print(f"Selected informative features: {model.selected_features_}")
|
|
76
|
+
print(f"Feature inclusion probabilities: {model.feature_probabilities_.round(3)}")
|
|
77
|
+
|
|
78
|
+
labels = model.predict(X)
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
## Development and Testing
|
|
82
|
+
|
|
83
|
+
Install development dependencies:
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
pip install -e ".[dev]"
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Run tests using `pytest`:
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
pytest
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
## Reference
|
|
96
|
+
|
|
97
|
+
```bib
|
|
98
|
+
@article{JMLR:v26:23-0142,
|
|
99
|
+
author = {Dapeng Yao and Fangzheng Xie and Yanxun Xu},
|
|
100
|
+
title = {Bayesian Sparse Gaussian Mixture Model for Clustering in High Dimensions},
|
|
101
|
+
journal = {Journal of Machine Learning Research},
|
|
102
|
+
year = {2025},
|
|
103
|
+
volume = {26},
|
|
104
|
+
number = {21},
|
|
105
|
+
pages = {1--50},
|
|
106
|
+
url = {http://jmlr.org/papers/v26/23-0142.html}
|
|
107
|
+
}
|
|
108
|
+
```
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# Bayesian Sparse GMM
|
|
2
|
+
|
|
3
|
+
Bayesian Sparse Gaussian Mixture Model (GMM) implementation in Python.
|
|
4
|
+
|
|
5
|
+
This model places a sparsity-inducing prior (e.g., a Dirichlet distribution with concentration parameter $\alpha_0 < 1$) over the mixture component weights, so that superfluous components are pruned and the number of active components is determined automatically.
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
To install the latest release:
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
pip install bayesian-sparse-gmm
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
Or for development (editable mode):
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
git clone https://github.com/Coalyx/bayesian-sparse-gmm.git
|
|
19
|
+
cd bayesian-sparse-gmm
|
|
20
|
+
pip install -e .
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
## Quick Start
|
|
24
|
+
|
|
25
|
+
```python
|
|
26
|
+
import numpy as np
|
|
27
|
+
from sklearn.datasets import make_blobs
|
|
28
|
+
from sklearn.preprocessing import StandardScaler
|
|
29
|
+
from bayesian_sparse_gmm import BayesianSparseGMM
|
|
30
|
+
|
|
31
|
+
# Append 8 pure-noise features to the 2 informative (clustered) features so we can check that the model's feature selection separates them.
|
|
32
|
+
rng = np.random.default_rng(42)
|
|
33
|
+
X_clean, _ = make_blobs(n_samples=200, centers=3, n_features=2, cluster_std=0.5, random_state=42)
|
|
34
|
+
X_noise = rng.normal(loc=0.0, scale=1.0, size=(200, 8))
|
|
35
|
+
X = np.hstack([X_clean, X_noise])
|
|
36
|
+
|
|
37
|
+
# Standardize features to satisfy the zero-mean assumptions in the prior structure.
|
|
38
|
+
X = StandardScaler().fit_transform(X)
|
|
39
|
+
|
|
40
|
+
model = BayesianSparseGMM(
|
|
41
|
+
K_max=5,
|
|
42
|
+
n_iter=300,
|
|
43
|
+
burn_in=100,
|
|
44
|
+
lambda_0=10.0,
|
|
45
|
+
lambda_1=0.05,
|
|
46
|
+
random_state=42,
|
|
47
|
+
verbose=0
|
|
48
|
+
)
|
|
49
|
+
model.fit(X)
|
|
50
|
+
|
|
51
|
+
print(f"Number of active clusters: {model.n_clusters_}")
|
|
52
|
+
print(f"Selected informative features: {model.selected_features_}")
|
|
53
|
+
print(f"Feature inclusion probabilities: {model.feature_probabilities_.round(3)}")
|
|
54
|
+
|
|
55
|
+
labels = model.predict(X)
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Development and Testing
|
|
59
|
+
|
|
60
|
+
Install development dependencies:
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
pip install -e ".[dev]"
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
Run tests using `pytest`:
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
pytest
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
## Reference
|
|
73
|
+
|
|
74
|
+
```bib
|
|
75
|
+
@article{JMLR:v26:23-0142,
|
|
76
|
+
author = {Dapeng Yao and Fangzheng Xie and Yanxun Xu},
|
|
77
|
+
title = {Bayesian Sparse Gaussian Mixture Model for Clustering in High Dimensions},
|
|
78
|
+
journal = {Journal of Machine Learning Research},
|
|
79
|
+
year = {2025},
|
|
80
|
+
volume = {26},
|
|
81
|
+
number = {21},
|
|
82
|
+
pages = {1--50},
|
|
83
|
+
url = {http://jmlr.org/papers/v26/23-0142.html}
|
|
84
|
+
}
|
|
85
|
+
```
|