bayesian-sparse-gmm 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. bayesian_sparse_gmm-0.1.0/.flake8 +10 -0
  2. bayesian_sparse_gmm-0.1.0/.github/workflows/release.yml +72 -0
  3. bayesian_sparse_gmm-0.1.0/.gitignore +145 -0
  4. bayesian_sparse_gmm-0.1.0/PKG-INFO +108 -0
  5. bayesian_sparse_gmm-0.1.0/README.md +85 -0
  6. bayesian_sparse_gmm-0.1.0/evaluate.py +410 -0
  7. bayesian_sparse_gmm-0.1.0/pyproject.toml +41 -0
  8. bayesian_sparse_gmm-0.1.0/src/bayesian_sparse_gmm/__init__.py +5 -0
  9. bayesian_sparse_gmm-0.1.0/src/bayesian_sparse_gmm/backends/__init__.py +52 -0
  10. bayesian_sparse_gmm-0.1.0/src/bayesian_sparse_gmm/backends/_base.py +46 -0
  11. bayesian_sparse_gmm-0.1.0/src/bayesian_sparse_gmm/backends/_cuda.py +106 -0
  12. bayesian_sparse_gmm-0.1.0/src/bayesian_sparse_gmm/backends/_numba.py +274 -0
  13. bayesian_sparse_gmm-0.1.0/src/bayesian_sparse_gmm/backends/_numpy.py +53 -0
  14. bayesian_sparse_gmm-0.1.0/src/bayesian_sparse_gmm/config.py +29 -0
  15. bayesian_sparse_gmm-0.1.0/src/bayesian_sparse_gmm/diagnostics.py +111 -0
  16. bayesian_sparse_gmm-0.1.0/src/bayesian_sparse_gmm/model.py +203 -0
  17. bayesian_sparse_gmm-0.1.0/src/bayesian_sparse_gmm/postprocessing.py +46 -0
  18. bayesian_sparse_gmm-0.1.0/src/bayesian_sparse_gmm/sampler.py +167 -0
  19. bayesian_sparse_gmm-0.1.0/src/bayesian_sparse_gmm/state.py +17 -0
  20. bayesian_sparse_gmm-0.1.0/src/bayesian_sparse_gmm/utils.py +30 -0
  21. bayesian_sparse_gmm-0.1.0/tests/__init__.py +1 -0
  22. bayesian_sparse_gmm-0.1.0/tests/test_backends.py +78 -0
  23. bayesian_sparse_gmm-0.1.0/tests/test_cuda.py +38 -0
  24. bayesian_sparse_gmm-0.1.0/tests/test_diagnostics.py +54 -0
  25. bayesian_sparse_gmm-0.1.0/tests/test_model.py +67 -0
  26. bayesian_sparse_gmm-0.1.0/tests/test_numba.py +62 -0
  27. bayesian_sparse_gmm-0.1.0/tests/test_phase1.py +57 -0
  28. bayesian_sparse_gmm-0.1.0/tests/test_sampler.py +195 -0
@@ -0,0 +1,10 @@
1
+ [flake8]
2
+ max-line-length = 120
3
+ extend-ignore = E203, W503, E501
4
+ exclude =
5
+ .git,
6
+ __pycache__,
7
+ build,
8
+ dist,
9
+ .venv,
10
+ venv
@@ -0,0 +1,72 @@
1
+ name: CI/CD Release
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ pull_request:
8
+ branches:
9
+ - main
10
+
11
+ jobs:
12
+ test:
13
+ name: Test and Lint
14
+ runs-on: ubuntu-latest
15
+ strategy:
16
+ matrix:
17
+ python-version: ["3.9", "3.10", "3.11", "3.12"]
18
+ steps:
19
+ - name: Checkout Source Code
20
+ uses: actions/checkout@v4
21
+
22
+ - name: Set up Python ${{ matrix.python-version }}
23
+ uses: actions/setup-python@v5
24
+ with:
25
+ python-version: ${{ matrix.python-version }}
26
+ cache: "pip"
27
+
28
+ - name: Install Development Dependencies
29
+ run: |
30
+ python -m pip install --upgrade pip
31
+ pip install .[dev]
32
+
33
+ - name: Run Lint Checks
34
+ run: |
35
+ black --check src tests
36
+ isort --check-only src tests
37
+ flake8 src tests
38
+
39
+ - name: Run Unit Tests
40
+ run: pytest
41
+
42
+ publish:
43
+ name: Publish to PyPI
44
+ needs: test
45
+ if: github.event_name == 'push' && github.ref == 'refs/heads/main'
46
+ runs-on: ubuntu-latest
47
+ permissions:
48
+ id-token: write
49
+ steps:
50
+ - name: Checkout Source Code
51
+ uses: actions/checkout@v4
52
+
53
+ - name: Set up Python
54
+ uses: actions/setup-python@v5
55
+ with:
56
+ python-version: "3.10"
57
+
58
+ - name: Install Build Tools
59
+ run: |
60
+ python -m pip install --upgrade pip
61
+ pip install build twine
62
+
63
+ - name: Build Source and Wheel Distributions
64
+ run: python -m build
65
+
66
+ - name: Validate Package Metadata
67
+ run: twine check dist/*
68
+
69
+ - name: Upload to PyPI
70
+ uses: pypa/gh-action-pypi-publish@release/v1
71
+ with:
72
+ skip-existing: true
@@ -0,0 +1,145 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ bin/
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script containing a list of
31
+ # files to include or exclude inside the final binary
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Sphinx documentation
65
+ docs/_build/
66
+
67
+ # PyBuilder
68
+ .pybuilder/
69
+ target/
70
+
71
+ # Jupyter Notebook
72
+ .ipynb_checkpoints
73
+
74
+ # IPython
75
+ .ipython/
76
+
77
+ # pyenv
78
+ # For a library or app, you might want to share your .python-version if you
79
+ # use pyenv. Uncomment if needed.
80
+ #.python-version
81
+
82
+ # pipenv
83
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
84
+ # However, in case of collaboration, if the platform is not identical, it may cause dependency resolution issues.
85
+ #Pipfile.lock
86
+
87
+ # poetry
88
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
89
+ #poetry.lock
90
+
91
+ # pdm
92
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
93
+ #.pdm.toml
94
+ #.pdm-plugins/
95
+ # pdm build uses __pdm__
96
+ __pdm__
97
+
98
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
99
+ __pypackages__/
100
+
101
+ # Celery stuff
102
+ celerybeat-schedule
103
+ celerybeat.pid
104
+
105
+ # SageMath parsed files
106
+ *.sage.py
107
+
108
+ # Environments
109
+ .env
110
+ .venv
111
+ env/
112
+ venv/
113
+ ENV/
114
+ env.bak/
115
+ venv.bak/
116
+
117
+ # Spyder project settings
118
+ .spyderproject
119
+ .spyproject
120
+
121
+ # Rope project settings
122
+ .ropeproject
123
+
124
+ # mkdocs documentation
125
+ /site/
126
+
127
+ # mypy
128
+ .mypy_cache/
129
+ .dmypy.json
130
+ dmypy.json
131
+
132
+ # Pyre type checker
133
+ .pyre/
134
+
135
+ # pytype static analyzer
136
+ .pytype/
137
+
138
+ # Cython debug symbols
139
+ cython_debug/
140
+
141
+ # Projects
142
+ /visualize/
143
+ # /tests/
144
+ /docs/
145
+ /.benchmarks/
@@ -0,0 +1,108 @@
1
+ Metadata-Version: 2.4
2
+ Name: bayesian-sparse-gmm
3
+ Version: 0.1.0
4
+ Summary: Bayesian Sparse Gaussian Mixture Model implementation in Python
5
+ Author-email: Nam Nam <nampvh4436@gmail.com>
6
+ License: MIT
7
+ Classifier: License :: OSI Approved :: MIT License
8
+ Classifier: Operating System :: OS Independent
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
11
+ Requires-Python: >=3.9
12
+ Requires-Dist: numpy>=1.20.0
13
+ Requires-Dist: scikit-learn>=1.0.0
14
+ Requires-Dist: scipy>=1.7.0
15
+ Requires-Dist: tqdm>=4.60.0
16
+ Provides-Extra: dev
17
+ Requires-Dist: black>=22.0.0; extra == 'dev'
18
+ Requires-Dist: flake8>=4.0.0; extra == 'dev'
19
+ Requires-Dist: isort>=5.10.0; extra == 'dev'
20
+ Requires-Dist: matplotlib>=3.5.0; extra == 'dev'
21
+ Requires-Dist: pytest>=7.0.0; extra == 'dev'
22
+ Description-Content-Type: text/markdown
23
+
24
+ # Bayesian Sparse GMM
25
+
26
+ Bayesian Sparse Gaussian Mixture Model (GMM) implementation in Python.
27
+
28
+ This model places a sparsity-inducing prior (e.g., a Dirichlet distribution with parameter $\alpha_0 < 1$) over the mixture component weights, so that superfluous components are pruned and the number of active components is determined automatically.
29
+
30
+ ## Installation
31
+
32
+ To install the latest release:
33
+
34
+ ```bash
35
+ pip install bayesian-sparse-gmm
36
+ ```
37
+
38
+ Or for development (editable mode):
39
+
40
+ ```bash
41
+ git clone https://github.com/Coalyx/bayesian-sparse-gmm.git
42
+ cd bayesian-sparse-gmm
43
+ pip install -e .
44
+ ```
45
+
46
+ ## Quick Start
47
+
48
+ ```python
49
+ import numpy as np
50
+ from sklearn.datasets import make_blobs
51
+ from sklearn.preprocessing import StandardScaler
52
+ from bayesian_sparse_gmm import BayesianSparseGMM
53
+
54
+ # Append 8 pure-noise features to the 2 informative cluster features to check that the model's feature selection identifies the informative ones.
55
+ rng = np.random.default_rng(42)
56
+ X_clean, _ = make_blobs(n_samples=200, centers=3, n_features=2, cluster_std=0.5, random_state=42)
57
+ X_noise = rng.normal(loc=0.0, scale=1.0, size=(200, 8))
58
+ X = np.hstack([X_clean, X_noise])
59
+
60
+ # Standardize features (zero mean, unit variance) to match the zero-mean assumption of the prior.
61
+ X = StandardScaler().fit_transform(X)
62
+
63
+ model = BayesianSparseGMM(
64
+ K_max=5,
65
+ n_iter=300,
66
+ burn_in=100,
67
+ lambda_0=10.0,
68
+ lambda_1=0.05,
69
+ random_state=42,
70
+ verbose=0
71
+ )
72
+ model.fit(X)
73
+
74
+ print(f"Number of active clusters: {model.n_clusters_}")
75
+ print(f"Selected informative features: {model.selected_features_}")
76
+ print(f"Feature inclusion probabilities: {model.feature_probabilities_.round(3)}")
77
+
78
+ labels = model.predict(X)
79
+ ```
80
+
81
+ ## Development and Testing
82
+
83
+ Install development dependencies:
84
+
85
+ ```bash
86
+ pip install -e ".[dev]"
87
+ ```
88
+
89
+ Run tests using `pytest`:
90
+
91
+ ```bash
92
+ pytest
93
+ ```
94
+
95
+ ## Reference
96
+
97
+ ```bib
98
+ @article{JMLR:v26:23-0142,
99
+ author = {Dapeng Yao and Fangzheng Xie and Yanxun Xu},
100
+ title = {Bayesian Sparse Gaussian Mixture Model for Clustering in High Dimensions},
101
+ journal = {Journal of Machine Learning Research},
102
+ year = {2025},
103
+ volume = {26},
104
+ number = {21},
105
+ pages = {1--50},
106
+ url = {http://jmlr.org/papers/v26/23-0142.html}
107
+ }
108
+ ```
@@ -0,0 +1,85 @@
1
+ # Bayesian Sparse GMM
2
+
3
+ Bayesian Sparse Gaussian Mixture Model (GMM) implementation in Python.
4
+
5
+ This model places a sparsity-inducing prior (e.g., a Dirichlet distribution with parameter $\alpha_0 < 1$) over the mixture component weights, so that superfluous components are pruned and the number of active components is determined automatically.
6
+
7
+ ## Installation
8
+
9
+ To install the latest release:
10
+
11
+ ```bash
12
+ pip install bayesian-sparse-gmm
13
+ ```
14
+
15
+ Or for development (editable mode):
16
+
17
+ ```bash
18
+ git clone https://github.com/Coalyx/bayesian-sparse-gmm.git
19
+ cd bayesian-sparse-gmm
20
+ pip install -e .
21
+ ```
22
+
23
+ ## Quick Start
24
+
25
+ ```python
26
+ import numpy as np
27
+ from sklearn.datasets import make_blobs
28
+ from sklearn.preprocessing import StandardScaler
29
+ from bayesian_sparse_gmm import BayesianSparseGMM
30
+
31
+ # Append 8 pure-noise features to the 2 informative cluster features to check that the model's feature selection identifies the informative ones.
32
+ rng = np.random.default_rng(42)
33
+ X_clean, _ = make_blobs(n_samples=200, centers=3, n_features=2, cluster_std=0.5, random_state=42)
34
+ X_noise = rng.normal(loc=0.0, scale=1.0, size=(200, 8))
35
+ X = np.hstack([X_clean, X_noise])
36
+
37
+ # Standardize features (zero mean, unit variance) to match the zero-mean assumption of the prior.
38
+ X = StandardScaler().fit_transform(X)
39
+
40
+ model = BayesianSparseGMM(
41
+ K_max=5,
42
+ n_iter=300,
43
+ burn_in=100,
44
+ lambda_0=10.0,
45
+ lambda_1=0.05,
46
+ random_state=42,
47
+ verbose=0
48
+ )
49
+ model.fit(X)
50
+
51
+ print(f"Number of active clusters: {model.n_clusters_}")
52
+ print(f"Selected informative features: {model.selected_features_}")
53
+ print(f"Feature inclusion probabilities: {model.feature_probabilities_.round(3)}")
54
+
55
+ labels = model.predict(X)
56
+ ```
57
+
58
+ ## Development and Testing
59
+
60
+ Install development dependencies:
61
+
62
+ ```bash
63
+ pip install -e ".[dev]"
64
+ ```
65
+
66
+ Run tests using `pytest`:
67
+
68
+ ```bash
69
+ pytest
70
+ ```
71
+
72
+ ## Reference
73
+
74
+ ```bib
75
+ @article{JMLR:v26:23-0142,
76
+ author = {Dapeng Yao and Fangzheng Xie and Yanxun Xu},
77
+ title = {Bayesian Sparse Gaussian Mixture Model for Clustering in High Dimensions},
78
+ journal = {Journal of Machine Learning Research},
79
+ year = {2025},
80
+ volume = {26},
81
+ number = {21},
82
+ pages = {1--50},
83
+ url = {http://jmlr.org/papers/v26/23-0142.html}
84
+ }
85
+ ```