mimisbm 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mimisbm-0.1.0/.github/workflows/lint.yml +21 -0
- mimisbm-0.1.0/.github/workflows/pages.yml +33 -0
- mimisbm-0.1.0/.github/workflows/publish.yml +26 -0
- mimisbm-0.1.0/PKG-INFO +91 -0
- mimisbm-0.1.0/README.md +74 -0
- mimisbm-0.1.0/demo.py +41 -0
- mimisbm-0.1.0/docs/Makefile +20 -0
- mimisbm-0.1.0/docs/make.bat +35 -0
- mimisbm-0.1.0/docs/source/_static/.gitkeep +0 -0
- mimisbm-0.1.0/docs/source/_templates/autosummary/class.rst +8 -0
- mimisbm-0.1.0/docs/source/conf.py +46 -0
- mimisbm-0.1.0/docs/source/index.rst +78 -0
- mimisbm-0.1.0/mimisbm/__init__.py +5 -0
- mimisbm-0.1.0/mimisbm/_model.py +444 -0
- mimisbm-0.1.0/mimisbm.egg-info/PKG-INFO +91 -0
- mimisbm-0.1.0/mimisbm.egg-info/SOURCES.txt +19 -0
- mimisbm-0.1.0/mimisbm.egg-info/dependency_links.txt +1 -0
- mimisbm-0.1.0/mimisbm.egg-info/requires.txt +4 -0
- mimisbm-0.1.0/mimisbm.egg-info/top_level.txt +1 -0
- mimisbm-0.1.0/pyproject.toml +55 -0
- mimisbm-0.1.0/setup.cfg +4 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
name: Lint

on:
  push:
    branches: ["**"]
  pull_request:

jobs:
  lint:
    runs-on: ubuntu-latest
    container:
      image: python:latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Install ruff
        run: pip install ruff
      - name: Format check
        # BUG FIX: without --check, `ruff format` rewrites the files in the
        # throwaway CI workspace and exits 0, so this step could never fail.
        # --check reports diffs and exits non-zero on misformatted files.
        run: ruff format --check mimisbm
      - name: Lint
        run: ruff check mimisbm
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
name: Pages

on:
  # Manual trigger only; docs are published on demand.
  workflow_dispatch:

jobs:
  pages:
    runs-on: ubuntu-latest
    container:
      image: python:3.13
    # Minimum permissions for OIDC-based GitHub Pages deployment:
    # pages: write to publish, id-token: write for the deployment token.
    permissions:
      contents: read
      pages: write
      id-token: write
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Fix git safe directory
        # The container user does not own the checked-out workspace; without
        # this, git refuses to operate on it ("dubious ownership").
        run: git config --global --add safe.directory '*'
      - name: Install system dependencies
        # NOTE(review): runs after checkout/git config — presumably git is
        # already present in the python:3.13 image; verify ordering is intended.
        run: apt-get update && apt-get install -y git
      - name: Install Python dependencies
        run: |
          pip install sphinx furo
          pip install . --extra-index-url https://download.pytorch.org/whl/cpu
      - name: Build documentation
        run: sphinx-build -b html docs/source public
      - name: Upload Pages artifact
        uses: actions/upload-pages-artifact@v3
        with:
          path: public
      - name: Deploy to GitHub Pages
        uses: actions/deploy-pages@v4
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
name: Publish to PyPI

on:
  push:
    # Publish only on version tags (v1.2.3, ...).
    tags:
      - "v*"

jobs:
  publish-to-pypi:
    runs-on: ubuntu-latest
    container:
      image: python:3.13
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Fix git safe directory
        # setuptools-scm derives the version from git metadata; the container
        # user does not own the checkout, so git must be told to trust it.
        run: git config --global --add safe.directory '*'
      - name: Install build tools
        run: python -m pip install --upgrade pip build twine setuptools-scm
      - name: Build package
        run: python -m build
      - name: Publish to PyPI
        env:
          # PyPI API-token authentication: username is the literal "__token__".
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
        run: python -m twine upload --verbose dist/*
|
mimisbm-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mimisbm
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Mixture of Multilayer Integrator Stochastic Block Model
|
|
5
|
+
Author: Félix Laplante
|
|
6
|
+
Project-URL: Source, https://github.com/felixlaplante0/mimisbm
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
9
|
+
Classifier: Operating System :: MacOS
|
|
10
|
+
Classifier: Operating System :: Microsoft :: Windows
|
|
11
|
+
Requires-Python: >=3.10
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
Requires-Dist: numpy
|
|
14
|
+
Requires-Dist: scipy
|
|
15
|
+
Requires-Dist: fastkmeanspp
|
|
16
|
+
Requires-Dist: scikit-learn
|
|
17
|
+
|
|
18
|
+
# 🕸️ MimiSBM
|
|
19
|
+
|
|
20
|
+
**mimisbm** is a Python package implementing the **Mixture of Multilayer Integrator Stochastic Block Model** proposed by the original authors. It jointly groups nodes into clusters and layers into components, providing a unified framework for identifying shared connectivity patterns across multiple network layers.
|
|
21
|
+
|
|
22
|
+
---
|
|
23
|
+
|
|
24
|
+
## ✨ Features
|
|
25
|
+
|
|
26
|
+
- **Multilayer Clustering**: Jointly identifies node communities and layer components in a single probabilistic framework.
|
|
27
|
+
- **Variational EM**: Efficient inference using a Variational Expectation-Maximization (VEM) algorithm for large-scale networks.
|
|
28
|
+
- **Bayesian Framework**: Supports flexible Dirichlet and Beta priors, allowing for robust structure discovery under different sparsity regimes.
|
|
29
|
+
- **Component-wise SBMs**: Groups layers sharing similar block-model structures into distinct mixture components.
|
|
30
|
+
- **scikit-learn API**: Native `BaseEstimator` and `ClusterMixin` integration with a familiar `fit` / `predict` interface.
|
|
31
|
+
|
|
32
|
+
---
|
|
33
|
+
|
|
34
|
+
## 🚀 Installation
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
pip install mimisbm
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## 🔧 Usage
|
|
41
|
+
|
|
42
|
+
### Example
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
import numpy as np
|
|
46
|
+
from mimisbm import MimiSBM
|
|
47
|
+
|
|
48
|
+
# Generate a synthetic multilayer adjacency tensor (20 nodes, 5 layers)
|
|
49
|
+
np.random.seed(42)
|
|
50
|
+
N, V = 20, 5
|
|
51
|
+
X = np.random.randint(0, 2, size=(N, N, V))
|
|
52
|
+
|
|
53
|
+
# Ensure the adjacency matrices are symmetric (undirected)
|
|
54
|
+
for v in range(V):
|
|
55
|
+
X[..., v] = np.tril(X[..., v], -1) + np.tril(X[..., v], -1).T
|
|
56
|
+
|
|
57
|
+
# Initialize the model with 3 node clusters and 2 layer components
|
|
58
|
+
model = MimiSBM(n_clusters=3, n_components=2, random_state=42)
|
|
59
|
+
|
|
60
|
+
# Fit the model to the multilayer network
|
|
61
|
+
model.fit(X)
|
|
62
|
+
|
|
63
|
+
# Predict node cluster and layer component assignments
|
|
64
|
+
node_labels, layer_labels = model.predict()
|
|
65
|
+
|
|
66
|
+
print(f"Node clusters: {node_labels}")
|
|
67
|
+
print(f"Layer components: {layer_labels}")
|
|
68
|
+
print(f"Final ELBO: {model.elbo_:.2f}")
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
---
|
|
72
|
+
|
|
73
|
+
## 📖 Learn More
|
|
74
|
+
|
|
75
|
+
For tutorials and detailed API reference, visit the official site:
|
|
76
|
+
👉 [mimisbm's documentation](https://felixlaplante0.github.io/mimisbm)
|
|
77
|
+
|
|
78
|
+
### 📚 Citation
|
|
79
|
+
|
|
80
|
+
If you use MimiSBM in your research, please cite the original authors' paper:
|
|
81
|
+
|
|
82
|
+
```bibtex
|
|
83
|
+
@article{de2024mixture,
|
|
84
|
+
title={Mixture of multilayer stochastic block models for multiview clustering},
|
|
85
|
+
author={De Santiago, Kylliann and Szafranski, Marie and Ambroise, Christophe},
|
|
86
|
+
journal={arXiv preprint arXiv:2401.04682},
|
|
87
|
+
year={2024}
|
|
88
|
+
}
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
For more details, see the corresponding Preprint: https://arxiv.org/abs/2401.04682
|
mimisbm-0.1.0/README.md
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# 🕸️ MimiSBM
|
|
2
|
+
|
|
3
|
+
**mimisbm** is a Python package implementing the **Mixture of Multilayer Integrator Stochastic Block Model** proposed by the original authors. It jointly groups nodes into clusters and layers into components, providing a unified framework for identifying shared connectivity patterns across multiple network layers.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## ✨ Features
|
|
8
|
+
|
|
9
|
+
- **Multilayer Clustering**: Jointly identifies node communities and layer components in a single probabilistic framework.
|
|
10
|
+
- **Variational EM**: Efficient inference using a Variational Expectation-Maximization (VEM) algorithm for large-scale networks.
|
|
11
|
+
- **Bayesian Framework**: Supports flexible Dirichlet and Beta priors, allowing for robust structure discovery under different sparsity regimes.
|
|
12
|
+
- **Component-wise SBMs**: Groups layers sharing similar block-model structures into distinct mixture components.
|
|
13
|
+
- **scikit-learn API**: Native `BaseEstimator` and `ClusterMixin` integration with a familiar `fit` / `predict` interface.
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
## 🚀 Installation
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
pip install mimisbm
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
## 🔧 Usage
|
|
24
|
+
|
|
25
|
+
### Example
|
|
26
|
+
|
|
27
|
+
```python
|
|
28
|
+
import numpy as np
|
|
29
|
+
from mimisbm import MimiSBM
|
|
30
|
+
|
|
31
|
+
# Generate a synthetic multilayer adjacency tensor (20 nodes, 5 layers)
|
|
32
|
+
np.random.seed(42)
|
|
33
|
+
N, V = 20, 5
|
|
34
|
+
X = np.random.randint(0, 2, size=(N, N, V))
|
|
35
|
+
|
|
36
|
+
# Ensure the adjacency matrices are symmetric (undirected)
|
|
37
|
+
for v in range(V):
|
|
38
|
+
X[..., v] = np.tril(X[..., v], -1) + np.tril(X[..., v], -1).T
|
|
39
|
+
|
|
40
|
+
# Initialize the model with 3 node clusters and 2 layer components
|
|
41
|
+
model = MimiSBM(n_clusters=3, n_components=2, random_state=42)
|
|
42
|
+
|
|
43
|
+
# Fit the model to the multilayer network
|
|
44
|
+
model.fit(X)
|
|
45
|
+
|
|
46
|
+
# Predict node cluster and layer component assignments
|
|
47
|
+
node_labels, layer_labels = model.predict()
|
|
48
|
+
|
|
49
|
+
print(f"Node clusters: {node_labels}")
|
|
50
|
+
print(f"Layer components: {layer_labels}")
|
|
51
|
+
print(f"Final ELBO: {model.elbo_:.2f}")
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
---
|
|
55
|
+
|
|
56
|
+
## 📖 Learn More
|
|
57
|
+
|
|
58
|
+
For tutorials and detailed API reference, visit the official site:
|
|
59
|
+
👉 [mimisbm's documentation](https://felixlaplante0.github.io/mimisbm)
|
|
60
|
+
|
|
61
|
+
### 📚 Citation
|
|
62
|
+
|
|
63
|
+
If you use MimiSBM in your research, please cite the original authors' paper:
|
|
64
|
+
|
|
65
|
+
```bibtex
|
|
66
|
+
@article{de2024mixture,
|
|
67
|
+
title={Mixture of multilayer stochastic block models for multiview clustering},
|
|
68
|
+
author={De Santiago, Kylliann and Szafranski, Marie and Ambroise, Christophe},
|
|
69
|
+
journal={arXiv preprint arXiv:2401.04682},
|
|
70
|
+
year={2024}
|
|
71
|
+
}
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
For more details, see the corresponding Preprint: https://arxiv.org/abs/2401.04682
|
mimisbm-0.1.0/demo.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""Demo: fit MimiSBM on synthetic two-component multilayer data and score it."""

import numpy as np
from sklearn.metrics import adjusted_rand_score

from mimisbm._model import MimiSBM


def gen_data(N=100, V=20, K=2, Q=2):
    """Generate a synthetic undirected multilayer network with known labels.

    Args:
        N (int): Number of nodes.
        V (int): Number of layers (views).
        K (int): Number of node clusters.
        Q (int): Number of layer components.

    Returns:
        tuple: (A, z, w) — adjacency tensor of shape (N, N, V), node labels
        of shape (N,), and layer labels of shape (V,).
    """
    z = np.random.randint(0, K, N)
    w = np.random.randint(0, Q, V)

    # Connectivity for component 0 (assortative) and component 1 (disassortative)
    alphas = np.zeros((K, K, Q))
    alphas[:, :, 0] = [[0.8, 0.1], [0.1, 0.8]]
    alphas[:, :, 1] = [[0.1, 0.8], [0.8, 0.1]]

    A = np.zeros((N, N, V))
    for v in range(V):
        # Per-pair edge probabilities for this layer's component.
        p_mat = alphas[np.ix_(z, z, [w[v]])].squeeze()
        # Generate undirected edges without self-loops: sample the strict
        # lower triangle, then mirror it.
        tri_mask = np.random.rand(N, N) < p_mat
        A[:, :, v] = np.tril(tri_mask, -1).astype(float)
        A[:, :, v] += A[:, :, v].T

    return A, z, w


np.random.seed(42)

print("Generating synthetic data...")
A, true_z, true_w = gen_data()

print("Fitting MimiSBM...")
# BUG FIX: the constructor keyword is `n_components` (number of layer
# components); `n_subclusters` is not a MimiSBM.__init__ parameter and
# raised a TypeError when this demo was run.
model = MimiSBM(n_clusters=2, n_components=2, random_state=42).fit(A)
pred_z, pred_w = model.predict()

# Evaluate recovered labels against the ground truth (ARI is permutation
# invariant, so label relabeling does not matter).
node_ari = adjusted_rand_score(true_z, pred_z)
view_ari = adjusted_rand_score(true_w, pred_w)

print("\nEvaluation Results:")
print(f"Node ARI: {node_ari:.4f}")
print(f"View ARI: {view_ari:.4f}")
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS    ?=
SPHINXBUILD   ?= sphinx-build
SOURCEDIR     = source
BUILDDIR      = build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
@ECHO OFF

pushd %~dp0

REM Command file for Sphinx documentation

REM Fall back to the sphinx-build on PATH unless the caller overrides it.
if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build

REM Probe for sphinx-build; errorlevel 9009 means "command not found".
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.https://www.sphinx-doc.org/
	exit /b 1
)

REM No target given: show the Sphinx help listing.
if "%1" == "" goto help

%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%

:end
popd
|
|
File without changes
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# Configuration file for the Sphinx documentation builder.
#
# For the full list of built-in configuration values, see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html

import os
import sys

# Make the package importable so autodoc can introspect mimisbm.
sys.path.insert(0, os.path.abspath("../../"))

# -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information

project = "mimisbm"
# Version strings are intentionally left empty; the package version comes
# from setuptools-scm at build time.
release = ""
version = ""
# NOTE(review): copyright year is "2026" — confirm this is intended.
copyright = "2026, Félix Laplante"
author = "Félix Laplante"

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration

extensions = [
    "sphinx.ext.autodoc",
    "sphinx.ext.napoleon",
    "sphinx.ext.viewcode",
    "sphinx.ext.autosummary",
]

templates_path = ["_templates"]
exclude_patterns = []

# Keep members in declaration order and render type hints in descriptions.
autodoc_member_order = "bysource"
autodoc_typehints = "description"
autodoc_typehints_format = "short"
autodoc_inherit_docstrings = True
autosummary_generate = True
add_module_names = False
napoleon_use_ivar = True
napoleon_attr_annotations = True

# -- Options for HTML output -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output

html_theme = "furo"
html_static_path = ["_static"]
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
MimiSBM
|
|
2
|
+
=======
|
|
3
|
+
|
|
4
|
+
**mimisbm** is a Python package implementing the **Mixture of Multilayer Integrator Stochastic Block Model** proposed by the original authors. It jointly groups nodes into clusters and layers into components, providing a unified framework for identifying shared connectivity patterns across multiple network layers.
|
|
5
|
+
|
|
6
|
+
Features
|
|
7
|
+
--------
|
|
8
|
+
|
|
9
|
+
- **Multilayer Clustering**: Jointly identifies node communities and layer components in a single probabilistic framework.
|
|
10
|
+
- **Variational EM**: Efficient inference using a Variational Expectation-Maximization (VEM) algorithm for large-scale networks.
|
|
11
|
+
- **Bayesian Framework**: Supports flexible Dirichlet and Beta priors, allowing for robust structure discovery under different sparsity regimes.
|
|
12
|
+
- **Component-wise SBMs**: Groups layers sharing similar block-model structures into distinct mixture components.
|
|
13
|
+
- **scikit-learn API**: Native ``BaseEstimator`` and ``ClusterMixin`` integration with a familiar ``fit`` / ``predict`` interface.
|
|
14
|
+
|
|
15
|
+
Installation
|
|
16
|
+
------------
|
|
17
|
+
|
|
18
|
+
You can install the package via pip:
|
|
19
|
+
|
|
20
|
+
.. code-block:: bash
|
|
21
|
+
|
|
22
|
+
pip install mimisbm
|
|
23
|
+
|
|
24
|
+
Usage
|
|
25
|
+
-----
|
|
26
|
+
|
|
27
|
+
Example:
|
|
28
|
+
|
|
29
|
+
.. code-block:: python
|
|
30
|
+
|
|
31
|
+
import numpy as np
|
|
32
|
+
from mimisbm import MimiSBM
|
|
33
|
+
|
|
34
|
+
# Generate a synthetic multilayer adjacency tensor (20 nodes, 5 layers)
|
|
35
|
+
np.random.seed(42)
|
|
36
|
+
N, V = 20, 5
|
|
37
|
+
X = np.random.randint(0, 2, size=(N, N, V))
|
|
38
|
+
|
|
39
|
+
# Ensure the adjacency matrices are symmetric (undirected)
|
|
40
|
+
for v in range(V):
|
|
41
|
+
X[..., v] = np.tril(X[..., v], -1) + np.tril(X[..., v], -1).T
|
|
42
|
+
|
|
43
|
+
# Initialize the model with 3 node clusters and 2 layer components
|
|
44
|
+
model = MimiSBM(n_clusters=3, n_components=2, random_state=42)
|
|
45
|
+
|
|
46
|
+
# Fit the model to the multilayer network
|
|
47
|
+
model.fit(X)
|
|
48
|
+
|
|
49
|
+
# Predict node cluster and layer component assignments
|
|
50
|
+
node_labels, layer_labels = model.predict()
|
|
51
|
+
|
|
52
|
+
print(f"Node clusters: {node_labels}")
|
|
53
|
+
print(f"Layer components: {layer_labels}")
|
|
54
|
+
print(f"Final ELBO: {model.elbo_:.2f}")
|
|
55
|
+
|
|
56
|
+
Citation
|
|
57
|
+
--------
|
|
58
|
+
|
|
59
|
+
If you use MimiSBM in your research, please cite the original authors' paper:
|
|
60
|
+
|
|
61
|
+
.. code-block:: bibtex
|
|
62
|
+
|
|
63
|
+
@article{de2024mixture,
|
|
64
|
+
title={Mixture of multilayer stochastic block models for multiview clustering},
|
|
65
|
+
author={De Santiago, Kylliann and Szafranski, Marie and Ambroise, Christophe},
|
|
66
|
+
journal={arXiv preprint arXiv:2401.04682},
|
|
67
|
+
year={2024}
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
For more details, see the corresponding Preprint: https://arxiv.org/abs/2401.04682
|
|
71
|
+
|
|
72
|
+
API Reference
|
|
73
|
+
-------------
|
|
74
|
+
|
|
75
|
+
.. autoclass:: mimisbm.MimiSBM
|
|
76
|
+
:members:
|
|
77
|
+
:undoc-members:
|
|
78
|
+
:show-inheritance:
|
|
@@ -0,0 +1,444 @@
|
|
|
1
|
+
from numbers import Integral, Real
|
|
2
|
+
from typing import Self
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
from fastkmeanspp import KMeans # type: ignore
|
|
6
|
+
from scipy.special import betaln, digamma, entr, gammaln, softmax # type: ignore
|
|
7
|
+
from sklearn.base import BaseEstimator, ClusterMixin # type: ignore
|
|
8
|
+
from sklearn.utils._param_validation import ( # type: ignore
|
|
9
|
+
Interval, # type: ignore
|
|
10
|
+
StrOptions, # type: ignore
|
|
11
|
+
validate_params, # type: ignore
|
|
12
|
+
)
|
|
13
|
+
from sklearn.utils.validation import check_is_fitted, validate_data # type: ignore
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class MimiSBM(ClusterMixin, BaseEstimator):
|
|
17
|
+
r"""Mixture of Multilayer Integrator Stochastic Block Model (MimiSBM).
|
|
18
|
+
|
|
19
|
+
The MimiSBM is a generative model for multilayer networks that identifies mesoscale
|
|
20
|
+
structures by grouping nodes into clusters and layers into components.
|
|
21
|
+
|
|
22
|
+
Each component represents a distinct Stochastic Block Model (SBM) shared by a subset
|
|
23
|
+
of layers. This model uses a Variational Expectation-Maximization (VEM) algorithm to
|
|
24
|
+
perform inference and estimation of the posterior distributions.
|
|
25
|
+
|
|
26
|
+
Model settings:
|
|
27
|
+
- `n_clusters`: Number of clusters for the nodes.
|
|
28
|
+
- `n_components`: Number of mixture components for the layers.
|
|
29
|
+
|
|
30
|
+
Prior settings:
|
|
31
|
+
- `clusters_prior`: Dirichlet prior for the node cluster mixing proportions.
|
|
32
|
+
- `components_prior`: Dirichlet prior for the layer component mixing
|
|
33
|
+
proportions.
|
|
34
|
+
- `adjacency_prior`: Beta prior for the edge probabilities within and
|
|
35
|
+
between clusters for each component.
|
|
36
|
+
|
|
37
|
+
EM settings:
|
|
38
|
+
- `max_iter`: Maximum number of iterations for the VEM algorithm.
|
|
39
|
+
- `tol`: Convergence tolerance based on the Evidence Lower Bound (ELBO).
|
|
40
|
+
- `warm_start`: If True, reuse the responsibilities from the previous fit
|
|
41
|
+
as initialization.
|
|
42
|
+
|
|
43
|
+
Attributes:
|
|
44
|
+
n_clusters (int): Number of node clusters.
|
|
45
|
+
n_components (int): Number of layer components.
|
|
46
|
+
clusters_prior (np.ndarray): Prior parameters for node clusters.
|
|
47
|
+
components_prior (np.ndarray): Prior parameters for layer components.
|
|
48
|
+
adjacency_prior (np.ndarray): Prior parameters for edge connections.
|
|
49
|
+
max_iter (int): Maximum number of iterations for the EM algorithm.
|
|
50
|
+
tol (float): Tolerance to declare convergence based on the ELBO.
|
|
51
|
+
warm_start (bool): Whether to reuse the solution of the previous call
|
|
52
|
+
to fit as initialization.
|
|
53
|
+
random_state (int | None): Random state for initialization.
|
|
54
|
+
cluster_responsibilities_ (np.ndarray): Posterior probabilities of node cluster
|
|
55
|
+
assignments (N, K).
|
|
56
|
+
component_responsibilities_ (np.ndarray): Posterior probabilities of layer
|
|
57
|
+
component assignments (V, Q).
|
|
58
|
+
cluster_posterior_ (np.ndarray): Dirichlet posterior parameters for clusters.
|
|
59
|
+
component_posterior_ (np.ndarray): Dirichlet posterior parameters for
|
|
60
|
+
components.
|
|
61
|
+
adjacency_posterior_ (np.ndarray): Beta posterior parameters for edge
|
|
62
|
+
connections (2, K, K, Q).
|
|
63
|
+
elbo_ (float): Evidence Lower Bound of the fitted model.
|
|
64
|
+
converged_ (bool): True if the algorithm converged, False otherwise.
|
|
65
|
+
|
|
66
|
+
Examples:
|
|
67
|
+
>>> from mimisbm import MimiSBM
|
|
68
|
+
>>> import numpy as np
|
|
69
|
+
>>> X = np.random.randint(0, 2, size=(10, 10, 5))
|
|
70
|
+
>>> model = MimiSBM(n_clusters=2, n_components=2)
|
|
71
|
+
>>> model.fit(X)
|
|
72
|
+
>>> node_labels, layer_labels = model.predict()
|
|
73
|
+
"""
|
|
74
|
+
|
|
75
|
+
n_clusters: int
|
|
76
|
+
n_components: int
|
|
77
|
+
clusters_prior: np.ndarray
|
|
78
|
+
components_prior: np.ndarray
|
|
79
|
+
adjacency_prior: np.ndarray
|
|
80
|
+
max_iter: int
|
|
81
|
+
tol: float
|
|
82
|
+
warm_start: bool
|
|
83
|
+
random_state: int | None
|
|
84
|
+
cluster_responsibilities_: np.ndarray
|
|
85
|
+
component_responsibilities_: np.ndarray
|
|
86
|
+
cluster_posterior_: np.ndarray
|
|
87
|
+
component_posterior_: np.ndarray
|
|
88
|
+
adjacency_posterior_: np.ndarray
|
|
89
|
+
elbo_: float
|
|
90
|
+
converged_: bool
|
|
91
|
+
|
|
92
|
+
    @validate_params(
        {
            "n_clusters": [Interval(Integral, 1, None, closed="left")],
            "n_components": [Interval(Integral, 1, None, closed="left")],
            "clusters_prior": [StrOptions({"jeffreys", "uniform"}), np.ndarray],
            "components_prior": [StrOptions({"jeffreys", "uniform"}), np.ndarray],
            "adjacency_prior": [StrOptions({"jeffreys", "uniform"}), np.ndarray],
            "max_iter": [Interval(Integral, 1, None, closed="left")],
            "tol": [Interval(Real, 0, None, closed="left")],
        },
        prefer_skip_nested_validation=True,
    )
    def __init__(
        self,
        n_clusters: int = 2,
        n_components: int = 2,
        *,
        clusters_prior: np.ndarray | str = "jeffreys",
        components_prior: np.ndarray | str = "jeffreys",
        adjacency_prior: np.ndarray | str = "jeffreys",
        max_iter: int = 100,
        tol: float = 1e-4,
        warm_start: bool = False,
        random_state: int | None = None,
    ):
        r"""Initializes the MimiSBM model with specified design and priors.

        Constructs a mixture of multilayer SBMs with user-defined priors and
        EM settings. Provides default settings for Bayesian inference and
        convergence criteria.

        Args:
            n_clusters (int, optional): Number of clusters for the nodes.
                Defaults to 2.
            n_components (int, optional): Number of mixture components for the layers.
                Defaults to 2.
            clusters_prior (np.ndarray | str, optional): Dirichlet prior for node
                clusters. Can be "jeffreys" (0.5), "uniform" (1.0), or a custom array.
                Defaults to "jeffreys".
            components_prior (np.ndarray | str, optional): Dirichlet prior for layer
                components. Defaults to "jeffreys".
            adjacency_prior (np.ndarray | str, optional): Beta prior for edge
                probabilities. Defaults to "jeffreys".
            max_iter (int, optional): Maximum number of VEM iterations. Defaults to 100.
            tol (float, optional): Convergence tolerance for ELBO. Defaults to 1e-4.
            warm_start (bool, optional): Whether to reuse responsibilities from a
                previous fit. Defaults to False.
            random_state (int | None, optional): Seed for the KMeans initialization.
                Defaults to None.
        """
        self.n_clusters = n_clusters
        self.n_components = n_components
        # NOTE(review): the string/array prior arguments are converted to
        # arrays here, so get_params() returns the converted values rather
        # than what the caller passed — this deviates from the scikit-learn
        # convention that __init__ stores parameters untouched; confirm that
        # sklearn.clone() round-trips correctly.
        self.clusters_prior = self._init_prior(clusters_prior, n_clusters)
        self.components_prior = self._init_prior(components_prior, self.n_components)
        # The Beta prior has exactly two parameters (alpha, beta), hence d=2.
        self.adjacency_prior = self._init_prior(adjacency_prior, 2)
        self.max_iter = max_iter
        self.tol = tol
        self.warm_start = warm_start
        self.random_state = random_state
|
|
151
|
+
|
|
152
|
+
@staticmethod
|
|
153
|
+
def _init_prior(prior: np.ndarray | str, d: int) -> np.ndarray:
|
|
154
|
+
r"""Initializes the prior parameters for a given dimension.
|
|
155
|
+
|
|
156
|
+
Args:
|
|
157
|
+
prior (np.ndarray | str): The prior specification.
|
|
158
|
+
d (int): The dimension of the prior vector.
|
|
159
|
+
|
|
160
|
+
Returns:
|
|
161
|
+
np.ndarray: The initialized prior parameters.
|
|
162
|
+
|
|
163
|
+
Raises:
|
|
164
|
+
ValueError: If the provided prior array has an incorrect length.
|
|
165
|
+
"""
|
|
166
|
+
if prior == "jeffreys":
|
|
167
|
+
return np.full((d,), 0.5)
|
|
168
|
+
if prior == "uniform":
|
|
169
|
+
return np.full((d,), 1.0)
|
|
170
|
+
|
|
171
|
+
prior = prior.reshape(-1)
|
|
172
|
+
if len(prior) != d:
|
|
173
|
+
raise ValueError(f"Prior must have {d} elements, got {len(prior)}")
|
|
174
|
+
return prior
|
|
175
|
+
|
|
176
|
+
def _init_responsibilities(
|
|
177
|
+
self, X: np.ndarray, n_clusters: int, axis: tuple[int, ...]
|
|
178
|
+
) -> np.ndarray:
|
|
179
|
+
r"""Initializes responsibilities using KMeans on aggregated adjacency data.
|
|
180
|
+
|
|
181
|
+
Args:
|
|
182
|
+
X (np.ndarray): The multilayer adjacency tensor.
|
|
183
|
+
n_clusters (int): Number of clusters/components to initialize.
|
|
184
|
+
axis (tuple[int, ...]): Axis over which to aggregate the tensor.
|
|
185
|
+
|
|
186
|
+
Returns:
|
|
187
|
+
np.ndarray: Initialized responsibilities.
|
|
188
|
+
"""
|
|
189
|
+
X_agg = X.sum(axis=axis)
|
|
190
|
+
X_agg = X_agg.reshape(X_agg.shape[0], -1)
|
|
191
|
+
|
|
192
|
+
labels = KMeans(
|
|
193
|
+
n_clusters=n_clusters, random_state=self.random_state
|
|
194
|
+
).fit_predict(X_agg)
|
|
195
|
+
|
|
196
|
+
responsibilities = np.zeros((labels.shape[0], n_clusters))
|
|
197
|
+
responsibilities[np.arange(labels.shape[0]), labels] = 1
|
|
198
|
+
return responsibilities
|
|
199
|
+
|
|
200
|
+
    def _elbo(self) -> float:
        r"""Computes the Evidence Lower Bound (ELBO) for the current state.

        The ELBO is used to monitor convergence and as a surrogate for the
        log-likelihood in the Variational EM algorithm.

        Returns:
            float: The computed ELBO value.
        """
        # Entropy of the variational assignment distributions q(z) and q(w).
        cluster_entropy = entr(self.cluster_responsibilities_).sum()
        component_entropy = entr(self.component_responsibilities_).sum()

        # Log-normalizer difference of the Dirichlet posterior vs. its prior
        # for the node-cluster proportions.
        cluster_evidence = (
            gammaln(self.cluster_posterior_).sum()
            - gammaln(self.cluster_posterior_.sum())
            - gammaln(self.clusters_prior).sum()
            + gammaln(self.clusters_prior.sum())
        )

        # Same Dirichlet log-normalizer difference for the layer components.
        component_evidence = (
            gammaln(self.component_posterior_).sum()
            - gammaln(self.component_posterior_.sum())
            - gammaln(self.components_prior).sum()
            + gammaln(self.components_prior.sum())
        )

        # Beta log-normalizers of the edge-probability posteriors and prior;
        # adjacency_posterior_ stacks the (alpha, beta) parameters on axis 0.
        log_adjacency_posterior = betaln(
            self.adjacency_posterior_[0], self.adjacency_posterior_[1]
        )
        log_adjacency_prior = betaln(self.adjacency_prior[0], self.adjacency_prior[1])

        # Sum over the lower triangle INCLUDING the diagonal (i >= j):
        # np.tril_indices covers i >= j, so each unordered block pair of the
        # undirected graph is counted exactly once.
        rows, cols = np.tril_indices(self.n_clusters)
        adjacency_evidence = (
            log_adjacency_posterior[rows, cols, :] - log_adjacency_prior
        ).sum()

        evidence = cluster_evidence + component_evidence + adjacency_evidence
        entropy = cluster_entropy + component_entropy

        return evidence + entropy
|
|
241
|
+
|
|
242
|
+
    def _m_step(self, X: np.ndarray, X_non: np.ndarray):
        r"""Performs the M-step of the Variational EM algorithm.

        Updates the posterior parameters of the priors based on the current
        responsibilities.

        Args:
            X (np.ndarray): The multilayer adjacency tensor.
            X_non (np.ndarray): The complement of the adjacency tensor.
        """
        # Dirichlet posteriors: prior pseudo-counts plus expected assignment
        # counts from the current responsibilities.
        self.cluster_posterior_ = (
            self.clusters_prior + self.cluster_responsibilities_.sum(axis=0)
        )
        self.component_posterior_ = (
            self.components_prior + self.component_responsibilities_.sum(axis=0)
        )

        # Weight each layer's (non-)edges by its component responsibilities;
        # the matmul contracts the layer axis, leaving per-component tensors.
        weighted_edges = X @ self.component_responsibilities_
        weighted_non_edges = X_non @ self.component_responsibilities_

        # Project onto cluster blocks: the quadratic form tau^T (.) tau per
        # component; swapaxes moves the component axis out of / back into
        # position so the batched matmul applies per component.
        expected_edges = (
            self.cluster_responsibilities_.T
            @ weighted_edges.swapaxes(0, 2)
            @ self.cluster_responsibilities_
        ).swapaxes(0, 2)
        expected_non_edges = (
            self.cluster_responsibilities_.T
            @ weighted_non_edges.swapaxes(0, 2)
            @ self.cluster_responsibilities_
        ).swapaxes(0, 2)

        # Halve the diagonal blocks: the quadratic form over the full
        # (symmetric) adjacency counts each within-block node pair twice.
        rows, cols = np.diag_indices(self.n_clusters)
        expected_edges[rows, cols, :] *= 0.5
        expected_non_edges[rows, cols, :] *= 0.5

        # Beta posteriors stacked as (2, K, K, Q): [alpha, beta] pseudo-counts
        # for edges and non-edges respectively.
        self.adjacency_posterior_ = np.stack(
            [
                expected_edges + self.adjacency_prior[0],
                expected_non_edges + self.adjacency_prior[1],
            ]
        )
|
|
284
|
+
|
|
285
|
+
def _e_step(self, X: np.ndarray, X_non: np.ndarray) -> None:
    r"""Performs the E-step of the Variational EM algorithm.

    Updates the responsibilities for node clusters and layer components given the
    current posterior parameters.

    Args:
        X (np.ndarray): The multilayer adjacency tensor.
        X_non (np.ndarray): The complement of the adjacency tensor.
    """
    # Expected log edge / non-edge probabilities under the Beta posterior,
    # via the identity E[log p] = psi(alpha) - psi(alpha + beta).
    digamma_adjacency_posterior = digamma(self.adjacency_posterior_.sum(axis=0))
    log_edges = digamma(self.adjacency_posterior_[0]) - digamma_adjacency_posterior
    log_non_edges = (
        digamma(self.adjacency_posterior_[1]) - digamma_adjacency_posterior
    )

    # Map the expected log-probabilities from cluster space back to node
    # space (tau B tau^T), per component; swapaxes shuffles the component
    # axis around the matmuls.
    expected_component_edges = (
        self.cluster_responsibilities_
        @ log_edges.swapaxes(0, 2)
        @ self.cluster_responsibilities_.T
    ).swapaxes(0, 2)
    expected_component_non_edges = (
        self.cluster_responsibilities_
        @ log_non_edges.swapaxes(0, 2)
        @ self.cluster_responsibilities_.T
    ).swapaxes(0, 2)

    # Component responsibilities. The tensordots sum over all ordered node
    # pairs (i, j); X is symmetric, so each unordered pair is counted twice
    # and the 0.5 compensates.
    component_posterior_evidence = digamma(self.component_posterior_) - digamma(
        self.component_posterior_.sum()
    )
    component_edges_evidence = 0.5 * np.tensordot(
        X, expected_component_edges, axes=([0, 1], [0, 1])
    )
    # Guard against non-finite values (e.g. from degenerate posteriors) so
    # the softmax below stays defined.
    component_edges_evidence = np.nan_to_num(component_edges_evidence)
    component_non_edges_evidence = 0.5 * np.tensordot(
        X_non, expected_component_non_edges, axes=([0, 1], [0, 1])
    )
    component_non_edges_evidence = np.nan_to_num(component_non_edges_evidence)
    # Normalize per layer: each row becomes a distribution over components.
    self.component_responsibilities_ = softmax(
        component_posterior_evidence
        + component_edges_evidence
        + component_non_edges_evidence,
        axis=1,
    )

    # Cluster responsibilities. Sums run over i != j: the diagonal of X and
    # X_non was zeroed in `_validate`, so self-pairs contribute nothing.
    expected_cluster_edges = np.tensordot(
        self.component_responsibilities_, log_edges, axes=([1], [2])
    )
    expected_cluster_non_edges = np.tensordot(
        self.component_responsibilities_, log_non_edges, axes=([1], [2])
    )
    cluster_posterior_evidence = digamma(self.cluster_posterior_) - digamma(
        self.cluster_posterior_.sum()
    )

    cluster_edges_evidence = np.tensordot(
        X,
        self.cluster_responsibilities_ @ expected_cluster_edges.swapaxes(1, 2),
        axes=([1, 2], [1, 0]),
    )
    cluster_non_edges_evidence = np.tensordot(
        X_non,
        self.cluster_responsibilities_ @ expected_cluster_non_edges.swapaxes(1, 2),
        axes=([1, 2], [1, 0]),
    )

    # Normalize per node: each row becomes a distribution over clusters.
    self.cluster_responsibilities_ = softmax(
        cluster_posterior_evidence
        + cluster_edges_evidence
        + cluster_non_edges_evidence,
        axis=1,
    )
def _validate(self, X: np.typing.ArrayLike) -> tuple[np.ndarray, np.ndarray]:
    r"""Checks and normalizes the multilayer adjacency tensor.

    The input is coerced to a boolean array and required to be 3D, then
    symmetrised across its first two axes and stripped of self-loops.
    The complement (non-edge) tensor, also without self-loops, is built
    alongside it.

    Args:
        X (np.typing.ArrayLike): The input data to validate.

    Raises:
        ValueError: If the input data is not a 3D array.

    Returns:
        tuple[np.ndarray, np.ndarray]: A tuple containing the validated
            adjacency tensor and its complement.
    """
    X = np.asarray(validate_data(self, X, allow_nd=True, dtype=np.bool))  # type: ignore
    if X.ndim != 3:  # noqa: PLR2004
        raise ValueError(f"Input data must be a 3D array, got {X.ndim}D array")

    # Force undirected layers, then clear the diagonal (no self-loops) in
    # both the tensor and its complement.
    X = X | X.swapaxes(0, 1)
    diag = np.diag_indices(X.shape[0])
    X[diag[0], diag[1], :] = 0
    X_non = 1 - X
    X_non[diag[0], diag[1], :] = 0

    return X, X_non
@validate_params({"X": ["array-like"]}, prefer_skip_nested_validation=True)
def fit(self, X: np.typing.ArrayLike) -> Self:
    r"""Fits the MimiSBM model to the multilayer adjacency tensor.

    Responsibilities are (re-)initialized unless a warm start from a
    previous fit is requested; the M- and E-steps then alternate until the
    ELBO change drops below ``tol`` or ``max_iter`` sweeps have run.

    Args:
        X (np.typing.ArrayLike): A 3D numpy array-like representing the
            multilayer adjacency tensor of shape (N, N, V).

    Returns:
        Self: The fitted model instance.
    """
    X, X_non = self._validate(X)  # type: ignore

    # A warm start is only meaningful once a previous fit has finished
    # (i.e. `converged_` exists); otherwise draw fresh responsibilities.
    if not self.warm_start or not hasattr(self, "converged_"):
        self.cluster_responsibilities_ = self._init_responsibilities(
            X, self.n_clusters, (2,)
        )
        self.component_responsibilities_ = self._init_responsibilities(
            X, self.n_components, (0, 1)
        )

    previous_elbo = -np.inf
    for _ in range(self.max_iter):
        self._m_step(X, X_non)
        self._e_step(X, X_non)

        # Stop as soon as the ELBO stabilizes between sweeps.
        self.elbo_ = self._elbo()
        if abs(self.elbo_ - previous_elbo) < self.tol:
            self.converged_ = True
            return self
        previous_elbo = self.elbo_

    self.converged_ = False
    return self
def predict(self) -> tuple[np.ndarray, np.ndarray]:
    r"""Predicts the node clusters and layer components labels.

    Each node is assigned to its highest-probability cluster and each
    layer to its highest-probability component.

    Returns:
        tuple[np.ndarray, np.ndarray]: A tuple containing:
            - node_labels (np.ndarray): Predicted cluster for each node (N,).
            - layer_labels (np.ndarray): Predicted component for each layer (V,).
    """
    check_is_fitted(
        self, ["cluster_responsibilities_", "component_responsibilities_"]
    )
    node_labels = self.cluster_responsibilities_.argmax(axis=1)
    layer_labels = self.component_responsibilities_.argmax(axis=1)
    return node_labels, layer_labels
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mimisbm
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Mixture of Multilayer Integrator Stochastic Block Model
|
|
5
|
+
Author: Félix Laplante
|
|
6
|
+
Project-URL: Source, https://github.com/felixlaplante0/mimisbm
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
9
|
+
Classifier: Operating System :: MacOS
|
|
10
|
+
Classifier: Operating System :: Microsoft :: Windows
|
|
11
|
+
Requires-Python: >=3.10
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
Requires-Dist: numpy
|
|
14
|
+
Requires-Dist: scipy
|
|
15
|
+
Requires-Dist: fastkmeanspp
|
|
16
|
+
Requires-Dist: scikit-learn
|
|
17
|
+
|
|
18
|
+
# 🕸️ MimiSBM
|
|
19
|
+
|
|
20
|
+
**mimisbm** is a Python package implementing the **Mixture of Multilayer Integrator Stochastic Block Model** proposed by De Santiago, Szafranski, and Ambroise (2024). It jointly groups nodes into clusters and layers into components, providing a unified framework for identifying shared connectivity patterns across multiple network layers.
|
|
21
|
+
|
|
22
|
+
---
|
|
23
|
+
|
|
24
|
+
## ✨ Features
|
|
25
|
+
|
|
26
|
+
- **Multilayer Clustering**: Jointly identifies node communities and layer components in a single probabilistic framework.
|
|
27
|
+
- **Variational EM**: Efficient inference using a Variational Expectation-Maximization (VEM) algorithm for large-scale networks.
|
|
28
|
+
- **Bayesian Framework**: Supports flexible Dirichlet and Beta priors, allowing for robust structure discovery under different sparsity regimes.
|
|
29
|
+
- **Component-wise SBMs**: Groups layers sharing similar block-model structures into distinct mixture components.
|
|
30
|
+
- **scikit-learn API**: Native `BaseEstimator` and `ClusterMixin` integration with a familiar `fit` / `predict` interface.
|
|
31
|
+
|
|
32
|
+
---
|
|
33
|
+
|
|
34
|
+
## 🚀 Installation
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
pip install mimisbm
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## 🔧 Usage
|
|
41
|
+
|
|
42
|
+
### Example
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
import numpy as np
|
|
46
|
+
from mimisbm import MimiSBM
|
|
47
|
+
|
|
48
|
+
# Generate a synthetic multilayer adjacency tensor (20 nodes, 5 layers)
|
|
49
|
+
np.random.seed(42)
|
|
50
|
+
N, V = 20, 5
|
|
51
|
+
X = np.random.randint(0, 2, size=(N, N, V))
|
|
52
|
+
|
|
53
|
+
# Ensure the adjacency matrices are symmetric (undirected)
|
|
54
|
+
for v in range(V):
|
|
55
|
+
X[..., v] = np.tril(X[..., v], -1) + np.tril(X[..., v], -1).T
|
|
56
|
+
|
|
57
|
+
# Initialize the model with 3 node clusters and 2 layer components
|
|
58
|
+
model = MimiSBM(n_clusters=3, n_components=2, random_state=42)
|
|
59
|
+
|
|
60
|
+
# Fit the model to the multilayer network
|
|
61
|
+
model.fit(X)
|
|
62
|
+
|
|
63
|
+
# Predict node cluster and layer component assignments
|
|
64
|
+
node_labels, layer_labels = model.predict()
|
|
65
|
+
|
|
66
|
+
print(f"Node clusters: {node_labels}")
|
|
67
|
+
print(f"Layer components: {layer_labels}")
|
|
68
|
+
print(f"Final ELBO: {model.elbo_:.2f}")
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
---
|
|
72
|
+
|
|
73
|
+
## 📖 Learn More
|
|
74
|
+
|
|
75
|
+
For tutorials and detailed API reference, visit the official site:
|
|
76
|
+
👉 [mimisbm's documentation](https://felixlaplante0.github.io/mimisbm)
|
|
77
|
+
|
|
78
|
+
### 📚 Citation
|
|
79
|
+
|
|
80
|
+
If you use MimiSBM in your research, please cite the original authors' paper:
|
|
81
|
+
|
|
82
|
+
```bibtex
|
|
83
|
+
@article{de2024mixture,
|
|
84
|
+
title={Mixture of multilayer stochastic block models for multiview clustering},
|
|
85
|
+
author={De Santiago, Kylliann and Szafranski, Marie and Ambroise, Christophe},
|
|
86
|
+
journal={arXiv preprint arXiv:2401.04682},
|
|
87
|
+
year={2024}
|
|
88
|
+
}
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
For more details, see the corresponding Preprint: https://arxiv.org/abs/2401.04682
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
demo.py
|
|
3
|
+
pyproject.toml
|
|
4
|
+
.github/workflows/lint.yml
|
|
5
|
+
.github/workflows/pages.yml
|
|
6
|
+
.github/workflows/publish.yml
|
|
7
|
+
docs/Makefile
|
|
8
|
+
docs/make.bat
|
|
9
|
+
docs/source/conf.py
|
|
10
|
+
docs/source/index.rst
|
|
11
|
+
docs/source/_static/.gitkeep
|
|
12
|
+
docs/source/_templates/autosummary/class.rst
|
|
13
|
+
mimisbm/__init__.py
|
|
14
|
+
mimisbm/_model.py
|
|
15
|
+
mimisbm.egg-info/PKG-INFO
|
|
16
|
+
mimisbm.egg-info/SOURCES.txt
|
|
17
|
+
mimisbm.egg-info/dependency_links.txt
|
|
18
|
+
mimisbm.egg-info/requires.txt
|
|
19
|
+
mimisbm.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
mimisbm
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "mimisbm"
|
|
3
|
+
description = "Mixture of Multilayer Integrator Stochastic Block Model"
|
|
4
|
+
readme = "README.md"
|
|
5
|
+
urls = { "Source" = "https://github.com/felixlaplante0/mimisbm" }
|
|
6
|
+
authors = [{ name = "Félix Laplante" }]
|
|
7
|
+
requires-python = ">=3.10"
|
|
8
|
+
dependencies = ["numpy", "scipy", "fastkmeanspp", "scikit-learn"]
|
|
9
|
+
dynamic = ["version"]
|
|
10
|
+
classifiers = [
|
|
11
|
+
"Programming Language :: Python :: 3",
|
|
12
|
+
"Operating System :: POSIX :: Linux",
|
|
13
|
+
"Operating System :: MacOS",
|
|
14
|
+
"Operating System :: Microsoft :: Windows",
|
|
15
|
+
]
|
|
16
|
+
|
|
17
|
+
[build-system]
|
|
18
|
+
requires = ["setuptools>=42", "setuptools-scm[toml]>=6.0", "wheel"]
|
|
19
|
+
build-backend = "setuptools.build_meta"
|
|
20
|
+
|
|
21
|
+
[tool.setuptools]
|
|
22
|
+
packages = ["mimisbm"]
|
|
23
|
+
|
|
24
|
+
[tool.setuptools_scm]
|
|
25
|
+
version_scheme = "post-release"
|
|
26
|
+
local_scheme = "no-local-version"
|
|
27
|
+
|
|
28
|
+
[tool.setuptools.package-data]
|
|
29
|
+
mimisbm = ["py.typed"]
|
|
30
|
+
|
|
31
|
+
[tool.ruff]
|
|
32
|
+
lint.select = [
|
|
33
|
+
"D", # pydocstyle (docstring conventions)
|
|
34
|
+
"E", # pycodestyle errors
|
|
35
|
+
"W", # pycodestyle warnings
|
|
36
|
+
"F", # Pyflakes
|
|
37
|
+
"I", # isort
|
|
38
|
+
"UP", # pyupgrade
|
|
39
|
+
"B", # flake8-bugbear
|
|
40
|
+
"C4", # flake8-comprehensions
|
|
41
|
+
"S", # flake8-bandit (security)
|
|
42
|
+
"T20", # flake8-print
|
|
43
|
+
"PT", # flake8-pytest-style
|
|
44
|
+
"Q", # flake8-quotes
|
|
45
|
+
"RET", # flake8-return
|
|
46
|
+
"SIM", # flake8-simplify
|
|
47
|
+
"ARG", # flake8-unused-arguments
|
|
48
|
+
"ERA", # eradicate (commented code)
|
|
49
|
+
"PL", # Pylint
|
|
50
|
+
"RUF", # Ruff-specific rules
|
|
51
|
+
]
|
|
52
|
+
lint.ignore = ["D417", "PLR0913"]
|
|
53
|
+
|
|
54
|
+
[tool.ruff.lint.pydocstyle]
|
|
55
|
+
convention = "google"
|
mimisbm-0.1.0/setup.cfg
ADDED