mimisbm 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ name: Lint
2
+
3
+ on:
4
+ push:
5
+ branches: ["**"]
6
+ pull_request:
7
+
8
+ jobs:
9
+ lint:
10
+ runs-on: ubuntu-latest
11
+ container:
12
+ image: python:latest
13
+ steps:
14
+ - name: Checkout code
15
+ uses: actions/checkout@v4
16
+ - name: Install ruff
17
+ run: pip install ruff
18
+ - name: Format check
19
+ run: ruff format --check mimisbm
20
+ - name: Lint
21
+ run: ruff check mimisbm
@@ -0,0 +1,33 @@
1
+ name: Pages
2
+
3
+ on:
4
+ workflow_dispatch:
5
+
6
+ jobs:
7
+ pages:
8
+ runs-on: ubuntu-latest
9
+ container:
10
+ image: python:3.13
11
+ permissions:
12
+ contents: read
13
+ pages: write
14
+ id-token: write
15
+ steps:
16
+ - name: Checkout code
17
+ uses: actions/checkout@v4
18
+ - name: Fix git safe directory
19
+ run: git config --global --add safe.directory '*'
20
+ - name: Install system dependencies
21
+ run: apt-get update && apt-get install -y git
22
+ - name: Install Python dependencies
23
+ run: |
24
+ pip install sphinx furo
25
+ pip install . --extra-index-url https://download.pytorch.org/whl/cpu
26
+ - name: Build documentation
27
+ run: sphinx-build -b html docs/source public
28
+ - name: Upload Pages artifact
29
+ uses: actions/upload-pages-artifact@v3
30
+ with:
31
+ path: public
32
+ - name: Deploy to GitHub Pages
33
+ uses: actions/deploy-pages@v4
@@ -0,0 +1,26 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*"
7
+
8
+ jobs:
9
+ publish-to-pypi:
10
+ runs-on: ubuntu-latest
11
+ container:
12
+ image: python:3.13
13
+ steps:
14
+ - name: Checkout code
15
+ uses: actions/checkout@v4
16
+ - name: Fix git safe directory
17
+ run: git config --global --add safe.directory '*'
18
+ - name: Install build tools
19
+ run: python -m pip install --upgrade pip build twine setuptools-scm
20
+ - name: Build package
21
+ run: python -m build
22
+ - name: Publish to PyPI
23
+ env:
24
+ TWINE_USERNAME: __token__
25
+ TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
26
+ run: python -m twine upload --verbose dist/*
mimisbm-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,91 @@
1
+ Metadata-Version: 2.4
2
+ Name: mimisbm
3
+ Version: 0.1.0
4
+ Summary: Mixture of Multilayer Integrator Stochastic Block Model
5
+ Author: Félix Laplante
6
+ Project-URL: Source, https://github.com/felixlaplante0/mimisbm
7
+ Classifier: Programming Language :: Python :: 3
8
+ Classifier: Operating System :: POSIX :: Linux
9
+ Classifier: Operating System :: MacOS
10
+ Classifier: Operating System :: Microsoft :: Windows
11
+ Requires-Python: >=3.10
12
+ Description-Content-Type: text/markdown
13
+ Requires-Dist: numpy
14
+ Requires-Dist: scipy
15
+ Requires-Dist: fastkmeanspp
16
+ Requires-Dist: scikit-learn
17
+
18
+ # 🕸️ MimiSBM
19
+
20
+ **mimisbm** is a Python package implementing the **Mixture of Multilayer Integrator Stochastic Block Model** proposed by the original authors. It jointly groups nodes into clusters and layers into components, providing a unified framework for identifying shared connectivity patterns across multiple network layers.
21
+
22
+ ---
23
+
24
+ ## ✨ Features
25
+
26
+ - **Multilayer Clustering**: Jointly identifies node communities and layer components in a single probabilistic framework.
27
+ - **Variational EM**: Efficient inference using a Variational Expectation-Maximization (VEM) algorithm for large-scale networks.
28
+ - **Bayesian Framework**: Supports flexible Dirichlet and Beta priors, allowing for robust structure discovery under different sparsity regimes.
29
+ - **Component-wise SBMs**: Groups layers sharing similar block-model structures into distinct mixture components.
30
+ - **scikit-learn API**: Native `BaseEstimator` and `ClusterMixin` integration with a familiar `fit` / `predict` interface.
31
+
32
+ ---
33
+
34
+ ## 🚀 Installation
35
+
36
+ ```bash
37
+ pip install mimisbm
38
+ ```
39
+
40
+ ## 🔧 Usage
41
+
42
+ ### Example
43
+
44
+ ```python
45
+ import numpy as np
46
+ from mimisbm import MimiSBM
47
+
48
+ # Generate a synthetic multilayer adjacency tensor (20 nodes, 5 layers)
49
+ np.random.seed(42)
50
+ N, V = 20, 5
51
+ X = np.random.randint(0, 2, size=(N, N, V))
52
+
53
+ # Ensure the adjacency matrices are symmetric (undirected)
54
+ for v in range(V):
55
+ X[..., v] = np.tril(X[..., v], -1) + np.tril(X[..., v], -1).T
56
+
57
+ # Initialize the model with 3 node clusters and 2 layer components
58
+ model = MimiSBM(n_clusters=3, n_components=2, random_state=42)
59
+
60
+ # Fit the model to the multilayer network
61
+ model.fit(X)
62
+
63
+ # Predict node cluster and layer component assignments
64
+ node_labels, layer_labels = model.predict()
65
+
66
+ print(f"Node clusters: {node_labels}")
67
+ print(f"Layer components: {layer_labels}")
68
+ print(f"Final ELBO: {model.elbo_:.2f}")
69
+ ```
70
+
71
+ ---
72
+
73
+ ## 📖 Learn More
74
+
75
+ For tutorials and detailed API reference, visit the official site:
76
+ 👉 [mimisbm's documentation](https://felixlaplante0.github.io/mimisbm)
77
+
78
+ ### 📚 Citation
79
+
80
+ If you use MimiSBM in your research, please cite the original authors' paper:
81
+
82
+ ```bibtex
83
+ @article{de2024mixture,
84
+ title={Mixture of multilayer stochastic block models for multiview clustering},
85
+ author={De Santiago, Kylliann and Szafranski, Marie and Ambroise, Christophe},
86
+ journal={arXiv preprint arXiv:2401.04682},
87
+ year={2024}
88
+ }
89
+ ```
90
+
91
+ For more details, see the corresponding Preprint: https://arxiv.org/abs/2401.04682
@@ -0,0 +1,74 @@
1
+ # 🕸️ MimiSBM
2
+
3
+ **mimisbm** is a Python package implementing the **Mixture of Multilayer Integrator Stochastic Block Model** proposed by the original authors. It jointly groups nodes into clusters and layers into components, providing a unified framework for identifying shared connectivity patterns across multiple network layers.
4
+
5
+ ---
6
+
7
+ ## ✨ Features
8
+
9
+ - **Multilayer Clustering**: Jointly identifies node communities and layer components in a single probabilistic framework.
10
+ - **Variational EM**: Efficient inference using a Variational Expectation-Maximization (VEM) algorithm for large-scale networks.
11
+ - **Bayesian Framework**: Supports flexible Dirichlet and Beta priors, allowing for robust structure discovery under different sparsity regimes.
12
+ - **Component-wise SBMs**: Groups layers sharing similar block-model structures into distinct mixture components.
13
+ - **scikit-learn API**: Native `BaseEstimator` and `ClusterMixin` integration with a familiar `fit` / `predict` interface.
14
+
15
+ ---
16
+
17
+ ## 🚀 Installation
18
+
19
+ ```bash
20
+ pip install mimisbm
21
+ ```
22
+
23
+ ## 🔧 Usage
24
+
25
+ ### Example
26
+
27
+ ```python
28
+ import numpy as np
29
+ from mimisbm import MimiSBM
30
+
31
+ # Generate a synthetic multilayer adjacency tensor (20 nodes, 5 layers)
32
+ np.random.seed(42)
33
+ N, V = 20, 5
34
+ X = np.random.randint(0, 2, size=(N, N, V))
35
+
36
+ # Ensure the adjacency matrices are symmetric (undirected)
37
+ for v in range(V):
38
+ X[..., v] = np.tril(X[..., v], -1) + np.tril(X[..., v], -1).T
39
+
40
+ # Initialize the model with 3 node clusters and 2 layer components
41
+ model = MimiSBM(n_clusters=3, n_components=2, random_state=42)
42
+
43
+ # Fit the model to the multilayer network
44
+ model.fit(X)
45
+
46
+ # Predict node cluster and layer component assignments
47
+ node_labels, layer_labels = model.predict()
48
+
49
+ print(f"Node clusters: {node_labels}")
50
+ print(f"Layer components: {layer_labels}")
51
+ print(f"Final ELBO: {model.elbo_:.2f}")
52
+ ```
53
+
54
+ ---
55
+
56
+ ## 📖 Learn More
57
+
58
+ For tutorials and detailed API reference, visit the official site:
59
+ 👉 [mimisbm's documentation](https://felixlaplante0.github.io/mimisbm)
60
+
61
+ ### 📚 Citation
62
+
63
+ If you use MimiSBM in your research, please cite the original authors' paper:
64
+
65
+ ```bibtex
66
+ @article{de2024mixture,
67
+ title={Mixture of multilayer stochastic block models for multiview clustering},
68
+ author={De Santiago, Kylliann and Szafranski, Marie and Ambroise, Christophe},
69
+ journal={arXiv preprint arXiv:2401.04682},
70
+ year={2024}
71
+ }
72
+ ```
73
+
74
+ For more details, see the corresponding Preprint: https://arxiv.org/abs/2401.04682
mimisbm-0.1.0/demo.py ADDED
@@ -0,0 +1,41 @@
1
+ import numpy as np
2
+ from sklearn.metrics import adjusted_rand_score
3
+
4
+ from mimisbm._model import MimiSBM
5
+
6
+
7
+ def gen_data(N=100, V=20, K=2, Q=2):
8
+ z = np.random.randint(0, K, N)
9
+ w = np.random.randint(0, Q, V)
10
+
11
+ # Connectivity for component 0 (assortative) and component 1 (disassortative)
12
+ alphas = np.zeros((K, K, Q))
13
+ alphas[:, :, 0] = [[0.8, 0.1], [0.1, 0.8]]
14
+ alphas[:, :, 1] = [[0.1, 0.8], [0.8, 0.1]]
15
+
16
+ A = np.zeros((N, N, V))
17
+ for v in range(V):
18
+ p_mat = alphas[np.ix_(z, z, [w[v]])].squeeze()
19
+ # Generate undirected edges without self-loops
20
+ tri_mask = np.random.rand(N, N) < p_mat
21
+ A[:, :, v] = np.tril(tri_mask, -1).astype(float)
22
+ A[:, :, v] += A[:, :, v].T
23
+
24
+ return A, z, w
25
+
26
+ np.random.seed(42)
27
+
28
+ print("Generating synthetic data...")
29
+ A, true_z, true_w = gen_data()
30
+
31
+ print("Fitting MimiSBM...")
32
+ model = MimiSBM(n_clusters=2, n_components=2, random_state=42).fit(A)
33
+ pred_z, pred_w = model.predict()
34
+
35
+ # Evaluate recovered labels against the ground truth
36
+ node_ari = adjusted_rand_score(true_z, pred_z)
37
+ view_ari = adjusted_rand_score(true_w, pred_w)
38
+
39
+ print("\nEvaluation Results:")
40
+ print(f"Node ARI: {node_ari:.4f}")
41
+ print(f"View ARI: {view_ari:.4f}")
@@ -0,0 +1,20 @@
1
+ # Minimal makefile for Sphinx documentation
2
+ #
3
+
4
+ # You can set these variables from the command line, and also
5
+ # from the environment for the first two.
6
+ SPHINXOPTS ?=
7
+ SPHINXBUILD ?= sphinx-build
8
+ SOURCEDIR = source
9
+ BUILDDIR = build
10
+
11
+ # Put it first so that "make" without argument is like "make help".
12
+ help:
13
+ @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14
+
15
+ .PHONY: help Makefile
16
+
17
+ # Catch-all target: route all unknown targets to Sphinx using the new
18
+ # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19
+ %: Makefile
20
+ @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
@@ -0,0 +1,35 @@
1
+ @ECHO OFF
2
+
3
+ pushd %~dp0
4
+
5
+ REM Command file for Sphinx documentation
6
+
7
+ if "%SPHINXBUILD%" == "" (
8
+ set SPHINXBUILD=sphinx-build
9
+ )
10
+ set SOURCEDIR=source
11
+ set BUILDDIR=build
12
+
13
+ %SPHINXBUILD% >NUL 2>NUL
14
+ if errorlevel 9009 (
15
+ echo.
16
+ echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
17
+ echo.installed, then set the SPHINXBUILD environment variable to point
18
+ echo.to the full path of the 'sphinx-build' executable. Alternatively you
19
+ echo.may add the Sphinx directory to PATH.
20
+ echo.
21
+ echo.If you don't have Sphinx installed, grab it from
22
+ echo.https://www.sphinx-doc.org/
23
+ exit /b 1
24
+ )
25
+
26
+ if "%1" == "" goto help
27
+
28
+ %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29
+ goto end
30
+
31
+ :help
32
+ %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33
+
34
+ :end
35
+ popd
File without changes
@@ -0,0 +1,8 @@
1
+ {{ fullname | escape | underline}}
2
+
3
+ .. currentmodule:: {{ module }}
4
+
5
+ .. autoclass:: {{ objname }}
6
+ :members:
7
+
8
+ .. automethod:: __init__
@@ -0,0 +1,46 @@
1
+ # Configuration file for the Sphinx documentation builder.
2
+ #
3
+ # For the full list of built-in configuration values, see the documentation:
4
+ # https://www.sphinx-doc.org/en/master/usage/configuration.html
5
+
6
+ import os
7
+ import sys
8
+
9
+ sys.path.insert(0, os.path.abspath("../../"))
10
+
11
+ # -- Project information -----------------------------------------------------
12
+ # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
13
+
14
+ project = "mimisbm"
15
+ release = ""
16
+ version = ""
17
+ copyright = "2026, Félix Laplante"
18
+ author = "Félix Laplante"
19
+
20
+ # -- General configuration ---------------------------------------------------
21
+ # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
22
+
23
+ extensions = [
24
+ "sphinx.ext.autodoc",
25
+ "sphinx.ext.napoleon",
26
+ "sphinx.ext.viewcode",
27
+ "sphinx.ext.autosummary",
28
+ ]
29
+
30
+ templates_path = ["_templates"]
31
+ exclude_patterns = []
32
+
33
+ autodoc_member_order = "bysource"
34
+ autodoc_typehints = "description"
35
+ autodoc_typehints_format = "short"
36
+ autodoc_inherit_docstrings = True
37
+ autosummary_generate = True
38
+ add_module_names = False
39
+ napoleon_use_ivar = True
40
+ napoleon_attr_annotations = True
41
+
42
+ # -- Options for HTML output -------------------------------------------------
43
+ # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
44
+
45
+ html_theme = "furo"
46
+ html_static_path = ["_static"]
@@ -0,0 +1,78 @@
1
+ MimiSBM
2
+ =======
3
+
4
+ **mimisbm** is a Python package implementing the **Mixture of Multilayer Integrator Stochastic Block Model** proposed by the original authors. It jointly groups nodes into clusters and layers into components, providing a unified framework for identifying shared connectivity patterns across multiple network layers.
5
+
6
+ Features
7
+ --------
8
+
9
+ - **Multilayer Clustering**: Jointly identifies node communities and layer components in a single probabilistic framework.
10
+ - **Variational EM**: Efficient inference using a Variational Expectation-Maximization (VEM) algorithm for large-scale networks.
11
+ - **Bayesian Framework**: Supports flexible Dirichlet and Beta priors, allowing for robust structure discovery under different sparsity regimes.
12
+ - **Component-wise SBMs**: Groups layers sharing similar block-model structures into distinct mixture components.
13
+ - **scikit-learn API**: Native ``BaseEstimator`` and ``ClusterMixin`` integration with a familiar ``fit`` / ``predict`` interface.
14
+
15
+ Installation
16
+ ------------
17
+
18
+ You can install the package via pip:
19
+
20
+ .. code-block:: bash
21
+
22
+ pip install mimisbm
23
+
24
+ Usage
25
+ -----
26
+
27
+ Example:
28
+
29
+ .. code-block:: python
30
+
31
+ import numpy as np
32
+ from mimisbm import MimiSBM
33
+
34
+ # Generate a synthetic multilayer adjacency tensor (20 nodes, 5 layers)
35
+ np.random.seed(42)
36
+ N, V = 20, 5
37
+ X = np.random.randint(0, 2, size=(N, N, V))
38
+
39
+ # Ensure the adjacency matrices are symmetric (undirected)
40
+ for v in range(V):
41
+ X[..., v] = np.tril(X[..., v], -1) + np.tril(X[..., v], -1).T
42
+
43
+ # Initialize the model with 3 node clusters and 2 layer components
44
+ model = MimiSBM(n_clusters=3, n_components=2, random_state=42)
45
+
46
+ # Fit the model to the multilayer network
47
+ model.fit(X)
48
+
49
+ # Predict node cluster and layer component assignments
50
+ node_labels, layer_labels = model.predict()
51
+
52
+ print(f"Node clusters: {node_labels}")
53
+ print(f"Layer components: {layer_labels}")
54
+ print(f"Final ELBO: {model.elbo_:.2f}")
55
+
56
+ Citation
57
+ --------
58
+
59
+ If you use MimiSBM in your research, please cite the original authors' paper:
60
+
61
+ .. code-block:: bibtex
62
+
63
+ @article{de2024mixture,
64
+ title={Mixture of multilayer stochastic block models for multiview clustering},
65
+ author={De Santiago, Kylliann and Szafranski, Marie and Ambroise, Christophe},
66
+ journal={arXiv preprint arXiv:2401.04682},
67
+ year={2024}
68
+ }
69
+
70
+ For more details, see the corresponding Preprint: https://arxiv.org/abs/2401.04682
71
+
72
+ API Reference
73
+ -------------
74
+
75
+ .. autoclass:: mimisbm.MimiSBM
76
+ :members:
77
+ :undoc-members:
78
+ :show-inheritance:
@@ -0,0 +1,5 @@
1
+ """Mixture of Multilayer Integrator Stochastic Block Model."""
2
+
3
+ from ._model import MimiSBM
4
+
5
+ __all__ = ["MimiSBM"]
@@ -0,0 +1,444 @@
1
+ from numbers import Integral, Real
2
+ from typing import Self
3
+
4
+ import numpy as np
5
+ from fastkmeanspp import KMeans # type: ignore
6
+ from scipy.special import betaln, digamma, entr, gammaln, softmax # type: ignore
7
+ from sklearn.base import BaseEstimator, ClusterMixin # type: ignore
8
+ from sklearn.utils._param_validation import ( # type: ignore
9
+ Interval, # type: ignore
10
+ StrOptions, # type: ignore
11
+ validate_params, # type: ignore
12
+ )
13
+ from sklearn.utils.validation import check_is_fitted, validate_data # type: ignore
14
+
15
+
16
+ class MimiSBM(ClusterMixin, BaseEstimator):
17
+ r"""Mixture of Multilayer Integrator Stochastic Block Model (MimiSBM).
18
+
19
+ The MimiSBM is a generative model for multilayer networks that identifies mesoscale
20
+ structures by grouping nodes into clusters and layers into components.
21
+
22
+ Each component represents a distinct Stochastic Block Model (SBM) shared by a subset
23
+ of layers. This model uses a Variational Expectation-Maximization (VEM) algorithm to
24
+ perform inference and estimation of the posterior distributions.
25
+
26
+ Model settings:
27
+ - `n_clusters`: Number of clusters for the nodes.
28
+ - `n_components`: Number of mixture components for the layers.
29
+
30
+ Prior settings:
31
+ - `clusters_prior`: Dirichlet prior for the node cluster mixing proportions.
32
+ - `components_prior`: Dirichlet prior for the layer component mixing
33
+ proportions.
34
+ - `adjacency_prior`: Beta prior for the edge probabilities within and
35
+ between clusters for each component.
36
+
37
+ EM settings:
38
+ - `max_iter`: Maximum number of iterations for the VEM algorithm.
39
+ - `tol`: Convergence tolerance based on the Evidence Lower Bound (ELBO).
40
+ - `warm_start`: If True, reuse the responsibilities from the previous fit
41
+ as initialization.
42
+
43
+ Attributes:
44
+ n_clusters (int): Number of node clusters.
45
+ n_components (int): Number of layer components.
46
+ clusters_prior (np.ndarray): Prior parameters for node clusters.
47
+ components_prior (np.ndarray): Prior parameters for layer components.
48
+ adjacency_prior (np.ndarray): Prior parameters for edge connections.
49
+ max_iter (int): Maximum number of iterations for the EM algorithm.
50
+ tol (float): Tolerance to declare convergence based on the ELBO.
51
+ warm_start (bool): Whether to reuse the solution of the previous call
52
+ to fit as initialization.
53
+ random_state (int | None): Random state for initialization.
54
+ cluster_responsibilities_ (np.ndarray): Posterior probabilities of node cluster
55
+ assignments (N, K).
56
+ component_responsibilities_ (np.ndarray): Posterior probabilities of layer
57
+ component assignments (V, Q).
58
+ cluster_posterior_ (np.ndarray): Dirichlet posterior parameters for clusters.
59
+ component_posterior_ (np.ndarray): Dirichlet posterior parameters for
60
+ components.
61
+ adjacency_posterior_ (np.ndarray): Beta posterior parameters for edge
62
+ connections (2, K, K, Q).
63
+ elbo_ (float): Evidence Lower Bound of the fitted model.
64
+ converged_ (bool): True if the algorithm converged, False otherwise.
65
+
66
+ Examples:
67
+ >>> from mimisbm import MimiSBM
68
+ >>> import numpy as np
69
+ >>> X = np.random.randint(0, 2, size=(10, 10, 5))
70
+ >>> model = MimiSBM(n_clusters=2, n_components=2)
71
+ >>> model.fit(X)
72
+ >>> node_labels, layer_labels = model.predict()
73
+ """
74
+
75
+ n_clusters: int
76
+ n_components: int
77
+ clusters_prior: np.ndarray
78
+ components_prior: np.ndarray
79
+ adjacency_prior: np.ndarray
80
+ max_iter: int
81
+ tol: float
82
+ warm_start: bool
83
+ random_state: int | None
84
+ cluster_responsibilities_: np.ndarray
85
+ component_responsibilities_: np.ndarray
86
+ cluster_posterior_: np.ndarray
87
+ component_posterior_: np.ndarray
88
+ adjacency_posterior_: np.ndarray
89
+ elbo_: float
90
+ converged_: bool
91
+
92
+ @validate_params(
93
+ {
94
+ "n_clusters": [Interval(Integral, 1, None, closed="left")],
95
+ "n_components": [Interval(Integral, 1, None, closed="left")],
96
+ "clusters_prior": [StrOptions({"jeffreys", "uniform"}), np.ndarray],
97
+ "components_prior": [StrOptions({"jeffreys", "uniform"}), np.ndarray],
98
+ "adjacency_prior": [StrOptions({"jeffreys", "uniform"}), np.ndarray],
99
+ "max_iter": [Interval(Integral, 1, None, closed="left")],
100
+ "tol": [Interval(Real, 0, None, closed="left")],
101
+ },
102
+ prefer_skip_nested_validation=True,
103
+ )
104
+ def __init__(
105
+ self,
106
+ n_clusters: int = 2,
107
+ n_components: int = 2,
108
+ *,
109
+ clusters_prior: np.ndarray | str = "jeffreys",
110
+ components_prior: np.ndarray | str = "jeffreys",
111
+ adjacency_prior: np.ndarray | str = "jeffreys",
112
+ max_iter: int = 100,
113
+ tol: float = 1e-4,
114
+ warm_start: bool = False,
115
+ random_state: int | None = None,
116
+ ):
117
+ r"""Initializes the MimiSBM model with specified design and priors.
118
+
119
+ Constructs a mixture of multilayer SBMs with user-defined priors and
120
+ EM settings. Provides default settings for Bayesian inference and
121
+ convergence criteria.
122
+
123
+ Args:
124
+ n_clusters (int, optional): Number of clusters for the nodes.
125
+ Defaults to 2.
126
+ n_components (int, optional): Number of mixture components for the layers.
127
+ Defaults to 2.
128
+ clusters_prior (np.ndarray | str, optional): Dirichlet prior for node
129
+ clusters. Can be "jeffreys" (0.5), "uniform" (1.0), or a custom array.
130
+ Defaults to "jeffreys".
131
+ components_prior (np.ndarray | str, optional): Dirichlet prior for layer
132
+ components. Defaults to "jeffreys".
133
+ adjacency_prior (np.ndarray | str, optional): Beta prior for edge
134
+ probabilities. Defaults to "jeffreys".
135
+ max_iter (int, optional): Maximum number of VEM iterations. Defaults to 100.
136
+ tol (float, optional): Convergence tolerance for ELBO. Defaults to 1e-4.
137
+ warm_start (bool, optional): Whether to reuse responsibilities from a
138
+ previous fit. Defaults to False.
139
+ random_state (int | None, optional): Seed for the KMeans initialization.
140
+ Defaults to None.
141
+ """
142
+ self.n_clusters = n_clusters
143
+ self.n_components = n_components
144
+ self.clusters_prior = self._init_prior(clusters_prior, n_clusters)
145
+ self.components_prior = self._init_prior(components_prior, self.n_components)
146
+ self.adjacency_prior = self._init_prior(adjacency_prior, 2)
147
+ self.max_iter = max_iter
148
+ self.tol = tol
149
+ self.warm_start = warm_start
150
+ self.random_state = random_state
151
+
152
+ @staticmethod
153
+ def _init_prior(prior: np.ndarray | str, d: int) -> np.ndarray:
154
+ r"""Initializes the prior parameters for a given dimension.
155
+
156
+ Args:
157
+ prior (np.ndarray | str): The prior specification.
158
+ d (int): The dimension of the prior vector.
159
+
160
+ Returns:
161
+ np.ndarray: The initialized prior parameters.
162
+
163
+ Raises:
164
+ ValueError: If the provided prior array has an incorrect length.
165
+ """
166
+ if prior == "jeffreys":
167
+ return np.full((d,), 0.5)
168
+ if prior == "uniform":
169
+ return np.full((d,), 1.0)
170
+
171
+ prior = prior.reshape(-1)
172
+ if len(prior) != d:
173
+ raise ValueError(f"Prior must have {d} elements, got {len(prior)}")
174
+ return prior
175
+
176
+ def _init_responsibilities(
177
+ self, X: np.ndarray, n_clusters: int, axis: tuple[int, ...]
178
+ ) -> np.ndarray:
179
+ r"""Initializes responsibilities using KMeans on aggregated adjacency data.
180
+
181
+ Args:
182
+ X (np.ndarray): The multilayer adjacency tensor.
183
+ n_clusters (int): Number of clusters/components to initialize.
184
+ axis (tuple[int, ...]): Axis over which to aggregate the tensor.
185
+
186
+ Returns:
187
+ np.ndarray: Initialized responsibilities.
188
+ """
189
+ X_agg = X.sum(axis=axis)
190
+ X_agg = X_agg.reshape(X_agg.shape[0], -1)
191
+
192
+ labels = KMeans(
193
+ n_clusters=n_clusters, random_state=self.random_state
194
+ ).fit_predict(X_agg)
195
+
196
+ responsibilities = np.zeros((labels.shape[0], n_clusters))
197
+ responsibilities[np.arange(labels.shape[0]), labels] = 1
198
+ return responsibilities
199
+
200
+ def _elbo(self) -> float:
201
+ r"""Computes the Evidence Lower Bound (ELBO) for the current state.
202
+
203
+ The ELBO is used to monitor convergence and as a surrogate for the
204
+ log-likelihood in the Variational EM algorithm.
205
+
206
+ Returns:
207
+ float: The computed ELBO value.
208
+ """
209
+ cluster_entropy = entr(self.cluster_responsibilities_).sum()
210
+ component_entropy = entr(self.component_responsibilities_).sum()
211
+
212
+ cluster_evidence = (
213
+ gammaln(self.cluster_posterior_).sum()
214
+ - gammaln(self.cluster_posterior_.sum())
215
+ - gammaln(self.clusters_prior).sum()
216
+ + gammaln(self.clusters_prior.sum())
217
+ )
218
+
219
+ component_evidence = (
220
+ gammaln(self.component_posterior_).sum()
221
+ - gammaln(self.component_posterior_.sum())
222
+ - gammaln(self.components_prior).sum()
223
+ + gammaln(self.components_prior.sum())
224
+ )
225
+
226
+ log_adjacency_posterior = betaln(
227
+ self.adjacency_posterior_[0], self.adjacency_posterior_[1]
228
+ )
229
+ log_adjacency_prior = betaln(self.adjacency_prior[0], self.adjacency_prior[1])
230
+
231
+ # Sum over i < j
232
+ rows, cols = np.tril_indices(self.n_clusters)
233
+ adjacency_evidence = (
234
+ log_adjacency_posterior[rows, cols, :] - log_adjacency_prior
235
+ ).sum()
236
+
237
+ evidence = cluster_evidence + component_evidence + adjacency_evidence
238
+ entropy = cluster_entropy + component_entropy
239
+
240
+ return evidence + entropy
241
+
242
+ def _m_step(self, X: np.ndarray, X_non: np.ndarray):
243
+ r"""Performs the M-step of the Variational EM algorithm.
244
+
245
+ Updates the posterior parameters of the priors based on the current
246
+ responsibilities.
247
+
248
+ Args:
249
+ X (np.ndarray): The multilayer adjacency tensor.
250
+ X_non (np.ndarray): The complement of the adjacency tensor.
251
+ """
252
+ self.cluster_posterior_ = (
253
+ self.clusters_prior + self.cluster_responsibilities_.sum(axis=0)
254
+ )
255
+ self.component_posterior_ = (
256
+ self.components_prior + self.component_responsibilities_.sum(axis=0)
257
+ )
258
+
259
+ weighted_edges = X @ self.component_responsibilities_
260
+ weighted_non_edges = X_non @ self.component_responsibilities_
261
+
262
+ expected_edges = (
263
+ self.cluster_responsibilities_.T
264
+ @ weighted_edges.swapaxes(0, 2)
265
+ @ self.cluster_responsibilities_
266
+ ).swapaxes(0, 2)
267
+ expected_non_edges = (
268
+ self.cluster_responsibilities_.T
269
+ @ weighted_non_edges.swapaxes(0, 2)
270
+ @ self.cluster_responsibilities_
271
+ ).swapaxes(0, 2)
272
+
273
+ # Sum over i < j
274
+ rows, cols = np.diag_indices(self.n_clusters)
275
+ expected_edges[rows, cols, :] *= 0.5
276
+ expected_non_edges[rows, cols, :] *= 0.5
277
+
278
+ self.adjacency_posterior_ = np.stack(
279
+ [
280
+ expected_edges + self.adjacency_prior[0],
281
+ expected_non_edges + self.adjacency_prior[1],
282
+ ]
283
+ )
284
+
285
+ def _e_step(self, X: np.ndarray, X_non: np.ndarray):
286
+ r"""Performs the E-step of the Variational EM algorithm.
287
+
288
+ Updates the responsibilities for node clusters and layer components given the
289
+ current posterior parameters.
290
+
291
+ Args:
292
+ X (np.ndarray): The multilayer adjacency tensor.
293
+ X_non (np.ndarray): The complement of the adjacency tensor.
294
+ """
295
+ digamma_adjacency_posterior = digamma(self.adjacency_posterior_.sum(axis=0))
296
+ log_edges = digamma(self.adjacency_posterior_[0]) - digamma_adjacency_posterior
297
+ log_non_edges = (
298
+ digamma(self.adjacency_posterior_[1]) - digamma_adjacency_posterior
299
+ )
300
+
301
+ expected_component_edges = (
302
+ self.cluster_responsibilities_
303
+ @ log_edges.swapaxes(0, 2)
304
+ @ self.cluster_responsibilities_.T
305
+ ).swapaxes(0, 2)
306
+ expected_component_non_edges = (
307
+ self.cluster_responsibilities_
308
+ @ log_non_edges.swapaxes(0, 2)
309
+ @ self.cluster_responsibilities_.T
310
+ ).swapaxes(0, 2)
311
+
312
+ # Sum over i < j
313
+ component_posterior_evidence = digamma(self.component_posterior_) - digamma(
314
+ self.component_posterior_.sum()
315
+ )
316
+ component_edges_evidence = 0.5 * np.tensordot(
317
+ X, expected_component_edges, axes=([0, 1], [0, 1])
318
+ )
319
+ component_edges_evidence = np.nan_to_num(component_edges_evidence)
320
+ component_non_edges_evidence = 0.5 * np.tensordot(
321
+ X_non, expected_component_non_edges, axes=([0, 1], [0, 1])
322
+ )
323
+ component_non_edges_evidence = np.nan_to_num(component_non_edges_evidence)
324
+ self.component_responsibilities_ = softmax(
325
+ component_posterior_evidence
326
+ + component_edges_evidence
327
+ + component_non_edges_evidence,
328
+ axis=1,
329
+ )
330
+
331
+ # Sum over i != j
332
+ expected_cluster_edges = np.tensordot(
333
+ self.component_responsibilities_, log_edges, axes=([1], [2])
334
+ )
335
+ expected_cluster_non_edges = np.tensordot(
336
+ self.component_responsibilities_, log_non_edges, axes=([1], [2])
337
+ )
338
+ cluster_posterior_evidence = digamma(self.cluster_posterior_) - digamma(
339
+ self.cluster_posterior_.sum()
340
+ )
341
+
342
+ cluster_edges_evidence = np.tensordot(
343
+ X,
344
+ self.cluster_responsibilities_ @ expected_cluster_edges.swapaxes(1, 2),
345
+ axes=([1, 2], [1, 0]),
346
+ )
347
+ cluster_non_edges_evidence = np.tensordot(
348
+ X_non,
349
+ self.cluster_responsibilities_ @ expected_cluster_non_edges.swapaxes(1, 2),
350
+ axes=([1, 2], [1, 0]),
351
+ )
352
+
353
+ self.cluster_responsibilities_ = softmax(
354
+ cluster_posterior_evidence
355
+ + cluster_edges_evidence
356
+ + cluster_non_edges_evidence,
357
+ axis=1,
358
+ )
359
+
360
+ def _validate(self, X: np.typing.ArrayLike) -> tuple[np.ndarray, np.ndarray]:
361
+ r"""Validates the input data and ensures it is in the correct format.
362
+
363
+ Checks that the input is a 3D numpy array with appropriate dimension for a
364
+ multilayer adjacency tensor.
365
+
366
+ Args:
367
+ X (np.typing.ArrayLike): The input data to validate.
368
+
369
+ Raises:
370
+ ValueError: If the input data is not a 3D array.
371
+
372
+ Returns:
373
+ tuple[np.ndarray, np.ndarray]: A tuple containing the validated adjacency
374
+ tensor and its complement.
375
+ """
376
+ X = np.asarray(validate_data(self, X, allow_nd=True, dtype=bool))  # type: ignore
377
+ if X.ndim != 3: # noqa: PLR2004
378
+ raise ValueError(f"Input data must be a 3D array, got {X.ndim}D array")
379
+
380
+ X |= X.swapaxes(0, 1)
381
+ rows, cols = np.diag_indices(X.shape[0])
382
+ X[rows, cols, :] = 0
383
+ X_non = ~X
384
+ X_non[rows, cols, :] = 0
385
+
386
+ return X, X_non
387
+
388
+ @validate_params({"X": ["array-like"]}, prefer_skip_nested_validation=True)
389
+ def fit(self, X: np.typing.ArrayLike) -> Self:
390
+ r"""Fits the MimiSBM model to the multilayer adjacency tensor.
391
+
392
+ Initializes the model responsibilities and iteratively updates them using the
393
+ VEM algorithm. The process continues until the ELBO converges or the maximum
394
+ number of iterations is reached.
395
+
396
+ Args:
397
+ X (np.typing.ArrayLike): A 3D numpy array-like representing the multilayer
398
+ adjacency tensor of shape (N, N, V).
399
+
400
+ Returns:
401
+ Self: The fitted model instance.
402
+ """
403
+ X, X_non = self._validate(X) # type: ignore
404
+
405
+ if not (self.warm_start and hasattr(self, "converged_")):
406
+ self.cluster_responsibilities_ = self._init_responsibilities(
407
+ X, self.n_clusters, (2,)
408
+ )
409
+ self.component_responsibilities_ = self._init_responsibilities(
410
+ X, self.n_components, (0, 1)
411
+ )
412
+
413
+ old_elbo = -np.inf
414
+ for _ in range(self.max_iter):
415
+ self._m_step(X, X_non)
416
+ self._e_step(X, X_non)
417
+
418
+ self.elbo_ = self._elbo()
419
+ if abs(self.elbo_ - old_elbo) < self.tol:
420
+ self.converged_ = True
421
+ return self
422
+ old_elbo = self.elbo_
423
+
424
+ self.converged_ = False
425
+
426
+ return self
427
+
428
+ def predict(self) -> tuple[np.ndarray, np.ndarray]:
429
+ r"""Predicts the node clusters and layer components labels.
430
+
431
+ Assigns each node and each layer to the cluster/component with the highest
432
+ probability.
433
+
434
+ Returns:
435
+ tuple[np.ndarray, np.ndarray]: A tuple containing:
436
+ - node_labels (np.ndarray): Predicted cluster for each node (N,).
437
+ - layer_labels (np.ndarray): Predicted component for each layer (V,).
438
+ """
439
+ check_is_fitted(
440
+ self, ["cluster_responsibilities_", "component_responsibilities_"]
441
+ )
442
+ return self.cluster_responsibilities_.argmax(
443
+ axis=1
444
+ ), self.component_responsibilities_.argmax(axis=1)
@@ -0,0 +1,91 @@
1
+ Metadata-Version: 2.4
2
+ Name: mimisbm
3
+ Version: 0.1.0
4
+ Summary: Mixture of Multilayer Integrator Stochastic Block Model
5
+ Author: Félix Laplante
6
+ Project-URL: Source, https://github.com/felixlaplante0/mimisbm
7
+ Classifier: Programming Language :: Python :: 3
8
+ Classifier: Operating System :: POSIX :: Linux
9
+ Classifier: Operating System :: MacOS
10
+ Classifier: Operating System :: Microsoft :: Windows
11
+ Requires-Python: >=3.10
12
+ Description-Content-Type: text/markdown
13
+ Requires-Dist: numpy
14
+ Requires-Dist: scipy
15
+ Requires-Dist: fastkmeanspp
16
+ Requires-Dist: scikit-learn
17
+
18
+ # 🕸️ MimiSBM
19
+
20
+ **mimisbm** is a Python package implementing the **Mixture of Multilayer Integrator Stochastic Block Model** proposed by the original authors. It jointly groups nodes into clusters and layers into components, providing a unified framework for identifying shared connectivity patterns across multiple network layers.
21
+
22
+ ---
23
+
24
+ ## ✨ Features
25
+
26
+ - **Multilayer Clustering**: Jointly identifies node communities and layer components in a single probabilistic framework.
27
+ - **Variational EM**: Efficient inference using a Variational Expectation-Maximization (VEM) algorithm for large-scale networks.
28
+ - **Bayesian Framework**: Supports flexible Dirichlet and Beta priors, allowing for robust structure discovery under different sparsity regimes.
29
+ - **Component-wise SBMs**: Groups layers sharing similar block-model structures into distinct mixture components.
30
+ - **scikit-learn API**: Native `BaseEstimator` and `ClusterMixin` integration with a familiar `fit` / `predict` interface.
31
+
32
+ ---
33
+
34
+ ## 🚀 Installation
35
+
36
+ ```bash
37
+ pip install mimisbm
38
+ ```
39
+
40
+ ## 🔧 Usage
41
+
42
+ ### Example
43
+
44
+ ```python
45
+ import numpy as np
46
+ from mimisbm import MimiSBM
47
+
48
+ # Generate a synthetic multilayer adjacency tensor (20 nodes, 5 layers)
49
+ np.random.seed(42)
50
+ N, V = 20, 5
51
+ X = np.random.randint(0, 2, size=(N, N, V))
52
+
53
+ # Ensure the adjacency matrices are symmetric (undirected)
54
+ for v in range(V):
55
+ X[..., v] = np.tril(X[..., v], -1) + np.tril(X[..., v], -1).T
56
+
57
+ # Initialize the model with 3 node clusters and 2 layer components
58
+ model = MimiSBM(n_clusters=3, n_components=2, random_state=42)
59
+
60
+ # Fit the model to the multilayer network
61
+ model.fit(X)
62
+
63
+ # Predict node cluster and layer component assignments
64
+ node_labels, layer_labels = model.predict()
65
+
66
+ print(f"Node clusters: {node_labels}")
67
+ print(f"Layer components: {layer_labels}")
68
+ print(f"Final ELBO: {model.elbo_:.2f}")
69
+ ```
70
+
71
+ ---
72
+
73
+ ## 📖 Learn More
74
+
75
+ For tutorials and detailed API reference, visit the official site:
76
+ 👉 [mimisbm's documentation](https://felixlaplante0.github.io/mimisbm)
77
+
78
+ ### 📚 Citation
79
+
80
+ If you use MimiSBM in your research, please cite the original authors' paper:
81
+
82
+ ```bibtex
83
+ @article{de2024mixture,
84
+ title={Mixture of multilayer stochastic block models for multiview clustering},
85
+ author={De Santiago, Kylliann and Szafranski, Marie and Ambroise, Christophe},
86
+ journal={arXiv preprint arXiv:2401.04682},
87
+ year={2024}
88
+ }
89
+ ```
90
+
91
+ For more details, see the corresponding Preprint: https://arxiv.org/abs/2401.04682
@@ -0,0 +1,19 @@
1
+ README.md
2
+ demo.py
3
+ pyproject.toml
4
+ .github/workflows/lint.yml
5
+ .github/workflows/pages.yml
6
+ .github/workflows/publish.yml
7
+ docs/Makefile
8
+ docs/make.bat
9
+ docs/source/conf.py
10
+ docs/source/index.rst
11
+ docs/source/_static/.gitkeep
12
+ docs/source/_templates/autosummary/class.rst
13
+ mimisbm/__init__.py
14
+ mimisbm/_model.py
15
+ mimisbm.egg-info/PKG-INFO
16
+ mimisbm.egg-info/SOURCES.txt
17
+ mimisbm.egg-info/dependency_links.txt
18
+ mimisbm.egg-info/requires.txt
19
+ mimisbm.egg-info/top_level.txt
@@ -0,0 +1,4 @@
1
+ numpy
2
+ scipy
3
+ fastkmeanspp
4
+ scikit-learn
@@ -0,0 +1 @@
1
+ mimisbm
@@ -0,0 +1,55 @@
1
+ [project]
2
+ name = "mimisbm"
3
+ description = "Mixture of Multilayer Integrator Stochastic Block Model"
4
+ readme = "README.md"
5
+ urls = { "Source" = "https://github.com/felixlaplante0/mimisbm" }
6
+ authors = [{ name = "Félix Laplante" }]
7
+ requires-python = ">=3.10"
8
+ dependencies = ["numpy", "scipy", "fastkmeanspp", "scikit-learn"]
9
+ dynamic = ["version"]
10
+ classifiers = [
11
+ "Programming Language :: Python :: 3",
12
+ "Operating System :: POSIX :: Linux",
13
+ "Operating System :: MacOS",
14
+ "Operating System :: Microsoft :: Windows",
15
+ ]
16
+
17
+ [build-system]
18
+ requires = ["setuptools>=42", "setuptools-scm[toml]>=6.0", "wheel"]
19
+ build-backend = "setuptools.build_meta"
20
+
21
+ [tool.setuptools]
22
+ packages = ["mimisbm"]
23
+
24
+ [tool.setuptools_scm]
25
+ version_scheme = "post-release"
26
+ local_scheme = "no-local-version"
27
+
28
+ [tool.setuptools.package-data]
29
+ mimisbm = ["py.typed"]
30
+
31
+ [tool.ruff]
32
+ lint.select = [
33
+ "D", # pydocstyle (docstring conventions)
34
+ "E", # pycodestyle errors
35
+ "W", # pycodestyle warnings
36
+ "F", # Pyflakes
37
+ "I", # isort
38
+ "UP", # pyupgrade
39
+ "B", # flake8-bugbear
40
+ "C4", # flake8-comprehensions
41
+ "S", # flake8-bandit (security)
42
+ "T20", # flake8-print
43
+ "PT", # flake8-pytest-style
44
+ "Q", # flake8-quotes
45
+ "RET", # flake8-return
46
+ "SIM", # flake8-simplify
47
+ "ARG", # flake8-unused-arguments
48
+ "ERA", # eradicate (commented code)
49
+ "PL", # Pylint
50
+ "RUF", # Ruff-specific rules
51
+ ]
52
+ lint.ignore = ["D417", "PLR0913"]
53
+
54
+ [tool.ruff.lint.pydocstyle]
55
+ convention = "google"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+