scikit-network 0.33.3__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-network might be problematic. Click here for more details.
- scikit_network-0.33.3.dist-info/METADATA +122 -0
- scikit_network-0.33.3.dist-info/RECORD +228 -0
- scikit_network-0.33.3.dist-info/WHEEL +5 -0
- scikit_network-0.33.3.dist-info/licenses/AUTHORS.rst +43 -0
- scikit_network-0.33.3.dist-info/licenses/LICENSE +34 -0
- scikit_network-0.33.3.dist-info/top_level.txt +1 -0
- sknetwork/__init__.py +21 -0
- sknetwork/base.py +67 -0
- sknetwork/classification/__init__.py +8 -0
- sknetwork/classification/base.py +142 -0
- sknetwork/classification/base_rank.py +133 -0
- sknetwork/classification/diffusion.py +134 -0
- sknetwork/classification/knn.py +139 -0
- sknetwork/classification/metrics.py +205 -0
- sknetwork/classification/pagerank.py +66 -0
- sknetwork/classification/propagation.py +152 -0
- sknetwork/classification/tests/__init__.py +1 -0
- sknetwork/classification/tests/test_API.py +30 -0
- sknetwork/classification/tests/test_diffusion.py +77 -0
- sknetwork/classification/tests/test_knn.py +23 -0
- sknetwork/classification/tests/test_metrics.py +53 -0
- sknetwork/classification/tests/test_pagerank.py +20 -0
- sknetwork/classification/tests/test_propagation.py +24 -0
- sknetwork/classification/vote.cp313-win_amd64.pyd +0 -0
- sknetwork/classification/vote.cpp +27584 -0
- sknetwork/classification/vote.pyx +56 -0
- sknetwork/clustering/__init__.py +8 -0
- sknetwork/clustering/base.py +172 -0
- sknetwork/clustering/kcenters.py +253 -0
- sknetwork/clustering/leiden.py +242 -0
- sknetwork/clustering/leiden_core.cp313-win_amd64.pyd +0 -0
- sknetwork/clustering/leiden_core.cpp +31575 -0
- sknetwork/clustering/leiden_core.pyx +124 -0
- sknetwork/clustering/louvain.py +286 -0
- sknetwork/clustering/louvain_core.cp313-win_amd64.pyd +0 -0
- sknetwork/clustering/louvain_core.cpp +31220 -0
- sknetwork/clustering/louvain_core.pyx +124 -0
- sknetwork/clustering/metrics.py +91 -0
- sknetwork/clustering/postprocess.py +66 -0
- sknetwork/clustering/propagation_clustering.py +104 -0
- sknetwork/clustering/tests/__init__.py +1 -0
- sknetwork/clustering/tests/test_API.py +38 -0
- sknetwork/clustering/tests/test_kcenters.py +60 -0
- sknetwork/clustering/tests/test_leiden.py +34 -0
- sknetwork/clustering/tests/test_louvain.py +135 -0
- sknetwork/clustering/tests/test_metrics.py +50 -0
- sknetwork/clustering/tests/test_postprocess.py +39 -0
- sknetwork/data/__init__.py +6 -0
- sknetwork/data/base.py +33 -0
- sknetwork/data/load.py +406 -0
- sknetwork/data/models.py +459 -0
- sknetwork/data/parse.py +644 -0
- sknetwork/data/test_graphs.py +84 -0
- sknetwork/data/tests/__init__.py +1 -0
- sknetwork/data/tests/test_API.py +30 -0
- sknetwork/data/tests/test_base.py +14 -0
- sknetwork/data/tests/test_load.py +95 -0
- sknetwork/data/tests/test_models.py +52 -0
- sknetwork/data/tests/test_parse.py +250 -0
- sknetwork/data/tests/test_test_graphs.py +29 -0
- sknetwork/data/tests/test_toy_graphs.py +68 -0
- sknetwork/data/timeout.py +38 -0
- sknetwork/data/toy_graphs.py +611 -0
- sknetwork/embedding/__init__.py +8 -0
- sknetwork/embedding/base.py +94 -0
- sknetwork/embedding/force_atlas.py +198 -0
- sknetwork/embedding/louvain_embedding.py +148 -0
- sknetwork/embedding/random_projection.py +135 -0
- sknetwork/embedding/spectral.py +141 -0
- sknetwork/embedding/spring.py +198 -0
- sknetwork/embedding/svd.py +359 -0
- sknetwork/embedding/tests/__init__.py +1 -0
- sknetwork/embedding/tests/test_API.py +49 -0
- sknetwork/embedding/tests/test_force_atlas.py +35 -0
- sknetwork/embedding/tests/test_louvain_embedding.py +33 -0
- sknetwork/embedding/tests/test_random_projection.py +28 -0
- sknetwork/embedding/tests/test_spectral.py +81 -0
- sknetwork/embedding/tests/test_spring.py +50 -0
- sknetwork/embedding/tests/test_svd.py +43 -0
- sknetwork/gnn/__init__.py +10 -0
- sknetwork/gnn/activation.py +117 -0
- sknetwork/gnn/base.py +181 -0
- sknetwork/gnn/base_activation.py +90 -0
- sknetwork/gnn/base_layer.py +109 -0
- sknetwork/gnn/gnn_classifier.py +305 -0
- sknetwork/gnn/layer.py +153 -0
- sknetwork/gnn/loss.py +180 -0
- sknetwork/gnn/neighbor_sampler.py +65 -0
- sknetwork/gnn/optimizer.py +164 -0
- sknetwork/gnn/tests/__init__.py +1 -0
- sknetwork/gnn/tests/test_activation.py +56 -0
- sknetwork/gnn/tests/test_base.py +75 -0
- sknetwork/gnn/tests/test_base_layer.py +37 -0
- sknetwork/gnn/tests/test_gnn_classifier.py +130 -0
- sknetwork/gnn/tests/test_layers.py +80 -0
- sknetwork/gnn/tests/test_loss.py +33 -0
- sknetwork/gnn/tests/test_neigh_sampler.py +23 -0
- sknetwork/gnn/tests/test_optimizer.py +43 -0
- sknetwork/gnn/tests/test_utils.py +41 -0
- sknetwork/gnn/utils.py +127 -0
- sknetwork/hierarchy/__init__.py +6 -0
- sknetwork/hierarchy/base.py +96 -0
- sknetwork/hierarchy/louvain_hierarchy.py +272 -0
- sknetwork/hierarchy/metrics.py +234 -0
- sknetwork/hierarchy/paris.cp313-win_amd64.pyd +0 -0
- sknetwork/hierarchy/paris.cpp +37868 -0
- sknetwork/hierarchy/paris.pyx +316 -0
- sknetwork/hierarchy/postprocess.py +350 -0
- sknetwork/hierarchy/tests/__init__.py +1 -0
- sknetwork/hierarchy/tests/test_API.py +24 -0
- sknetwork/hierarchy/tests/test_algos.py +34 -0
- sknetwork/hierarchy/tests/test_metrics.py +62 -0
- sknetwork/hierarchy/tests/test_postprocess.py +57 -0
- sknetwork/linalg/__init__.py +9 -0
- sknetwork/linalg/basics.py +37 -0
- sknetwork/linalg/diteration.cp313-win_amd64.pyd +0 -0
- sknetwork/linalg/diteration.cpp +27400 -0
- sknetwork/linalg/diteration.pyx +47 -0
- sknetwork/linalg/eig_solver.py +93 -0
- sknetwork/linalg/laplacian.py +15 -0
- sknetwork/linalg/normalizer.py +86 -0
- sknetwork/linalg/operators.py +225 -0
- sknetwork/linalg/polynome.py +76 -0
- sknetwork/linalg/ppr_solver.py +170 -0
- sknetwork/linalg/push.cp313-win_amd64.pyd +0 -0
- sknetwork/linalg/push.cpp +31072 -0
- sknetwork/linalg/push.pyx +71 -0
- sknetwork/linalg/sparse_lowrank.py +142 -0
- sknetwork/linalg/svd_solver.py +91 -0
- sknetwork/linalg/tests/__init__.py +1 -0
- sknetwork/linalg/tests/test_eig.py +44 -0
- sknetwork/linalg/tests/test_laplacian.py +18 -0
- sknetwork/linalg/tests/test_normalization.py +34 -0
- sknetwork/linalg/tests/test_operators.py +66 -0
- sknetwork/linalg/tests/test_polynome.py +38 -0
- sknetwork/linalg/tests/test_ppr.py +50 -0
- sknetwork/linalg/tests/test_sparse_lowrank.py +61 -0
- sknetwork/linalg/tests/test_svd.py +38 -0
- sknetwork/linkpred/__init__.py +2 -0
- sknetwork/linkpred/base.py +46 -0
- sknetwork/linkpred/nn.py +126 -0
- sknetwork/linkpred/tests/__init__.py +1 -0
- sknetwork/linkpred/tests/test_nn.py +27 -0
- sknetwork/log.py +19 -0
- sknetwork/path/__init__.py +5 -0
- sknetwork/path/dag.py +54 -0
- sknetwork/path/distances.py +98 -0
- sknetwork/path/search.py +31 -0
- sknetwork/path/shortest_path.py +61 -0
- sknetwork/path/tests/__init__.py +1 -0
- sknetwork/path/tests/test_dag.py +37 -0
- sknetwork/path/tests/test_distances.py +62 -0
- sknetwork/path/tests/test_search.py +40 -0
- sknetwork/path/tests/test_shortest_path.py +40 -0
- sknetwork/ranking/__init__.py +8 -0
- sknetwork/ranking/base.py +61 -0
- sknetwork/ranking/betweenness.cp313-win_amd64.pyd +0 -0
- sknetwork/ranking/betweenness.cpp +9707 -0
- sknetwork/ranking/betweenness.pyx +97 -0
- sknetwork/ranking/closeness.py +92 -0
- sknetwork/ranking/hits.py +94 -0
- sknetwork/ranking/katz.py +83 -0
- sknetwork/ranking/pagerank.py +110 -0
- sknetwork/ranking/postprocess.py +37 -0
- sknetwork/ranking/tests/__init__.py +1 -0
- sknetwork/ranking/tests/test_API.py +32 -0
- sknetwork/ranking/tests/test_betweenness.py +38 -0
- sknetwork/ranking/tests/test_closeness.py +30 -0
- sknetwork/ranking/tests/test_hits.py +20 -0
- sknetwork/ranking/tests/test_pagerank.py +62 -0
- sknetwork/ranking/tests/test_postprocess.py +26 -0
- sknetwork/regression/__init__.py +4 -0
- sknetwork/regression/base.py +61 -0
- sknetwork/regression/diffusion.py +210 -0
- sknetwork/regression/tests/__init__.py +1 -0
- sknetwork/regression/tests/test_API.py +32 -0
- sknetwork/regression/tests/test_diffusion.py +56 -0
- sknetwork/sknetwork.py +3 -0
- sknetwork/test_base.py +35 -0
- sknetwork/test_log.py +15 -0
- sknetwork/topology/__init__.py +8 -0
- sknetwork/topology/cliques.cp313-win_amd64.pyd +0 -0
- sknetwork/topology/cliques.cpp +32565 -0
- sknetwork/topology/cliques.pyx +149 -0
- sknetwork/topology/core.cp313-win_amd64.pyd +0 -0
- sknetwork/topology/core.cpp +30651 -0
- sknetwork/topology/core.pyx +90 -0
- sknetwork/topology/cycles.py +243 -0
- sknetwork/topology/minheap.cp313-win_amd64.pyd +0 -0
- sknetwork/topology/minheap.cpp +27332 -0
- sknetwork/topology/minheap.pxd +20 -0
- sknetwork/topology/minheap.pyx +109 -0
- sknetwork/topology/structure.py +194 -0
- sknetwork/topology/tests/__init__.py +1 -0
- sknetwork/topology/tests/test_cliques.py +28 -0
- sknetwork/topology/tests/test_core.py +19 -0
- sknetwork/topology/tests/test_cycles.py +65 -0
- sknetwork/topology/tests/test_structure.py +85 -0
- sknetwork/topology/tests/test_triangles.py +38 -0
- sknetwork/topology/tests/test_wl.py +72 -0
- sknetwork/topology/triangles.cp313-win_amd64.pyd +0 -0
- sknetwork/topology/triangles.cpp +8894 -0
- sknetwork/topology/triangles.pyx +151 -0
- sknetwork/topology/weisfeiler_lehman.py +133 -0
- sknetwork/topology/weisfeiler_lehman_core.cp313-win_amd64.pyd +0 -0
- sknetwork/topology/weisfeiler_lehman_core.cpp +27635 -0
- sknetwork/topology/weisfeiler_lehman_core.pyx +114 -0
- sknetwork/utils/__init__.py +7 -0
- sknetwork/utils/check.py +355 -0
- sknetwork/utils/format.py +221 -0
- sknetwork/utils/membership.py +82 -0
- sknetwork/utils/neighbors.py +115 -0
- sknetwork/utils/tests/__init__.py +1 -0
- sknetwork/utils/tests/test_check.py +190 -0
- sknetwork/utils/tests/test_format.py +63 -0
- sknetwork/utils/tests/test_membership.py +24 -0
- sknetwork/utils/tests/test_neighbors.py +41 -0
- sknetwork/utils/tests/test_tfidf.py +18 -0
- sknetwork/utils/tests/test_values.py +66 -0
- sknetwork/utils/tfidf.py +37 -0
- sknetwork/utils/values.py +76 -0
- sknetwork/visualization/__init__.py +4 -0
- sknetwork/visualization/colors.py +34 -0
- sknetwork/visualization/dendrograms.py +277 -0
- sknetwork/visualization/graphs.py +1039 -0
- sknetwork/visualization/tests/__init__.py +1 -0
- sknetwork/visualization/tests/test_dendrograms.py +53 -0
- sknetwork/visualization/tests/test_graphs.py +176 -0
|
@@ -0,0 +1,272 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Created in March 2020
|
|
5
|
+
@author: Quentin Lutz <qlutz@enst.fr>
|
|
6
|
+
@author: Thomas Bonald <tbonald@enst.fr>
|
|
7
|
+
"""
|
|
8
|
+
from typing import Optional, Union
|
|
9
|
+
|
|
10
|
+
import numpy as np
|
|
11
|
+
from scipy import sparse
|
|
12
|
+
|
|
13
|
+
from sknetwork.clustering.louvain import Louvain
|
|
14
|
+
from sknetwork.hierarchy.base import BaseHierarchy
|
|
15
|
+
from sknetwork.hierarchy.postprocess import get_dendrogram, reorder_dendrogram
|
|
16
|
+
from sknetwork.utils.check import check_format
|
|
17
|
+
from sknetwork.utils.format import get_adjacency
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class LouvainIteration(BaseHierarchy):
    """Top-down hierarchical clustering obtained by applying Louvain recursively.

    The graph is first clustered by Louvain; each cluster is then clustered again on its
    induced subgraph, and so on until the requested depth is reached or no further split is found.

    Parameters
    ----------
    depth : int
        Depth of the tree.
        A negative value is interpreted as no limit (return a tree of maximum depth).
    resolution : float
        Resolution parameter.
    tol_optimization : float
        Minimum increase in the objective function to enter a new optimization pass.
    tol_aggregation : float
        Minimum increase in the objective function to enter a new aggregation pass.
    n_aggregations : int
        Maximum number of aggregations.
        A negative value is interpreted as no limit.
    shuffle_nodes : bool
        If ``True``, shuffle nodes before optimization.
    random_state : int
        Random number generator or random seed. If ``None``, numpy.random is used.
    verbose : bool
        Verbose mode.

    Attributes
    ----------
    dendrogram_ : np.ndarray
        Dendrogram of the graph.
    dendrogram_row_ : np.ndarray
        Dendrogram for the rows, for bipartite graphs.
    dendrogram_col_ : np.ndarray
        Dendrogram for the columns, for bipartite graphs.
    dendrogram_full_ : np.ndarray
        Dendrogram for both rows and columns, indexed in this order, for bipartite graphs.

    Example
    -------
    >>> from sknetwork.hierarchy import LouvainIteration
    >>> from sknetwork.data import house
    >>> louvain = LouvainIteration()
    >>> adjacency = house()
    >>> louvain.fit_predict(adjacency)
    array([[3., 2., 1., 2.],
           [4., 1., 1., 2.],
           [6., 0., 1., 3.],
           [5., 7., 2., 5.]])

    Notes
    -----
    Each row of the dendrogram = merge nodes, distance, size of cluster.

    See Also
    --------
    scipy.cluster.hierarchy.dendrogram
    sknetwork.clustering.Louvain
    """

    def __init__(self, depth: int = 3, resolution: float = 1, tol_optimization: float = 1e-3,
                 tol_aggregation: float = 1e-3, n_aggregations: int = -1, shuffle_nodes: bool = False,
                 random_state: Optional[Union[np.random.RandomState, int]] = None, verbose: bool = False):
        super().__init__()

        self.dendrogram_ = None
        self.depth = depth
        # A single Louvain instance is reused for every level of the recursion.
        louvain_params = dict(resolution=resolution, tol_optimization=tol_optimization,
                              tol_aggregation=tol_aggregation, n_aggregations=n_aggregations,
                              shuffle_nodes=shuffle_nodes, random_state=random_state, verbose=verbose)
        self._clustering_method = Louvain(**louvain_params)
        self.bipartite = None

    def _recursive_louvain(self, adjacency: Union[sparse.csr_matrix, np.ndarray], depth: int,
                           nodes: Optional[np.ndarray] = None):
        """Split the graph with Louvain, then recurse on each cluster.

        Parameters
        ----------
        adjacency : sparse.csr_matrix, np.ndarray
            Adjacency matrix of the (sub)graph to split.
        depth : int
            Remaining depth of the recursion; no split is attempted when it is 0.
        nodes : np.ndarray
            Indices, in the original graph, of the nodes of ``adjacency``.
            Defaults to ``0, ..., n - 1``.

        Returns
        -------
        tree : recursive list of list of nodes.
        """
        n_nodes = adjacency.shape[0]
        if nodes is None:
            nodes = np.arange(n_nodes)

        # Without edges or remaining depth, all nodes are put in a single cluster.
        splittable = adjacency.nnz and depth
        labels = self._clustering_method.fit_predict(adjacency) if splittable else np.zeros(n_nodes)
        clusters = np.unique(labels)

        if len(clusters) == 1:
            # Nothing left to split: the current nodes become the leaves.
            if len(nodes) > 1:
                return [[node] for node in nodes]
            return [nodes[0]]

        masks = (labels == cluster for cluster in clusters)
        # Each cluster is clustered again on its induced subgraph.
        return [self._recursive_louvain(adjacency[mask, :][:, mask], depth - 1, nodes[mask]) for mask in masks]

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], force_bipartite: bool = False) \
            -> 'LouvainIteration':
        """Fit algorithm to data.

        Parameters
        ----------
        input_matrix : sparse.csr_matrix, np.ndarray
            Adjacency matrix or biadjacency matrix of the graph.
        force_bipartite :
            If ``True``, force the input matrix to be considered as a biadjacency matrix.

        Returns
        -------
        self: :class:`LouvainIteration`
        """
        self._init_vars()
        adjacency, self.bipartite = get_adjacency(input_matrix, force_bipartite=force_bipartite)

        tree = self._recursive_louvain(adjacency, self.depth)
        dendrogram, _ = get_dendrogram(tree)
        dendrogram = np.array(dendrogram)

        # Shift the third column (merge distances) so that its smallest value is 1.
        shift = 1 - min(dendrogram[:, 2])
        dendrogram[:, 2] += shift
        self.dendrogram_ = reorder_dendrogram(dendrogram)

        if self.bipartite:
            self._split_vars(input_matrix.shape)
        return self
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
class LouvainHierarchy(BaseHierarchy):
    """Bottom-up hierarchical clustering built from the aggregation steps of Louvain.

    Each level corresponds to an aggregation step of the Louvain algorithm.

    Parameters
    ----------
    resolution : float
        Resolution parameter.
    tol_optimization : float
        Minimum increase in the objective function to enter a new optimization pass.
    tol_aggregation : float
        Minimum increase in the objective function to enter a new aggregation pass.
    shuffle_nodes : bool
        If ``True``, shuffle nodes before optimization.
    random_state : int
        Random number generator or random seed. If ``None``, numpy.random is used.
    verbose : bool
        Verbose mode.

    Attributes
    ----------
    dendrogram_ : np.ndarray
        Dendrogram of the graph.
    dendrogram_row_ : np.ndarray
        Dendrogram for the rows, for bipartite graphs.
    dendrogram_col_ : np.ndarray
        Dendrogram for the columns, for bipartite graphs.
    dendrogram_full_ : np.ndarray
        Dendrogram for both rows and columns, indexed in this order, for bipartite graphs.

    Example
    -------
    >>> from sknetwork.hierarchy import LouvainHierarchy
    >>> from sknetwork.data import house
    >>> louvain = LouvainHierarchy()
    >>> adjacency = house()
    >>> louvain.fit_predict(adjacency)
    array([[3., 2., 1., 2.],
           [4., 1., 1., 2.],
           [6., 0., 1., 3.],
           [5., 7., 2., 5.]])

    Notes
    -----
    Each row of the dendrogram = merge nodes, distance, size of cluster.

    See Also
    --------
    scipy.cluster.hierarchy.dendrogram
    sknetwork.clustering.Louvain
    """

    def __init__(self, resolution: float = 1, tol_optimization: float = 1e-3,
                 tol_aggregation: float = 1e-3, shuffle_nodes: bool = False,
                 random_state: Optional[Union[np.random.RandomState, int]] = None, verbose: bool = False):
        super().__init__()

        self.dendrogram_ = None
        # Louvain is limited to one aggregation per call: each call yields one level of the hierarchy.
        louvain_params = dict(resolution=resolution, tol_optimization=tol_optimization,
                              tol_aggregation=tol_aggregation, n_aggregations=1,
                              shuffle_nodes=shuffle_nodes, random_state=random_state, verbose=verbose)
        self._clustering_method = Louvain(**louvain_params)
        self.bipartite = None

    def _get_hierarchy(self, adjacency: Union[sparse.csr_matrix, np.ndarray]):
        """Build the tree by running Louvain repeatedly on its own aggregate graph.

        Parameters
        ----------
        adjacency : sparse.csr_matrix, np.ndarray
            Adjacency matrix of the graph.

        Returns
        -------
        tree: recursive list of list of nodes
        """
        louvain = self._clustering_method
        tree = [[node] for node in range(adjacency.shape[0])]
        labels = louvain.fit_predict(adjacency)
        labels_unique = np.unique(labels)

        while True:
            # Group the current subtrees by cluster; a cluster holding one subtree is not nested further.
            merged = []
            for label in labels_unique:
                members = [tree[node] for node in np.flatnonzero(labels == label)]
                merged.append(members[0] if len(members) == 1 else members)
            tree = merged

            # Next level: cluster the aggregate graph produced by the previous call.
            labels = louvain.fit_predict(louvain.aggregate_)
            next_unique = np.unique(labels)
            # Stop as soon as the number of clusters no longer changes.
            if len(labels_unique) == len(next_unique):
                return tree
            labels_unique = next_unique

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], force_bipartite: bool = False) \
            -> 'LouvainHierarchy':
        """Fit algorithm to data.

        Parameters
        ----------
        input_matrix : sparse.csr_matrix, np.ndarray
            Adjacency matrix or biadjacency matrix of the graph.
        force_bipartite :
            If ``True``, force the input matrix to be considered as a biadjacency matrix.

        Returns
        -------
        self: :class:`LouvainHierarchy`
        """
        self._init_vars()
        adjacency, self.bipartite = get_adjacency(input_matrix, force_bipartite=force_bipartite)

        tree = self._get_hierarchy(adjacency)
        dendrogram, _ = get_dendrogram(tree)
        dendrogram = np.array(dendrogram)

        # Shift the third column (merge distances) so that its smallest value is 1.
        shift = 1 - min(dendrogram[:, 2])
        dendrogram[:, 2] += shift
        self.dendrogram_ = reorder_dendrogram(dendrogram)

        if self.bipartite:
            self._split_vars(input_matrix.shape)
        return self
|
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Created on March 2019
|
|
5
|
+
@author: Thomas Bonald <bonald@enst.fr>
|
|
6
|
+
"""
|
|
7
|
+
import numpy as np
|
|
8
|
+
from scipy import sparse
|
|
9
|
+
from sknetwork.hierarchy.paris import AggregateGraph
|
|
10
|
+
|
|
11
|
+
from sknetwork.utils.check import check_format, get_probs, check_square
|
|
12
|
+
from sknetwork.utils.check import check_min_size, check_min_nnz
|
|
13
|
+
from sknetwork.utils.format import directed2undirected
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _instantiate_vars(adjacency: sparse.csr_matrix, weights: str = 'uniform'):
    """Build the aggregate graph and node weights shared by the metrics.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    weights :
        Weights of nodes, passed to ``get_probs``.

    Returns
    -------
    aggregate_graph :
        ``AggregateGraph`` built on the symmetrized adjacency matrix.
    weights_row :
        Node weights computed from ``adjacency``.
    weights_col :
        Node weights computed from the transpose of ``adjacency``.
    """
    out_probs = get_probs(weights, adjacency)
    in_probs = get_probs(weights, adjacency.T)
    # The aggregate graph is built on the undirected version of the graph.
    symmetric = directed2undirected(adjacency)
    edge_weights = symmetric.data.astype(float)
    graph = AggregateGraph(out_probs, in_probs, edge_weights, symmetric.indices, symmetric.indptr)
    return graph, out_probs, in_probs
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def get_sampling_distributions(adjacency: sparse.csr_matrix, dendrogram: np.ndarray, weights: str = 'uniform'):
    """Get sampling distributions over each internal node of the tree.

    The dendrogram is replayed merge by merge on an aggregate graph; the quantities of
    each merge are read from that graph just before the merge is applied.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    dendrogram :
        Dendrogram. Its first ``n - 1`` rows are read, ``n`` being the number of nodes.
    weights :
        Weights of nodes.
        ``'degree'`` or ``'uniform'`` (default).

    Returns
    -------
    edge_sampling: np.ndarray
        Edge sampling distribution.
    node_sampling: np.ndarray
        Node sampling distribution.
    cluster_weights: np.ndarray
        Cluster weights.
    """
    n_nodes = adjacency.shape[0]
    n_merges = n_nodes - 1
    graph, _, _ = _instantiate_vars(adjacency, weights)

    cluster_weight = np.zeros(n_merges)
    edge_sampling = np.zeros(n_merges)
    node_sampling = np.zeros(n_merges)

    for step in range(n_merges):
        left = int(dendrogram[step][0])
        right = int(dendrogram[step][1])

        # State of the aggregate graph before this merge (read again at every step).
        neighbors = graph.neighbors
        out_weights = graph.cluster_out_weights
        in_weights = graph.cluster_in_weights

        # Weight between the two merged clusters, counted in both directions.
        if right in neighbors[left]:
            edge_sampling[step] += 2 * neighbors[left][right]
        node_sampling[step] += out_weights[left] * in_weights[right] + out_weights[right] * in_weights[left]
        cluster_weight[step] = out_weights[left] + out_weights[right] + in_weights[left] + in_weights[right]

        for node in {left, right}:
            if node >= n_nodes:
                # Only original nodes (index below n) contribute the self-loop terms.
                continue
            # self-loop
            node_sampling[step] += out_weights[node] * in_weights[node]
            if node in neighbors[node]:
                edge_sampling[step] += neighbors[node][node]

        graph.merge(left, right)

    # Out- and in-weights were summed above, hence the division by 2.
    return edge_sampling, node_sampling, cluster_weight / 2
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def dasgupta_cost(adjacency: sparse.csr_matrix, dendrogram: np.ndarray, weights: str = 'uniform',
                  normalized: bool = False) -> float:
    """Dasgupta's cost of a hierarchy.

    Expected size (weights = ``'uniform'``) or expected volume (weights = ``'degree'``) of the cluster induced by
    random edge sampling (closest ancestor of the two nodes in the hierarchy).

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    dendrogram :
        Dendrogram.
    weights :
        Weights of nodes.
        ``'degree'`` or ``'uniform'`` (default).
    normalized :
        If ``True``, normalized cost (between 0 and 1).

    Returns
    -------
    cost : float
        Cost.

    Example
    -------
    >>> from sknetwork.hierarchy import dasgupta_cost, Paris
    >>> from sknetwork.data import house
    >>> paris = Paris()
    >>> adjacency = house()
    >>> dendrogram = paris.fit_transform(adjacency)
    >>> cost = dasgupta_cost(adjacency, dendrogram)
    >>> float(np.round(cost, 2))
    3.33

    References
    ----------
    Dasgupta, S. (2016). A cost function for similarity-based hierarchical clustering.
    Proceedings of ACM symposium on Theory of Computing.
    """
    adjacency = check_format(adjacency)
    check_square(adjacency)

    n = adjacency.shape[0]
    check_min_size(n, 2)

    edge_sampling, _, cluster_weight = get_sampling_distributions(adjacency, dendrogram, weights)
    # Sum over the merges of (edge sampling value) x (weight of the merged cluster).
    cost = edge_sampling.dot(cluster_weight)

    if not normalized:
        # Rescale by the total edge weight ('degree') or by the number of nodes (otherwise).
        if weights == 'degree':
            cost *= adjacency.data.sum()
        else:
            cost *= n

    return cost
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def dasgupta_score(adjacency: sparse.csr_matrix, dendrogram: np.ndarray, weights: str = 'uniform') -> float:
    """Dasgupta's score of a hierarchy (quality metric, between 0 and 1).

    Defined as 1 - normalized Dasgupta's cost.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    dendrogram :
        Dendrogram.
    weights :
        Weights of nodes.
        ``'degree'`` or ``'uniform'`` (default).

    Returns
    -------
    score : float
        Score.

    Example
    -------
    >>> from sknetwork.hierarchy import dasgupta_score, Paris
    >>> from sknetwork.data import house
    >>> paris = Paris()
    >>> adjacency = house()
    >>> dendrogram = paris.fit_transform(adjacency)
    >>> score = dasgupta_score(adjacency, dendrogram)
    >>> float(np.round(score, 2))
    0.33

    References
    ----------
    Dasgupta, S. (2016). A cost function for similarity-based hierarchical clustering.
    Proceedings of ACM symposium on Theory of Computing.
    """
    # The score is the complement of the normalized cost.
    normalized_cost = dasgupta_cost(adjacency, dendrogram, weights, normalized=True)
    return 1 - normalized_cost
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def tree_sampling_divergence(adjacency: sparse.csr_matrix, dendrogram: np.ndarray, weights: str = 'degree',
                             normalized: bool = True) -> float:
    """Tree sampling divergence of a hierarchy (quality metric).

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    dendrogram :
        Dendrogram.
    weights :
        Weights of nodes.
        ``'degree'`` (default) or ``'uniform'``.
    normalized :
        If ``True``, normalized score (between 0 and 1).

    Returns
    -------
    score : float
        Score.

    Example
    -------
    >>> from sknetwork.hierarchy import tree_sampling_divergence, Paris
    >>> from sknetwork.data import house
    >>> paris = Paris()
    >>> adjacency = house()
    >>> dendrogram = paris.fit_transform(adjacency)
    >>> score = tree_sampling_divergence(adjacency, dendrogram)
    >>> float(np.round(score, 2))
    0.05

    References
    ----------
    Charpentier, B. & Bonald, T. (2019).
    `Tree Sampling Divergence: An Information-Theoretic Metric for
    Hierarchical Graph Clustering.
    <https://hal.telecom-paristech.fr/hal-02144394/document>`_
    Proceedings of IJCAI.
    """
    adjacency = check_format(adjacency)
    check_square(adjacency)
    check_min_nnz(adjacency.nnz, 1)
    adjacency = adjacency.astype(float)
    n = adjacency.shape[0]
    check_min_size(n, 2)

    # Normalize the edge weights so that they sum to 1.
    adjacency.data /= adjacency.data.sum()
    edge_sampling, node_sampling, _ = get_sampling_distributions(adjacency, dendrogram, weights)

    # Only merges with a non-zero edge sampling value enter the sum.
    support = np.flatnonzero(edge_sampling)
    edge_probs = edge_sampling[support]
    score = edge_probs.dot(np.log(edge_probs / node_sampling[support]))
    if not normalized:
        return score

    # Normalization: divide by the same divergence computed edge by edge on the graph itself.
    weights_row = get_probs(weights, adjacency)
    weights_col = get_probs(weights, adjacency.T)
    diag_out = sparse.diags(weights_row, shape=(n, n), format='csr')
    diag_out.data = 1 / diag_out.data
    diag_in = sparse.diags(weights_col, shape=(n, n), format='csr')
    diag_in.data = 1 / diag_in.data
    # Edge weight divided by the product of the weights of its two end nodes.
    sampling_ratio = diag_out.dot(adjacency.dot(diag_in))

    # Same product with unit diagonal entries, so that the edge weights
    # are laid out like the ratios above.
    diag_out.data = np.ones(len(diag_out.data))
    diag_in.data = np.ones(len(diag_in.data))
    edge_weights = diag_out.dot(adjacency.dot(diag_in))

    mutual_information = edge_weights.data.dot(np.log(sampling_ratio.data))
    if mutual_information > 0:
        score /= mutual_information
    return score
|
|
Binary file
|