scikit-network 0.30.0__cp39-cp39-win_amd64.whl → 0.32.1__cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-network might be problematic. Click here for more details.

Files changed (187) hide show
  1. {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/AUTHORS.rst +3 -0
  2. {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/METADATA +31 -3
  3. scikit_network-0.32.1.dist-info/RECORD +228 -0
  4. {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/WHEEL +1 -1
  5. sknetwork/__init__.py +1 -1
  6. sknetwork/base.py +67 -0
  7. sknetwork/classification/base.py +24 -24
  8. sknetwork/classification/base_rank.py +17 -25
  9. sknetwork/classification/diffusion.py +35 -35
  10. sknetwork/classification/knn.py +24 -21
  11. sknetwork/classification/metrics.py +1 -1
  12. sknetwork/classification/pagerank.py +10 -10
  13. sknetwork/classification/propagation.py +23 -20
  14. sknetwork/classification/tests/test_diffusion.py +13 -3
  15. sknetwork/classification/vote.cp39-win_amd64.pyd +0 -0
  16. sknetwork/classification/vote.cpp +14482 -10351
  17. sknetwork/classification/vote.pyx +1 -3
  18. sknetwork/clustering/__init__.py +3 -1
  19. sknetwork/clustering/base.py +36 -40
  20. sknetwork/clustering/kcenters.py +253 -0
  21. sknetwork/clustering/leiden.py +241 -0
  22. sknetwork/clustering/leiden_core.cp39-win_amd64.pyd +0 -0
  23. sknetwork/clustering/leiden_core.cpp +31564 -0
  24. sknetwork/clustering/leiden_core.pyx +124 -0
  25. sknetwork/clustering/louvain.py +133 -102
  26. sknetwork/clustering/louvain_core.cp39-win_amd64.pyd +0 -0
  27. sknetwork/clustering/louvain_core.cpp +22457 -18792
  28. sknetwork/clustering/louvain_core.pyx +86 -96
  29. sknetwork/clustering/postprocess.py +2 -2
  30. sknetwork/clustering/propagation_clustering.py +15 -19
  31. sknetwork/clustering/tests/test_API.py +8 -4
  32. sknetwork/clustering/tests/test_kcenters.py +92 -0
  33. sknetwork/clustering/tests/test_leiden.py +34 -0
  34. sknetwork/clustering/tests/test_louvain.py +3 -4
  35. sknetwork/data/__init__.py +2 -1
  36. sknetwork/data/base.py +28 -0
  37. sknetwork/data/load.py +38 -37
  38. sknetwork/data/models.py +18 -18
  39. sknetwork/data/parse.py +54 -33
  40. sknetwork/data/test_graphs.py +2 -2
  41. sknetwork/data/tests/test_API.py +1 -1
  42. sknetwork/data/tests/test_base.py +14 -0
  43. sknetwork/data/tests/test_load.py +1 -1
  44. sknetwork/data/tests/test_parse.py +9 -12
  45. sknetwork/data/tests/test_test_graphs.py +1 -2
  46. sknetwork/data/toy_graphs.py +18 -18
  47. sknetwork/embedding/__init__.py +0 -1
  48. sknetwork/embedding/base.py +21 -20
  49. sknetwork/embedding/force_atlas.py +3 -2
  50. sknetwork/embedding/louvain_embedding.py +2 -2
  51. sknetwork/embedding/random_projection.py +5 -3
  52. sknetwork/embedding/spectral.py +0 -73
  53. sknetwork/embedding/tests/test_API.py +4 -28
  54. sknetwork/embedding/tests/test_louvain_embedding.py +4 -9
  55. sknetwork/embedding/tests/test_random_projection.py +2 -2
  56. sknetwork/embedding/tests/test_spectral.py +5 -8
  57. sknetwork/embedding/tests/test_svd.py +1 -1
  58. sknetwork/gnn/base.py +4 -4
  59. sknetwork/gnn/base_layer.py +3 -3
  60. sknetwork/gnn/gnn_classifier.py +45 -89
  61. sknetwork/gnn/layer.py +1 -1
  62. sknetwork/gnn/loss.py +1 -1
  63. sknetwork/gnn/optimizer.py +4 -3
  64. sknetwork/gnn/tests/test_base_layer.py +4 -4
  65. sknetwork/gnn/tests/test_gnn_classifier.py +12 -35
  66. sknetwork/gnn/utils.py +8 -8
  67. sknetwork/hierarchy/base.py +29 -2
  68. sknetwork/hierarchy/louvain_hierarchy.py +45 -41
  69. sknetwork/hierarchy/paris.cp39-win_amd64.pyd +0 -0
  70. sknetwork/hierarchy/paris.cpp +27369 -22852
  71. sknetwork/hierarchy/paris.pyx +7 -9
  72. sknetwork/hierarchy/postprocess.py +16 -16
  73. sknetwork/hierarchy/tests/test_API.py +1 -1
  74. sknetwork/hierarchy/tests/test_algos.py +5 -0
  75. sknetwork/hierarchy/tests/test_metrics.py +1 -1
  76. sknetwork/linalg/__init__.py +1 -1
  77. sknetwork/linalg/diteration.cp39-win_amd64.pyd +0 -0
  78. sknetwork/linalg/diteration.cpp +13474 -9454
  79. sknetwork/linalg/diteration.pyx +0 -2
  80. sknetwork/linalg/eig_solver.py +1 -1
  81. sknetwork/linalg/{normalization.py → normalizer.py} +18 -15
  82. sknetwork/linalg/operators.py +1 -1
  83. sknetwork/linalg/ppr_solver.py +1 -1
  84. sknetwork/linalg/push.cp39-win_amd64.pyd +0 -0
  85. sknetwork/linalg/push.cpp +22993 -18807
  86. sknetwork/linalg/push.pyx +0 -2
  87. sknetwork/linalg/svd_solver.py +1 -1
  88. sknetwork/linalg/tests/test_normalization.py +3 -7
  89. sknetwork/linalg/tests/test_operators.py +4 -8
  90. sknetwork/linalg/tests/test_ppr.py +1 -1
  91. sknetwork/linkpred/base.py +13 -2
  92. sknetwork/linkpred/nn.py +6 -6
  93. sknetwork/log.py +19 -0
  94. sknetwork/path/__init__.py +4 -3
  95. sknetwork/path/dag.py +54 -0
  96. sknetwork/path/distances.py +98 -0
  97. sknetwork/path/search.py +13 -47
  98. sknetwork/path/shortest_path.py +37 -162
  99. sknetwork/path/tests/test_dag.py +37 -0
  100. sknetwork/path/tests/test_distances.py +62 -0
  101. sknetwork/path/tests/test_search.py +26 -11
  102. sknetwork/path/tests/test_shortest_path.py +31 -36
  103. sknetwork/ranking/__init__.py +0 -1
  104. sknetwork/ranking/base.py +13 -8
  105. sknetwork/ranking/betweenness.cp39-win_amd64.pyd +0 -0
  106. sknetwork/ranking/betweenness.cpp +5709 -3017
  107. sknetwork/ranking/betweenness.pyx +0 -2
  108. sknetwork/ranking/closeness.py +7 -10
  109. sknetwork/ranking/pagerank.py +14 -14
  110. sknetwork/ranking/postprocess.py +12 -3
  111. sknetwork/ranking/tests/test_API.py +2 -4
  112. sknetwork/ranking/tests/test_betweenness.py +3 -3
  113. sknetwork/ranking/tests/test_closeness.py +3 -7
  114. sknetwork/ranking/tests/test_pagerank.py +11 -5
  115. sknetwork/ranking/tests/test_postprocess.py +5 -0
  116. sknetwork/regression/base.py +19 -2
  117. sknetwork/regression/diffusion.py +24 -10
  118. sknetwork/regression/tests/test_diffusion.py +8 -0
  119. sknetwork/test_base.py +35 -0
  120. sknetwork/test_log.py +15 -0
  121. sknetwork/topology/__init__.py +7 -8
  122. sknetwork/topology/cliques.cp39-win_amd64.pyd +0 -0
  123. sknetwork/topology/{kcliques.cpp → cliques.cpp} +23412 -20276
  124. sknetwork/topology/cliques.pyx +149 -0
  125. sknetwork/topology/core.cp39-win_amd64.pyd +0 -0
  126. sknetwork/topology/{kcore.cpp → core.cpp} +21732 -18867
  127. sknetwork/topology/core.pyx +90 -0
  128. sknetwork/topology/cycles.py +243 -0
  129. sknetwork/topology/minheap.cp39-win_amd64.pyd +0 -0
  130. sknetwork/{utils → topology}/minheap.cpp +19452 -15368
  131. sknetwork/{utils → topology}/minheap.pxd +1 -3
  132. sknetwork/{utils → topology}/minheap.pyx +1 -3
  133. sknetwork/topology/structure.py +3 -43
  134. sknetwork/topology/tests/test_cliques.py +11 -11
  135. sknetwork/topology/tests/test_core.py +19 -0
  136. sknetwork/topology/tests/test_cycles.py +65 -0
  137. sknetwork/topology/tests/test_structure.py +2 -16
  138. sknetwork/topology/tests/test_triangles.py +11 -15
  139. sknetwork/topology/tests/test_wl.py +72 -0
  140. sknetwork/topology/triangles.cp39-win_amd64.pyd +0 -0
  141. sknetwork/topology/triangles.cpp +5056 -2696
  142. sknetwork/topology/triangles.pyx +74 -89
  143. sknetwork/topology/weisfeiler_lehman.py +56 -86
  144. sknetwork/topology/weisfeiler_lehman_core.cp39-win_amd64.pyd +0 -0
  145. sknetwork/topology/weisfeiler_lehman_core.cpp +14727 -10622
  146. sknetwork/topology/weisfeiler_lehman_core.pyx +0 -2
  147. sknetwork/utils/__init__.py +1 -31
  148. sknetwork/utils/check.py +2 -2
  149. sknetwork/utils/format.py +5 -3
  150. sknetwork/utils/membership.py +2 -2
  151. sknetwork/utils/tests/test_check.py +3 -3
  152. sknetwork/utils/tests/test_format.py +3 -1
  153. sknetwork/utils/values.py +1 -1
  154. sknetwork/visualization/__init__.py +2 -2
  155. sknetwork/visualization/dendrograms.py +55 -7
  156. sknetwork/visualization/graphs.py +292 -72
  157. sknetwork/visualization/tests/test_dendrograms.py +9 -9
  158. sknetwork/visualization/tests/test_graphs.py +71 -62
  159. scikit_network-0.30.0.dist-info/RECORD +0 -227
  160. sknetwork/embedding/louvain_hierarchy.py +0 -142
  161. sknetwork/embedding/tests/test_louvain_hierarchy.py +0 -19
  162. sknetwork/path/metrics.py +0 -148
  163. sknetwork/path/tests/test_metrics.py +0 -29
  164. sknetwork/ranking/harmonic.py +0 -82
  165. sknetwork/topology/dag.py +0 -74
  166. sknetwork/topology/dag_core.cp39-win_amd64.pyd +0 -0
  167. sknetwork/topology/dag_core.cpp +0 -23350
  168. sknetwork/topology/dag_core.pyx +0 -38
  169. sknetwork/topology/kcliques.cp39-win_amd64.pyd +0 -0
  170. sknetwork/topology/kcliques.pyx +0 -193
  171. sknetwork/topology/kcore.cp39-win_amd64.pyd +0 -0
  172. sknetwork/topology/kcore.pyx +0 -120
  173. sknetwork/topology/tests/test_cores.py +0 -21
  174. sknetwork/topology/tests/test_dag.py +0 -26
  175. sknetwork/topology/tests/test_wl_coloring.py +0 -49
  176. sknetwork/topology/tests/test_wl_kernel.py +0 -31
  177. sknetwork/utils/base.py +0 -35
  178. sknetwork/utils/minheap.cp39-win_amd64.pyd +0 -0
  179. sknetwork/utils/simplex.py +0 -140
  180. sknetwork/utils/tests/test_base.py +0 -28
  181. sknetwork/utils/tests/test_bunch.py +0 -16
  182. sknetwork/utils/tests/test_projection_simplex.py +0 -33
  183. sknetwork/utils/tests/test_verbose.py +0 -15
  184. sknetwork/utils/verbose.py +0 -37
  185. {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/LICENSE +0 -0
  186. {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/top_level.txt +0 -0
  187. /sknetwork/{utils → data}/timeout.py +0 -0
@@ -1,9 +1,7 @@
1
1
  # distutils: language = c++
2
2
  # cython: language_level=3
3
- # cython: linetrace=True
4
- # distutils: define_macros=CYTHON_TRACE_NOGIL=1
5
3
  """
6
- Created on April, 2020
4
+ Created in April 2020
7
5
  @author: Nathan de Lara <nathan.delara@polytechnique.org>
8
6
  """
9
7
  from libcpp.set cimport set
@@ -1,6 +1,8 @@
1
1
  """clustering module"""
2
2
  from sknetwork.clustering.base import BaseClustering
3
3
  from sknetwork.clustering.louvain import Louvain
4
+ from sknetwork.clustering.leiden import Leiden
5
+ from sknetwork.clustering.propagation_clustering import PropagationClustering
4
6
  from sknetwork.clustering.metrics import get_modularity
5
7
  from sknetwork.clustering.postprocess import reindex_labels, aggregate_graph
6
- from sknetwork.clustering.propagation_clustering import PropagationClustering
8
+ from sknetwork.clustering.kcenters import KCenters
@@ -9,8 +9,8 @@ from abc import ABC
9
9
  import numpy as np
10
10
  from scipy import sparse
11
11
 
12
- from sknetwork.linalg.normalization import normalize
13
- from sknetwork.utils.base import Algorithm
12
+ from sknetwork.linalg.normalizer import normalize
13
+ from sknetwork.base import Algorithm
14
14
  from sknetwork.utils.membership import get_membership
15
15
 
16
16
 
@@ -19,24 +19,20 @@ class BaseClustering(Algorithm, ABC):
19
19
 
20
20
  Attributes
21
21
  ----------
22
- labels_ : np.ndarray
23
- Labels of the nodes (rows for bipartite graphs)
24
- labels_row_ : np.ndarray
25
- Labels of the rows (for bipartite graphs).
26
- labels_col_ : np.ndarray
27
- Labels of the columns (for bipartite graphs, in case of co-clustering).
28
- membership_ : sparse.csr_matrix
29
- Membership matrix of the nodes, shape (n_nodes, n_clusters).
30
- membership_row_ : sparse.csr_matrix
31
- Membership matrix of the rows (for bipartite graphs).
32
- membership_col_ : sparse.csr_matrix
33
- Membership matrix of the columns (for bipartite graphs, in case of co-clustering).
22
+ labels_ : np.ndarray, shape (n_labels,)
23
+ Label of each node.
24
+ probs_ : sparse.csr_matrix, shape (n_row, n_labels)
25
+ Probability distribution over labels.
26
+ labels_row_, labels_col_ : np.ndarray
27
+ Labels of rows and columns, for bipartite graphs.
28
+ probs_row_, probs_col_ : sparse.csr_matrix, shape (n_row, n_labels)
29
+ Probability distributions over labels for rows and columns (for bipartite graphs).
34
30
  aggregate_ : sparse.csr_matrix
35
31
  Aggregate adjacency matrix or biadjacency matrix between clusters.
36
32
  """
37
- def __init__(self, sort_clusters: bool = True, return_membership: bool = False, return_aggregate: bool = False):
33
+ def __init__(self, sort_clusters: bool = True, return_probs: bool = False, return_aggregate: bool = False):
38
34
  self.sort_clusters = sort_clusters
39
- self.return_membership = return_membership
35
+ self.return_probs = return_probs
40
36
  self.return_aggregate = return_aggregate
41
37
  self._init_vars()
42
38
 
@@ -82,8 +78,8 @@ class BaseClustering(Algorithm, ABC):
82
78
  Probability distribution over labels.
83
79
  """
84
80
  if columns:
85
- return self.membership_col_.toarray()
86
- return self.membership_.toarray()
81
+ return self.probs_col_.toarray()
82
+ return self.probs_.toarray()
87
83
 
88
84
  def fit_predict_proba(self, *args, **kwargs) -> np.ndarray:
89
85
  """Fit algorithm to the data and return the probability distribution over labels.
@@ -107,12 +103,12 @@ class BaseClustering(Algorithm, ABC):
107
103
 
108
104
  Returns
109
105
  -------
110
- membership : sparse.csr_matrix
111
- Probability distribution over labels (aka membership matrix).
106
+ probs : sparse.csr_matrix
107
+ Probability distribution over labels.
112
108
  """
113
109
  if columns:
114
- return self.membership_col_
115
- return self.membership_
110
+ return self.probs_col_
111
+ return self.probs_
116
112
 
117
113
  def fit_transform(self, *args, **kwargs) -> np.ndarray:
118
114
  """Fit algorithm to the data and return the membership matrix. Same parameters as the ``fit`` method.
@@ -130,9 +126,9 @@ class BaseClustering(Algorithm, ABC):
130
126
  self.labels_ = None
131
127
  self.labels_row_ = None
132
128
  self.labels_col_ = None
133
- self.membership_ = None
134
- self.membership_row_ = None
135
- self.membership_col_ = None
129
+ self.probs_ = None
130
+ self.probs_row_ = None
131
+ self.probs_col_ = None
136
132
  self.aggregate_ = None
137
133
  self.bipartite = None
138
134
  return self
@@ -147,30 +143,30 @@ class BaseClustering(Algorithm, ABC):
147
143
 
148
144
  def _secondary_outputs(self, input_matrix: sparse.csr_matrix):
149
145
  """Compute different variables from labels_."""
150
- if self.return_membership or self.return_aggregate:
146
+ if self.return_probs or self.return_aggregate:
151
147
  input_matrix = input_matrix.astype(float)
152
148
  if not self.bipartite:
153
- membership = get_membership(self.labels_)
154
- if self.return_membership:
155
- self.membership_ = normalize(input_matrix.dot(membership))
149
+ probs = get_membership(self.labels_)
150
+ if self.return_probs:
151
+ self.probs_ = normalize(input_matrix.dot(probs))
156
152
  if self.return_aggregate:
157
- self.aggregate_ = sparse.csr_matrix(membership.T.dot(input_matrix.dot(membership)))
153
+ self.aggregate_ = sparse.csr_matrix(probs.T.dot(input_matrix.dot(probs)))
158
154
  else:
159
155
  if self.labels_col_ is None:
160
156
  n_labels = max(self.labels_) + 1
161
- membership_row = get_membership(self.labels_, n_labels=n_labels)
162
- membership_col = normalize(input_matrix.T.dot(membership_row))
157
+ probs_row = get_membership(self.labels_, n_labels=n_labels)
158
+ probs_col = normalize(input_matrix.T.dot(probs_row))
163
159
  else:
164
160
  n_labels = max(max(self.labels_row_), max(self.labels_col_)) + 1
165
- membership_row = get_membership(self.labels_row_, n_labels=n_labels)
166
- membership_col = get_membership(self.labels_col_, n_labels=n_labels)
167
- if self.return_membership:
168
- self.membership_row_ = normalize(input_matrix.dot(membership_col))
169
- self.membership_col_ = normalize(input_matrix.T.dot(membership_row))
170
- self.membership_ = self.membership_row_
161
+ probs_row = get_membership(self.labels_row_, n_labels=n_labels)
162
+ probs_col = get_membership(self.labels_col_, n_labels=n_labels)
163
+ if self.return_probs:
164
+ self.probs_row_ = normalize(input_matrix.dot(probs_col))
165
+ self.probs_col_ = normalize(input_matrix.T.dot(probs_row))
166
+ self.probs_ = self.probs_row_
171
167
  if self.return_aggregate:
172
- aggregate_ = sparse.csr_matrix(membership_row.T.dot(input_matrix))
173
- aggregate_ = aggregate_.dot(membership_col)
168
+ aggregate_ = sparse.csr_matrix(probs_row.T.dot(input_matrix))
169
+ aggregate_ = aggregate_.dot(probs_col)
174
170
  self.aggregate_ = aggregate_
175
171
 
176
172
  return self
@@ -0,0 +1,253 @@
1
+ """
2
+ Created in March 2024
3
+ @author: Laurène David <laurene.david@ip-paris.fr>
4
+ @author: Thomas Bonald <bonald@enst.fr>
5
+ """
6
+
7
+ from typing import Union
8
+
9
+ import numpy as np
10
+ from scipy import sparse
11
+
12
+ from sknetwork.clustering import BaseClustering
13
+ from sknetwork.ranking import PageRank
14
+ from sknetwork.clustering import get_modularity
15
+ from sknetwork.classification.pagerank import PageRankClassifier
16
+ from sknetwork.utils.format import get_adjacency, directed2undirected
17
+
18
+
19
class KCenters(BaseClustering):
    """K-center clustering algorithm. The center of each cluster is obtained by the PageRank algorithm.

    Parameters
    ----------
    n_clusters : int
        Number of clusters.
    directed : bool, default False
        If ``True``, the graph is considered directed.
    center_position : str, default "row"
        Force centers to correspond to the nodes on the rows or columns of the biadjacency matrix.
        Can be ``row``, ``col`` or ``both``. Only considered for bipartite graphs.
    n_init : int, default 5
        Number of reruns of the k-centers algorithm with different centers.
        The run that produces the best modularity is chosen as the final result.
    max_iter : int, default 20
        Maximum number of center updates of the k-centers algorithm for a single run.

    Attributes
    ----------
    labels_ : np.ndarray
        Label of each node.
    labels_row_, labels_col_ : np.ndarray
        Labels of rows and columns, for bipartite graphs.
    centers_ : np.ndarray, shape (n_clusters,)
        Cluster centers. For bipartite graphs, indices refer to the graph on ``n_row + n_col`` nodes
        (rows first, then columns).
    centers_row_, centers_col_ : np.ndarray
        Cluster centers of rows and columns, for bipartite graphs (column indices are shifted by ``n_row``).

    Example
    -------
    >>> from sknetwork.clustering import KCenters
    >>> from sknetwork.data import karate_club
    >>> kcenters = KCenters(n_clusters=2)
    >>> adjacency = karate_club()
    >>> labels = kcenters.fit_predict(adjacency)
    >>> len(set(labels))
    2

    """
    def __init__(self, n_clusters: int, directed: bool = False, center_position: str = "row", n_init: int = 5,
                 max_iter: int = 20):
        # Go through BaseClustering.__init__ so that labels_row_, labels_col_, probs_* and aggregate_
        # exist (as None) before the first fit.
        super(KCenters, self).__init__()
        self.n_clusters = n_clusters
        self.directed = directed
        self.bipartite = None
        self.center_position = center_position
        self.n_init = n_init
        self.max_iter = max_iter
        self.labels_ = None
        self.centers_ = None
        self.centers_row_ = None
        self.centers_col_ = None

    def _compute_mask_centers(self, input_matrix: Union[sparse.csr_matrix, np.ndarray]):
        """Generate mask to filter nodes that can be cluster centers.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.

        Returns
        -------
        mask : np.ndarray, shape (n_nodes,)
            Mask for possible cluster centers (``n_nodes = n_row + n_col`` for bipartite graphs).

        Raises
        ------
        ValueError
            If the graph is bipartite and ``center_position`` is not ``row``, ``col`` or ``both``.
        """
        n_row, n_col = input_matrix.shape
        if not self.bipartite:
            # Any node can be a center.
            return np.ones(n_row, dtype=bool)

        # Bipartite graph: rows come first, then columns.
        mask = np.zeros(n_row + n_col, dtype=bool)
        if self.center_position == "row":
            mask[:n_row] = True
        elif self.center_position == "col":
            mask[n_row:] = True
        elif self.center_position == "both":
            mask[:] = True
        else:
            raise ValueError('Unknown center position')
        return mask

    @staticmethod
    def _init_centers(adjacency: Union[sparse.csr_matrix, np.ndarray], mask: np.ndarray, n_clusters: int):
        """Kcenters++ initialization to select cluster centers.

        This algorithm is an adaptation of the Kmeans++ algorithm to graphs.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph.
        mask :
            Initial mask for allowed positions of centers (not modified).
        n_clusters : int
            Number of centers to initialize.

        Returns
        -------
        centers : np.ndarray, shape (n_clusters,)
            Initial cluster centers.
        """
        available = mask.copy()
        nodes = np.arange(adjacency.shape[0])
        pagerank = PageRank()

        # Choose the first center uniformly at random
        center = np.random.choice(nodes[available])
        available[center] = False
        centers = [center]
        weights = {center: 1}

        for _ in range(n_clusters - 1):
            # Select nodes that are far from existing centers (low personalized PageRank score)
            candidates = nodes[available]
            ppr_scores = pagerank.fit_predict(adjacency, weights)[available]

            if ppr_scores.min() == 0:
                # Some candidates get a zero score: pick one of them uniformly.
                center = np.random.choice(candidates[ppr_scores == 0])
            else:
                # Otherwise sample with probability inversely proportional to the score.
                probs = 1 / ppr_scores
                center = np.random.choice(candidates, p=probs / np.sum(probs))

            available[center] = False
            centers.append(center)
            weights[center] = 1

        return np.array(centers)

    @staticmethod
    def _update_centers(adjacency, labels: np.ndarray, centers: np.ndarray, mask: np.ndarray, pagerank):
        """Recompute the center of each cluster as its allowed node of highest personalized PageRank score.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph.
        labels :
            Current label of each node; label ``k`` is the cluster of ``centers[k]``.
        centers :
            Current centers (not modified).
        mask :
            Mask for allowed positions of centers.
        pagerank :
            PageRank instance used to score the nodes.

        Returns
        -------
        new_centers : np.ndarray, shape (n_clusters,)
            Updated centers, aligned with the labels. A cluster without any allowed node keeps its center.
        """
        new_centers = centers.copy()
        for label in range(len(centers)):
            in_cluster = (labels == label) & mask
            candidates = np.flatnonzero(in_cluster)
            if len(candidates) == 0:
                continue
            scores = pagerank.fit_predict(adjacency, weights=in_cluster)
            # Restrict the argmax to the cluster so that the new center always belongs to it.
            new_centers[label] = candidates[np.argmax(scores[candidates])]
        return new_centers

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], force_bipartite: bool = False) -> "KCenters":
        """Compute the clustering of the graph by k-centers.

        Each of the ``n_init`` runs starts from random centers, assigns nodes to centers and then
        alternates center updates and re-assignments until the centers are stable or ``max_iter``
        updates have been made. The run with the highest modularity is kept.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.
        force_bipartite :
            If ``True``, force the input matrix to be considered as a biadjacency matrix even if square.

        Returns
        -------
        self : :class:`KCenters`

        Raises
        ------
        ValueError
            If ``n_clusters < 2``, ``n_init < 1`` or ``n_clusters`` exceeds the number of allowed centers.
        """
        if self.n_clusters < 2:
            raise ValueError("The number of clusters must be at least 2.")

        if self.n_init < 1:
            raise ValueError("The n_init parameter must be at least 1.")

        # NOTE(review): the graph is symmetrized when ``directed`` is True - confirm this is the intent.
        if self.directed:
            input_matrix = directed2undirected(input_matrix)

        adjacency, self.bipartite = get_adjacency(input_matrix, force_bipartite=force_bipartite)
        n_row = input_matrix.shape[0]

        mask = self._compute_mask_centers(input_matrix)
        if self.n_clusters > np.sum(mask):
            raise ValueError("The number of clusters is to high. This might be due to the center_position parameter.")

        pagerank_clf = PageRankClassifier()
        pagerank = PageRank()

        def assign(current_centers):
            """Label every node with the index of the center it is attached to."""
            seeds = {center: label for label, center in enumerate(current_centers)}
            return pagerank_clf.fit_predict(adjacency, seeds)

        labels_ = []
        centers_ = []
        modularity_ = []

        # Restarts
        for _ in range(self.n_init):
            centers = self._init_centers(adjacency, mask, self.n_clusters)
            labels = assign(centers)

            for _ in range(self.max_iter):
                new_centers = self._update_centers(adjacency, labels, centers, mask, pagerank)
                if np.array_equal(new_centers, centers):
                    break
                # Centers moved: adopt them and re-assign so that labels always match centers.
                centers = new_centers
                labels = assign(centers)

            # Store results
            if self.bipartite:
                modularity = get_modularity(input_matrix, labels[:n_row], labels[n_row:])
            else:
                modularity = get_modularity(adjacency, labels)

            labels_.append(labels)
            centers_.append(centers)
            modularity_.append(modularity)

        # Select restart with the highest modularity
        idx_max = int(np.argmax(modularity_))
        self.labels_ = np.array(labels_[idx_max])
        self.centers_ = np.array(centers_[idx_max])

        # Do not leak row / column centers from a previous fit.
        self.centers_row_ = None
        self.centers_col_ = None

        if self.bipartite:
            self._split_vars(input_matrix.shape)

            # Define centers based on center position
            if self.center_position == "row":
                self.centers_row_ = self.centers_
            elif self.center_position == "col":
                self.centers_col_ = self.centers_ - n_row
            else:
                is_row = self.centers_ < n_row
                self.centers_row_ = self.centers_[is_row]
                self.centers_col_ = self.centers_[~is_row] - n_row

        return self
@@ -0,0 +1,241 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created in March 2024
5
+ @author: Thomas Bonald <bonald@enst.fr>
6
+ @author: Ahmed Zaiou <ahmed.zaiou@capgemini.com>
7
+ """
8
+ from typing import Union, Optional
9
+
10
+ import numpy as np
11
+ from scipy import sparse
12
+
13
+ from sknetwork.clustering import Louvain
14
+ from sknetwork.clustering.louvain_core import optimize_core
15
+ from sknetwork.clustering.leiden_core import optimize_refine_core
16
+ from sknetwork.utils.membership import get_membership
17
+ from sknetwork.utils.check import check_random_state
18
+ from sknetwork.log import Log
19
+
20
+
21
class Leiden(Louvain):
    """Leiden algorithm for clustering graphs by maximization of modularity.

    Compared to the Louvain algorithm, the partition is refined before each aggregation.

    For bipartite graphs, the algorithm maximizes Barber's modularity by default.

    Parameters
    ----------
    resolution :
        Resolution parameter.
    modularity : str
        Type of modularity to maximize. Can be ``'Dugue'``, ``'Newman'`` or ``'Potts'`` (default = ``'dugue'``).
        The value is stored in lower case.
    tol_optimization :
        Minimum increase in modularity to enter a new optimization pass in the local search.
    tol_aggregation :
        Minimum increase in modularity to enter a new aggregation pass.
    n_aggregations :
        Maximum number of aggregations.
        A negative value is interpreted as no limit.
    shuffle_nodes :
        Enables node shuffling before optimization.
    sort_clusters :
        If ``True``, sort labels in decreasing order of cluster size.
    return_probs :
        If ``True``, return the probability distribution over clusters (soft clustering).
    return_aggregate :
        If ``True``, return the adjacency matrix of the graph between clusters.
    random_state :
        Random number generator or random seed. If None, numpy.random is used.
    verbose :
        Verbose mode.

    Attributes
    ----------
    labels_ : np.ndarray
        Label of each node.
    probs_ : sparse.csr_matrix
        Probability distribution over labels.
    labels_row_, labels_col_ : np.ndarray
        Labels of rows and columns, for bipartite graphs.
    probs_row_, probs_col_ : sparse.csr_matrix
        Probability distributions over labels for rows and columns (for bipartite graphs).
    aggregate_ : sparse.csr_matrix
        Aggregate adjacency matrix or biadjacency matrix between clusters.

    Example
    -------
    >>> from sknetwork.clustering import Leiden
    >>> from sknetwork.data import karate_club
    >>> leiden = Leiden()
    >>> adjacency = karate_club()
    >>> labels = leiden.fit_predict(adjacency)
    >>> len(set(labels))
    4

    References
    ----------
    * Traag, V. A., Waltman, L., & Van Eck, N. J. (2019).
      `From Louvain to Leiden: guaranteeing well-connected communities`, Scientific reports.
    """

    def __init__(self, resolution: float = 1, modularity: str = 'dugue', tol_optimization: float = 1e-3,
                 tol_aggregation: float = 1e-3, n_aggregations: int = -1, shuffle_nodes: bool = False,
                 sort_clusters: bool = True, return_probs: bool = True, return_aggregate: bool = True,
                 random_state: Optional[Union[np.random.RandomState, int]] = None, verbose: bool = False):
        super().__init__(sort_clusters=sort_clusters, return_probs=return_probs,
                         return_aggregate=return_aggregate)
        Log.__init__(self, verbose)

        # Outputs, filled by fit.
        self.labels_ = None
        self.bipartite = None

        # Parameters of the algorithm.
        self.resolution = resolution
        self.modularity = modularity.lower()
        self.tol_optimization = tol_optimization
        self.tol_aggregation = tol_aggregation
        self.n_aggregations = n_aggregations
        self.shuffle_nodes = shuffle_nodes
        self.random_state = check_random_state(random_state)

    @staticmethod
    def _leiden_core_inputs(partition, adjacency, out_weights, in_weights):
        """Prepare the graph arguments shared by the two compiled optimization routines.

        Parameters
        ----------
        partition :
            Labels used to compute the cluster weights.
        adjacency :
            Adjacency matrix.
        out_weights :
            Out-weights of nodes.
        in_weights :
            In-weights of nodes.

        Returns
        -------
        Tuple (indices, indptr, data, out-weights, in-weights, out-weights of clusters, in-weights of clusters,
        zero-initialized cluster weights, self-loops), with weights cast to ``float32``.
        """
        node_out = out_weights.astype(np.float32)
        node_in = in_weights.astype(np.float32)
        membership = get_membership(partition)
        cluster_out = membership.T.dot(node_out)
        cluster_in = membership.T.dot(node_in)
        cluster_buffer = np.zeros_like(cluster_out).astype(np.float32)
        edge_weights = adjacency.data.astype(np.float32)
        self_loops = adjacency.diagonal().astype(np.float32)
        return (adjacency.indices, adjacency.indptr, edge_weights, node_out, node_in,
                cluster_out, cluster_in, cluster_buffer, self_loops)

    def _optimize(self, labels, adjacency, out_weights, in_weights):
        """One optimization pass of the Leiden algorithm.

        Parameters
        ----------
        labels :
            Labels of nodes.
        adjacency :
            Adjacency matrix.
        out_weights :
            Out-weights of nodes.
        in_weights :
            In-weights of nodes

        Returns
        -------
        labels :
            Labels of nodes after optimization.
        increase :
            Gain in modularity after optimization.
        """
        graph_args = self._leiden_core_inputs(labels, adjacency, out_weights, in_weights)
        return optimize_core(labels.astype(np.int32), *graph_args, self.resolution, self.tol_optimization)

    def _optimize_refine(self, labels, labels_refined, adjacency, out_weights, in_weights):
        """Get the refined partition optimizing modularity.

        Parameters
        ----------
        labels :
            Labels of nodes.
        labels_refined :
            Refined labels of nodes.
        adjacency :
            Adjacency matrix.
        out_weights :
            Out-weights of nodes.
        in_weights :
            In-weights of nodes

        Returns
        -------
        labels_refined :
            Refined labels of nodes.
        """
        # Cluster weights are those of the refined partition.
        graph_args = self._leiden_core_inputs(labels_refined, adjacency, out_weights, in_weights)
        return optimize_refine_core(labels.astype(np.int32), labels_refined.astype(np.int32), *graph_args,
                                    self.resolution)

    @staticmethod
    def _aggregate_refine(labels, labels_refined, adjacency, out_weights, in_weights):
        """Aggregate nodes according to refined labels.

        Parameters
        ----------
        labels :
            Labels of nodes.
        labels_refined :
            Refined labels of nodes.
        adjacency :
            Adjacency matrix.
        out_weights :
            Out-weights of nodes.
        in_weights :
            In-weights of nodes.

        Returns
        -------
        Aggregate graph (labels, adjacency matrix, out-weights, in-weights).
        """
        coarse = get_membership(labels)
        fine = get_membership(labels_refined)
        fine_t = fine.T
        # One aggregate node per refined cluster.
        adjacency_agg = fine_t.tocsr().dot(adjacency.dot(fine))
        out_weights_agg = fine_t.dot(out_weights)
        in_weights_agg = fine_t.dot(in_weights)
        # Column indices of (refined cluster x cluster) give the label of each aggregate node.
        labels_agg = fine_t.tocsr().dot(coarse).indices
        return labels_agg, adjacency_agg, out_weights_agg, in_weights_agg

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], force_bipartite: bool = False) -> 'Leiden':
        """Fit algorithm to data.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.
        force_bipartite :
            If ``True``, force the input matrix to be considered as a biadjacency matrix even if square.

        Returns
        -------
        self : :class:`Leiden`
        """
        adjacency, out_weights, in_weights, membership, index = self._pre_processing(input_matrix, force_bipartite)
        labels = np.arange(adjacency.shape[0])
        n_passes = 0
        while True:
            n_passes += 1

            # Local moves, then relabel clusters as 0, 1, 2, ...
            labels, increase = self._optimize(labels, adjacency, out_weights, in_weights)
            _, partition = np.unique(labels, return_inverse=True)

            # Refinement, starting from singletons.
            refined = self._optimize_refine(partition, np.arange(len(partition)), adjacency, out_weights,
                                            in_weights)
            _, refined = np.unique(refined, return_inverse=True)

            # Aggregation over the refined partition.
            labels, adjacency, out_weights, in_weights = self._aggregate_refine(partition, refined, adjacency,
                                                                                out_weights, in_weights)
            n_clusters = adjacency.shape[0]
            finished = (n_clusters == 1 or increase <= self.tol_aggregation
                        or n_passes == self.n_aggregations)

            # The last pass reports the non-refined partition; earlier passes follow the refined one.
            membership = membership.dot(get_membership(partition if finished else refined))
            self.print_log("Aggregation:", n_passes, " Clusters:", n_clusters, " Increase:", increase)
            if finished:
                break

        self._post_processing(input_matrix, membership, index)
        return self