scikit-network 0.30.0__cp39-cp39-win_amd64.whl → 0.32.1__cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-network might be problematic. Click here for more details.

Files changed (187)
  1. {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/AUTHORS.rst +3 -0
  2. {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/METADATA +31 -3
  3. scikit_network-0.32.1.dist-info/RECORD +228 -0
  4. {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/WHEEL +1 -1
  5. sknetwork/__init__.py +1 -1
  6. sknetwork/base.py +67 -0
  7. sknetwork/classification/base.py +24 -24
  8. sknetwork/classification/base_rank.py +17 -25
  9. sknetwork/classification/diffusion.py +35 -35
  10. sknetwork/classification/knn.py +24 -21
  11. sknetwork/classification/metrics.py +1 -1
  12. sknetwork/classification/pagerank.py +10 -10
  13. sknetwork/classification/propagation.py +23 -20
  14. sknetwork/classification/tests/test_diffusion.py +13 -3
  15. sknetwork/classification/vote.cp39-win_amd64.pyd +0 -0
  16. sknetwork/classification/vote.cpp +14482 -10351
  17. sknetwork/classification/vote.pyx +1 -3
  18. sknetwork/clustering/__init__.py +3 -1
  19. sknetwork/clustering/base.py +36 -40
  20. sknetwork/clustering/kcenters.py +253 -0
  21. sknetwork/clustering/leiden.py +241 -0
  22. sknetwork/clustering/leiden_core.cp39-win_amd64.pyd +0 -0
  23. sknetwork/clustering/leiden_core.cpp +31564 -0
  24. sknetwork/clustering/leiden_core.pyx +124 -0
  25. sknetwork/clustering/louvain.py +133 -102
  26. sknetwork/clustering/louvain_core.cp39-win_amd64.pyd +0 -0
  27. sknetwork/clustering/louvain_core.cpp +22457 -18792
  28. sknetwork/clustering/louvain_core.pyx +86 -96
  29. sknetwork/clustering/postprocess.py +2 -2
  30. sknetwork/clustering/propagation_clustering.py +15 -19
  31. sknetwork/clustering/tests/test_API.py +8 -4
  32. sknetwork/clustering/tests/test_kcenters.py +92 -0
  33. sknetwork/clustering/tests/test_leiden.py +34 -0
  34. sknetwork/clustering/tests/test_louvain.py +3 -4
  35. sknetwork/data/__init__.py +2 -1
  36. sknetwork/data/base.py +28 -0
  37. sknetwork/data/load.py +38 -37
  38. sknetwork/data/models.py +18 -18
  39. sknetwork/data/parse.py +54 -33
  40. sknetwork/data/test_graphs.py +2 -2
  41. sknetwork/data/tests/test_API.py +1 -1
  42. sknetwork/data/tests/test_base.py +14 -0
  43. sknetwork/data/tests/test_load.py +1 -1
  44. sknetwork/data/tests/test_parse.py +9 -12
  45. sknetwork/data/tests/test_test_graphs.py +1 -2
  46. sknetwork/data/toy_graphs.py +18 -18
  47. sknetwork/embedding/__init__.py +0 -1
  48. sknetwork/embedding/base.py +21 -20
  49. sknetwork/embedding/force_atlas.py +3 -2
  50. sknetwork/embedding/louvain_embedding.py +2 -2
  51. sknetwork/embedding/random_projection.py +5 -3
  52. sknetwork/embedding/spectral.py +0 -73
  53. sknetwork/embedding/tests/test_API.py +4 -28
  54. sknetwork/embedding/tests/test_louvain_embedding.py +4 -9
  55. sknetwork/embedding/tests/test_random_projection.py +2 -2
  56. sknetwork/embedding/tests/test_spectral.py +5 -8
  57. sknetwork/embedding/tests/test_svd.py +1 -1
  58. sknetwork/gnn/base.py +4 -4
  59. sknetwork/gnn/base_layer.py +3 -3
  60. sknetwork/gnn/gnn_classifier.py +45 -89
  61. sknetwork/gnn/layer.py +1 -1
  62. sknetwork/gnn/loss.py +1 -1
  63. sknetwork/gnn/optimizer.py +4 -3
  64. sknetwork/gnn/tests/test_base_layer.py +4 -4
  65. sknetwork/gnn/tests/test_gnn_classifier.py +12 -35
  66. sknetwork/gnn/utils.py +8 -8
  67. sknetwork/hierarchy/base.py +29 -2
  68. sknetwork/hierarchy/louvain_hierarchy.py +45 -41
  69. sknetwork/hierarchy/paris.cp39-win_amd64.pyd +0 -0
  70. sknetwork/hierarchy/paris.cpp +27369 -22852
  71. sknetwork/hierarchy/paris.pyx +7 -9
  72. sknetwork/hierarchy/postprocess.py +16 -16
  73. sknetwork/hierarchy/tests/test_API.py +1 -1
  74. sknetwork/hierarchy/tests/test_algos.py +5 -0
  75. sknetwork/hierarchy/tests/test_metrics.py +1 -1
  76. sknetwork/linalg/__init__.py +1 -1
  77. sknetwork/linalg/diteration.cp39-win_amd64.pyd +0 -0
  78. sknetwork/linalg/diteration.cpp +13474 -9454
  79. sknetwork/linalg/diteration.pyx +0 -2
  80. sknetwork/linalg/eig_solver.py +1 -1
  81. sknetwork/linalg/{normalization.py → normalizer.py} +18 -15
  82. sknetwork/linalg/operators.py +1 -1
  83. sknetwork/linalg/ppr_solver.py +1 -1
  84. sknetwork/linalg/push.cp39-win_amd64.pyd +0 -0
  85. sknetwork/linalg/push.cpp +22993 -18807
  86. sknetwork/linalg/push.pyx +0 -2
  87. sknetwork/linalg/svd_solver.py +1 -1
  88. sknetwork/linalg/tests/test_normalization.py +3 -7
  89. sknetwork/linalg/tests/test_operators.py +4 -8
  90. sknetwork/linalg/tests/test_ppr.py +1 -1
  91. sknetwork/linkpred/base.py +13 -2
  92. sknetwork/linkpred/nn.py +6 -6
  93. sknetwork/log.py +19 -0
  94. sknetwork/path/__init__.py +4 -3
  95. sknetwork/path/dag.py +54 -0
  96. sknetwork/path/distances.py +98 -0
  97. sknetwork/path/search.py +13 -47
  98. sknetwork/path/shortest_path.py +37 -162
  99. sknetwork/path/tests/test_dag.py +37 -0
  100. sknetwork/path/tests/test_distances.py +62 -0
  101. sknetwork/path/tests/test_search.py +26 -11
  102. sknetwork/path/tests/test_shortest_path.py +31 -36
  103. sknetwork/ranking/__init__.py +0 -1
  104. sknetwork/ranking/base.py +13 -8
  105. sknetwork/ranking/betweenness.cp39-win_amd64.pyd +0 -0
  106. sknetwork/ranking/betweenness.cpp +5709 -3017
  107. sknetwork/ranking/betweenness.pyx +0 -2
  108. sknetwork/ranking/closeness.py +7 -10
  109. sknetwork/ranking/pagerank.py +14 -14
  110. sknetwork/ranking/postprocess.py +12 -3
  111. sknetwork/ranking/tests/test_API.py +2 -4
  112. sknetwork/ranking/tests/test_betweenness.py +3 -3
  113. sknetwork/ranking/tests/test_closeness.py +3 -7
  114. sknetwork/ranking/tests/test_pagerank.py +11 -5
  115. sknetwork/ranking/tests/test_postprocess.py +5 -0
  116. sknetwork/regression/base.py +19 -2
  117. sknetwork/regression/diffusion.py +24 -10
  118. sknetwork/regression/tests/test_diffusion.py +8 -0
  119. sknetwork/test_base.py +35 -0
  120. sknetwork/test_log.py +15 -0
  121. sknetwork/topology/__init__.py +7 -8
  122. sknetwork/topology/cliques.cp39-win_amd64.pyd +0 -0
  123. sknetwork/topology/{kcliques.cpp → cliques.cpp} +23412 -20276
  124. sknetwork/topology/cliques.pyx +149 -0
  125. sknetwork/topology/core.cp39-win_amd64.pyd +0 -0
  126. sknetwork/topology/{kcore.cpp → core.cpp} +21732 -18867
  127. sknetwork/topology/core.pyx +90 -0
  128. sknetwork/topology/cycles.py +243 -0
  129. sknetwork/topology/minheap.cp39-win_amd64.pyd +0 -0
  130. sknetwork/{utils → topology}/minheap.cpp +19452 -15368
  131. sknetwork/{utils → topology}/minheap.pxd +1 -3
  132. sknetwork/{utils → topology}/minheap.pyx +1 -3
  133. sknetwork/topology/structure.py +3 -43
  134. sknetwork/topology/tests/test_cliques.py +11 -11
  135. sknetwork/topology/tests/test_core.py +19 -0
  136. sknetwork/topology/tests/test_cycles.py +65 -0
  137. sknetwork/topology/tests/test_structure.py +2 -16
  138. sknetwork/topology/tests/test_triangles.py +11 -15
  139. sknetwork/topology/tests/test_wl.py +72 -0
  140. sknetwork/topology/triangles.cp39-win_amd64.pyd +0 -0
  141. sknetwork/topology/triangles.cpp +5056 -2696
  142. sknetwork/topology/triangles.pyx +74 -89
  143. sknetwork/topology/weisfeiler_lehman.py +56 -86
  144. sknetwork/topology/weisfeiler_lehman_core.cp39-win_amd64.pyd +0 -0
  145. sknetwork/topology/weisfeiler_lehman_core.cpp +14727 -10622
  146. sknetwork/topology/weisfeiler_lehman_core.pyx +0 -2
  147. sknetwork/utils/__init__.py +1 -31
  148. sknetwork/utils/check.py +2 -2
  149. sknetwork/utils/format.py +5 -3
  150. sknetwork/utils/membership.py +2 -2
  151. sknetwork/utils/tests/test_check.py +3 -3
  152. sknetwork/utils/tests/test_format.py +3 -1
  153. sknetwork/utils/values.py +1 -1
  154. sknetwork/visualization/__init__.py +2 -2
  155. sknetwork/visualization/dendrograms.py +55 -7
  156. sknetwork/visualization/graphs.py +292 -72
  157. sknetwork/visualization/tests/test_dendrograms.py +9 -9
  158. sknetwork/visualization/tests/test_graphs.py +71 -62
  159. scikit_network-0.30.0.dist-info/RECORD +0 -227
  160. sknetwork/embedding/louvain_hierarchy.py +0 -142
  161. sknetwork/embedding/tests/test_louvain_hierarchy.py +0 -19
  162. sknetwork/path/metrics.py +0 -148
  163. sknetwork/path/tests/test_metrics.py +0 -29
  164. sknetwork/ranking/harmonic.py +0 -82
  165. sknetwork/topology/dag.py +0 -74
  166. sknetwork/topology/dag_core.cp39-win_amd64.pyd +0 -0
  167. sknetwork/topology/dag_core.cpp +0 -23350
  168. sknetwork/topology/dag_core.pyx +0 -38
  169. sknetwork/topology/kcliques.cp39-win_amd64.pyd +0 -0
  170. sknetwork/topology/kcliques.pyx +0 -193
  171. sknetwork/topology/kcore.cp39-win_amd64.pyd +0 -0
  172. sknetwork/topology/kcore.pyx +0 -120
  173. sknetwork/topology/tests/test_cores.py +0 -21
  174. sknetwork/topology/tests/test_dag.py +0 -26
  175. sknetwork/topology/tests/test_wl_coloring.py +0 -49
  176. sknetwork/topology/tests/test_wl_kernel.py +0 -31
  177. sknetwork/utils/base.py +0 -35
  178. sknetwork/utils/minheap.cp39-win_amd64.pyd +0 -0
  179. sknetwork/utils/simplex.py +0 -140
  180. sknetwork/utils/tests/test_base.py +0 -28
  181. sknetwork/utils/tests/test_bunch.py +0 -16
  182. sknetwork/utils/tests/test_projection_simplex.py +0 -33
  183. sknetwork/utils/tests/test_verbose.py +0 -15
  184. sknetwork/utils/verbose.py +0 -37
  185. {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/LICENSE +0 -0
  186. {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/top_level.txt +0 -0
  187. sknetwork/{utils → data}/timeout.py +0 -0
@@ -0,0 +1,124 @@
1
+ # distutils: language=c++
2
+ # cython: language_level=3
3
+ from libcpp.set cimport set
4
+ from libc.stdlib cimport rand
5
+
6
+ cimport cython
7
+
8
+ ctypedef fused int_or_long:
9
+ int
10
+ long
11
+
12
+ @cython.boundscheck(False)
13
+ @cython.wraparound(False)
14
+ def optimize_refine_core(int_or_long[:] labels, int_or_long[:] labels_refined, int_or_long[:] indices,
15
+ int_or_long[:] indptr, float[:] data, float[:] out_weights, float[:] in_weights, float[:] out_cluster_weights,
16
+ float[:] in_cluster_weights, float[:] cluster_weights, float[:] self_loops, float resolution): # pragma: no cover
17
+ """Refine clusters while maximizing modularity.
18
+
19
+ Parameters
20
+ ----------
21
+ labels :
22
+ Labels (initial partition).
23
+ labels_refined :
24
+ Refined labels.
25
+ indices :
26
+ CSR format index array of the normalized adjacency matrix.
27
+ indptr :
28
+ CSR format index pointer array of the normalized adjacency matrix.
29
+ data :
30
+ CSR format data array of the normalized adjacency matrix.
31
+ out_weights :
32
+ Out-weights of nodes (sum to 1).
33
+ in_weights :
34
+ In-weights of nodes (sum to 1).
35
+ out_cluster_weights :
36
+ Out-weights of clusters (sum to 1).
37
+ in_cluster_weights :
38
+ In-weights of clusters (sum to 1).
39
+ cluster_weights :
40
+ Weights of clusters (initialized to 0).
41
+ self_loops :
42
+ Weights of self loops.
43
+ resolution :
44
+ Resolution parameter (positive).
45
+
46
+ Returns
47
+ -------
48
+ labels_refined :
49
+ Refined labels.
50
+ """
51
+ cdef int_or_long n
52
+ cdef int_or_long label
53
+ cdef int_or_long label_refined
54
+ cdef int_or_long label_target
55
+ cdef int_or_long label_best
56
+ cdef int_or_long i
57
+ cdef int_or_long j
58
+ cdef int_or_long start
59
+ cdef int_or_long end
60
+
61
+ cdef float increase = 1
62
+ cdef float delta
63
+ cdef float delta_local
64
+ cdef float delta_best
65
+ cdef float in_weight
66
+ cdef float out_weight
67
+
68
+ cdef set[int_or_long] label_set
69
+ cdef set[int_or_long] label_target_set
70
+
71
+ n = labels.shape[0]
72
+ while increase:
73
+ increase = 0
74
+
75
+ for i in range(n):
76
+ label_set = ()
77
+ label = labels[i]
78
+ label_refined = labels_refined[i]
79
+ start = indptr[i]
80
+ end = indptr[i+1]
81
+
82
+ # neighboring clusters
83
+ for j in range(start, end):
84
+ if labels[indices[j]] == label:
85
+ label_target = labels_refined[indices[j]]
86
+ label_set.insert(label_target)
87
+ cluster_weights[label_target] += data[j]
88
+ label_set.erase(label_refined)
89
+
90
+ if not label_set.empty():
91
+ out_weight = out_weights[i]
92
+ in_weight = in_weights[i]
93
+
94
+ # node leaving the current cluster
95
+ delta = 2 * (cluster_weights[label_refined] - self_loops[i])
96
+ delta -= resolution * out_weight * (in_cluster_weights[label_refined] - in_weight)
97
+ delta -= resolution * in_weight * (out_cluster_weights[label_refined] - out_weight)
98
+
99
+ label_target_set = ()
100
+ for label_target in label_set:
101
+ delta_local = 2 * cluster_weights[label_target]
102
+ delta_local -= resolution * out_weight * in_cluster_weights[label_target]
103
+ delta_local -= resolution * in_weight * out_cluster_weights[label_target]
104
+ delta_local -= delta
105
+ if delta_local > 0:
106
+ label_target_set.insert(label_target)
107
+ cluster_weights[label_target] = 0
108
+
109
+ if not label_target_set.empty():
110
+ increase = 1
111
+ k = rand() % label_target_set.size()
112
+ for label_target in label_target_set:
113
+ k -= 1
114
+ if k == 0:
115
+ break
116
+ labels_refined[i] = label_target
117
+ # update weights
118
+ out_cluster_weights[label_refined] -= out_weight
119
+ in_cluster_weights[label_refined] -= in_weight
120
+ out_cluster_weights[label_target] += out_weight
121
+ in_cluster_weights[label_target] += in_weight
122
+ cluster_weights[label_refined] = 0
123
+
124
+ return labels_refined
@@ -12,15 +12,15 @@ import numpy as np
12
12
  from scipy import sparse
13
13
 
14
14
  from sknetwork.clustering.base import BaseClustering
15
- from sknetwork.clustering.louvain_core import fit_core
15
+ from sknetwork.clustering.louvain_core import optimize_core
16
16
  from sknetwork.clustering.postprocess import reindex_labels
17
17
  from sknetwork.utils.check import check_random_state, get_probs
18
18
  from sknetwork.utils.format import check_format, get_adjacency, directed2undirected
19
19
  from sknetwork.utils.membership import get_membership
20
- from sknetwork.utils.verbose import VerboseMixin
20
+ from sknetwork.log import Log
21
21
 
22
22
 
23
- class Louvain(BaseClustering, VerboseMixin):
23
+ class Louvain(BaseClustering, Log):
24
24
  """Louvain algorithm for clustering graphs by maximization of modularity.
25
25
 
26
26
  For bipartite graphs, the algorithm maximizes Barber's modularity by default.
@@ -30,11 +30,11 @@ class Louvain(BaseClustering, VerboseMixin):
30
30
  resolution :
31
31
  Resolution parameter.
32
32
  modularity : str
33
- Which objective function to maximize. Can be ``'Dugue'``, ``'Newman'`` or ``'Potts'`` (default = ``'dugue'``).
33
+ Type of modularity to maximize. Can be ``'Dugue'``, ``'Newman'`` or ``'Potts'`` (default = ``'dugue'``).
34
34
  tol_optimization :
35
- Minimum increase in the objective function to enter a new optimization pass.
35
+ Minimum increase in modularity to enter a new optimization pass in the local search.
36
36
  tol_aggregation :
37
- Minimum increase in the objective function to enter a new aggregation pass.
37
+ Minimum increase in modularity to enter a new aggregation pass.
38
38
  n_aggregations :
39
39
  Maximum number of aggregations.
40
40
  A negative value is interpreted as no limit.
@@ -42,8 +42,8 @@ class Louvain(BaseClustering, VerboseMixin):
42
42
  Enables node shuffling before optimization.
43
43
  sort_clusters :
44
44
  If ``True``, sort labels in decreasing order of cluster size.
45
- return_membership :
46
- If ``True``, return the membership matrix of nodes to each cluster (soft clustering).
45
+ return_probs :
46
+ If ``True``, return the probability distribution over clusters (soft clustering).
47
47
  return_aggregate :
48
48
  If ``True``, return the adjacency matrix of the graph between clusters.
49
49
  random_state :
@@ -53,18 +53,14 @@ class Louvain(BaseClustering, VerboseMixin):
53
53
 
54
54
  Attributes
55
55
  ----------
56
- labels_ : np.ndarray
57
- Labels of the nodes.
58
- labels_row_ : np.ndarray
59
- Labels of the rows (for bipartite graphs).
60
- labels_col_ : np.ndarray
61
- Labels of the columns (for bipartite graphs).
62
- membership_ : sparse.csr_matrix
63
- Membership matrix of the nodes, shape (n_nodes, n_clusters).
64
- membership_row_ : sparse.csr_matrix
65
- Membership matrix of the rows (for bipartite graphs).
66
- membership_col_ : sparse.csr_matrix
67
- Membership matrix of the columns (for bipartite graphs).
56
+ labels_ : np.ndarray, shape (n_labels,)
57
+ Label of each node.
58
+ probs_ : sparse.csr_matrix, shape (n_row, n_labels)
59
+ Probability distribution over labels.
60
+ labels_row_, labels_col_ : np.ndarray
61
+ Labels of rows and columns, for bipartite graphs.
62
+ probs_row_, probs_col_ : sparse.csr_matrix, shape (n_row, n_labels)
63
+ Probability distributions over labels for rows and columns (for bipartite graphs).
68
64
  aggregate_ : sparse.csr_matrix
69
65
  Aggregate adjacency matrix or biadjacency matrix between clusters.
70
66
 
@@ -95,84 +91,88 @@ class Louvain(BaseClustering, VerboseMixin):
95
91
  <https://arxiv.org/pdf/0707.1616>`_
96
92
  Physical Review E, 76(6).
97
93
  """
94
+
98
95
  def __init__(self, resolution: float = 1, modularity: str = 'dugue', tol_optimization: float = 1e-3,
99
96
  tol_aggregation: float = 1e-3, n_aggregations: int = -1, shuffle_nodes: bool = False,
100
- sort_clusters: bool = True, return_membership: bool = True, return_aggregate: bool = True,
97
+ sort_clusters: bool = True, return_probs: bool = True, return_aggregate: bool = True,
101
98
  random_state: Optional[Union[np.random.RandomState, int]] = None, verbose: bool = False):
102
- super(Louvain, self).__init__(sort_clusters=sort_clusters, return_membership=return_membership,
99
+ super(Louvain, self).__init__(sort_clusters=sort_clusters, return_probs=return_probs,
103
100
  return_aggregate=return_aggregate)
104
- VerboseMixin.__init__(self, verbose)
101
+ Log.__init__(self, verbose)
105
102
 
106
103
  self.labels_ = None
107
104
  self.resolution = resolution
108
105
  self.modularity = modularity.lower()
109
- self.tol = tol_optimization
106
+ self.tol_optimization = tol_optimization
110
107
  self.tol_aggregation = tol_aggregation
111
108
  self.n_aggregations = n_aggregations
112
109
  self.shuffle_nodes = shuffle_nodes
113
110
  self.random_state = check_random_state(random_state)
114
111
  self.bipartite = None
115
112
 
116
- def _optimize(self, adjacency_norm, probs_ou, probs_in):
117
- """One local optimization pass of the Louvain algorithm
113
+ def _optimize(self, labels, adjacency, out_weights, in_weights):
114
+ """One optimization pass of the Louvain algorithm.
118
115
 
119
116
  Parameters
120
117
  ----------
121
- adjacency_norm :
122
- the norm of the adjacency
123
- probs_ou :
124
- the array of degrees of the adjacency
125
- probs_in :
126
- the array of degrees of the transpose of the adjacency
118
+ labels :
119
+ Labels of nodes.
120
+ adjacency :
121
+ Adjacency matrix.
122
+ out_weights :
123
+ Out-weights of nodes.
124
+ in_weights :
125
+ In-weights of nodes
127
126
 
128
127
  Returns
129
128
  -------
130
129
  labels :
131
- the communities of each node after optimization
132
- pass_increase :
133
- the increase in modularity gained after optimization
130
+ Labels of nodes after optimization.
131
+ increase :
132
+ Gain in modularity after optimization.
134
133
  """
135
- node_probs_in = probs_in.astype(np.float32)
136
- node_probs_ou = probs_ou.astype(np.float32)
137
-
138
- adjacency = 0.5 * directed2undirected(adjacency_norm)
139
-
134
+ labels = labels.astype(np.int32)
135
+ indices = adjacency.indices
136
+ indptr = adjacency.indptr
137
+ data = adjacency.data.astype(np.float32)
138
+ out_weights = out_weights.astype(np.float32)
139
+ in_weights = in_weights.astype(np.float32)
140
+ out_cluster_weights = out_weights.copy()
141
+ in_cluster_weights = in_weights.copy()
142
+ cluster_weights = np.zeros_like(out_cluster_weights).astype(np.float32)
140
143
  self_loops = adjacency.diagonal().astype(np.float32)
141
-
142
- indptr: np.ndarray = adjacency.indptr
143
- indices: np.ndarray = adjacency.indices
144
- data: np.ndarray = adjacency.data.astype(np.float32)
145
-
146
- return fit_core(self.resolution, self.tol, node_probs_ou, node_probs_in, self_loops, data, indices, indptr)
144
+ return optimize_core(labels, indices, indptr, data, out_weights, in_weights, out_cluster_weights,
145
+ in_cluster_weights, cluster_weights, self_loops, self.resolution, self.tol_optimization)
147
146
 
148
147
  @staticmethod
149
- def _aggregate(adjacency_norm, probs_out, probs_in, membership: Union[sparse.csr_matrix, np.ndarray]):
148
+ def _aggregate(labels, adjacency, out_weights, in_weights):
150
149
  """Aggregate nodes belonging to the same cluster.
151
150
 
152
151
  Parameters
153
152
  ----------
154
- adjacency_norm :
155
- the norm of the adjacency
156
- probs_out :
157
- the array of degrees of the adjacency
158
- probs_in :
159
- the array of degrees of the transpose of the adjacency
160
- membership :
161
- membership matrix (rows).
153
+ labels :
154
+ Labels of nodes.
155
+ adjacency :
156
+ Adjacency matrix.
157
+ out_weights :
158
+ Out-weights of nodes.
159
+ in_weights :
160
+ In-weights of nodes.
162
161
 
163
162
  Returns
164
163
  -------
165
- Aggregate graph.
164
+ Aggregate graph (adjacency matrix, out-weights, in-weights).
166
165
  """
167
- adjacency_norm = (membership.T.dot(adjacency_norm.dot(membership))).tocsr()
168
- probs_in = np.array(membership.T.dot(probs_in).T)
169
- probs_out = np.array(membership.T.dot(probs_out).T)
170
- return adjacency_norm, probs_out, probs_in
166
+ membership = get_membership(labels)
167
+ adjacency_ = membership.T.tocsr().dot(adjacency.dot(membership))
168
+ out_weights_ = membership.T.dot(out_weights)
169
+ in_weights_ = membership.T.dot(in_weights)
170
+ return adjacency_, out_weights_, in_weights_
171
171
 
172
- def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], force_bipartite: bool = False) -> 'Louvain':
173
- """Fit algorithm to data.
172
+ def _pre_processing(self, input_matrix, force_bipartite):
173
+ """Pre-processing for Louvain.
174
174
 
175
- Parameters
175
+ Parameters
176
176
  ----------
177
177
  input_matrix :
178
178
  Adjacency matrix or biadjacency matrix of the graph.
@@ -181,63 +181,64 @@ class Louvain(BaseClustering, VerboseMixin):
181
181
 
182
182
  Returns
183
183
  -------
184
- self: :class:`Louvain`
184
+ adjacency :
185
+ Adjacency matrix.
186
+ out_weights, in_weights :
187
+ Node weights.
188
+ membership :
189
+ Membership matrix (labels).
190
+ index :
191
+ Index of nodes.
185
192
  """
186
193
  self._init_vars()
194
+
195
+ # adjacency matrix
187
196
  input_matrix = check_format(input_matrix)
188
- if self.modularity == 'dugue':
189
- adjacency, self.bipartite = get_adjacency(input_matrix, force_directed=True,
190
- force_bipartite=force_bipartite)
191
- else:
192
- adjacency, self.bipartite = get_adjacency(input_matrix, force_bipartite=force_bipartite)
197
+ force_directed = self.modularity == 'dugue'
198
+ adjacency, self.bipartite = get_adjacency(input_matrix, force_directed=force_directed,
199
+ force_bipartite=force_bipartite)
193
200
 
201
+ # shuffling
194
202
  n = adjacency.shape[0]
195
-
196
203
  index = np.arange(n)
197
204
  if self.shuffle_nodes:
198
205
  index = self.random_state.permutation(index)
199
206
  adjacency = adjacency[index][:, index]
200
207
 
208
+ # node weights
201
209
  if self.modularity == 'potts':
202
- probs_out = get_probs('uniform', adjacency)
203
- probs_in = probs_out.copy()
210
+ out_weights = get_probs('uniform', adjacency)
211
+ in_weights = out_weights.copy()
204
212
  elif self.modularity == 'newman':
205
- probs_out = get_probs('degree', adjacency)
206
- probs_in = probs_out.copy()
213
+ out_weights = get_probs('degree', adjacency)
214
+ in_weights = out_weights.copy()
207
215
  elif self.modularity == 'dugue':
208
- probs_out = get_probs('degree', adjacency)
209
- probs_in = get_probs('degree', adjacency.T)
216
+ out_weights = get_probs('degree', adjacency)
217
+ in_weights = get_probs('degree', adjacency.T)
210
218
  else:
211
219
  raise ValueError('Unknown modularity function.')
212
220
 
213
- adjacency_cluster = adjacency / adjacency.data.sum()
221
+ # normalized, symmetric adjacency matrix (sums to 1)
222
+ adjacency = directed2undirected(adjacency)
223
+ adjacency = adjacency / adjacency.data.sum()
214
224
 
225
+ # cluster membership
215
226
  membership = sparse.identity(n, format='csr')
216
- increase = True
217
- count_aggregations = 0
218
- self.log.print("Starting with", n, "nodes.")
219
- while increase:
220
- count_aggregations += 1
221
-
222
- labels_cluster, pass_increase = self._optimize(adjacency_cluster, probs_out, probs_in)
223
- _, labels_cluster = np.unique(labels_cluster, return_inverse=True)
224
-
225
- if pass_increase <= self.tol_aggregation:
226
- increase = False
227
- else:
228
- membership_cluster = get_membership(labels_cluster)
229
- membership = membership.dot(membership_cluster)
230
- adjacency_cluster, probs_out, probs_in = self._aggregate(adjacency_cluster, probs_out, probs_in,
231
- membership_cluster)
232
-
233
- n = adjacency_cluster.shape[0]
234
- if n == 1:
235
- break
236
- self.log.print("Aggregation", count_aggregations, "completed with", n, "clusters and ",
237
- pass_increase, "increment.")
238
- if count_aggregations == self.n_aggregations:
239
- break
240
227
 
228
+ return adjacency, out_weights, in_weights, membership, index
229
+
230
+ def _post_processing(self, input_matrix, membership, index):
231
+ """Post-processing for Louvain.
232
+
233
+ Parameters
234
+ ----------
235
+ input_matrix :
236
+ Adjacency matrix or biadjacency matrix of the graph.
237
+ membership :
238
+ Membership matrix (labels).
239
+ index :
240
+ Index of nodes.
241
+ """
241
242
  if self.sort_clusters:
242
243
  labels = reindex_labels(membership.indices)
243
244
  else:
@@ -246,10 +247,40 @@ class Louvain(BaseClustering, VerboseMixin):
246
247
  reverse = np.empty(index.size, index.dtype)
247
248
  reverse[index] = np.arange(index.size)
248
249
  labels = labels[reverse]
249
-
250
250
  self.labels_ = labels
251
251
  if self.bipartite:
252
252
  self._split_vars(input_matrix.shape)
253
253
  self._secondary_outputs(input_matrix)
254
254
 
255
+ def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], force_bipartite: bool = False) -> 'Louvain':
256
+ """Fit algorithm to data.
257
+
258
+ Parameters
259
+ ----------
260
+ input_matrix :
261
+ Adjacency matrix or biadjacency matrix of the graph.
262
+ force_bipartite :
263
+ If ``True``, force the input matrix to be considered as a biadjacency matrix even if square.
264
+
265
+ Returns
266
+ -------
267
+ self : :class:`Louvain`
268
+ """
269
+ adjacency, out_weights, in_weights, membership, index = self._pre_processing(input_matrix, force_bipartite)
270
+ n = adjacency.shape[0]
271
+ count = 0
272
+ stop = False
273
+ while not stop:
274
+ count += 1
275
+ labels = np.arange(n)
276
+ labels, increase = self._optimize(labels, adjacency, out_weights, in_weights)
277
+ _, labels = np.unique(labels, return_inverse=True)
278
+ adjacency, out_weights, in_weights = self._aggregate(labels, adjacency, out_weights, in_weights)
279
+ membership = membership.dot(get_membership(labels))
280
+ n = adjacency.shape[0]
281
+ stop = n == 1
282
+ stop |= increase <= self.tol_aggregation
283
+ stop |= count == self.n_aggregations
284
+ self.print_log("Aggregation:", count, " Clusters:", n, " Increase:", increase)
285
+ self._post_processing(input_matrix, membership, index)
255
286
  return self