scikit-network 0.30.0__cp39-cp39-win_amd64.whl → 0.32.1__cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-network might be problematic. Click here for more details.
- {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/AUTHORS.rst +3 -0
- {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/METADATA +31 -3
- scikit_network-0.32.1.dist-info/RECORD +228 -0
- {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/WHEEL +1 -1
- sknetwork/__init__.py +1 -1
- sknetwork/base.py +67 -0
- sknetwork/classification/base.py +24 -24
- sknetwork/classification/base_rank.py +17 -25
- sknetwork/classification/diffusion.py +35 -35
- sknetwork/classification/knn.py +24 -21
- sknetwork/classification/metrics.py +1 -1
- sknetwork/classification/pagerank.py +10 -10
- sknetwork/classification/propagation.py +23 -20
- sknetwork/classification/tests/test_diffusion.py +13 -3
- sknetwork/classification/vote.cp39-win_amd64.pyd +0 -0
- sknetwork/classification/vote.cpp +14482 -10351
- sknetwork/classification/vote.pyx +1 -3
- sknetwork/clustering/__init__.py +3 -1
- sknetwork/clustering/base.py +36 -40
- sknetwork/clustering/kcenters.py +253 -0
- sknetwork/clustering/leiden.py +241 -0
- sknetwork/clustering/leiden_core.cp39-win_amd64.pyd +0 -0
- sknetwork/clustering/leiden_core.cpp +31564 -0
- sknetwork/clustering/leiden_core.pyx +124 -0
- sknetwork/clustering/louvain.py +133 -102
- sknetwork/clustering/louvain_core.cp39-win_amd64.pyd +0 -0
- sknetwork/clustering/louvain_core.cpp +22457 -18792
- sknetwork/clustering/louvain_core.pyx +86 -96
- sknetwork/clustering/postprocess.py +2 -2
- sknetwork/clustering/propagation_clustering.py +15 -19
- sknetwork/clustering/tests/test_API.py +8 -4
- sknetwork/clustering/tests/test_kcenters.py +92 -0
- sknetwork/clustering/tests/test_leiden.py +34 -0
- sknetwork/clustering/tests/test_louvain.py +3 -4
- sknetwork/data/__init__.py +2 -1
- sknetwork/data/base.py +28 -0
- sknetwork/data/load.py +38 -37
- sknetwork/data/models.py +18 -18
- sknetwork/data/parse.py +54 -33
- sknetwork/data/test_graphs.py +2 -2
- sknetwork/data/tests/test_API.py +1 -1
- sknetwork/data/tests/test_base.py +14 -0
- sknetwork/data/tests/test_load.py +1 -1
- sknetwork/data/tests/test_parse.py +9 -12
- sknetwork/data/tests/test_test_graphs.py +1 -2
- sknetwork/data/toy_graphs.py +18 -18
- sknetwork/embedding/__init__.py +0 -1
- sknetwork/embedding/base.py +21 -20
- sknetwork/embedding/force_atlas.py +3 -2
- sknetwork/embedding/louvain_embedding.py +2 -2
- sknetwork/embedding/random_projection.py +5 -3
- sknetwork/embedding/spectral.py +0 -73
- sknetwork/embedding/tests/test_API.py +4 -28
- sknetwork/embedding/tests/test_louvain_embedding.py +4 -9
- sknetwork/embedding/tests/test_random_projection.py +2 -2
- sknetwork/embedding/tests/test_spectral.py +5 -8
- sknetwork/embedding/tests/test_svd.py +1 -1
- sknetwork/gnn/base.py +4 -4
- sknetwork/gnn/base_layer.py +3 -3
- sknetwork/gnn/gnn_classifier.py +45 -89
- sknetwork/gnn/layer.py +1 -1
- sknetwork/gnn/loss.py +1 -1
- sknetwork/gnn/optimizer.py +4 -3
- sknetwork/gnn/tests/test_base_layer.py +4 -4
- sknetwork/gnn/tests/test_gnn_classifier.py +12 -35
- sknetwork/gnn/utils.py +8 -8
- sknetwork/hierarchy/base.py +29 -2
- sknetwork/hierarchy/louvain_hierarchy.py +45 -41
- sknetwork/hierarchy/paris.cp39-win_amd64.pyd +0 -0
- sknetwork/hierarchy/paris.cpp +27369 -22852
- sknetwork/hierarchy/paris.pyx +7 -9
- sknetwork/hierarchy/postprocess.py +16 -16
- sknetwork/hierarchy/tests/test_API.py +1 -1
- sknetwork/hierarchy/tests/test_algos.py +5 -0
- sknetwork/hierarchy/tests/test_metrics.py +1 -1
- sknetwork/linalg/__init__.py +1 -1
- sknetwork/linalg/diteration.cp39-win_amd64.pyd +0 -0
- sknetwork/linalg/diteration.cpp +13474 -9454
- sknetwork/linalg/diteration.pyx +0 -2
- sknetwork/linalg/eig_solver.py +1 -1
- sknetwork/linalg/{normalization.py → normalizer.py} +18 -15
- sknetwork/linalg/operators.py +1 -1
- sknetwork/linalg/ppr_solver.py +1 -1
- sknetwork/linalg/push.cp39-win_amd64.pyd +0 -0
- sknetwork/linalg/push.cpp +22993 -18807
- sknetwork/linalg/push.pyx +0 -2
- sknetwork/linalg/svd_solver.py +1 -1
- sknetwork/linalg/tests/test_normalization.py +3 -7
- sknetwork/linalg/tests/test_operators.py +4 -8
- sknetwork/linalg/tests/test_ppr.py +1 -1
- sknetwork/linkpred/base.py +13 -2
- sknetwork/linkpred/nn.py +6 -6
- sknetwork/log.py +19 -0
- sknetwork/path/__init__.py +4 -3
- sknetwork/path/dag.py +54 -0
- sknetwork/path/distances.py +98 -0
- sknetwork/path/search.py +13 -47
- sknetwork/path/shortest_path.py +37 -162
- sknetwork/path/tests/test_dag.py +37 -0
- sknetwork/path/tests/test_distances.py +62 -0
- sknetwork/path/tests/test_search.py +26 -11
- sknetwork/path/tests/test_shortest_path.py +31 -36
- sknetwork/ranking/__init__.py +0 -1
- sknetwork/ranking/base.py +13 -8
- sknetwork/ranking/betweenness.cp39-win_amd64.pyd +0 -0
- sknetwork/ranking/betweenness.cpp +5709 -3017
- sknetwork/ranking/betweenness.pyx +0 -2
- sknetwork/ranking/closeness.py +7 -10
- sknetwork/ranking/pagerank.py +14 -14
- sknetwork/ranking/postprocess.py +12 -3
- sknetwork/ranking/tests/test_API.py +2 -4
- sknetwork/ranking/tests/test_betweenness.py +3 -3
- sknetwork/ranking/tests/test_closeness.py +3 -7
- sknetwork/ranking/tests/test_pagerank.py +11 -5
- sknetwork/ranking/tests/test_postprocess.py +5 -0
- sknetwork/regression/base.py +19 -2
- sknetwork/regression/diffusion.py +24 -10
- sknetwork/regression/tests/test_diffusion.py +8 -0
- sknetwork/test_base.py +35 -0
- sknetwork/test_log.py +15 -0
- sknetwork/topology/__init__.py +7 -8
- sknetwork/topology/cliques.cp39-win_amd64.pyd +0 -0
- sknetwork/topology/{kcliques.cpp → cliques.cpp} +23412 -20276
- sknetwork/topology/cliques.pyx +149 -0
- sknetwork/topology/core.cp39-win_amd64.pyd +0 -0
- sknetwork/topology/{kcore.cpp → core.cpp} +21732 -18867
- sknetwork/topology/core.pyx +90 -0
- sknetwork/topology/cycles.py +243 -0
- sknetwork/topology/minheap.cp39-win_amd64.pyd +0 -0
- sknetwork/{utils → topology}/minheap.cpp +19452 -15368
- sknetwork/{utils → topology}/minheap.pxd +1 -3
- sknetwork/{utils → topology}/minheap.pyx +1 -3
- sknetwork/topology/structure.py +3 -43
- sknetwork/topology/tests/test_cliques.py +11 -11
- sknetwork/topology/tests/test_core.py +19 -0
- sknetwork/topology/tests/test_cycles.py +65 -0
- sknetwork/topology/tests/test_structure.py +2 -16
- sknetwork/topology/tests/test_triangles.py +11 -15
- sknetwork/topology/tests/test_wl.py +72 -0
- sknetwork/topology/triangles.cp39-win_amd64.pyd +0 -0
- sknetwork/topology/triangles.cpp +5056 -2696
- sknetwork/topology/triangles.pyx +74 -89
- sknetwork/topology/weisfeiler_lehman.py +56 -86
- sknetwork/topology/weisfeiler_lehman_core.cp39-win_amd64.pyd +0 -0
- sknetwork/topology/weisfeiler_lehman_core.cpp +14727 -10622
- sknetwork/topology/weisfeiler_lehman_core.pyx +0 -2
- sknetwork/utils/__init__.py +1 -31
- sknetwork/utils/check.py +2 -2
- sknetwork/utils/format.py +5 -3
- sknetwork/utils/membership.py +2 -2
- sknetwork/utils/tests/test_check.py +3 -3
- sknetwork/utils/tests/test_format.py +3 -1
- sknetwork/utils/values.py +1 -1
- sknetwork/visualization/__init__.py +2 -2
- sknetwork/visualization/dendrograms.py +55 -7
- sknetwork/visualization/graphs.py +292 -72
- sknetwork/visualization/tests/test_dendrograms.py +9 -9
- sknetwork/visualization/tests/test_graphs.py +71 -62
- scikit_network-0.30.0.dist-info/RECORD +0 -227
- sknetwork/embedding/louvain_hierarchy.py +0 -142
- sknetwork/embedding/tests/test_louvain_hierarchy.py +0 -19
- sknetwork/path/metrics.py +0 -148
- sknetwork/path/tests/test_metrics.py +0 -29
- sknetwork/ranking/harmonic.py +0 -82
- sknetwork/topology/dag.py +0 -74
- sknetwork/topology/dag_core.cp39-win_amd64.pyd +0 -0
- sknetwork/topology/dag_core.cpp +0 -23350
- sknetwork/topology/dag_core.pyx +0 -38
- sknetwork/topology/kcliques.cp39-win_amd64.pyd +0 -0
- sknetwork/topology/kcliques.pyx +0 -193
- sknetwork/topology/kcore.cp39-win_amd64.pyd +0 -0
- sknetwork/topology/kcore.pyx +0 -120
- sknetwork/topology/tests/test_cores.py +0 -21
- sknetwork/topology/tests/test_dag.py +0 -26
- sknetwork/topology/tests/test_wl_coloring.py +0 -49
- sknetwork/topology/tests/test_wl_kernel.py +0 -31
- sknetwork/utils/base.py +0 -35
- sknetwork/utils/minheap.cp39-win_amd64.pyd +0 -0
- sknetwork/utils/simplex.py +0 -140
- sknetwork/utils/tests/test_base.py +0 -28
- sknetwork/utils/tests/test_bunch.py +0 -16
- sknetwork/utils/tests/test_projection_simplex.py +0 -33
- sknetwork/utils/tests/test_verbose.py +0 -15
- sknetwork/utils/verbose.py +0 -37
- {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/LICENSE +0 -0
- {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/top_level.txt +0 -0
- sknetwork/{utils → data}/timeout.py +0 -0
|
@@ -1,9 +1,6 @@
|
|
|
1
|
-
# distutils: language
|
|
1
|
+
# distutils: language=c++
|
|
2
2
|
# cython: language_level=3
|
|
3
|
-
# cython: linetrace=True
|
|
4
|
-
# distutils: define_macros=CYTHON_TRACE_NOGIL=1
|
|
5
3
|
from libcpp.set cimport set
|
|
6
|
-
from libcpp.vector cimport vector
|
|
7
4
|
cimport cython
|
|
8
5
|
|
|
9
6
|
ctypedef fused int_or_long:
|
|
@@ -12,123 +9,116 @@ ctypedef fused int_or_long:
|
|
|
12
9
|
|
|
13
10
|
@cython.boundscheck(False)
|
|
14
11
|
@cython.wraparound(False)
|
|
15
|
-
def
|
|
16
|
-
|
|
17
|
-
|
|
12
|
+
def optimize_core(int_or_long[:] labels, int_or_long[:] indices, int_or_long[:] indptr, float[:] data,
|
|
13
|
+
float[:] out_weights, float[:] in_weights, float[:] out_cluster_weights, float[:] in_cluster_weights,
|
|
14
|
+
float[:] cluster_weights, float[:] self_loops, float resolution, float tol_optimization): # pragma: no cover
|
|
15
|
+
"""Find clusters maximizing modularity.
|
|
18
16
|
|
|
19
17
|
Parameters
|
|
20
18
|
----------
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
tol :
|
|
24
|
-
Minimum increase in modularity to enter a new optimization pass.
|
|
25
|
-
ou_node_probs :
|
|
26
|
-
Distribution of node weights based on their out-edges (sums to 1).
|
|
27
|
-
in_node_probs :
|
|
28
|
-
Distribution of node weights based on their in-edges (sums to 1).
|
|
29
|
-
self_loops :
|
|
30
|
-
Weights of self loops.
|
|
31
|
-
data :
|
|
32
|
-
CSR format data array of the normalized adjacency matrix.
|
|
19
|
+
labels :
|
|
20
|
+
Initial labels.
|
|
33
21
|
indices :
|
|
34
22
|
CSR format index array of the normalized adjacency matrix.
|
|
35
23
|
indptr :
|
|
36
24
|
CSR format index pointer array of the normalized adjacency matrix.
|
|
25
|
+
data :
|
|
26
|
+
CSR format data array of the normalized adjacency matrix.
|
|
27
|
+
out_weights :
|
|
28
|
+
Out-weights of nodes (sum to 1).
|
|
29
|
+
in_weights :
|
|
30
|
+
In-weights of nodes (sum to 1).
|
|
31
|
+
out_cluster_weights :
|
|
32
|
+
Out-weights of clusters (sum to 1).
|
|
33
|
+
in_cluster_weights :
|
|
34
|
+
In-weights of clusters (sum to 1).
|
|
35
|
+
cluster_weights :
|
|
36
|
+
Weights of clusters (initialized to 0).
|
|
37
|
+
self_loops :
|
|
38
|
+
Weights of self loops.
|
|
39
|
+
resolution :
|
|
40
|
+
Resolution parameter (positive).
|
|
41
|
+
tol_optimization :
|
|
42
|
+
Minimum increase in modularity to enter a new optimization pass.
|
|
37
43
|
|
|
38
44
|
Returns
|
|
39
45
|
-------
|
|
40
46
|
labels :
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
47
|
+
Labels of nodes.
|
|
48
|
+
increase :
|
|
49
|
+
Increase in modularity.
|
|
44
50
|
"""
|
|
45
|
-
cdef int_or_long n
|
|
46
|
-
cdef int_or_long
|
|
47
|
-
cdef int_or_long
|
|
48
|
-
cdef int_or_long
|
|
49
|
-
cdef int_or_long
|
|
51
|
+
cdef int_or_long n
|
|
52
|
+
cdef int_or_long stop = 0
|
|
53
|
+
cdef int_or_long label
|
|
54
|
+
cdef int_or_long label_target
|
|
55
|
+
cdef int_or_long label_best
|
|
50
56
|
cdef int_or_long i
|
|
51
57
|
cdef int_or_long j
|
|
52
|
-
cdef int_or_long
|
|
53
|
-
cdef int_or_long
|
|
54
|
-
cdef int_or_long label
|
|
58
|
+
cdef int_or_long start
|
|
59
|
+
cdef int_or_long end
|
|
55
60
|
|
|
56
|
-
cdef float
|
|
61
|
+
cdef float increase = 0
|
|
57
62
|
cdef float increase_pass
|
|
58
63
|
cdef float delta
|
|
59
|
-
cdef float delta_best
|
|
60
|
-
cdef float delta_exit
|
|
61
64
|
cdef float delta_local
|
|
62
|
-
cdef float
|
|
63
|
-
cdef float
|
|
64
|
-
cdef float
|
|
65
|
-
cdef float ratio_ou
|
|
66
|
-
|
|
67
|
-
cdef vector[int_or_long] labels
|
|
68
|
-
cdef vector[float] neighbor_clusters_weights
|
|
69
|
-
cdef vector[float] ou_clusters_weights
|
|
70
|
-
cdef vector[float] in_clusters_weights
|
|
71
|
-
cdef set[int_or_long] unique_clusters = ()
|
|
72
|
-
|
|
73
|
-
for i in range(n):
|
|
74
|
-
labels.push_back(i)
|
|
75
|
-
neighbor_clusters_weights.push_back(0.)
|
|
76
|
-
ou_clusters_weights.push_back(ou_node_probs[i])
|
|
77
|
-
in_clusters_weights.push_back(in_node_probs[i])
|
|
78
|
-
|
|
79
|
-
while increase == 1:
|
|
80
|
-
increase = 0
|
|
81
|
-
increase_pass = 0
|
|
82
|
-
|
|
83
|
-
for i in range(n):
|
|
84
|
-
unique_clusters.clear()
|
|
85
|
-
cluster_node = labels[i]
|
|
86
|
-
j1 = indptr[i]
|
|
87
|
-
j2 = indptr[i + 1]
|
|
88
|
-
|
|
89
|
-
for j in range(j1, j2):
|
|
90
|
-
label = labels[indices[j]]
|
|
91
|
-
neighbor_clusters_weights[label] += data[j]
|
|
92
|
-
unique_clusters.insert(label)
|
|
65
|
+
cdef float delta_best
|
|
66
|
+
cdef float in_weight
|
|
67
|
+
cdef float out_weight
|
|
93
68
|
|
|
94
|
-
|
|
69
|
+
cdef set[int_or_long] label_set = ()
|
|
95
70
|
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
ratio_ou = resolution * node_prob_ou
|
|
100
|
-
ratio_in = resolution * node_prob_in
|
|
71
|
+
n = labels.shape[0]
|
|
72
|
+
while not stop:
|
|
73
|
+
increase_pass = 0
|
|
101
74
|
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
75
|
+
for i in range(n):
|
|
76
|
+
label_set.clear()
|
|
77
|
+
label = labels[i]
|
|
78
|
+
start = indptr[i]
|
|
79
|
+
end = indptr[i+1]
|
|
80
|
+
|
|
81
|
+
# neighboring clusters
|
|
82
|
+
for j in range(start, end):
|
|
83
|
+
label_target = labels[indices[j]]
|
|
84
|
+
label_set.insert(label_target)
|
|
85
|
+
cluster_weights[label_target] += data[j]
|
|
86
|
+
label_set.erase(label)
|
|
87
|
+
|
|
88
|
+
if not label_set.empty():
|
|
89
|
+
out_weight = out_weights[i]
|
|
90
|
+
in_weight = in_weights[i]
|
|
91
|
+
|
|
92
|
+
# node leaving the current cluster
|
|
93
|
+
delta = 2 * (cluster_weights[label] - self_loops[i])
|
|
94
|
+
delta -= resolution * out_weight * (in_cluster_weights[label] - in_weight)
|
|
95
|
+
delta -= resolution * in_weight * (out_cluster_weights[label] - out_weight)
|
|
105
96
|
|
|
106
97
|
delta_best = 0
|
|
107
|
-
|
|
98
|
+
label_best = label
|
|
108
99
|
|
|
109
|
-
for
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
delta_local = delta - delta_exit
|
|
100
|
+
for label_target in label_set:
|
|
101
|
+
delta_local = 2 * cluster_weights[label_target]
|
|
102
|
+
delta_local -= resolution * out_weight * in_cluster_weights[label_target]
|
|
103
|
+
delta_local -= resolution * in_weight * out_cluster_weights[label_target]
|
|
104
|
+
delta_local -= delta
|
|
115
105
|
if delta_local > delta_best:
|
|
116
106
|
delta_best = delta_local
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
neighbor_clusters_weights[cluster] = 0
|
|
107
|
+
label_best = label_target
|
|
108
|
+
cluster_weights[label_target] = 0
|
|
120
109
|
|
|
121
|
-
if
|
|
110
|
+
if label_best != label:
|
|
122
111
|
increase_pass += delta_best
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
112
|
+
labels[i] = label_best
|
|
113
|
+
# update weights
|
|
114
|
+
out_cluster_weights[label] -= out_weight
|
|
115
|
+
in_cluster_weights[label] -= in_weight
|
|
116
|
+
out_cluster_weights[label_best] += out_weight
|
|
117
|
+
in_cluster_weights[label_best] += in_weight
|
|
118
|
+
|
|
119
|
+
cluster_weights[label] = 0
|
|
120
|
+
|
|
121
|
+
increase += increase_pass
|
|
122
|
+
stop = increase_pass <= tol_optimization
|
|
123
|
+
|
|
124
|
+
return labels, increase
|
|
@@ -41,7 +41,7 @@ def aggregate_graph(input_matrix: sparse.csr_matrix, labels: Optional[np.ndarray
|
|
|
41
41
|
labels_row: Optional[np.ndarray] = None, labels_col: Optional[np.ndarray] = None) \
|
|
42
42
|
-> sparse.csr_matrix:
|
|
43
43
|
"""Aggregate graph per label. All nodes with the same label become a single node.
|
|
44
|
-
Negative labels are ignored (corresponding nodes are
|
|
44
|
+
Negative labels are ignored (corresponding nodes are discarded).
|
|
45
45
|
|
|
46
46
|
Parameters
|
|
47
47
|
----------
|
|
@@ -63,4 +63,4 @@ def aggregate_graph(input_matrix: sparse.csr_matrix, labels: Optional[np.ndarray
|
|
|
63
63
|
else:
|
|
64
64
|
membership_col = membership_row
|
|
65
65
|
aggregate_matrix = membership_row.T.dot(input_matrix).dot(membership_col)
|
|
66
|
-
return aggregate_matrix
|
|
66
|
+
return aggregate_matrix.tocsr()
|
|
@@ -29,27 +29,23 @@ class PropagationClustering(BaseClustering, Propagation):
|
|
|
29
29
|
weighted : bool
|
|
30
30
|
If ``True``, the vote of each neighbor is proportional to the edge weight.
|
|
31
31
|
Otherwise, all votes have weight 1.
|
|
32
|
-
sort_clusters :
|
|
32
|
+
sort_clusters : bool
|
|
33
33
|
If ``True``, sort labels in decreasing order of cluster size.
|
|
34
|
-
|
|
35
|
-
If ``True``, return the
|
|
36
|
-
return_aggregate :
|
|
34
|
+
return_probs : bool
|
|
35
|
+
If ``True``, return the probability distribution over clusters (soft clustering).
|
|
36
|
+
return_aggregate : bool
|
|
37
37
|
If ``True``, return the aggregate adjacency matrix or biadjacency matrix between clusters.
|
|
38
38
|
|
|
39
39
|
Attributes
|
|
40
40
|
----------
|
|
41
|
-
labels_ : np.ndarray
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
labels_col_ : np.ndarray
|
|
46
|
-
Labels of
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
membership_row_ : sparse.csr_matrix
|
|
50
|
-
Membership matrix of the rows (for bipartite graphs).
|
|
51
|
-
membership_col_ : sparse.csr_matrix
|
|
52
|
-
Membership matrix of the columns (for bipartite graphs).
|
|
41
|
+
labels_ : np.ndarray, shape (n_labels,)
|
|
42
|
+
Label of each node.
|
|
43
|
+
probs_ : sparse.csr_matrix, shape (n_row, n_labels)
|
|
44
|
+
Probability distribution over labels.
|
|
45
|
+
labels_row_, labels_col_ : np.ndarray
|
|
46
|
+
Labels of rows and columns, for bipartite graphs.
|
|
47
|
+
probs_row_, probs_col_ : sparse.csr_matrix, shape (n_row, n_labels)
|
|
48
|
+
Probability distributions over labels for rows and columns (for bipartite graphs).
|
|
53
49
|
aggregate_ : sparse.csr_matrix
|
|
54
50
|
Aggregate adjacency matrix or biadjacency matrix between clusters.
|
|
55
51
|
|
|
@@ -72,9 +68,9 @@ class PropagationClustering(BaseClustering, Propagation):
|
|
|
72
68
|
Physical review E, 76(3), 036106.
|
|
73
69
|
"""
|
|
74
70
|
def __init__(self, n_iter: int = 5, node_order: str = 'decreasing', weighted: bool = True,
|
|
75
|
-
sort_clusters: bool = True,
|
|
71
|
+
sort_clusters: bool = True, return_probs: bool = True, return_aggregate: bool = True):
|
|
76
72
|
Propagation.__init__(self, n_iter, node_order, weighted)
|
|
77
|
-
BaseClustering.__init__(self, sort_clusters,
|
|
73
|
+
BaseClustering.__init__(self, sort_clusters, return_probs, return_aggregate)
|
|
78
74
|
self.bipartite = None
|
|
79
75
|
|
|
80
76
|
def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray]) -> 'PropagationClustering':
|
|
@@ -82,7 +78,7 @@ class PropagationClustering(BaseClustering, Propagation):
|
|
|
82
78
|
|
|
83
79
|
Parameters
|
|
84
80
|
----------
|
|
85
|
-
input_matrix :
|
|
81
|
+
input_matrix : sparse.csr_matrix, np.ndarray
|
|
86
82
|
Adjacency matrix or biadjacency matrix of the graph.
|
|
87
83
|
|
|
88
84
|
Returns
|
|
@@ -9,9 +9,13 @@ from sknetwork.data.test_graphs import *
|
|
|
9
9
|
|
|
10
10
|
class TestClusteringAPI(unittest.TestCase):
|
|
11
11
|
|
|
12
|
+
def setUp(self):
|
|
13
|
+
self.algos = [Louvain(return_aggregate=True), Leiden(return_aggregate=True),
|
|
14
|
+
PropagationClustering(return_aggregate=True)]
|
|
15
|
+
|
|
12
16
|
def test_regular(self):
|
|
13
|
-
for algo in
|
|
14
|
-
for adjacency in [test_graph(), test_digraph(),
|
|
17
|
+
for algo in self.algos:
|
|
18
|
+
for adjacency in [test_graph(), test_digraph(), test_disconnected_graph()]:
|
|
15
19
|
n = adjacency.shape[0]
|
|
16
20
|
labels = algo.fit_predict(adjacency)
|
|
17
21
|
n_labels = len(set(labels))
|
|
@@ -22,13 +26,13 @@ class TestClusteringAPI(unittest.TestCase):
|
|
|
22
26
|
n_labels = len(set(labels))
|
|
23
27
|
self.assertEqual(labels.shape, (n,))
|
|
24
28
|
self.assertEqual(algo.aggregate_.shape, (n_labels, n_labels))
|
|
25
|
-
membership = algo.fit_transform(
|
|
29
|
+
membership = algo.fit_transform(adjacency_bool)
|
|
26
30
|
self.assertEqual(membership.shape, (n, n_labels))
|
|
27
31
|
|
|
28
32
|
def test_bipartite(self):
|
|
29
33
|
biadjacency = test_bigraph()
|
|
30
34
|
n_row, n_col = biadjacency.shape
|
|
31
|
-
for algo in
|
|
35
|
+
for algo in self.algos:
|
|
32
36
|
algo.fit(biadjacency)
|
|
33
37
|
self.assertEqual(algo.labels_row_.shape, (n_row,))
|
|
34
38
|
self.assertEqual(algo.labels_col_.shape, (n_col,))
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""Tests for KCenters"""
|
|
4
|
+
import unittest
|
|
5
|
+
|
|
6
|
+
from sknetwork.clustering import KCenters
|
|
7
|
+
from sknetwork.data import karate_club, painters, star_wars
|
|
8
|
+
from sknetwork.data.test_graphs import *
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TestKCentersClustering(unittest.TestCase):
|
|
12
|
+
|
|
13
|
+
def test_kcenters(self):
|
|
14
|
+
# Test undirected graph
|
|
15
|
+
n_clusters = 2
|
|
16
|
+
adjacency = karate_club()
|
|
17
|
+
n_row = adjacency.shape[0]
|
|
18
|
+
kcenters = KCenters(n_clusters=n_clusters)
|
|
19
|
+
labels = kcenters.fit_predict(adjacency)
|
|
20
|
+
self.assertEqual(len(labels), n_row)
|
|
21
|
+
self.assertEqual(len(set(labels)), n_clusters)
|
|
22
|
+
|
|
23
|
+
# Test directed graph
|
|
24
|
+
n_clusters = 3
|
|
25
|
+
adjacency = painters()
|
|
26
|
+
n_row = adjacency.shape[0]
|
|
27
|
+
kcenters = KCenters(n_clusters=n_clusters, directed=True)
|
|
28
|
+
labels = kcenters.fit_predict(adjacency)
|
|
29
|
+
self.assertEqual(len(labels), n_row)
|
|
30
|
+
self.assertEqual(len(set(labels)), n_clusters)
|
|
31
|
+
|
|
32
|
+
# Test bipartite graph
|
|
33
|
+
n_clusters = 2
|
|
34
|
+
biadjacency = star_wars()
|
|
35
|
+
n_row, n_col = biadjacency.shape
|
|
36
|
+
kcenters = KCenters(n_clusters=n_clusters)
|
|
37
|
+
kcenters.fit(biadjacency)
|
|
38
|
+
labels = kcenters.labels_
|
|
39
|
+
self.assertEqual(len(kcenters.labels_row_), n_row)
|
|
40
|
+
self.assertEqual(len(kcenters.labels_col_), n_col)
|
|
41
|
+
self.assertEqual(len(set(labels)), n_clusters)
|
|
42
|
+
|
|
43
|
+
def test_kcenters_centers(self):
|
|
44
|
+
# Test centers for undirected graphs
|
|
45
|
+
n_clusters = 2
|
|
46
|
+
adjacency = karate_club()
|
|
47
|
+
kcenters = KCenters(n_clusters=n_clusters)
|
|
48
|
+
kcenters.fit(adjacency)
|
|
49
|
+
centers = kcenters.centers_
|
|
50
|
+
self.assertEqual(n_clusters, len(set(centers)))
|
|
51
|
+
|
|
52
|
+
# Test centers for bipartite graphs
|
|
53
|
+
n_clusters = 2
|
|
54
|
+
biadjacency = star_wars()
|
|
55
|
+
n_row, n_col = biadjacency.shape
|
|
56
|
+
for position in ["row", "col", "both"]:
|
|
57
|
+
kcenters = KCenters(n_clusters=n_clusters, center_position=position)
|
|
58
|
+
kcenters.fit(biadjacency)
|
|
59
|
+
centers_row = kcenters.centers_row_
|
|
60
|
+
centers_col = kcenters.centers_col_
|
|
61
|
+
if position == "row":
|
|
62
|
+
self.assertEqual(n_clusters, len(set(centers_row)))
|
|
63
|
+
self.assertTrue(np.all(centers_row < n_row))
|
|
64
|
+
self.assertTrue(centers_col is None)
|
|
65
|
+
if position == "col":
|
|
66
|
+
self.assertEqual(n_clusters, len(set(centers_col)))
|
|
67
|
+
self.assertTrue(np.all((centers_col < n_col) & (0 <= centers_col)))
|
|
68
|
+
self.assertTrue(centers_row is None)
|
|
69
|
+
if position == "both":
|
|
70
|
+
self.assertEqual(n_clusters, len(set(centers_row)) + len(set(centers_col)))
|
|
71
|
+
self.assertTrue(np.all(centers_row < n_row))
|
|
72
|
+
self.assertTrue(np.all((centers_col < n_col) & (0 <= centers_col)))
|
|
73
|
+
|
|
74
|
+
def test_kcenters_error(self):
|
|
75
|
+
# Test value errors
|
|
76
|
+
adjacency = karate_club()
|
|
77
|
+
biadjacency = star_wars()
|
|
78
|
+
|
|
79
|
+
# test n_clusters error
|
|
80
|
+
kcenters = KCenters(n_clusters=1)
|
|
81
|
+
with self.assertRaises(ValueError):
|
|
82
|
+
kcenters.fit(adjacency)
|
|
83
|
+
|
|
84
|
+
# test n_init error
|
|
85
|
+
kcenters = KCenters(n_clusters=2, n_init=0)
|
|
86
|
+
with self.assertRaises(ValueError):
|
|
87
|
+
kcenters.fit(adjacency)
|
|
88
|
+
|
|
89
|
+
# test center_position error
|
|
90
|
+
kcenters = KCenters(n_clusters=2, center_position="other")
|
|
91
|
+
with self.assertRaises(ValueError):
|
|
92
|
+
kcenters.fit(biadjacency)
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""Tests for Leiden"""
|
|
4
|
+
import unittest
|
|
5
|
+
|
|
6
|
+
from sknetwork.clustering import Leiden
|
|
7
|
+
from sknetwork.data.test_graphs import *
|
|
8
|
+
from sknetwork.utils import bipartite2undirected
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TestLeidenClustering(unittest.TestCase):
|
|
12
|
+
|
|
13
|
+
def test_disconnected(self):
|
|
14
|
+
adjacency = test_disconnected_graph()
|
|
15
|
+
n = adjacency.shape[0]
|
|
16
|
+
labels = Leiden().fit_predict(adjacency)
|
|
17
|
+
self.assertEqual(len(labels), n)
|
|
18
|
+
|
|
19
|
+
def test_modularity(self):
|
|
20
|
+
adjacency = test_graph()
|
|
21
|
+
leiden_d = Leiden(modularity='dugue')
|
|
22
|
+
leiden_n = Leiden(modularity='newman')
|
|
23
|
+
labels_d = leiden_d.fit_predict(adjacency)
|
|
24
|
+
labels_n = leiden_n.fit_predict(adjacency)
|
|
25
|
+
self.assertTrue((labels_d == labels_n).all())
|
|
26
|
+
|
|
27
|
+
def test_bipartite(self):
|
|
28
|
+
biadjacency = test_bigraph()
|
|
29
|
+
adjacency = bipartite2undirected(biadjacency)
|
|
30
|
+
leiden = Leiden(modularity='newman')
|
|
31
|
+
labels1 = leiden.fit_predict(adjacency)
|
|
32
|
+
leiden.fit(biadjacency)
|
|
33
|
+
labels2 = np.concatenate((leiden.labels_row_, leiden.labels_col_))
|
|
34
|
+
self.assertTrue((labels1 == labels2).all())
|
|
@@ -12,7 +12,7 @@ from sknetwork.utils import bipartite2undirected
|
|
|
12
12
|
class TestLouvainClustering(unittest.TestCase):
|
|
13
13
|
|
|
14
14
|
def test_disconnected(self):
|
|
15
|
-
adjacency =
|
|
15
|
+
adjacency = test_disconnected_graph()
|
|
16
16
|
n = adjacency.shape[0]
|
|
17
17
|
labels = Louvain().fit_predict(adjacency)
|
|
18
18
|
self.assertEqual(len(labels), n)
|
|
@@ -24,7 +24,6 @@ class TestLouvainClustering(unittest.TestCase):
|
|
|
24
24
|
labels_d = louvain_d.fit_predict(adjacency)
|
|
25
25
|
labels_n = louvain_n.fit_predict(adjacency)
|
|
26
26
|
self.assertTrue((labels_d == labels_n).all())
|
|
27
|
-
|
|
28
27
|
louvain_p = Louvain(modularity='potts')
|
|
29
28
|
louvain_p.fit_predict(adjacency)
|
|
30
29
|
|
|
@@ -48,7 +47,7 @@ class TestLouvainClustering(unittest.TestCase):
|
|
|
48
47
|
# tolerance
|
|
49
48
|
louvain = Louvain(resolution=2, tol_aggregation=0.1)
|
|
50
49
|
labels = louvain.fit_predict(adjacency)
|
|
51
|
-
self.assertEqual(len(set(labels)),
|
|
50
|
+
self.assertEqual(len(set(labels)), 7)
|
|
52
51
|
|
|
53
52
|
# shuffling
|
|
54
53
|
louvain = Louvain(resolution=2, shuffle_nodes=True, random_state=42)
|
|
@@ -78,7 +77,7 @@ class TestLouvainClustering(unittest.TestCase):
|
|
|
78
77
|
# tolerance
|
|
79
78
|
louvain = Louvain(resolution=2, tol_aggregation=0.1)
|
|
80
79
|
labels = louvain.fit_predict(adjacency)
|
|
81
|
-
self.assertEqual(len(set(labels)),
|
|
80
|
+
self.assertEqual(len(set(labels)), 7)
|
|
82
81
|
|
|
83
82
|
# shuffling
|
|
84
83
|
louvain = Louvain(resolution=2, shuffle_nodes=True, random_state=42)
|
sknetwork/data/__init__.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""data module"""
|
|
2
|
-
from sknetwork.data.
|
|
2
|
+
from sknetwork.data.base import Bunch
|
|
3
|
+
from sknetwork.data.load import *
|
|
3
4
|
from sknetwork.data.models import *
|
|
4
5
|
from sknetwork.data.parse import from_edge_list, from_adjacency_list, from_csv, from_graphml
|
|
5
6
|
from sknetwork.data.toy_graphs import *
|
sknetwork/data/base.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Created in May 2023
|
|
5
|
+
@author: Thomas Bonald <bonald@enst.fr>
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class Bunch(dict):
|
|
10
|
+
"""Container object for datasets.
|
|
11
|
+
Dictionary-like object that exposes its keys as attributes.
|
|
12
|
+
>>> dataset = Bunch(name='dataset')
|
|
13
|
+
>>> dataset['name']
|
|
14
|
+
'dataset'
|
|
15
|
+
>>> dataset.name
|
|
16
|
+
'dataset'
|
|
17
|
+
"""
|
|
18
|
+
def __init__(self, **kwargs):
|
|
19
|
+
super().__init__(kwargs)
|
|
20
|
+
|
|
21
|
+
def __setattr__(self, key, value):
|
|
22
|
+
self[key] = value
|
|
23
|
+
|
|
24
|
+
def __getattr__(self, key):
|
|
25
|
+
try:
|
|
26
|
+
return self[key]
|
|
27
|
+
except KeyError:
|
|
28
|
+
raise AttributeError(key)
|