scikit-network 0.31.0__cp311-cp311-win_amd64.whl → 0.32.1__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-network might be problematic. Click here for more details.
- {scikit_network-0.31.0.dist-info → scikit_network-0.32.1.dist-info}/AUTHORS.rst +3 -0
- {scikit_network-0.31.0.dist-info → scikit_network-0.32.1.dist-info}/METADATA +19 -3
- {scikit_network-0.31.0.dist-info → scikit_network-0.32.1.dist-info}/RECORD +112 -105
- {scikit_network-0.31.0.dist-info → scikit_network-0.32.1.dist-info}/WHEEL +1 -1
- sknetwork/__init__.py +1 -1
- sknetwork/classification/base.py +1 -1
- sknetwork/classification/base_rank.py +3 -3
- sknetwork/classification/diffusion.py +21 -13
- sknetwork/classification/knn.py +19 -13
- sknetwork/classification/metrics.py +1 -1
- sknetwork/classification/pagerank.py +12 -8
- sknetwork/classification/propagation.py +22 -15
- sknetwork/classification/tests/test_diffusion.py +10 -0
- sknetwork/classification/vote.cp311-win_amd64.pyd +0 -0
- sknetwork/classification/vote.cpp +14549 -8668
- sknetwork/clustering/__init__.py +3 -1
- sknetwork/clustering/base.py +1 -1
- sknetwork/clustering/kcenters.py +253 -0
- sknetwork/clustering/leiden.py +241 -0
- sknetwork/clustering/leiden_core.cp311-win_amd64.pyd +0 -0
- sknetwork/clustering/leiden_core.cpp +31564 -0
- sknetwork/clustering/leiden_core.pyx +124 -0
- sknetwork/clustering/louvain.py +118 -83
- sknetwork/clustering/louvain_core.cp311-win_amd64.pyd +0 -0
- sknetwork/clustering/louvain_core.cpp +21876 -16332
- sknetwork/clustering/louvain_core.pyx +86 -94
- sknetwork/clustering/postprocess.py +2 -2
- sknetwork/clustering/propagation_clustering.py +4 -4
- sknetwork/clustering/tests/test_API.py +7 -3
- sknetwork/clustering/tests/test_kcenters.py +92 -0
- sknetwork/clustering/tests/test_leiden.py +34 -0
- sknetwork/clustering/tests/test_louvain.py +2 -3
- sknetwork/data/load.py +2 -4
- sknetwork/data/parse.py +41 -20
- sknetwork/data/tests/test_parse.py +9 -12
- sknetwork/embedding/__init__.py +0 -1
- sknetwork/embedding/base.py +20 -19
- sknetwork/embedding/force_atlas.py +3 -2
- sknetwork/embedding/louvain_embedding.py +1 -1
- sknetwork/embedding/random_projection.py +5 -3
- sknetwork/embedding/spectral.py +0 -73
- sknetwork/embedding/tests/test_API.py +4 -28
- sknetwork/embedding/tests/test_louvain_embedding.py +4 -9
- sknetwork/embedding/tests/test_spectral.py +2 -5
- sknetwork/embedding/tests/test_svd.py +1 -1
- sknetwork/gnn/base_layer.py +3 -3
- sknetwork/gnn/gnn_classifier.py +40 -86
- sknetwork/gnn/layer.py +1 -1
- sknetwork/gnn/loss.py +1 -1
- sknetwork/gnn/optimizer.py +4 -3
- sknetwork/gnn/tests/test_base_layer.py +4 -4
- sknetwork/gnn/tests/test_gnn_classifier.py +12 -39
- sknetwork/gnn/utils.py +8 -8
- sknetwork/hierarchy/base.py +27 -0
- sknetwork/hierarchy/louvain_hierarchy.py +45 -41
- sknetwork/hierarchy/paris.cp311-win_amd64.pyd +0 -0
- sknetwork/hierarchy/paris.cpp +27521 -20771
- sknetwork/hierarchy/paris.pyx +7 -7
- sknetwork/hierarchy/postprocess.py +16 -16
- sknetwork/hierarchy/tests/test_algos.py +5 -0
- sknetwork/linalg/__init__.py +1 -1
- sknetwork/linalg/diteration.cp311-win_amd64.pyd +0 -0
- sknetwork/linalg/diteration.cpp +13916 -8050
- sknetwork/linalg/{normalization.py → normalizer.py} +17 -14
- sknetwork/linalg/operators.py +1 -1
- sknetwork/linalg/ppr_solver.py +1 -1
- sknetwork/linalg/push.cp311-win_amd64.pyd +0 -0
- sknetwork/linalg/push.cpp +23187 -16973
- sknetwork/linalg/tests/test_normalization.py +3 -7
- sknetwork/linalg/tests/test_operators.py +2 -6
- sknetwork/linalg/tests/test_ppr.py +1 -1
- sknetwork/linkpred/base.py +12 -1
- sknetwork/linkpred/nn.py +6 -6
- sknetwork/path/distances.py +11 -4
- sknetwork/path/shortest_path.py +1 -1
- sknetwork/path/tests/test_distances.py +7 -0
- sknetwork/path/tests/test_search.py +2 -2
- sknetwork/ranking/base.py +11 -6
- sknetwork/ranking/betweenness.cp311-win_amd64.pyd +0 -0
- sknetwork/ranking/betweenness.cpp +5256 -2190
- sknetwork/ranking/pagerank.py +13 -12
- sknetwork/ranking/tests/test_API.py +0 -2
- sknetwork/ranking/tests/test_betweenness.py +1 -1
- sknetwork/ranking/tests/test_pagerank.py +11 -5
- sknetwork/regression/base.py +18 -1
- sknetwork/regression/diffusion.py +24 -10
- sknetwork/regression/tests/test_diffusion.py +8 -0
- sknetwork/topology/__init__.py +3 -1
- sknetwork/topology/cliques.cp311-win_amd64.pyd +0 -0
- sknetwork/topology/cliques.cpp +23528 -16848
- sknetwork/topology/core.cp311-win_amd64.pyd +0 -0
- sknetwork/topology/core.cpp +22849 -16581
- sknetwork/topology/cycles.py +243 -0
- sknetwork/topology/minheap.cp311-win_amd64.pyd +0 -0
- sknetwork/topology/minheap.cpp +19495 -13469
- sknetwork/topology/structure.py +2 -42
- sknetwork/topology/tests/test_cycles.py +65 -0
- sknetwork/topology/tests/test_structure.py +2 -16
- sknetwork/topology/triangles.cp311-win_amd64.pyd +0 -0
- sknetwork/topology/triangles.cpp +5283 -1397
- sknetwork/topology/triangles.pyx +7 -4
- sknetwork/topology/weisfeiler_lehman_core.cp311-win_amd64.pyd +0 -0
- sknetwork/topology/weisfeiler_lehman_core.cpp +14781 -8915
- sknetwork/utils/format.py +1 -1
- sknetwork/utils/membership.py +2 -2
- sknetwork/visualization/__init__.py +2 -2
- sknetwork/visualization/dendrograms.py +55 -7
- sknetwork/visualization/graphs.py +261 -44
- sknetwork/visualization/tests/test_dendrograms.py +9 -9
- sknetwork/visualization/tests/test_graphs.py +63 -57
- sknetwork/embedding/louvain_hierarchy.py +0 -142
- sknetwork/embedding/tests/test_louvain_hierarchy.py +0 -19
- {scikit_network-0.31.0.dist-info → scikit_network-0.32.1.dist-info}/LICENSE +0 -0
- {scikit_network-0.31.0.dist-info → scikit_network-0.32.1.dist-info}/top_level.txt +0 -0
|
@@ -1,7 +1,6 @@
|
|
|
1
|
-
# distutils: language
|
|
1
|
+
# distutils: language=c++
|
|
2
2
|
# cython: language_level=3
|
|
3
3
|
from libcpp.set cimport set
|
|
4
|
-
from libcpp.vector cimport vector
|
|
5
4
|
cimport cython
|
|
6
5
|
|
|
7
6
|
ctypedef fused int_or_long:
|
|
@@ -10,123 +9,116 @@ ctypedef fused int_or_long:
|
|
|
10
9
|
|
|
11
10
|
@cython.boundscheck(False)
|
|
12
11
|
@cython.wraparound(False)
|
|
13
|
-
def
|
|
14
|
-
|
|
15
|
-
|
|
12
|
+
def optimize_core(int_or_long[:] labels, int_or_long[:] indices, int_or_long[:] indptr, float[:] data,
|
|
13
|
+
float[:] out_weights, float[:] in_weights, float[:] out_cluster_weights, float[:] in_cluster_weights,
|
|
14
|
+
float[:] cluster_weights, float[:] self_loops, float resolution, float tol_optimization): # pragma: no cover
|
|
15
|
+
"""Find clusters maximizing modularity.
|
|
16
16
|
|
|
17
17
|
Parameters
|
|
18
18
|
----------
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
tol :
|
|
22
|
-
Minimum increase in modularity to enter a new optimization pass.
|
|
23
|
-
ou_node_probs :
|
|
24
|
-
Distribution of node weights based on their out-edges (sums to 1).
|
|
25
|
-
in_node_probs :
|
|
26
|
-
Distribution of node weights based on their in-edges (sums to 1).
|
|
27
|
-
self_loops :
|
|
28
|
-
Weights of self loops.
|
|
29
|
-
data :
|
|
30
|
-
CSR format data array of the normalized adjacency matrix.
|
|
19
|
+
labels :
|
|
20
|
+
Initial labels.
|
|
31
21
|
indices :
|
|
32
22
|
CSR format index array of the normalized adjacency matrix.
|
|
33
23
|
indptr :
|
|
34
24
|
CSR format index pointer array of the normalized adjacency matrix.
|
|
25
|
+
data :
|
|
26
|
+
CSR format data array of the normalized adjacency matrix.
|
|
27
|
+
out_weights :
|
|
28
|
+
Out-weights of nodes (sum to 1).
|
|
29
|
+
in_weights :
|
|
30
|
+
In-weights of nodes (sum to 1).
|
|
31
|
+
out_cluster_weights :
|
|
32
|
+
Out-weights of clusters (sum to 1).
|
|
33
|
+
in_cluster_weights :
|
|
34
|
+
In-weights of clusters (sum to 1).
|
|
35
|
+
cluster_weights :
|
|
36
|
+
Weights of clusters (initialized to 0).
|
|
37
|
+
self_loops :
|
|
38
|
+
Weights of self loops.
|
|
39
|
+
resolution :
|
|
40
|
+
Resolution parameter (positive).
|
|
41
|
+
tol_optimization :
|
|
42
|
+
Minimum increase in modularity to enter a new optimization pass.
|
|
35
43
|
|
|
36
44
|
Returns
|
|
37
45
|
-------
|
|
38
46
|
labels :
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
47
|
+
Labels of nodes.
|
|
48
|
+
increase :
|
|
49
|
+
Increase in modularity.
|
|
42
50
|
"""
|
|
43
|
-
cdef int_or_long n
|
|
44
|
-
cdef int_or_long
|
|
45
|
-
cdef int_or_long
|
|
46
|
-
cdef int_or_long
|
|
47
|
-
cdef int_or_long
|
|
51
|
+
cdef int_or_long n
|
|
52
|
+
cdef int_or_long stop = 0
|
|
53
|
+
cdef int_or_long label
|
|
54
|
+
cdef int_or_long label_target
|
|
55
|
+
cdef int_or_long label_best
|
|
48
56
|
cdef int_or_long i
|
|
49
57
|
cdef int_or_long j
|
|
50
|
-
cdef int_or_long
|
|
51
|
-
cdef int_or_long
|
|
52
|
-
cdef int_or_long label
|
|
58
|
+
cdef int_or_long start
|
|
59
|
+
cdef int_or_long end
|
|
53
60
|
|
|
54
|
-
cdef float
|
|
61
|
+
cdef float increase = 0
|
|
55
62
|
cdef float increase_pass
|
|
56
63
|
cdef float delta
|
|
57
|
-
cdef float delta_best
|
|
58
|
-
cdef float delta_exit
|
|
59
64
|
cdef float delta_local
|
|
60
|
-
cdef float
|
|
61
|
-
cdef float
|
|
62
|
-
cdef float
|
|
63
|
-
cdef float ratio_ou
|
|
64
|
-
|
|
65
|
-
cdef vector[int_or_long] labels
|
|
66
|
-
cdef vector[float] neighbor_clusters_weights
|
|
67
|
-
cdef vector[float] ou_clusters_weights
|
|
68
|
-
cdef vector[float] in_clusters_weights
|
|
69
|
-
cdef set[int_or_long] unique_clusters = ()
|
|
70
|
-
|
|
71
|
-
for i in range(n):
|
|
72
|
-
labels.push_back(i)
|
|
73
|
-
neighbor_clusters_weights.push_back(0.)
|
|
74
|
-
ou_clusters_weights.push_back(ou_node_probs[i])
|
|
75
|
-
in_clusters_weights.push_back(in_node_probs[i])
|
|
76
|
-
|
|
77
|
-
while increase == 1:
|
|
78
|
-
increase = 0
|
|
79
|
-
increase_pass = 0
|
|
80
|
-
|
|
81
|
-
for i in range(n):
|
|
82
|
-
unique_clusters.clear()
|
|
83
|
-
cluster_node = labels[i]
|
|
84
|
-
j1 = indptr[i]
|
|
85
|
-
j2 = indptr[i + 1]
|
|
86
|
-
|
|
87
|
-
for j in range(j1, j2):
|
|
88
|
-
label = labels[indices[j]]
|
|
89
|
-
neighbor_clusters_weights[label] += data[j]
|
|
90
|
-
unique_clusters.insert(label)
|
|
65
|
+
cdef float delta_best
|
|
66
|
+
cdef float in_weight
|
|
67
|
+
cdef float out_weight
|
|
91
68
|
|
|
92
|
-
|
|
69
|
+
cdef set[int_or_long] label_set = ()
|
|
93
70
|
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
ratio_ou = resolution * node_prob_ou
|
|
98
|
-
ratio_in = resolution * node_prob_in
|
|
71
|
+
n = labels.shape[0]
|
|
72
|
+
while not stop:
|
|
73
|
+
increase_pass = 0
|
|
99
74
|
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
75
|
+
for i in range(n):
|
|
76
|
+
label_set.clear()
|
|
77
|
+
label = labels[i]
|
|
78
|
+
start = indptr[i]
|
|
79
|
+
end = indptr[i+1]
|
|
80
|
+
|
|
81
|
+
# neighboring clusters
|
|
82
|
+
for j in range(start, end):
|
|
83
|
+
label_target = labels[indices[j]]
|
|
84
|
+
label_set.insert(label_target)
|
|
85
|
+
cluster_weights[label_target] += data[j]
|
|
86
|
+
label_set.erase(label)
|
|
87
|
+
|
|
88
|
+
if not label_set.empty():
|
|
89
|
+
out_weight = out_weights[i]
|
|
90
|
+
in_weight = in_weights[i]
|
|
91
|
+
|
|
92
|
+
# node leaving the current cluster
|
|
93
|
+
delta = 2 * (cluster_weights[label] - self_loops[i])
|
|
94
|
+
delta -= resolution * out_weight * (in_cluster_weights[label] - in_weight)
|
|
95
|
+
delta -= resolution * in_weight * (out_cluster_weights[label] - out_weight)
|
|
103
96
|
|
|
104
97
|
delta_best = 0
|
|
105
|
-
|
|
98
|
+
label_best = label
|
|
106
99
|
|
|
107
|
-
for
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
delta_local = delta - delta_exit
|
|
100
|
+
for label_target in label_set:
|
|
101
|
+
delta_local = 2 * cluster_weights[label_target]
|
|
102
|
+
delta_local -= resolution * out_weight * in_cluster_weights[label_target]
|
|
103
|
+
delta_local -= resolution * in_weight * out_cluster_weights[label_target]
|
|
104
|
+
delta_local -= delta
|
|
113
105
|
if delta_local > delta_best:
|
|
114
106
|
delta_best = delta_local
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
neighbor_clusters_weights[cluster] = 0
|
|
107
|
+
label_best = label_target
|
|
108
|
+
cluster_weights[label_target] = 0
|
|
118
109
|
|
|
119
|
-
if
|
|
110
|
+
if label_best != label:
|
|
120
111
|
increase_pass += delta_best
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
112
|
+
labels[i] = label_best
|
|
113
|
+
# update weights
|
|
114
|
+
out_cluster_weights[label] -= out_weight
|
|
115
|
+
in_cluster_weights[label] -= in_weight
|
|
116
|
+
out_cluster_weights[label_best] += out_weight
|
|
117
|
+
in_cluster_weights[label_best] += in_weight
|
|
118
|
+
|
|
119
|
+
cluster_weights[label] = 0
|
|
120
|
+
|
|
121
|
+
increase += increase_pass
|
|
122
|
+
stop = increase_pass <= tol_optimization
|
|
123
|
+
|
|
124
|
+
return labels, increase
|
|
@@ -41,7 +41,7 @@ def aggregate_graph(input_matrix: sparse.csr_matrix, labels: Optional[np.ndarray
|
|
|
41
41
|
labels_row: Optional[np.ndarray] = None, labels_col: Optional[np.ndarray] = None) \
|
|
42
42
|
-> sparse.csr_matrix:
|
|
43
43
|
"""Aggregate graph per label. All nodes with the same label become a single node.
|
|
44
|
-
Negative labels are ignored (corresponding nodes are
|
|
44
|
+
Negative labels are ignored (corresponding nodes are discarded).
|
|
45
45
|
|
|
46
46
|
Parameters
|
|
47
47
|
----------
|
|
@@ -63,4 +63,4 @@ def aggregate_graph(input_matrix: sparse.csr_matrix, labels: Optional[np.ndarray
|
|
|
63
63
|
else:
|
|
64
64
|
membership_col = membership_row
|
|
65
65
|
aggregate_matrix = membership_row.T.dot(input_matrix).dot(membership_col)
|
|
66
|
-
return aggregate_matrix
|
|
66
|
+
return aggregate_matrix.tocsr()
|
|
@@ -29,11 +29,11 @@ class PropagationClustering(BaseClustering, Propagation):
|
|
|
29
29
|
weighted : bool
|
|
30
30
|
If ``True``, the vote of each neighbor is proportional to the edge weight.
|
|
31
31
|
Otherwise, all votes have weight 1.
|
|
32
|
-
sort_clusters :
|
|
32
|
+
sort_clusters : bool
|
|
33
33
|
If ``True``, sort labels in decreasing order of cluster size.
|
|
34
|
-
return_probs :
|
|
34
|
+
return_probs : bool
|
|
35
35
|
If ``True``, return the probability distribution over clusters (soft clustering).
|
|
36
|
-
return_aggregate :
|
|
36
|
+
return_aggregate : bool
|
|
37
37
|
If ``True``, return the aggregate adjacency matrix or biadjacency matrix between clusters.
|
|
38
38
|
|
|
39
39
|
Attributes
|
|
@@ -78,7 +78,7 @@ class PropagationClustering(BaseClustering, Propagation):
|
|
|
78
78
|
|
|
79
79
|
Parameters
|
|
80
80
|
----------
|
|
81
|
-
input_matrix :
|
|
81
|
+
input_matrix : sparse.csr_matrix, np.ndarray
|
|
82
82
|
Adjacency matrix or biadjacency matrix of the graph.
|
|
83
83
|
|
|
84
84
|
Returns
|
|
@@ -9,8 +9,12 @@ from sknetwork.data.test_graphs import *
|
|
|
9
9
|
|
|
10
10
|
class TestClusteringAPI(unittest.TestCase):
|
|
11
11
|
|
|
12
|
+
def setUp(self):
|
|
13
|
+
self.algos = [Louvain(return_aggregate=True), Leiden(return_aggregate=True),
|
|
14
|
+
PropagationClustering(return_aggregate=True)]
|
|
15
|
+
|
|
12
16
|
def test_regular(self):
|
|
13
|
-
for algo in
|
|
17
|
+
for algo in self.algos:
|
|
14
18
|
for adjacency in [test_graph(), test_digraph(), test_disconnected_graph()]:
|
|
15
19
|
n = adjacency.shape[0]
|
|
16
20
|
labels = algo.fit_predict(adjacency)
|
|
@@ -22,13 +26,13 @@ class TestClusteringAPI(unittest.TestCase):
|
|
|
22
26
|
n_labels = len(set(labels))
|
|
23
27
|
self.assertEqual(labels.shape, (n,))
|
|
24
28
|
self.assertEqual(algo.aggregate_.shape, (n_labels, n_labels))
|
|
25
|
-
membership = algo.fit_transform(
|
|
29
|
+
membership = algo.fit_transform(adjacency_bool)
|
|
26
30
|
self.assertEqual(membership.shape, (n, n_labels))
|
|
27
31
|
|
|
28
32
|
def test_bipartite(self):
|
|
29
33
|
biadjacency = test_bigraph()
|
|
30
34
|
n_row, n_col = biadjacency.shape
|
|
31
|
-
for algo in
|
|
35
|
+
for algo in self.algos:
|
|
32
36
|
algo.fit(biadjacency)
|
|
33
37
|
self.assertEqual(algo.labels_row_.shape, (n_row,))
|
|
34
38
|
self.assertEqual(algo.labels_col_.shape, (n_col,))
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""Tests for KCenters"""
|
|
4
|
+
import unittest
|
|
5
|
+
|
|
6
|
+
from sknetwork.clustering import KCenters
|
|
7
|
+
from sknetwork.data import karate_club, painters, star_wars
|
|
8
|
+
from sknetwork.data.test_graphs import *
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TestKCentersClustering(unittest.TestCase):
|
|
12
|
+
|
|
13
|
+
def test_kcenters(self):
|
|
14
|
+
# Test undirected graph
|
|
15
|
+
n_clusters = 2
|
|
16
|
+
adjacency = karate_club()
|
|
17
|
+
n_row = adjacency.shape[0]
|
|
18
|
+
kcenters = KCenters(n_clusters=n_clusters)
|
|
19
|
+
labels = kcenters.fit_predict(adjacency)
|
|
20
|
+
self.assertEqual(len(labels), n_row)
|
|
21
|
+
self.assertEqual(len(set(labels)), n_clusters)
|
|
22
|
+
|
|
23
|
+
# Test directed graph
|
|
24
|
+
n_clusters = 3
|
|
25
|
+
adjacency = painters()
|
|
26
|
+
n_row = adjacency.shape[0]
|
|
27
|
+
kcenters = KCenters(n_clusters=n_clusters, directed=True)
|
|
28
|
+
labels = kcenters.fit_predict(adjacency)
|
|
29
|
+
self.assertEqual(len(labels), n_row)
|
|
30
|
+
self.assertEqual(len(set(labels)), n_clusters)
|
|
31
|
+
|
|
32
|
+
# Test bipartite graph
|
|
33
|
+
n_clusters = 2
|
|
34
|
+
biadjacency = star_wars()
|
|
35
|
+
n_row, n_col = biadjacency.shape
|
|
36
|
+
kcenters = KCenters(n_clusters=n_clusters)
|
|
37
|
+
kcenters.fit(biadjacency)
|
|
38
|
+
labels = kcenters.labels_
|
|
39
|
+
self.assertEqual(len(kcenters.labels_row_), n_row)
|
|
40
|
+
self.assertEqual(len(kcenters.labels_col_), n_col)
|
|
41
|
+
self.assertEqual(len(set(labels)), n_clusters)
|
|
42
|
+
|
|
43
|
+
def test_kcenters_centers(self):
|
|
44
|
+
# Test centers for undirected graphs
|
|
45
|
+
n_clusters = 2
|
|
46
|
+
adjacency = karate_club()
|
|
47
|
+
kcenters = KCenters(n_clusters=n_clusters)
|
|
48
|
+
kcenters.fit(adjacency)
|
|
49
|
+
centers = kcenters.centers_
|
|
50
|
+
self.assertEqual(n_clusters, len(set(centers)))
|
|
51
|
+
|
|
52
|
+
# Test centers for bipartite graphs
|
|
53
|
+
n_clusters = 2
|
|
54
|
+
biadjacency = star_wars()
|
|
55
|
+
n_row, n_col = biadjacency.shape
|
|
56
|
+
for position in ["row", "col", "both"]:
|
|
57
|
+
kcenters = KCenters(n_clusters=n_clusters, center_position=position)
|
|
58
|
+
kcenters.fit(biadjacency)
|
|
59
|
+
centers_row = kcenters.centers_row_
|
|
60
|
+
centers_col = kcenters.centers_col_
|
|
61
|
+
if position == "row":
|
|
62
|
+
self.assertEqual(n_clusters, len(set(centers_row)))
|
|
63
|
+
self.assertTrue(np.all(centers_row < n_row))
|
|
64
|
+
self.assertTrue(centers_col is None)
|
|
65
|
+
if position == "col":
|
|
66
|
+
self.assertEqual(n_clusters, len(set(centers_col)))
|
|
67
|
+
self.assertTrue(np.all((centers_col < n_col) & (0 <= centers_col)))
|
|
68
|
+
self.assertTrue(centers_row is None)
|
|
69
|
+
if position == "both":
|
|
70
|
+
self.assertEqual(n_clusters, len(set(centers_row)) + len(set(centers_col)))
|
|
71
|
+
self.assertTrue(np.all(centers_row < n_row))
|
|
72
|
+
self.assertTrue(np.all((centers_col < n_col) & (0 <= centers_col)))
|
|
73
|
+
|
|
74
|
+
def test_kcenters_error(self):
|
|
75
|
+
# Test value errors
|
|
76
|
+
adjacency = karate_club()
|
|
77
|
+
biadjacency = star_wars()
|
|
78
|
+
|
|
79
|
+
# test n_clusters error
|
|
80
|
+
kcenters = KCenters(n_clusters=1)
|
|
81
|
+
with self.assertRaises(ValueError):
|
|
82
|
+
kcenters.fit(adjacency)
|
|
83
|
+
|
|
84
|
+
# test n_init error
|
|
85
|
+
kcenters = KCenters(n_clusters=2, n_init=0)
|
|
86
|
+
with self.assertRaises(ValueError):
|
|
87
|
+
kcenters.fit(adjacency)
|
|
88
|
+
|
|
89
|
+
# test center_position error
|
|
90
|
+
kcenters = KCenters(n_clusters=2, center_position="other")
|
|
91
|
+
with self.assertRaises(ValueError):
|
|
92
|
+
kcenters.fit(biadjacency)
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""Tests for Leiden"""
|
|
4
|
+
import unittest
|
|
5
|
+
|
|
6
|
+
from sknetwork.clustering import Leiden
|
|
7
|
+
from sknetwork.data.test_graphs import *
|
|
8
|
+
from sknetwork.utils import bipartite2undirected
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TestLeidenClustering(unittest.TestCase):
|
|
12
|
+
|
|
13
|
+
def test_disconnected(self):
|
|
14
|
+
adjacency = test_disconnected_graph()
|
|
15
|
+
n = adjacency.shape[0]
|
|
16
|
+
labels = Leiden().fit_predict(adjacency)
|
|
17
|
+
self.assertEqual(len(labels), n)
|
|
18
|
+
|
|
19
|
+
def test_modularity(self):
|
|
20
|
+
adjacency = test_graph()
|
|
21
|
+
leiden_d = Leiden(modularity='dugue')
|
|
22
|
+
leiden_n = Leiden(modularity='newman')
|
|
23
|
+
labels_d = leiden_d.fit_predict(adjacency)
|
|
24
|
+
labels_n = leiden_n.fit_predict(adjacency)
|
|
25
|
+
self.assertTrue((labels_d == labels_n).all())
|
|
26
|
+
|
|
27
|
+
def test_bipartite(self):
|
|
28
|
+
biadjacency = test_bigraph()
|
|
29
|
+
adjacency = bipartite2undirected(biadjacency)
|
|
30
|
+
leiden = Leiden(modularity='newman')
|
|
31
|
+
labels1 = leiden.fit_predict(adjacency)
|
|
32
|
+
leiden.fit(biadjacency)
|
|
33
|
+
labels2 = np.concatenate((leiden.labels_row_, leiden.labels_col_))
|
|
34
|
+
self.assertTrue((labels1 == labels2).all())
|
|
@@ -24,7 +24,6 @@ class TestLouvainClustering(unittest.TestCase):
|
|
|
24
24
|
labels_d = louvain_d.fit_predict(adjacency)
|
|
25
25
|
labels_n = louvain_n.fit_predict(adjacency)
|
|
26
26
|
self.assertTrue((labels_d == labels_n).all())
|
|
27
|
-
|
|
28
27
|
louvain_p = Louvain(modularity='potts')
|
|
29
28
|
louvain_p.fit_predict(adjacency)
|
|
30
29
|
|
|
@@ -48,7 +47,7 @@ class TestLouvainClustering(unittest.TestCase):
|
|
|
48
47
|
# tolerance
|
|
49
48
|
louvain = Louvain(resolution=2, tol_aggregation=0.1)
|
|
50
49
|
labels = louvain.fit_predict(adjacency)
|
|
51
|
-
self.assertEqual(len(set(labels)),
|
|
50
|
+
self.assertEqual(len(set(labels)), 7)
|
|
52
51
|
|
|
53
52
|
# shuffling
|
|
54
53
|
louvain = Louvain(resolution=2, shuffle_nodes=True, random_state=42)
|
|
@@ -78,7 +77,7 @@ class TestLouvainClustering(unittest.TestCase):
|
|
|
78
77
|
# tolerance
|
|
79
78
|
louvain = Louvain(resolution=2, tol_aggregation=0.1)
|
|
80
79
|
labels = louvain.fit_predict(adjacency)
|
|
81
|
-
self.assertEqual(len(set(labels)),
|
|
80
|
+
self.assertEqual(len(set(labels)), 7)
|
|
82
81
|
|
|
83
82
|
# shuffling
|
|
84
83
|
louvain = Louvain(resolution=2, shuffle_nodes=True, random_state=42)
|
sknetwork/data/load.py
CHANGED
|
@@ -250,7 +250,7 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu
|
|
|
250
250
|
if matrix:
|
|
251
251
|
file = matrix[0]
|
|
252
252
|
directed, bipartite, weighted = load_header(path / file)
|
|
253
|
-
dataset = from_csv(path / file, directed=directed, bipartite=bipartite, weighted=weighted)
|
|
253
|
+
dataset = from_csv(path / file, directed=directed, bipartite=bipartite, weighted=weighted, reindex=True)
|
|
254
254
|
|
|
255
255
|
metadata = [file for file in files if 'meta.' in file]
|
|
256
256
|
if metadata:
|
|
@@ -300,11 +300,9 @@ def save_to_numpy_bundle(data: Bunch, bundle_name: str, data_home: Optional[Unio
|
|
|
300
300
|
sparse.save_npz(data_path / attribute, data[attribute])
|
|
301
301
|
elif type(data[attribute]) == np.ndarray:
|
|
302
302
|
np.save(data_path / attribute, data[attribute])
|
|
303
|
-
|
|
303
|
+
else:
|
|
304
304
|
with open(data_path / (attribute + '.p'), 'wb') as file:
|
|
305
305
|
pickle.dump(data[attribute], file)
|
|
306
|
-
else:
|
|
307
|
-
raise TypeError('Unsupported data attribute type '+str(type(data[attribute])) + '.')
|
|
308
306
|
|
|
309
307
|
|
|
310
308
|
def load_from_numpy_bundle(bundle_name: str, data_home: Optional[Union[str, Path]] = None):
|
sknetwork/data/parse.py
CHANGED
|
@@ -8,7 +8,7 @@ Created in December 2018
|
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
10
|
from csv import reader
|
|
11
|
-
from typing import Dict, List, Tuple, Union
|
|
11
|
+
from typing import Dict, List, Tuple, Union, Optional
|
|
12
12
|
from xml.etree import ElementTree
|
|
13
13
|
|
|
14
14
|
import numpy as np
|
|
@@ -19,7 +19,7 @@ from sknetwork.utils.format import directed2undirected
|
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
def from_edge_list(edge_list: Union[np.ndarray, List[Tuple]], directed: bool = False,
|
|
22
|
-
bipartite: bool = False, weighted: bool = True, reindex: bool =
|
|
22
|
+
bipartite: bool = False, weighted: bool = True, reindex: bool = False, shape: Optional[tuple] = None,
|
|
23
23
|
sum_duplicates: bool = True, matrix_only: bool = None) -> Union[Bunch, sparse.csr_matrix]:
|
|
24
24
|
"""Load a graph from an edge list.
|
|
25
25
|
|
|
@@ -37,6 +37,9 @@ def from_edge_list(edge_list: Union[np.ndarray, List[Tuple]], directed: bool = F
|
|
|
37
37
|
reindex : bool
|
|
38
38
|
If ``True``, reindex nodes and returns the original node indices as names.
|
|
39
39
|
Reindexing is enforced if nodes are not integers.
|
|
40
|
+
shape : tuple
|
|
41
|
+
Shape of the adjacency or biadjacency matrix.
|
|
42
|
+
If not specified or if nodes are reindexed, the shape is the smallest compatible with node indices.
|
|
40
43
|
sum_duplicates : bool
|
|
41
44
|
If ``True`` (default), sums weights of duplicate edges.
|
|
42
45
|
Otherwise, the weight of each edge is that of the first occurrence of this edge.
|
|
@@ -83,12 +86,14 @@ def from_edge_list(edge_list: Union[np.ndarray, List[Tuple]], directed: bool = F
|
|
|
83
86
|
else:
|
|
84
87
|
raise TypeError('The edge list must be given as a NumPy array or a list of tuples.')
|
|
85
88
|
return from_edge_array(edge_array=edge_array, weights=weights, directed=directed, bipartite=bipartite,
|
|
86
|
-
weighted=weighted, reindex=reindex, sum_duplicates=sum_duplicates,
|
|
89
|
+
weighted=weighted, reindex=reindex, shape=shape, sum_duplicates=sum_duplicates,
|
|
90
|
+
matrix_only=matrix_only)
|
|
87
91
|
|
|
88
92
|
|
|
89
93
|
def from_adjacency_list(adjacency_list: Union[List[List], Dict[str, List]], directed: bool = False,
|
|
90
|
-
bipartite: bool = False, weighted: bool = True, reindex: bool =
|
|
91
|
-
sum_duplicates: bool = True, matrix_only: bool = None)
|
|
94
|
+
bipartite: bool = False, weighted: bool = True, reindex: bool = False,
|
|
95
|
+
shape: Optional[tuple] = None, sum_duplicates: bool = True, matrix_only: bool = None) \
|
|
96
|
+
-> Union[Bunch, sparse.csr_matrix]:
|
|
92
97
|
"""Load a graph from an adjacency list.
|
|
93
98
|
|
|
94
99
|
Parameters
|
|
@@ -104,6 +109,9 @@ def from_adjacency_list(adjacency_list: Union[List[List], Dict[str, List]], dire
|
|
|
104
109
|
reindex : bool
|
|
105
110
|
If ``True``, reindex nodes and returns the original node indices as names.
|
|
106
111
|
Reindexing is enforced if nodes are not integers.
|
|
112
|
+
shape : tuple
|
|
113
|
+
Shape of the adjacency or biadjacency matrix.
|
|
114
|
+
If not specified or if nodes are reindexed, the shape is the smallest compatible with node indices.
|
|
107
115
|
sum_duplicates : bool
|
|
108
116
|
If ``True`` (default), sums weights of duplicate edges.
|
|
109
117
|
Otherwise, the weight of each edge is that of the first occurrence of this edge.
|
|
@@ -134,12 +142,12 @@ def from_adjacency_list(adjacency_list: Union[List[List], Dict[str, List]], dire
|
|
|
134
142
|
else:
|
|
135
143
|
raise TypeError('The adjacency list must be given as a list of lists or a dict of lists.')
|
|
136
144
|
return from_edge_list(edge_list=edge_list, directed=directed, bipartite=bipartite, weighted=weighted,
|
|
137
|
-
reindex=reindex, sum_duplicates=sum_duplicates, matrix_only=matrix_only)
|
|
145
|
+
reindex=reindex, shape=shape, sum_duplicates=sum_duplicates, matrix_only=matrix_only)
|
|
138
146
|
|
|
139
147
|
|
|
140
148
|
def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed: bool = False, bipartite: bool = False,
|
|
141
|
-
weighted: bool = True, reindex: bool =
|
|
142
|
-
matrix_only: bool = None) -> Union[Bunch, sparse.csr_matrix]:
|
|
149
|
+
weighted: bool = True, reindex: bool = False, shape: Optional[tuple] = None,
|
|
150
|
+
sum_duplicates: bool = True, matrix_only: bool = None) -> Union[Bunch, sparse.csr_matrix]:
|
|
143
151
|
"""Load a graph from an edge array of shape (n_edges, 2) and weights (optional).
|
|
144
152
|
|
|
145
153
|
Parameters
|
|
@@ -157,6 +165,9 @@ def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed
|
|
|
157
165
|
reindex : bool
|
|
158
166
|
If ``True``, reindex nodes and returns the original node indices as names.
|
|
159
167
|
Reindexing is enforced if nodes are not integers.
|
|
168
|
+
shape : tuple
|
|
169
|
+
Shape of the adjacency or biadjacency matrix.
|
|
170
|
+
If not specified or if nodes are reindexed, the shape is the smallest compatible with node indices.
|
|
160
171
|
sum_duplicates : bool
|
|
161
172
|
If ``True`` (default), sums weights of duplicate edges.
|
|
162
173
|
Otherwise, the weight of each edge is that of the first occurrence of this edge.
|
|
@@ -195,28 +206,34 @@ def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed
|
|
|
195
206
|
if bipartite:
|
|
196
207
|
row = edge_array[:, 0]
|
|
197
208
|
col = edge_array[:, 1]
|
|
198
|
-
if row.dtype != int or
|
|
209
|
+
if row.dtype != int or reindex:
|
|
199
210
|
names_row, row = np.unique(row, return_inverse=True)
|
|
200
211
|
graph.names_row = names_row
|
|
201
212
|
graph.names = names_row
|
|
202
213
|
n_row = len(names_row)
|
|
214
|
+
elif shape is not None:
|
|
215
|
+
n_row = max(shape[0], max(row) + 1)
|
|
203
216
|
else:
|
|
204
217
|
n_row = max(row) + 1
|
|
205
|
-
if col.dtype != int or
|
|
218
|
+
if col.dtype != int or reindex:
|
|
206
219
|
names_col, col = np.unique(col, return_inverse=True)
|
|
207
220
|
graph.names_col = names_col
|
|
208
221
|
n_col = len(names_col)
|
|
222
|
+
elif shape is not None:
|
|
223
|
+
n_col = max(shape[1], max(col) + 1)
|
|
209
224
|
else:
|
|
210
225
|
n_col = max(col) + 1
|
|
211
226
|
matrix = sparse.csr_matrix((weights, (row, col)), shape=(n_row, n_col))
|
|
212
227
|
graph.biadjacency = matrix
|
|
213
228
|
else:
|
|
214
229
|
nodes = edge_array.ravel()
|
|
215
|
-
if nodes.dtype != int or
|
|
230
|
+
if nodes.dtype != int or reindex:
|
|
216
231
|
names, nodes = np.unique(nodes, return_inverse=True)
|
|
217
232
|
graph.names = names
|
|
218
233
|
n = len(names)
|
|
219
234
|
edge_array = nodes.reshape(-1, 2)
|
|
235
|
+
elif shape is not None:
|
|
236
|
+
n = max(shape[0], max(nodes) + 1)
|
|
220
237
|
else:
|
|
221
238
|
n = max(nodes) + 1
|
|
222
239
|
row = edge_array[:, 0]
|
|
@@ -233,8 +250,8 @@ def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed
|
|
|
233
250
|
|
|
234
251
|
def from_csv(file_path: str, delimiter: str = None, sep: str = None, comments: str = '#%',
|
|
235
252
|
data_structure: str = None, directed: bool = False, bipartite: bool = False, weighted: bool = True,
|
|
236
|
-
reindex: bool =
|
|
237
|
-
|
|
253
|
+
reindex: bool = False, shape: Optional[tuple] = None, sum_duplicates: bool = True,
|
|
254
|
+
matrix_only: bool = None) -> Union[Bunch, sparse.csr_matrix]:
|
|
238
255
|
"""Load a graph from a CSV or TSV file.
|
|
239
256
|
The delimiter can be specified (e.g., ' ' for space-separated values).
|
|
240
257
|
|
|
@@ -249,9 +266,10 @@ def from_csv(file_path: str, delimiter: str = None, sep: str = None, comments: s
|
|
|
249
266
|
comments : str
|
|
250
267
|
Characters for comment lines.
|
|
251
268
|
data_structure : str
|
|
252
|
-
If 'edge_list',
|
|
253
|
-
If 'adjacency_list',
|
|
254
|
-
|
|
269
|
+
If 'edge_list', consider each row of the file as an edge (tuple of size 2 or 3).
|
|
270
|
+
If 'adjacency_list', consider each row of the file as an adjacency list (list of neighbors,
|
|
271
|
+
in the order of node indices; an empty line means no neighbor).
|
|
272
|
+
If 'adjacency_dict', consider each row of the file as an adjacency dictionary with key
|
|
255
273
|
given by the first column (node: list of neighbors).
|
|
256
274
|
If ``None`` (default), data_structure is guessed from the first rows of the file.
|
|
257
275
|
directed : bool
|
|
@@ -263,6 +281,9 @@ def from_csv(file_path: str, delimiter: str = None, sep: str = None, comments: s
|
|
|
263
281
|
reindex : bool
|
|
264
282
|
If ``True``, reindex nodes and returns the original node indices as names.
|
|
265
283
|
Reindexing is enforced if nodes are not integers.
|
|
284
|
+
shape : tuple
|
|
285
|
+
Shape of the adjacency or biadjacency matrix.
|
|
286
|
+
If not specified or if nodes are reindexed, the shape is the smallest compatible with node indices.
|
|
266
287
|
sum_duplicates : bool
|
|
267
288
|
If ``True`` (default), sums weights of duplicate edges.
|
|
268
289
|
Otherwise, the weight of each edge is that of the first occurrence of this edge.
|
|
@@ -295,7 +316,7 @@ def from_csv(file_path: str, delimiter: str = None, sep: str = None, comments: s
|
|
|
295
316
|
else:
|
|
296
317
|
weights = None
|
|
297
318
|
return from_edge_array(edge_array=edge_array, weights=weights, directed=directed, bipartite=bipartite,
|
|
298
|
-
weighted=weighted, reindex=reindex, sum_duplicates=sum_duplicates,
|
|
319
|
+
weighted=weighted, reindex=reindex, shape=shape, sum_duplicates=sum_duplicates,
|
|
299
320
|
matrix_only=matrix_only)
|
|
300
321
|
except TypeError:
|
|
301
322
|
pass
|
|
@@ -306,17 +327,17 @@ def from_csv(file_path: str, delimiter: str = None, sep: str = None, comments: s
|
|
|
306
327
|
if data_structure == 'edge_list':
|
|
307
328
|
edge_list = [tuple(row) for row in csv_reader]
|
|
308
329
|
return from_edge_list(edge_list=edge_list, directed=directed, bipartite=bipartite,
|
|
309
|
-
weighted=weighted, reindex=reindex, sum_duplicates=sum_duplicates,
|
|
330
|
+
weighted=weighted, reindex=reindex, shape=shape, sum_duplicates=sum_duplicates,
|
|
310
331
|
matrix_only=matrix_only)
|
|
311
332
|
elif data_structure == 'adjacency_list':
|
|
312
333
|
adjacency_list = [row for row in csv_reader]
|
|
313
334
|
return from_adjacency_list(adjacency_list=adjacency_list, directed=directed, bipartite=bipartite,
|
|
314
|
-
weighted=weighted, reindex=reindex, sum_duplicates=sum_duplicates,
|
|
335
|
+
weighted=weighted, reindex=reindex, shape=shape, sum_duplicates=sum_duplicates,
|
|
315
336
|
matrix_only=matrix_only)
|
|
316
337
|
elif data_structure == 'adjacency_dict':
|
|
317
338
|
adjacency_list = {row[0]: row[1:] for row in csv_reader}
|
|
318
339
|
return from_adjacency_list(adjacency_list=adjacency_list, directed=directed, bipartite=bipartite,
|
|
319
|
-
weighted=weighted, reindex=reindex, sum_duplicates=sum_duplicates,
|
|
340
|
+
weighted=weighted, reindex=reindex, shape=shape, sum_duplicates=sum_duplicates,
|
|
320
341
|
matrix_only=matrix_only)
|
|
321
342
|
|
|
322
343
|
|