scikit-network 0.31.0-cp310-cp310-win_amd64.whl → 0.33.0-cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of scikit-network might be problematic.
- {scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/AUTHORS.rst +3 -1
- {scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/METADATA +27 -5
- scikit_network-0.33.0.dist-info/RECORD +228 -0
- {scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/WHEEL +1 -1
- sknetwork/__init__.py +1 -1
- sknetwork/classification/base.py +1 -1
- sknetwork/classification/base_rank.py +3 -3
- sknetwork/classification/diffusion.py +25 -16
- sknetwork/classification/knn.py +23 -16
- sknetwork/classification/metrics.py +4 -4
- sknetwork/classification/pagerank.py +12 -8
- sknetwork/classification/propagation.py +25 -17
- sknetwork/classification/tests/test_diffusion.py +10 -0
- sknetwork/classification/vote.cp310-win_amd64.pyd +0 -0
- sknetwork/classification/vote.cpp +14549 -8668
- sknetwork/clustering/__init__.py +3 -1
- sknetwork/clustering/base.py +1 -1
- sknetwork/clustering/kcenters.py +253 -0
- sknetwork/clustering/leiden.py +242 -0
- sknetwork/clustering/leiden_core.cp310-win_amd64.pyd +0 -0
- sknetwork/clustering/leiden_core.cpp +31564 -0
- sknetwork/clustering/leiden_core.pyx +124 -0
- sknetwork/clustering/louvain.py +118 -83
- sknetwork/clustering/louvain_core.cp310-win_amd64.pyd +0 -0
- sknetwork/clustering/louvain_core.cpp +21876 -16332
- sknetwork/clustering/louvain_core.pyx +86 -94
- sknetwork/clustering/postprocess.py +2 -2
- sknetwork/clustering/propagation_clustering.py +4 -4
- sknetwork/clustering/tests/test_API.py +7 -3
- sknetwork/clustering/tests/test_kcenters.py +60 -0
- sknetwork/clustering/tests/test_leiden.py +34 -0
- sknetwork/clustering/tests/test_louvain.py +2 -3
- sknetwork/data/__init__.py +1 -1
- sknetwork/data/base.py +7 -2
- sknetwork/data/load.py +20 -25
- sknetwork/data/models.py +15 -15
- sknetwork/data/parse.py +57 -34
- sknetwork/data/tests/test_API.py +3 -3
- sknetwork/data/tests/test_base.py +2 -2
- sknetwork/data/tests/test_parse.py +9 -12
- sknetwork/data/tests/test_toy_graphs.py +33 -33
- sknetwork/data/toy_graphs.py +35 -43
- sknetwork/embedding/__init__.py +0 -1
- sknetwork/embedding/base.py +23 -19
- sknetwork/embedding/force_atlas.py +3 -2
- sknetwork/embedding/louvain_embedding.py +1 -27
- sknetwork/embedding/random_projection.py +5 -3
- sknetwork/embedding/spectral.py +0 -73
- sknetwork/embedding/svd.py +0 -4
- sknetwork/embedding/tests/test_API.py +4 -28
- sknetwork/embedding/tests/test_louvain_embedding.py +13 -13
- sknetwork/embedding/tests/test_spectral.py +2 -5
- sknetwork/embedding/tests/test_svd.py +7 -1
- sknetwork/gnn/base_layer.py +3 -3
- sknetwork/gnn/gnn_classifier.py +41 -87
- sknetwork/gnn/layer.py +1 -1
- sknetwork/gnn/loss.py +1 -1
- sknetwork/gnn/optimizer.py +4 -3
- sknetwork/gnn/tests/test_base_layer.py +4 -4
- sknetwork/gnn/tests/test_gnn_classifier.py +12 -39
- sknetwork/gnn/utils.py +8 -8
- sknetwork/hierarchy/base.py +27 -0
- sknetwork/hierarchy/louvain_hierarchy.py +55 -47
- sknetwork/hierarchy/paris.cp310-win_amd64.pyd +0 -0
- sknetwork/hierarchy/paris.cpp +27667 -20915
- sknetwork/hierarchy/paris.pyx +11 -10
- sknetwork/hierarchy/postprocess.py +16 -16
- sknetwork/hierarchy/tests/test_algos.py +5 -0
- sknetwork/hierarchy/tests/test_metrics.py +4 -4
- sknetwork/linalg/__init__.py +1 -1
- sknetwork/linalg/diteration.cp310-win_amd64.pyd +0 -0
- sknetwork/linalg/diteration.cpp +13916 -8050
- sknetwork/linalg/{normalization.py → normalizer.py} +17 -14
- sknetwork/linalg/operators.py +1 -1
- sknetwork/linalg/ppr_solver.py +1 -1
- sknetwork/linalg/push.cp310-win_amd64.pyd +0 -0
- sknetwork/linalg/push.cpp +23187 -16973
- sknetwork/linalg/tests/test_normalization.py +3 -7
- sknetwork/linalg/tests/test_operators.py +2 -6
- sknetwork/linalg/tests/test_ppr.py +1 -1
- sknetwork/linkpred/base.py +12 -1
- sknetwork/linkpred/nn.py +6 -6
- sknetwork/path/distances.py +11 -4
- sknetwork/path/shortest_path.py +1 -1
- sknetwork/path/tests/test_distances.py +7 -0
- sknetwork/path/tests/test_search.py +2 -2
- sknetwork/ranking/base.py +11 -6
- sknetwork/ranking/betweenness.cp310-win_amd64.pyd +0 -0
- sknetwork/ranking/betweenness.cpp +5256 -2190
- sknetwork/ranking/pagerank.py +13 -12
- sknetwork/ranking/tests/test_API.py +0 -2
- sknetwork/ranking/tests/test_betweenness.py +1 -1
- sknetwork/ranking/tests/test_pagerank.py +11 -5
- sknetwork/regression/base.py +18 -1
- sknetwork/regression/diffusion.py +30 -14
- sknetwork/regression/tests/test_diffusion.py +8 -0
- sknetwork/topology/__init__.py +3 -1
- sknetwork/topology/cliques.cp310-win_amd64.pyd +0 -0
- sknetwork/topology/cliques.cpp +23528 -16848
- sknetwork/topology/core.cp310-win_amd64.pyd +0 -0
- sknetwork/topology/core.cpp +22849 -16581
- sknetwork/topology/cycles.py +243 -0
- sknetwork/topology/minheap.cp310-win_amd64.pyd +0 -0
- sknetwork/topology/minheap.cpp +19495 -13469
- sknetwork/topology/structure.py +2 -42
- sknetwork/topology/tests/test_cycles.py +65 -0
- sknetwork/topology/tests/test_structure.py +2 -16
- sknetwork/topology/triangles.cp310-win_amd64.pyd +0 -0
- sknetwork/topology/triangles.cpp +5283 -1397
- sknetwork/topology/triangles.pyx +7 -4
- sknetwork/topology/weisfeiler_lehman_core.cp310-win_amd64.pyd +0 -0
- sknetwork/topology/weisfeiler_lehman_core.cpp +14781 -8915
- sknetwork/utils/__init__.py +1 -1
- sknetwork/utils/format.py +1 -1
- sknetwork/utils/membership.py +2 -2
- sknetwork/utils/values.py +5 -3
- sknetwork/visualization/__init__.py +2 -2
- sknetwork/visualization/dendrograms.py +55 -7
- sknetwork/visualization/graphs.py +261 -44
- sknetwork/visualization/tests/test_dendrograms.py +9 -9
- sknetwork/visualization/tests/test_graphs.py +63 -57
- scikit_network-0.31.0.dist-info/RECORD +0 -221
- sknetwork/embedding/louvain_hierarchy.py +0 -142
- sknetwork/embedding/tests/test_louvain_hierarchy.py +0 -19
- {scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/LICENSE +0 -0
- {scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/top_level.txt +0 -0
sknetwork/clustering/louvain_core.pyx
CHANGED

@@ -1,7 +1,6 @@
-# distutils: language
+# distutils: language=c++
 # cython: language_level=3
 from libcpp.set cimport set
-from libcpp.vector cimport vector
 cimport cython

 ctypedef fused int_or_long:
@@ -10,123 +9,116 @@ ctypedef fused int_or_long:

 @cython.boundscheck(False)
 @cython.wraparound(False)
-def
-
-
+def optimize_core(int_or_long[:] labels, int_or_long[:] indices, int_or_long[:] indptr, float[:] data,
+                  float[:] out_weights, float[:] in_weights, float[:] out_cluster_weights, float[:] in_cluster_weights,
+                  float[:] cluster_weights, float[:] self_loops, float resolution, float tol_optimization):  # pragma: no cover
+    """Find clusters maximizing modularity.

     Parameters
     ----------
-
-
-    tol :
-        Minimum increase in modularity to enter a new optimization pass.
-    ou_node_probs :
-        Distribution of node weights based on their out-edges (sums to 1).
-    in_node_probs :
-        Distribution of node weights based on their in-edges (sums to 1).
-    self_loops :
-        Weights of self loops.
-    data :
-        CSR format data array of the normalized adjacency matrix.
+    labels :
+        Initial labels.
     indices :
         CSR format index array of the normalized adjacency matrix.
     indptr :
         CSR format index pointer array of the normalized adjacency matrix.
+    data :
+        CSR format data array of the normalized adjacency matrix.
+    out_weights :
+        Out-weights of nodes (sum to 1).
+    in_weights :
+        In-weights of nodes (sum to 1).
+    out_cluster_weights :
+        Out-weights of clusters (sum to 1).
+    in_cluster_weights :
+        In-weights of clusters (sum to 1).
+    cluster_weights :
+        Weights of clusters (initialized to 0).
+    self_loops :
+        Weights of self loops.
+    resolution :
+        Resolution parameter (positive).
+    tol_optimization :
+        Minimum increase in modularity to enter a new optimization pass.

     Returns
     -------
     labels :
-
-
-
+        Labels of nodes.
+    increase :
+        Increase in modularity.
     """
-    cdef int_or_long n
-    cdef int_or_long
-    cdef int_or_long
-    cdef int_or_long
-    cdef int_or_long
+    cdef int_or_long n
+    cdef int_or_long stop = 0
+    cdef int_or_long label
+    cdef int_or_long label_target
+    cdef int_or_long label_best
     cdef int_or_long i
     cdef int_or_long j
-    cdef int_or_long
-    cdef int_or_long
-    cdef int_or_long label
+    cdef int_or_long start
+    cdef int_or_long end

-    cdef float
+    cdef float increase = 0
     cdef float increase_pass
     cdef float delta
-    cdef float delta_best
-    cdef float delta_exit
     cdef float delta_local
-    cdef float
-    cdef float
-    cdef float
-    cdef float ratio_ou
-
-    cdef vector[int_or_long] labels
-    cdef vector[float] neighbor_clusters_weights
-    cdef vector[float] ou_clusters_weights
-    cdef vector[float] in_clusters_weights
-    cdef set[int_or_long] unique_clusters = ()
-
-    for i in range(n):
-        labels.push_back(i)
-        neighbor_clusters_weights.push_back(0.)
-        ou_clusters_weights.push_back(ou_node_probs[i])
-        in_clusters_weights.push_back(in_node_probs[i])
-
-    while increase == 1:
-        increase = 0
-        increase_pass = 0
-
-        for i in range(n):
-            unique_clusters.clear()
-            cluster_node = labels[i]
-            j1 = indptr[i]
-            j2 = indptr[i + 1]
-
-            for j in range(j1, j2):
-                label = labels[indices[j]]
-                neighbor_clusters_weights[label] += data[j]
-                unique_clusters.insert(label)
+    cdef float delta_best
+    cdef float in_weight
+    cdef float out_weight

-
+    cdef set[int_or_long] label_set = ()

-
-
-
-            ratio_ou = resolution * node_prob_ou
-            ratio_in = resolution * node_prob_in
+    n = labels.shape[0]
+    while not stop:
+        increase_pass = 0

-
-
-
+        for i in range(n):
+            label_set.clear()
+            label = labels[i]
+            start = indptr[i]
+            end = indptr[i+1]
+
+            # neighboring clusters
+            for j in range(start, end):
+                label_target = labels[indices[j]]
+                label_set.insert(label_target)
+                cluster_weights[label_target] += data[j]
+            label_set.erase(label)
+
+            if not label_set.empty():
+                out_weight = out_weights[i]
+                in_weight = in_weights[i]
+
+                # node leaving the current cluster
+                delta = 2 * (cluster_weights[label] - self_loops[i])
+                delta -= resolution * out_weight * (in_cluster_weights[label] - in_weight)
+                delta -= resolution * in_weight * (out_cluster_weights[label] - out_weight)

                 delta_best = 0
-
+                label_best = label

-            for
-
-
-
-
-                    delta_local = delta - delta_exit
+                for label_target in label_set:
+                    delta_local = 2 * cluster_weights[label_target]
+                    delta_local -= resolution * out_weight * in_cluster_weights[label_target]
+                    delta_local -= resolution * in_weight * out_cluster_weights[label_target]
+                    delta_local -= delta
                     if delta_local > delta_best:
                         delta_best = delta_local
-
-
-                neighbor_clusters_weights[cluster] = 0
+                        label_best = label_target
+                    cluster_weights[label_target] = 0

-            if
+                if label_best != label:
                     increase_pass += delta_best
-
-
-
-
-
-
-
-
-
-
-
-
+                    labels[i] = label_best
+                    # update weights
+                    out_cluster_weights[label] -= out_weight
+                    in_cluster_weights[label] -= in_weight
+                    out_cluster_weights[label_best] += out_weight
+                    in_cluster_weights[label_best] += in_weight
+
+                cluster_weights[label] = 0
+
+        increase += increase_pass
+        stop = increase_pass <= tol_optimization
+
+    return labels, increase
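For readers scanning the hunk above: the new optimize_core takes the per-node and per-cluster weight arrays directly instead of rebuilding C++ vectors, and evaluates one local move per node. Below is a rough, untyped Python transcription of that per-node move; the gain expressions mirror the diff, while the function name best_move, the dict-based accumulation and the toy demo are mine (an illustration, not the shipped Cython code).

def best_move(i, labels, adjacency, out_weights, in_weights,
              out_cluster_weights, in_cluster_weights, self_loops, resolution=1.):
    """Return (best label, modularity gain) for moving node i, given a normalized CSR adjacency."""
    start, end = adjacency.indptr[i], adjacency.indptr[i + 1]
    label = labels[i]

    # total edge weight from node i to each neighboring cluster
    weights_to_cluster = {}
    for j in range(start, end):
        target = labels[adjacency.indices[j]]
        weights_to_cluster[target] = weights_to_cluster.get(target, 0.) + adjacency.data[j]

    # cost of leaving the current cluster (same expression as in the hunk)
    delta = 2 * (weights_to_cluster.get(label, 0.) - self_loops[i])
    delta -= resolution * out_weights[i] * (in_cluster_weights[label] - in_weights[i])
    delta -= resolution * in_weights[i] * (out_cluster_weights[label] - out_weights[i])

    label_best, delta_best = label, 0.
    for target, weight in weights_to_cluster.items():
        if target == label:
            continue
        delta_local = 2 * weight
        delta_local -= resolution * out_weights[i] * in_cluster_weights[target]
        delta_local -= resolution * in_weights[i] * out_cluster_weights[target]
        delta_local -= delta
        if delta_local > delta_best:
            label_best, delta_best = target, delta_local
    return label_best, delta_best


# toy demo: 4-node undirected graph, every node starting in its own cluster
import numpy as np
from scipy import sparse

adjacency = sparse.csr_matrix(np.array([[0, 1, 1, 0],
                                        [1, 0, 1, 0],
                                        [1, 1, 0, 1],
                                        [0, 0, 1, 0]], dtype=float))
adjacency = adjacency / adjacency.sum()               # normalized, as in the Cython code
weights = np.asarray(adjacency.sum(axis=1)).ravel()   # node weights (out = in for undirected graphs)
labels = np.arange(4)
print(best_move(0, labels, adjacency, weights, weights,
                weights.copy(), weights.copy(), np.zeros(4)))  # best target cluster and its gain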
sknetwork/clustering/postprocess.py
CHANGED

@@ -41,7 +41,7 @@ def aggregate_graph(input_matrix: sparse.csr_matrix, labels: Optional[np.ndarray
                     labels_row: Optional[np.ndarray] = None, labels_col: Optional[np.ndarray] = None) \
         -> sparse.csr_matrix:
     """Aggregate graph per label. All nodes with the same label become a single node.
-    Negative labels are ignored (corresponding nodes are
+    Negative labels are ignored (corresponding nodes are discarded).

     Parameters
     ----------
@@ -63,4 +63,4 @@ def aggregate_graph(input_matrix: sparse.csr_matrix, labels: Optional[np.ndarray
     else:
         membership_col = membership_row
     aggregate_matrix = membership_row.T.dot(input_matrix).dot(membership_col)
-    return aggregate_matrix
+    return aggregate_matrix.tocsr()
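The aggregation in the hunk above is just a membership-matrix product, and the change makes the result explicitly CSR. A standalone sketch with plain scipy (no sknetwork import; the one-hot membership construction is mine) of what the returned matrix contains:

import numpy as np
from scipy import sparse

# 4 nodes, two clusters: {0, 1} and {2, 3}
adjacency = sparse.csr_matrix(np.array([[0, 1, 1, 0],
                                        [1, 0, 1, 0],
                                        [1, 1, 0, 1],
                                        [0, 0, 1, 0]]))
labels = np.array([0, 0, 1, 1])
# one-hot membership matrix M of shape (nodes, labels)
membership = sparse.csr_matrix((np.ones(len(labels)), (np.arange(len(labels)), labels)))
# M.T @ A @ M sums the edge weights between clusters
aggregate = membership.T.dot(adjacency).dot(membership).tocsr()
print(aggregate.toarray())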
sknetwork/clustering/propagation_clustering.py
CHANGED

@@ -29,11 +29,11 @@ class PropagationClustering(BaseClustering, Propagation):
     weighted : bool
         If ``True``, the vote of each neighbor is proportional to the edge weight.
         Otherwise, all votes have weight 1.
-    sort_clusters :
+    sort_clusters : bool
         If ``True``, sort labels in decreasing order of cluster size.
-    return_probs :
+    return_probs : bool
         If ``True``, return the probability distribution over clusters (soft clustering).
-    return_aggregate :
+    return_aggregate : bool
         If ``True``, return the aggregate adjacency matrix or biadjacency matrix between clusters.

     Attributes
@@ -78,7 +78,7 @@ class PropagationClustering(BaseClustering, Propagation):

         Parameters
         ----------
-        input_matrix :
+        input_matrix : sparse.csr_matrix, np.ndarray
             Adjacency matrix or biadjacency matrix of the graph.

         Returns
sknetwork/clustering/tests/test_API.py
CHANGED

@@ -9,8 +9,12 @@ from sknetwork.data.test_graphs import *

 class TestClusteringAPI(unittest.TestCase):

+    def setUp(self):
+        self.algos = [Louvain(return_aggregate=True), Leiden(return_aggregate=True),
+                      PropagationClustering(return_aggregate=True)]
+
     def test_regular(self):
-        for algo in
+        for algo in self.algos:
             for adjacency in [test_graph(), test_digraph(), test_disconnected_graph()]:
                 n = adjacency.shape[0]
                 labels = algo.fit_predict(adjacency)
@@ -22,13 +26,13 @@ class TestClusteringAPI(unittest.TestCase):
                 n_labels = len(set(labels))
                 self.assertEqual(labels.shape, (n,))
                 self.assertEqual(algo.aggregate_.shape, (n_labels, n_labels))
-                membership = algo.fit_transform(
+                membership = algo.fit_transform(adjacency_bool)
                 self.assertEqual(membership.shape, (n, n_labels))

     def test_bipartite(self):
         biadjacency = test_bigraph()
         n_row, n_col = biadjacency.shape
-        for algo in
+        for algo in self.algos:
             algo.fit(biadjacency)
             self.assertEqual(algo.labels_row_.shape, (n_row,))
             self.assertEqual(algo.labels_col_.shape, (n_col,))
sknetwork/clustering/tests/test_kcenters.py
ADDED

@@ -0,0 +1,60 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""Tests for KCenters"""
+import unittest
+
+from sknetwork.clustering import KCenters
+from sknetwork.data.test_graphs import *
+
+
+class TestKCentersClustering(unittest.TestCase):
+
+    def test_kcenters(self):
+        # Test undirected graph
+        n_clusters = 2
+        adjacency = test_graph()
+        n_row = adjacency.shape[0]
+        kcenters = KCenters(n_clusters=n_clusters)
+        labels = kcenters.fit_predict(adjacency)
+        self.assertEqual(len(labels), n_row)
+        self.assertEqual(len(set(labels)), n_clusters)
+
+        # Test directed graph
+        n_clusters = 3
+        adjacency = test_digraph()
+        n_row = adjacency.shape[0]
+        kcenters = KCenters(n_clusters=n_clusters, directed=True)
+        labels = kcenters.fit_predict(adjacency)
+        self.assertEqual(len(labels), n_row)
+        self.assertEqual(len(set(labels)), n_clusters)
+
+        # Test bipartite graph
+        n_clusters = 2
+        biadjacency = test_bigraph()
+        n_row, n_col = biadjacency.shape
+        kcenters = KCenters(n_clusters=n_clusters)
+        kcenters.fit(biadjacency)
+        labels = kcenters.labels_
+        self.assertEqual(len(kcenters.labels_row_), n_row)
+        self.assertEqual(len(kcenters.labels_col_), n_col)
+        self.assertEqual(len(set(labels)), n_clusters)
+
+    def test_kcenters_error(self):
+        # Test value errors
+        adjacency = test_graph()
+        biadjacency = test_bigraph()
+
+        # test n_clusters error
+        kcenters = KCenters(n_clusters=1)
+        with self.assertRaises(ValueError):
+            kcenters.fit(adjacency)
+
+        # test n_init error
+        kcenters = KCenters(n_clusters=2, n_init=0)
+        with self.assertRaises(ValueError):
+            kcenters.fit(adjacency)
+
+        # test center_position error
+        kcenters = KCenters(n_clusters=2, center_position="other")
+        with self.assertRaises(ValueError):
+            kcenters.fit(biadjacency)
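For context, a minimal usage sketch of the new KCenters estimator, inferred from the tests above (the constructor parameters shown are the ones the tests exercise; karate_club is used here only as a convenient built-in graph):

from sknetwork.clustering import KCenters
from sknetwork.data import karate_club

adjacency = karate_club()
kcenters = KCenters(n_clusters=2)
labels = kcenters.fit_predict(adjacency)   # one label per node
print(len(set(labels)))                    # number of clusters found (2)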
sknetwork/clustering/tests/test_leiden.py
ADDED

@@ -0,0 +1,34 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""Tests for Leiden"""
+import unittest
+
+from sknetwork.clustering import Leiden
+from sknetwork.data.test_graphs import *
+from sknetwork.utils import bipartite2undirected
+
+
+class TestLeidenClustering(unittest.TestCase):
+
+    def test_disconnected(self):
+        adjacency = test_disconnected_graph()
+        n = adjacency.shape[0]
+        labels = Leiden().fit_predict(adjacency)
+        self.assertEqual(len(labels), n)
+
+    def test_modularity(self):
+        adjacency = test_graph()
+        leiden_d = Leiden(modularity='dugue')
+        leiden_n = Leiden(modularity='newman')
+        labels_d = leiden_d.fit_predict(adjacency)
+        labels_n = leiden_n.fit_predict(adjacency)
+        self.assertTrue((labels_d == labels_n).all())
+
+    def test_bipartite(self):
+        biadjacency = test_bigraph()
+        adjacency = bipartite2undirected(biadjacency)
+        leiden = Leiden(modularity='newman')
+        labels1 = leiden.fit_predict(adjacency)
+        leiden.fit(biadjacency)
+        labels2 = np.concatenate((leiden.labels_row_, leiden.labels_col_))
+        self.assertTrue((labels1 == labels2).all())
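Likewise, a minimal usage sketch of the new Leiden estimator, based on the test above (the modularity option is the one the test uses; karate_club is my choice of example graph):

from sknetwork.clustering import Leiden
from sknetwork.data import karate_club

adjacency = karate_club()
leiden = Leiden(modularity='newman')
labels = leiden.fit_predict(adjacency)   # one label per node
print(len(set(labels)))                  # number of clusters found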
sknetwork/clustering/tests/test_louvain.py
CHANGED

@@ -24,7 +24,6 @@ class TestLouvainClustering(unittest.TestCase):
         labels_d = louvain_d.fit_predict(adjacency)
         labels_n = louvain_n.fit_predict(adjacency)
         self.assertTrue((labels_d == labels_n).all())
-
         louvain_p = Louvain(modularity='potts')
         louvain_p.fit_predict(adjacency)

@@ -48,7 +47,7 @@ class TestLouvainClustering(unittest.TestCase):
         # tolerance
         louvain = Louvain(resolution=2, tol_aggregation=0.1)
         labels = louvain.fit_predict(adjacency)
-        self.assertEqual(len(set(labels)),
+        self.assertEqual(len(set(labels)), 7)

         # shuffling
         louvain = Louvain(resolution=2, shuffle_nodes=True, random_state=42)
@@ -78,7 +77,7 @@ class TestLouvainClustering(unittest.TestCase):
         # tolerance
         louvain = Louvain(resolution=2, tol_aggregation=0.1)
         labels = louvain.fit_predict(adjacency)
-        self.assertEqual(len(set(labels)),
+        self.assertEqual(len(set(labels)), 7)

         # shuffling
         louvain = Louvain(resolution=2, shuffle_nodes=True, random_state=42)
sknetwork/data/__init__.py
CHANGED
sknetwork/data/base.py
CHANGED
@@ -6,10 +6,10 @@ Created in May 2023
 """


-class
+class Dataset(dict):
     """Container object for datasets.
     Dictionary-like object that exposes its keys as attributes.
-    >>> dataset =
+    >>> dataset = Dataset(name='dataset')
     >>> dataset['name']
     'dataset'
     >>> dataset.name
@@ -26,3 +26,8 @@ class Bunch(dict):
             return self[key]
         except KeyError:
             raise AttributeError(key)
+
+
+# alias for Dataset
+Bunch = Dataset
+
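The pattern behind the Bunch → Dataset rename above is small enough to restate. A minimal sketch of the attribute access shown in the hunk (the real class may define more than this, e.g. attribute assignment, which the load.py doctests rely on):

class Dataset(dict):
    """Dictionary-like object exposing its keys as attributes."""
    def __getattr__(self, key):
        try:
            return self[key]
        except KeyError:
            raise AttributeError(key)


Bunch = Dataset  # kept as an alias for backward compatibility

dataset = Dataset(name='dataset')
assert dataset['name'] == dataset.name == 'dataset'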
sknetwork/data/load.py
CHANGED
@@ -19,15 +19,12 @@ import numpy as np
 from scipy import sparse

 from sknetwork.data.parse import from_csv, load_labels, load_header, load_metadata
-from sknetwork.data.base import
+from sknetwork.data.base import Dataset
 from sknetwork.utils.check import is_square
 from sknetwork.log import Log

 NETSET_URL = 'https://netset.telecom-paris.fr'

-# former name of Dataset
-Bunch = Bunch
-

 def is_within_directory(directory, target):
     """Utility function."""
@@ -89,7 +86,7 @@ def clean_data_home(data_home: Optional[Union[str, Path]] = None):


 def load_netset(name: Optional[str] = None, data_home: Optional[Union[str, Path]] = None,
-                verbose: bool = True) -> Optional[
+                verbose: bool = True) -> Optional[Dataset]:
     """Load a dataset from the `NetSet collection
     <https://netset.telecom-paris.fr/>`_.

@@ -105,10 +102,10 @@ def load_netset(name: Optional[str] = None, data_home: Optional[Union[str, Path]

     Returns
     -------
-    dataset : :class:`
+    dataset : :class:`Dataset`
         Returned dataset.
     """
-    dataset =
+    dataset = Dataset()
     dataset_folder = NETSET_URL + '/datasets/'
     folder_npz = NETSET_URL + '/datasets_npz/'

@@ -167,7 +164,7 @@ def load_netset(name: Optional[str] = None, data_home: Optional[Union[str, Path]


 def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_numpy_bundle: bool = True,
-                verbose: bool = True) ->
+                verbose: bool = True) -> Dataset:
     """Load a dataset from the `Konect database
     <http://konect.cc/networks/>`_.

@@ -186,7 +183,7 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu

     Returns
     -------
-    dataset : :class:`
+    dataset : :class:`Dataset`
         Object with the following attributes:

         * `adjacency` or `biadjacency`: the adjacency/biadjacency matrix for the dataset
@@ -240,7 +237,7 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu
         logger.print_log('Loading from local bundle...')
         return load_from_numpy_bundle(name + '_bundle', data_path)

-    dataset =
+    dataset = Dataset()
     path = data_konect / name / name
     if not path.exists() or len(listdir(path)) == 0:
         raise Exception("No data downloaded.")
@@ -250,7 +247,7 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu
     if matrix:
         file = matrix[0]
         directed, bipartite, weighted = load_header(path / file)
-        dataset = from_csv(path / file, directed=directed, bipartite=bipartite, weighted=weighted)
+        dataset = from_csv(path / file, directed=directed, bipartite=bipartite, weighted=weighted, reindex=True)

     metadata = [file for file in files if 'meta.' in file]
     if metadata:
@@ -269,7 +266,7 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu
         else:
             dataset.meta.name = name
     else:
-        dataset.meta =
+        dataset.meta = Dataset()
         dataset.meta.name = name

     if auto_numpy_bundle:
@@ -280,12 +277,12 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu
     return dataset


-def save_to_numpy_bundle(data:
+def save_to_numpy_bundle(data: Dataset, bundle_name: str, data_home: Optional[Union[str, Path]] = None):
     """Save a dataset in the specified data home to a collection of Numpy and Pickle files for faster subsequent loads.

     Parameters
     ----------
-    data:
+    data: Dataset
         Data to save.
     bundle_name: str
         Name to be used for the bundle folder.
@@ -300,11 +297,9 @@ def save_to_numpy_bundle(data: Bunch, bundle_name: str, data_home: Optional[Unio
             sparse.save_npz(data_path / attribute, data[attribute])
         elif type(data[attribute]) == np.ndarray:
             np.save(data_path / attribute, data[attribute])
-
+        else:
             with open(data_path / (attribute + '.p'), 'wb') as file:
                 pickle.dump(data[attribute], file)
-        else:
-            raise TypeError('Unsupported data attribute type '+str(type(data[attribute])) + '.')


 def load_from_numpy_bundle(bundle_name: str, data_home: Optional[Union[str, Path]] = None):
@@ -319,7 +314,7 @@ def load_from_numpy_bundle(bundle_name: str, data_home: Optional[Union[str, Path

     Returns
     -------
-    data:
+    data: Dataset
         Data.
     """
     data_home = get_data_home(data_home)
@@ -328,7 +323,7 @@ def load_from_numpy_bundle(bundle_name: str, data_home: Optional[Union[str, Path
         raise FileNotFoundError('No bundle at ' + str(data_path))
     else:
         files = listdir(data_path)
-        data =
+        data = Dataset()
         for file in files:
             if len(file.split('.')) == 2:
                 file_name, file_extension = file.split('.')
@@ -342,7 +337,7 @@ def load_from_numpy_bundle(bundle_name: str, data_home: Optional[Union[str, Path
     return data


-def save(folder: Union[str, Path], data: Union[sparse.csr_matrix,
+def save(folder: Union[str, Path], data: Union[sparse.csr_matrix, Dataset]):
     """Save a dataset or a CSR matrix in the current directory to a collection of Numpy and Pickle files for faster
     subsequent loads. Supported attribute types include sparse matrices, NumPy arrays, strings and objects Dataset.

@@ -350,13 +345,13 @@ def save(folder: Union[str, Path], data: Union[sparse.csr_matrix, Bunch]):
     ----------
     folder : str or :class:`pathlib.Path`
         Name of the bundle folder.
-    data : Union[sparse.csr_matrix,
+    data : Union[sparse.csr_matrix, Dataset]
         Data to save.

     Example
     -------
     >>> from sknetwork.data import save
-    >>> dataset =
+    >>> dataset = Dataset()
     >>> dataset.adjacency = sparse.csr_matrix(np.random.random((3, 3)) < 0.5)
     >>> dataset.names = np.array(['a', 'b', 'c'])
     >>> save('dataset', dataset)
@@ -368,7 +363,7 @@ def save(folder: Union[str, Path], data: Union[sparse.csr_matrix, Bunch]):
     if folder.exists():
         shutil.rmtree(folder)
     if isinstance(data, sparse.csr_matrix):
-        dataset =
+        dataset = Dataset()
         if is_square(data):
             dataset.adjacency = data
         else:
@@ -390,13 +385,13 @@ def load(folder: Union[str, Path]):

     Returns
     -------
-    data:
+    data: Dataset
         Data.

     Example
     -------
     >>> from sknetwork.data import save
-    >>> dataset =
+    >>> dataset = Dataset()
     >>> dataset.adjacency = sparse.csr_matrix(np.random.random((3, 3)) < 0.5)
     >>> dataset.names = np.array(['a', 'b', 'c'])
     >>> save('dataset', dataset)