scikit-network 0.31.0-cp311-cp311-win_amd64.whl → 0.33.0-cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (126)
  1. {scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/AUTHORS.rst +3 -1
  2. {scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/METADATA +27 -5
  3. scikit_network-0.33.0.dist-info/RECORD +228 -0
  4. {scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/WHEEL +1 -1
  5. sknetwork/__init__.py +1 -1
  6. sknetwork/classification/base.py +1 -1
  7. sknetwork/classification/base_rank.py +3 -3
  8. sknetwork/classification/diffusion.py +25 -16
  9. sknetwork/classification/knn.py +23 -16
  10. sknetwork/classification/metrics.py +4 -4
  11. sknetwork/classification/pagerank.py +12 -8
  12. sknetwork/classification/propagation.py +25 -17
  13. sknetwork/classification/tests/test_diffusion.py +10 -0
  14. sknetwork/classification/vote.cp311-win_amd64.pyd +0 -0
  15. sknetwork/classification/vote.cpp +14549 -8668
  16. sknetwork/clustering/__init__.py +3 -1
  17. sknetwork/clustering/base.py +1 -1
  18. sknetwork/clustering/kcenters.py +253 -0
  19. sknetwork/clustering/leiden.py +242 -0
  20. sknetwork/clustering/leiden_core.cp311-win_amd64.pyd +0 -0
  21. sknetwork/clustering/leiden_core.cpp +31564 -0
  22. sknetwork/clustering/leiden_core.pyx +124 -0
  23. sknetwork/clustering/louvain.py +118 -83
  24. sknetwork/clustering/louvain_core.cp311-win_amd64.pyd +0 -0
  25. sknetwork/clustering/louvain_core.cpp +21876 -16332
  26. sknetwork/clustering/louvain_core.pyx +86 -94
  27. sknetwork/clustering/postprocess.py +2 -2
  28. sknetwork/clustering/propagation_clustering.py +4 -4
  29. sknetwork/clustering/tests/test_API.py +7 -3
  30. sknetwork/clustering/tests/test_kcenters.py +60 -0
  31. sknetwork/clustering/tests/test_leiden.py +34 -0
  32. sknetwork/clustering/tests/test_louvain.py +2 -3
  33. sknetwork/data/__init__.py +1 -1
  34. sknetwork/data/base.py +7 -2
  35. sknetwork/data/load.py +20 -25
  36. sknetwork/data/models.py +15 -15
  37. sknetwork/data/parse.py +57 -34
  38. sknetwork/data/tests/test_API.py +3 -3
  39. sknetwork/data/tests/test_base.py +2 -2
  40. sknetwork/data/tests/test_parse.py +9 -12
  41. sknetwork/data/tests/test_toy_graphs.py +33 -33
  42. sknetwork/data/toy_graphs.py +35 -43
  43. sknetwork/embedding/__init__.py +0 -1
  44. sknetwork/embedding/base.py +23 -19
  45. sknetwork/embedding/force_atlas.py +3 -2
  46. sknetwork/embedding/louvain_embedding.py +1 -27
  47. sknetwork/embedding/random_projection.py +5 -3
  48. sknetwork/embedding/spectral.py +0 -73
  49. sknetwork/embedding/svd.py +0 -4
  50. sknetwork/embedding/tests/test_API.py +4 -28
  51. sknetwork/embedding/tests/test_louvain_embedding.py +13 -13
  52. sknetwork/embedding/tests/test_spectral.py +2 -5
  53. sknetwork/embedding/tests/test_svd.py +7 -1
  54. sknetwork/gnn/base_layer.py +3 -3
  55. sknetwork/gnn/gnn_classifier.py +41 -87
  56. sknetwork/gnn/layer.py +1 -1
  57. sknetwork/gnn/loss.py +1 -1
  58. sknetwork/gnn/optimizer.py +4 -3
  59. sknetwork/gnn/tests/test_base_layer.py +4 -4
  60. sknetwork/gnn/tests/test_gnn_classifier.py +12 -39
  61. sknetwork/gnn/utils.py +8 -8
  62. sknetwork/hierarchy/base.py +27 -0
  63. sknetwork/hierarchy/louvain_hierarchy.py +55 -47
  64. sknetwork/hierarchy/paris.cp311-win_amd64.pyd +0 -0
  65. sknetwork/hierarchy/paris.cpp +27667 -20915
  66. sknetwork/hierarchy/paris.pyx +11 -10
  67. sknetwork/hierarchy/postprocess.py +16 -16
  68. sknetwork/hierarchy/tests/test_algos.py +5 -0
  69. sknetwork/hierarchy/tests/test_metrics.py +4 -4
  70. sknetwork/linalg/__init__.py +1 -1
  71. sknetwork/linalg/diteration.cp311-win_amd64.pyd +0 -0
  72. sknetwork/linalg/diteration.cpp +13916 -8050
  73. sknetwork/linalg/{normalization.py → normalizer.py} +17 -14
  74. sknetwork/linalg/operators.py +1 -1
  75. sknetwork/linalg/ppr_solver.py +1 -1
  76. sknetwork/linalg/push.cp311-win_amd64.pyd +0 -0
  77. sknetwork/linalg/push.cpp +23187 -16973
  78. sknetwork/linalg/tests/test_normalization.py +3 -7
  79. sknetwork/linalg/tests/test_operators.py +2 -6
  80. sknetwork/linalg/tests/test_ppr.py +1 -1
  81. sknetwork/linkpred/base.py +12 -1
  82. sknetwork/linkpred/nn.py +6 -6
  83. sknetwork/path/distances.py +11 -4
  84. sknetwork/path/shortest_path.py +1 -1
  85. sknetwork/path/tests/test_distances.py +7 -0
  86. sknetwork/path/tests/test_search.py +2 -2
  87. sknetwork/ranking/base.py +11 -6
  88. sknetwork/ranking/betweenness.cp311-win_amd64.pyd +0 -0
  89. sknetwork/ranking/betweenness.cpp +5256 -2190
  90. sknetwork/ranking/pagerank.py +13 -12
  91. sknetwork/ranking/tests/test_API.py +0 -2
  92. sknetwork/ranking/tests/test_betweenness.py +1 -1
  93. sknetwork/ranking/tests/test_pagerank.py +11 -5
  94. sknetwork/regression/base.py +18 -1
  95. sknetwork/regression/diffusion.py +30 -14
  96. sknetwork/regression/tests/test_diffusion.py +8 -0
  97. sknetwork/topology/__init__.py +3 -1
  98. sknetwork/topology/cliques.cp311-win_amd64.pyd +0 -0
  99. sknetwork/topology/cliques.cpp +23528 -16848
  100. sknetwork/topology/core.cp311-win_amd64.pyd +0 -0
  101. sknetwork/topology/core.cpp +22849 -16581
  102. sknetwork/topology/cycles.py +243 -0
  103. sknetwork/topology/minheap.cp311-win_amd64.pyd +0 -0
  104. sknetwork/topology/minheap.cpp +19495 -13469
  105. sknetwork/topology/structure.py +2 -42
  106. sknetwork/topology/tests/test_cycles.py +65 -0
  107. sknetwork/topology/tests/test_structure.py +2 -16
  108. sknetwork/topology/triangles.cp311-win_amd64.pyd +0 -0
  109. sknetwork/topology/triangles.cpp +5283 -1397
  110. sknetwork/topology/triangles.pyx +7 -4
  111. sknetwork/topology/weisfeiler_lehman_core.cp311-win_amd64.pyd +0 -0
  112. sknetwork/topology/weisfeiler_lehman_core.cpp +14781 -8915
  113. sknetwork/utils/__init__.py +1 -1
  114. sknetwork/utils/format.py +1 -1
  115. sknetwork/utils/membership.py +2 -2
  116. sknetwork/utils/values.py +5 -3
  117. sknetwork/visualization/__init__.py +2 -2
  118. sknetwork/visualization/dendrograms.py +55 -7
  119. sknetwork/visualization/graphs.py +261 -44
  120. sknetwork/visualization/tests/test_dendrograms.py +9 -9
  121. sknetwork/visualization/tests/test_graphs.py +63 -57
  122. scikit_network-0.31.0.dist-info/RECORD +0 -221
  123. sknetwork/embedding/louvain_hierarchy.py +0 -142
  124. sknetwork/embedding/tests/test_louvain_hierarchy.py +0 -19
  125. {scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/LICENSE +0 -0
  126. {scikit_network-0.31.0.dist-info → scikit_network-0.33.0.dist-info}/top_level.txt +0 -0

sknetwork/clustering/louvain_core.pyx CHANGED
@@ -1,7 +1,6 @@
-# distutils: language = c++
+# distutils: language=c++
 # cython: language_level=3
 from libcpp.set cimport set
-from libcpp.vector cimport vector
 cimport cython
 
 ctypedef fused int_or_long:
@@ -10,123 +9,116 @@ ctypedef fused int_or_long:
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def fit_core(float resolution, float tol, float[:] ou_node_probs, float[:] in_node_probs, float[:] self_loops,
-             float[:] data, int_or_long[:] indices, int_or_long[:] indptr):  # pragma: no cover
-    """Fit the clusters to the objective function.
+def optimize_core(int_or_long[:] labels, int_or_long[:] indices, int_or_long[:] indptr, float[:] data,
+                  float[:] out_weights, float[:] in_weights, float[:] out_cluster_weights, float[:] in_cluster_weights,
+                  float[:] cluster_weights, float[:] self_loops, float resolution, float tol_optimization):  # pragma: no cover
+    """Find clusters maximizing modularity.
 
     Parameters
     ----------
-    resolution :
-        Resolution parameter (positive).
-    tol :
-        Minimum increase in modularity to enter a new optimization pass.
-    ou_node_probs :
-        Distribution of node weights based on their out-edges (sums to 1).
-    in_node_probs :
-        Distribution of node weights based on their in-edges (sums to 1).
-    self_loops :
-        Weights of self loops.
-    data :
-        CSR format data array of the normalized adjacency matrix.
+    labels :
+        Initial labels.
     indices :
         CSR format index array of the normalized adjacency matrix.
     indptr :
         CSR format index pointer array of the normalized adjacency matrix.
+    data :
+        CSR format data array of the normalized adjacency matrix.
+    out_weights :
+        Out-weights of nodes (sum to 1).
+    in_weights :
+        In-weights of nodes (sum to 1).
+    out_cluster_weights :
+        Out-weights of clusters (sum to 1).
+    in_cluster_weights :
+        In-weights of clusters (sum to 1).
+    cluster_weights :
+        Weights of clusters (initialized to 0).
+    self_loops :
+        Weights of self loops.
+    resolution :
+        Resolution parameter (positive).
+    tol_optimization :
+        Minimum increase in modularity to enter a new optimization pass.
 
     Returns
     -------
     labels :
-        Cluster index of each node.
-    total_increase :
-        Score of the clustering (total increase in modularity).
+        Labels of nodes.
+    increase :
+        Increase in modularity.
     """
-    cdef int_or_long n = indptr.shape[0] - 1
-    cdef int_or_long increase = 1
-    cdef int_or_long cluster
-    cdef int_or_long cluster_best
-    cdef int_or_long cluster_node
+    cdef int_or_long n
+    cdef int_or_long stop = 0
+    cdef int_or_long label
+    cdef int_or_long label_target
+    cdef int_or_long label_best
     cdef int_or_long i
     cdef int_or_long j
-    cdef int_or_long j1
-    cdef int_or_long j2
-    cdef int_or_long label
+    cdef int_or_long start
+    cdef int_or_long end
 
-    cdef float increase_total = 0
+    cdef float increase = 0
     cdef float increase_pass
     cdef float delta
-    cdef float delta_best
-    cdef float delta_exit
     cdef float delta_local
-    cdef float node_prob_in
-    cdef float node_prob_ou
-    cdef float ratio_in
-    cdef float ratio_ou
-
-    cdef vector[int_or_long] labels
-    cdef vector[float] neighbor_clusters_weights
-    cdef vector[float] ou_clusters_weights
-    cdef vector[float] in_clusters_weights
-    cdef set[int_or_long] unique_clusters = ()
-
-    for i in range(n):
-        labels.push_back(i)
-        neighbor_clusters_weights.push_back(0.)
-        ou_clusters_weights.push_back(ou_node_probs[i])
-        in_clusters_weights.push_back(in_node_probs[i])
-
-    while increase == 1:
-        increase = 0
-        increase_pass = 0
-
-        for i in range(n):
-            unique_clusters.clear()
-            cluster_node = labels[i]
-            j1 = indptr[i]
-            j2 = indptr[i + 1]
-
-            for j in range(j1, j2):
-                label = labels[indices[j]]
-                neighbor_clusters_weights[label] += data[j]
-                unique_clusters.insert(label)
+    cdef float delta_best
+    cdef float in_weight
+    cdef float out_weight
 
-            unique_clusters.erase(cluster_node)
+    cdef set[int_or_long] label_set = ()
 
-            if not unique_clusters.empty():
-                node_prob_ou = ou_node_probs[i]
-                node_prob_in = in_node_probs[i]
-                ratio_ou = resolution * node_prob_ou
-                ratio_in = resolution * node_prob_in
+    n = labels.shape[0]
+    while not stop:
+        increase_pass = 0
 
-                delta_exit = 2 * (neighbor_clusters_weights[cluster_node] - self_loops[i])
-                delta_exit -= ratio_ou * (in_clusters_weights[cluster_node] - node_prob_in)
-                delta_exit -= ratio_in * (ou_clusters_weights[cluster_node] - node_prob_ou)
+        for i in range(n):
+            label_set.clear()
+            label = labels[i]
+            start = indptr[i]
+            end = indptr[i+1]
+
+            # neighboring clusters
+            for j in range(start, end):
+                label_target = labels[indices[j]]
+                label_set.insert(label_target)
+                cluster_weights[label_target] += data[j]
+            label_set.erase(label)
+
+            if not label_set.empty():
+                out_weight = out_weights[i]
+                in_weight = in_weights[i]
+
+                # node leaving the current cluster
+                delta = 2 * (cluster_weights[label] - self_loops[i])
+                delta -= resolution * out_weight * (in_cluster_weights[label] - in_weight)
+                delta -= resolution * in_weight * (out_cluster_weights[label] - out_weight)
 
                 delta_best = 0
-                cluster_best = cluster_node
+                label_best = label
 
-                for cluster in unique_clusters:
-                    delta = 2 * neighbor_clusters_weights[cluster]
-                    delta -= ratio_ou * in_clusters_weights[cluster]
-                    delta -= ratio_in * ou_clusters_weights[cluster]
-
-                    delta_local = delta - delta_exit
+                for label_target in label_set:
+                    delta_local = 2 * cluster_weights[label_target]
+                    delta_local -= resolution * out_weight * in_cluster_weights[label_target]
+                    delta_local -= resolution * in_weight * out_cluster_weights[label_target]
+                    delta_local -= delta
                     if delta_local > delta_best:
                         delta_best = delta_local
-                        cluster_best = cluster
-
-                    neighbor_clusters_weights[cluster] = 0
+                        label_best = label_target
+                    cluster_weights[label_target] = 0
 
-                if delta_best > 0:
+                if label_best != label:
                     increase_pass += delta_best
-                    ou_clusters_weights[cluster_node] -= node_prob_ou
-                    in_clusters_weights[cluster_node] -= node_prob_in
-                    ou_clusters_weights[cluster_best] += node_prob_ou
-                    in_clusters_weights[cluster_best] += node_prob_in
-                    labels[i] = cluster_best
-
-            neighbor_clusters_weights[cluster_node] = 0
-
-        increase_total += increase_pass
-        if increase_pass > tol:
-            increase = 1
-    return labels, increase_total
+                    labels[i] = label_best
+                    # update weights
+                    out_cluster_weights[label] -= out_weight
+                    in_cluster_weights[label] -= in_weight
+                    out_cluster_weights[label_best] += out_weight
+                    in_cluster_weights[label_best] += in_weight
+
+            cluster_weights[label] = 0
+
+        increase += increase_pass
+        stop = increase_pass <= tol_optimization
+
+    return labels, increase
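
The rewrite replaces fit_core with optimize_core: labels and weight arrays are now allocated by the caller and passed in, instead of being built from C++ vectors inside the function, and the loop stops via a stop flag once a pass gains less than tol_optimization. As a reading aid, here is a minimal pure-Python sketch of the local move this kernel performs on one node; the helper name best_move and its argument list are illustrative only, not part of the package API (assumes a scipy CSR adjacency normalized so its entries sum to 1):

    def best_move(i, labels, adjacency, out_weights, in_weights,
                  out_cluster_weights, in_cluster_weights, self_loop,
                  resolution=1.0):
        """Best target cluster for node i and the modularity gain of moving there."""
        start, end = adjacency.indptr[i], adjacency.indptr[i + 1]
        label = labels[i]
        # total edge weight from node i to each neighboring cluster
        weights_to_cluster = {}
        for j in range(start, end):
            target = labels[adjacency.indices[j]]
            weights_to_cluster[target] = weights_to_cluster.get(target, 0.) + adjacency.data[j]
        # cost of taking node i out of its current cluster
        delta_exit = 2 * (weights_to_cluster.get(label, 0.) - self_loop)
        delta_exit -= resolution * out_weights[i] * (in_cluster_weights[label] - in_weights[i])
        delta_exit -= resolution * in_weights[i] * (out_cluster_weights[label] - out_weights[i])
        best_label, best_gain = label, 0.
        for target, weight in weights_to_cluster.items():
            if target == label:
                continue
            # gain of joining the target cluster, net of the exit cost
            gain = 2 * weight
            gain -= resolution * out_weights[i] * in_cluster_weights[target]
            gain -= resolution * in_weights[i] * out_cluster_weights[target]
            gain -= delta_exit
            if gain > best_gain:
                best_label, best_gain = target, gain
        return best_label, best_gain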

sknetwork/clustering/postprocess.py CHANGED
@@ -41,7 +41,7 @@ def aggregate_graph(input_matrix: sparse.csr_matrix, labels: Optional[np.ndarray
                     labels_row: Optional[np.ndarray] = None, labels_col: Optional[np.ndarray] = None) \
         -> sparse.csr_matrix:
     """Aggregate graph per label. All nodes with the same label become a single node.
-    Negative labels are ignored (corresponding nodes are not discarded).
+    Negative labels are ignored (corresponding nodes are discarded).
 
     Parameters
     ----------
@@ -63,4 +63,4 @@ def aggregate_graph(input_matrix: sparse.csr_matrix, labels: Optional[np.ndarray
     else:
         membership_col = membership_row
     aggregate_matrix = membership_row.T.dot(input_matrix).dot(membership_col)
-    return aggregate_matrix
+    return aggregate_matrix.tocsr()
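
The docstring fix above matters in practice: rows and columns carrying a negative label contribute nothing to the aggregate. A small usage sketch, assuming aggregate_graph is exported by sknetwork.clustering (the toy matrix is made up for illustration):

    import numpy as np
    from scipy import sparse
    from sknetwork.clustering import aggregate_graph

    # 4-node graph; node 3 gets a negative label and is therefore discarded
    adjacency = sparse.csr_matrix(np.array([[0, 1, 1, 0],
                                            [1, 0, 0, 1],
                                            [1, 0, 0, 0],
                                            [0, 1, 0, 0]]))
    labels = np.array([0, 0, 1, -1])
    aggregate = aggregate_graph(adjacency, labels=labels)
    print(aggregate.shape)  # (2, 2): one node per non-negative label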

sknetwork/clustering/propagation_clustering.py CHANGED
@@ -29,11 +29,11 @@ class PropagationClustering(BaseClustering, Propagation):
     weighted : bool
         If ``True``, the vote of each neighbor is proportional to the edge weight.
         Otherwise, all votes have weight 1.
-    sort_clusters :
+    sort_clusters : bool
         If ``True``, sort labels in decreasing order of cluster size.
-    return_probs :
+    return_probs : bool
         If ``True``, return the probability distribution over clusters (soft clustering).
-    return_aggregate :
+    return_aggregate : bool
         If ``True``, return the aggregate adjacency matrix or biadjacency matrix between clusters.
 
     Attributes
@@ -78,7 +78,7 @@ class PropagationClustering(BaseClustering, Propagation):
 
     Parameters
     ----------
-    input_matrix :
+    input_matrix : sparse.csr_matrix, np.ndarray
         Adjacency matrix or biadjacency matrix of the graph.
 
     Returns

sknetwork/clustering/tests/test_API.py CHANGED
@@ -9,8 +9,12 @@ from sknetwork.data.test_graphs import *
 
 class TestClusteringAPI(unittest.TestCase):
 
+    def setUp(self):
+        self.algos = [Louvain(return_aggregate=True), Leiden(return_aggregate=True),
+                      PropagationClustering(return_aggregate=True)]
+
     def test_regular(self):
-        for algo in [Louvain(return_aggregate=True), PropagationClustering(return_aggregate=True)]:
+        for algo in self.algos:
             for adjacency in [test_graph(), test_digraph(), test_disconnected_graph()]:
                 n = adjacency.shape[0]
                 labels = algo.fit_predict(adjacency)
@@ -22,13 +26,13 @@ class TestClusteringAPI(unittest.TestCase):
             n_labels = len(set(labels))
             self.assertEqual(labels.shape, (n,))
             self.assertEqual(algo.aggregate_.shape, (n_labels, n_labels))
-            membership = algo.fit_transform(adjacency)
+            membership = algo.fit_transform(adjacency_bool)
             self.assertEqual(membership.shape, (n, n_labels))
 
     def test_bipartite(self):
         biadjacency = test_bigraph()
         n_row, n_col = biadjacency.shape
-        for algo in [Louvain(return_aggregate=True), PropagationClustering(return_aggregate=True)]:
+        for algo in self.algos:
             algo.fit(biadjacency)
             self.assertEqual(algo.labels_row_.shape, (n_row,))
             self.assertEqual(algo.labels_col_.shape, (n_col,))

sknetwork/clustering/tests/test_kcenters.py ADDED
@@ -0,0 +1,60 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""Tests for KCenters"""
+import unittest
+
+from sknetwork.clustering import KCenters
+from sknetwork.data.test_graphs import *
+
+
+class TestKCentersClustering(unittest.TestCase):
+
+    def test_kcenters(self):
+        # Test undirected graph
+        n_clusters = 2
+        adjacency = test_graph()
+        n_row = adjacency.shape[0]
+        kcenters = KCenters(n_clusters=n_clusters)
+        labels = kcenters.fit_predict(adjacency)
+        self.assertEqual(len(labels), n_row)
+        self.assertEqual(len(set(labels)), n_clusters)
+
+        # Test directed graph
+        n_clusters = 3
+        adjacency = test_digraph()
+        n_row = adjacency.shape[0]
+        kcenters = KCenters(n_clusters=n_clusters, directed=True)
+        labels = kcenters.fit_predict(adjacency)
+        self.assertEqual(len(labels), n_row)
+        self.assertEqual(len(set(labels)), n_clusters)
+
+        # Test bipartite graph
+        n_clusters = 2
+        biadjacency = test_bigraph()
+        n_row, n_col = biadjacency.shape
+        kcenters = KCenters(n_clusters=n_clusters)
+        kcenters.fit(biadjacency)
+        labels = kcenters.labels_
+        self.assertEqual(len(kcenters.labels_row_), n_row)
+        self.assertEqual(len(kcenters.labels_col_), n_col)
+        self.assertEqual(len(set(labels)), n_clusters)
+
+    def test_kcenters_error(self):
+        # Test value errors
+        adjacency = test_graph()
+        biadjacency = test_bigraph()
+
+        # test n_clusters error
+        kcenters = KCenters(n_clusters=1)
+        with self.assertRaises(ValueError):
+            kcenters.fit(adjacency)
+
+        # test n_init error
+        kcenters = KCenters(n_clusters=2, n_init=0)
+        with self.assertRaises(ValueError):
+            kcenters.fit(adjacency)
+
+        # test center_position error
+        kcenters = KCenters(n_clusters=2, center_position="other")
+        with self.assertRaises(ValueError):
+            kcenters.fit(biadjacency)
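
Based only on what these tests exercise (n_clusters, n_init, center_position, directed, and the usual fit/fit_predict API), a minimal usage sketch of the new KCenters estimator; karate_club is one of the package's toy graphs:

    from sknetwork.clustering import KCenters
    from sknetwork.data import karate_club

    adjacency = karate_club()
    kcenters = KCenters(n_clusters=2)
    labels = kcenters.fit_predict(adjacency)  # one label per node
    print(len(set(labels)))  # 2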

sknetwork/clustering/tests/test_leiden.py ADDED
@@ -0,0 +1,34 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""Tests for Leiden"""
+import unittest
+
+from sknetwork.clustering import Leiden
+from sknetwork.data.test_graphs import *
+from sknetwork.utils import bipartite2undirected
+
+
+class TestLeidenClustering(unittest.TestCase):
+
+    def test_disconnected(self):
+        adjacency = test_disconnected_graph()
+        n = adjacency.shape[0]
+        labels = Leiden().fit_predict(adjacency)
+        self.assertEqual(len(labels), n)
+
+    def test_modularity(self):
+        adjacency = test_graph()
+        leiden_d = Leiden(modularity='dugue')
+        leiden_n = Leiden(modularity='newman')
+        labels_d = leiden_d.fit_predict(adjacency)
+        labels_n = leiden_n.fit_predict(adjacency)
+        self.assertTrue((labels_d == labels_n).all())
+
+    def test_bipartite(self):
+        biadjacency = test_bigraph()
+        adjacency = bipartite2undirected(biadjacency)
+        leiden = Leiden(modularity='newman')
+        labels1 = leiden.fit_predict(adjacency)
+        leiden.fit(biadjacency)
+        labels2 = np.concatenate((leiden.labels_row_, leiden.labels_col_))
+        self.assertTrue((labels1 == labels2).all())
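
The new Leiden estimator follows the same API as Louvain; a minimal sketch mirroring the tests above ('dugue' and 'newman' are the modularity variants the tests exercise; karate_club is a toy graph from sknetwork.data):

    from sknetwork.clustering import Leiden
    from sknetwork.data import karate_club

    adjacency = karate_club()
    leiden = Leiden(modularity='newman')
    labels = leiden.fit_predict(adjacency)
    print(labels.shape)  # one label per node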

sknetwork/clustering/tests/test_louvain.py CHANGED
@@ -24,7 +24,6 @@ class TestLouvainClustering(unittest.TestCase):
         labels_d = louvain_d.fit_predict(adjacency)
         labels_n = louvain_n.fit_predict(adjacency)
         self.assertTrue((labels_d == labels_n).all())
-
         louvain_p = Louvain(modularity='potts')
         louvain_p.fit_predict(adjacency)
 
@@ -48,7 +47,7 @@ class TestLouvainClustering(unittest.TestCase):
         # tolerance
         louvain = Louvain(resolution=2, tol_aggregation=0.1)
         labels = louvain.fit_predict(adjacency)
-        self.assertEqual(len(set(labels)), 12)
+        self.assertEqual(len(set(labels)), 7)
 
         # shuffling
         louvain = Louvain(resolution=2, shuffle_nodes=True, random_state=42)
@@ -78,7 +77,7 @@ class TestLouvainClustering(unittest.TestCase):
         # tolerance
         louvain = Louvain(resolution=2, tol_aggregation=0.1)
         labels = louvain.fit_predict(adjacency)
-        self.assertEqual(len(set(labels)), 12)
+        self.assertEqual(len(set(labels)), 7)
 
         # shuffling
         louvain = Louvain(resolution=2, shuffle_nodes=True, random_state=42)

sknetwork/data/__init__.py CHANGED
@@ -1,5 +1,5 @@
 """data module"""
-from sknetwork.data.base import Bunch
+from sknetwork.data.base import *
 from sknetwork.data.load import *
 from sknetwork.data.models import *
 from sknetwork.data.parse import from_edge_list, from_adjacency_list, from_csv, from_graphml

sknetwork/data/base.py CHANGED
@@ -6,10 +6,10 @@ Created in May 2023
 """
 
 
-class Bunch(dict):
+class Dataset(dict):
     """Container object for datasets.
     Dictionary-like object that exposes its keys as attributes.
-    >>> dataset = Bunch(name='dataset')
+    >>> dataset = Dataset(name='dataset')
     >>> dataset['name']
     'dataset'
     >>> dataset.name
@@ -26,3 +26,8 @@ class Bunch(dict):
             return self[key]
         except KeyError:
             raise AttributeError(key)
+
+
+# alias for Dataset
+Bunch = Dataset
+
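
Since Bunch is kept as an alias of Dataset (per the diff above), existing code continues to work after the rename; a quick sketch:

    from sknetwork.data import Dataset, Bunch

    dataset = Dataset(name='my_graph')
    assert dataset.name == dataset['name'] == 'my_graph'  # keys exposed as attributes
    assert Bunch is Dataset  # backward-compatible alias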

sknetwork/data/load.py CHANGED
@@ -19,15 +19,12 @@ import numpy as np
 from scipy import sparse
 
 from sknetwork.data.parse import from_csv, load_labels, load_header, load_metadata
-from sknetwork.data.base import Bunch
+from sknetwork.data.base import Dataset
 from sknetwork.utils.check import is_square
 from sknetwork.log import Log
 
 NETSET_URL = 'https://netset.telecom-paris.fr'
 
-# former name of Dataset
-Bunch = Bunch
-
 
 def is_within_directory(directory, target):
     """Utility function."""
@@ -89,7 +86,7 @@ def clean_data_home(data_home: Optional[Union[str, Path]] = None):
 
 
 def load_netset(name: Optional[str] = None, data_home: Optional[Union[str, Path]] = None,
-                verbose: bool = True) -> Optional[Bunch]:
+                verbose: bool = True) -> Optional[Dataset]:
     """Load a dataset from the `NetSet collection
     <https://netset.telecom-paris.fr/>`_.
 
@@ -105,10 +102,10 @@ def load_netset(name: Optional[str] = None, data_home: Optional[Union[str, Path]
 
     Returns
     -------
-    dataset : :class:`Bunch`
+    dataset : :class:`Dataset`
         Returned dataset.
     """
-    dataset = Bunch()
+    dataset = Dataset()
     dataset_folder = NETSET_URL + '/datasets/'
     folder_npz = NETSET_URL + '/datasets_npz/'
 
@@ -167,7 +164,7 @@ def load_netset(name: Optional[str] = None, data_home: Optional[Union[str, Path]
 
 
 def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_numpy_bundle: bool = True,
-                verbose: bool = True) -> Bunch:
+                verbose: bool = True) -> Dataset:
     """Load a dataset from the `Konect database
     <http://konect.cc/networks/>`_.
 
@@ -186,7 +183,7 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu
 
     Returns
     -------
-    dataset : :class:`Bunch`
+    dataset : :class:`Dataset`
         Object with the following attributes:
 
         * `adjacency` or `biadjacency`: the adjacency/biadjacency matrix for the dataset
@@ -240,7 +237,7 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu
         logger.print_log('Loading from local bundle...')
         return load_from_numpy_bundle(name + '_bundle', data_path)
 
-    dataset = Bunch()
+    dataset = Dataset()
     path = data_konect / name / name
     if not path.exists() or len(listdir(path)) == 0:
         raise Exception("No data downloaded.")
@@ -250,7 +247,7 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu
     if matrix:
         file = matrix[0]
         directed, bipartite, weighted = load_header(path / file)
-        dataset = from_csv(path / file, directed=directed, bipartite=bipartite, weighted=weighted)
+        dataset = from_csv(path / file, directed=directed, bipartite=bipartite, weighted=weighted, reindex=True)
 
     metadata = [file for file in files if 'meta.' in file]
     if metadata:
@@ -269,7 +266,7 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu
         else:
             dataset.meta.name = name
     else:
-        dataset.meta = Bunch()
+        dataset.meta = Dataset()
         dataset.meta.name = name
 
     if auto_numpy_bundle:
@@ -280,12 +277,12 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu
     return dataset
 
 
-def save_to_numpy_bundle(data: Bunch, bundle_name: str, data_home: Optional[Union[str, Path]] = None):
+def save_to_numpy_bundle(data: Dataset, bundle_name: str, data_home: Optional[Union[str, Path]] = None):
     """Save a dataset in the specified data home to a collection of Numpy and Pickle files for faster subsequent loads.
 
     Parameters
     ----------
-    data: Bunch
+    data: Dataset
        Data to save.
     bundle_name: str
        Name to be used for the bundle folder.
@@ -300,11 +297,9 @@ def save_to_numpy_bundle(data: Dataset, bundle_name: str, data_home: Optional[Unio
             sparse.save_npz(data_path / attribute, data[attribute])
         elif type(data[attribute]) == np.ndarray:
             np.save(data_path / attribute, data[attribute])
-        elif type(data[attribute]) == Bunch or type(data[attribute]) == str:
+        else:
             with open(data_path / (attribute + '.p'), 'wb') as file:
                 pickle.dump(data[attribute], file)
-        else:
-            raise TypeError('Unsupported data attribute type '+str(type(data[attribute])) + '.')
 
 
 def load_from_numpy_bundle(bundle_name: str, data_home: Optional[Union[str, Path]] = None):
@@ -319,7 +314,7 @@ def load_from_numpy_bundle(bundle_name: str, data_home: Optional[Union[str, Path
 
     Returns
    -------
-    data: Bunch
+    data: Dataset
        Data.
    """
    data_home = get_data_home(data_home)
@@ -328,7 +323,7 @@ def load_from_numpy_bundle(bundle_name: str, data_home: Optional[Union[str, Path
         raise FileNotFoundError('No bundle at ' + str(data_path))
     else:
         files = listdir(data_path)
-        data = Bunch()
+        data = Dataset()
         for file in files:
             if len(file.split('.')) == 2:
                 file_name, file_extension = file.split('.')
@@ -342,7 +337,7 @@ def load_from_numpy_bundle(bundle_name: str, data_home: Optional[Union[str, Path
     return data
 
 
-def save(folder: Union[str, Path], data: Union[sparse.csr_matrix, Bunch]):
+def save(folder: Union[str, Path], data: Union[sparse.csr_matrix, Dataset]):
     """Save a dataset or a CSR matrix in the current directory to a collection of Numpy and Pickle files for faster
     subsequent loads. Supported attribute types include sparse matrices, NumPy arrays, strings and objects Dataset.
@@ -350,13 +345,13 @@ def save(folder: Union[str, Path], data: Union[sparse.csr_matrix, Bunch]):
     ----------
     folder : str or :class:`pathlib.Path`
        Name of the bundle folder.
-    data : Union[sparse.csr_matrix, Bunch]
+    data : Union[sparse.csr_matrix, Dataset]
        Data to save.
 
     Example
     -------
     >>> from sknetwork.data import save
-    >>> dataset = Bunch()
+    >>> dataset = Dataset()
     >>> dataset.adjacency = sparse.csr_matrix(np.random.random((3, 3)) < 0.5)
     >>> dataset.names = np.array(['a', 'b', 'c'])
     >>> save('dataset', dataset)
@@ -368,7 +363,7 @@ def save(folder: Union[str, Path], data: Union[sparse.csr_matrix, Bunch]):
     if folder.exists():
         shutil.rmtree(folder)
     if isinstance(data, sparse.csr_matrix):
-        dataset = Bunch()
+        dataset = Dataset()
         if is_square(data):
             dataset.adjacency = data
         else:
@@ -390,13 +385,13 @@ def load(folder: Union[str, Path]):
 
     Returns
     -------
-    data: Bunch
+    data: Dataset
        Data.
 
     Example
     -------
     >>> from sknetwork.data import save
-    >>> dataset = Bunch()
+    >>> dataset = Dataset()
     >>> dataset.adjacency = sparse.csr_matrix(np.random.random((3, 3)) < 0.5)
     >>> dataset.names = np.array(['a', 'b', 'c'])
     >>> save('dataset', dataset)