scikit-network 0.31.0-cp38-cp38-win_amd64.whl → 0.32.1-cp38-cp38-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of scikit-network has been flagged as potentially problematic.

Files changed (114)
  1. {scikit_network-0.31.0.dist-info → scikit_network-0.32.1.dist-info}/AUTHORS.rst +3 -0
  2. {scikit_network-0.31.0.dist-info → scikit_network-0.32.1.dist-info}/METADATA +19 -3
  3. {scikit_network-0.31.0.dist-info → scikit_network-0.32.1.dist-info}/RECORD +112 -105
  4. {scikit_network-0.31.0.dist-info → scikit_network-0.32.1.dist-info}/WHEEL +1 -1
  5. sknetwork/__init__.py +1 -1
  6. sknetwork/classification/base.py +1 -1
  7. sknetwork/classification/base_rank.py +3 -3
  8. sknetwork/classification/diffusion.py +21 -13
  9. sknetwork/classification/knn.py +19 -13
  10. sknetwork/classification/metrics.py +1 -1
  11. sknetwork/classification/pagerank.py +12 -8
  12. sknetwork/classification/propagation.py +22 -15
  13. sknetwork/classification/tests/test_diffusion.py +10 -0
  14. sknetwork/classification/vote.cp38-win_amd64.pyd +0 -0
  15. sknetwork/classification/vote.cpp +14549 -8668
  16. sknetwork/clustering/__init__.py +3 -1
  17. sknetwork/clustering/base.py +1 -1
  18. sknetwork/clustering/kcenters.py +253 -0
  19. sknetwork/clustering/leiden.py +241 -0
  20. sknetwork/clustering/leiden_core.cp38-win_amd64.pyd +0 -0
  21. sknetwork/clustering/leiden_core.cpp +31564 -0
  22. sknetwork/clustering/leiden_core.pyx +124 -0
  23. sknetwork/clustering/louvain.py +118 -83
  24. sknetwork/clustering/louvain_core.cp38-win_amd64.pyd +0 -0
  25. sknetwork/clustering/louvain_core.cpp +21876 -16332
  26. sknetwork/clustering/louvain_core.pyx +86 -94
  27. sknetwork/clustering/postprocess.py +2 -2
  28. sknetwork/clustering/propagation_clustering.py +4 -4
  29. sknetwork/clustering/tests/test_API.py +7 -3
  30. sknetwork/clustering/tests/test_kcenters.py +92 -0
  31. sknetwork/clustering/tests/test_leiden.py +34 -0
  32. sknetwork/clustering/tests/test_louvain.py +2 -3
  33. sknetwork/data/load.py +2 -4
  34. sknetwork/data/parse.py +41 -20
  35. sknetwork/data/tests/test_parse.py +9 -12
  36. sknetwork/embedding/__init__.py +0 -1
  37. sknetwork/embedding/base.py +20 -19
  38. sknetwork/embedding/force_atlas.py +3 -2
  39. sknetwork/embedding/louvain_embedding.py +1 -1
  40. sknetwork/embedding/random_projection.py +5 -3
  41. sknetwork/embedding/spectral.py +0 -73
  42. sknetwork/embedding/tests/test_API.py +4 -28
  43. sknetwork/embedding/tests/test_louvain_embedding.py +4 -9
  44. sknetwork/embedding/tests/test_spectral.py +2 -5
  45. sknetwork/embedding/tests/test_svd.py +1 -1
  46. sknetwork/gnn/base_layer.py +3 -3
  47. sknetwork/gnn/gnn_classifier.py +40 -86
  48. sknetwork/gnn/layer.py +1 -1
  49. sknetwork/gnn/loss.py +1 -1
  50. sknetwork/gnn/optimizer.py +4 -3
  51. sknetwork/gnn/tests/test_base_layer.py +4 -4
  52. sknetwork/gnn/tests/test_gnn_classifier.py +12 -39
  53. sknetwork/gnn/utils.py +8 -8
  54. sknetwork/hierarchy/base.py +27 -0
  55. sknetwork/hierarchy/louvain_hierarchy.py +45 -41
  56. sknetwork/hierarchy/paris.cp38-win_amd64.pyd +0 -0
  57. sknetwork/hierarchy/paris.cpp +27719 -20959
  58. sknetwork/hierarchy/paris.pyx +7 -7
  59. sknetwork/hierarchy/postprocess.py +16 -16
  60. sknetwork/hierarchy/tests/test_algos.py +5 -0
  61. sknetwork/linalg/__init__.py +1 -1
  62. sknetwork/linalg/diteration.cp38-win_amd64.pyd +0 -0
  63. sknetwork/linalg/diteration.cpp +13916 -8050
  64. sknetwork/linalg/{normalization.py → normalizer.py} +17 -14
  65. sknetwork/linalg/operators.py +1 -1
  66. sknetwork/linalg/ppr_solver.py +1 -1
  67. sknetwork/linalg/push.cp38-win_amd64.pyd +0 -0
  68. sknetwork/linalg/push.cpp +23144 -16920
  69. sknetwork/linalg/tests/test_normalization.py +3 -7
  70. sknetwork/linalg/tests/test_operators.py +2 -6
  71. sknetwork/linalg/tests/test_ppr.py +1 -1
  72. sknetwork/linkpred/base.py +12 -1
  73. sknetwork/linkpred/nn.py +6 -6
  74. sknetwork/path/distances.py +11 -4
  75. sknetwork/path/shortest_path.py +1 -1
  76. sknetwork/path/tests/test_distances.py +7 -0
  77. sknetwork/path/tests/test_search.py +2 -2
  78. sknetwork/ranking/base.py +11 -6
  79. sknetwork/ranking/betweenness.cp38-win_amd64.pyd +0 -0
  80. sknetwork/ranking/betweenness.cpp +5256 -2190
  81. sknetwork/ranking/pagerank.py +13 -12
  82. sknetwork/ranking/tests/test_API.py +0 -2
  83. sknetwork/ranking/tests/test_betweenness.py +1 -1
  84. sknetwork/ranking/tests/test_pagerank.py +11 -5
  85. sknetwork/regression/base.py +18 -1
  86. sknetwork/regression/diffusion.py +24 -10
  87. sknetwork/regression/tests/test_diffusion.py +8 -0
  88. sknetwork/topology/__init__.py +3 -1
  89. sknetwork/topology/cliques.cp38-win_amd64.pyd +0 -0
  90. sknetwork/topology/cliques.cpp +23147 -16457
  91. sknetwork/topology/core.cp38-win_amd64.pyd +0 -0
  92. sknetwork/topology/core.cpp +22854 -16576
  93. sknetwork/topology/cycles.py +243 -0
  94. sknetwork/topology/minheap.cp38-win_amd64.pyd +0 -0
  95. sknetwork/topology/minheap.cpp +19495 -13469
  96. sknetwork/topology/structure.py +2 -42
  97. sknetwork/topology/tests/test_cycles.py +65 -0
  98. sknetwork/topology/tests/test_structure.py +2 -16
  99. sknetwork/topology/triangles.cp38-win_amd64.pyd +0 -0
  100. sknetwork/topology/triangles.cpp +5283 -1397
  101. sknetwork/topology/triangles.pyx +7 -4
  102. sknetwork/topology/weisfeiler_lehman_core.cp38-win_amd64.pyd +0 -0
  103. sknetwork/topology/weisfeiler_lehman_core.cpp +14781 -8915
  104. sknetwork/utils/format.py +1 -1
  105. sknetwork/utils/membership.py +2 -2
  106. sknetwork/visualization/__init__.py +2 -2
  107. sknetwork/visualization/dendrograms.py +55 -7
  108. sknetwork/visualization/graphs.py +261 -44
  109. sknetwork/visualization/tests/test_dendrograms.py +9 -9
  110. sknetwork/visualization/tests/test_graphs.py +63 -57
  111. sknetwork/embedding/louvain_hierarchy.py +0 -142
  112. sknetwork/embedding/tests/test_louvain_hierarchy.py +0 -19
  113. {scikit_network-0.31.0.dist-info → scikit_network-0.32.1.dist-info}/LICENSE +0 -0
  114. {scikit_network-0.31.0.dist-info → scikit_network-0.32.1.dist-info}/top_level.txt +0 -0
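
The headline additions in 0.32.1 are two new clustering algorithms, Leiden and KCenters, plus a new shape parameter for the graph parsers (see the diffs below). As a quick orientation, here is a minimal Python usage sketch; it assumes the new classes follow the same fit_predict API as Louvain, which is what the new tests test_leiden.py and test_kcenters.py exercise:

from sknetwork.clustering import Leiden, KCenters
from sknetwork.data import karate_club

adjacency = karate_club()

# Leiden: same estimator interface as Louvain (see test_leiden.py below)
labels_leiden = Leiden().fit_predict(adjacency)

# KCenters: the number of clusters is chosen upfront (see test_kcenters.py below)
labels_kcenters = KCenters(n_clusters=2).fit_predict(adjacency)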
sknetwork/clustering/louvain_core.pyx CHANGED
@@ -1,7 +1,6 @@
-# distutils: language = c++
+# distutils: language=c++
 # cython: language_level=3
 from libcpp.set cimport set
-from libcpp.vector cimport vector
 cimport cython
 
 ctypedef fused int_or_long:
@@ -10,123 +9,116 @@ ctypedef fused int_or_long:
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def fit_core(float resolution, float tol, float[:] ou_node_probs, float[:] in_node_probs, float[:] self_loops,
-             float[:] data, int_or_long[:] indices, int_or_long[:] indptr):  # pragma: no cover
-    """Fit the clusters to the objective function.
+def optimize_core(int_or_long[:] labels, int_or_long[:] indices, int_or_long[:] indptr, float[:] data,
+                  float[:] out_weights, float[:] in_weights, float[:] out_cluster_weights, float[:] in_cluster_weights,
+                  float[:] cluster_weights, float[:] self_loops, float resolution, float tol_optimization):  # pragma: no cover
+    """Find clusters maximizing modularity.
 
     Parameters
     ----------
-    resolution :
-        Resolution parameter (positive).
-    tol :
-        Minimum increase in modularity to enter a new optimization pass.
-    ou_node_probs :
-        Distribution of node weights based on their out-edges (sums to 1).
-    in_node_probs :
-        Distribution of node weights based on their in-edges (sums to 1).
-    self_loops :
-        Weights of self loops.
-    data :
-        CSR format data array of the normalized adjacency matrix.
+    labels :
+        Initial labels.
     indices :
         CSR format index array of the normalized adjacency matrix.
     indptr :
        CSR format index pointer array of the normalized adjacency matrix.
+    data :
+        CSR format data array of the normalized adjacency matrix.
+    out_weights :
+        Out-weights of nodes (sum to 1).
+    in_weights :
+        In-weights of nodes (sum to 1).
+    out_cluster_weights :
+        Out-weights of clusters (sum to 1).
+    in_cluster_weights :
+        In-weights of clusters (sum to 1).
+    cluster_weights :
+        Weights of clusters (initialized to 0).
+    self_loops :
+        Weights of self loops.
+    resolution :
+        Resolution parameter (positive).
+    tol_optimization :
+        Minimum increase in modularity to enter a new optimization pass.
 
     Returns
     -------
     labels :
-        Cluster index of each node.
-    total_increase :
-        Score of the clustering (total increase in modularity).
+        Labels of nodes.
+    increase :
+        Increase in modularity.
     """
-    cdef int_or_long n = indptr.shape[0] - 1
-    cdef int_or_long increase = 1
-    cdef int_or_long cluster
-    cdef int_or_long cluster_best
-    cdef int_or_long cluster_node
+    cdef int_or_long n
+    cdef int_or_long stop = 0
+    cdef int_or_long label
+    cdef int_or_long label_target
+    cdef int_or_long label_best
     cdef int_or_long i
     cdef int_or_long j
-    cdef int_or_long j1
-    cdef int_or_long j2
-    cdef int_or_long label
+    cdef int_or_long start
+    cdef int_or_long end
 
-    cdef float increase_total = 0
+    cdef float increase = 0
     cdef float increase_pass
     cdef float delta
-    cdef float delta_best
-    cdef float delta_exit
     cdef float delta_local
-    cdef float node_prob_in
-    cdef float node_prob_ou
-    cdef float ratio_in
-    cdef float ratio_ou
-
-    cdef vector[int_or_long] labels
-    cdef vector[float] neighbor_clusters_weights
-    cdef vector[float] ou_clusters_weights
-    cdef vector[float] in_clusters_weights
-    cdef set[int_or_long] unique_clusters = ()
-
-    for i in range(n):
-        labels.push_back(i)
-        neighbor_clusters_weights.push_back(0.)
-        ou_clusters_weights.push_back(ou_node_probs[i])
-        in_clusters_weights.push_back(in_node_probs[i])
-
-    while increase == 1:
-        increase = 0
-        increase_pass = 0
-
-        for i in range(n):
-            unique_clusters.clear()
-            cluster_node = labels[i]
-            j1 = indptr[i]
-            j2 = indptr[i + 1]
-
-            for j in range(j1, j2):
-                label = labels[indices[j]]
-                neighbor_clusters_weights[label] += data[j]
-                unique_clusters.insert(label)
+    cdef float delta_best
+    cdef float in_weight
+    cdef float out_weight
 
-            unique_clusters.erase(cluster_node)
+    cdef set[int_or_long] label_set = ()
 
-            if not unique_clusters.empty():
-                node_prob_ou = ou_node_probs[i]
-                node_prob_in = in_node_probs[i]
-                ratio_ou = resolution * node_prob_ou
-                ratio_in = resolution * node_prob_in
+    n = labels.shape[0]
+    while not stop:
+        increase_pass = 0
 
-                delta_exit = 2 * (neighbor_clusters_weights[cluster_node] - self_loops[i])
-                delta_exit -= ratio_ou * (in_clusters_weights[cluster_node] - node_prob_in)
-                delta_exit -= ratio_in * (ou_clusters_weights[cluster_node] - node_prob_ou)
+        for i in range(n):
+            label_set.clear()
+            label = labels[i]
+            start = indptr[i]
+            end = indptr[i+1]
+
+            # neighboring clusters
+            for j in range(start, end):
+                label_target = labels[indices[j]]
+                label_set.insert(label_target)
+                cluster_weights[label_target] += data[j]
+            label_set.erase(label)
+
+            if not label_set.empty():
+                out_weight = out_weights[i]
+                in_weight = in_weights[i]
+
+                # node leaving the current cluster
+                delta = 2 * (cluster_weights[label] - self_loops[i])
+                delta -= resolution * out_weight * (in_cluster_weights[label] - in_weight)
+                delta -= resolution * in_weight * (out_cluster_weights[label] - out_weight)
 
                 delta_best = 0
-                cluster_best = cluster_node
+                label_best = label
 
-                for cluster in unique_clusters:
-                    delta = 2 * neighbor_clusters_weights[cluster]
-                    delta -= ratio_ou * in_clusters_weights[cluster]
-                    delta -= ratio_in * ou_clusters_weights[cluster]
-
-                    delta_local = delta - delta_exit
+                for label_target in label_set:
+                    delta_local = 2 * cluster_weights[label_target]
+                    delta_local -= resolution * out_weight * in_cluster_weights[label_target]
+                    delta_local -= resolution * in_weight * out_cluster_weights[label_target]
+                    delta_local -= delta
                     if delta_local > delta_best:
                         delta_best = delta_local
-                        cluster_best = cluster
-
-                    neighbor_clusters_weights[cluster] = 0
+                        label_best = label_target
+                    cluster_weights[label_target] = 0
 
-                if delta_best > 0:
+                if label_best != label:
                     increase_pass += delta_best
-                    ou_clusters_weights[cluster_node] -= node_prob_ou
-                    in_clusters_weights[cluster_node] -= node_prob_in
-                    ou_clusters_weights[cluster_best] += node_prob_ou
-                    in_clusters_weights[cluster_best] += node_prob_in
-                    labels[i] = cluster_best
-
-            neighbor_clusters_weights[cluster_node] = 0
-
-        increase_total += increase_pass
-        if increase_pass > tol:
-            increase = 1
-    return labels, increase_total
+                    labels[i] = label_best
+                    # update weights
+                    out_cluster_weights[label] -= out_weight
+                    in_cluster_weights[label] -= in_weight
+                    out_cluster_weights[label_best] += out_weight
+                    in_cluster_weights[label_best] += in_weight
+
+            cluster_weights[label] = 0
+
+        increase += increase_pass
+        stop = increase_pass <= tol_optimization
+
+    return labels, increase
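
For orientation, optimize_core is the local-move step of Louvain-style modularity optimization: each node compares its current cluster against the clusters of its neighbors and takes the move with the largest modularity gain. A compact Python sketch of the gain computed in the loop above; this is an illustration only, with variable names mirroring the Cython code and the arrays assumed to hold the normalized weights described in the docstring:

def modularity_gain(i, label, label_target, cluster_weights, out_cluster_weights,
                    in_cluster_weights, out_weights, in_weights, self_loops, resolution):
    # Gain of moving node i from its current cluster `label` to `label_target`,
    # where cluster_weights[c] holds the total edge weight from i to cluster c.
    out_w, in_w = out_weights[i], in_weights[i]
    # cost of leaving the current cluster (the `delta` term in the Cython code)
    delta_exit = 2 * (cluster_weights[label] - self_loops[i])
    delta_exit -= resolution * out_w * (in_cluster_weights[label] - in_w)
    delta_exit -= resolution * in_w * (out_cluster_weights[label] - out_w)
    # benefit of joining the target cluster (the `delta_local` term)
    delta_enter = 2 * cluster_weights[label_target]
    delta_enter -= resolution * out_w * in_cluster_weights[label_target]
    delta_enter -= resolution * in_w * out_cluster_weights[label_target]
    return delta_enter - delta_exit

A move is kept only when this gain is positive for some neighboring cluster, and passes repeat until the total gain of a pass drops to tol_optimization or below.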
sknetwork/clustering/postprocess.py CHANGED
@@ -41,7 +41,7 @@ def aggregate_graph(input_matrix: sparse.csr_matrix, labels: Optional[np.ndarray
                     labels_row: Optional[np.ndarray] = None, labels_col: Optional[np.ndarray] = None) \
         -> sparse.csr_matrix:
     """Aggregate graph per label. All nodes with the same label become a single node.
-    Negative labels are ignored (corresponding nodes are not discarded).
+    Negative labels are ignored (corresponding nodes are discarded).
 
     Parameters
     ----------
@@ -63,4 +63,4 @@ def aggregate_graph(input_matrix: sparse.csr_matrix, labels: Optional[np.ndarray
     else:
         membership_col = membership_row
     aggregate_matrix = membership_row.T.dot(input_matrix).dot(membership_col)
-    return aggregate_matrix
+    return aggregate_matrix.tocsr()
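
The docstring fix is substantive: as the corrected text says, nodes with a negative label are discarded from the aggregate graph, and the result is now explicitly CSR. A small sketch of the behavior, assuming aggregate_graph is importable from sknetwork.clustering (where postprocess.py lives):

import numpy as np
from scipy import sparse
from sknetwork.clustering import aggregate_graph  # assumed import path

adjacency = sparse.csr_matrix(np.array([[0, 1, 1],
                                        [1, 0, 0],
                                        [1, 0, 0]]))
labels = np.array([0, 0, -1])  # node 2 carries a negative label
aggregate = aggregate_graph(adjacency, labels=labels)
print(aggregate.shape)  # expected (1, 1): node 2 is discarded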
sknetwork/clustering/propagation_clustering.py CHANGED
@@ -29,11 +29,11 @@ class PropagationClustering(BaseClustering, Propagation):
     weighted : bool
         If ``True``, the vote of each neighbor is proportional to the edge weight.
         Otherwise, all votes have weight 1.
-    sort_clusters :
+    sort_clusters : bool
         If ``True``, sort labels in decreasing order of cluster size.
-    return_probs :
+    return_probs : bool
         If ``True``, return the probability distribution over clusters (soft clustering).
-    return_aggregate :
+    return_aggregate : bool
         If ``True``, return the aggregate adjacency matrix or biadjacency matrix between clusters.
 
     Attributes
@@ -78,7 +78,7 @@ class PropagationClustering(BaseClustering, Propagation):
 
     Parameters
     ----------
-    input_matrix :
+    input_matrix : sparse.csr_matrix, np.ndarray
         Adjacency matrix or biadjacency matrix of the graph.
 
     Returns
sknetwork/clustering/tests/test_API.py CHANGED
@@ -9,8 +9,12 @@ from sknetwork.data.test_graphs import *
 
 class TestClusteringAPI(unittest.TestCase):
 
+    def setUp(self):
+        self.algos = [Louvain(return_aggregate=True), Leiden(return_aggregate=True),
+                      PropagationClustering(return_aggregate=True)]
+
     def test_regular(self):
-        for algo in [Louvain(return_aggregate=True), PropagationClustering(return_aggregate=True)]:
+        for algo in self.algos:
             for adjacency in [test_graph(), test_digraph(), test_disconnected_graph()]:
                 n = adjacency.shape[0]
                 labels = algo.fit_predict(adjacency)
@@ -22,13 +26,13 @@ class TestClusteringAPI(unittest.TestCase):
             n_labels = len(set(labels))
             self.assertEqual(labels.shape, (n,))
             self.assertEqual(algo.aggregate_.shape, (n_labels, n_labels))
-            membership = algo.fit_transform(adjacency)
+            membership = algo.fit_transform(adjacency_bool)
             self.assertEqual(membership.shape, (n, n_labels))
 
     def test_bipartite(self):
         biadjacency = test_bigraph()
         n_row, n_col = biadjacency.shape
-        for algo in [Louvain(return_aggregate=True), PropagationClustering(return_aggregate=True)]:
+        for algo in self.algos:
             algo.fit(biadjacency)
             self.assertEqual(algo.labels_row_.shape, (n_row,))
             self.assertEqual(algo.labels_col_.shape, (n_col,))
sknetwork/clustering/tests/test_kcenters.py ADDED
@@ -0,0 +1,92 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""Tests for KCenters"""
+import unittest
+
+from sknetwork.clustering import KCenters
+from sknetwork.data import karate_club, painters, star_wars
+from sknetwork.data.test_graphs import *
+
+
+class TestKCentersClustering(unittest.TestCase):
+
+    def test_kcenters(self):
+        # Test undirected graph
+        n_clusters = 2
+        adjacency = karate_club()
+        n_row = adjacency.shape[0]
+        kcenters = KCenters(n_clusters=n_clusters)
+        labels = kcenters.fit_predict(adjacency)
+        self.assertEqual(len(labels), n_row)
+        self.assertEqual(len(set(labels)), n_clusters)
+
+        # Test directed graph
+        n_clusters = 3
+        adjacency = painters()
+        n_row = adjacency.shape[0]
+        kcenters = KCenters(n_clusters=n_clusters, directed=True)
+        labels = kcenters.fit_predict(adjacency)
+        self.assertEqual(len(labels), n_row)
+        self.assertEqual(len(set(labels)), n_clusters)
+
+        # Test bipartite graph
+        n_clusters = 2
+        biadjacency = star_wars()
+        n_row, n_col = biadjacency.shape
+        kcenters = KCenters(n_clusters=n_clusters)
+        kcenters.fit(biadjacency)
+        labels = kcenters.labels_
+        self.assertEqual(len(kcenters.labels_row_), n_row)
+        self.assertEqual(len(kcenters.labels_col_), n_col)
+        self.assertEqual(len(set(labels)), n_clusters)
+
+    def test_kcenters_centers(self):
+        # Test centers for undirected graphs
+        n_clusters = 2
+        adjacency = karate_club()
+        kcenters = KCenters(n_clusters=n_clusters)
+        kcenters.fit(adjacency)
+        centers = kcenters.centers_
+        self.assertEqual(n_clusters, len(set(centers)))
+
+        # Test centers for bipartite graphs
+        n_clusters = 2
+        biadjacency = star_wars()
+        n_row, n_col = biadjacency.shape
+        for position in ["row", "col", "both"]:
+            kcenters = KCenters(n_clusters=n_clusters, center_position=position)
+            kcenters.fit(biadjacency)
+            centers_row = kcenters.centers_row_
+            centers_col = kcenters.centers_col_
+            if position == "row":
+                self.assertEqual(n_clusters, len(set(centers_row)))
+                self.assertTrue(np.all(centers_row < n_row))
+                self.assertTrue(centers_col is None)
+            if position == "col":
+                self.assertEqual(n_clusters, len(set(centers_col)))
+                self.assertTrue(np.all((centers_col < n_col) & (0 <= centers_col)))
+                self.assertTrue(centers_row is None)
+            if position == "both":
+                self.assertEqual(n_clusters, len(set(centers_row)) + len(set(centers_col)))
+                self.assertTrue(np.all(centers_row < n_row))
+                self.assertTrue(np.all((centers_col < n_col) & (0 <= centers_col)))
+
+    def test_kcenters_error(self):
+        # Test value errors
+        adjacency = karate_club()
+        biadjacency = star_wars()
+
+        # test n_clusters error
+        kcenters = KCenters(n_clusters=1)
+        with self.assertRaises(ValueError):
+            kcenters.fit(adjacency)
+
+        # test n_init error
+        kcenters = KCenters(n_clusters=2, n_init=0)
+        with self.assertRaises(ValueError):
+            kcenters.fit(adjacency)
+
+        # test center_position error
+        kcenters = KCenters(n_clusters=2, center_position="other")
+        with self.assertRaises(ValueError):
+            kcenters.fit(biadjacency)
sknetwork/clustering/tests/test_leiden.py ADDED
@@ -0,0 +1,34 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""Tests for Leiden"""
+import unittest
+
+from sknetwork.clustering import Leiden
+from sknetwork.data.test_graphs import *
+from sknetwork.utils import bipartite2undirected
+
+
+class TestLeidenClustering(unittest.TestCase):
+
+    def test_disconnected(self):
+        adjacency = test_disconnected_graph()
+        n = adjacency.shape[0]
+        labels = Leiden().fit_predict(adjacency)
+        self.assertEqual(len(labels), n)
+
+    def test_modularity(self):
+        adjacency = test_graph()
+        leiden_d = Leiden(modularity='dugue')
+        leiden_n = Leiden(modularity='newman')
+        labels_d = leiden_d.fit_predict(adjacency)
+        labels_n = leiden_n.fit_predict(adjacency)
+        self.assertTrue((labels_d == labels_n).all())
+
+    def test_bipartite(self):
+        biadjacency = test_bigraph()
+        adjacency = bipartite2undirected(biadjacency)
+        leiden = Leiden(modularity='newman')
+        labels1 = leiden.fit_predict(adjacency)
+        leiden.fit(biadjacency)
+        labels2 = np.concatenate((leiden.labels_row_, leiden.labels_col_))
+        self.assertTrue((labels1 == labels2).all())
sknetwork/clustering/tests/test_louvain.py CHANGED
@@ -24,7 +24,6 @@ class TestLouvainClustering(unittest.TestCase):
         labels_d = louvain_d.fit_predict(adjacency)
         labels_n = louvain_n.fit_predict(adjacency)
         self.assertTrue((labels_d == labels_n).all())
-
         louvain_p = Louvain(modularity='potts')
         louvain_p.fit_predict(adjacency)
 
@@ -48,7 +47,7 @@ class TestLouvainClustering(unittest.TestCase):
         # tolerance
         louvain = Louvain(resolution=2, tol_aggregation=0.1)
         labels = louvain.fit_predict(adjacency)
-        self.assertEqual(len(set(labels)), 12)
+        self.assertEqual(len(set(labels)), 7)
 
         # shuffling
         louvain = Louvain(resolution=2, shuffle_nodes=True, random_state=42)
@@ -78,7 +77,7 @@ class TestLouvainClustering(unittest.TestCase):
         # tolerance
         louvain = Louvain(resolution=2, tol_aggregation=0.1)
         labels = louvain.fit_predict(adjacency)
-        self.assertEqual(len(set(labels)), 12)
+        self.assertEqual(len(set(labels)), 7)
 
         # shuffling
         louvain = Louvain(resolution=2, shuffle_nodes=True, random_state=42)
sknetwork/data/load.py CHANGED
@@ -250,7 +250,7 @@ def load_konect(name: str, data_home: Optional[Union[str, Path]] = None, auto_nu
     if matrix:
         file = matrix[0]
         directed, bipartite, weighted = load_header(path / file)
-        dataset = from_csv(path / file, directed=directed, bipartite=bipartite, weighted=weighted)
+        dataset = from_csv(path / file, directed=directed, bipartite=bipartite, weighted=weighted, reindex=True)
 
     metadata = [file for file in files if 'meta.' in file]
     if metadata:
@@ -300,11 +300,9 @@ def save_to_numpy_bundle(data: Bunch, bundle_name: str, data_home: Optional[Unio
             sparse.save_npz(data_path / attribute, data[attribute])
         elif type(data[attribute]) == np.ndarray:
             np.save(data_path / attribute, data[attribute])
-        elif type(data[attribute]) == Bunch or type(data[attribute]) == str:
+        else:
            with open(data_path / (attribute + '.p'), 'wb') as file:
                pickle.dump(data[attribute], file)
-        else:
-            raise TypeError('Unsupported data attribute type '+str(type(data[attribute])) + '.')
 
 
 def load_from_numpy_bundle(bundle_name: str, data_home: Optional[Union[str, Path]] = None):
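
The second hunk relaxes save_to_numpy_bundle: attributes that are neither CSR matrices nor NumPy arrays are now pickled instead of raising TypeError. A self-contained sketch of the new dispatch logic (save_attribute is a hypothetical helper for illustration, not part of sknetwork):

import pickle
from pathlib import Path

import numpy as np
from scipy import sparse


def save_attribute(data_path: Path, name: str, value) -> None:
    # CSR matrices and arrays keep their native on-disk formats;
    # everything else (Bunch, str, ...) falls back to pickle.
    if type(value) == sparse.csr_matrix:
        sparse.save_npz(data_path / name, value)
    elif type(value) == np.ndarray:
        np.save(data_path / name, value)
    else:
        with open(data_path / (name + '.p'), 'wb') as file:
            pickle.dump(value, file)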
sknetwork/data/parse.py CHANGED
@@ -8,7 +8,7 @@ Created in December 2018
 """
 
 from csv import reader
-from typing import Dict, List, Tuple, Union
+from typing import Dict, List, Tuple, Union, Optional
 from xml.etree import ElementTree
 
 import numpy as np
@@ -19,7 +19,7 @@ from sknetwork.utils.format import directed2undirected
 
 
 def from_edge_list(edge_list: Union[np.ndarray, List[Tuple]], directed: bool = False,
-                   bipartite: bool = False, weighted: bool = True, reindex: bool = True,
+                   bipartite: bool = False, weighted: bool = True, reindex: bool = False, shape: Optional[tuple] = None,
                    sum_duplicates: bool = True, matrix_only: bool = None) -> Union[Bunch, sparse.csr_matrix]:
     """Load a graph from an edge list.
 
@@ -37,6 +37,9 @@ def from_edge_list(edge_list: Union[np.ndarray, List[Tuple]], directed: bool = F
     reindex : bool
         If ``True``, reindex nodes and returns the original node indices as names.
         Reindexing is enforced if nodes are not integers.
+    shape : tuple
+        Shape of the adjacency or biadjacency matrix.
+        If not specified or if nodes are reindexed, the shape is the smallest compatible with node indices.
     sum_duplicates : bool
         If ``True`` (default), sums weights of duplicate edges.
         Otherwise, the weight of each edge is that of the first occurrence of this edge.
@@ -83,12 +86,14 @@ def from_edge_list(edge_list: Union[np.ndarray, List[Tuple]], directed: bool = F
     else:
         raise TypeError('The edge list must be given as a NumPy array or a list of tuples.')
     return from_edge_array(edge_array=edge_array, weights=weights, directed=directed, bipartite=bipartite,
-                           weighted=weighted, reindex=reindex, sum_duplicates=sum_duplicates, matrix_only=matrix_only)
+                           weighted=weighted, reindex=reindex, shape=shape, sum_duplicates=sum_duplicates,
+                           matrix_only=matrix_only)
 
 
 def from_adjacency_list(adjacency_list: Union[List[List], Dict[str, List]], directed: bool = False,
-                        bipartite: bool = False, weighted: bool = True, reindex: bool = True,
-                        sum_duplicates: bool = True, matrix_only: bool = None) -> Union[Bunch, sparse.csr_matrix]:
+                        bipartite: bool = False, weighted: bool = True, reindex: bool = False,
+                        shape: Optional[tuple] = None, sum_duplicates: bool = True, matrix_only: bool = None) \
+        -> Union[Bunch, sparse.csr_matrix]:
     """Load a graph from an adjacency list.
 
     Parameters
@@ -104,6 +109,9 @@ def from_adjacency_list(adjacency_list: Union[List[List], Dict[str, List]], dire
     reindex : bool
         If ``True``, reindex nodes and returns the original node indices as names.
         Reindexing is enforced if nodes are not integers.
+    shape : tuple
+        Shape of the adjacency or biadjacency matrix.
+        If not specified or if nodes are reindexed, the shape is the smallest compatible with node indices.
     sum_duplicates : bool
         If ``True`` (default), sums weights of duplicate edges.
         Otherwise, the weight of each edge is that of the first occurrence of this edge.
@@ -134,12 +142,12 @@ def from_adjacency_list(adjacency_list: Union[List[List], Dict[str, List]], dire
     else:
         raise TypeError('The adjacency list must be given as a list of lists or a dict of lists.')
     return from_edge_list(edge_list=edge_list, directed=directed, bipartite=bipartite, weighted=weighted,
-                          reindex=reindex, sum_duplicates=sum_duplicates, matrix_only=matrix_only)
+                          reindex=reindex, shape=shape, sum_duplicates=sum_duplicates, matrix_only=matrix_only)
 
 
 def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed: bool = False, bipartite: bool = False,
-                    weighted: bool = True, reindex: bool = True, sum_duplicates: bool = True,
-                    matrix_only: bool = None) -> Union[Bunch, sparse.csr_matrix]:
+                    weighted: bool = True, reindex: bool = False, shape: Optional[tuple] = None,
+                    sum_duplicates: bool = True, matrix_only: bool = None) -> Union[Bunch, sparse.csr_matrix]:
     """Load a graph from an edge array of shape (n_edges, 2) and weights (optional).
 
     Parameters
@@ -157,6 +165,9 @@ def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed
     reindex : bool
         If ``True``, reindex nodes and returns the original node indices as names.
         Reindexing is enforced if nodes are not integers.
+    shape : tuple
+        Shape of the adjacency or biadjacency matrix.
+        If not specified or if nodes are reindexed, the shape is the smallest compatible with node indices.
     sum_duplicates : bool
         If ``True`` (default), sums weights of duplicate edges.
         Otherwise, the weight of each edge is that of the first occurrence of this edge.
@@ -195,28 +206,34 @@ def from_edge_array(edge_array: np.ndarray, weights: np.ndarray = None, directed
     if bipartite:
         row = edge_array[:, 0]
         col = edge_array[:, 1]
-        if row.dtype != int or (reindex and len(set(row)) < max(row) + 1):
+        if row.dtype != int or reindex:
             names_row, row = np.unique(row, return_inverse=True)
             graph.names_row = names_row
             graph.names = names_row
             n_row = len(names_row)
+        elif shape is not None:
+            n_row = max(shape[0], max(row) + 1)
         else:
             n_row = max(row) + 1
-        if col.dtype != int or (reindex and len(set(col)) < max(col) + 1):
+        if col.dtype != int or reindex:
             names_col, col = np.unique(col, return_inverse=True)
             graph.names_col = names_col
             n_col = len(names_col)
+        elif shape is not None:
+            n_col = max(shape[1], max(col) + 1)
         else:
             n_col = max(col) + 1
         matrix = sparse.csr_matrix((weights, (row, col)), shape=(n_row, n_col))
         graph.biadjacency = matrix
     else:
         nodes = edge_array.ravel()
-        if nodes.dtype != int or (reindex and len(set(nodes)) < max(nodes) + 1):
+        if nodes.dtype != int or reindex:
             names, nodes = np.unique(nodes, return_inverse=True)
             graph.names = names
             n = len(names)
             edge_array = nodes.reshape(-1, 2)
+        elif shape is not None:
+            n = max(shape[0], max(nodes) + 1)
         else:
             n = max(nodes) + 1
         row = edge_array[:, 0]
@@ -233,8 +250,8 @@
 
 def from_csv(file_path: str, delimiter: str = None, sep: str = None, comments: str = '#%',
              data_structure: str = None, directed: bool = False, bipartite: bool = False, weighted: bool = True,
-             reindex: bool = True, sum_duplicates: bool = True, matrix_only: bool = None) \
-        -> Union[Bunch, sparse.csr_matrix]:
+             reindex: bool = False, shape: Optional[tuple] = None, sum_duplicates: bool = True,
+             matrix_only: bool = None) -> Union[Bunch, sparse.csr_matrix]:
     """Load a graph from a CSV or TSV file.
     The delimiter can be specified (e.g., ' ' for space-separated values).
 
@@ -249,9 +266,10 @@ def from_csv(file_path: str, delimiter: str = None, sep: str = None, comments: s
     comments : str
         Characters for comment lines.
     data_structure : str
-        If 'edge_list', considers each row of the file as an edge (tuple of size 2 or 3).
-        If 'adjacency_list', considers each row of the file as an adjacency list (list of neighbors).
-        If 'adjacency_dict', considers each row of the file as an adjacency dictionary with key
+        If 'edge_list', consider each row of the file as an edge (tuple of size 2 or 3).
+        If 'adjacency_list', consider each row of the file as an adjacency list (list of neighbors,
+        in the order of node indices; an empty line means no neighbor).
+        If 'adjacency_dict', consider each row of the file as an adjacency dictionary with key
         given by the first column (node: list of neighbors).
         If ``None`` (default), data_structure is guessed from the first rows of the file.
     directed : bool
@@ -263,6 +281,9 @@ def from_csv(file_path: str, delimiter: str = None, sep: str = None, comments: s
     reindex : bool
         If ``True``, reindex nodes and returns the original node indices as names.
         Reindexing is enforced if nodes are not integers.
+    shape : tuple
+        Shape of the adjacency or biadjacency matrix.
+        If not specified or if nodes are reindexed, the shape is the smallest compatible with node indices.
     sum_duplicates : bool
         If ``True`` (default), sums weights of duplicate edges.
         Otherwise, the weight of each edge is that of the first occurrence of this edge.
@@ -295,7 +316,7 @@ def from_csv(file_path: str, delimiter: str = None, sep: str = None, comments: s
            else:
                weights = None
            return from_edge_array(edge_array=edge_array, weights=weights, directed=directed, bipartite=bipartite,
-                                  weighted=weighted, reindex=reindex, sum_duplicates=sum_duplicates,
+                                  weighted=weighted, reindex=reindex, shape=shape, sum_duplicates=sum_duplicates,
                                   matrix_only=matrix_only)
     except TypeError:
         pass
@@ -306,17 +327,17 @@ def from_csv(file_path: str, delimiter: str = None, sep: str = None, comments: s
     if data_structure == 'edge_list':
         edge_list = [tuple(row) for row in csv_reader]
         return from_edge_list(edge_list=edge_list, directed=directed, bipartite=bipartite,
-                              weighted=weighted, reindex=reindex, sum_duplicates=sum_duplicates,
+                              weighted=weighted, reindex=reindex, shape=shape, sum_duplicates=sum_duplicates,
                               matrix_only=matrix_only)
     elif data_structure == 'adjacency_list':
         adjacency_list = [row for row in csv_reader]
         return from_adjacency_list(adjacency_list=adjacency_list, directed=directed, bipartite=bipartite,
-                                   weighted=weighted, reindex=reindex, sum_duplicates=sum_duplicates,
+                                   weighted=weighted, reindex=reindex, shape=shape, sum_duplicates=sum_duplicates,
                                    matrix_only=matrix_only)
     elif data_structure == 'adjacency_dict':
         adjacency_list = {row[0]: row[1:] for row in csv_reader}
         return from_adjacency_list(adjacency_list=adjacency_list, directed=directed, bipartite=bipartite,
-                                   weighted=weighted, reindex=reindex, sum_duplicates=sum_duplicates,
+                                   weighted=weighted, reindex=reindex, shape=shape, sum_duplicates=sum_duplicates,
                                    matrix_only=matrix_only)
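
To summarize the parser changes: reindex now defaults to False everywhere, and the new shape parameter lets callers force a matrix larger than the highest node index. A quick sketch using from_edge_list and matrix_only as defined in the signatures above:

from sknetwork.data import from_edge_list

edges = [(0, 2), (1, 2)]

adjacency = from_edge_list(edges, matrix_only=True)
print(adjacency.shape)  # (3, 3): smallest shape compatible with the node indices

adjacency = from_edge_list(edges, matrix_only=True, shape=(5, 5))
print(adjacency.shape)  # (5, 5): the extra rows/columns are isolated nodes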