scikit-network 0.30.0__cp310-cp310-win_amd64.whl → 0.32.1__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-network might be problematic. Click here for more details.

Files changed (187) hide show
  1. {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/AUTHORS.rst +3 -0
  2. {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/METADATA +31 -3
  3. scikit_network-0.32.1.dist-info/RECORD +228 -0
  4. {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/WHEEL +1 -1
  5. sknetwork/__init__.py +1 -1
  6. sknetwork/base.py +67 -0
  7. sknetwork/classification/base.py +24 -24
  8. sknetwork/classification/base_rank.py +17 -25
  9. sknetwork/classification/diffusion.py +35 -35
  10. sknetwork/classification/knn.py +24 -21
  11. sknetwork/classification/metrics.py +1 -1
  12. sknetwork/classification/pagerank.py +10 -10
  13. sknetwork/classification/propagation.py +23 -20
  14. sknetwork/classification/tests/test_diffusion.py +13 -3
  15. sknetwork/classification/vote.cp310-win_amd64.pyd +0 -0
  16. sknetwork/classification/vote.cpp +14482 -10351
  17. sknetwork/classification/vote.pyx +1 -3
  18. sknetwork/clustering/__init__.py +3 -1
  19. sknetwork/clustering/base.py +36 -40
  20. sknetwork/clustering/kcenters.py +253 -0
  21. sknetwork/clustering/leiden.py +241 -0
  22. sknetwork/clustering/leiden_core.cp310-win_amd64.pyd +0 -0
  23. sknetwork/clustering/leiden_core.cpp +31564 -0
  24. sknetwork/clustering/leiden_core.pyx +124 -0
  25. sknetwork/clustering/louvain.py +133 -102
  26. sknetwork/clustering/louvain_core.cp310-win_amd64.pyd +0 -0
  27. sknetwork/clustering/louvain_core.cpp +22457 -18792
  28. sknetwork/clustering/louvain_core.pyx +86 -96
  29. sknetwork/clustering/postprocess.py +2 -2
  30. sknetwork/clustering/propagation_clustering.py +15 -19
  31. sknetwork/clustering/tests/test_API.py +8 -4
  32. sknetwork/clustering/tests/test_kcenters.py +92 -0
  33. sknetwork/clustering/tests/test_leiden.py +34 -0
  34. sknetwork/clustering/tests/test_louvain.py +3 -4
  35. sknetwork/data/__init__.py +2 -1
  36. sknetwork/data/base.py +28 -0
  37. sknetwork/data/load.py +38 -37
  38. sknetwork/data/models.py +18 -18
  39. sknetwork/data/parse.py +54 -33
  40. sknetwork/data/test_graphs.py +2 -2
  41. sknetwork/data/tests/test_API.py +1 -1
  42. sknetwork/data/tests/test_base.py +14 -0
  43. sknetwork/data/tests/test_load.py +1 -1
  44. sknetwork/data/tests/test_parse.py +9 -12
  45. sknetwork/data/tests/test_test_graphs.py +1 -2
  46. sknetwork/data/toy_graphs.py +18 -18
  47. sknetwork/embedding/__init__.py +0 -1
  48. sknetwork/embedding/base.py +21 -20
  49. sknetwork/embedding/force_atlas.py +3 -2
  50. sknetwork/embedding/louvain_embedding.py +2 -2
  51. sknetwork/embedding/random_projection.py +5 -3
  52. sknetwork/embedding/spectral.py +0 -73
  53. sknetwork/embedding/tests/test_API.py +4 -28
  54. sknetwork/embedding/tests/test_louvain_embedding.py +4 -9
  55. sknetwork/embedding/tests/test_random_projection.py +2 -2
  56. sknetwork/embedding/tests/test_spectral.py +5 -8
  57. sknetwork/embedding/tests/test_svd.py +1 -1
  58. sknetwork/gnn/base.py +4 -4
  59. sknetwork/gnn/base_layer.py +3 -3
  60. sknetwork/gnn/gnn_classifier.py +45 -89
  61. sknetwork/gnn/layer.py +1 -1
  62. sknetwork/gnn/loss.py +1 -1
  63. sknetwork/gnn/optimizer.py +4 -3
  64. sknetwork/gnn/tests/test_base_layer.py +4 -4
  65. sknetwork/gnn/tests/test_gnn_classifier.py +12 -35
  66. sknetwork/gnn/utils.py +8 -8
  67. sknetwork/hierarchy/base.py +29 -2
  68. sknetwork/hierarchy/louvain_hierarchy.py +45 -41
  69. sknetwork/hierarchy/paris.cp310-win_amd64.pyd +0 -0
  70. sknetwork/hierarchy/paris.cpp +27369 -22852
  71. sknetwork/hierarchy/paris.pyx +7 -9
  72. sknetwork/hierarchy/postprocess.py +16 -16
  73. sknetwork/hierarchy/tests/test_API.py +1 -1
  74. sknetwork/hierarchy/tests/test_algos.py +5 -0
  75. sknetwork/hierarchy/tests/test_metrics.py +1 -1
  76. sknetwork/linalg/__init__.py +1 -1
  77. sknetwork/linalg/diteration.cp310-win_amd64.pyd +0 -0
  78. sknetwork/linalg/diteration.cpp +13474 -9454
  79. sknetwork/linalg/diteration.pyx +0 -2
  80. sknetwork/linalg/eig_solver.py +1 -1
  81. sknetwork/linalg/{normalization.py → normalizer.py} +18 -15
  82. sknetwork/linalg/operators.py +1 -1
  83. sknetwork/linalg/ppr_solver.py +1 -1
  84. sknetwork/linalg/push.cp310-win_amd64.pyd +0 -0
  85. sknetwork/linalg/push.cpp +22993 -18807
  86. sknetwork/linalg/push.pyx +0 -2
  87. sknetwork/linalg/svd_solver.py +1 -1
  88. sknetwork/linalg/tests/test_normalization.py +3 -7
  89. sknetwork/linalg/tests/test_operators.py +4 -8
  90. sknetwork/linalg/tests/test_ppr.py +1 -1
  91. sknetwork/linkpred/base.py +13 -2
  92. sknetwork/linkpred/nn.py +6 -6
  93. sknetwork/log.py +19 -0
  94. sknetwork/path/__init__.py +4 -3
  95. sknetwork/path/dag.py +54 -0
  96. sknetwork/path/distances.py +98 -0
  97. sknetwork/path/search.py +13 -47
  98. sknetwork/path/shortest_path.py +37 -162
  99. sknetwork/path/tests/test_dag.py +37 -0
  100. sknetwork/path/tests/test_distances.py +62 -0
  101. sknetwork/path/tests/test_search.py +26 -11
  102. sknetwork/path/tests/test_shortest_path.py +31 -36
  103. sknetwork/ranking/__init__.py +0 -1
  104. sknetwork/ranking/base.py +13 -8
  105. sknetwork/ranking/betweenness.cp310-win_amd64.pyd +0 -0
  106. sknetwork/ranking/betweenness.cpp +5709 -3017
  107. sknetwork/ranking/betweenness.pyx +0 -2
  108. sknetwork/ranking/closeness.py +7 -10
  109. sknetwork/ranking/pagerank.py +14 -14
  110. sknetwork/ranking/postprocess.py +12 -3
  111. sknetwork/ranking/tests/test_API.py +2 -4
  112. sknetwork/ranking/tests/test_betweenness.py +3 -3
  113. sknetwork/ranking/tests/test_closeness.py +3 -7
  114. sknetwork/ranking/tests/test_pagerank.py +11 -5
  115. sknetwork/ranking/tests/test_postprocess.py +5 -0
  116. sknetwork/regression/base.py +19 -2
  117. sknetwork/regression/diffusion.py +24 -10
  118. sknetwork/regression/tests/test_diffusion.py +8 -0
  119. sknetwork/test_base.py +35 -0
  120. sknetwork/test_log.py +15 -0
  121. sknetwork/topology/__init__.py +7 -8
  122. sknetwork/topology/cliques.cp310-win_amd64.pyd +0 -0
  123. sknetwork/topology/{kcliques.cpp → cliques.cpp} +23412 -20276
  124. sknetwork/topology/cliques.pyx +149 -0
  125. sknetwork/topology/core.cp310-win_amd64.pyd +0 -0
  126. sknetwork/topology/{kcore.cpp → core.cpp} +21732 -18867
  127. sknetwork/topology/core.pyx +90 -0
  128. sknetwork/topology/cycles.py +243 -0
  129. sknetwork/topology/minheap.cp310-win_amd64.pyd +0 -0
  130. sknetwork/{utils → topology}/minheap.cpp +19452 -15368
  131. sknetwork/{utils → topology}/minheap.pxd +1 -3
  132. sknetwork/{utils → topology}/minheap.pyx +1 -3
  133. sknetwork/topology/structure.py +3 -43
  134. sknetwork/topology/tests/test_cliques.py +11 -11
  135. sknetwork/topology/tests/test_core.py +19 -0
  136. sknetwork/topology/tests/test_cycles.py +65 -0
  137. sknetwork/topology/tests/test_structure.py +2 -16
  138. sknetwork/topology/tests/test_triangles.py +11 -15
  139. sknetwork/topology/tests/test_wl.py +72 -0
  140. sknetwork/topology/triangles.cp310-win_amd64.pyd +0 -0
  141. sknetwork/topology/triangles.cpp +5056 -2696
  142. sknetwork/topology/triangles.pyx +74 -89
  143. sknetwork/topology/weisfeiler_lehman.py +56 -86
  144. sknetwork/topology/weisfeiler_lehman_core.cp310-win_amd64.pyd +0 -0
  145. sknetwork/topology/weisfeiler_lehman_core.cpp +14727 -10622
  146. sknetwork/topology/weisfeiler_lehman_core.pyx +0 -2
  147. sknetwork/utils/__init__.py +1 -31
  148. sknetwork/utils/check.py +2 -2
  149. sknetwork/utils/format.py +5 -3
  150. sknetwork/utils/membership.py +2 -2
  151. sknetwork/utils/tests/test_check.py +3 -3
  152. sknetwork/utils/tests/test_format.py +3 -1
  153. sknetwork/utils/values.py +1 -1
  154. sknetwork/visualization/__init__.py +2 -2
  155. sknetwork/visualization/dendrograms.py +55 -7
  156. sknetwork/visualization/graphs.py +292 -72
  157. sknetwork/visualization/tests/test_dendrograms.py +9 -9
  158. sknetwork/visualization/tests/test_graphs.py +71 -62
  159. scikit_network-0.30.0.dist-info/RECORD +0 -227
  160. sknetwork/embedding/louvain_hierarchy.py +0 -142
  161. sknetwork/embedding/tests/test_louvain_hierarchy.py +0 -19
  162. sknetwork/path/metrics.py +0 -148
  163. sknetwork/path/tests/test_metrics.py +0 -29
  164. sknetwork/ranking/harmonic.py +0 -82
  165. sknetwork/topology/dag.py +0 -74
  166. sknetwork/topology/dag_core.cp310-win_amd64.pyd +0 -0
  167. sknetwork/topology/dag_core.cpp +0 -23350
  168. sknetwork/topology/dag_core.pyx +0 -38
  169. sknetwork/topology/kcliques.cp310-win_amd64.pyd +0 -0
  170. sknetwork/topology/kcliques.pyx +0 -193
  171. sknetwork/topology/kcore.cp310-win_amd64.pyd +0 -0
  172. sknetwork/topology/kcore.pyx +0 -120
  173. sknetwork/topology/tests/test_cores.py +0 -21
  174. sknetwork/topology/tests/test_dag.py +0 -26
  175. sknetwork/topology/tests/test_wl_coloring.py +0 -49
  176. sknetwork/topology/tests/test_wl_kernel.py +0 -31
  177. sknetwork/utils/base.py +0 -35
  178. sknetwork/utils/minheap.cp310-win_amd64.pyd +0 -0
  179. sknetwork/utils/simplex.py +0 -140
  180. sknetwork/utils/tests/test_base.py +0 -28
  181. sknetwork/utils/tests/test_bunch.py +0 -16
  182. sknetwork/utils/tests/test_projection_simplex.py +0 -33
  183. sknetwork/utils/tests/test_verbose.py +0 -15
  184. sknetwork/utils/verbose.py +0 -37
  185. {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/LICENSE +0 -0
  186. {scikit_network-0.30.0.dist-info → scikit_network-0.32.1.dist-info}/top_level.txt +0 -0
  187. /sknetwork/{utils → data}/timeout.py +0 -0
@@ -10,7 +10,8 @@ import numpy as np
10
10
  from scipy import sparse
11
11
 
12
12
  from sknetwork.classification.base import BaseClassifier
13
- from sknetwork.linalg.normalization import normalize
13
+ from sknetwork.path.distances import get_distances
14
+ from sknetwork.linalg.normalizer import normalize
14
15
  from sknetwork.utils.format import get_adjacency_values
15
16
  from sknetwork.utils.membership import get_membership
16
17
  from sknetwork.utils.neighbors import get_degrees
@@ -27,25 +28,24 @@ class DiffusionClassifier(BaseClassifier):
27
28
  Number of iterations of the diffusion (discrete time).
28
29
  centering : bool
29
30
  If ``True``, center the temperature of each label to its mean before classification (default).
30
- threshold : float
31
- Minimum difference of temperatures between the 2 top labels to classify a node (default = 0).
32
- If the difference of temperatures does not exceed this threshold, return -1 for this node (no label).
31
+ scale : float
32
+ Multiplicative factor applied to temperatures before softmax (default = 5).
33
+ Used only when centering is ``True``.
33
34
 
34
35
  Attributes
35
36
  ----------
36
37
  labels_ : np.ndarray, shape (n_labels,)
37
- Label of each node.
38
- membership_ : sparse.csr_matrix, shape (n_row, n_labels)
39
- Membership matrix.
38
+ Labels of nodes.
39
+ probs_ : sparse.csr_matrix, shape (n_row, n_labels)
40
+ Probability distribution over labels.
40
41
  labels_row_ : np.ndarray
41
42
  Labels of rows, for bipartite graphs.
42
43
  labels_col_ : np.ndarray
43
44
  Labels of columns, for bipartite graphs.
44
- membership_row_ : sparse.csr_matrix, shape (n_row, n_labels)
45
- Membership matrix of rows, for bipartite graphs.
46
- membership_col_ : sparse.csr_matrix, shape (n_col, n_labels)
47
- Membership matrix of columns, for bipartite graphs.
48
-
45
+ probs_row_ : sparse.csr_matrix, shape (n_row, n_labels)
46
+ Probability distributions over labels of rows, for bipartite graphs.
47
+ probs_col_ : sparse.csr_matrix, shape (n_col, n_labels)
48
+ Probability distributions over labels of columns, for bipartite graphs.
49
49
  Example
50
50
  -------
51
51
  >>> from sknetwork.data import karate_club
@@ -63,7 +63,7 @@ class DiffusionClassifier(BaseClassifier):
63
63
  Zhu, X., Lafferty, J., & Rosenfeld, R. (2005). `Semi-supervised learning with graphs`
64
64
  (Doctoral dissertation, Carnegie Mellon University, language technologies institute, school of computer science).
65
65
  """
66
- def __init__(self, n_iter: int = 10, centering: bool = True, threshold: float = 0):
66
+ def __init__(self, n_iter: int = 10, centering: bool = True, scale: float = 5):
67
67
  super(DiffusionClassifier, self).__init__()
68
68
 
69
69
  if n_iter <= 0:
@@ -71,7 +71,7 @@ class DiffusionClassifier(BaseClassifier):
71
71
  else:
72
72
  self.n_iter = n_iter
73
73
  self.centering = centering
74
- self.threshold = threshold
74
+ self.scale = scale
75
75
 
76
76
  def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray],
77
77
  labels: Optional[Union[dict, np.ndarray]] = None, labels_row: Optional[Union[dict, np.ndarray]] = None,
@@ -81,13 +81,15 @@ class DiffusionClassifier(BaseClassifier):
81
81
 
82
82
  Parameters
83
83
  ----------
84
- input_matrix :
84
+ input_matrix : sparse.csr_matrix, np.ndarray
85
85
  Adjacency matrix or biadjacency matrix of the graph.
86
- labels :
86
+ labels : dict, np.ndarray
87
87
  Known labels (dictionary or vector of int). Negative values ignored.
88
- labels_row, labels_col :
89
- Labels of rows and columns for bipartite graphs. Negative values ignored.
90
- force_bipartite :
88
+ labels_row : dict, np.ndarray
89
+ Labels of rows for bipartite graphs. Negative values ignored.
90
+ labels_col : dict, np.ndarray
91
+ Labels of columns for bipartite graphs. Negative values ignored.
92
+ force_bipartite : bool
91
93
  If ``True``, consider the input matrix as a biadjacency matrix (default = ``False``).
92
94
 
93
95
  Returns
@@ -101,33 +103,31 @@ class DiffusionClassifier(BaseClassifier):
101
103
  labels = values.astype(int)
102
104
  if (labels < 0).all():
103
105
  raise ValueError('At least one node must be given a non-negative label.')
104
- temperatures = get_membership(labels).toarray()
106
+ labels_reindex = labels.copy()
107
+ labels_unique, inverse = np.unique(labels[labels >= 0], return_inverse=True)
108
+ labels_reindex[labels >= 0] = inverse
109
+ temperatures = get_membership(labels_reindex).toarray()
105
110
  temperatures_seeds = temperatures[labels >= 0]
106
- n_labels = temperatures.shape[1]
107
- temperatures[labels < 0] = 1 / n_labels
111
+ temperatures[labels < 0] = 0.5
108
112
  diffusion = normalize(adjacency)
109
113
  for i in range(self.n_iter):
110
114
  temperatures = diffusion.dot(temperatures)
111
115
  temperatures[labels >= 0] = temperatures_seeds
112
-
113
- self.membership_ = sparse.csr_matrix(temperatures)
114
-
115
116
  if self.centering:
116
117
  temperatures -= temperatures.mean(axis=0)
118
+ labels_ = labels_unique[temperatures.argmax(axis=1)]
117
119
 
118
- labels_ = temperatures.argmax(axis=1)
119
- # set label -1 to nodes without temperature (no diffusion to them)
120
- labels_[get_degrees(self.membership_) == 0] = -1
120
+ # softmax
121
+ if self.centering:
122
+ temperatures = np.exp(self.scale * temperatures)
121
123
 
122
- if self.threshold >= 0:
123
- if n_labels > 2:
124
- top_temperatures = np.partition(-temperatures, 2, axis=1)[:, :2]
125
- else:
126
- top_temperatures = temperatures
127
- differences = np.abs(top_temperatures[:, 0] - top_temperatures[:, 1])
128
- labels_[differences <= self.threshold] = -1
124
+ # set label -1 to nodes not reached by diffusion
125
+ distances = get_distances(adjacency, source=np.flatnonzero(labels >= 0))
126
+ labels_[distances < 0] = -1
127
+ temperatures[distances < 0] = 0
129
128
 
130
129
  self.labels_ = labels_
130
+ self.probs_ = sparse.csr_matrix(normalize(temperatures))
131
131
  self._split_vars(input_matrix.shape)
132
132
 
133
133
  return self
@@ -12,7 +12,7 @@ from scipy import sparse
12
12
 
13
13
  from sknetwork.classification.base import BaseClassifier
14
14
  from sknetwork.embedding.base import BaseEmbedding
15
- from sknetwork.linalg.normalization import get_norms, normalize
15
+ from sknetwork.linalg.normalizer import get_norms, normalize
16
16
  from sknetwork.utils.check import check_n_neighbors
17
17
  from sknetwork.utils.format import get_adjacency_values
18
18
 
@@ -22,28 +22,29 @@ class NNClassifier(BaseClassifier):
22
22
 
23
23
  Parameters
24
24
  ----------
25
- n_neighbors :
25
+ n_neighbors : int
26
26
  Number of nearest neighbors.
27
- embedding_method :
27
+ embedding_method : :class:`BaseEmbedding`
28
28
  Embedding method used to represent nodes in vector space.
29
29
  If ``None`` (default), use identity.
30
- normalize :
30
+ normalize : bool
31
31
  If ``True``, apply normalization so that all vectors have norm 1 in the embedding space.
32
32
 
33
33
  Attributes
34
34
  ----------
35
35
  labels_ : np.ndarray, shape (n_labels,)
36
- Label of each node.
37
- membership_ : sparse.csr_matrix, shape (n_row, n_labels)
38
- Membership matrix.
36
+ Labels of nodes.
37
+ probs_ : sparse.csr_matrix, shape (n_row, n_labels)
38
+ Probability distribution over labels.
39
39
  labels_row_ : np.ndarray
40
40
  Labels of rows, for bipartite graphs.
41
41
  labels_col_ : np.ndarray
42
42
  Labels of columns, for bipartite graphs.
43
- membership_row_ : sparse.csr_matrix, shape (n_row, n_labels)
44
- Membership matrix of rows, for bipartite graphs.
45
- membership_col_ : sparse.csr_matrix, shape (n_col, n_labels)
46
- Membership matrix of columns, for bipartite graphs.
43
+ probs_row_ : sparse.csr_matrix, shape (n_row, n_labels)
44
+ Probability distributions over labels of rows, for bipartite graphs.
45
+ probs_col_ : sparse.csr_matrix, shape (n_col, n_labels)
46
+ Probability distributions over labels of columns, for bipartite graphs.
47
+
47
48
  Example
48
49
  -------
49
50
  >>> from sknetwork.classification import NNClassifier
@@ -91,10 +92,10 @@ class NNClassifier(BaseClassifier):
91
92
  col += list(labels[index_train])
92
93
  data += list(np.ones_like(index_train))
93
94
 
94
- membership = normalize(sparse.csr_matrix((data, (row, col)), shape=(len(labels), np.max(labels) + 1)))
95
- labels = np.argmax(membership.toarray(), axis=1)
95
+ probs = normalize(sparse.csr_matrix((data, (row, col)), shape=(len(labels), np.max(labels) + 1)))
96
+ labels = np.argmax(probs.toarray(), axis=1)
96
97
 
97
- return membership, labels
98
+ return probs, labels
98
99
 
99
100
  def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], labels: Union[np.ndarray, dict] = None,
100
101
  labels_row: Union[np.ndarray, dict] = None, labels_col: Union[np.ndarray, dict] = None) -> 'NNClassifier':
@@ -102,12 +103,14 @@ class NNClassifier(BaseClassifier):
102
103
 
103
104
  Parameters
104
105
  ----------
105
- input_matrix :
106
+ input_matrix : sparse.csr_matrix, np.ndarray
106
107
  Adjacency matrix or biadjacency matrix of the graph.
107
- labels :
108
- Known labels (dictionary or array). Negative values ignored.
109
- labels_row, labels_col :
110
- Labels of rows and columns (for bipartite graphs).
108
+ labels : np.ndarray, dict
109
+ Known labels. Negative values ignored.
110
+ labels_row : np.ndarray, dict
111
+ Known labels of rows, for bipartite graphs.
112
+ labels_col : np.ndarray, dict
113
+ Known labels of columns, for bipartite graphs.
111
114
 
112
115
  Returns
113
116
  -------
@@ -126,10 +129,10 @@ class NNClassifier(BaseClassifier):
126
129
  if self.normalize:
127
130
  embedding = normalize(embedding, p=2)
128
131
 
129
- membership, labels = self._fit_core(embedding, labels, index_seed, index_remain)
132
+ probs, labels = self._fit_core(embedding, labels, index_seed, index_remain)
130
133
 
131
134
  self.labels_ = labels
132
- self.membership_ = membership
135
+ self.probs_ = probs
133
136
  self._split_vars(input_matrix.shape)
134
137
 
135
138
  return self
@@ -158,7 +158,7 @@ def get_f1_scores(labels_true: np.ndarray, labels_pred: np.ndarray, return_preci
158
158
  mask = counts_pred > 0
159
159
  precisions[mask] = counts_correct[mask] / counts_pred[mask]
160
160
  f1_scores = np.zeros(n_labels)
161
- mask = (counts_true > 0) & (counts_pred > 0)
161
+ mask = (precisions > 0) & (recalls > 0)
162
162
  f1_scores[mask] = 2 / (1 / precisions[mask] + 1 / recalls[mask])
163
163
  if return_precision_recall:
164
164
  return f1_scores, precisions, recalls
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env python3
2
2
  # -*- coding: utf-8 -*-
3
3
  """
4
- Created on March 2020
4
+ Created in March 2020
5
5
  @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
6
  """
7
7
  from typing import Optional
@@ -17,9 +17,9 @@ class PageRankClassifier(RankClassifier):
17
17
 
18
18
  Parameters
19
19
  ----------
20
- damping_factor:
20
+ damping_factor : float
21
21
  Probability to continue the random walk.
22
- solver : :obj:`str`
22
+ solver : str
23
23
  Which solver to use: 'piteration', 'diteration', 'bicgstab', 'lanczos'.
24
24
  n_iter : int
25
25
  Number of iterations for some solvers such as ``'piteration'`` or ``'diteration'``.
@@ -29,17 +29,17 @@ class PageRankClassifier(RankClassifier):
29
29
  Attributes
30
30
  ----------
31
31
  labels_ : np.ndarray, shape (n_labels,)
32
- Label of each node.
33
- membership_ : sparse.csr_matrix, shape (n_row, n_labels)
34
- Membership matrix.
32
+ Labels of nodes.
33
+ probs_ : sparse.csr_matrix, shape (n_row, n_labels)
34
+ Probability distribution over labels.
35
35
  labels_row_ : np.ndarray
36
36
  Labels of rows, for bipartite graphs.
37
37
  labels_col_ : np.ndarray
38
38
  Labels of columns, for bipartite graphs.
39
- membership_row_ : sparse.csr_matrix, shape (n_row, n_labels)
40
- Membership matrix of rows, for bipartite graphs.
41
- membership_col_ : sparse.csr_matrix, shape (n_col, n_labels)
42
- Membership matrix of columns, for bipartite graphs.
39
+ probs_row_ : sparse.csr_matrix, shape (n_row, n_labels)
40
+ Probability distributions over labels of rows, for bipartite graphs.
41
+ probs_col_ : sparse.csr_matrix, shape (n_col, n_labels)
42
+ Probability distributions over labels of columns, for bipartite graphs.
43
43
 
44
44
  Example
45
45
  -------
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env python3
2
2
  # coding: utf-8
3
3
  """
4
- Created on April 2020
4
+ Created in April 2020
5
5
  @author: Thomas Bonald <tbonald@enst.fr>
6
6
  """
7
7
 
@@ -12,7 +12,7 @@ from scipy import sparse
12
12
 
13
13
  from sknetwork.classification.base import BaseClassifier
14
14
  from sknetwork.classification.vote import vote_update
15
- from sknetwork.linalg.normalization import normalize
15
+ from sknetwork.linalg.normalizer import normalize
16
16
  from sknetwork.utils.format import get_adjacency_values
17
17
  from sknetwork.utils.membership import get_membership
18
18
 
@@ -25,9 +25,9 @@ class Propagation(BaseClassifier):
25
25
  n_iter : float
26
26
  Maximum number of iterations (-1 for infinity).
27
27
  node_order : str
28
- * `'random'`: node labels are updated in random order.
29
- * `'increasing'`: node labels are updated by increasing order of (in-)weight.
30
- * `'decreasing'`: node labels are updated by decreasing order of (in-)weight.
28
+ * ``'random'``: node labels are updated in random order.
29
+ * ``'increasing'``: node labels are updated by increasing order of (in-) weight.
30
+ * ``'decreasing'``: node labels are updated by decreasing order of (in-) weight.
31
31
  * Otherwise, node labels are updated by index order.
32
32
  weighted : bool
33
33
  If ``True``, the vote of each neighbor is proportional to the edge weight.
@@ -36,17 +36,17 @@ class Propagation(BaseClassifier):
36
36
  Attributes
37
37
  ----------
38
38
  labels_ : np.ndarray, shape (n_labels,)
39
- Label of each node.
40
- membership_ : sparse.csr_matrix, shape (n_row, n_labels)
41
- Membership matrix.
39
+ Labels of nodes.
40
+ probs_ : sparse.csr_matrix, shape (n_row, n_labels)
41
+ Probability distribution over labels.
42
42
  labels_row_ : np.ndarray
43
43
  Labels of rows, for bipartite graphs.
44
44
  labels_col_ : np.ndarray
45
45
  Labels of columns, for bipartite graphs.
46
- membership_row_ : sparse.csr_matrix, shape (n_row, n_labels)
47
- Membership matrix of rows, for bipartite graphs.
48
- membership_col_ : sparse.csr_matrix, shape (n_col, n_labels)
49
- Membership matrix of columns, for bipartite graphs.
46
+ probs_row_ : sparse.csr_matrix, shape (n_row, n_labels)
47
+ Probability distributions over labels of rows, for bipartite graphs.
48
+ probs_col_ : sparse.csr_matrix, shape (n_col, n_labels)
49
+ Probability distributions over labels of columns, for bipartite graphs.
50
50
 
51
51
  Example
52
52
  -------
@@ -97,12 +97,15 @@ class Propagation(BaseClassifier):
97
97
 
98
98
  Parameters
99
99
  ----------
100
- input_matrix :
100
+ input_matrix : sparse.csr_matrix, np.ndarray
101
101
  Adjacency matrix or biadjacency matrix of the graph.
102
- labels :
103
- Known labels (dictionary or array). Negative values ignored.
104
- labels_row, labels_col :
105
- Labels of rows and columns (for bipartite graphs).
102
+ labels : np.ndarray, dict
103
+ Known labels. Negative values ignored.
104
+ labels_row : np.ndarray, dict
105
+ Known labels of rows, for bipartite graphs.
106
+ labels_col : np.ndarray, dict
107
+ Known labels of columns, for bipartite graphs.
108
+
106
109
  Returns
107
110
  -------
108
111
  self: :class:`Propagation`
@@ -138,11 +141,11 @@ class Propagation(BaseClassifier):
138
141
  labels_remain = labels[index_remain].copy()
139
142
  labels = np.asarray(vote_update(indptr, indices, data, labels, index_remain))
140
143
 
141
- membership = get_membership(labels)
142
- membership = normalize(adjacency.dot(membership))
144
+ probs = get_membership(labels)
145
+ probs = normalize(adjacency.dot(probs))
143
146
 
144
147
  self.labels_ = labels
145
- self.membership_ = membership
148
+ self.probs_ = probs
146
149
  self._split_vars(input_matrix.shape)
147
150
 
148
151
  return self
@@ -23,9 +23,9 @@ class TestDiffusionClassifier(unittest.TestCase):
23
23
  self.assertTrue(len(algo.labels_) == n_nodes)
24
24
  with self.assertRaises(ValueError):
25
25
  DiffusionClassifier(n_iter=0)
26
- algo = DiffusionClassifier(centering=False, threshold=1)
27
- algo.fit(adjacency, labels=labels)
28
- self.assertTrue(max(algo.labels_) == -1)
26
+ algo = DiffusionClassifier(centering=True, scale=10)
27
+ probs = algo.fit_predict_proba(adjacency, labels=labels)[:, 1]
28
+ self.assertTrue(max(probs) > 0.99)
29
29
 
30
30
  def test_bipartite(self):
31
31
  biadjacency = test_bigraph()
@@ -36,6 +36,7 @@ class TestDiffusionClassifier(unittest.TestCase):
36
36
  algo.fit(biadjacency, labels_row=labels_row, labels_col=labels_col)
37
37
  self.assertTrue(len(algo.labels_row_) == n_row)
38
38
  self.assertTrue(len(algo.labels_col_) == n_col)
39
+ self.assertTrue(all(algo.labels_col_ == algo.predict(columns=True)))
39
40
 
40
41
  def test_predict(self):
41
42
  adjacency = test_graph()
@@ -65,3 +66,12 @@ class TestDiffusionClassifier(unittest.TestCase):
65
66
  self.assertTrue(membership.shape == (n_row, 2))
66
67
  membership = algo.transform(columns=True)
67
68
  self.assertTrue(membership.shape == (n_col, 2))
69
+
70
+ def test_reindex_label(self):
71
+ adjacency = test_graph()
72
+ n_nodes = adjacency.shape[0]
73
+ labels = {0: 0, 1: 2, 2: 3}
74
+ algo = DiffusionClassifier()
75
+ labels_pred = algo.fit_predict(adjacency, labels=labels)
76
+ self.assertTrue(len(labels_pred) == n_nodes)
77
+ self.assertTrue(set(list(labels_pred)) == {0, 2, 3})