scikit-network 0.33.4__cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (229) hide show
  1. scikit_network-0.33.4.dist-info/METADATA +122 -0
  2. scikit_network-0.33.4.dist-info/RECORD +229 -0
  3. scikit_network-0.33.4.dist-info/WHEEL +6 -0
  4. scikit_network-0.33.4.dist-info/licenses/AUTHORS.rst +43 -0
  5. scikit_network-0.33.4.dist-info/licenses/LICENSE +34 -0
  6. scikit_network-0.33.4.dist-info/top_level.txt +1 -0
  7. scikit_network.libs/libgomp-a34b3233.so.1.0.0 +0 -0
  8. sknetwork/__init__.py +21 -0
  9. sknetwork/base.py +67 -0
  10. sknetwork/classification/__init__.py +8 -0
  11. sknetwork/classification/base.py +138 -0
  12. sknetwork/classification/base_rank.py +129 -0
  13. sknetwork/classification/diffusion.py +127 -0
  14. sknetwork/classification/knn.py +131 -0
  15. sknetwork/classification/metrics.py +205 -0
  16. sknetwork/classification/pagerank.py +58 -0
  17. sknetwork/classification/propagation.py +144 -0
  18. sknetwork/classification/tests/__init__.py +1 -0
  19. sknetwork/classification/tests/test_API.py +30 -0
  20. sknetwork/classification/tests/test_diffusion.py +77 -0
  21. sknetwork/classification/tests/test_knn.py +23 -0
  22. sknetwork/classification/tests/test_metrics.py +53 -0
  23. sknetwork/classification/tests/test_pagerank.py +20 -0
  24. sknetwork/classification/tests/test_propagation.py +24 -0
  25. sknetwork/classification/vote.cpp +27593 -0
  26. sknetwork/classification/vote.cpython-312-x86_64-linux-gnu.so +0 -0
  27. sknetwork/classification/vote.pyx +56 -0
  28. sknetwork/clustering/__init__.py +8 -0
  29. sknetwork/clustering/base.py +168 -0
  30. sknetwork/clustering/kcenters.py +251 -0
  31. sknetwork/clustering/leiden.py +238 -0
  32. sknetwork/clustering/leiden_core.cpp +31928 -0
  33. sknetwork/clustering/leiden_core.cpython-312-x86_64-linux-gnu.so +0 -0
  34. sknetwork/clustering/leiden_core.pyx +124 -0
  35. sknetwork/clustering/louvain.py +282 -0
  36. sknetwork/clustering/louvain_core.cpp +31573 -0
  37. sknetwork/clustering/louvain_core.cpython-312-x86_64-linux-gnu.so +0 -0
  38. sknetwork/clustering/louvain_core.pyx +124 -0
  39. sknetwork/clustering/metrics.py +91 -0
  40. sknetwork/clustering/postprocess.py +66 -0
  41. sknetwork/clustering/propagation_clustering.py +100 -0
  42. sknetwork/clustering/tests/__init__.py +1 -0
  43. sknetwork/clustering/tests/test_API.py +38 -0
  44. sknetwork/clustering/tests/test_kcenters.py +60 -0
  45. sknetwork/clustering/tests/test_leiden.py +34 -0
  46. sknetwork/clustering/tests/test_louvain.py +135 -0
  47. sknetwork/clustering/tests/test_metrics.py +50 -0
  48. sknetwork/clustering/tests/test_postprocess.py +39 -0
  49. sknetwork/data/__init__.py +6 -0
  50. sknetwork/data/base.py +33 -0
  51. sknetwork/data/load.py +292 -0
  52. sknetwork/data/models.py +459 -0
  53. sknetwork/data/parse.py +644 -0
  54. sknetwork/data/test_graphs.py +93 -0
  55. sknetwork/data/tests/__init__.py +1 -0
  56. sknetwork/data/tests/test_API.py +30 -0
  57. sknetwork/data/tests/test_base.py +14 -0
  58. sknetwork/data/tests/test_load.py +61 -0
  59. sknetwork/data/tests/test_models.py +52 -0
  60. sknetwork/data/tests/test_parse.py +250 -0
  61. sknetwork/data/tests/test_test_graphs.py +29 -0
  62. sknetwork/data/tests/test_toy_graphs.py +68 -0
  63. sknetwork/data/timeout.py +38 -0
  64. sknetwork/data/toy_graphs.py +611 -0
  65. sknetwork/embedding/__init__.py +8 -0
  66. sknetwork/embedding/base.py +90 -0
  67. sknetwork/embedding/force_atlas.py +198 -0
  68. sknetwork/embedding/louvain_embedding.py +142 -0
  69. sknetwork/embedding/random_projection.py +131 -0
  70. sknetwork/embedding/spectral.py +137 -0
  71. sknetwork/embedding/spring.py +198 -0
  72. sknetwork/embedding/svd.py +351 -0
  73. sknetwork/embedding/tests/__init__.py +1 -0
  74. sknetwork/embedding/tests/test_API.py +49 -0
  75. sknetwork/embedding/tests/test_force_atlas.py +35 -0
  76. sknetwork/embedding/tests/test_louvain_embedding.py +33 -0
  77. sknetwork/embedding/tests/test_random_projection.py +28 -0
  78. sknetwork/embedding/tests/test_spectral.py +81 -0
  79. sknetwork/embedding/tests/test_spring.py +50 -0
  80. sknetwork/embedding/tests/test_svd.py +43 -0
  81. sknetwork/gnn/__init__.py +10 -0
  82. sknetwork/gnn/activation.py +117 -0
  83. sknetwork/gnn/base.py +181 -0
  84. sknetwork/gnn/base_activation.py +90 -0
  85. sknetwork/gnn/base_layer.py +109 -0
  86. sknetwork/gnn/gnn_classifier.py +305 -0
  87. sknetwork/gnn/layer.py +153 -0
  88. sknetwork/gnn/loss.py +180 -0
  89. sknetwork/gnn/neighbor_sampler.py +65 -0
  90. sknetwork/gnn/optimizer.py +164 -0
  91. sknetwork/gnn/tests/__init__.py +1 -0
  92. sknetwork/gnn/tests/test_activation.py +56 -0
  93. sknetwork/gnn/tests/test_base.py +75 -0
  94. sknetwork/gnn/tests/test_base_layer.py +37 -0
  95. sknetwork/gnn/tests/test_gnn_classifier.py +130 -0
  96. sknetwork/gnn/tests/test_layers.py +80 -0
  97. sknetwork/gnn/tests/test_loss.py +33 -0
  98. sknetwork/gnn/tests/test_neigh_sampler.py +23 -0
  99. sknetwork/gnn/tests/test_optimizer.py +43 -0
  100. sknetwork/gnn/tests/test_utils.py +41 -0
  101. sknetwork/gnn/utils.py +127 -0
  102. sknetwork/hierarchy/__init__.py +6 -0
  103. sknetwork/hierarchy/base.py +90 -0
  104. sknetwork/hierarchy/louvain_hierarchy.py +260 -0
  105. sknetwork/hierarchy/metrics.py +234 -0
  106. sknetwork/hierarchy/paris.cpp +37877 -0
  107. sknetwork/hierarchy/paris.cpython-312-x86_64-linux-gnu.so +0 -0
  108. sknetwork/hierarchy/paris.pyx +310 -0
  109. sknetwork/hierarchy/postprocess.py +350 -0
  110. sknetwork/hierarchy/tests/__init__.py +1 -0
  111. sknetwork/hierarchy/tests/test_API.py +24 -0
  112. sknetwork/hierarchy/tests/test_algos.py +34 -0
  113. sknetwork/hierarchy/tests/test_metrics.py +62 -0
  114. sknetwork/hierarchy/tests/test_postprocess.py +57 -0
  115. sknetwork/linalg/__init__.py +9 -0
  116. sknetwork/linalg/basics.py +37 -0
  117. sknetwork/linalg/diteration.cpp +27409 -0
  118. sknetwork/linalg/diteration.cpython-312-x86_64-linux-gnu.so +0 -0
  119. sknetwork/linalg/diteration.pyx +47 -0
  120. sknetwork/linalg/eig_solver.py +93 -0
  121. sknetwork/linalg/laplacian.py +15 -0
  122. sknetwork/linalg/normalizer.py +86 -0
  123. sknetwork/linalg/operators.py +225 -0
  124. sknetwork/linalg/polynome.py +76 -0
  125. sknetwork/linalg/ppr_solver.py +170 -0
  126. sknetwork/linalg/push.cpp +31081 -0
  127. sknetwork/linalg/push.cpython-312-x86_64-linux-gnu.so +0 -0
  128. sknetwork/linalg/push.pyx +71 -0
  129. sknetwork/linalg/sparse_lowrank.py +142 -0
  130. sknetwork/linalg/svd_solver.py +91 -0
  131. sknetwork/linalg/tests/__init__.py +1 -0
  132. sknetwork/linalg/tests/test_eig.py +44 -0
  133. sknetwork/linalg/tests/test_laplacian.py +18 -0
  134. sknetwork/linalg/tests/test_normalization.py +34 -0
  135. sknetwork/linalg/tests/test_operators.py +66 -0
  136. sknetwork/linalg/tests/test_polynome.py +38 -0
  137. sknetwork/linalg/tests/test_ppr.py +50 -0
  138. sknetwork/linalg/tests/test_sparse_lowrank.py +61 -0
  139. sknetwork/linalg/tests/test_svd.py +38 -0
  140. sknetwork/linkpred/__init__.py +2 -0
  141. sknetwork/linkpred/base.py +46 -0
  142. sknetwork/linkpred/nn.py +126 -0
  143. sknetwork/linkpred/tests/__init__.py +1 -0
  144. sknetwork/linkpred/tests/test_nn.py +26 -0
  145. sknetwork/log.py +19 -0
  146. sknetwork/path/__init__.py +5 -0
  147. sknetwork/path/dag.py +54 -0
  148. sknetwork/path/distances.py +98 -0
  149. sknetwork/path/search.py +31 -0
  150. sknetwork/path/shortest_path.py +61 -0
  151. sknetwork/path/tests/__init__.py +1 -0
  152. sknetwork/path/tests/test_dag.py +37 -0
  153. sknetwork/path/tests/test_distances.py +62 -0
  154. sknetwork/path/tests/test_search.py +40 -0
  155. sknetwork/path/tests/test_shortest_path.py +40 -0
  156. sknetwork/ranking/__init__.py +8 -0
  157. sknetwork/ranking/base.py +57 -0
  158. sknetwork/ranking/betweenness.cpp +9716 -0
  159. sknetwork/ranking/betweenness.cpython-312-x86_64-linux-gnu.so +0 -0
  160. sknetwork/ranking/betweenness.pyx +97 -0
  161. sknetwork/ranking/closeness.py +92 -0
  162. sknetwork/ranking/hits.py +90 -0
  163. sknetwork/ranking/katz.py +79 -0
  164. sknetwork/ranking/pagerank.py +106 -0
  165. sknetwork/ranking/postprocess.py +37 -0
  166. sknetwork/ranking/tests/__init__.py +1 -0
  167. sknetwork/ranking/tests/test_API.py +32 -0
  168. sknetwork/ranking/tests/test_betweenness.py +38 -0
  169. sknetwork/ranking/tests/test_closeness.py +30 -0
  170. sknetwork/ranking/tests/test_hits.py +20 -0
  171. sknetwork/ranking/tests/test_pagerank.py +62 -0
  172. sknetwork/ranking/tests/test_postprocess.py +26 -0
  173. sknetwork/regression/__init__.py +4 -0
  174. sknetwork/regression/base.py +57 -0
  175. sknetwork/regression/diffusion.py +204 -0
  176. sknetwork/regression/tests/__init__.py +1 -0
  177. sknetwork/regression/tests/test_API.py +32 -0
  178. sknetwork/regression/tests/test_diffusion.py +56 -0
  179. sknetwork/sknetwork.py +3 -0
  180. sknetwork/test_base.py +35 -0
  181. sknetwork/test_log.py +15 -0
  182. sknetwork/topology/__init__.py +8 -0
  183. sknetwork/topology/cliques.cpp +32574 -0
  184. sknetwork/topology/cliques.cpython-312-x86_64-linux-gnu.so +0 -0
  185. sknetwork/topology/cliques.pyx +149 -0
  186. sknetwork/topology/core.cpp +30660 -0
  187. sknetwork/topology/core.cpython-312-x86_64-linux-gnu.so +0 -0
  188. sknetwork/topology/core.pyx +90 -0
  189. sknetwork/topology/cycles.py +243 -0
  190. sknetwork/topology/minheap.cpp +27341 -0
  191. sknetwork/topology/minheap.cpython-312-x86_64-linux-gnu.so +0 -0
  192. sknetwork/topology/minheap.pxd +20 -0
  193. sknetwork/topology/minheap.pyx +109 -0
  194. sknetwork/topology/structure.py +194 -0
  195. sknetwork/topology/tests/__init__.py +1 -0
  196. sknetwork/topology/tests/test_cliques.py +28 -0
  197. sknetwork/topology/tests/test_core.py +19 -0
  198. sknetwork/topology/tests/test_cycles.py +65 -0
  199. sknetwork/topology/tests/test_structure.py +85 -0
  200. sknetwork/topology/tests/test_triangles.py +38 -0
  201. sknetwork/topology/tests/test_wl.py +72 -0
  202. sknetwork/topology/triangles.cpp +8903 -0
  203. sknetwork/topology/triangles.cpython-312-x86_64-linux-gnu.so +0 -0
  204. sknetwork/topology/triangles.pyx +151 -0
  205. sknetwork/topology/weisfeiler_lehman.py +133 -0
  206. sknetwork/topology/weisfeiler_lehman_core.cpp +27644 -0
  207. sknetwork/topology/weisfeiler_lehman_core.cpython-312-x86_64-linux-gnu.so +0 -0
  208. sknetwork/topology/weisfeiler_lehman_core.pyx +114 -0
  209. sknetwork/utils/__init__.py +7 -0
  210. sknetwork/utils/check.py +355 -0
  211. sknetwork/utils/format.py +221 -0
  212. sknetwork/utils/membership.py +82 -0
  213. sknetwork/utils/neighbors.py +115 -0
  214. sknetwork/utils/tests/__init__.py +1 -0
  215. sknetwork/utils/tests/test_check.py +190 -0
  216. sknetwork/utils/tests/test_format.py +63 -0
  217. sknetwork/utils/tests/test_membership.py +24 -0
  218. sknetwork/utils/tests/test_neighbors.py +41 -0
  219. sknetwork/utils/tests/test_tfidf.py +18 -0
  220. sknetwork/utils/tests/test_values.py +66 -0
  221. sknetwork/utils/tfidf.py +37 -0
  222. sknetwork/utils/values.py +76 -0
  223. sknetwork/visualization/__init__.py +4 -0
  224. sknetwork/visualization/colors.py +34 -0
  225. sknetwork/visualization/dendrograms.py +277 -0
  226. sknetwork/visualization/graphs.py +1039 -0
  227. sknetwork/visualization/tests/__init__.py +1 -0
  228. sknetwork/visualization/tests/test_dendrograms.py +53 -0
  229. sknetwork/visualization/tests/test_graphs.py +176 -0
@@ -0,0 +1,205 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created in July 2020
5
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
+ @author: Thomas Bonald <thomas.bonald@telecom-paris.fr>
7
+ """
8
+ from typing import Union, Tuple
9
+
10
+ import numpy as np
11
+ from scipy import sparse
12
+
13
+ from sknetwork.utils.check import check_vector_format
14
+
15
+
16
def get_accuracy_score(labels_true: np.ndarray, labels_pred: np.ndarray) -> float:
    """Return the fraction of samples whose predicted label equals the true label.

    Samples with a negative true label or a negative predicted label are left out.

    Parameters
    ----------
    labels_true : np.ndarray
        True labels.
    labels_pred : np.ndarray
        Predicted labels.

    Returns
    -------
    accuracy : float
        A score between 0 and 1.

    Raises
    ------
    ValueError
        If no sample has both a non-negative true label and a non-negative predicted label.

    Examples
    --------
    >>> import numpy as np
    >>> labels_true = np.array([0, 0, 1, 1])
    >>> labels_pred = np.array([0, 0, 0, 1])
    >>> float(round(get_accuracy_score(labels_true, labels_pred), 2))
    0.75
    """
    check_vector_format(labels_true, labels_pred)
    # Keep only the samples labeled on both sides.
    is_labeled = (labels_true >= 0) & (labels_pred >= 0)
    if not np.sum(is_labeled):
        raise ValueError('No sample with both true non-negative label and predicted non-negative label.')
    is_correct = labels_true[is_labeled] == labels_pred[is_labeled]
    return np.mean(is_correct)
46
+
47
+
48
def get_confusion_matrix(labels_true: np.ndarray, labels_pred: np.ndarray) -> sparse.csr_matrix:
    """Return the confusion matrix in sparse format (true labels on rows, predicted labels on columns).

    Samples with a negative true label or a negative predicted label are not counted.

    Parameters
    ----------
    labels_true : np.ndarray
        True labels.
    labels_pred : np.ndarray
        Predicted labels.

    Returns
    -------
    confusion matrix : sparse.csr_matrix
        Square matrix whose entry (i, j) counts the samples with true label i and predicted label j.
        Its size is one more than the largest label found in either input vector.

    Raises
    ------
    ValueError
        If no sample has both a non-negative true label and a non-negative predicted label.

    Examples
    --------
    >>> import numpy as np
    >>> labels_true = np.array([0, 0, 1, 1])
    >>> labels_pred = np.array([0, 0, 0, 1])
    >>> get_confusion_matrix(labels_true, labels_pred).toarray()
    array([[2, 0],
           [1, 1]])
    """
    check_vector_format(labels_true, labels_pred)
    mask = (labels_true >= 0) & (labels_pred >= 0)
    # Count the valid samples once; the count is reused for the data vector below.
    n_samples = np.sum(mask)
    if not n_samples:
        raise ValueError('No sample with both true non-negative label and predicted non-negative label.')
    # np.max reduces in native code; the builtin max() would iterate over the arrays in Python.
    # The size is taken over all samples (not only the masked ones), as before.
    n_labels = max(np.max(labels_true), np.max(labels_pred)) + 1
    row = labels_true[mask]
    col = labels_pred[mask]
    data = np.ones(n_samples, dtype=int)
    # Duplicate (row, col) pairs are summed by the CSR constructor, which yields the counts.
    return sparse.csr_matrix((data, (row, col)), shape=(n_labels, n_labels))
83
+
84
+
85
def get_f1_score(labels_true: np.ndarray, labels_pred: np.ndarray, return_precision_recall: bool = False) \
        -> Union[float, Tuple[float, float, float]]:
    """Return the F1 score of a binary classification (score of label 1).

    Negative labels are ignored.

    Parameters
    ----------
    labels_true : np.ndarray
        True labels.
    labels_pred : np.ndarray
        Predicted labels.
    return_precision_recall : bool
        If ``True``, also return precision and recall.

    Returns
    -------
    score, [precision, recall] : float
        F1 score (between 0 and 1). Optionally, also return precision and recall.

    Raises
    ------
    ValueError
        If the set of non-negative labels found in both vectors is not exactly {0, 1}.

    Examples
    --------
    >>> import numpy as np
    >>> labels_true = np.array([0, 0, 1, 1])
    >>> labels_pred = np.array([0, 0, 0, 1])
    >>> float(round(get_f1_score(labels_true, labels_pred), 2))
    0.67
    """
    # Gather every non-negative label seen on either side.
    observed = set(labels_true[labels_true >= 0]).union(labels_pred[labels_pred >= 0])
    if observed != {0, 1}:
        raise ValueError('Labels must be binary. '
                         'Check get_f1_scores or get_average_f1_score for multi-label classification.')
    if not return_precision_recall:
        return get_f1_scores(labels_true, labels_pred, False)[1]
    f1_scores, precisions, recalls = get_f1_scores(labels_true, labels_pred, True)
    return f1_scores[1], precisions[1], recalls[1]
121
+
122
+
123
def get_f1_scores(labels_true: np.ndarray, labels_pred: np.ndarray, return_precision_recall: bool = False) \
        -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray, np.ndarray]]:
    """Return the per-label F1 scores of a classification (one score per label).

    Negative labels are ignored.

    Parameters
    ----------
    labels_true : np.ndarray
        True labels.
    labels_pred : np.ndarray
        Predicted labels.
    return_precision_recall : bool
        If ``True``, also return precisions and recalls.

    Returns
    -------
    scores, [precisions, recalls] : np.ndarray
        F1 scores (between 0 and 1). Optionally, also return precisions and recalls.
        Entries that cannot be computed (no true sample, no predicted sample) are left at 0.

    Examples
    --------
    >>> import numpy as np
    >>> labels_true = np.array([0, 0, 1, 1])
    >>> labels_pred = np.array([0, 0, 0, 1])
    >>> np.round(get_f1_scores(labels_true, labels_pred), 2)
    array([0.8 , 0.67])
    """
    confusion = get_confusion_matrix(labels_true, labels_pred)
    n_labels = confusion.shape[0]
    ones = np.ones(n_labels)

    n_correct = confusion.diagonal()
    n_true = confusion.dot(ones)  # row sums: samples per true label
    n_pred = confusion.T.dot(ones)  # column sums: samples per predicted label

    recalls = np.zeros(n_labels)
    has_true = n_true > 0
    recalls[has_true] = n_correct[has_true] / n_true[has_true]

    precisions = np.zeros(n_labels)
    has_pred = n_pred > 0
    precisions[has_pred] = n_correct[has_pred] / n_pred[has_pred]

    # Harmonic mean, computed only where both terms are positive to avoid dividing by zero.
    f1_scores = np.zeros(n_labels)
    is_positive = (precisions > 0) & (recalls > 0)
    f1_scores[is_positive] = 2 / (1 / precisions[is_positive] + 1 / recalls[is_positive])

    return (f1_scores, precisions, recalls) if return_precision_recall else f1_scores
167
+
168
+
169
def get_average_f1_score(labels_true: np.ndarray, labels_pred: np.ndarray, average: str = 'macro') -> float:
    """Return the average F1 score of a classification over all labels.

    Negative labels are ignored.

    Parameters
    ----------
    labels_true : np.ndarray
        True labels.
    labels_pred : np.ndarray
        Predicted labels.
    average : str
        Averaging method. Can be either ``'macro'`` (default), ``'micro'`` or ``'weighted'``.

    Returns
    -------
    score : float
        Average F1 score (between 0 and 1).

    Raises
    ------
    ValueError
        If ``average`` is not one of the supported methods.

    Examples
    --------
    >>> import numpy as np
    >>> labels_true = np.array([0, 0, 1, 1])
    >>> labels_pred = np.array([0, 0, 0, 1])
    >>> float(round(get_average_f1_score(labels_true, labels_pred), 2))
    0.73
    """
    if average == 'micro':
        # micro averaging = accuracy
        return get_accuracy_score(labels_true, labels_pred)

    per_label = get_f1_scores(labels_true, labels_pred)
    if average == 'macro':
        return np.mean(per_label)
    if average == 'weighted':
        # Each label is weighted by its number of occurrences among the non-negative true labels.
        known = labels_true[labels_true >= 0]
        labels_unique, counts = np.unique(known, return_counts=True)
        weighted_total = np.sum(per_label[labels_unique] * counts)
        return weighted_total / np.sum(counts)
    raise ValueError('Check the ``average`` parameter.')
@@ -0,0 +1,58 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created in March 2020
5
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
+ """
7
+ from typing import Optional
8
+
9
+ import numpy as np
10
+
11
+ from sknetwork.classification.base_rank import RankClassifier
12
+ from sknetwork.ranking.pagerank import PageRank
13
+
14
+
15
class PageRankClassifier(RankClassifier):
    r"""Node classification by multiple personalized PageRanks.

    Parameters
    ----------
    damping_factor: float
        Probability to continue the random walk.
    solver : str
        Which solver to use: 'piteration', 'diteration', 'bicgstab', 'lanczos'.
    n_iter : int
        Number of iterations for some solvers such as ``'piteration'`` or ``'diteration'``.
    tol : float
        Tolerance for the convergence of some solvers such as ``'bicgstab'`` or ``'lanczos'``.
    n_jobs : int, optional
        Forwarded unchanged to :class:`RankClassifier` (presumably the number of parallel jobs; see that class).
    verbose : bool
        Forwarded unchanged to :class:`RankClassifier` (presumably the verbosity flag; see that class).

    Attributes
    ----------
    labels\_ : np.ndarray, shape (n_nodes,)
        Labels of nodes.
    probs\_ : sparse.csr_matrix, shape (n_nodes, n_labels)
        Probability distribution over labels.

    Example
    -------
    >>> from sknetwork.classification import PageRankClassifier
    >>> from sknetwork.data import karate_club
    >>> pagerank = PageRankClassifier()
    >>> graph = karate_club(metadata=True)
    >>> adjacency = graph.adjacency
    >>> labels_true = graph.labels
    >>> labels = {0: labels_true[0], 33: labels_true[33]}
    >>> labels_pred = pagerank.fit_predict(adjacency, labels)
    >>> float(np.round(np.mean(labels_pred == labels_true), 2))
    0.97

    References
    ----------
    Lin, F., & Cohen, W. W. (2010). `Semi-supervised classification of network data using very few labels.
    <https://lti.cs.cmu.edu/sites/default/files/research/reports/2009/cmulti09017.pdf>`_
    In IEEE International Conference on Advances in Social Networks Analysis and Mining.
    """
    # NOTE: the docstring above is a raw string because it contains ``\_``, which is an invalid
    # escape sequence in a regular string literal (SyntaxWarning on Python 3.12).

    def __init__(self, damping_factor: float = 0.85, solver: str = 'piteration', n_iter: int = 10, tol: float = 0.,
                 n_jobs: Optional[int] = None, verbose: bool = False):
        # The ranking algorithm run by the parent class is a PageRank configured with the given parameters.
        algorithm = PageRank(damping_factor, solver, n_iter, tol)
        super().__init__(algorithm, n_jobs, verbose)
@@ -0,0 +1,144 @@
1
+ #!/usr/bin/env python3
2
+ # coding: utf-8
3
+ """
4
+ Created in April 2020
5
+ @author: Thomas Bonald <tbonald@enst.fr>
6
+ """
7
+
8
+ from typing import Union
9
+
10
+ import numpy as np
11
+ from scipy import sparse
12
+
13
+ from sknetwork.classification.base import BaseClassifier
14
+ from sknetwork.classification.vote import vote_update
15
+ from sknetwork.linalg.normalizer import normalize
16
+ from sknetwork.utils.format import get_adjacency_values
17
+ from sknetwork.utils.membership import get_membership
18
+
19
+
20
class Propagation(BaseClassifier):
    r"""Node classification by label propagation.

    Parameters
    ----------
    n_iter : float
        Maximum number of iterations (-1 for infinity).
    node_order : str
        * ``'random'``: node labels are updated in random order.
        * ``'increasing'``: node labels are updated by increasing order of (in-) weight.
        * ``'decreasing'``: node labels are updated by decreasing order of (in-) weight.
        * Otherwise, node labels are updated by index order.
    weighted : bool
        If ``True``, the vote of each neighbor is proportional to the edge weight.
        Otherwise, all votes have weight 1.

    Attributes
    ----------
    labels\_ : np.ndarray, shape (n_nodes,)
        Labels of nodes.
    probs\_ : sparse.csr_matrix, shape (n_nodes, n_labels)
        Probability distribution over labels.

    Example
    -------
    >>> from sknetwork.classification import Propagation
    >>> from sknetwork.data import karate_club
    >>> propagation = Propagation()
    >>> graph = karate_club(metadata=True)
    >>> adjacency = graph.adjacency
    >>> labels_true = graph.labels
    >>> labels = {0: labels_true[0], 33: labels_true[33]}
    >>> labels_pred = propagation.fit_predict(adjacency, labels)
    >>> float(np.round(np.mean(labels_pred == labels_true), 2))
    0.94

    References
    ----------
    Raghavan, U. N., Albert, R., & Kumara, S. (2007).
    `Near linear time algorithm to detect community structures in large-scale networks.
    <https://arxiv.org/pdf/0709.2938.pdf>`_
    Physical review E, 76(3), 036106.
    """
    # NOTE: the docstring above is a raw string because it contains ``\_``, which is an invalid
    # escape sequence in a regular string literal (SyntaxWarning on Python 3.12).

    def __init__(self, n_iter: float = -1, node_order: str = None, weighted: bool = True):
        super().__init__()

        # A negative value means "iterate until the labels stop changing".
        self.n_iter = np.inf if n_iter < 0 else n_iter
        self.node_order = node_order
        self.weighted = weighted

    @staticmethod
    def _instantiate_vars(labels: np.ndarray):
        """Instantiate variables for label propagation.

        Parameters
        ----------
        labels : np.ndarray
            Label of each node; negative values mark nodes without a known label.

        Returns
        -------
        index_seed, index_remain, labels_seed : np.ndarray
            Indices of the seed nodes, indices of the nodes to update and labels of the seed nodes
            (all cast to ``np.int32``).
        """
        n = len(labels)
        if len(set(labels)) == n:
            # All labels are distinct: every node is treated both as a seed and as a node to update.
            index_seed = np.arange(n)
            index_remain = np.arange(n)
        else:
            index_seed = np.argwhere(labels >= 0).ravel()
            index_remain = np.argwhere(labels < 0).ravel()
            labels = labels[index_seed]
        return index_seed.astype(np.int32), index_remain.astype(np.int32), labels.astype(np.int32)

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], labels: Union[np.ndarray, list, dict] = None,
            labels_row: Union[np.ndarray, list, dict] = None,
            labels_col: Union[np.ndarray, list, dict] = None) -> 'Propagation':
        """Node classification by label propagation.

        Parameters
        ----------
        input_matrix : sparse.csr_matrix, np.ndarray
            Adjacency matrix or biadjacency matrix of the graph.
        labels : array, list or dict
            Known labels. Negative values ignored.
        labels_row : array, list or dict
            Known labels of rows, for bipartite graphs.
        labels_col : array, list or dict
            Known labels of columns, for bipartite graphs.

        Returns
        -------
        self: :class:`Propagation`
        """
        adjacency, seeds, self.bipartite = get_adjacency_values(input_matrix, values=labels, values_row=labels_row,
                                                                values_col=labels_col, which='labels')
        n = adjacency.shape[0]
        index_seed, index_remain, labels_seed = self._instantiate_vars(seeds)

        if self.node_order == 'random':
            np.random.shuffle(index_remain)
        elif self.node_order in ('decreasing', 'increasing'):
            # Sort the nodes to update by their in-weight. Only ``index_remain`` is reordered, so the
            # set of updated nodes stays the same (indexing a global ranking with ``index_remain``
            # would select other nodes, possibly seeds).
            weights = adjacency.T.dot(np.ones(n))
            if self.node_order == 'decreasing':
                weights = -weights
            index_remain = index_remain[np.argsort(weights[index_remain])]

        labels = -np.ones(n, dtype=np.int32)
        labels[index_seed] = labels_seed
        labels_remain = np.zeros_like(index_remain, dtype=np.int32)

        indptr = adjacency.indptr.astype(np.int32)
        indices = adjacency.indices.astype(np.int32)
        if self.weighted:
            data = adjacency.data.astype(np.float32)
        else:
            # One unit vote per stored edge: the vector must be aligned with ``indices``,
            # not with the number of nodes.
            data = np.ones(len(indices), dtype=np.float32)

        # Iterate until the labels of the updated nodes are stable or the budget is exhausted.
        t = 0
        while t < self.n_iter and not np.array_equal(labels_remain, labels[index_remain]):
            t += 1
            labels_remain = labels[index_remain].copy()
            labels = np.asarray(vote_update(indptr, indices, data, labels, index_remain))

        # Probabilities: normalized aggregation of the neighbors' label memberships.
        probs = get_membership(labels)
        probs = normalize(adjacency.dot(probs))

        self.labels_ = labels
        self.probs_ = probs
        self._split_vars(input_matrix.shape)

        return self
@@ -0,0 +1 @@
1
+ """tests for classification"""
@@ -0,0 +1,30 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """Tests for classification API"""
4
+
5
+ import unittest
6
+
7
+ from sknetwork.classification import *
8
+ from sknetwork.data.test_graphs import *
9
+ from sknetwork.embedding import LouvainEmbedding
10
+
11
+
12
class TestClassificationAPI(unittest.TestCase):
    """Check that all classifiers expose the same API on graphs and digraphs."""

    def test_undirected(self):
        """Seeds given as an array or as a dict must lead to the same predictions."""
        for adjacency in [test_graph(), test_digraph()]:
            n_nodes = adjacency.shape[0]
            # Array form of the seeds: -1 everywhere except the first two nodes.
            seeds_array = -np.ones(n_nodes)
            seeds_array[:2] = np.arange(2)
            seeds_dict = {0: 0, 1: 1}

            classifiers = [
                PageRankClassifier(),
                DiffusionClassifier(),
                NNClassifier(embedding_method=LouvainEmbedding(), n_neighbors=1),
                Propagation(),
            ]

            for classifier in classifiers:
                pred_from_array = classifier.fit_predict(adjacency, seeds_array)
                pred_from_dict = classifier.fit_predict(adjacency, seeds_dict)
                self.assertTrue((pred_from_array == pred_from_dict).all())
                self.assertEqual(pred_from_dict.shape, (n_nodes,))
                membership = classifier.fit_transform(adjacency, seeds_array)
                self.assertTupleEqual(membership.shape, (n_nodes, 2))
@@ -0,0 +1,77 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """Tests for DiffusionClassifier"""
4
+
5
+ import unittest
6
+
7
+ from sknetwork.classification import DiffusionClassifier
8
+ from sknetwork.data.test_graphs import *
9
+
10
+
11
class TestDiffusionClassifier(unittest.TestCase):
    """Tests for the diffusion-based classifier."""

    def test_graph(self):
        """Fit on a graph and a digraph; check output sizes, parameter validation and probabilities."""
        seeds = {0: 0, 1: 1}

        adjacency = test_graph()
        n_nodes = adjacency.shape[0]
        classifier = DiffusionClassifier()
        classifier.fit(adjacency, labels=seeds)
        self.assertEqual(len(classifier.labels_), n_nodes)

        adjacency = test_digraph()
        classifier = DiffusionClassifier(centering=False)
        classifier.fit(adjacency, labels=seeds)
        self.assertEqual(len(classifier.labels_), n_nodes)

        with self.assertRaises(ValueError):
            DiffusionClassifier(n_iter=0)

        classifier = DiffusionClassifier(centering=True, scale=10)
        probs = classifier.fit_predict_proba(adjacency, labels=seeds)[:, 1]
        self.assertGreater(max(probs), 0.99)

    def test_bipartite(self):
        """Fit on a bipartite graph with seeds on both rows and columns."""
        biadjacency = test_bigraph()
        n_row, n_col = biadjacency.shape
        classifier = DiffusionClassifier()
        classifier.fit(biadjacency, labels_row={0: 0, 1: 1}, labels_col={5: 1})
        self.assertEqual(len(classifier.labels_row_), n_row)
        self.assertEqual(len(classifier.labels_col_), n_col)
        same_as_predict = classifier.labels_col_ == classifier.predict(columns=True)
        self.assertTrue(all(same_as_predict))

    def test_predict(self):
        """Check the shapes returned by the predict / predict_proba / transform families."""
        adjacency = test_graph()
        n_nodes = adjacency.shape[0]
        seeds = {0: 0, 1: 1}
        classifier = DiffusionClassifier()
        labels_pred = classifier.fit_predict(adjacency, labels=seeds)
        self.assertEqual(len(labels_pred), n_nodes)
        probs_pred = classifier.fit_predict_proba(adjacency, labels=seeds)
        self.assertEqual(probs_pred.shape, (n_nodes, 2))
        membership = classifier.fit_transform(adjacency, labels=seeds)
        self.assertEqual(membership.shape, (n_nodes, 2))

        biadjacency = test_bigraph()
        n_row, n_col = biadjacency.shape
        seeds_row = {0: 0, 1: 1}
        classifier = DiffusionClassifier()
        labels_pred = classifier.fit_predict(biadjacency, labels_row=seeds_row)
        self.assertEqual(len(labels_pred), n_row)
        labels_pred = classifier.predict(columns=True)
        self.assertEqual(len(labels_pred), n_col)
        probs_pred = classifier.fit_predict_proba(biadjacency, labels_row=seeds_row)
        self.assertEqual(probs_pred.shape, (n_row, 2))
        probs_pred = classifier.predict_proba(columns=True)
        self.assertEqual(probs_pred.shape, (n_col, 2))
        membership = classifier.fit_transform(biadjacency, labels_row=seeds_row)
        self.assertEqual(membership.shape, (n_row, 2))
        membership = classifier.transform(columns=True)
        self.assertEqual(membership.shape, (n_col, 2))

    def test_reindex_label(self):
        """Non-contiguous seed labels must be returned as given (0, 2, 3), not re-indexed."""
        adjacency = test_graph()
        n_nodes = adjacency.shape[0]
        classifier = DiffusionClassifier()
        labels_pred = classifier.fit_predict(adjacency, labels={0: 0, 1: 2, 2: 3})
        self.assertEqual(len(labels_pred), n_nodes)
        self.assertEqual(set(labels_pred), {0, 2, 3})
@@ -0,0 +1,23 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """Tests for KNN"""
4
+ import unittest
5
+
6
+ from sknetwork.classification import NNClassifier
7
+ from sknetwork.data.test_graphs import *
8
+ from sknetwork.embedding import Spectral
9
+
10
+
11
class TestKNNClassifier(unittest.TestCase):
    """Tests for the nearest-neighbor classifier."""

    def test_classification(self):
        """Both seed labels must appear in the predictions, for every graph type and embedding."""
        for adjacency in [test_graph(), test_digraph(), test_bigraph()]:
            seeds = {0: 0, 1: 1}

            # Default embedding.
            classifier = NNClassifier(n_neighbors=1)
            predicted = classifier.fit_predict(adjacency, seeds)
            self.assertEqual(len(set(predicted)), 2)

            # Explicit spectral embedding, without normalization.
            classifier = NNClassifier(n_neighbors=1, embedding_method=Spectral(2), normalize=False)
            predicted = classifier.fit_predict(adjacency, seeds)
            self.assertEqual(len(set(predicted)), 2)
@@ -0,0 +1,53 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """Tests for classification metrics"""
4
+
5
+ import unittest
6
+
7
+ from sknetwork.classification.metrics import *
8
+
9
+
10
class TestMetrics(unittest.TestCase):
    """Tests for the classification metrics."""

    def setUp(self) -> None:
        """Build label vectors containing negative (ignored) entries."""
        self.labels_true = np.array([0, 1, 1, 2, 2, -1])
        # Four samples are labeled on both sides; three of them are correct.
        self.labels_pred1 = np.array([0, -1, 1, 2, 0, 0])
        # No sample is labeled on both sides: every metric must raise.
        self.labels_pred2 = np.array([-1, -1, -1, -1, -1, 0])

    def test_accuracy(self):
        self.assertEqual(get_accuracy_score(self.labels_true, self.labels_pred1), 0.75)
        with self.assertRaises(ValueError):
            get_accuracy_score(self.labels_true, self.labels_pred2)

    def test_confusion(self):
        confusion = get_confusion_matrix(self.labels_true, self.labels_pred1)
        self.assertEqual(confusion.data.sum(), 4)
        self.assertEqual(confusion.diagonal().sum(), 3)
        # Exercise the error path of the function under test (not of get_accuracy_score).
        with self.assertRaises(ValueError):
            get_confusion_matrix(self.labels_true, self.labels_pred2)

    def test_f1_score(self):
        f1_score = get_f1_score(np.array([0, 0, 1]), np.array([0, 1, 1]))
        self.assertAlmostEqual(f1_score, 0.67, 2)
        # More than two labels: the binary score is undefined.
        with self.assertRaises(ValueError):
            get_f1_score(self.labels_true, self.labels_pred1)

    def test_f1_scores(self):
        f1_scores = get_f1_scores(self.labels_true, self.labels_pred1)
        self.assertAlmostEqual(min(f1_scores), 0.67, 2)
        f1_scores, precisions, recalls = get_f1_scores(self.labels_true, self.labels_pred1, True)
        self.assertAlmostEqual(min(f1_scores), 0.67, 2)
        self.assertAlmostEqual(min(precisions), 0.5, 2)
        self.assertAlmostEqual(min(recalls), 0.5, 2)
        with self.assertRaises(ValueError):
            get_f1_scores(self.labels_true, self.labels_pred2)

    def test_average_f1_score(self):
        f1_score = get_average_f1_score(self.labels_true, self.labels_pred1)
        self.assertAlmostEqual(f1_score, 0.78, 2)
        f1_score = get_average_f1_score(self.labels_true, self.labels_pred1, average='micro')
        self.assertEqual(f1_score, 0.75)
        f1_score = get_average_f1_score(self.labels_true, self.labels_pred1, average='weighted')
        # The weighted score is a sum of floating-point ratios: compare with a tolerance.
        self.assertAlmostEqual(f1_score, 0.80, 2)
        # Use valid labels so that the error can only come from the unknown averaging method.
        with self.assertRaises(ValueError):
            get_average_f1_score(self.labels_true, self.labels_pred1, 'toto')
@@ -0,0 +1,20 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """Tests for PageRankClassifier"""
4
+
5
+ import unittest
6
+
7
+ from sknetwork.classification import PageRankClassifier
8
+ from sknetwork.data.test_graphs import *
9
+
10
+
11
class TestPageRankClassifier(unittest.TestCase):
    """Tests for the PageRank-based classifier."""

    def test_solvers(self):
        """Every solver must predict the same labels as the reference 'piteration' solver."""
        adjacency = test_graph()
        seeds = {0: 0, 1: 1}

        reference = PageRankClassifier(solver='piteration').fit_predict(adjacency, seeds)
        for solver in ['lanczos', 'bicgstab']:
            classifier = PageRankClassifier(solver=solver)
            predicted = classifier.fit_predict(adjacency, seeds)
            self.assertTrue((reference == predicted).all())
@@ -0,0 +1,24 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """Tests for label propagation"""
4
+
5
+ import unittest
6
+
7
+ from sknetwork.classification import Propagation
8
+ from sknetwork.data.test_graphs import *
9
+
10
+
11
class TestLabelPropagation(unittest.TestCase):
    """Tests for the label propagation classifier."""

    def test_algo(self):
        """One label per node must be returned for every graph type and node order."""
        for adjacency in [test_graph(), test_digraph(), test_bigraph()]:
            n_nodes = adjacency.shape[0]
            seeds = {0: 0, 1: 1}

            # Unweighted votes with a bounded number of iterations.
            classifier = Propagation(n_iter=3, weighted=False)
            predicted = classifier.fit_predict(adjacency, seeds)
            self.assertEqual(predicted.shape, (n_nodes,))

            for node_order in ['random', 'decreasing', 'increasing']:
                classifier = Propagation(node_order=node_order)
                predicted = classifier.fit_predict(adjacency, seeds)
                self.assertEqual(predicted.shape, (n_nodes,))
+ self.assertEqual(labels_pred.shape, (n,))