scikit-network 0.33.4__cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (229) hide show
  1. scikit_network-0.33.4.dist-info/METADATA +122 -0
  2. scikit_network-0.33.4.dist-info/RECORD +229 -0
  3. scikit_network-0.33.4.dist-info/WHEEL +6 -0
  4. scikit_network-0.33.4.dist-info/licenses/AUTHORS.rst +43 -0
  5. scikit_network-0.33.4.dist-info/licenses/LICENSE +34 -0
  6. scikit_network-0.33.4.dist-info/top_level.txt +1 -0
  7. scikit_network.libs/libgomp-a34b3233.so.1.0.0 +0 -0
  8. sknetwork/__init__.py +21 -0
  9. sknetwork/base.py +67 -0
  10. sknetwork/classification/__init__.py +8 -0
  11. sknetwork/classification/base.py +138 -0
  12. sknetwork/classification/base_rank.py +129 -0
  13. sknetwork/classification/diffusion.py +127 -0
  14. sknetwork/classification/knn.py +131 -0
  15. sknetwork/classification/metrics.py +205 -0
  16. sknetwork/classification/pagerank.py +58 -0
  17. sknetwork/classification/propagation.py +144 -0
  18. sknetwork/classification/tests/__init__.py +1 -0
  19. sknetwork/classification/tests/test_API.py +30 -0
  20. sknetwork/classification/tests/test_diffusion.py +77 -0
  21. sknetwork/classification/tests/test_knn.py +23 -0
  22. sknetwork/classification/tests/test_metrics.py +53 -0
  23. sknetwork/classification/tests/test_pagerank.py +20 -0
  24. sknetwork/classification/tests/test_propagation.py +24 -0
  25. sknetwork/classification/vote.cpp +27593 -0
  26. sknetwork/classification/vote.cpython-312-x86_64-linux-gnu.so +0 -0
  27. sknetwork/classification/vote.pyx +56 -0
  28. sknetwork/clustering/__init__.py +8 -0
  29. sknetwork/clustering/base.py +168 -0
  30. sknetwork/clustering/kcenters.py +251 -0
  31. sknetwork/clustering/leiden.py +238 -0
  32. sknetwork/clustering/leiden_core.cpp +31928 -0
  33. sknetwork/clustering/leiden_core.cpython-312-x86_64-linux-gnu.so +0 -0
  34. sknetwork/clustering/leiden_core.pyx +124 -0
  35. sknetwork/clustering/louvain.py +282 -0
  36. sknetwork/clustering/louvain_core.cpp +31573 -0
  37. sknetwork/clustering/louvain_core.cpython-312-x86_64-linux-gnu.so +0 -0
  38. sknetwork/clustering/louvain_core.pyx +124 -0
  39. sknetwork/clustering/metrics.py +91 -0
  40. sknetwork/clustering/postprocess.py +66 -0
  41. sknetwork/clustering/propagation_clustering.py +100 -0
  42. sknetwork/clustering/tests/__init__.py +1 -0
  43. sknetwork/clustering/tests/test_API.py +38 -0
  44. sknetwork/clustering/tests/test_kcenters.py +60 -0
  45. sknetwork/clustering/tests/test_leiden.py +34 -0
  46. sknetwork/clustering/tests/test_louvain.py +135 -0
  47. sknetwork/clustering/tests/test_metrics.py +50 -0
  48. sknetwork/clustering/tests/test_postprocess.py +39 -0
  49. sknetwork/data/__init__.py +6 -0
  50. sknetwork/data/base.py +33 -0
  51. sknetwork/data/load.py +292 -0
  52. sknetwork/data/models.py +459 -0
  53. sknetwork/data/parse.py +644 -0
  54. sknetwork/data/test_graphs.py +93 -0
  55. sknetwork/data/tests/__init__.py +1 -0
  56. sknetwork/data/tests/test_API.py +30 -0
  57. sknetwork/data/tests/test_base.py +14 -0
  58. sknetwork/data/tests/test_load.py +61 -0
  59. sknetwork/data/tests/test_models.py +52 -0
  60. sknetwork/data/tests/test_parse.py +250 -0
  61. sknetwork/data/tests/test_test_graphs.py +29 -0
  62. sknetwork/data/tests/test_toy_graphs.py +68 -0
  63. sknetwork/data/timeout.py +38 -0
  64. sknetwork/data/toy_graphs.py +611 -0
  65. sknetwork/embedding/__init__.py +8 -0
  66. sknetwork/embedding/base.py +90 -0
  67. sknetwork/embedding/force_atlas.py +198 -0
  68. sknetwork/embedding/louvain_embedding.py +142 -0
  69. sknetwork/embedding/random_projection.py +131 -0
  70. sknetwork/embedding/spectral.py +137 -0
  71. sknetwork/embedding/spring.py +198 -0
  72. sknetwork/embedding/svd.py +351 -0
  73. sknetwork/embedding/tests/__init__.py +1 -0
  74. sknetwork/embedding/tests/test_API.py +49 -0
  75. sknetwork/embedding/tests/test_force_atlas.py +35 -0
  76. sknetwork/embedding/tests/test_louvain_embedding.py +33 -0
  77. sknetwork/embedding/tests/test_random_projection.py +28 -0
  78. sknetwork/embedding/tests/test_spectral.py +81 -0
  79. sknetwork/embedding/tests/test_spring.py +50 -0
  80. sknetwork/embedding/tests/test_svd.py +43 -0
  81. sknetwork/gnn/__init__.py +10 -0
  82. sknetwork/gnn/activation.py +117 -0
  83. sknetwork/gnn/base.py +181 -0
  84. sknetwork/gnn/base_activation.py +90 -0
  85. sknetwork/gnn/base_layer.py +109 -0
  86. sknetwork/gnn/gnn_classifier.py +305 -0
  87. sknetwork/gnn/layer.py +153 -0
  88. sknetwork/gnn/loss.py +180 -0
  89. sknetwork/gnn/neighbor_sampler.py +65 -0
  90. sknetwork/gnn/optimizer.py +164 -0
  91. sknetwork/gnn/tests/__init__.py +1 -0
  92. sknetwork/gnn/tests/test_activation.py +56 -0
  93. sknetwork/gnn/tests/test_base.py +75 -0
  94. sknetwork/gnn/tests/test_base_layer.py +37 -0
  95. sknetwork/gnn/tests/test_gnn_classifier.py +130 -0
  96. sknetwork/gnn/tests/test_layers.py +80 -0
  97. sknetwork/gnn/tests/test_loss.py +33 -0
  98. sknetwork/gnn/tests/test_neigh_sampler.py +23 -0
  99. sknetwork/gnn/tests/test_optimizer.py +43 -0
  100. sknetwork/gnn/tests/test_utils.py +41 -0
  101. sknetwork/gnn/utils.py +127 -0
  102. sknetwork/hierarchy/__init__.py +6 -0
  103. sknetwork/hierarchy/base.py +90 -0
  104. sknetwork/hierarchy/louvain_hierarchy.py +260 -0
  105. sknetwork/hierarchy/metrics.py +234 -0
  106. sknetwork/hierarchy/paris.cpp +37877 -0
  107. sknetwork/hierarchy/paris.cpython-312-x86_64-linux-gnu.so +0 -0
  108. sknetwork/hierarchy/paris.pyx +310 -0
  109. sknetwork/hierarchy/postprocess.py +350 -0
  110. sknetwork/hierarchy/tests/__init__.py +1 -0
  111. sknetwork/hierarchy/tests/test_API.py +24 -0
  112. sknetwork/hierarchy/tests/test_algos.py +34 -0
  113. sknetwork/hierarchy/tests/test_metrics.py +62 -0
  114. sknetwork/hierarchy/tests/test_postprocess.py +57 -0
  115. sknetwork/linalg/__init__.py +9 -0
  116. sknetwork/linalg/basics.py +37 -0
  117. sknetwork/linalg/diteration.cpp +27409 -0
  118. sknetwork/linalg/diteration.cpython-312-x86_64-linux-gnu.so +0 -0
  119. sknetwork/linalg/diteration.pyx +47 -0
  120. sknetwork/linalg/eig_solver.py +93 -0
  121. sknetwork/linalg/laplacian.py +15 -0
  122. sknetwork/linalg/normalizer.py +86 -0
  123. sknetwork/linalg/operators.py +225 -0
  124. sknetwork/linalg/polynome.py +76 -0
  125. sknetwork/linalg/ppr_solver.py +170 -0
  126. sknetwork/linalg/push.cpp +31081 -0
  127. sknetwork/linalg/push.cpython-312-x86_64-linux-gnu.so +0 -0
  128. sknetwork/linalg/push.pyx +71 -0
  129. sknetwork/linalg/sparse_lowrank.py +142 -0
  130. sknetwork/linalg/svd_solver.py +91 -0
  131. sknetwork/linalg/tests/__init__.py +1 -0
  132. sknetwork/linalg/tests/test_eig.py +44 -0
  133. sknetwork/linalg/tests/test_laplacian.py +18 -0
  134. sknetwork/linalg/tests/test_normalization.py +34 -0
  135. sknetwork/linalg/tests/test_operators.py +66 -0
  136. sknetwork/linalg/tests/test_polynome.py +38 -0
  137. sknetwork/linalg/tests/test_ppr.py +50 -0
  138. sknetwork/linalg/tests/test_sparse_lowrank.py +61 -0
  139. sknetwork/linalg/tests/test_svd.py +38 -0
  140. sknetwork/linkpred/__init__.py +2 -0
  141. sknetwork/linkpred/base.py +46 -0
  142. sknetwork/linkpred/nn.py +126 -0
  143. sknetwork/linkpred/tests/__init__.py +1 -0
  144. sknetwork/linkpred/tests/test_nn.py +26 -0
  145. sknetwork/log.py +19 -0
  146. sknetwork/path/__init__.py +5 -0
  147. sknetwork/path/dag.py +54 -0
  148. sknetwork/path/distances.py +98 -0
  149. sknetwork/path/search.py +31 -0
  150. sknetwork/path/shortest_path.py +61 -0
  151. sknetwork/path/tests/__init__.py +1 -0
  152. sknetwork/path/tests/test_dag.py +37 -0
  153. sknetwork/path/tests/test_distances.py +62 -0
  154. sknetwork/path/tests/test_search.py +40 -0
  155. sknetwork/path/tests/test_shortest_path.py +40 -0
  156. sknetwork/ranking/__init__.py +8 -0
  157. sknetwork/ranking/base.py +57 -0
  158. sknetwork/ranking/betweenness.cpp +9716 -0
  159. sknetwork/ranking/betweenness.cpython-312-x86_64-linux-gnu.so +0 -0
  160. sknetwork/ranking/betweenness.pyx +97 -0
  161. sknetwork/ranking/closeness.py +92 -0
  162. sknetwork/ranking/hits.py +90 -0
  163. sknetwork/ranking/katz.py +79 -0
  164. sknetwork/ranking/pagerank.py +106 -0
  165. sknetwork/ranking/postprocess.py +37 -0
  166. sknetwork/ranking/tests/__init__.py +1 -0
  167. sknetwork/ranking/tests/test_API.py +32 -0
  168. sknetwork/ranking/tests/test_betweenness.py +38 -0
  169. sknetwork/ranking/tests/test_closeness.py +30 -0
  170. sknetwork/ranking/tests/test_hits.py +20 -0
  171. sknetwork/ranking/tests/test_pagerank.py +62 -0
  172. sknetwork/ranking/tests/test_postprocess.py +26 -0
  173. sknetwork/regression/__init__.py +4 -0
  174. sknetwork/regression/base.py +57 -0
  175. sknetwork/regression/diffusion.py +204 -0
  176. sknetwork/regression/tests/__init__.py +1 -0
  177. sknetwork/regression/tests/test_API.py +32 -0
  178. sknetwork/regression/tests/test_diffusion.py +56 -0
  179. sknetwork/sknetwork.py +3 -0
  180. sknetwork/test_base.py +35 -0
  181. sknetwork/test_log.py +15 -0
  182. sknetwork/topology/__init__.py +8 -0
  183. sknetwork/topology/cliques.cpp +32574 -0
  184. sknetwork/topology/cliques.cpython-312-x86_64-linux-gnu.so +0 -0
  185. sknetwork/topology/cliques.pyx +149 -0
  186. sknetwork/topology/core.cpp +30660 -0
  187. sknetwork/topology/core.cpython-312-x86_64-linux-gnu.so +0 -0
  188. sknetwork/topology/core.pyx +90 -0
  189. sknetwork/topology/cycles.py +243 -0
  190. sknetwork/topology/minheap.cpp +27341 -0
  191. sknetwork/topology/minheap.cpython-312-x86_64-linux-gnu.so +0 -0
  192. sknetwork/topology/minheap.pxd +20 -0
  193. sknetwork/topology/minheap.pyx +109 -0
  194. sknetwork/topology/structure.py +194 -0
  195. sknetwork/topology/tests/__init__.py +1 -0
  196. sknetwork/topology/tests/test_cliques.py +28 -0
  197. sknetwork/topology/tests/test_core.py +19 -0
  198. sknetwork/topology/tests/test_cycles.py +65 -0
  199. sknetwork/topology/tests/test_structure.py +85 -0
  200. sknetwork/topology/tests/test_triangles.py +38 -0
  201. sknetwork/topology/tests/test_wl.py +72 -0
  202. sknetwork/topology/triangles.cpp +8903 -0
  203. sknetwork/topology/triangles.cpython-312-x86_64-linux-gnu.so +0 -0
  204. sknetwork/topology/triangles.pyx +151 -0
  205. sknetwork/topology/weisfeiler_lehman.py +133 -0
  206. sknetwork/topology/weisfeiler_lehman_core.cpp +27644 -0
  207. sknetwork/topology/weisfeiler_lehman_core.cpython-312-x86_64-linux-gnu.so +0 -0
  208. sknetwork/topology/weisfeiler_lehman_core.pyx +114 -0
  209. sknetwork/utils/__init__.py +7 -0
  210. sknetwork/utils/check.py +355 -0
  211. sknetwork/utils/format.py +221 -0
  212. sknetwork/utils/membership.py +82 -0
  213. sknetwork/utils/neighbors.py +115 -0
  214. sknetwork/utils/tests/__init__.py +1 -0
  215. sknetwork/utils/tests/test_check.py +190 -0
  216. sknetwork/utils/tests/test_format.py +63 -0
  217. sknetwork/utils/tests/test_membership.py +24 -0
  218. sknetwork/utils/tests/test_neighbors.py +41 -0
  219. sknetwork/utils/tests/test_tfidf.py +18 -0
  220. sknetwork/utils/tests/test_values.py +66 -0
  221. sknetwork/utils/tfidf.py +37 -0
  222. sknetwork/utils/values.py +76 -0
  223. sknetwork/visualization/__init__.py +4 -0
  224. sknetwork/visualization/colors.py +34 -0
  225. sknetwork/visualization/dendrograms.py +277 -0
  226. sknetwork/visualization/graphs.py +1039 -0
  227. sknetwork/visualization/tests/__init__.py +1 -0
  228. sknetwork/visualization/tests/test_dendrograms.py +53 -0
  229. sknetwork/visualization/tests/test_graphs.py +176 -0
@@ -0,0 +1,138 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created in November 2019
5
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
+ """
7
+ from abc import ABC
8
+
9
+ import numpy as np
10
+ from scipy import sparse
11
+
12
+ from sknetwork.base import Algorithm
13
+
14
+
15
class BaseClassifier(Algorithm, ABC):
    r"""Common interface shared by node classifiers.

    Subclasses are expected to provide ``fit``, which fills the attributes below.

    Attributes
    ----------
    bipartite : bool
        If ``True``, the fitted graph is bipartite.
    labels\_ : np.ndarray, shape (n_nodes,)
        Labels of nodes.
    probs\_ : sparse.csr_matrix, shape (n_nodes, n_labels)
        Probability distribution over labels (soft classification).
    labels_row\_, labels_col\_ : np.ndarray
        Labels of rows and columns, set by ``_split_vars``.
    probs_row\_, probs_col\_ : sparse.csr_matrix
        Probability distributions of rows and columns, set by ``_split_vars``.
    """

    def __init__(self):
        # Every fitted attribute starts as ``None``.
        self.bipartite = None
        self.labels_ = self.probs_ = None
        self.labels_row_ = self.labels_col_ = None
        self.probs_row_ = self.probs_col_ = None

    def predict(self, columns: bool = False) -> np.ndarray:
        """Return the predicted labels.

        Parameters
        ----------
        columns : bool
            If ``True``, return the labels of columns instead of ``labels_``.

        Returns
        -------
        labels : np.ndarray
            Labels.
        """
        return self.labels_col_ if columns else self.labels_

    def fit_predict(self, *args, **kwargs) -> np.ndarray:
        """Call ``fit`` with the given arguments, then return ``predict()``.

        Returns
        -------
        labels : np.ndarray
            Labels.
        """
        self.fit(*args, **kwargs)
        return self.predict()

    def predict_proba(self, columns=False) -> np.ndarray:
        """Return the probability distribution over labels as a dense array.

        Parameters
        ----------
        columns : bool
            If ``True``, return the distribution of columns instead of ``probs_``.

        Returns
        -------
        probs : np.ndarray
            Probability distribution over labels.
        """
        selected = self.probs_col_ if columns else self.probs_
        return selected.toarray()

    def fit_predict_proba(self, *args, **kwargs) -> np.ndarray:
        """Call ``fit`` with the given arguments, then return ``predict_proba()``.

        Returns
        -------
        probs : np.ndarray
            Probability of each label.
        """
        self.fit(*args, **kwargs)
        return self.predict_proba()

    def transform(self, columns=False) -> sparse.csr_matrix:
        """Return the probability distribution over labels in sparse format.

        Parameters
        ----------
        columns : bool
            If ``True``, return the distribution of columns instead of ``probs_``.

        Returns
        -------
        probs : sparse.csr_matrix
            Probability distribution over labels.
        """
        return self.probs_col_ if columns else self.probs_

    def fit_transform(self, *args, **kwargs) -> sparse.csr_matrix:
        """Call ``fit`` with the given arguments, then return ``transform()``.

        Returns
        -------
        probs : sparse.csr_matrix
            Probability distribution over labels.
        """
        self.fit(*args, **kwargs)
        return self.transform()

    def _split_vars(self, shape: tuple):
        """Fill the row / column attributes from ``labels_`` and ``probs_``.

        Parameters
        ----------
        shape : tuple
            Shape of the input matrix; only ``shape[0]`` (number of rows) is used.

        Returns
        -------
        self
        """
        if not self.bipartite:
            # Not bipartite: row and column views are the full results.
            self.labels_row_ = self.labels_col_ = self.labels_
            self.probs_row_ = self.probs_col_ = self.probs_
            return self

        # Bipartite: the first ``shape[0]`` entries are rows, the rest are columns;
        # ``labels_`` / ``probs_`` are then narrowed to the rows.
        split = shape[0]
        self.labels_row_, self.labels_col_ = self.labels_[:split], self.labels_[split:]
        self.labels_ = self.labels_row_
        self.probs_row_, self.probs_col_ = self.probs_[:split], self.probs_[split:]
        self.probs_ = self.probs_row_
        return self
@@ -0,0 +1,129 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created in March 2020
5
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
+ """
7
+ from functools import partial
8
+ from multiprocessing import Pool
9
+ from typing import Union, Optional
10
+
11
+ import numpy as np
12
+ from scipy import sparse
13
+
14
+ from sknetwork.classification.base import BaseClassifier
15
+ from sknetwork.linalg.normalizer import normalize
16
+ from sknetwork.ranking.base import BaseRanking
17
+ from sknetwork.utils.check import check_labels, check_n_jobs
18
+ from sknetwork.utils.format import get_adjacency_values
19
+
20
+
21
class RankClassifier(BaseClassifier):
    r"""Generic class for ranking based classifiers.

    One ranking problem is solved per label (one-vs-all); each node gets the label with the highest score.

    Parameters
    ----------
    algorithm :
        Which ranking algorithm to use.
    n_jobs :
        If positive, number of parallel jobs allowed (-1 means maximum number).
        If ``None``, no parallel computations are made.
    verbose :
        Verbosity flag, stored on the instance (not used by the methods of this class).

    Attributes
    ----------
    bipartite : bool
        If ``True``, the fitted graph is bipartite.
    labels\_ : np.ndarray, shape (n_nodes,)
        Label of each node.
    probs\_ : sparse.csr_matrix, shape (n_nodes, n_labels)
        Probability distribution over labels.
    """
    def __init__(self, algorithm: BaseRanking, n_jobs: Optional[int] = None, verbose: bool = False):
        super(RankClassifier, self).__init__()

        self.algorithm = algorithm
        self.n_jobs = check_n_jobs(n_jobs)
        self.verbose = verbose

    @staticmethod
    def _process_labels(labels: np.ndarray) -> list:
        """Make one-vs-all binary labels from labels.

        Parameters
        ----------
        labels
            Vector of labels.

        Returns
        -------
        List of binary (0/1) integer vectors, one per value returned by ``check_labels``.
        """
        labels_unique, _ = check_labels(labels)
        return [np.array(labels == label).astype(int) for label in labels_unique]

    @staticmethod
    def _process_scores(scores: np.ndarray) -> np.ndarray:
        """Post-processing of the scores (identity here; hook for subclasses).

        Parameters
        ----------
        scores
            Matrix of scores, shape number of nodes x number of labels.

        Returns
        -------
        scores : np.ndarray
        """
        return scores

    def _split_vars(self, shape):
        """Split the labels and probabilities between rows and columns.

        When ``bipartite`` is known (set by ``fit``), the base-class logic is used, so a
        non-bipartite graph exposes the full results for both rows and columns instead of
        empty column results. When ``bipartite`` is still ``None`` (e.g. a subclass with its
        own ``fit``), the historical unconditional split at ``shape[0]`` is kept.

        Returns
        -------
        self
        """
        if self.bipartite is not None:
            return super(RankClassifier, self)._split_vars(shape)

        n_row = shape[0]
        self.labels_row_ = self.labels_[:n_row]
        self.labels_col_ = self.labels_[n_row:]
        self.labels_ = self.labels_row_
        self.probs_row_ = self.probs_[:n_row]
        self.probs_col_ = self.probs_[n_row:]
        self.probs_ = self.probs_row_
        return self

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], labels: Union[np.ndarray, dict] = None,
            labels_row: Union[np.ndarray, dict] = None, labels_col: Union[np.ndarray, dict] = None) -> 'RankClassifier':
        """Fit algorithm to data.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.
        labels :
            Known labels (dictionary or array; negative values ignored).
        labels_row, labels_col :
            Known labels on rows and columns (for bipartite graphs).

        Returns
        -------
        self: :class:`RankClassifier`
        """
        # Store the bipartite flag on the instance so that ``_split_vars`` can use it.
        adjacency, seeds_labels, self.bipartite = get_adjacency_values(input_matrix, values=labels,
                                                                       values_row=labels_row,
                                                                       values_col=labels_col)
        seeds_labels = seeds_labels.astype(int)
        labels_unique, _ = check_labels(seeds_labels)
        seeds_all = self._process_labels(seeds_labels)

        # One ranking problem per label.
        # NOTE(review): a process pool is created whatever the value of ``n_jobs`` — confirm this is intended
        # when a single job is requested.
        local_function = partial(self.algorithm.fit_predict, adjacency)
        with Pool(self.n_jobs) as pool:
            scores = np.array(pool.map(local_function, seeds_all))
        scores = scores.T  # shape (n_nodes, n_labels)

        scores = self._process_scores(scores)
        scores = normalize(scores)

        # Column j of ``scores`` refers to label ``labels_unique[j]``: map the columns back to the original
        # label values while building the matrix, rather than editing the indices of a COO matrix in place.
        nonzero = sparse.coo_matrix(scores)
        n_labels = np.max(seeds_labels) + 1
        self.probs_ = sparse.csr_matrix((nonzero.data, (nonzero.row, labels_unique[nonzero.col])),
                                        shape=(adjacency.shape[0], n_labels))

        self.labels_ = labels_unique[np.argmax(scores, axis=1)]
        self._split_vars(input_matrix.shape)

        return self
@@ -0,0 +1,127 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created in July 2022
5
+ @author: Thomas Bonald <thomas.bonald@telecom-paris.fr>
6
+ """
7
+ from typing import Optional, Union
8
+
9
+ import numpy as np
10
+ from scipy import sparse
11
+
12
+ from sknetwork.classification.base import BaseClassifier
13
+ from sknetwork.path.distances import get_distances
14
+ from sknetwork.linalg.normalizer import normalize
15
+ from sknetwork.utils.format import get_adjacency_values
16
+ from sknetwork.utils.membership import get_membership
17
+ from sknetwork.utils.neighbors import get_degrees
18
+
19
+
20
class DiffusionClassifier(BaseClassifier):
    r"""Node classification by heat diffusion.

    For each label, the temperature of a node corresponds to its probability to have this label.

    Parameters
    ----------
    n_iter : int
        Number of iterations of the diffusion (discrete time).
    centering : bool
        If ``True``, center the temperature of each label to its mean before classification (default).
    scale : float
        Multiplicative factor applied to temperatures before softmax (default = 5).
        Used only when centering is ``True``.

    Attributes
    ----------
    labels\_ : np.ndarray, shape (n_nodes,)
        Labels of nodes.
    probs\_ : sparse.csr_matrix, shape (n_nodes, n_labels)
        Probability distribution over labels.

    Example
    -------
    >>> from sknetwork.data import karate_club
    >>> diffusion = DiffusionClassifier()
    >>> graph = karate_club(metadata=True)
    >>> adjacency = graph.adjacency
    >>> labels_true = graph.labels
    >>> labels = {0: labels_true[0], 33: labels_true[33]}
    >>> labels_pred = diffusion.fit_predict(adjacency, labels)
    >>> float(round(np.mean(labels_pred == labels_true), 2))
    0.97

    References
    ----------
    Zhu, X., Lafferty, J., & Rosenfeld, R. (2005). `Semi-supervised learning with graphs`
    (Doctoral dissertation, Carnegie Mellon University, language technologies institute, school of computer science).
    """
    def __init__(self, n_iter: int = 10, centering: bool = True, scale: float = 5):
        super().__init__()

        # Reject invalid input before storing any parameter.
        if n_iter <= 0:
            raise ValueError('The number of iterations must be positive.')
        self.n_iter = n_iter
        self.centering = centering
        self.scale = scale

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray],
            labels: Optional[Union[dict, list, np.ndarray]] = None,
            labels_row: Optional[Union[dict, list, np.ndarray]] = None,
            labels_col: Optional[Union[dict, list, np.ndarray]] = None, force_bipartite: bool = False) \
            -> 'DiffusionClassifier':
        """Diffuse the known labels for ``n_iter`` iterations and classify every node.

        Parameters
        ----------
        input_matrix : sparse.csr_matrix, np.ndarray
            Adjacency matrix or biadjacency matrix of the graph.
        labels : dict, np.ndarray
            Known labels (dictionary or vector of int). Negative values ignored.
        labels_row : dict, np.ndarray
            Labels of rows for bipartite graphs. Negative values ignored.
        labels_col : dict, np.ndarray
            Labels of columns for bipartite graphs. Negative values ignored.
        force_bipartite : bool
            If ``True``, consider the input matrix as a biadjacency matrix (default = ``False``).

        Returns
        -------
        self: :class:`DiffusionClassifier`

        Raises
        ------
        ValueError
            If no node has a non-negative label.
        """
        adjacency, values, self.bipartite = get_adjacency_values(
            input_matrix, force_bipartite=force_bipartite,
            values=labels, values_row=labels_row, values_col=labels_col)
        labels = values.astype(int)

        # Seeds are the nodes with a known (non-negative) label.
        is_seed = labels >= 0
        if not is_seed.any():
            raise ValueError('At least one node must be given a non-negative label.')

        # Re-index the known labels as 0, ..., k-1; remaining entries keep their negative value.
        labels_unique, inverse = np.unique(labels[is_seed], return_inverse=True)
        labels_reindex = labels.copy()
        labels_reindex[is_seed] = inverse

        # Initial temperatures: membership rows for seeds, 0.5 everywhere for the other nodes.
        temperatures = get_membership(labels_reindex).toarray()
        seed_temperatures = temperatures[is_seed]
        temperatures[~is_seed] = 0.5

        # Diffusion, with seed temperatures reset after each step.
        transition = normalize(adjacency)
        for _ in range(self.n_iter):
            temperatures = transition.dot(temperatures)
            temperatures[is_seed] = seed_temperatures

        if self.centering:
            temperatures -= temperatures.mean(axis=0)
        predicted = labels_unique[temperatures.argmax(axis=1)]

        # softmax (the final row normalization is applied when building ``probs_``)
        if self.centering:
            temperatures = np.exp(self.scale * temperatures)

        # set label -1 to nodes not reached by diffusion
        distances = get_distances(adjacency, source=np.flatnonzero(is_seed))
        unreached = distances < 0
        predicted[unreached] = -1
        temperatures[unreached] = 0

        self.labels_ = predicted
        self.probs_ = sparse.csr_matrix(normalize(temperatures))
        self._split_vars(input_matrix.shape)

        return self
@@ -0,0 +1,131 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created in November 2019
5
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
+ @author: Thomas Bonald <tbonald@enst.fr>
7
+ """
8
+ from typing import Optional, Union
9
+
10
+ import numpy as np
11
+ from scipy import sparse
12
+
13
+ from sknetwork.classification.base import BaseClassifier
14
+ from sknetwork.embedding.base import BaseEmbedding
15
+ from sknetwork.linalg.normalizer import get_norms, normalize
16
+ from sknetwork.utils.check import check_n_neighbors
17
+ from sknetwork.utils.format import get_adjacency_values
18
+
19
+
20
class NNClassifier(BaseClassifier):
    r"""Node classification by K-nearest neighbors in the embedding space.

    Parameters
    ----------
    n_neighbors : int
        Number of nearest neighbors.
    embedding_method : :class:`BaseEmbedding`
        Embedding method used to represent nodes in vector space.
        If ``None`` (default), use identity.
    normalize : bool
        If ``True``, apply normalization so that all vectors have norm 1 in the embedding space.

    Attributes
    ----------
    labels\_ : np.ndarray, shape (n_nodes,)
        Labels of nodes.
    probs\_ : sparse.csr_matrix, shape (n_nodes, n_labels)
        Probability distribution over labels.

    Example
    -------
    >>> from sknetwork.classification import NNClassifier
    >>> from sknetwork.data import karate_club
    >>> classifier = NNClassifier(n_neighbors=1)
    >>> graph = karate_club(metadata=True)
    >>> adjacency = graph.adjacency
    >>> labels_true = graph.labels
    >>> labels = {0: labels_true[0], 33: labels_true[33]}
    >>> labels_pred = classifier.fit_predict(adjacency, labels)
    >>> float(round(np.mean(labels_pred == labels_true), 2))
    0.82
    """
    def __init__(self, n_neighbors: int = 3, embedding_method: Optional[BaseEmbedding] = None, normalize: bool = True):
        super(NNClassifier, self).__init__()
        self.n_neighbors = n_neighbors
        self.embedding_method = embedding_method
        self.normalize = normalize

    @staticmethod
    def _instantiate_vars(labels: np.ndarray):
        """Return the indices of labeled nodes (label >= 0) and of unlabeled nodes (label < 0)."""
        index_train = np.flatnonzero(labels >= 0)
        index_test = np.flatnonzero(labels < 0)
        return index_train, index_test

    def _fit_core(self, embedding, labels, index_train, index_test):
        """Classify unlabeled nodes from their nearest labeled nodes in the embedding space.

        Parameters
        ----------
        embedding :
            Embedding of the nodes (sparse matrix or dense array), one row per node.
        labels :
            Integer labels of the nodes; negative for unlabeled nodes.
        index_train :
            Indices of labeled nodes.
        index_test :
            Indices of unlabeled nodes.

        Returns
        -------
        probs : sparse.csr_matrix
            Row-normalized label counts, shape (n_nodes, max label + 1).
        labels : np.ndarray
            Index of the largest entry of each row of ``probs``.
        """
        n_neighbors = check_n_neighbors(self.n_neighbors, len(index_train))

        # Loop-invariant quantities: extract the labeled rows and their squared norms once.
        embedding_train = embedding[index_train]
        sq_norms_train = get_norms(embedding_train, p=2) ** 2

        neighbors = []
        for i in index_test:
            vector = embedding[i]
            if sparse.issparse(vector):
                vector = vector.toarray().ravel()
            # Squared Euclidean distance to each labeled node: |u|^2 - 2 u.v + |v|^2.
            distances = sq_norms_train - 2 * embedding_train.dot(vector) + np.sum(vector**2)
            # NOTE(review): np.argpartition needs kth < len(distances); this presumably relies on
            # check_n_neighbors returning a value strictly below the number of labeled nodes — confirm.
            neighbors.extend(index_train[np.argpartition(distances, n_neighbors)[:n_neighbors]])
        labels_neighbor = labels[neighbors]

        # membership matrix: one vote per selected neighbor for unlabeled nodes...
        row = list(np.repeat(index_test, n_neighbors))
        col = list(labels_neighbor)
        data = list(np.ones_like(labels_neighbor))

        # ...and a single vote for their own label for labeled nodes.
        row += list(index_train)
        col += list(labels[index_train])
        data += list(np.ones_like(index_train))

        probs = normalize(sparse.csr_matrix((data, (row, col)), shape=(len(labels), np.max(labels) + 1)))
        labels = np.argmax(probs.toarray(), axis=1)

        return probs, labels

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], labels: Union[np.ndarray, list, dict] = None,
            labels_row: Union[np.ndarray, list, dict] = None,
            labels_col: Union[np.ndarray, list, dict] = None) -> 'NNClassifier':
        """Node classification by k-nearest neighbors in the embedding space.

        Parameters
        ----------
        input_matrix : sparse.csr_matrix, np.ndarray
            Adjacency matrix or biadjacency matrix of the graph.
        labels : np.ndarray, dict
            Known labels. Negative values ignored.
        labels_row : np.ndarray, dict
            Known labels of rows, for bipartite graphs.
        labels_col : np.ndarray, dict
            Known labels of columns, for bipartite graphs.

        Returns
        -------
        self: :class:`NNClassifier`

        Raises
        ------
        ValueError
            If no node has a non-negative label.
        """
        adjacency, labels, self.bipartite = get_adjacency_values(input_matrix, values=labels, values_row=labels_row,
                                                                 values_col=labels_col)
        labels = labels.astype(int)
        # Fail early with a clear message instead of an opaque NumPy error further down.
        if (labels < 0).all():
            raise ValueError('At least one node must be given a non-negative label.')
        index_seed, index_remain = self._instantiate_vars(labels)

        if self.embedding_method is None:
            embedding = adjacency
        else:
            embedding = self.embedding_method.fit_transform(adjacency)

        if self.normalize:
            embedding = normalize(embedding, p=2)

        probs, labels = self._fit_core(embedding, labels, index_seed, index_remain)

        self.labels_ = labels
        self.probs_ = probs
        self._split_vars(input_matrix.shape)

        return self