scikit-network 0.33.3__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-network might be problematic. Click here for more details.

Files changed (228) hide show
  1. scikit_network-0.33.3.dist-info/METADATA +122 -0
  2. scikit_network-0.33.3.dist-info/RECORD +228 -0
  3. scikit_network-0.33.3.dist-info/WHEEL +5 -0
  4. scikit_network-0.33.3.dist-info/licenses/AUTHORS.rst +43 -0
  5. scikit_network-0.33.3.dist-info/licenses/LICENSE +34 -0
  6. scikit_network-0.33.3.dist-info/top_level.txt +1 -0
  7. sknetwork/__init__.py +21 -0
  8. sknetwork/base.py +67 -0
  9. sknetwork/classification/__init__.py +8 -0
  10. sknetwork/classification/base.py +142 -0
  11. sknetwork/classification/base_rank.py +133 -0
  12. sknetwork/classification/diffusion.py +134 -0
  13. sknetwork/classification/knn.py +139 -0
  14. sknetwork/classification/metrics.py +205 -0
  15. sknetwork/classification/pagerank.py +66 -0
  16. sknetwork/classification/propagation.py +152 -0
  17. sknetwork/classification/tests/__init__.py +1 -0
  18. sknetwork/classification/tests/test_API.py +30 -0
  19. sknetwork/classification/tests/test_diffusion.py +77 -0
  20. sknetwork/classification/tests/test_knn.py +23 -0
  21. sknetwork/classification/tests/test_metrics.py +53 -0
  22. sknetwork/classification/tests/test_pagerank.py +20 -0
  23. sknetwork/classification/tests/test_propagation.py +24 -0
  24. sknetwork/classification/vote.cp313-win_amd64.pyd +0 -0
  25. sknetwork/classification/vote.cpp +27584 -0
  26. sknetwork/classification/vote.pyx +56 -0
  27. sknetwork/clustering/__init__.py +8 -0
  28. sknetwork/clustering/base.py +172 -0
  29. sknetwork/clustering/kcenters.py +253 -0
  30. sknetwork/clustering/leiden.py +242 -0
  31. sknetwork/clustering/leiden_core.cp313-win_amd64.pyd +0 -0
  32. sknetwork/clustering/leiden_core.cpp +31575 -0
  33. sknetwork/clustering/leiden_core.pyx +124 -0
  34. sknetwork/clustering/louvain.py +286 -0
  35. sknetwork/clustering/louvain_core.cp313-win_amd64.pyd +0 -0
  36. sknetwork/clustering/louvain_core.cpp +31220 -0
  37. sknetwork/clustering/louvain_core.pyx +124 -0
  38. sknetwork/clustering/metrics.py +91 -0
  39. sknetwork/clustering/postprocess.py +66 -0
  40. sknetwork/clustering/propagation_clustering.py +104 -0
  41. sknetwork/clustering/tests/__init__.py +1 -0
  42. sknetwork/clustering/tests/test_API.py +38 -0
  43. sknetwork/clustering/tests/test_kcenters.py +60 -0
  44. sknetwork/clustering/tests/test_leiden.py +34 -0
  45. sknetwork/clustering/tests/test_louvain.py +135 -0
  46. sknetwork/clustering/tests/test_metrics.py +50 -0
  47. sknetwork/clustering/tests/test_postprocess.py +39 -0
  48. sknetwork/data/__init__.py +6 -0
  49. sknetwork/data/base.py +33 -0
  50. sknetwork/data/load.py +406 -0
  51. sknetwork/data/models.py +459 -0
  52. sknetwork/data/parse.py +644 -0
  53. sknetwork/data/test_graphs.py +84 -0
  54. sknetwork/data/tests/__init__.py +1 -0
  55. sknetwork/data/tests/test_API.py +30 -0
  56. sknetwork/data/tests/test_base.py +14 -0
  57. sknetwork/data/tests/test_load.py +95 -0
  58. sknetwork/data/tests/test_models.py +52 -0
  59. sknetwork/data/tests/test_parse.py +250 -0
  60. sknetwork/data/tests/test_test_graphs.py +29 -0
  61. sknetwork/data/tests/test_toy_graphs.py +68 -0
  62. sknetwork/data/timeout.py +38 -0
  63. sknetwork/data/toy_graphs.py +611 -0
  64. sknetwork/embedding/__init__.py +8 -0
  65. sknetwork/embedding/base.py +94 -0
  66. sknetwork/embedding/force_atlas.py +198 -0
  67. sknetwork/embedding/louvain_embedding.py +148 -0
  68. sknetwork/embedding/random_projection.py +135 -0
  69. sknetwork/embedding/spectral.py +141 -0
  70. sknetwork/embedding/spring.py +198 -0
  71. sknetwork/embedding/svd.py +359 -0
  72. sknetwork/embedding/tests/__init__.py +1 -0
  73. sknetwork/embedding/tests/test_API.py +49 -0
  74. sknetwork/embedding/tests/test_force_atlas.py +35 -0
  75. sknetwork/embedding/tests/test_louvain_embedding.py +33 -0
  76. sknetwork/embedding/tests/test_random_projection.py +28 -0
  77. sknetwork/embedding/tests/test_spectral.py +81 -0
  78. sknetwork/embedding/tests/test_spring.py +50 -0
  79. sknetwork/embedding/tests/test_svd.py +43 -0
  80. sknetwork/gnn/__init__.py +10 -0
  81. sknetwork/gnn/activation.py +117 -0
  82. sknetwork/gnn/base.py +181 -0
  83. sknetwork/gnn/base_activation.py +90 -0
  84. sknetwork/gnn/base_layer.py +109 -0
  85. sknetwork/gnn/gnn_classifier.py +305 -0
  86. sknetwork/gnn/layer.py +153 -0
  87. sknetwork/gnn/loss.py +180 -0
  88. sknetwork/gnn/neighbor_sampler.py +65 -0
  89. sknetwork/gnn/optimizer.py +164 -0
  90. sknetwork/gnn/tests/__init__.py +1 -0
  91. sknetwork/gnn/tests/test_activation.py +56 -0
  92. sknetwork/gnn/tests/test_base.py +75 -0
  93. sknetwork/gnn/tests/test_base_layer.py +37 -0
  94. sknetwork/gnn/tests/test_gnn_classifier.py +130 -0
  95. sknetwork/gnn/tests/test_layers.py +80 -0
  96. sknetwork/gnn/tests/test_loss.py +33 -0
  97. sknetwork/gnn/tests/test_neigh_sampler.py +23 -0
  98. sknetwork/gnn/tests/test_optimizer.py +43 -0
  99. sknetwork/gnn/tests/test_utils.py +41 -0
  100. sknetwork/gnn/utils.py +127 -0
  101. sknetwork/hierarchy/__init__.py +6 -0
  102. sknetwork/hierarchy/base.py +96 -0
  103. sknetwork/hierarchy/louvain_hierarchy.py +272 -0
  104. sknetwork/hierarchy/metrics.py +234 -0
  105. sknetwork/hierarchy/paris.cp313-win_amd64.pyd +0 -0
  106. sknetwork/hierarchy/paris.cpp +37868 -0
  107. sknetwork/hierarchy/paris.pyx +316 -0
  108. sknetwork/hierarchy/postprocess.py +350 -0
  109. sknetwork/hierarchy/tests/__init__.py +1 -0
  110. sknetwork/hierarchy/tests/test_API.py +24 -0
  111. sknetwork/hierarchy/tests/test_algos.py +34 -0
  112. sknetwork/hierarchy/tests/test_metrics.py +62 -0
  113. sknetwork/hierarchy/tests/test_postprocess.py +57 -0
  114. sknetwork/linalg/__init__.py +9 -0
  115. sknetwork/linalg/basics.py +37 -0
  116. sknetwork/linalg/diteration.cp313-win_amd64.pyd +0 -0
  117. sknetwork/linalg/diteration.cpp +27400 -0
  118. sknetwork/linalg/diteration.pyx +47 -0
  119. sknetwork/linalg/eig_solver.py +93 -0
  120. sknetwork/linalg/laplacian.py +15 -0
  121. sknetwork/linalg/normalizer.py +86 -0
  122. sknetwork/linalg/operators.py +225 -0
  123. sknetwork/linalg/polynome.py +76 -0
  124. sknetwork/linalg/ppr_solver.py +170 -0
  125. sknetwork/linalg/push.cp313-win_amd64.pyd +0 -0
  126. sknetwork/linalg/push.cpp +31072 -0
  127. sknetwork/linalg/push.pyx +71 -0
  128. sknetwork/linalg/sparse_lowrank.py +142 -0
  129. sknetwork/linalg/svd_solver.py +91 -0
  130. sknetwork/linalg/tests/__init__.py +1 -0
  131. sknetwork/linalg/tests/test_eig.py +44 -0
  132. sknetwork/linalg/tests/test_laplacian.py +18 -0
  133. sknetwork/linalg/tests/test_normalization.py +34 -0
  134. sknetwork/linalg/tests/test_operators.py +66 -0
  135. sknetwork/linalg/tests/test_polynome.py +38 -0
  136. sknetwork/linalg/tests/test_ppr.py +50 -0
  137. sknetwork/linalg/tests/test_sparse_lowrank.py +61 -0
  138. sknetwork/linalg/tests/test_svd.py +38 -0
  139. sknetwork/linkpred/__init__.py +2 -0
  140. sknetwork/linkpred/base.py +46 -0
  141. sknetwork/linkpred/nn.py +126 -0
  142. sknetwork/linkpred/tests/__init__.py +1 -0
  143. sknetwork/linkpred/tests/test_nn.py +27 -0
  144. sknetwork/log.py +19 -0
  145. sknetwork/path/__init__.py +5 -0
  146. sknetwork/path/dag.py +54 -0
  147. sknetwork/path/distances.py +98 -0
  148. sknetwork/path/search.py +31 -0
  149. sknetwork/path/shortest_path.py +61 -0
  150. sknetwork/path/tests/__init__.py +1 -0
  151. sknetwork/path/tests/test_dag.py +37 -0
  152. sknetwork/path/tests/test_distances.py +62 -0
  153. sknetwork/path/tests/test_search.py +40 -0
  154. sknetwork/path/tests/test_shortest_path.py +40 -0
  155. sknetwork/ranking/__init__.py +8 -0
  156. sknetwork/ranking/base.py +61 -0
  157. sknetwork/ranking/betweenness.cp313-win_amd64.pyd +0 -0
  158. sknetwork/ranking/betweenness.cpp +9707 -0
  159. sknetwork/ranking/betweenness.pyx +97 -0
  160. sknetwork/ranking/closeness.py +92 -0
  161. sknetwork/ranking/hits.py +94 -0
  162. sknetwork/ranking/katz.py +83 -0
  163. sknetwork/ranking/pagerank.py +110 -0
  164. sknetwork/ranking/postprocess.py +37 -0
  165. sknetwork/ranking/tests/__init__.py +1 -0
  166. sknetwork/ranking/tests/test_API.py +32 -0
  167. sknetwork/ranking/tests/test_betweenness.py +38 -0
  168. sknetwork/ranking/tests/test_closeness.py +30 -0
  169. sknetwork/ranking/tests/test_hits.py +20 -0
  170. sknetwork/ranking/tests/test_pagerank.py +62 -0
  171. sknetwork/ranking/tests/test_postprocess.py +26 -0
  172. sknetwork/regression/__init__.py +4 -0
  173. sknetwork/regression/base.py +61 -0
  174. sknetwork/regression/diffusion.py +210 -0
  175. sknetwork/regression/tests/__init__.py +1 -0
  176. sknetwork/regression/tests/test_API.py +32 -0
  177. sknetwork/regression/tests/test_diffusion.py +56 -0
  178. sknetwork/sknetwork.py +3 -0
  179. sknetwork/test_base.py +35 -0
  180. sknetwork/test_log.py +15 -0
  181. sknetwork/topology/__init__.py +8 -0
  182. sknetwork/topology/cliques.cp313-win_amd64.pyd +0 -0
  183. sknetwork/topology/cliques.cpp +32565 -0
  184. sknetwork/topology/cliques.pyx +149 -0
  185. sknetwork/topology/core.cp313-win_amd64.pyd +0 -0
  186. sknetwork/topology/core.cpp +30651 -0
  187. sknetwork/topology/core.pyx +90 -0
  188. sknetwork/topology/cycles.py +243 -0
  189. sknetwork/topology/minheap.cp313-win_amd64.pyd +0 -0
  190. sknetwork/topology/minheap.cpp +27332 -0
  191. sknetwork/topology/minheap.pxd +20 -0
  192. sknetwork/topology/minheap.pyx +109 -0
  193. sknetwork/topology/structure.py +194 -0
  194. sknetwork/topology/tests/__init__.py +1 -0
  195. sknetwork/topology/tests/test_cliques.py +28 -0
  196. sknetwork/topology/tests/test_core.py +19 -0
  197. sknetwork/topology/tests/test_cycles.py +65 -0
  198. sknetwork/topology/tests/test_structure.py +85 -0
  199. sknetwork/topology/tests/test_triangles.py +38 -0
  200. sknetwork/topology/tests/test_wl.py +72 -0
  201. sknetwork/topology/triangles.cp313-win_amd64.pyd +0 -0
  202. sknetwork/topology/triangles.cpp +8894 -0
  203. sknetwork/topology/triangles.pyx +151 -0
  204. sknetwork/topology/weisfeiler_lehman.py +133 -0
  205. sknetwork/topology/weisfeiler_lehman_core.cp313-win_amd64.pyd +0 -0
  206. sknetwork/topology/weisfeiler_lehman_core.cpp +27635 -0
  207. sknetwork/topology/weisfeiler_lehman_core.pyx +114 -0
  208. sknetwork/utils/__init__.py +7 -0
  209. sknetwork/utils/check.py +355 -0
  210. sknetwork/utils/format.py +221 -0
  211. sknetwork/utils/membership.py +82 -0
  212. sknetwork/utils/neighbors.py +115 -0
  213. sknetwork/utils/tests/__init__.py +1 -0
  214. sknetwork/utils/tests/test_check.py +190 -0
  215. sknetwork/utils/tests/test_format.py +63 -0
  216. sknetwork/utils/tests/test_membership.py +24 -0
  217. sknetwork/utils/tests/test_neighbors.py +41 -0
  218. sknetwork/utils/tests/test_tfidf.py +18 -0
  219. sknetwork/utils/tests/test_values.py +66 -0
  220. sknetwork/utils/tfidf.py +37 -0
  221. sknetwork/utils/values.py +76 -0
  222. sknetwork/visualization/__init__.py +4 -0
  223. sknetwork/visualization/colors.py +34 -0
  224. sknetwork/visualization/dendrograms.py +277 -0
  225. sknetwork/visualization/graphs.py +1039 -0
  226. sknetwork/visualization/tests/__init__.py +1 -0
  227. sknetwork/visualization/tests/test_dendrograms.py +53 -0
  228. sknetwork/visualization/tests/test_graphs.py +176 -0
@@ -0,0 +1,142 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created in November 2019
5
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
+ """
7
+ from abc import ABC
8
+
9
+ import numpy as np
10
+ from scipy import sparse
11
+
12
+ from sknetwork.base import Algorithm
13
+
14
+
15
class BaseClassifier(Algorithm, ABC):
    """Common interface shared by all node classifiers.

    Subclasses are expected to implement ``fit`` and to fill in the attributes below.

    Attributes
    ----------
    bipartite : bool
        ``True`` when the fitted graph is bipartite.
    labels_ : np.ndarray
        Predicted labels of nodes (rows only once a bipartite graph has been split).
    probs_ : sparse.csr_matrix, shape (n_row, n_labels)
        Soft classification, i.e. probability distribution over labels.
    labels_row_ , labels_col_ : np.ndarray
        Predicted labels of rows and columns (for bipartite graphs).
    probs_row_, probs_col_ : sparse.csr_matrix, shapes (n_row, n_labels) and (n_col, n_labels)
        Probability distributions over labels of rows and columns (for bipartite graphs).
    """

    def __init__(self):
        # Every fitted attribute starts unset; the order below mirrors the attribute list.
        for name in ('bipartite', 'labels_', 'probs_', 'labels_row_', 'labels_col_', 'probs_row_', 'probs_col_'):
            setattr(self, name, None)

    def predict(self, columns: bool = False) -> np.ndarray:
        """Return the predicted labels.

        Parameters
        ----------
        columns : bool
            If ``True``, return the labels of columns instead of those of rows.

        Returns
        -------
        labels : np.ndarray
            Labels.
        """
        return self.labels_col_ if columns else self.labels_

    def fit_predict(self, *args, **kwargs) -> np.ndarray:
        """Fit the algorithm, then return the predicted labels. Accepts the same parameters as ``fit``.

        Returns
        -------
        labels : np.ndarray
            Labels.
        """
        self.fit(*args, **kwargs)
        return self.predict()

    def predict_proba(self, columns=False) -> np.ndarray:
        """Return the predicted probability distribution over labels as a dense array.

        Parameters
        ----------
        columns : bool
            If ``True``, return the distribution of columns instead of that of rows.

        Returns
        -------
        probs : np.ndarray
            Probability distribution over labels.
        """
        selected = self.probs_col_ if columns else self.probs_
        return selected.toarray()

    def fit_predict_proba(self, *args, **kwargs) -> np.ndarray:
        """Fit the algorithm, then return the dense probability distribution over labels.
        Accepts the same parameters as ``fit``.

        Returns
        -------
        probs : np.ndarray
            Probability of each label.
        """
        self.fit(*args, **kwargs)
        return self.predict_proba()

    def transform(self, columns=False) -> sparse.csr_matrix:
        """Return the predicted probability distribution over labels in sparse format.

        Parameters
        ----------
        columns : bool
            If ``True``, return the distribution of columns instead of that of rows.

        Returns
        -------
        probs : sparse.csr_matrix
            Probability distribution over labels.
        """
        return self.probs_col_ if columns else self.probs_

    def fit_transform(self, *args, **kwargs) -> sparse.csr_matrix:
        """Fit the algorithm, then return the sparse probability distribution over labels.
        Accepts the same parameters as ``fit``.

        Returns
        -------
        probs : sparse.csr_matrix
            Probability distribution over labels.
        """
        self.fit(*args, **kwargs)
        return self.transform()

    def _split_vars(self, shape: tuple):
        """Dispatch the fitted labels and probabilities between rows and columns.

        Parameters
        ----------
        shape : tuple
            Shape of the input matrix; only its first entry (number of rows) is used.

        Returns
        -------
        self
        """
        if not self.bipartite:
            # Rows and columns are the same nodes: every view aliases the full result.
            self.labels_row_ = self.labels_col_ = self.labels_
            self.probs_row_ = self.probs_col_ = self.probs_
            return self

        # Bipartite case: the first n_row entries describe rows, the remaining ones columns.
        n_row = shape[0]
        all_labels, all_probs = self.labels_, self.probs_
        self.labels_row_, self.labels_col_ = all_labels[:n_row], all_labels[n_row:]
        self.labels_ = self.labels_row_
        self.probs_row_, self.probs_col_ = all_probs[:n_row], all_probs[n_row:]
        self.probs_ = self.probs_row_
        return self
@@ -0,0 +1,133 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created in March 2020
5
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
+ """
7
+ from functools import partial
8
+ from multiprocessing import Pool
9
+ from typing import Union, Optional
10
+
11
+ import numpy as np
12
+ from scipy import sparse
13
+
14
+ from sknetwork.classification.base import BaseClassifier
15
+ from sknetwork.linalg.normalizer import normalize
16
+ from sknetwork.ranking.base import BaseRanking
17
+ from sknetwork.utils.check import check_labels, check_n_jobs
18
+ from sknetwork.utils.format import get_adjacency_values
19
+
20
+
21
class RankClassifier(BaseClassifier):
    """Generic class for ranking based classifiers.

    One ranking problem is solved per label (one-vs-all); each node receives the label
    with the highest score.

    Parameters
    ----------
    algorithm :
        Which ranking algorithm to use.
    n_jobs :
        If positive, number of parallel jobs allowed (-1 means maximum number).
        If ``None``, no parallel computations are made.
    verbose :
        Verbosity flag. It is only stored on the instance; this class does not read it.

    Attributes
    ----------
    bipartite : bool
        If ``True``, the fitted graph is bipartite.
    labels_ : np.ndarray, shape (n_labels,)
        Label of each node.
    probs_ : sparse.csr_matrix, shape (n_row, n_labels)
        Probability distribution over labels.
    labels_row_, labels_col_ : np.ndarray
        Labels of rows and columns, for bipartite graphs.
    probs_row_, probs_col_ : sparse.csr_matrix, shape (n_row, n_labels)
        Probability distributions over labels for rows and columns (for bipartite graphs).
    """
    def __init__(self, algorithm: BaseRanking, n_jobs: Optional[int] = None, verbose: bool = False):
        super(RankClassifier, self).__init__()

        self.algorithm = algorithm
        # NOTE(review): the value returned by check_n_jobs is passed as-is to multiprocessing.Pool in fit.
        self.n_jobs = check_n_jobs(n_jobs)
        self.verbose = verbose

    @staticmethod
    def _process_labels(labels: np.ndarray) -> list:
        """Make one-vs-all binary labels from labels.

        Parameters
        ----------
        labels
            Vector of labels.

        Returns
        -------
        List of binary (0/1) integer vectors, one per unique label reported by ``check_labels``.
        """
        labels_unique, _ = check_labels(labels)
        return [np.array(labels == label).astype(int) for label in labels_unique]

    @staticmethod
    def _process_scores(scores: np.ndarray) -> np.ndarray:
        """Post-processing of the scores.

        This default implementation returns the scores unchanged.

        Parameters
        ----------
        scores
            Matrix of scores, shape number of nodes x number of labels.

        Returns
        -------
        scores : np.ndarray
        """
        return scores

    def _split_vars(self, shape):
        """Split labels and probabilities between rows and columns.

        Delegates to :meth:`BaseClassifier._split_vars`, so that the split is only applied
        to bipartite graphs; otherwise the row and column attributes mirror ``labels_`` and ``probs_``.
        """
        return super()._split_vars(shape)

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], labels: Union[np.ndarray, dict] = None,
            labels_row: Union[np.ndarray, dict] = None, labels_col: Union[np.ndarray, dict] = None) -> 'RankClassifier':
        """Fit algorithm to data.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.
        labels :
            Known labels (dictionary or array; negative values ignored).
        labels_row, labels_col :
            Known labels on rows and columns (for bipartite graphs).

        Returns
        -------
        self: :class:`RankClassifier`
        """
        # Store the bipartite flag on the instance so that _split_vars can rely on it.
        adjacency, seeds_labels, self.bipartite = get_adjacency_values(input_matrix, values=labels,
                                                                       values_row=labels_row, values_col=labels_col)
        seeds_labels = seeds_labels.astype(int)
        labels_unique, _ = check_labels(seeds_labels)
        seeds_all = self._process_labels(seeds_labels)

        # One ranking per label, with the nodes carrying this label as seeds.
        local_function = partial(self.algorithm.fit_predict, adjacency)
        with Pool(self.n_jobs) as pool:
            scores = np.array(pool.map(local_function, seeds_all))
        # pool.map yields one score vector per label; transpose to nodes x labels.
        scores = scores.T

        scores = self._process_scores(scores)
        scores = normalize(scores)

        # Map column indices (positions in labels_unique) back to the original label values.
        probs = sparse.coo_matrix(scores)
        probs.col = labels_unique[probs.col]

        labels = np.argmax(scores, axis=1)
        self.labels_ = labels_unique[labels]
        self.probs_ = sparse.csr_matrix(probs, shape=(adjacency.shape[0], np.max(seeds_labels) + 1))
        self._split_vars(input_matrix.shape)

        return self
@@ -0,0 +1,134 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created in July 2022
5
+ @author: Thomas Bonald <thomas.bonald@telecom-paris.fr>
6
+ """
7
+ from typing import Optional, Union
8
+
9
+ import numpy as np
10
+ from scipy import sparse
11
+
12
+ from sknetwork.classification.base import BaseClassifier
13
+ from sknetwork.path.distances import get_distances
14
+ from sknetwork.linalg.normalizer import normalize
15
+ from sknetwork.utils.format import get_adjacency_values
16
+ from sknetwork.utils.membership import get_membership
17
+ from sknetwork.utils.neighbors import get_degrees
18
+
19
+
20
class DiffusionClassifier(BaseClassifier):
    """Node classification by heat diffusion.

    For each label, the temperature of a node corresponds to its probability to have this label.

    Parameters
    ----------
    n_iter : int
        Number of iterations of the diffusion (discrete time).
    centering : bool
        If ``True``, center the temperature of each label to its mean before classification (default).
    scale : float
        Multiplicative factor applied to temperatures before softmax (default = 5).
        Used only when centering is ``True``.

    Attributes
    ----------
    labels_ : np.ndarray, shape (n_labels,)
        Labels of nodes.
    probs_ : sparse.csr_matrix, shape (n_row, n_labels)
        Probability distribution over labels.
    labels_row_ : np.ndarray
        Labels of rows, for bipartite graphs.
    labels_col_ : np.ndarray
        Labels of columns, for bipartite graphs.
    probs_row_ : sparse.csr_matrix, shape (n_row, n_labels)
        Probability distributions over labels of rows, for bipartite graphs.
    probs_col_ : sparse.csr_matrix, shape (n_col, n_labels)
        Probability distributions over labels of columns, for bipartite graphs.

    Example
    -------
    >>> from sknetwork.data import karate_club
    >>> diffusion = DiffusionClassifier()
    >>> graph = karate_club(metadata=True)
    >>> adjacency = graph.adjacency
    >>> labels_true = graph.labels
    >>> labels = {0: labels_true[0], 33: labels_true[33]}
    >>> labels_pred = diffusion.fit_predict(adjacency, labels)
    >>> float(round(np.mean(labels_pred == labels_true), 2))
    0.97

    References
    ----------
    Zhu, X., Lafferty, J., & Rosenfeld, R. (2005). `Semi-supervised learning with graphs`
    (Doctoral dissertation, Carnegie Mellon University, language technologies institute, school of computer science).
    """
    def __init__(self, n_iter: int = 10, centering: bool = True, scale: float = 5):
        super().__init__()

        # Guard clause: a diffusion needs at least one step.
        if n_iter <= 0:
            raise ValueError('The number of iterations must be positive.')
        self.n_iter = n_iter
        self.centering = centering
        self.scale = scale

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray],
            labels: Optional[Union[dict, list, np.ndarray]] = None,
            labels_row: Optional[Union[dict, list, np.ndarray]] = None,
            labels_col: Optional[Union[dict, list, np.ndarray]] = None, force_bipartite: bool = False) \
            -> 'DiffusionClassifier':
        """Run the diffusion with the temperatures of labeled nodes held fixed, then classify.

        Parameters
        ----------
        input_matrix : sparse.csr_matrix, np.ndarray
            Adjacency matrix or biadjacency matrix of the graph.
        labels : dict, np.ndarray
            Known labels (dictionary or vector of int). Negative values ignored.
        labels_row : dict, np.ndarray
            Labels of rows for bipartite graphs. Negative values ignored.
        labels_col : dict, np.ndarray
            Labels of columns for bipartite graphs. Negative values ignored.
        force_bipartite : bool
            If ``True``, consider the input matrix as a biadjacency matrix (default = ``False``).

        Returns
        -------
        self: :class:`DiffusionClassifier`

        Raises
        ------
        ValueError
            If no node has a non-negative label.
        """
        adjacency, values, self.bipartite = get_adjacency_values(
            input_matrix, force_bipartite=force_bipartite, values=labels, values_row=labels_row,
            values_col=labels_col)
        labels = values.astype(int)

        # Seeds are the nodes with a known (non-negative) label.
        is_seed = labels >= 0
        if not is_seed.any():
            raise ValueError('At least one node must be given a non-negative label.')

        # Re-index the seed labels as 0, 1, ..., keeping the original values in labels_unique.
        labels_unique, inverse = np.unique(labels[is_seed], return_inverse=True)
        labels_reindex = labels.copy()
        labels_reindex[is_seed] = inverse

        # Initial temperatures: membership rows for seeds, 0.5 everywhere for the other nodes.
        temperatures = get_membership(labels_reindex).toarray()
        temperatures_seeds = temperatures[is_seed]
        temperatures[~is_seed] = 0.5

        diffusion = normalize(adjacency)
        for _ in range(self.n_iter):
            temperatures = diffusion.dot(temperatures)
            # Seeds are reset to their initial temperatures after each step.
            temperatures[is_seed] = temperatures_seeds

        if self.centering:
            temperatures -= temperatures.mean(axis=0)
        labels_ = labels_unique[temperatures.argmax(axis=1)]

        # softmax (the normalization happens when building probs_ below)
        if self.centering:
            temperatures = np.exp(self.scale * temperatures)

        # set label -1 to nodes not reached by diffusion
        unreached = get_distances(adjacency, source=np.flatnonzero(is_seed)) < 0
        labels_[unreached] = -1
        temperatures[unreached] = 0

        self.labels_ = labels_
        self.probs_ = sparse.csr_matrix(normalize(temperatures))
        self._split_vars(input_matrix.shape)

        return self
@@ -0,0 +1,139 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created in November 2019
5
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
+ @author: Thomas Bonald <tbonald@enst.fr>
7
+ """
8
+ from typing import Optional, Union
9
+
10
+ import numpy as np
11
+ from scipy import sparse
12
+
13
+ from sknetwork.classification.base import BaseClassifier
14
+ from sknetwork.embedding.base import BaseEmbedding
15
+ from sknetwork.linalg.normalizer import get_norms, normalize
16
+ from sknetwork.utils.check import check_n_neighbors
17
+ from sknetwork.utils.format import get_adjacency_values
18
+
19
+
20
class NNClassifier(BaseClassifier):
    """Node classification by K-nearest neighbors in the embedding space.

    Parameters
    ----------
    n_neighbors : int
        Number of nearest neighbors.
    embedding_method : :class:`BaseEmbedding`
        Embedding method used to represent nodes in vector space.
        If ``None`` (default), use identity.
    normalize : bool
        If ``True``, apply normalization so that all vectors have norm 1 in the embedding space.

    Attributes
    ----------
    labels_ : np.ndarray, shape (n_labels,)
        Labels of nodes.
    probs_ : sparse.csr_matrix, shape (n_row, n_labels)
        Probability distribution over labels.
    labels_row_ : np.ndarray
        Labels of rows, for bipartite graphs.
    labels_col_ : np.ndarray
        Labels of columns, for bipartite graphs.
    probs_row_ : sparse.csr_matrix, shape (n_row, n_labels)
        Probability distributions over labels of rows, for bipartite graphs.
    probs_col_ : sparse.csr_matrix, shape (n_col, n_labels)
        Probability distributions over labels of columns, for bipartite graphs.

    Example
    -------
    >>> from sknetwork.classification import NNClassifier
    >>> from sknetwork.data import karate_club
    >>> classifier = NNClassifier(n_neighbors=1)
    >>> graph = karate_club(metadata=True)
    >>> adjacency = graph.adjacency
    >>> labels_true = graph.labels
    >>> labels = {0: labels_true[0], 33: labels_true[33]}
    >>> labels_pred = classifier.fit_predict(adjacency, labels)
    >>> float(round(np.mean(labels_pred == labels_true), 2))
    0.82
    """
    def __init__(self, n_neighbors: int = 3, embedding_method: Optional[BaseEmbedding] = None, normalize: bool = True):
        super(NNClassifier, self).__init__()
        self.n_neighbors = n_neighbors
        self.embedding_method = embedding_method
        self.normalize = normalize

    @staticmethod
    def _instantiate_vars(labels: np.ndarray):
        """Split node indices between labeled and unlabeled nodes.

        Parameters
        ----------
        labels : np.ndarray
            Vector of labels; negative entries mark unlabeled nodes.

        Returns
        -------
        index_train, index_test : np.ndarray
            Indices of nodes with a non-negative label and with a negative label, respectively.
        """
        index_train = np.flatnonzero(labels >= 0)
        index_test = np.flatnonzero(labels < 0)
        return index_train, index_test

    def _fit_core(self, embedding, labels, index_train, index_test):
        """Label each test node by a vote among its nearest training nodes.

        Parameters
        ----------
        embedding :
            Node vectors, one row per node (sparse or dense).
        labels : np.ndarray
            Vector of labels; entries at ``index_train`` are the known labels.
        index_train, index_test : np.ndarray
            Indices of labeled and unlabeled nodes.

        Returns
        -------
        probs : sparse.csr_matrix
            Row-normalized vote counts, shape (number of nodes, max label + 1).
        labels : np.ndarray
            Index of the largest entry of each row of ``probs``.
        """
        # NOTE(review): np.argpartition below needs n_neighbors < len(index_train);
        # presumably check_n_neighbors enforces it - confirm against its implementation.
        n_neighbors = check_n_neighbors(self.n_neighbors, len(index_train))

        # Loop invariants: the training vectors and their squared norms do not depend on the test node.
        embedding_train = embedding[index_train]
        norms_train_square = get_norms(embedding_train, p=2) ** 2

        neighbors = []
        for i in index_test:
            vector = embedding[i]
            if sparse.issparse(vector):
                vector = vector.toarray().ravel()
            # Squared Euclidean distances, expanded as |x|^2 - 2 x.v + |v|^2.
            distances = norms_train_square - 2 * embedding_train.dot(vector) + np.sum(vector**2)
            neighbors += list(index_train[np.argpartition(distances, n_neighbors)[:n_neighbors]])
        labels_neighbor = labels[neighbors]

        # membership matrix: one vote per neighbor for each test node...
        row = list(np.repeat(index_test, n_neighbors))
        col = list(labels_neighbor)
        data = list(np.ones_like(labels_neighbor))

        # ...and a single vote for its own label for each training node
        row += list(index_train)
        col += list(labels[index_train])
        data += list(np.ones_like(index_train))

        probs = normalize(sparse.csr_matrix((data, (row, col)), shape=(len(labels), np.max(labels) + 1)))
        labels = np.argmax(probs.toarray(), axis=1)

        return probs, labels

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], labels: Union[np.ndarray, list, dict] = None,
            labels_row: Union[np.ndarray, list, dict] = None,
            labels_col: Union[np.ndarray, list, dict] = None) -> 'NNClassifier':
        """Node classification by k-nearest neighbors in the embedding space.

        Parameters
        ----------
        input_matrix : sparse.csr_matrix, np.ndarray
            Adjacency matrix or biadjacency matrix of the graph.
        labels : np.ndarray, dict
            Known labels. Negative values ignored.
        labels_row : np.ndarray, dict
            Known labels of rows, for bipartite graphs.
        labels_col : np.ndarray, dict
            Known labels of columns, for bipartite graphs.

        Returns
        -------
        self: :class:`NNClassifier`
        """
        adjacency, labels, self.bipartite = get_adjacency_values(input_matrix, values=labels, values_row=labels_row,
                                                                 values_col=labels_col)
        labels = labels.astype(int)
        index_seed, index_remain = self._instantiate_vars(labels)

        # Without an embedding method, the rows of the adjacency matrix are used as node vectors.
        if self.embedding_method is None:
            embedding = adjacency
        else:
            embedding = self.embedding_method.fit_transform(adjacency)

        if self.normalize:
            # Here ``normalize`` is the module-level function, not the boolean attribute.
            embedding = normalize(embedding, p=2)

        probs, labels = self._fit_core(embedding, labels, index_seed, index_remain)

        self.labels_ = labels
        self.probs_ = probs
        self._split_vars(input_matrix.shape)

        return self