scikit-network 0.33.0__cp312-cp312-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-network might be problematic. Click here for more details.

Files changed (216) hide show
  1. scikit_network-0.33.0.dist-info/AUTHORS.rst +43 -0
  2. scikit_network-0.33.0.dist-info/LICENSE +34 -0
  3. scikit_network-0.33.0.dist-info/METADATA +517 -0
  4. scikit_network-0.33.0.dist-info/RECORD +216 -0
  5. scikit_network-0.33.0.dist-info/WHEEL +5 -0
  6. scikit_network-0.33.0.dist-info/top_level.txt +1 -0
  7. sknetwork/__init__.py +21 -0
  8. sknetwork/base.py +67 -0
  9. sknetwork/classification/__init__.py +8 -0
  10. sknetwork/classification/base.py +142 -0
  11. sknetwork/classification/base_rank.py +133 -0
  12. sknetwork/classification/diffusion.py +134 -0
  13. sknetwork/classification/knn.py +139 -0
  14. sknetwork/classification/metrics.py +205 -0
  15. sknetwork/classification/pagerank.py +66 -0
  16. sknetwork/classification/propagation.py +152 -0
  17. sknetwork/classification/tests/__init__.py +1 -0
  18. sknetwork/classification/tests/test_API.py +30 -0
  19. sknetwork/classification/tests/test_diffusion.py +77 -0
  20. sknetwork/classification/tests/test_knn.py +23 -0
  21. sknetwork/classification/tests/test_metrics.py +53 -0
  22. sknetwork/classification/tests/test_pagerank.py +20 -0
  23. sknetwork/classification/tests/test_propagation.py +24 -0
  24. sknetwork/classification/vote.cpython-312-darwin.so +0 -0
  25. sknetwork/classification/vote.pyx +56 -0
  26. sknetwork/clustering/__init__.py +8 -0
  27. sknetwork/clustering/base.py +172 -0
  28. sknetwork/clustering/kcenters.py +253 -0
  29. sknetwork/clustering/leiden.py +242 -0
  30. sknetwork/clustering/leiden_core.cpython-312-darwin.so +0 -0
  31. sknetwork/clustering/leiden_core.pyx +124 -0
  32. sknetwork/clustering/louvain.py +286 -0
  33. sknetwork/clustering/louvain_core.cpython-312-darwin.so +0 -0
  34. sknetwork/clustering/louvain_core.pyx +124 -0
  35. sknetwork/clustering/metrics.py +91 -0
  36. sknetwork/clustering/postprocess.py +66 -0
  37. sknetwork/clustering/propagation_clustering.py +104 -0
  38. sknetwork/clustering/tests/__init__.py +1 -0
  39. sknetwork/clustering/tests/test_API.py +38 -0
  40. sknetwork/clustering/tests/test_kcenters.py +60 -0
  41. sknetwork/clustering/tests/test_leiden.py +34 -0
  42. sknetwork/clustering/tests/test_louvain.py +129 -0
  43. sknetwork/clustering/tests/test_metrics.py +50 -0
  44. sknetwork/clustering/tests/test_postprocess.py +39 -0
  45. sknetwork/data/__init__.py +6 -0
  46. sknetwork/data/base.py +33 -0
  47. sknetwork/data/load.py +406 -0
  48. sknetwork/data/models.py +459 -0
  49. sknetwork/data/parse.py +644 -0
  50. sknetwork/data/test_graphs.py +84 -0
  51. sknetwork/data/tests/__init__.py +1 -0
  52. sknetwork/data/tests/test_API.py +30 -0
  53. sknetwork/data/tests/test_base.py +14 -0
  54. sknetwork/data/tests/test_load.py +95 -0
  55. sknetwork/data/tests/test_models.py +52 -0
  56. sknetwork/data/tests/test_parse.py +250 -0
  57. sknetwork/data/tests/test_test_graphs.py +29 -0
  58. sknetwork/data/tests/test_toy_graphs.py +68 -0
  59. sknetwork/data/timeout.py +38 -0
  60. sknetwork/data/toy_graphs.py +611 -0
  61. sknetwork/embedding/__init__.py +8 -0
  62. sknetwork/embedding/base.py +94 -0
  63. sknetwork/embedding/force_atlas.py +198 -0
  64. sknetwork/embedding/louvain_embedding.py +148 -0
  65. sknetwork/embedding/random_projection.py +135 -0
  66. sknetwork/embedding/spectral.py +141 -0
  67. sknetwork/embedding/spring.py +198 -0
  68. sknetwork/embedding/svd.py +359 -0
  69. sknetwork/embedding/tests/__init__.py +1 -0
  70. sknetwork/embedding/tests/test_API.py +49 -0
  71. sknetwork/embedding/tests/test_force_atlas.py +35 -0
  72. sknetwork/embedding/tests/test_louvain_embedding.py +33 -0
  73. sknetwork/embedding/tests/test_random_projection.py +28 -0
  74. sknetwork/embedding/tests/test_spectral.py +81 -0
  75. sknetwork/embedding/tests/test_spring.py +50 -0
  76. sknetwork/embedding/tests/test_svd.py +43 -0
  77. sknetwork/gnn/__init__.py +10 -0
  78. sknetwork/gnn/activation.py +117 -0
  79. sknetwork/gnn/base.py +181 -0
  80. sknetwork/gnn/base_activation.py +89 -0
  81. sknetwork/gnn/base_layer.py +109 -0
  82. sknetwork/gnn/gnn_classifier.py +305 -0
  83. sknetwork/gnn/layer.py +153 -0
  84. sknetwork/gnn/loss.py +180 -0
  85. sknetwork/gnn/neighbor_sampler.py +65 -0
  86. sknetwork/gnn/optimizer.py +164 -0
  87. sknetwork/gnn/tests/__init__.py +1 -0
  88. sknetwork/gnn/tests/test_activation.py +56 -0
  89. sknetwork/gnn/tests/test_base.py +75 -0
  90. sknetwork/gnn/tests/test_base_layer.py +37 -0
  91. sknetwork/gnn/tests/test_gnn_classifier.py +130 -0
  92. sknetwork/gnn/tests/test_layers.py +80 -0
  93. sknetwork/gnn/tests/test_loss.py +33 -0
  94. sknetwork/gnn/tests/test_neigh_sampler.py +23 -0
  95. sknetwork/gnn/tests/test_optimizer.py +43 -0
  96. sknetwork/gnn/tests/test_utils.py +41 -0
  97. sknetwork/gnn/utils.py +127 -0
  98. sknetwork/hierarchy/__init__.py +6 -0
  99. sknetwork/hierarchy/base.py +96 -0
  100. sknetwork/hierarchy/louvain_hierarchy.py +272 -0
  101. sknetwork/hierarchy/metrics.py +234 -0
  102. sknetwork/hierarchy/paris.cpython-312-darwin.so +0 -0
  103. sknetwork/hierarchy/paris.pyx +316 -0
  104. sknetwork/hierarchy/postprocess.py +350 -0
  105. sknetwork/hierarchy/tests/__init__.py +1 -0
  106. sknetwork/hierarchy/tests/test_API.py +24 -0
  107. sknetwork/hierarchy/tests/test_algos.py +34 -0
  108. sknetwork/hierarchy/tests/test_metrics.py +62 -0
  109. sknetwork/hierarchy/tests/test_postprocess.py +57 -0
  110. sknetwork/linalg/__init__.py +9 -0
  111. sknetwork/linalg/basics.py +37 -0
  112. sknetwork/linalg/diteration.cpython-312-darwin.so +0 -0
  113. sknetwork/linalg/diteration.pyx +47 -0
  114. sknetwork/linalg/eig_solver.py +93 -0
  115. sknetwork/linalg/laplacian.py +15 -0
  116. sknetwork/linalg/normalizer.py +86 -0
  117. sknetwork/linalg/operators.py +225 -0
  118. sknetwork/linalg/polynome.py +76 -0
  119. sknetwork/linalg/ppr_solver.py +170 -0
  120. sknetwork/linalg/push.cpython-312-darwin.so +0 -0
  121. sknetwork/linalg/push.pyx +71 -0
  122. sknetwork/linalg/sparse_lowrank.py +142 -0
  123. sknetwork/linalg/svd_solver.py +91 -0
  124. sknetwork/linalg/tests/__init__.py +1 -0
  125. sknetwork/linalg/tests/test_eig.py +44 -0
  126. sknetwork/linalg/tests/test_laplacian.py +18 -0
  127. sknetwork/linalg/tests/test_normalization.py +34 -0
  128. sknetwork/linalg/tests/test_operators.py +66 -0
  129. sknetwork/linalg/tests/test_polynome.py +38 -0
  130. sknetwork/linalg/tests/test_ppr.py +50 -0
  131. sknetwork/linalg/tests/test_sparse_lowrank.py +61 -0
  132. sknetwork/linalg/tests/test_svd.py +38 -0
  133. sknetwork/linkpred/__init__.py +2 -0
  134. sknetwork/linkpred/base.py +46 -0
  135. sknetwork/linkpred/nn.py +126 -0
  136. sknetwork/linkpred/tests/__init__.py +1 -0
  137. sknetwork/linkpred/tests/test_nn.py +27 -0
  138. sknetwork/log.py +19 -0
  139. sknetwork/path/__init__.py +5 -0
  140. sknetwork/path/dag.py +54 -0
  141. sknetwork/path/distances.py +98 -0
  142. sknetwork/path/search.py +31 -0
  143. sknetwork/path/shortest_path.py +61 -0
  144. sknetwork/path/tests/__init__.py +1 -0
  145. sknetwork/path/tests/test_dag.py +37 -0
  146. sknetwork/path/tests/test_distances.py +62 -0
  147. sknetwork/path/tests/test_search.py +40 -0
  148. sknetwork/path/tests/test_shortest_path.py +40 -0
  149. sknetwork/ranking/__init__.py +8 -0
  150. sknetwork/ranking/base.py +61 -0
  151. sknetwork/ranking/betweenness.cpython-312-darwin.so +0 -0
  152. sknetwork/ranking/betweenness.pyx +97 -0
  153. sknetwork/ranking/closeness.py +92 -0
  154. sknetwork/ranking/hits.py +94 -0
  155. sknetwork/ranking/katz.py +83 -0
  156. sknetwork/ranking/pagerank.py +110 -0
  157. sknetwork/ranking/postprocess.py +37 -0
  158. sknetwork/ranking/tests/__init__.py +1 -0
  159. sknetwork/ranking/tests/test_API.py +32 -0
  160. sknetwork/ranking/tests/test_betweenness.py +38 -0
  161. sknetwork/ranking/tests/test_closeness.py +30 -0
  162. sknetwork/ranking/tests/test_hits.py +20 -0
  163. sknetwork/ranking/tests/test_pagerank.py +62 -0
  164. sknetwork/ranking/tests/test_postprocess.py +26 -0
  165. sknetwork/regression/__init__.py +4 -0
  166. sknetwork/regression/base.py +61 -0
  167. sknetwork/regression/diffusion.py +210 -0
  168. sknetwork/regression/tests/__init__.py +1 -0
  169. sknetwork/regression/tests/test_API.py +32 -0
  170. sknetwork/regression/tests/test_diffusion.py +56 -0
  171. sknetwork/sknetwork.py +3 -0
  172. sknetwork/test_base.py +35 -0
  173. sknetwork/test_log.py +15 -0
  174. sknetwork/topology/__init__.py +8 -0
  175. sknetwork/topology/cliques.cpython-312-darwin.so +0 -0
  176. sknetwork/topology/cliques.pyx +149 -0
  177. sknetwork/topology/core.cpython-312-darwin.so +0 -0
  178. sknetwork/topology/core.pyx +90 -0
  179. sknetwork/topology/cycles.py +243 -0
  180. sknetwork/topology/minheap.cpython-312-darwin.so +0 -0
  181. sknetwork/topology/minheap.pxd +20 -0
  182. sknetwork/topology/minheap.pyx +109 -0
  183. sknetwork/topology/structure.py +194 -0
  184. sknetwork/topology/tests/__init__.py +1 -0
  185. sknetwork/topology/tests/test_cliques.py +28 -0
  186. sknetwork/topology/tests/test_core.py +19 -0
  187. sknetwork/topology/tests/test_cycles.py +65 -0
  188. sknetwork/topology/tests/test_structure.py +85 -0
  189. sknetwork/topology/tests/test_triangles.py +38 -0
  190. sknetwork/topology/tests/test_wl.py +72 -0
  191. sknetwork/topology/triangles.cpython-312-darwin.so +0 -0
  192. sknetwork/topology/triangles.pyx +151 -0
  193. sknetwork/topology/weisfeiler_lehman.py +133 -0
  194. sknetwork/topology/weisfeiler_lehman_core.cpython-312-darwin.so +0 -0
  195. sknetwork/topology/weisfeiler_lehman_core.pyx +114 -0
  196. sknetwork/utils/__init__.py +7 -0
  197. sknetwork/utils/check.py +355 -0
  198. sknetwork/utils/format.py +221 -0
  199. sknetwork/utils/membership.py +82 -0
  200. sknetwork/utils/neighbors.py +115 -0
  201. sknetwork/utils/tests/__init__.py +1 -0
  202. sknetwork/utils/tests/test_check.py +190 -0
  203. sknetwork/utils/tests/test_format.py +63 -0
  204. sknetwork/utils/tests/test_membership.py +24 -0
  205. sknetwork/utils/tests/test_neighbors.py +41 -0
  206. sknetwork/utils/tests/test_tfidf.py +18 -0
  207. sknetwork/utils/tests/test_values.py +66 -0
  208. sknetwork/utils/tfidf.py +37 -0
  209. sknetwork/utils/values.py +76 -0
  210. sknetwork/visualization/__init__.py +4 -0
  211. sknetwork/visualization/colors.py +34 -0
  212. sknetwork/visualization/dendrograms.py +277 -0
  213. sknetwork/visualization/graphs.py +1039 -0
  214. sknetwork/visualization/tests/__init__.py +1 -0
  215. sknetwork/visualization/tests/test_dendrograms.py +53 -0
  216. sknetwork/visualization/tests/test_graphs.py +176 -0
@@ -0,0 +1,134 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created in July 2022
5
+ @author: Thomas Bonald <thomas.bonald@telecom-paris.fr>
6
+ """
7
+ from typing import Optional, Union
8
+
9
+ import numpy as np
10
+ from scipy import sparse
11
+
12
+ from sknetwork.classification.base import BaseClassifier
13
+ from sknetwork.path.distances import get_distances
14
+ from sknetwork.linalg.normalizer import normalize
15
+ from sknetwork.utils.format import get_adjacency_values
16
+ from sknetwork.utils.membership import get_membership
17
+ from sknetwork.utils.neighbors import get_degrees
18
+
19
+
20
class DiffusionClassifier(BaseClassifier):
    """Node classification by heat diffusion.

    Each label has its own temperature field; the temperature of a node for a given label
    is interpreted as the probability that the node carries this label.

    Parameters
    ----------
    n_iter : int
        Number of iterations of the diffusion (discrete time). Must be positive.
    centering : bool
        If ``True`` (default), subtract from the temperatures of each label their mean over nodes
        before classification.
    scale : float
        Multiplicative factor applied to temperatures before softmax (default = 5).
        Used only when centering is ``True``.

    Attributes
    ----------
    labels_ : np.ndarray
        Predicted label of each node (-1 for nodes not reached by the diffusion).
    probs_ : sparse.csr_matrix, shape (n_row, n_labels)
        Probability distribution over labels.
    labels_row_ : np.ndarray
        Labels of rows, for bipartite graphs.
    labels_col_ : np.ndarray
        Labels of columns, for bipartite graphs.
    probs_row_ : sparse.csr_matrix, shape (n_row, n_labels)
        Probability distributions over labels of rows, for bipartite graphs.
    probs_col_ : sparse.csr_matrix, shape (n_col, n_labels)
        Probability distributions over labels of columns, for bipartite graphs.

    Example
    -------
    >>> from sknetwork.data import karate_club
    >>> diffusion = DiffusionClassifier()
    >>> graph = karate_club(metadata=True)
    >>> adjacency = graph.adjacency
    >>> labels_true = graph.labels
    >>> labels = {0: labels_true[0], 33: labels_true[33]}
    >>> labels_pred = diffusion.fit_predict(adjacency, labels)
    >>> round(np.mean(labels_pred == labels_true), 2)
    0.97

    References
    ----------
    Zhu, X., Lafferty, J., & Rosenfeld, R. (2005). `Semi-supervised learning with graphs`
    (Doctoral dissertation, Carnegie Mellon University, language technologies institute, school of computer science).
    """
    def __init__(self, n_iter: int = 10, centering: bool = True, scale: float = 5):
        super().__init__()

        # Reject invalid settings before storing anything on the instance.
        if n_iter <= 0:
            raise ValueError('The number of iterations must be positive.')
        self.n_iter = n_iter
        self.centering = centering
        self.scale = scale

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray],
            labels: Optional[Union[dict, list, np.ndarray]] = None,
            labels_row: Optional[Union[dict, list, np.ndarray]] = None,
            labels_col: Optional[Union[dict, list, np.ndarray]] = None, force_bipartite: bool = False) \
            -> 'DiffusionClassifier':
        """Run the diffusion from the labeled nodes (seeds) and derive labels and probabilities.

        Seeds keep their initial temperature throughout; the temperatures of the other nodes
        are updated ``n_iter`` times by the normalized adjacency matrix.

        Parameters
        ----------
        input_matrix : sparse.csr_matrix, np.ndarray
            Adjacency matrix or biadjacency matrix of the graph.
        labels : dict, np.ndarray
            Known labels (dictionary or vector of int). Negative values ignored.
        labels_row : dict, np.ndarray
            Labels of rows for bipartite graphs. Negative values ignored.
        labels_col : dict, np.ndarray
            Labels of columns for bipartite graphs. Negative values ignored.
        force_bipartite : bool
            If ``True``, consider the input matrix as a biadjacency matrix (default = ``False``).

        Returns
        -------
        self: :class:`DiffusionClassifier`

        Raises
        ------
        ValueError
            If no node has a non-negative label.
        """
        adjacency, values, self.bipartite = get_adjacency_values(
            input_matrix, force_bipartite=force_bipartite, values=labels, values_row=labels_row,
            values_col=labels_col)
        labels = values.astype(int)
        is_seed = labels >= 0
        if not is_seed.any():
            raise ValueError('At least one node must be given a non-negative label.')

        # Re-index the seed labels as 0, ..., k-1 (their rank among the distinct seed labels);
        # labels_unique maps these indices back to the original label values.
        labels_unique, seed_index = np.unique(labels[is_seed], return_inverse=True)
        labels_reindex = labels.copy()
        labels_reindex[is_seed] = seed_index

        # Initial temperatures: membership rows for the seeds, 0.5 for every other node.
        temperatures = get_membership(labels_reindex).toarray()
        temperatures_seeds = temperatures[is_seed]
        temperatures[~is_seed] = 0.5

        diffusion = normalize(adjacency)
        for _ in range(self.n_iter):
            temperatures = diffusion.dot(temperatures)
            # boundary condition: the temperature of the seeds is reset after each step
            temperatures[is_seed] = temperatures_seeds

        if self.centering:
            temperatures -= temperatures.mean(axis=0)
        labels_pred = labels_unique[temperatures.argmax(axis=1)]

        # softmax (the final row normalization is applied when building probs_)
        if self.centering:
            temperatures = np.exp(self.scale * temperatures)

        # set label -1 to nodes not reached by diffusion
        not_reached = get_distances(adjacency, source=np.flatnonzero(is_seed)) < 0
        labels_pred[not_reached] = -1
        temperatures[not_reached] = 0

        self.labels_ = labels_pred
        self.probs_ = sparse.csr_matrix(normalize(temperatures))
        self._split_vars(input_matrix.shape)

        return self
@@ -0,0 +1,139 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created in November 2019
5
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
+ @author: Thomas Bonald <tbonald@enst.fr>
7
+ """
8
+ from typing import Optional, Union
9
+
10
+ import numpy as np
11
+ from scipy import sparse
12
+
13
+ from sknetwork.classification.base import BaseClassifier
14
+ from sknetwork.embedding.base import BaseEmbedding
15
+ from sknetwork.linalg.normalizer import get_norms, normalize
16
+ from sknetwork.utils.check import check_n_neighbors
17
+ from sknetwork.utils.format import get_adjacency_values
18
+
19
+
20
class NNClassifier(BaseClassifier):
    """Node classification by K-nearest neighbors in the embedding space.

    Parameters
    ----------
    n_neighbors : int
        Number of nearest neighbors.
    embedding_method : :class:`BaseEmbedding`
        Embedding method used to represent nodes in vector space.
        If ``None`` (default), use identity.
    normalize : bool
        If ``True``, apply normalization so that all vectors have norm 1 in the embedding space.

    Attributes
    ----------
    labels_ : np.ndarray
        Predicted label of each node.
    probs_ : sparse.csr_matrix, shape (n_row, n_labels)
        Probability distribution over labels.
    labels_row_ : np.ndarray
        Labels of rows, for bipartite graphs.
    labels_col_ : np.ndarray
        Labels of columns, for bipartite graphs.
    probs_row_ : sparse.csr_matrix, shape (n_row, n_labels)
        Probability distributions over labels of rows, for bipartite graphs.
    probs_col_ : sparse.csr_matrix, shape (n_col, n_labels)
        Probability distributions over labels of columns, for bipartite graphs.

    Example
    -------
    >>> from sknetwork.classification import NNClassifier
    >>> from sknetwork.data import karate_club
    >>> classifier = NNClassifier(n_neighbors=1)
    >>> graph = karate_club(metadata=True)
    >>> adjacency = graph.adjacency
    >>> labels_true = graph.labels
    >>> labels = {0: labels_true[0], 33: labels_true[33]}
    >>> labels_pred = classifier.fit_predict(adjacency, labels)
    >>> round(np.mean(labels_pred == labels_true), 2)
    0.82
    """
    def __init__(self, n_neighbors: int = 3, embedding_method: Optional[BaseEmbedding] = None, normalize: bool = True):
        super(NNClassifier, self).__init__()
        self.n_neighbors = n_neighbors
        self.embedding_method = embedding_method
        self.normalize = normalize

    @staticmethod
    def _instantiate_vars(labels: np.ndarray):
        """Split node indices into labeled nodes (label >= 0) and unlabeled nodes (label < 0)."""
        index_train = np.flatnonzero(labels >= 0)
        index_test = np.flatnonzero(labels < 0)
        return index_train, index_test

    def _fit_core(self, embedding, labels, index_train, index_test):
        """Label each test node by a vote of its nearest training nodes in the embedding space.

        Parameters
        ----------
        embedding :
            Embedding of the nodes, one row per node (dense array or sparse matrix).
        labels : np.ndarray
            Labels of all nodes; only the entries at ``index_train`` are used as ground truth.
        index_train : np.ndarray
            Indices of nodes with known labels.
        index_test : np.ndarray
            Indices of nodes to classify.

        Returns
        -------
        probs : sparse.csr_matrix
            Row-normalized vote counts, one row per node and one column per label.
        labels : np.ndarray
            Label with the highest probability for each node.
        """
        # NOTE(review): check_n_neighbors is defined elsewhere; presumably it keeps the value
        # strictly below the number of training nodes, as np.argpartition requires -- confirm.
        n_neighbors = check_n_neighbors(self.n_neighbors, len(index_train))

        # The training rows and their squared norms do not depend on the test node:
        # extract and compute them once instead of once per iteration.
        embedding_train = embedding[index_train]
        norms_train = get_norms(embedding_train, p=2)
        square_norms_train = norms_train**2

        neighbors = []
        for i in index_test:
            vector = embedding[i]
            if sparse.issparse(vector):
                vector = vector.toarray().ravel()
            # squared Euclidean distance: |u - v|^2 = |u|^2 - 2 <u, v> + |v|^2
            distances = square_norms_train - 2 * embedding_train.dot(vector) + np.sum(vector**2)
            neighbors += list(index_train[np.argpartition(distances, n_neighbors)[:n_neighbors]])
        labels_neighbor = labels[neighbors]

        # membership matrix: one vote per neighbor for each test node...
        row = list(np.repeat(index_test, n_neighbors))
        col = list(labels_neighbor)
        data = list(np.ones_like(labels_neighbor))

        # ...and a single vote for its own label for each training node
        row += list(index_train)
        col += list(labels[index_train])
        data += list(np.ones_like(index_train))

        probs = normalize(sparse.csr_matrix((data, (row, col)), shape=(len(labels), np.max(labels) + 1)))
        labels = np.argmax(probs.toarray(), axis=1)

        return probs, labels

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], labels: Union[np.ndarray, list, dict] = None,
            labels_row: Union[np.ndarray, list, dict] = None,
            labels_col: Union[np.ndarray, list, dict] = None) -> 'NNClassifier':
        """Node classification by k-nearest neighbors in the embedding space.

        Parameters
        ----------
        input_matrix : sparse.csr_matrix, np.ndarray
            Adjacency matrix or biadjacency matrix of the graph.
        labels : np.ndarray, dict
            Known labels. Negative values ignored.
        labels_row : np.ndarray, dict
            Known labels of rows, for bipartite graphs.
        labels_col : np.ndarray, dict
            Known labels of columns, for bipartite graphs.

        Returns
        -------
        self: :class:`NNClassifier`
        """
        adjacency, labels, self.bipartite = get_adjacency_values(input_matrix, values=labels, values_row=labels_row,
                                                                 values_col=labels_col)
        labels = labels.astype(int)
        index_seed, index_remain = self._instantiate_vars(labels)

        # Without an embedding method, the rows of the adjacency matrix are used as vectors.
        if self.embedding_method is None:
            embedding = adjacency
        else:
            embedding = self.embedding_method.fit_transform(adjacency)

        if self.normalize:
            embedding = normalize(embedding, p=2)

        probs, labels = self._fit_core(embedding, labels, index_seed, index_remain)

        self.labels_ = labels
        self.probs_ = probs
        self._split_vars(input_matrix.shape)

        return self
@@ -0,0 +1,205 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created in July 2020
5
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
+ @author: Thomas Bonald <thomas.bonald@telecom-paris.fr>
7
+ """
8
+ from typing import Union, Tuple
9
+
10
+ import numpy as np
11
+ from scipy import sparse
12
+
13
+ from sknetwork.utils.check import check_vector_format
14
+
15
+
16
def get_accuracy_score(labels_true: np.ndarray, labels_pred: np.ndarray) -> float:
    """Return the proportion of correctly labeled samples.

    Only samples whose true label and predicted label are both non-negative are taken into account.

    Parameters
    ----------
    labels_true : np.ndarray
        True labels.
    labels_pred : np.ndarray
        Predicted labels.

    Returns
    -------
    accuracy : float
        A score between 0 and 1.

    Raises
    ------
    ValueError
        If no sample has both a non-negative true label and a non-negative predicted label.

    Examples
    --------
    >>> import numpy as np
    >>> labels_true = np.array([0, 0, 1, 1])
    >>> labels_pred = np.array([0, 0, 0, 1])
    >>> round(get_accuracy_score(labels_true, labels_pred), 2)
    0.75
    """
    check_vector_format(labels_true, labels_pred)
    is_valid = (labels_true >= 0) & (labels_pred >= 0)
    if not np.sum(is_valid):
        raise ValueError('No sample with both true non-negative label and predicted non-negative label.')
    is_correct = labels_true[is_valid] == labels_pred[is_valid]
    return np.mean(is_correct)
46
+
47
+
48
def get_confusion_matrix(labels_true: np.ndarray, labels_pred: np.ndarray) -> sparse.csr_matrix:
    """Return the confusion matrix in sparse format (true labels on rows, predicted labels on columns).

    Only samples whose true label and predicted label are both non-negative are counted.

    Parameters
    ----------
    labels_true : np.ndarray
        True labels.
    labels_pred : np.ndarray
        Predicted labels.

    Returns
    -------
    confusion matrix : sparse.csr_matrix
        Square matrix of counts; its size is the largest label found in either vector, plus one.

    Raises
    ------
    ValueError
        If no sample has both a non-negative true label and a non-negative predicted label.

    Examples
    --------
    >>> import numpy as np
    >>> labels_true = np.array([0, 0, 1, 1])
    >>> labels_pred = np.array([0, 0, 0, 1])
    >>> get_confusion_matrix(labels_true, labels_pred).toarray()
    array([[2, 0],
           [1, 1]])
    """
    check_vector_format(labels_true, labels_pred)
    mask = (labels_true >= 0) & (labels_pred >= 0)
    if not np.sum(mask):
        raise ValueError('No sample with both true non-negative label and predicted non-negative label.')
    # The size is deliberately taken over ALL entries (not only the masked ones), so that every
    # non-negative label of either vector is a valid row/column index of the result.
    # np.max reduces in compiled code, whereas the builtin max() iterates over the array in Python.
    n_labels = max(np.max(labels_true), np.max(labels_pred)) + 1
    row = labels_true[mask]
    col = labels_pred[mask]
    data = np.ones(len(row), dtype=int)
    # duplicate (row, col) entries are summed by the csr_matrix constructor, which yields the counts
    return sparse.csr_matrix((data, (row, col)), shape=(n_labels, n_labels))
83
+
84
+
85
def get_f1_score(labels_true: np.ndarray, labels_pred: np.ndarray, return_precision_recall: bool = False) \
        -> Union[float, Tuple[float, float, float]]:
    """Return the f1 score of binary classification (score of label 1).

    Negative labels ignored.

    Parameters
    ----------
    labels_true : np.ndarray
        True labels.
    labels_pred : np.ndarray
        Predicted labels.
    return_precision_recall : bool
        If ``True``, also return precision and recall.

    Returns
    -------
    score, [precision, recall] : float
        F1 score (between 0 and 1). Optionally, also return precision and recall.

    Raises
    ------
    ValueError
        If the set of non-negative labels found in both vectors is not exactly {0, 1}.

    Examples
    --------
    >>> import numpy as np
    >>> labels_true = np.array([0, 0, 1, 1])
    >>> labels_pred = np.array([0, 0, 0, 1])
    >>> round(get_f1_score(labels_true, labels_pred), 2)
    0.67
    """
    observed_true = set(labels_true[labels_true >= 0])
    observed_pred = set(labels_pred[labels_pred >= 0])
    if observed_true | observed_pred != {0, 1}:
        raise ValueError('Labels must be binary. '
                         'Check get_f1_scores or get_average_f1_score for multi-label classification.')
    if not return_precision_recall:
        return get_f1_scores(labels_true, labels_pred, False)[1]
    # index 1 selects the positive label in each per-label vector
    f1_scores, precisions, recalls = get_f1_scores(labels_true, labels_pred, True)
    return f1_scores[1], precisions[1], recalls[1]
121
+
122
+
123
def get_f1_scores(labels_true: np.ndarray, labels_pred: np.ndarray, return_precision_recall: bool = False) \
        -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray, np.ndarray]]:
    """Return the f1 scores of multi-label classification (one per label).

    Negative labels ignored.

    Parameters
    ----------
    labels_true : np.ndarray
        True labels.
    labels_pred : np.ndarray
        Predicted labels.
    return_precision_recall : bool
        If ``True``, also return precisions and recalls.

    Returns
    -------
    scores, [precisions, recalls] : np.ndarray
        F1 scores (between 0 and 1). Optionally, also return precisions and recalls.
        Entries whose denominator would be zero are set to 0.

    Examples
    --------
    >>> import numpy as np
    >>> labels_true = np.array([0, 0, 1, 1])
    >>> labels_pred = np.array([0, 0, 0, 1])
    >>> np.round(get_f1_scores(labels_true, labels_pred), 2)
    array([0.8 , 0.67])
    """
    confusion = get_confusion_matrix(labels_true, labels_pred)
    n_labels = confusion.shape[0]
    ones = np.ones(n_labels)

    n_correct = confusion.diagonal()
    n_true = confusion.dot(ones)    # row sums: samples per true label
    n_pred = confusion.T.dot(ones)  # column sums: samples per predicted label

    # recall, left at 0 for labels that never occur as true label
    recalls = np.zeros(n_labels)
    has_true = n_true > 0
    recalls[has_true] = n_correct[has_true] / n_true[has_true]

    # precision, left at 0 for labels that are never predicted
    precisions = np.zeros(n_labels)
    has_pred = n_pred > 0
    precisions[has_pred] = n_correct[has_pred] / n_pred[has_pred]

    # harmonic mean of precision and recall, left at 0 when either is 0
    f1_scores = np.zeros(n_labels)
    positive = (precisions > 0) & (recalls > 0)
    f1_scores[positive] = 2 / (1 / precisions[positive] + 1 / recalls[positive])

    if return_precision_recall:
        return f1_scores, precisions, recalls
    return f1_scores
167
+
168
+
169
def get_average_f1_score(labels_true: np.ndarray, labels_pred: np.ndarray, average: str = 'macro') -> float:
    """Return the average f1 score of multi-label classification.

    Negative labels ignored.

    Parameters
    ----------
    labels_true : np.ndarray
        True labels.
    labels_pred : np.ndarray
        Predicted labels.
    average : str
        Averaging method. Can be either ``'macro'`` (default), ``'micro'`` or ``'weighted'``.

    Returns
    -------
    score : float
        Average F1 score (between 0 and 1).

    Raises
    ------
    ValueError
        If ``average`` is not one of the supported methods.

    Examples
    --------
    >>> import numpy as np
    >>> labels_true = np.array([0, 0, 1, 1])
    >>> labels_pred = np.array([0, 0, 0, 1])
    >>> round(get_average_f1_score(labels_true, labels_pred), 2)
    0.73
    """
    # Validate the option first: an invalid value is reported as such, without computing
    # the scores and without being masked by an unrelated error on the data.
    if average not in ('macro', 'micro', 'weighted'):
        raise ValueError('Check the ``average`` parameter.')
    if average == 'micro':
        # micro averaging = accuracy
        return get_accuracy_score(labels_true, labels_pred)
    f1_scores = get_f1_scores(labels_true, labels_pred)
    if average == 'macro':
        return np.mean(f1_scores)
    # weighted: each label counts in proportion to its number of non-negative true samples
    labels_unique, counts = np.unique(labels_true[labels_true >= 0], return_counts=True)
    return np.sum(f1_scores[labels_unique] * counts) / np.sum(counts)
@@ -0,0 +1,66 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created in March 2020
5
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
+ """
7
+ from typing import Optional
8
+
9
+ import numpy as np
10
+
11
+ from sknetwork.classification.base_rank import RankClassifier
12
+ from sknetwork.ranking.pagerank import PageRank
13
+
14
+
15
class PageRankClassifier(RankClassifier):
    """Node classification by multiple personalized PageRanks.

    Parameters
    ----------
    damping_factor: float
        Probability to continue the random walk.
    solver : str
        Which solver to use: 'piteration', 'diteration', 'bicgstab', 'lanczos'.
    n_iter : int
        Number of iterations for some solvers such as ``'piteration'`` or ``'diteration'``.
    tol : float
        Tolerance for the convergence of some solvers such as ``'bicgstab'`` or ``'lanczos'``.
    n_jobs : int, optional
        Passed unchanged to the :class:`RankClassifier` constructor.
    verbose : bool
        Passed unchanged to the :class:`RankClassifier` constructor.

    Attributes
    ----------
    labels_ : np.ndarray, shape (n_labels,)
        Labels of nodes.
    probs_ : sparse.csr_matrix, shape (n_row, n_labels)
        Probability distribution over labels.
    labels_row_ : np.ndarray
        Labels of rows, for bipartite graphs.
    labels_col_ : np.ndarray
        Labels of columns, for bipartite graphs.
    probs_row_ : sparse.csr_matrix, shape (n_row, n_labels)
        Probability distributions over labels of rows, for bipartite graphs.
    probs_col_ : sparse.csr_matrix, shape (n_col, n_labels)
        Probability distributions over labels of columns, for bipartite graphs.

    Example
    -------
    >>> from sknetwork.classification import PageRankClassifier
    >>> from sknetwork.data import karate_club
    >>> pagerank = PageRankClassifier()
    >>> graph = karate_club(metadata=True)
    >>> adjacency = graph.adjacency
    >>> labels_true = graph.labels
    >>> labels = {0: labels_true[0], 33: labels_true[33]}
    >>> labels_pred = pagerank.fit_predict(adjacency, labels)
    >>> np.round(np.mean(labels_pred == labels_true), 2)
    0.97

    References
    ----------
    Lin, F., & Cohen, W. W. (2010). `Semi-supervised classification of network data using very few labels.
    <https://lti.cs.cmu.edu/sites/default/files/research/reports/2009/cmulti09017.pdf>`_
    In IEEE International Conference on Advances in Social Networks Analysis and Mining.
    """
    def __init__(self, damping_factor: float = 0.85, solver: str = 'piteration', n_iter: int = 10, tol: float = 0.,
                 n_jobs: Optional[int] = None, verbose: bool = False):
        # The ranking algorithm is built here and handed to the generic rank-based classifier.
        super().__init__(PageRank(damping_factor, solver, n_iter, tol), n_jobs, verbose)
@@ -0,0 +1,152 @@
1
+ #!/usr/bin/env python3
2
+ # coding: utf-8
3
+ """
4
+ Created in April 2020
5
+ @author: Thomas Bonald <tbonald@enst.fr>
6
+ """
7
+
8
+ from typing import Union
9
+
10
+ import numpy as np
11
+ from scipy import sparse
12
+
13
+ from sknetwork.classification.base import BaseClassifier
14
+ from sknetwork.classification.vote import vote_update
15
+ from sknetwork.linalg.normalizer import normalize
16
+ from sknetwork.utils.format import get_adjacency_values
17
+ from sknetwork.utils.membership import get_membership
18
+
19
+
20
class Propagation(BaseClassifier):
    """Node classification by label propagation.

    Parameters
    ----------
    n_iter : float
        Maximum number of iterations (-1 for infinity).
    node_order : str
        * ``'random'``: node labels are updated in random order.
        * ``'increasing'``: node labels are updated by increasing order of (in-) weight.
        * ``'decreasing'``: node labels are updated by decreasing order of (in-) weight.
        * Otherwise, node labels are updated by index order.
    weighted : bool
        If ``True``, the vote of each neighbor is proportional to the edge weight.
        Otherwise, all votes have weight 1.

    Attributes
    ----------
    labels_ : np.ndarray
        Labels of nodes.
    probs_ : sparse.csr_matrix, shape (n_row, n_labels)
        Probability distribution over labels.
    labels_row_ : np.ndarray
        Labels of rows, for bipartite graphs.
    labels_col_ : np.ndarray
        Labels of columns, for bipartite graphs.
    probs_row_ : sparse.csr_matrix, shape (n_row, n_labels)
        Probability distributions over labels of rows, for bipartite graphs.
    probs_col_ : sparse.csr_matrix, shape (n_col, n_labels)
        Probability distributions over labels of columns, for bipartite graphs.

    Example
    -------
    >>> from sknetwork.classification import Propagation
    >>> from sknetwork.data import karate_club
    >>> propagation = Propagation()
    >>> graph = karate_club(metadata=True)
    >>> adjacency = graph.adjacency
    >>> labels_true = graph.labels
    >>> labels = {0: labels_true[0], 33: labels_true[33]}
    >>> labels_pred = propagation.fit_predict(adjacency, labels)
    >>> np.round(np.mean(labels_pred == labels_true), 2)
    0.94

    References
    ----------
    Raghavan, U. N., Albert, R., & Kumara, S. (2007).
    `Near linear time algorithm to detect community structures in large-scale networks.
    <https://arxiv.org/pdf/0709.2938.pdf>`_
    Physical review E, 76(3), 036106.
    """
    def __init__(self, n_iter: float = -1, node_order: str = None, weighted: bool = True):
        super(Propagation, self).__init__()

        # A negative budget means "iterate until the labels stop changing".
        self.n_iter = np.inf if n_iter < 0 else n_iter
        self.node_order = node_order
        self.weighted = weighted

    @staticmethod
    def _instantiate_vars(labels: np.ndarray):
        """Instantiate variables for label propagation.

        Parameters
        ----------
        labels : np.ndarray
            Initial label of each node; negative values mark unlabelled nodes.

        Returns
        -------
        index_seed : np.ndarray
            Indices of the nodes carrying an initial label (int32).
        index_remain : np.ndarray
            Indices of the nodes whose label is updated by propagation (int32).
        labels_seed : np.ndarray
            Labels of the nodes of ``index_seed`` (int32).
        """
        n = len(labels)
        # When every node carries its own distinct, valid label, all nodes are both
        # seeds and candidates for update. A negative entry means "unknown", so its
        # presence rules this mode out even if the values happen to be pairwise
        # distinct (e.g. [-1, 0, 1]); otherwise the known labels could be overwritten.
        if np.all(labels >= 0) and len(set(labels)) == n:
            index_seed = np.arange(n)
            index_remain = np.arange(n)
        else:
            index_seed = np.argwhere(labels >= 0).ravel()
            index_remain = np.argwhere(labels < 0).ravel()
            labels = labels[index_seed]
        return index_seed.astype(np.int32), index_remain.astype(np.int32), labels.astype(np.int32)

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], labels: Union[np.ndarray, list, dict] = None,
            labels_row: Union[np.ndarray, list, dict] = None,
            labels_col: Union[np.ndarray, list, dict] = None) -> 'Propagation':
        """Node classification by label propagation.

        Parameters
        ----------
        input_matrix : sparse.csr_matrix, np.ndarray
            Adjacency matrix or biadjacency matrix of the graph.
        labels : array, list or dict
            Known labels. Negative values ignored.
        labels_row : array, list or dict
            Known labels of rows, for bipartite graphs.
        labels_col : array, list or dict
            Known labels of columns, for bipartite graphs.

        Returns
        -------
        self: :class:`Propagation`
        """
        adjacency, seeds, self.bipartite = get_adjacency_values(input_matrix, values=labels, values_row=labels_row,
                                                                values_col=labels_col, which='labels')
        n = adjacency.shape[0]
        index_seed, index_remain, labels_seed = self._instantiate_vars(seeds)

        if self.node_order == 'random':
            np.random.shuffle(index_remain)
        elif self.node_order in ('decreasing', 'increasing'):
            weights = adjacency.T.dot(np.ones(n))
            if self.node_order == 'decreasing':
                weights = -weights
            # Reorder the nodes to update among themselves. Indexing the global
            # ranking with ``index_remain`` would instead select arbitrary nodes,
            # possibly seeds, and leave some unlabelled nodes out of the update.
            index_remain = index_remain[np.argsort(weights[index_remain])]

        labels = -np.ones(n, dtype=np.int32)
        labels[index_seed] = labels_seed
        labels_remain = np.zeros_like(index_remain, dtype=np.int32)

        indptr = adjacency.indptr.astype(np.int32)
        indices = adjacency.indices.astype(np.int32)
        if self.weighted:
            data = adjacency.data.astype(np.float32)
        else:
            # One unit vote per stored edge: the array must have the same length as
            # ``adjacency.data`` (number of edges), not the number of nodes.
            data = np.ones_like(adjacency.data, dtype=np.float32)

        # Repeat the vote until the labels of the updated nodes are stable
        # or the iteration budget is exhausted.
        t = 0
        while t < self.n_iter and not np.array_equal(labels_remain, labels[index_remain]):
            t += 1
            labels_remain = labels[index_remain].copy()
            labels = np.asarray(vote_update(indptr, indices, data, labels, index_remain))

        # Probabilities are the normalized label counts (weighted by the adjacency) among neighbors.
        probs = get_membership(labels)
        probs = normalize(adjacency.dot(probs))

        self.labels_ = labels
        self.probs_ = probs
        self._split_vars(input_matrix.shape)

        return self