scikit-network 0.28.3__cp39-cp39-macosx_12_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-network might be problematic. Click here for more details.

Files changed (240) hide show
  1. scikit_network-0.28.3.dist-info/AUTHORS.rst +41 -0
  2. scikit_network-0.28.3.dist-info/LICENSE +34 -0
  3. scikit_network-0.28.3.dist-info/METADATA +457 -0
  4. scikit_network-0.28.3.dist-info/RECORD +240 -0
  5. scikit_network-0.28.3.dist-info/WHEEL +5 -0
  6. scikit_network-0.28.3.dist-info/top_level.txt +1 -0
  7. sknetwork/__init__.py +21 -0
  8. sknetwork/classification/__init__.py +8 -0
  9. sknetwork/classification/base.py +84 -0
  10. sknetwork/classification/base_rank.py +143 -0
  11. sknetwork/classification/diffusion.py +134 -0
  12. sknetwork/classification/knn.py +162 -0
  13. sknetwork/classification/metrics.py +205 -0
  14. sknetwork/classification/pagerank.py +66 -0
  15. sknetwork/classification/propagation.py +152 -0
  16. sknetwork/classification/tests/__init__.py +1 -0
  17. sknetwork/classification/tests/test_API.py +35 -0
  18. sknetwork/classification/tests/test_diffusion.py +37 -0
  19. sknetwork/classification/tests/test_knn.py +24 -0
  20. sknetwork/classification/tests/test_metrics.py +53 -0
  21. sknetwork/classification/tests/test_pagerank.py +20 -0
  22. sknetwork/classification/tests/test_propagation.py +24 -0
  23. sknetwork/classification/vote.cpython-39-darwin.so +0 -0
  24. sknetwork/classification/vote.pyx +58 -0
  25. sknetwork/clustering/__init__.py +7 -0
  26. sknetwork/clustering/base.py +102 -0
  27. sknetwork/clustering/kmeans.py +142 -0
  28. sknetwork/clustering/louvain.py +255 -0
  29. sknetwork/clustering/louvain_core.cpython-39-darwin.so +0 -0
  30. sknetwork/clustering/louvain_core.pyx +134 -0
  31. sknetwork/clustering/metrics.py +91 -0
  32. sknetwork/clustering/postprocess.py +66 -0
  33. sknetwork/clustering/propagation_clustering.py +108 -0
  34. sknetwork/clustering/tests/__init__.py +1 -0
  35. sknetwork/clustering/tests/test_API.py +37 -0
  36. sknetwork/clustering/tests/test_kmeans.py +47 -0
  37. sknetwork/clustering/tests/test_louvain.py +104 -0
  38. sknetwork/clustering/tests/test_metrics.py +50 -0
  39. sknetwork/clustering/tests/test_post_processing.py +23 -0
  40. sknetwork/clustering/tests/test_postprocess.py +39 -0
  41. sknetwork/data/__init__.py +5 -0
  42. sknetwork/data/load.py +408 -0
  43. sknetwork/data/models.py +459 -0
  44. sknetwork/data/parse.py +621 -0
  45. sknetwork/data/test_graphs.py +84 -0
  46. sknetwork/data/tests/__init__.py +1 -0
  47. sknetwork/data/tests/test_API.py +30 -0
  48. sknetwork/data/tests/test_load.py +95 -0
  49. sknetwork/data/tests/test_models.py +52 -0
  50. sknetwork/data/tests/test_parse.py +253 -0
  51. sknetwork/data/tests/test_test_graphs.py +30 -0
  52. sknetwork/data/tests/test_toy_graphs.py +68 -0
  53. sknetwork/data/toy_graphs.py +619 -0
  54. sknetwork/embedding/__init__.py +10 -0
  55. sknetwork/embedding/base.py +90 -0
  56. sknetwork/embedding/force_atlas.py +197 -0
  57. sknetwork/embedding/louvain_embedding.py +174 -0
  58. sknetwork/embedding/louvain_hierarchy.py +142 -0
  59. sknetwork/embedding/metrics.py +66 -0
  60. sknetwork/embedding/random_projection.py +133 -0
  61. sknetwork/embedding/spectral.py +214 -0
  62. sknetwork/embedding/spring.py +198 -0
  63. sknetwork/embedding/svd.py +363 -0
  64. sknetwork/embedding/tests/__init__.py +1 -0
  65. sknetwork/embedding/tests/test_API.py +73 -0
  66. sknetwork/embedding/tests/test_force_atlas.py +35 -0
  67. sknetwork/embedding/tests/test_louvain_embedding.py +33 -0
  68. sknetwork/embedding/tests/test_louvain_hierarchy.py +19 -0
  69. sknetwork/embedding/tests/test_metrics.py +29 -0
  70. sknetwork/embedding/tests/test_random_projection.py +28 -0
  71. sknetwork/embedding/tests/test_spectral.py +84 -0
  72. sknetwork/embedding/tests/test_spring.py +50 -0
  73. sknetwork/embedding/tests/test_svd.py +37 -0
  74. sknetwork/flow/__init__.py +3 -0
  75. sknetwork/flow/flow.py +73 -0
  76. sknetwork/flow/tests/__init__.py +1 -0
  77. sknetwork/flow/tests/test_flow.py +17 -0
  78. sknetwork/flow/tests/test_utils.py +69 -0
  79. sknetwork/flow/utils.py +91 -0
  80. sknetwork/gnn/__init__.py +10 -0
  81. sknetwork/gnn/activation.py +117 -0
  82. sknetwork/gnn/base.py +155 -0
  83. sknetwork/gnn/base_activation.py +89 -0
  84. sknetwork/gnn/base_layer.py +109 -0
  85. sknetwork/gnn/gnn_classifier.py +381 -0
  86. sknetwork/gnn/layer.py +153 -0
  87. sknetwork/gnn/layers.py +127 -0
  88. sknetwork/gnn/loss.py +180 -0
  89. sknetwork/gnn/neighbor_sampler.py +65 -0
  90. sknetwork/gnn/optimizer.py +163 -0
  91. sknetwork/gnn/tests/__init__.py +1 -0
  92. sknetwork/gnn/tests/test_activation.py +56 -0
  93. sknetwork/gnn/tests/test_base.py +79 -0
  94. sknetwork/gnn/tests/test_base_layer.py +37 -0
  95. sknetwork/gnn/tests/test_gnn_classifier.py +192 -0
  96. sknetwork/gnn/tests/test_layers.py +80 -0
  97. sknetwork/gnn/tests/test_loss.py +33 -0
  98. sknetwork/gnn/tests/test_neigh_sampler.py +23 -0
  99. sknetwork/gnn/tests/test_optimizer.py +43 -0
  100. sknetwork/gnn/tests/test_utils.py +93 -0
  101. sknetwork/gnn/utils.py +219 -0
  102. sknetwork/hierarchy/__init__.py +7 -0
  103. sknetwork/hierarchy/base.py +69 -0
  104. sknetwork/hierarchy/louvain_hierarchy.py +264 -0
  105. sknetwork/hierarchy/metrics.py +234 -0
  106. sknetwork/hierarchy/paris.cpython-39-darwin.so +0 -0
  107. sknetwork/hierarchy/paris.pyx +317 -0
  108. sknetwork/hierarchy/postprocess.py +350 -0
  109. sknetwork/hierarchy/tests/__init__.py +1 -0
  110. sknetwork/hierarchy/tests/test_API.py +25 -0
  111. sknetwork/hierarchy/tests/test_algos.py +29 -0
  112. sknetwork/hierarchy/tests/test_metrics.py +62 -0
  113. sknetwork/hierarchy/tests/test_postprocess.py +57 -0
  114. sknetwork/hierarchy/tests/test_ward.py +25 -0
  115. sknetwork/hierarchy/ward.py +94 -0
  116. sknetwork/linalg/__init__.py +9 -0
  117. sknetwork/linalg/basics.py +37 -0
  118. sknetwork/linalg/diteration.cpython-39-darwin.so +0 -0
  119. sknetwork/linalg/diteration.pyx +49 -0
  120. sknetwork/linalg/eig_solver.py +93 -0
  121. sknetwork/linalg/laplacian.py +15 -0
  122. sknetwork/linalg/normalization.py +66 -0
  123. sknetwork/linalg/operators.py +225 -0
  124. sknetwork/linalg/polynome.py +76 -0
  125. sknetwork/linalg/ppr_solver.py +170 -0
  126. sknetwork/linalg/push.cpython-39-darwin.so +0 -0
  127. sknetwork/linalg/push.pyx +73 -0
  128. sknetwork/linalg/sparse_lowrank.py +142 -0
  129. sknetwork/linalg/svd_solver.py +91 -0
  130. sknetwork/linalg/tests/__init__.py +1 -0
  131. sknetwork/linalg/tests/test_eig.py +44 -0
  132. sknetwork/linalg/tests/test_laplacian.py +18 -0
  133. sknetwork/linalg/tests/test_normalization.py +38 -0
  134. sknetwork/linalg/tests/test_operators.py +70 -0
  135. sknetwork/linalg/tests/test_polynome.py +38 -0
  136. sknetwork/linalg/tests/test_ppr.py +50 -0
  137. sknetwork/linalg/tests/test_sparse_lowrank.py +61 -0
  138. sknetwork/linalg/tests/test_svd.py +38 -0
  139. sknetwork/linkpred/__init__.py +4 -0
  140. sknetwork/linkpred/base.py +80 -0
  141. sknetwork/linkpred/first_order.py +508 -0
  142. sknetwork/linkpred/first_order_core.cpython-39-darwin.so +0 -0
  143. sknetwork/linkpred/first_order_core.pyx +315 -0
  144. sknetwork/linkpred/postprocessing.py +98 -0
  145. sknetwork/linkpred/tests/__init__.py +1 -0
  146. sknetwork/linkpred/tests/test_API.py +49 -0
  147. sknetwork/linkpred/tests/test_postprocessing.py +21 -0
  148. sknetwork/path/__init__.py +4 -0
  149. sknetwork/path/metrics.py +148 -0
  150. sknetwork/path/search.py +65 -0
  151. sknetwork/path/shortest_path.py +186 -0
  152. sknetwork/path/tests/__init__.py +1 -0
  153. sknetwork/path/tests/test_metrics.py +29 -0
  154. sknetwork/path/tests/test_search.py +25 -0
  155. sknetwork/path/tests/test_shortest_path.py +45 -0
  156. sknetwork/ranking/__init__.py +9 -0
  157. sknetwork/ranking/base.py +56 -0
  158. sknetwork/ranking/betweenness.cpython-39-darwin.so +0 -0
  159. sknetwork/ranking/betweenness.pyx +99 -0
  160. sknetwork/ranking/closeness.py +95 -0
  161. sknetwork/ranking/harmonic.py +82 -0
  162. sknetwork/ranking/hits.py +94 -0
  163. sknetwork/ranking/katz.py +81 -0
  164. sknetwork/ranking/pagerank.py +107 -0
  165. sknetwork/ranking/postprocess.py +25 -0
  166. sknetwork/ranking/tests/__init__.py +1 -0
  167. sknetwork/ranking/tests/test_API.py +34 -0
  168. sknetwork/ranking/tests/test_betweenness.py +38 -0
  169. sknetwork/ranking/tests/test_closeness.py +34 -0
  170. sknetwork/ranking/tests/test_hits.py +20 -0
  171. sknetwork/ranking/tests/test_pagerank.py +69 -0
  172. sknetwork/regression/__init__.py +4 -0
  173. sknetwork/regression/base.py +56 -0
  174. sknetwork/regression/diffusion.py +190 -0
  175. sknetwork/regression/tests/__init__.py +1 -0
  176. sknetwork/regression/tests/test_API.py +34 -0
  177. sknetwork/regression/tests/test_diffusion.py +48 -0
  178. sknetwork/sknetwork.py +3 -0
  179. sknetwork/topology/__init__.py +9 -0
  180. sknetwork/topology/dag.py +74 -0
  181. sknetwork/topology/dag_core.cpython-39-darwin.so +0 -0
  182. sknetwork/topology/dag_core.pyx +38 -0
  183. sknetwork/topology/kcliques.cpython-39-darwin.so +0 -0
  184. sknetwork/topology/kcliques.pyx +193 -0
  185. sknetwork/topology/kcore.cpython-39-darwin.so +0 -0
  186. sknetwork/topology/kcore.pyx +120 -0
  187. sknetwork/topology/structure.py +234 -0
  188. sknetwork/topology/tests/__init__.py +1 -0
  189. sknetwork/topology/tests/test_cliques.py +28 -0
  190. sknetwork/topology/tests/test_cores.py +21 -0
  191. sknetwork/topology/tests/test_dag.py +26 -0
  192. sknetwork/topology/tests/test_structure.py +99 -0
  193. sknetwork/topology/tests/test_triangles.py +42 -0
  194. sknetwork/topology/tests/test_wl_coloring.py +49 -0
  195. sknetwork/topology/tests/test_wl_kernel.py +31 -0
  196. sknetwork/topology/triangles.cpython-39-darwin.so +0 -0
  197. sknetwork/topology/triangles.pyx +166 -0
  198. sknetwork/topology/weisfeiler_lehman.py +163 -0
  199. sknetwork/topology/weisfeiler_lehman_core.cpython-39-darwin.so +0 -0
  200. sknetwork/topology/weisfeiler_lehman_core.pyx +116 -0
  201. sknetwork/utils/__init__.py +40 -0
  202. sknetwork/utils/base.py +35 -0
  203. sknetwork/utils/check.py +354 -0
  204. sknetwork/utils/co_neighbor.py +71 -0
  205. sknetwork/utils/format.py +219 -0
  206. sknetwork/utils/kmeans.py +89 -0
  207. sknetwork/utils/knn.py +166 -0
  208. sknetwork/utils/knn1d.cpython-39-darwin.so +0 -0
  209. sknetwork/utils/knn1d.pyx +80 -0
  210. sknetwork/utils/membership.py +82 -0
  211. sknetwork/utils/minheap.cpython-39-darwin.so +0 -0
  212. sknetwork/utils/minheap.pxd +22 -0
  213. sknetwork/utils/minheap.pyx +111 -0
  214. sknetwork/utils/neighbors.py +115 -0
  215. sknetwork/utils/seeds.py +75 -0
  216. sknetwork/utils/simplex.py +140 -0
  217. sknetwork/utils/tests/__init__.py +1 -0
  218. sknetwork/utils/tests/test_base.py +28 -0
  219. sknetwork/utils/tests/test_bunch.py +16 -0
  220. sknetwork/utils/tests/test_check.py +190 -0
  221. sknetwork/utils/tests/test_co_neighbor.py +43 -0
  222. sknetwork/utils/tests/test_format.py +61 -0
  223. sknetwork/utils/tests/test_kmeans.py +21 -0
  224. sknetwork/utils/tests/test_knn.py +32 -0
  225. sknetwork/utils/tests/test_membership.py +24 -0
  226. sknetwork/utils/tests/test_neighbors.py +41 -0
  227. sknetwork/utils/tests/test_projection_simplex.py +33 -0
  228. sknetwork/utils/tests/test_seeds.py +67 -0
  229. sknetwork/utils/tests/test_verbose.py +15 -0
  230. sknetwork/utils/tests/test_ward.py +20 -0
  231. sknetwork/utils/timeout.py +38 -0
  232. sknetwork/utils/verbose.py +37 -0
  233. sknetwork/utils/ward.py +60 -0
  234. sknetwork/visualization/__init__.py +4 -0
  235. sknetwork/visualization/colors.py +34 -0
  236. sknetwork/visualization/dendrograms.py +229 -0
  237. sknetwork/visualization/graphs.py +819 -0
  238. sknetwork/visualization/tests/__init__.py +1 -0
  239. sknetwork/visualization/tests/test_dendrograms.py +53 -0
  240. sknetwork/visualization/tests/test_graphs.py +167 -0
@@ -0,0 +1,90 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created on Nov, 2019
5
+ @author: Nathan de Lara <nathan.delara@polytechnique.org>
6
+ """
7
+ from abc import ABC
8
+ from typing import Union
9
+
10
+ import numpy as np
11
+ from scipy import sparse
12
+
13
+ from sknetwork.topology.structure import is_connected
14
+ from sknetwork.utils.base import Algorithm
15
+
16
+
17
class BaseEmbedding(Algorithm, ABC):
    """Common interface shared by the embedding algorithms.

    Attributes
    ----------
    embedding_ : array, shape = (n, n_components)
        Embedding of the nodes.
    embedding_row_ : array, shape = (n_row, n_components)
        Embedding of the rows, for bipartite graphs.
    embedding_col_ : array, shape = (n_col, n_components)
        Embedding of the columns, for bipartite graphs.
    """

    def __init__(self):
        self._init_vars()

    def fit_transform(self, *args, **kwargs) -> np.ndarray:
        """Run ``fit`` and return the resulting embedding.

        All positional and keyword arguments are forwarded unchanged to ``fit``.

        Returns
        -------
        embedding : np.ndarray
            Embedding.
        """
        self.fit(*args, **kwargs)
        return self.embedding_

    def predict(self, adjacency_vectors: Union[sparse.csr_matrix, np.ndarray]) -> np.ndarray:
        """Predict the embedding of new nodes.

        Each new node is described by its adjacency row vector.
        This base implementation always raises; subclasses provide the actual prediction.

        Parameters
        ----------
        adjacency_vectors :
            Adjacency vectors of nodes.
            Array of shape (n_col,) (single vector) or (n_vectors, n_col)

        Returns
        -------
        embedding_vectors : np.ndarray
            Embedding of the nodes.

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        raise NotImplementedError

    def _check_fitted(self):
        """Return ``self`` if an embedding is available, raise ``ValueError`` otherwise."""
        if self.embedding_ is not None:
            return self
        raise ValueError("This embedding instance is not fitted yet."
                         " Call 'fit' with appropriate arguments before using this method.")

    @staticmethod
    def _get_regularization(regularization: float, adjacency: sparse.csr_matrix) -> float:
        """Resolve the regularization value depending on graph connectivity.

        A negative value means "regularize only if needed": it becomes 0 when the graph is
        strongly connected and its absolute value otherwise. Any other value is returned as is.
        """
        if not regularization < 0:
            return regularization
        if is_connected(adjacency, connection='strong'):
            return 0
        return np.abs(regularization)

    def _init_vars(self):
        """Reset all embedding attributes to ``None``."""
        self.embedding_ = None
        self.embedding_row_ = None
        self.embedding_col_ = None

    def _split_vars(self, shape):
        """Split embedding_ into embedding_row_ and embedding_col_.

        The first ``shape[0]`` rows of ``embedding_`` become the row embedding, the remaining
        rows become the column embedding, and ``embedding_`` is set to the row embedding.
        """
        n_row = shape[0]
        rows, cols = self.embedding_[:n_row], self.embedding_[n_row:]
        self.embedding_row_ = rows
        self.embedding_col_ = cols
        self.embedding_ = rows
        return self
@@ -0,0 +1,197 @@
1
+ #!/usr/bin/env python3
2
+ # coding: utf-8
3
+ """
4
+ Created on Jun 2020
5
+ @author: Victor Manach <victor.manach@telecom-paris.fr>
6
+ @author: Rémi Jaylet <remi.jaylet@telecom-paris.fr>
7
+ """
8
+ from typing import Optional, Union
9
+
10
+ import numpy as np
11
+ from scipy import sparse
12
+ from scipy.spatial import cKDTree
13
+
14
+ from sknetwork.embedding.base import BaseEmbedding
15
+ from sknetwork.utils.check import check_format, is_symmetric, check_square
16
+ from sknetwork.utils.format import directed2undirected
17
+
18
+
19
class ForceAtlas(BaseEmbedding):
    """Force Atlas layout for displaying graphs.

    Parameters
    ----------
    n_components : int
        Dimension of the graph layout.
    n_iter : int
        Default number of iterations to update positions (can be overridden in ``fit``).
    approx_radius : float
        If a positive value is provided, only the nodes within this distance of a given node are used to compute
        the repulsive force.
    lin_log : bool
        If ``True``, use lin-log mode.
    gravity_factor : float
        Gravity force scaling constant.
    repulsive_factor : float
        Repulsive force scaling constant.
    tolerance : float
        Tolerance defined in the swinging constant.
        Stored on the instance; ``fit`` currently derives the tolerance it uses from the number of nodes.
    speed : float
        Speed constant.
    speed_max : float
        Constant used to impose constrain on speed.

    Attributes
    ----------
    embedding_ : np.ndarray
        Layout in given dimension.

    Example
    -------
    >>> from sknetwork.embedding.force_atlas import ForceAtlas
    >>> from sknetwork.data import karate_club
    >>> force_atlas = ForceAtlas()
    >>> adjacency = karate_club()
    >>> embedding = force_atlas.fit_transform(adjacency)
    >>> embedding.shape
    (34, 2)

    References
    ----------
    Jacomy M., Venturini T., Heymann S., Bastian M. (2014).
    `ForceAtlas2, a Continuous Graph Layout Algorithm for Handy Network Visualization Designed for the Gephi Software.
    <https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0098679>`_
    Plos One.
    """
    def __init__(self, n_components: int = 2, n_iter: int = 50, approx_radius: float = -1, lin_log: bool = False,
                 gravity_factor: float = 0.01, repulsive_factor: float = 0.1, tolerance: float = 0.1,
                 speed: float = 0.1, speed_max: float = 10):
        super(ForceAtlas, self).__init__()
        self.n_components = n_components
        self.n_iter = n_iter
        self.approx_radius = approx_radius
        self.lin_log = lin_log
        self.gravity_factor = gravity_factor
        self.repulsive_factor = repulsive_factor
        self.tolerance = tolerance
        self.speed = speed
        self.speed_max = speed_max

    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray], pos_init: Optional[np.ndarray] = None,
            n_iter: Optional[int] = None) -> 'ForceAtlas':
        """Compute layout.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph, treated as undirected.
        pos_init :
            Position to start with, of shape (n, n_components). Random if not provided.
            The array is copied; it is not modified by this method.
        n_iter : int
            Number of iterations to update positions.
            If ``None``, use the value of self.n_iter.

        Returns
        -------
        self: :class:`ForceAtlas`

        Raises
        ------
        ValueError
            If ``pos_init`` does not have shape (n, n_components).
        """
        # verify the format of the adjacency matrix
        adjacency = check_format(adjacency)
        check_square(adjacency)
        if not is_symmetric(adjacency):
            adjacency = directed2undirected(adjacency)
        n = adjacency.shape[0]

        # setting of the tolerance according to the size of the graph
        # NOTE(review): self.tolerance is stored by __init__ but not used here; confirm whether that is intended.
        if n < 5000:
            tolerance = 0.1
        elif n < 50000:  # pragma: no cover
            tolerance = 1
        else:  # pragma: no cover
            tolerance = 10

        if n_iter is None:
            n_iter = self.n_iter

        # initial position of the nodes of the graph
        if pos_init is None:
            position: np.ndarray = np.random.randn(n, self.n_components)
        else:
            # work on a float copy: positions are updated in place below, which must neither
            # alter the caller's array nor fail on integer-typed input
            position = np.array(pos_init, dtype=float)
            if position.shape != (n, self.n_components):
                raise ValueError('The initial position does not have valid dimensions.')

        # compute the vector with the degree of each node (shifted by 1)
        degree: np.ndarray = adjacency.dot(np.ones(adjacency.shape[1])) + 1

        # initialization of variation of position of nodes
        resultants = np.zeros(n)
        delta: np.ndarray = np.zeros((n, self.n_components))
        swing_vector: np.ndarray = np.zeros(n)
        global_speed = 1
        all_nodes = np.arange(n)

        for _ in range(n_iter):
            delta *= 0
            global_swing = 0
            global_traction = 0

            # spatial index used to restrict repulsion to nearby nodes, if requested
            if self.approx_radius > 0:
                tree = cKDTree(position)
            else:
                tree = None

            for i in range(n):

                # attraction: pull towards the neighbors of node i in the graph
                indices = adjacency.indices[adjacency.indptr[i]:adjacency.indptr[i + 1]]
                attraction = position[i] - position[indices]

                if self.lin_log:
                    attraction = np.sign(attraction) * np.log(1 + np.abs(10 * attraction))
                attraction = attraction.sum(axis=0)

                # repulsion: push away from all nodes, or only from those within approx_radius
                if tree is None:
                    neighbors = all_nodes
                else:
                    neighbors = tree.query_ball_point(position[i], self.approx_radius)

                grad: np.ndarray = (position[i] - position[neighbors])  # shape (n_neigh, n_components)
                distance: np.ndarray = np.linalg.norm(grad, axis=1)  # shape (n_neigh,)
                distance = np.where(distance < 0.01, 0.01, distance)  # floor to avoid dividing by ~0
                repulsion = grad * (degree[neighbors] / distance)[:, np.newaxis]

                repulsion *= self.repulsive_factor * degree[i]
                repulsion = repulsion.sum(axis=0)

                # gravity
                gravity = self.gravity_factor * degree[i] * grad
                gravity = gravity.sum(axis=0)

                # forces resultant applied on node i for traction, swing and speed computation
                force = repulsion - attraction - gravity
                resultant_new: float = np.linalg.norm(force)
                resultant_old: float = resultants[i]

                swing_node: float = np.abs(resultant_new - resultant_old)  # force variation applied on node i
                swing_vector[i] = swing_node
                global_swing += (degree[i] + 1) * swing_node

                traction: float = np.abs(resultant_new + resultant_old) / 2  # traction force applied on node i
                global_traction += (degree[i] + 1) * traction

                node_speed = self.speed * global_speed / (1 + global_speed * np.sqrt(swing_node))
                # cap the speed; a zero force needs no cap (and would otherwise divide by zero)
                if resultant_new > 0 and node_speed > self.speed_max / resultant_new:  # pragma: no cover
                    node_speed = self.speed_max / resultant_new

                delta[i] = node_speed * force
                resultants[i] = resultant_new
                # keep the previous global speed while no swing has accumulated: dividing by a zero
                # swing would yield inf/NaN and propagate NaN into the positions
                if global_swing > 0:
                    global_speed = tolerance * global_traction / global_swing

            position += delta  # calculating displacement and final position of points after iteration
            if (swing_vector < 1).all():
                break  # the swing of every node is below 1: convergence is considered reached

        self.embedding_ = position
        return self
@@ -0,0 +1,174 @@
1
+ #!/usr/bin/env python3
2
+ # coding: utf-8
3
+ """
4
+ Created in September 2020
5
+ @author: Quentin Lutz <qlutz@enst.fr>
6
+ @author: Thomas Bonald <bonald@enst.fr>
7
+ """
8
+ from typing import Optional, Union
9
+
10
+ import numpy as np
11
+ from scipy import sparse
12
+
13
+ from sknetwork.clustering.louvain import Louvain
14
+ from sknetwork.embedding.base import BaseEmbedding
15
+ from sknetwork.linalg.normalization import normalize
16
+ from sknetwork.utils.check import check_random_state, check_adjacency_vector, check_nonnegative, is_square
17
+ from sknetwork.utils.membership import get_membership
18
+
19
+
20
def reindex_labels(labels: np.ndarray, labels_secondary: Optional[np.ndarray] = None, which: str = 'remove'):
    """Reindex labels, removing or merging labels of count 1.

    Parameters
    ----------
    labels :
        Non-negative integer labels. Labels occurring at least twice are renumbered
        0, 1, ... in increasing order of their original value.
    labels_secondary :
        Optional second set of non-negative integer labels, reindexed with the mapping of the
        labels kept from ``labels``; any other label is mapped to -1.
    which :
        What to do with labels of count 1 in ``labels``: ``'remove'`` maps them to -1,
        ``'merge'`` maps them all to one extra label (the number of kept labels);
        any other value leaves ``labels`` unchanged.

    Returns
    -------
    labels, labels_secondary :
        Reindexed labels (``labels_secondary`` is ``None`` if not provided).
    """
    labels_unique, counts = np.unique(labels, return_counts=True)
    n_labels = labels_unique.max() + 1
    labels_keep = labels_unique[counts > 1]
    n_keep = len(labels_keep)
    if which == 'remove':
        label_index = np.full(n_labels, -1, dtype=int)
        label_index[labels_keep] = np.arange(n_keep)
    elif which == 'merge':
        label_index = np.full(n_labels, n_keep, dtype=int)
        label_index[labels_keep] = np.arange(n_keep)
    else:
        label_index = np.arange(n_labels)
    labels = label_index[labels]
    if labels_secondary is not None:
        # NOTE(review): secondary labels always follow the 'remove' mapping, whatever ``which`` is.
        # The lookup table must cover both the secondary labels and the kept primary labels,
        # otherwise a kept label larger than every secondary label raises an IndexError.
        largest_keep = labels_keep[-1] if n_keep else -1  # labels_keep is sorted
        n_labels = max(np.max(labels_secondary), largest_keep) + 1
        label_index = np.full(n_labels, -1, dtype=int)
        label_index[labels_keep] = np.arange(n_keep)
        labels_secondary = label_index[labels_secondary]
    return labels, labels_secondary
41
+
42
+
43
class LouvainEmbedding(BaseEmbedding):
    """Embedding of graphs induced by Louvain clustering. Each component of the embedding corresponds
    to a cluster obtained by Louvain.

    Parameters
    ----------
    resolution : float
        Resolution parameter.
    modularity : str
        Which objective function to maximize. Can be ``'Dugue'``, ``'Newman'`` or ``'Potts'``.
    tol_optimization :
        Minimum increase in the objective function to enter a new optimization pass.
    tol_aggregation :
        Minimum increase in the objective function to enter a new aggregation pass.
    n_aggregations :
        Maximum number of aggregations.
        A negative value is interpreted as no limit.
    shuffle_nodes :
        Enables node shuffling before optimization.
    random_state :
        Random number generator or random seed. If ``None``, numpy.random is used.
    isolated_nodes : str
        What to do with isolated column nodes. Can be ``'remove'`` (default), ``'merge'`` or ``'keep'``.

    Attributes
    ----------
    embedding_ : array, shape = (n, n_components)
        Embedding of the nodes.
    embedding_row_ : array, shape = (n_row, n_components)
        Embedding of the rows, for bipartite graphs.
    embedding_col_ : array, shape = (n_col, n_components)
        Embedding of the columns, for bipartite graphs.
    labels_row_ : np.ndarray
        Labels of the rows (used to build the embedding of the columns), for bipartite graphs.
    labels_col_ : np.ndarray
        Labels of the columns (used to build the embedding of the rows), for bipartite graphs.

    Example
    -------
    >>> from sknetwork.embedding import LouvainEmbedding
    >>> from sknetwork.data import house
    >>> louvain = LouvainEmbedding()
    >>> adjacency = house()
    >>> embedding = louvain.fit_transform(adjacency)
    >>> embedding.shape
    (5, 2)
    """
    def __init__(self, resolution: float = 1, modularity: str = 'Dugue', tol_optimization: float = 1e-3,
                 tol_aggregation: float = 1e-3, n_aggregations: int = -1, shuffle_nodes: bool = False,
                 random_state: Optional[Union[np.random.RandomState, int]] = None, isolated_nodes: str = 'remove'):
        super(LouvainEmbedding, self).__init__()
        self.resolution = resolution
        self.modularity = modularity.lower()
        self.tol_optimization = tol_optimization
        self.tol_aggregation = tol_aggregation
        self.n_aggregations = n_aggregations
        self.shuffle_nodes = shuffle_nodes
        self.random_state = check_random_state(random_state)
        self.isolated_nodes = isolated_nodes

        self.labels_ = None
        self.labels_row_ = None
        self.labels_col_ = None
        self.embedding_ = None
        self.embedding_row_ = None
        self.embedding_col_ = None

    def fit(self, input_matrix: sparse.csr_matrix, force_bipartite: bool = False):
        """Embedding of graphs from the clustering obtained with Louvain.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.
        force_bipartite : bool (default = ``False``)
            If ``True``, force the input matrix to be considered as a biadjacency matrix.

        Returns
        -------
        self: :class:`LouvainEmbedding`
        """
        louvain = Louvain(resolution=self.resolution, modularity=self.modularity,
                          tol_optimization=self.tol_optimization, tol_aggregation=self.tol_aggregation,
                          n_aggregations=self.n_aggregations, shuffle_nodes=self.shuffle_nodes, sort_clusters=False,
                          return_membership=True, return_aggregate=True, random_state=self.random_state)
        louvain.fit(input_matrix, force_bipartite=force_bipartite)

        # isolated nodes
        # a square matrix is still a biadjacency matrix when force_bipartite is set:
        # rows must then be embedded with the labels of the columns, not their own labels
        if force_bipartite or not is_square(input_matrix):
            labels = louvain.labels_col_
            labels_secondary = louvain.labels_row_
        else:
            labels = louvain.labels_
            labels_secondary = None

        self.labels_, labels_row = reindex_labels(labels, labels_secondary, self.isolated_nodes)

        # embedding
        probs = normalize(input_matrix)
        embedding_ = probs.dot(get_membership(self.labels_))
        self.embedding_ = embedding_.toarray()

        if labels_row is not None:
            probs = normalize(input_matrix.T)
            embedding_col = probs.dot(get_membership(labels_row))
            self.embedding_row_ = self.embedding_
            self.embedding_col_ = embedding_col.toarray()
            self.labels_row_ = labels_row
            self.labels_col_ = self.labels_
        else:
            # clear bipartite results of a previous fit, which predict would otherwise rely on
            self.embedding_row_ = None
            self.embedding_col_ = None
            self.labels_row_ = None
            self.labels_col_ = None

        return self

    def predict(self, adjacency_vectors: Union[sparse.csr_matrix, np.ndarray]) -> np.ndarray:
        """Predict the embedding of new rows, defined by their adjacency vectors.

        Parameters
        ----------
        adjacency_vectors :
            Adjacency row vectors.
            Array of shape (n_col,) (single vector) or (n_vectors, n_col)

        Returns
        -------
        embedding_vectors : np.ndarray
            Embedding of the nodes.

        Raises
        ------
        ValueError
            If the instance is not fitted yet.
        """
        self._check_fitted()
        # expected vector length: number of columns for bipartite graphs, number of nodes otherwise
        if self.embedding_col_ is not None:
            n = len(self.embedding_col_)
        else:
            n = len(self.embedding_)

        adjacency_vectors = check_adjacency_vector(adjacency_vectors, n)
        check_nonnegative(adjacency_vectors)
        membership = get_membership(self.labels_)

        return normalize(adjacency_vectors).dot(membership)
@@ -0,0 +1,142 @@
1
+ #!/usr/bin/env python3
2
+ # coding: utf-8
3
+ """
4
+ Created on Dec 2020
5
+ @author: Quentin Lutz <qlutz@enst.fr>
6
+ """
7
+ from typing import Optional, Union
8
+
9
+ import numpy as np
10
+ from scipy import sparse
11
+
12
+ from sknetwork.utils.check import check_format, check_random_state
13
+ from sknetwork.utils.format import get_adjacency
14
+ from sknetwork.clustering.louvain import Louvain
15
+ from sknetwork.embedding.base import BaseEmbedding
16
+
17
+
18
class LouvainNE(BaseEmbedding):
    """Embedding of graphs based on the hierarchical Louvain algorithm with random scattering per level.

    Parameters
    ----------
    n_components : int
        Dimension of the embedding.
    scale : float
        Dilution factor to be applied on the random vector to be added at each iteration of the clustering method.
    resolution :
        Resolution parameter.
    tol_optimization :
        Minimum increase in the objective function to enter a new optimization pass.
    tol_aggregation :
        Minimum increase in the objective function to enter a new aggregation pass.
    n_aggregations :
        Maximum number of aggregations.
        A negative value is interpreted as no limit.
    shuffle_nodes :
        Enables node shuffling before optimization.
    random_state :
        Random number generator or random seed. If None, numpy.random is used.
    verbose :
        Passed to the underlying Louvain clustering.

    Attributes
    ----------
    embedding_ : array, shape = (n, n_components)
        Embedding of the nodes.
    embedding_row_ : array, shape = (n_row, n_components)
        Embedding of the rows, for bipartite graphs.
    embedding_col_ : array, shape = (n_col, n_components)
        Embedding of the columns, for bipartite graphs.

    Example
    -------
    >>> from sknetwork.embedding import LouvainNE
    >>> from sknetwork.data import karate_club
    >>> louvain = LouvainNE(n_components=3)
    >>> adjacency = karate_club()
    >>> embedding = louvain.fit_transform(adjacency)
    >>> embedding.shape
    (34, 3)

    References
    ----------
    Bhowmick, A. K., Meneni, K., Danisch, M., Guillaume, J. L., & Mitra, B. (2020, January).
    `LouvainNE: Hierarchical Louvain Method for High Quality and Scalable Network Embedding.
    <https://hal.archives-ouvertes.fr/hal-02999888/document>`_
    In Proceedings of the 13th International Conference on Web Search and Data Mining (pp. 43-51).
    """
    def __init__(self, n_components: int = 2, scale: float = .1, resolution: float = 1, tol_optimization: float = 1e-3,
                 tol_aggregation: float = 1e-3, n_aggregations: int = -1, shuffle_nodes: bool = False,
                 random_state: Optional[Union[np.random.RandomState, int]] = None, verbose: bool = False):
        super(LouvainNE, self).__init__()

        self.n_components = n_components
        self.scale = scale
        louvain_params = dict(resolution=resolution, tol_optimization=tol_optimization,
                              tol_aggregation=tol_aggregation, n_aggregations=n_aggregations,
                              shuffle_nodes=shuffle_nodes, random_state=random_state, verbose=verbose)
        self._clustering_method = Louvain(**louvain_params)
        self.random_state = check_random_state(random_state)
        self.bipartite = None

    def _recursive_louvain(self, adjacency: Union[sparse.csr_matrix, np.ndarray], depth: int,
                           nodes: Optional[np.ndarray] = None):
        """Recursive step of ``fit``: cluster the graph, shift each cluster and recurse into it.

        The embedding is modified in place.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph.
        depth :
            Depth of the recursion.
        nodes :
            The indices of the current nodes in the original graph.
        """
        n_nodes = adjacency.shape[0]
        if nodes is None:
            nodes = np.arange(n_nodes)

        # a graph without edges is treated as a single cluster
        labels = self._clustering_method.fit_transform(adjacency) if adjacency.nnz else np.zeros(n_nodes)
        clusters = np.unique(labels)

        # a single cluster cannot be refined any further
        if len(clusters) == 1:
            return

        # one random shift per cluster, scaled down with the depth
        shifts = (self.scale ** depth) * self.random_state.rand(self.n_components, len(clusters))
        for index, cluster in enumerate(clusters):
            in_cluster = (labels == cluster)
            members = nodes[in_cluster]
            self.embedding_[members, :] += shifts[:, index]

            # selector matrix keeping only the columns of the nodes in the cluster
            n_row = len(in_cluster)
            offsets = np.zeros(n_row + 1, dtype=int)
            offsets[1:] = np.cumsum(in_cluster)
            n_col = offsets[-1]
            selector = sparse.csr_matrix((np.ones(n_col), np.arange(n_col, dtype=int), offsets),
                                         shape=(n_row, n_col))
            sub_adjacency = adjacency[in_cluster, :].dot(selector)
            self._recursive_louvain(sub_adjacency, depth + 1, members)

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], force_bipartite: bool = False):
        """Embedding of graphs from a clustering obtained with Louvain.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.
        force_bipartite :
            If ``True``, force the input matrix to be considered as a biadjacency matrix even if square.

        Returns
        -------
        self: :class:`LouvainNE`
        """
        # input
        input_matrix = check_format(input_matrix)
        adjacency, self.bipartite = get_adjacency(input_matrix, force_bipartite=force_bipartite)

        # embedding, built in place by the recursion starting from zeros
        self.embedding_ = np.zeros((adjacency.shape[0], self.n_components))
        self._recursive_louvain(adjacency, 0)

        if self.bipartite:
            self._split_vars(input_matrix.shape)
        return self
@@ -0,0 +1,66 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created in November 2018
5
+ @author: Thomas Bonald <thomas.bonald@telecom-paris.fr>
6
+ @author: Nathan De Lara <nathan.delara@polytechnique.org>
7
+ """
8
+ import numpy as np
9
+
10
+ from sknetwork.linalg import normalize
11
+ from sknetwork.utils.check import check_format, check_square
12
+
13
+
14
def get_cosine_similarity(input_matrix, embedding: np.ndarray, embedding_col=None):
    """Average cosine similarity of an embedding :math:`x` defined by:

    :math:`Q = \\sum_{ij}\\dfrac{A_{ij}}{w}\\cos(x_i, x_j)`

    where :math:`w = 1^TA1` is the total weight of the graph.

    For bipartite graphs with column embedding :math:`y`, the metric is

    :math:`Q = \\sum_{ij} \\dfrac{B_{ij}}{w} \\cos(x_i, y_j)`

    where :math:`w = 1^TB1` is the total weight of the graph.

    Each stored entry of the matrix contributes the cosine similarity of its
    two endpoints multiplied by the weight of the entry, so that weighted
    graphs follow the formulas above.

    Parameters
    ----------
    input_matrix :
        Adjacency matrix or biadjacency matrix of the graph.
    embedding :
        Embedding of the nodes.
    embedding_col :
        Embedding of the columns (for bipartite graphs). If ``None``, the input matrix is checked
        to be square and ``embedding`` is used for the columns as well.

    Returns
    -------
    cosine_similarity : float

    Example
    -------
    >>> from sknetwork.embedding import get_cosine_similarity
    >>> from sknetwork.data import karate_club
    >>> graph = karate_club(metadata=True)
    >>> adjacency = graph.adjacency
    >>> embedding = graph.position
    >>> np.round(get_cosine_similarity(adjacency, embedding), 2)
    0.7
    """
    input_matrix = check_format(input_matrix)
    total_weight = input_matrix.data.sum()

    if embedding_col is None:
        # Non-bipartite case: rows and columns share the same embedding.
        check_square(input_matrix)
        embedding_col = embedding.copy()

    # With unit-norm vectors, the cosine reduces to a plain dot product.
    embedding_row_norm = normalize(embedding, p=2)
    embedding_col_norm = normalize(embedding_col, p=2)

    # COO format exposes aligned (row, col, data) triplets, one per stored entry.
    input_matrix_coo = input_matrix.tocoo()
    row = input_matrix_coo.row
    col = input_matrix_coo.col
    weights = input_matrix_coo.data

    # Element-wise products of the endpoint vectors, one row per stored entry.
    # Transposing puts the entry axis last so that the weights broadcast over
    # it; summing everything then yields sum_ij A_ij * cos(x_i, x_j).
    products = np.multiply(embedding_row_norm[row], embedding_col_norm[col])
    cosine_similarity = (products.T * weights).sum() / total_weight

    return cosine_similarity