scikit-network 0.28.3__cp39-cp39-macosx_12_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-network might be problematic. Click here for more details.

Files changed (240) hide show
  1. scikit_network-0.28.3.dist-info/AUTHORS.rst +41 -0
  2. scikit_network-0.28.3.dist-info/LICENSE +34 -0
  3. scikit_network-0.28.3.dist-info/METADATA +457 -0
  4. scikit_network-0.28.3.dist-info/RECORD +240 -0
  5. scikit_network-0.28.3.dist-info/WHEEL +5 -0
  6. scikit_network-0.28.3.dist-info/top_level.txt +1 -0
  7. sknetwork/__init__.py +21 -0
  8. sknetwork/classification/__init__.py +8 -0
  9. sknetwork/classification/base.py +84 -0
  10. sknetwork/classification/base_rank.py +143 -0
  11. sknetwork/classification/diffusion.py +134 -0
  12. sknetwork/classification/knn.py +162 -0
  13. sknetwork/classification/metrics.py +205 -0
  14. sknetwork/classification/pagerank.py +66 -0
  15. sknetwork/classification/propagation.py +152 -0
  16. sknetwork/classification/tests/__init__.py +1 -0
  17. sknetwork/classification/tests/test_API.py +35 -0
  18. sknetwork/classification/tests/test_diffusion.py +37 -0
  19. sknetwork/classification/tests/test_knn.py +24 -0
  20. sknetwork/classification/tests/test_metrics.py +53 -0
  21. sknetwork/classification/tests/test_pagerank.py +20 -0
  22. sknetwork/classification/tests/test_propagation.py +24 -0
  23. sknetwork/classification/vote.cpython-39-darwin.so +0 -0
  24. sknetwork/classification/vote.pyx +58 -0
  25. sknetwork/clustering/__init__.py +7 -0
  26. sknetwork/clustering/base.py +102 -0
  27. sknetwork/clustering/kmeans.py +142 -0
  28. sknetwork/clustering/louvain.py +255 -0
  29. sknetwork/clustering/louvain_core.cpython-39-darwin.so +0 -0
  30. sknetwork/clustering/louvain_core.pyx +134 -0
  31. sknetwork/clustering/metrics.py +91 -0
  32. sknetwork/clustering/postprocess.py +66 -0
  33. sknetwork/clustering/propagation_clustering.py +108 -0
  34. sknetwork/clustering/tests/__init__.py +1 -0
  35. sknetwork/clustering/tests/test_API.py +37 -0
  36. sknetwork/clustering/tests/test_kmeans.py +47 -0
  37. sknetwork/clustering/tests/test_louvain.py +104 -0
  38. sknetwork/clustering/tests/test_metrics.py +50 -0
  39. sknetwork/clustering/tests/test_post_processing.py +23 -0
  40. sknetwork/clustering/tests/test_postprocess.py +39 -0
  41. sknetwork/data/__init__.py +5 -0
  42. sknetwork/data/load.py +408 -0
  43. sknetwork/data/models.py +459 -0
  44. sknetwork/data/parse.py +621 -0
  45. sknetwork/data/test_graphs.py +84 -0
  46. sknetwork/data/tests/__init__.py +1 -0
  47. sknetwork/data/tests/test_API.py +30 -0
  48. sknetwork/data/tests/test_load.py +95 -0
  49. sknetwork/data/tests/test_models.py +52 -0
  50. sknetwork/data/tests/test_parse.py +253 -0
  51. sknetwork/data/tests/test_test_graphs.py +30 -0
  52. sknetwork/data/tests/test_toy_graphs.py +68 -0
  53. sknetwork/data/toy_graphs.py +619 -0
  54. sknetwork/embedding/__init__.py +10 -0
  55. sknetwork/embedding/base.py +90 -0
  56. sknetwork/embedding/force_atlas.py +197 -0
  57. sknetwork/embedding/louvain_embedding.py +174 -0
  58. sknetwork/embedding/louvain_hierarchy.py +142 -0
  59. sknetwork/embedding/metrics.py +66 -0
  60. sknetwork/embedding/random_projection.py +133 -0
  61. sknetwork/embedding/spectral.py +214 -0
  62. sknetwork/embedding/spring.py +198 -0
  63. sknetwork/embedding/svd.py +363 -0
  64. sknetwork/embedding/tests/__init__.py +1 -0
  65. sknetwork/embedding/tests/test_API.py +73 -0
  66. sknetwork/embedding/tests/test_force_atlas.py +35 -0
  67. sknetwork/embedding/tests/test_louvain_embedding.py +33 -0
  68. sknetwork/embedding/tests/test_louvain_hierarchy.py +19 -0
  69. sknetwork/embedding/tests/test_metrics.py +29 -0
  70. sknetwork/embedding/tests/test_random_projection.py +28 -0
  71. sknetwork/embedding/tests/test_spectral.py +84 -0
  72. sknetwork/embedding/tests/test_spring.py +50 -0
  73. sknetwork/embedding/tests/test_svd.py +37 -0
  74. sknetwork/flow/__init__.py +3 -0
  75. sknetwork/flow/flow.py +73 -0
  76. sknetwork/flow/tests/__init__.py +1 -0
  77. sknetwork/flow/tests/test_flow.py +17 -0
  78. sknetwork/flow/tests/test_utils.py +69 -0
  79. sknetwork/flow/utils.py +91 -0
  80. sknetwork/gnn/__init__.py +10 -0
  81. sknetwork/gnn/activation.py +117 -0
  82. sknetwork/gnn/base.py +155 -0
  83. sknetwork/gnn/base_activation.py +89 -0
  84. sknetwork/gnn/base_layer.py +109 -0
  85. sknetwork/gnn/gnn_classifier.py +381 -0
  86. sknetwork/gnn/layer.py +153 -0
  87. sknetwork/gnn/layers.py +127 -0
  88. sknetwork/gnn/loss.py +180 -0
  89. sknetwork/gnn/neighbor_sampler.py +65 -0
  90. sknetwork/gnn/optimizer.py +163 -0
  91. sknetwork/gnn/tests/__init__.py +1 -0
  92. sknetwork/gnn/tests/test_activation.py +56 -0
  93. sknetwork/gnn/tests/test_base.py +79 -0
  94. sknetwork/gnn/tests/test_base_layer.py +37 -0
  95. sknetwork/gnn/tests/test_gnn_classifier.py +192 -0
  96. sknetwork/gnn/tests/test_layers.py +80 -0
  97. sknetwork/gnn/tests/test_loss.py +33 -0
  98. sknetwork/gnn/tests/test_neigh_sampler.py +23 -0
  99. sknetwork/gnn/tests/test_optimizer.py +43 -0
  100. sknetwork/gnn/tests/test_utils.py +93 -0
  101. sknetwork/gnn/utils.py +219 -0
  102. sknetwork/hierarchy/__init__.py +7 -0
  103. sknetwork/hierarchy/base.py +69 -0
  104. sknetwork/hierarchy/louvain_hierarchy.py +264 -0
  105. sknetwork/hierarchy/metrics.py +234 -0
  106. sknetwork/hierarchy/paris.cpython-39-darwin.so +0 -0
  107. sknetwork/hierarchy/paris.pyx +317 -0
  108. sknetwork/hierarchy/postprocess.py +350 -0
  109. sknetwork/hierarchy/tests/__init__.py +1 -0
  110. sknetwork/hierarchy/tests/test_API.py +25 -0
  111. sknetwork/hierarchy/tests/test_algos.py +29 -0
  112. sknetwork/hierarchy/tests/test_metrics.py +62 -0
  113. sknetwork/hierarchy/tests/test_postprocess.py +57 -0
  114. sknetwork/hierarchy/tests/test_ward.py +25 -0
  115. sknetwork/hierarchy/ward.py +94 -0
  116. sknetwork/linalg/__init__.py +9 -0
  117. sknetwork/linalg/basics.py +37 -0
  118. sknetwork/linalg/diteration.cpython-39-darwin.so +0 -0
  119. sknetwork/linalg/diteration.pyx +49 -0
  120. sknetwork/linalg/eig_solver.py +93 -0
  121. sknetwork/linalg/laplacian.py +15 -0
  122. sknetwork/linalg/normalization.py +66 -0
  123. sknetwork/linalg/operators.py +225 -0
  124. sknetwork/linalg/polynome.py +76 -0
  125. sknetwork/linalg/ppr_solver.py +170 -0
  126. sknetwork/linalg/push.cpython-39-darwin.so +0 -0
  127. sknetwork/linalg/push.pyx +73 -0
  128. sknetwork/linalg/sparse_lowrank.py +142 -0
  129. sknetwork/linalg/svd_solver.py +91 -0
  130. sknetwork/linalg/tests/__init__.py +1 -0
  131. sknetwork/linalg/tests/test_eig.py +44 -0
  132. sknetwork/linalg/tests/test_laplacian.py +18 -0
  133. sknetwork/linalg/tests/test_normalization.py +38 -0
  134. sknetwork/linalg/tests/test_operators.py +70 -0
  135. sknetwork/linalg/tests/test_polynome.py +38 -0
  136. sknetwork/linalg/tests/test_ppr.py +50 -0
  137. sknetwork/linalg/tests/test_sparse_lowrank.py +61 -0
  138. sknetwork/linalg/tests/test_svd.py +38 -0
  139. sknetwork/linkpred/__init__.py +4 -0
  140. sknetwork/linkpred/base.py +80 -0
  141. sknetwork/linkpred/first_order.py +508 -0
  142. sknetwork/linkpred/first_order_core.cpython-39-darwin.so +0 -0
  143. sknetwork/linkpred/first_order_core.pyx +315 -0
  144. sknetwork/linkpred/postprocessing.py +98 -0
  145. sknetwork/linkpred/tests/__init__.py +1 -0
  146. sknetwork/linkpred/tests/test_API.py +49 -0
  147. sknetwork/linkpred/tests/test_postprocessing.py +21 -0
  148. sknetwork/path/__init__.py +4 -0
  149. sknetwork/path/metrics.py +148 -0
  150. sknetwork/path/search.py +65 -0
  151. sknetwork/path/shortest_path.py +186 -0
  152. sknetwork/path/tests/__init__.py +1 -0
  153. sknetwork/path/tests/test_metrics.py +29 -0
  154. sknetwork/path/tests/test_search.py +25 -0
  155. sknetwork/path/tests/test_shortest_path.py +45 -0
  156. sknetwork/ranking/__init__.py +9 -0
  157. sknetwork/ranking/base.py +56 -0
  158. sknetwork/ranking/betweenness.cpython-39-darwin.so +0 -0
  159. sknetwork/ranking/betweenness.pyx +99 -0
  160. sknetwork/ranking/closeness.py +95 -0
  161. sknetwork/ranking/harmonic.py +82 -0
  162. sknetwork/ranking/hits.py +94 -0
  163. sknetwork/ranking/katz.py +81 -0
  164. sknetwork/ranking/pagerank.py +107 -0
  165. sknetwork/ranking/postprocess.py +25 -0
  166. sknetwork/ranking/tests/__init__.py +1 -0
  167. sknetwork/ranking/tests/test_API.py +34 -0
  168. sknetwork/ranking/tests/test_betweenness.py +38 -0
  169. sknetwork/ranking/tests/test_closeness.py +34 -0
  170. sknetwork/ranking/tests/test_hits.py +20 -0
  171. sknetwork/ranking/tests/test_pagerank.py +69 -0
  172. sknetwork/regression/__init__.py +4 -0
  173. sknetwork/regression/base.py +56 -0
  174. sknetwork/regression/diffusion.py +190 -0
  175. sknetwork/regression/tests/__init__.py +1 -0
  176. sknetwork/regression/tests/test_API.py +34 -0
  177. sknetwork/regression/tests/test_diffusion.py +48 -0
  178. sknetwork/sknetwork.py +3 -0
  179. sknetwork/topology/__init__.py +9 -0
  180. sknetwork/topology/dag.py +74 -0
  181. sknetwork/topology/dag_core.cpython-39-darwin.so +0 -0
  182. sknetwork/topology/dag_core.pyx +38 -0
  183. sknetwork/topology/kcliques.cpython-39-darwin.so +0 -0
  184. sknetwork/topology/kcliques.pyx +193 -0
  185. sknetwork/topology/kcore.cpython-39-darwin.so +0 -0
  186. sknetwork/topology/kcore.pyx +120 -0
  187. sknetwork/topology/structure.py +234 -0
  188. sknetwork/topology/tests/__init__.py +1 -0
  189. sknetwork/topology/tests/test_cliques.py +28 -0
  190. sknetwork/topology/tests/test_cores.py +21 -0
  191. sknetwork/topology/tests/test_dag.py +26 -0
  192. sknetwork/topology/tests/test_structure.py +99 -0
  193. sknetwork/topology/tests/test_triangles.py +42 -0
  194. sknetwork/topology/tests/test_wl_coloring.py +49 -0
  195. sknetwork/topology/tests/test_wl_kernel.py +31 -0
  196. sknetwork/topology/triangles.cpython-39-darwin.so +0 -0
  197. sknetwork/topology/triangles.pyx +166 -0
  198. sknetwork/topology/weisfeiler_lehman.py +163 -0
  199. sknetwork/topology/weisfeiler_lehman_core.cpython-39-darwin.so +0 -0
  200. sknetwork/topology/weisfeiler_lehman_core.pyx +116 -0
  201. sknetwork/utils/__init__.py +40 -0
  202. sknetwork/utils/base.py +35 -0
  203. sknetwork/utils/check.py +354 -0
  204. sknetwork/utils/co_neighbor.py +71 -0
  205. sknetwork/utils/format.py +219 -0
  206. sknetwork/utils/kmeans.py +89 -0
  207. sknetwork/utils/knn.py +166 -0
  208. sknetwork/utils/knn1d.cpython-39-darwin.so +0 -0
  209. sknetwork/utils/knn1d.pyx +80 -0
  210. sknetwork/utils/membership.py +82 -0
  211. sknetwork/utils/minheap.cpython-39-darwin.so +0 -0
  212. sknetwork/utils/minheap.pxd +22 -0
  213. sknetwork/utils/minheap.pyx +111 -0
  214. sknetwork/utils/neighbors.py +115 -0
  215. sknetwork/utils/seeds.py +75 -0
  216. sknetwork/utils/simplex.py +140 -0
  217. sknetwork/utils/tests/__init__.py +1 -0
  218. sknetwork/utils/tests/test_base.py +28 -0
  219. sknetwork/utils/tests/test_bunch.py +16 -0
  220. sknetwork/utils/tests/test_check.py +190 -0
  221. sknetwork/utils/tests/test_co_neighbor.py +43 -0
  222. sknetwork/utils/tests/test_format.py +61 -0
  223. sknetwork/utils/tests/test_kmeans.py +21 -0
  224. sknetwork/utils/tests/test_knn.py +32 -0
  225. sknetwork/utils/tests/test_membership.py +24 -0
  226. sknetwork/utils/tests/test_neighbors.py +41 -0
  227. sknetwork/utils/tests/test_projection_simplex.py +33 -0
  228. sknetwork/utils/tests/test_seeds.py +67 -0
  229. sknetwork/utils/tests/test_verbose.py +15 -0
  230. sknetwork/utils/tests/test_ward.py +20 -0
  231. sknetwork/utils/timeout.py +38 -0
  232. sknetwork/utils/verbose.py +37 -0
  233. sknetwork/utils/ward.py +60 -0
  234. sknetwork/visualization/__init__.py +4 -0
  235. sknetwork/visualization/colors.py +34 -0
  236. sknetwork/visualization/dendrograms.py +229 -0
  237. sknetwork/visualization/graphs.py +819 -0
  238. sknetwork/visualization/tests/__init__.py +1 -0
  239. sknetwork/visualization/tests/test_dendrograms.py +53 -0
  240. sknetwork/visualization/tests/test_graphs.py +167 -0
@@ -0,0 +1,234 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created on March 2019
5
+ @author: Thomas Bonald <bonald@enst.fr>
6
+ """
7
+ import numpy as np
8
+ from scipy import sparse
9
+ from sknetwork.hierarchy.paris import AggregateGraph
10
+
11
+ from sknetwork.utils.check import check_format, get_probs, check_square
12
+ from sknetwork.utils.check import check_min_size, check_min_nnz
13
+ from sknetwork.utils.format import directed2undirected
14
+
15
+
16
def _instantiate_vars(adjacency: sparse.csr_matrix, weights: str = 'uniform'):
    """Build the aggregate graph and the node weight vectors shared by the metrics.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    weights :
        Weights of nodes, forwarded to ``get_probs``.

    Returns
    -------
    aggregate_graph : AggregateGraph
        Aggregate graph built from the output of ``directed2undirected(adjacency)``.
    weights_row :
        Output of ``get_probs`` for ``adjacency``.
    weights_col :
        Output of ``get_probs`` for ``adjacency.T``.
    """
    out_probs = get_probs(weights, adjacency)
    in_probs = get_probs(weights, adjacency.T)
    undirected = directed2undirected(adjacency)
    edge_weights = undirected.data.astype(float)
    graph = AggregateGraph(out_probs, in_probs, edge_weights, undirected.indices, undirected.indptr)
    return graph, out_probs, in_probs
24
+
25
+
26
def get_sampling_distributions(adjacency: sparse.csr_matrix, dendrogram: np.ndarray, weights: str = 'uniform'):
    """Get sampling distributions over each internal node of the tree.

    The dendrogram is replayed merge by merge on an aggregate graph; one value
    per merge (i.e., per internal node) is recorded in each returned array.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    dendrogram :
        Dendrogram. The first two entries of row ``t`` are the clusters merged at step ``t``.
    weights :
        Weights of nodes.
        ``'degree'`` or ``'uniform'`` (default).

    Returns
    -------
    edge_sampling: np.ndarray
        Edge sampling distribution.
    node_sampling: np.ndarray
        Node sampling distribution.
    cluster_weights: np.ndarray
        Cluster weights.
    """
    n = adjacency.shape[0]
    graph, _, _ = _instantiate_vars(adjacency, weights)
    n_merges = n - 1
    cluster_weight = np.zeros(n_merges)
    edge_sampling = np.zeros(n_merges)
    node_sampling = np.zeros(n_merges)

    # These dictionaries are updated in place by ``graph.merge``.
    neighbors = graph.neighbors
    out_w = graph.cluster_out_weights
    in_w = graph.cluster_in_weights

    for t in range(n_merges):
        row = dendrogram[t]
        i = int(row[0])
        j = int(row[1])

        # contribution of the pair (i, j)
        if j in neighbors[i]:
            edge_sampling[t] += 2 * neighbors[i][j]
        node_sampling[t] += out_w[i] * in_w[j] + out_w[j] * in_w[i]
        cluster_weight[t] = out_w[i] + out_w[j] + in_w[i] + in_w[j]

        for node in {i, j}:
            if node >= n:
                # already an aggregated cluster: no original self-loop to account for
                continue
            # self-loop
            node_sampling[t] += out_w[node] * in_w[node]
            if node in neighbors[node]:
                edge_sampling[t] += neighbors[node][node]

        graph.merge(i, j)

    return edge_sampling, node_sampling, cluster_weight / 2
69
+
70
+
71
def dasgupta_cost(adjacency: sparse.csr_matrix, dendrogram: np.ndarray, weights: str = 'uniform',
                  normalized: bool = False) -> float:
    """Dasgupta's cost of a hierarchy.

    Expected size (weights = ``'uniform'``) or expected volume (weights = ``'degree'``) of the cluster induced by
    random edge sampling (closest ancestor of the two nodes in the hierarchy).

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    dendrogram :
        Dendrogram.
    weights :
        Weights of nodes.
        ``'degree'`` or ``'uniform'`` (default).
    normalized :
        If ``True``, normalized cost (between 0 and 1).

    Returns
    -------
    cost : float
        Cost.

    Example
    -------
    >>> from sknetwork.hierarchy import dasgupta_cost, Paris
    >>> from sknetwork.data import house
    >>> paris = Paris()
    >>> adjacency = house()
    >>> dendrogram = paris.fit_transform(adjacency)
    >>> cost = dasgupta_cost(adjacency, dendrogram)
    >>> np.round(cost, 2)
    3.33

    References
    ----------
    Dasgupta, S. (2016). A cost function for similarity-based hierarchical clustering.
    Proceedings of ACM symposium on Theory of Computing.
    """
    adjacency = check_format(adjacency)
    check_square(adjacency)

    n = adjacency.shape[0]
    check_min_size(n, 2)

    # The dot product below is the normalized cost: edge sampling probability of each
    # internal node times the weight of the corresponding cluster.
    edge_sampling, _, cluster_weight = get_sampling_distributions(adjacency, dendrogram, weights)
    cost = edge_sampling.dot(cluster_weight)

    if not normalized:
        # Rescale by the sum of the adjacency entries ('degree') or by the number of nodes (otherwise).
        if weights == 'degree':
            cost *= adjacency.data.sum()
        else:
            cost *= n

    return cost
127
+
128
+
129
def dasgupta_score(adjacency: sparse.csr_matrix, dendrogram: np.ndarray, weights: str = 'uniform') -> float:
    """Dasgupta's score of a hierarchy (quality metric, between 0 and 1).

    Defined as 1 - normalized Dasgupta's cost.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    dendrogram :
        Dendrogram.
    weights :
        Weights of nodes.
        ``'degree'`` or ``'uniform'`` (default).

    Returns
    -------
    score : float
        Score.

    Example
    -------
    >>> from sknetwork.hierarchy import dasgupta_score, Paris
    >>> from sknetwork.data import house
    >>> paris = Paris()
    >>> adjacency = house()
    >>> dendrogram = paris.fit_transform(adjacency)
    >>> score = dasgupta_score(adjacency, dendrogram)
    >>> np.round(score, 2)
    0.33

    References
    ----------
    Dasgupta, S. (2016). A cost function for similarity-based hierarchical clustering.
    Proceedings of ACM symposium on Theory of Computing.
    """
    normalized_cost = dasgupta_cost(adjacency, dendrogram, weights, normalized=True)
    return 1 - normalized_cost
166
+
167
+
168
def tree_sampling_divergence(adjacency: sparse.csr_matrix, dendrogram: np.ndarray, weights: str = 'degree',
                             normalized: bool = True) -> float:
    """Tree sampling divergence of a hierarchy (quality metric).

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    dendrogram :
        Dendrogram.
    weights :
        Weights of nodes.
        ``'degree'`` (default) or ``'uniform'``.
    normalized :
        If ``True``, normalized score (between 0 and 1).

    Returns
    -------
    score : float
        Score.

    Example
    -------
    >>> from sknetwork.hierarchy import tree_sampling_divergence, Paris
    >>> from sknetwork.data import house
    >>> paris = Paris()
    >>> adjacency = house()
    >>> dendrogram = paris.fit_transform(adjacency)
    >>> score = tree_sampling_divergence(adjacency, dendrogram)
    >>> np.round(score, 2)
    0.05

    References
    ----------
    Charpentier, B. & Bonald, T. (2019).
    `Tree Sampling Divergence: An Information-Theoretic Metric for
    Hierarchical Graph Clustering.
    <https://hal.telecom-paristech.fr/hal-02144394/document>`_
    Proceedings of IJCAI.
    """
    adjacency = check_format(adjacency)
    check_square(adjacency)
    check_min_nnz(adjacency.nnz, 1)
    adjacency = adjacency.astype(float)
    n = adjacency.shape[0]
    check_min_size(n, 2)

    # Rescale the (float copy of the) adjacency so that its entries sum to 1.
    total_weight = adjacency.data.sum()
    adjacency.data /= total_weight
    edge_sampling, node_sampling, _ = get_sampling_distributions(adjacency, dendrogram, weights)

    # Only internal nodes with non-zero edge sampling probability contribute.
    support = np.where(edge_sampling)[0]
    edge_probs = edge_sampling[support]
    node_probs = node_sampling[support]
    score = edge_probs.dot(np.log(edge_probs / node_probs))
    if not normalized:
        return score

    probs_row = get_probs(weights, adjacency)
    probs_col = get_probs(weights, adjacency.T)
    shape = (n, n)

    row_scaling = sparse.diags(probs_row, shape=shape, format='csr')
    row_scaling.data = 1 / row_scaling.data
    col_scaling = sparse.diags(probs_col, shape=shape, format='csr')
    col_scaling.data = 1 / col_scaling.data
    sampling_ratio = row_scaling.dot(adjacency.dot(col_scaling))

    # Same products with unit diagonals, so that the stored entries are laid out
    # like those of ``sampling_ratio``.
    row_scaling.data = np.ones(len(row_scaling.data))
    col_scaling.data = np.ones(len(col_scaling.data))
    pair_probs = row_scaling.dot(adjacency.dot(col_scaling))

    mutual_information = pair_probs.data.dot(np.log(sampling_ratio.data))
    if mutual_information > 0:
        score /= mutual_information
    return score
@@ -0,0 +1,317 @@
1
+ # distutils: language = c++
2
+ # cython: language_level=3
3
+ # cython: linetrace=True
4
+ # distutils: define_macros=CYTHON_TRACE_NOGIL=1
5
+ """
6
+ Created on March 2019
7
+ @author: Thomas Bonald <bonald@enst.fr>
8
+ @author: Bertrand Charpentier <bertrand.charpentier@live.fr>
9
+ @author: Quentin Lutz <qlutz@enst.fr>
10
+ """
11
+ import numpy as np
12
+ cimport numpy as np
13
+
14
+ cimport cython
15
+
16
+ from libcpp.vector cimport vector
17
+
18
+ from typing import Union
19
+
20
+ from scipy import sparse
21
+
22
+ from sknetwork.hierarchy.base import BaseHierarchy
23
+ from sknetwork.hierarchy.postprocess import reorder_dendrogram
24
+ from sknetwork.utils.format import check_format, get_adjacency, directed2undirected
25
+ from sknetwork.utils.check import get_probs, is_symmetric
26
+
27
+
28
cdef class AggregateGraph:
    """A class of graphs suitable for aggregation. Each node represents a cluster.

    Parameters
    ----------
    out_weights :
        Out-weights (sums to 1).
    in_weights :
        In-weights (sums to 1).
    data :
        CSR format data array of the normalized adjacency matrix.
    indices :
        CSR format index array of the normalized adjacency matrix.
    indptr :
        CSR format index pointer array of the normalized adjacency matrix.

    Attributes
    ----------
    neighbors : dict[dict]
        Dictionary of dictionary of edge weights.
    next_cluster : int
        Index of the next cluster (resulting from aggregation).
    cluster_sizes : dict
        Dictionary of cluster sizes.
    cluster_out_weights : dict
        Dictionary of cluster out-weights (sums to 1).
    cluster_in_weights : dict
        Dictionary of cluster in-weights (sums to 1).
    """
    cdef public int next_cluster
    cdef public dict neighbors
    # NOTE(review): ``tmp`` is declared but never assigned in this class.
    cdef public dict tmp
    # Not declared ``public``, unlike the other dictionaries.
    cdef dict cluster_sizes
    cdef public dict cluster_out_weights
    cdef public dict cluster_in_weights

    def __init__(self, double[:] out_weights, double[:] in_weights, double[:] data, int[:] indices,
                 int[:] indptr):
        # Number of nodes, deduced from the CSR index pointer array.
        cdef int n = indptr.shape[0] - 1
        # NOTE(review): ``cdef float`` is a single-precision C float while ``data`` holds doubles.
        cdef float total_weight = np.sum(data)
        cdef int i
        cdef int j

        # The first cluster created by a merge gets index n.
        self.next_cluster = n
        self.neighbors = {}
        for i in range(n):
            # normalize so that the sum of edge weights is equal to 1
            self.neighbors[i] = {}
            for j in range(indptr[i], indptr[i + 1]):
                self.neighbors[i][indices[j]] = data[j] / total_weight

        # Initially, each node is its own cluster of size 1.
        cluster_sizes = {}
        cluster_out_weights = {}
        cluster_in_weights = {}
        for i in range(n):
            cluster_sizes[i] = 1
            cluster_out_weights[i] = out_weights[i]
            cluster_in_weights[i] = in_weights[i]
        self.cluster_sizes = cluster_sizes
        self.cluster_out_weights = cluster_out_weights
        self.cluster_in_weights = cluster_in_weights

    cdef float similarity(self, int node1, int node2):
        """Similarity of two nodes.

        Computed as twice the weight of the edge between the two nodes, divided by
        ``out_weight[node1] * in_weight[node2] + out_weight[node2] * in_weight[node1]``.

        Parameters
        ----------
        node1, node2 :
            Nodes.

        Returns
        -------
        sim: float
            Similarity (minus infinity if the denominator is not positive).
        """
        cdef float sim = -float("inf")
        cdef float a = self.cluster_out_weights[node1] * self.cluster_in_weights[node2]
        cdef float b = self.cluster_out_weights[node2] * self.cluster_in_weights[node1]
        cdef float den = a + b

        if den > 0:
            sim = 2 * self.neighbors[node1][node2] / den
        return sim

    @cython.boundscheck(False)
    @cython.wraparound(False)
    cpdef AggregateGraph merge(self, int node1, int node2):
        """Merges two nodes.

        The two nodes are replaced by a new node, indexed by ``next_cluster``, whose edge
        weights, size and in/out weights are the sums of those of the merged nodes.
        The weights between and within the merged nodes are accumulated in the
        self-loop of the new node.

        Parameters
        ----------
        node1, node2 :
            The two nodes to merge.

        Returns
        -------
        self: :class:`AggregateGraph`
            The aggregate graph, updated in place.
        """
        cdef int new_node = self.next_cluster
        self.neighbors[new_node] = {}
        self.neighbors[new_node][new_node] = 0
        # ``-`` binds tighter than ``&``: neighbors of node1 that are also neighbors of node2,
        # node1 and node2 themselves being excluded.
        cdef set common_neighbors = set(self.neighbors[node1].keys()) & set(self.neighbors[node2].keys()) - {node1, node2}
        # Common neighbors: add up both edge weights, in both directions.
        for node in common_neighbors:
            self.neighbors[new_node][node] = self.neighbors[node1].pop(node) + self.neighbors[node2].pop(node)
            self.neighbors[node][new_node] = self.neighbors[node].pop(node1) + self.neighbors[node].pop(node2)
        for node in {node1, node2}:
            # Remaining neighbors (common ones were popped above): move the edge to the new node.
            for neighbor in set(self.neighbors[node].keys()) - {node1, node2}:
                self.neighbors[new_node][neighbor] = self.neighbors[node].pop(neighbor)
                self.neighbors[neighbor][new_node] = self.neighbors[neighbor].pop(node)
            # Edges between or within the merged nodes become the self-loop of the new node.
            for other_node in {node1, node2}:
                if other_node in self.neighbors[node]:
                    self.neighbors[new_node][new_node] += self.neighbors[node][other_node]
            del self.neighbors[node]
        self.cluster_sizes[new_node] = self.cluster_sizes.pop(node1) + self.cluster_sizes.pop(node2)
        self.cluster_out_weights[new_node] = self.cluster_out_weights.pop(node1) + self.cluster_out_weights.pop(node2)
        self.cluster_in_weights[new_node] = self.cluster_in_weights.pop(node1) + self.cluster_in_weights.pop(node2)
        self.next_cluster += 1
        return self
147
+
148
+
149
class Paris(BaseHierarchy):
    """Agglomerative clustering algorithm that performs greedy merge of nodes based on their similarity.

    The similarity between nodes :math:`i,j` is :math:`\\dfrac{A_{ij}}{w_i w_j}` where

    * :math:`A_{ij}` is the weight of edge :math:`i,j`,
    * :math:`w_i, w_j` are the weights of nodes :math:`i,j`

    If the input matrix :math:`B` is a biadjacency matrix (i.e., rectangular), the algorithm is applied
    to the corresponding adjacency matrix :math:`A = \\begin{bmatrix} 0 & B \\\\ B^T & 0 \\end{bmatrix}`

    Parameters
    ----------
    weights :
        Weights of nodes.
        ``'degree'`` (default) or ``'uniform'``.
    reorder :
        If ``True`` (default), reorder the dendrogram in non-decreasing order of height.

    Attributes
    ----------
    dendrogram_ :
        Dendrogram of the graph.
    dendrogram_row_ :
        Dendrogram for the rows, for bipartite graphs.
    dendrogram_col_ :
        Dendrogram for the columns, for bipartite graphs.
    dendrogram_full_ :
        Dendrogram for both rows and columns, indexed in this order, for bipartite graphs.

    Examples
    --------
    >>> from sknetwork.hierarchy import Paris
    >>> from sknetwork.data import house
    >>> paris = Paris()
    >>> adjacency = house()
    >>> dendrogram = paris.fit_predict(adjacency)
    >>> np.round(dendrogram, 2)
    array([[3.  , 2.  , 0.17  , 2.  ],
           [1.  , 0.  , 0.25  , 2.  ],
           [6.  , 4.  , 0.31  , 3.  ],
           [7.  , 5.  , 0.67  , 5.  ]])

    Notes
    -----
    Each row of the dendrogram = :math:`i, j`, distance, size of cluster :math:`i + j`.


    See Also
    --------
    scipy.cluster.hierarchy.linkage

    References
    ----------
    T. Bonald, B. Charpentier, A. Galland, A. Hollocou (2018).
    `Hierarchical Graph Clustering using Node Pair Sampling.
    <https://arxiv.org/abs/1806.01664>`_
    Workshop on Mining and Learning with Graphs.
    """
    def __init__(self, weights: str = 'degree', reorder: bool = True):
        super(Paris, self).__init__()
        self.dendrogram_ = None
        self.weights = weights
        self.reorder = reorder
        self.bipartite = None

    @cython.boundscheck(False)
    @cython.wraparound(False)
    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray]) -> 'Paris':
        """Agglomerative clustering using the nearest neighbor chain.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.

        Returns
        -------
        self: :class:`Paris`

        Raises
        ------
        ValueError
            If the graph has fewer than two nodes.
        """
        self._init_vars()

        # input
        input_matrix = check_format(input_matrix)
        adjacency, self.bipartite = get_adjacency(input_matrix)

        # Node weights are computed before the adjacency is symmetrized below.
        weights = self.weights
        out_weights = get_probs(weights, adjacency)
        in_weights = get_probs(weights, adjacency.T)

        if not is_symmetric(adjacency):
            adjacency = directed2undirected(adjacency)

        # Nodes whose out-weight and in-weight are both zero get a unit diagonal entry (self-loop).
        null_weights = (out_weights + in_weights) == 0
        if any(null_weights):
            adjacency += sparse.diags(null_weights.astype(int))

        if adjacency.shape[0] <= 1:
            raise ValueError('The graph must contain at least two nodes.')

        # agglomerative clustering
        aggregate_graph = AggregateGraph(out_weights, in_weights, adjacency.data.astype(float),
                                         adjacency.indices, adjacency.indptr)

        # (node, size) of each cluster that has no neighbor left other than itself
        cdef vector[(int, int)] connected_components
        dendrogram = []
        cdef int node
        cdef int next_node
        cdef int cluster_size
        cdef int next_cluster_size
        cdef int neighbor
        cdef int nearest_neighbor
        cdef int nearest_neighbor_last
        cdef vector[int] chain
        cdef float sim
        cdef float max_sim

        # Each pass starts a new chain from a remaining cluster; clusters are removed from
        # ``cluster_sizes`` either by a merge or when they are recorded as a finished component.
        while len(aggregate_graph.cluster_sizes):
            # take the first key of the dictionary as starting node
            for node in aggregate_graph.cluster_sizes:
                break
            chain.clear()
            chain.push_back(node)
            while chain.size():
                # pop the last node of the chain
                node = chain[chain.size() - 1]
                chain.pop_back()
                if set(aggregate_graph.neighbors[node].keys()) - {node}:
                    # nearest neighbor = most similar neighbor, ties broken by smallest index
                    max_sim = -float("inf")
                    for neighbor in set(aggregate_graph.neighbors[node].keys()) - {node}:
                        sim = aggregate_graph.similarity(node, neighbor)
                        if sim > max_sim:
                            nearest_neighbor = neighbor
                            max_sim = sim
                        elif sim == max_sim:
                            nearest_neighbor = min(neighbor, nearest_neighbor)
                    if chain.size():
                        nearest_neighbor_last = chain[chain.size() - 1]
                        chain.pop_back()
                        if nearest_neighbor_last == nearest_neighbor:
                            # the nearest neighbor is the previous node of the chain: merge them,
                            # with distance equal to the inverse of their similarity
                            size = aggregate_graph.cluster_sizes[node] + aggregate_graph.cluster_sizes[nearest_neighbor]
                            dendrogram.append([node, nearest_neighbor, 1. / max_sim, size])
                            aggregate_graph.merge(node, nearest_neighbor)
                        else:
                            # restore the chain and extend it with the nearest neighbor
                            chain.push_back(nearest_neighbor_last)
                            chain.push_back(node)
                            chain.push_back(nearest_neighbor)
                    else:
                        chain.push_back(node)
                        chain.push_back(nearest_neighbor)
                else:
                    # no neighbor other than itself: record the cluster and remove it
                    connected_components.push_back((node, aggregate_graph.cluster_sizes[node]))
                    del aggregate_graph.cluster_sizes[node]

        # Join the recorded components one by one, at infinite distance, starting from the last one.
        node, cluster_size = connected_components[connected_components.size() - 1]
        connected_components.pop_back()
        for next_node, next_cluster_size in connected_components:
            cluster_size += next_cluster_size
            dendrogram.append([node, next_node, float("inf"), cluster_size])
            node = aggregate_graph.next_cluster
            aggregate_graph.next_cluster += 1

        dendrogram = np.array(dendrogram)
        if self.reorder:
            dendrogram = reorder_dendrogram(dendrogram)

        self.dendrogram_ = dendrogram
        if self.bipartite:
            self._split_vars(input_matrix.shape)

        return self