scikit-network 0.28.3__cp39-cp39-macosx_12_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-network might be problematic. Click here for more details.
- scikit_network-0.28.3.dist-info/AUTHORS.rst +41 -0
- scikit_network-0.28.3.dist-info/LICENSE +34 -0
- scikit_network-0.28.3.dist-info/METADATA +457 -0
- scikit_network-0.28.3.dist-info/RECORD +240 -0
- scikit_network-0.28.3.dist-info/WHEEL +5 -0
- scikit_network-0.28.3.dist-info/top_level.txt +1 -0
- sknetwork/__init__.py +21 -0
- sknetwork/classification/__init__.py +8 -0
- sknetwork/classification/base.py +84 -0
- sknetwork/classification/base_rank.py +143 -0
- sknetwork/classification/diffusion.py +134 -0
- sknetwork/classification/knn.py +162 -0
- sknetwork/classification/metrics.py +205 -0
- sknetwork/classification/pagerank.py +66 -0
- sknetwork/classification/propagation.py +152 -0
- sknetwork/classification/tests/__init__.py +1 -0
- sknetwork/classification/tests/test_API.py +35 -0
- sknetwork/classification/tests/test_diffusion.py +37 -0
- sknetwork/classification/tests/test_knn.py +24 -0
- sknetwork/classification/tests/test_metrics.py +53 -0
- sknetwork/classification/tests/test_pagerank.py +20 -0
- sknetwork/classification/tests/test_propagation.py +24 -0
- sknetwork/classification/vote.cpython-39-darwin.so +0 -0
- sknetwork/classification/vote.pyx +58 -0
- sknetwork/clustering/__init__.py +7 -0
- sknetwork/clustering/base.py +102 -0
- sknetwork/clustering/kmeans.py +142 -0
- sknetwork/clustering/louvain.py +255 -0
- sknetwork/clustering/louvain_core.cpython-39-darwin.so +0 -0
- sknetwork/clustering/louvain_core.pyx +134 -0
- sknetwork/clustering/metrics.py +91 -0
- sknetwork/clustering/postprocess.py +66 -0
- sknetwork/clustering/propagation_clustering.py +108 -0
- sknetwork/clustering/tests/__init__.py +1 -0
- sknetwork/clustering/tests/test_API.py +37 -0
- sknetwork/clustering/tests/test_kmeans.py +47 -0
- sknetwork/clustering/tests/test_louvain.py +104 -0
- sknetwork/clustering/tests/test_metrics.py +50 -0
- sknetwork/clustering/tests/test_post_processing.py +23 -0
- sknetwork/clustering/tests/test_postprocess.py +39 -0
- sknetwork/data/__init__.py +5 -0
- sknetwork/data/load.py +408 -0
- sknetwork/data/models.py +459 -0
- sknetwork/data/parse.py +621 -0
- sknetwork/data/test_graphs.py +84 -0
- sknetwork/data/tests/__init__.py +1 -0
- sknetwork/data/tests/test_API.py +30 -0
- sknetwork/data/tests/test_load.py +95 -0
- sknetwork/data/tests/test_models.py +52 -0
- sknetwork/data/tests/test_parse.py +253 -0
- sknetwork/data/tests/test_test_graphs.py +30 -0
- sknetwork/data/tests/test_toy_graphs.py +68 -0
- sknetwork/data/toy_graphs.py +619 -0
- sknetwork/embedding/__init__.py +10 -0
- sknetwork/embedding/base.py +90 -0
- sknetwork/embedding/force_atlas.py +197 -0
- sknetwork/embedding/louvain_embedding.py +174 -0
- sknetwork/embedding/louvain_hierarchy.py +142 -0
- sknetwork/embedding/metrics.py +66 -0
- sknetwork/embedding/random_projection.py +133 -0
- sknetwork/embedding/spectral.py +214 -0
- sknetwork/embedding/spring.py +198 -0
- sknetwork/embedding/svd.py +363 -0
- sknetwork/embedding/tests/__init__.py +1 -0
- sknetwork/embedding/tests/test_API.py +73 -0
- sknetwork/embedding/tests/test_force_atlas.py +35 -0
- sknetwork/embedding/tests/test_louvain_embedding.py +33 -0
- sknetwork/embedding/tests/test_louvain_hierarchy.py +19 -0
- sknetwork/embedding/tests/test_metrics.py +29 -0
- sknetwork/embedding/tests/test_random_projection.py +28 -0
- sknetwork/embedding/tests/test_spectral.py +84 -0
- sknetwork/embedding/tests/test_spring.py +50 -0
- sknetwork/embedding/tests/test_svd.py +37 -0
- sknetwork/flow/__init__.py +3 -0
- sknetwork/flow/flow.py +73 -0
- sknetwork/flow/tests/__init__.py +1 -0
- sknetwork/flow/tests/test_flow.py +17 -0
- sknetwork/flow/tests/test_utils.py +69 -0
- sknetwork/flow/utils.py +91 -0
- sknetwork/gnn/__init__.py +10 -0
- sknetwork/gnn/activation.py +117 -0
- sknetwork/gnn/base.py +155 -0
- sknetwork/gnn/base_activation.py +89 -0
- sknetwork/gnn/base_layer.py +109 -0
- sknetwork/gnn/gnn_classifier.py +381 -0
- sknetwork/gnn/layer.py +153 -0
- sknetwork/gnn/layers.py +127 -0
- sknetwork/gnn/loss.py +180 -0
- sknetwork/gnn/neighbor_sampler.py +65 -0
- sknetwork/gnn/optimizer.py +163 -0
- sknetwork/gnn/tests/__init__.py +1 -0
- sknetwork/gnn/tests/test_activation.py +56 -0
- sknetwork/gnn/tests/test_base.py +79 -0
- sknetwork/gnn/tests/test_base_layer.py +37 -0
- sknetwork/gnn/tests/test_gnn_classifier.py +192 -0
- sknetwork/gnn/tests/test_layers.py +80 -0
- sknetwork/gnn/tests/test_loss.py +33 -0
- sknetwork/gnn/tests/test_neigh_sampler.py +23 -0
- sknetwork/gnn/tests/test_optimizer.py +43 -0
- sknetwork/gnn/tests/test_utils.py +93 -0
- sknetwork/gnn/utils.py +219 -0
- sknetwork/hierarchy/__init__.py +7 -0
- sknetwork/hierarchy/base.py +69 -0
- sknetwork/hierarchy/louvain_hierarchy.py +264 -0
- sknetwork/hierarchy/metrics.py +234 -0
- sknetwork/hierarchy/paris.cpython-39-darwin.so +0 -0
- sknetwork/hierarchy/paris.pyx +317 -0
- sknetwork/hierarchy/postprocess.py +350 -0
- sknetwork/hierarchy/tests/__init__.py +1 -0
- sknetwork/hierarchy/tests/test_API.py +25 -0
- sknetwork/hierarchy/tests/test_algos.py +29 -0
- sknetwork/hierarchy/tests/test_metrics.py +62 -0
- sknetwork/hierarchy/tests/test_postprocess.py +57 -0
- sknetwork/hierarchy/tests/test_ward.py +25 -0
- sknetwork/hierarchy/ward.py +94 -0
- sknetwork/linalg/__init__.py +9 -0
- sknetwork/linalg/basics.py +37 -0
- sknetwork/linalg/diteration.cpython-39-darwin.so +0 -0
- sknetwork/linalg/diteration.pyx +49 -0
- sknetwork/linalg/eig_solver.py +93 -0
- sknetwork/linalg/laplacian.py +15 -0
- sknetwork/linalg/normalization.py +66 -0
- sknetwork/linalg/operators.py +225 -0
- sknetwork/linalg/polynome.py +76 -0
- sknetwork/linalg/ppr_solver.py +170 -0
- sknetwork/linalg/push.cpython-39-darwin.so +0 -0
- sknetwork/linalg/push.pyx +73 -0
- sknetwork/linalg/sparse_lowrank.py +142 -0
- sknetwork/linalg/svd_solver.py +91 -0
- sknetwork/linalg/tests/__init__.py +1 -0
- sknetwork/linalg/tests/test_eig.py +44 -0
- sknetwork/linalg/tests/test_laplacian.py +18 -0
- sknetwork/linalg/tests/test_normalization.py +38 -0
- sknetwork/linalg/tests/test_operators.py +70 -0
- sknetwork/linalg/tests/test_polynome.py +38 -0
- sknetwork/linalg/tests/test_ppr.py +50 -0
- sknetwork/linalg/tests/test_sparse_lowrank.py +61 -0
- sknetwork/linalg/tests/test_svd.py +38 -0
- sknetwork/linkpred/__init__.py +4 -0
- sknetwork/linkpred/base.py +80 -0
- sknetwork/linkpred/first_order.py +508 -0
- sknetwork/linkpred/first_order_core.cpython-39-darwin.so +0 -0
- sknetwork/linkpred/first_order_core.pyx +315 -0
- sknetwork/linkpred/postprocessing.py +98 -0
- sknetwork/linkpred/tests/__init__.py +1 -0
- sknetwork/linkpred/tests/test_API.py +49 -0
- sknetwork/linkpred/tests/test_postprocessing.py +21 -0
- sknetwork/path/__init__.py +4 -0
- sknetwork/path/metrics.py +148 -0
- sknetwork/path/search.py +65 -0
- sknetwork/path/shortest_path.py +186 -0
- sknetwork/path/tests/__init__.py +1 -0
- sknetwork/path/tests/test_metrics.py +29 -0
- sknetwork/path/tests/test_search.py +25 -0
- sknetwork/path/tests/test_shortest_path.py +45 -0
- sknetwork/ranking/__init__.py +9 -0
- sknetwork/ranking/base.py +56 -0
- sknetwork/ranking/betweenness.cpython-39-darwin.so +0 -0
- sknetwork/ranking/betweenness.pyx +99 -0
- sknetwork/ranking/closeness.py +95 -0
- sknetwork/ranking/harmonic.py +82 -0
- sknetwork/ranking/hits.py +94 -0
- sknetwork/ranking/katz.py +81 -0
- sknetwork/ranking/pagerank.py +107 -0
- sknetwork/ranking/postprocess.py +25 -0
- sknetwork/ranking/tests/__init__.py +1 -0
- sknetwork/ranking/tests/test_API.py +34 -0
- sknetwork/ranking/tests/test_betweenness.py +38 -0
- sknetwork/ranking/tests/test_closeness.py +34 -0
- sknetwork/ranking/tests/test_hits.py +20 -0
- sknetwork/ranking/tests/test_pagerank.py +69 -0
- sknetwork/regression/__init__.py +4 -0
- sknetwork/regression/base.py +56 -0
- sknetwork/regression/diffusion.py +190 -0
- sknetwork/regression/tests/__init__.py +1 -0
- sknetwork/regression/tests/test_API.py +34 -0
- sknetwork/regression/tests/test_diffusion.py +48 -0
- sknetwork/sknetwork.py +3 -0
- sknetwork/topology/__init__.py +9 -0
- sknetwork/topology/dag.py +74 -0
- sknetwork/topology/dag_core.cpython-39-darwin.so +0 -0
- sknetwork/topology/dag_core.pyx +38 -0
- sknetwork/topology/kcliques.cpython-39-darwin.so +0 -0
- sknetwork/topology/kcliques.pyx +193 -0
- sknetwork/topology/kcore.cpython-39-darwin.so +0 -0
- sknetwork/topology/kcore.pyx +120 -0
- sknetwork/topology/structure.py +234 -0
- sknetwork/topology/tests/__init__.py +1 -0
- sknetwork/topology/tests/test_cliques.py +28 -0
- sknetwork/topology/tests/test_cores.py +21 -0
- sknetwork/topology/tests/test_dag.py +26 -0
- sknetwork/topology/tests/test_structure.py +99 -0
- sknetwork/topology/tests/test_triangles.py +42 -0
- sknetwork/topology/tests/test_wl_coloring.py +49 -0
- sknetwork/topology/tests/test_wl_kernel.py +31 -0
- sknetwork/topology/triangles.cpython-39-darwin.so +0 -0
- sknetwork/topology/triangles.pyx +166 -0
- sknetwork/topology/weisfeiler_lehman.py +163 -0
- sknetwork/topology/weisfeiler_lehman_core.cpython-39-darwin.so +0 -0
- sknetwork/topology/weisfeiler_lehman_core.pyx +116 -0
- sknetwork/utils/__init__.py +40 -0
- sknetwork/utils/base.py +35 -0
- sknetwork/utils/check.py +354 -0
- sknetwork/utils/co_neighbor.py +71 -0
- sknetwork/utils/format.py +219 -0
- sknetwork/utils/kmeans.py +89 -0
- sknetwork/utils/knn.py +166 -0
- sknetwork/utils/knn1d.cpython-39-darwin.so +0 -0
- sknetwork/utils/knn1d.pyx +80 -0
- sknetwork/utils/membership.py +82 -0
- sknetwork/utils/minheap.cpython-39-darwin.so +0 -0
- sknetwork/utils/minheap.pxd +22 -0
- sknetwork/utils/minheap.pyx +111 -0
- sknetwork/utils/neighbors.py +115 -0
- sknetwork/utils/seeds.py +75 -0
- sknetwork/utils/simplex.py +140 -0
- sknetwork/utils/tests/__init__.py +1 -0
- sknetwork/utils/tests/test_base.py +28 -0
- sknetwork/utils/tests/test_bunch.py +16 -0
- sknetwork/utils/tests/test_check.py +190 -0
- sknetwork/utils/tests/test_co_neighbor.py +43 -0
- sknetwork/utils/tests/test_format.py +61 -0
- sknetwork/utils/tests/test_kmeans.py +21 -0
- sknetwork/utils/tests/test_knn.py +32 -0
- sknetwork/utils/tests/test_membership.py +24 -0
- sknetwork/utils/tests/test_neighbors.py +41 -0
- sknetwork/utils/tests/test_projection_simplex.py +33 -0
- sknetwork/utils/tests/test_seeds.py +67 -0
- sknetwork/utils/tests/test_verbose.py +15 -0
- sknetwork/utils/tests/test_ward.py +20 -0
- sknetwork/utils/timeout.py +38 -0
- sknetwork/utils/verbose.py +37 -0
- sknetwork/utils/ward.py +60 -0
- sknetwork/visualization/__init__.py +4 -0
- sknetwork/visualization/colors.py +34 -0
- sknetwork/visualization/dendrograms.py +229 -0
- sknetwork/visualization/graphs.py +819 -0
- sknetwork/visualization/tests/__init__.py +1 -0
- sknetwork/visualization/tests/test_dendrograms.py +53 -0
- sknetwork/visualization/tests/test_graphs.py +167 -0
|
@@ -0,0 +1,350 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Created on June 2019
|
|
5
|
+
@author: Thomas Bonald <bonald@enst.fr>
|
|
6
|
+
@author: Bertrand Charpentier <bertrand.charpentier@live.fr>
|
|
7
|
+
@author: Quentin Lutz <qlutz@enst.fr>
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import copy
|
|
11
|
+
from collections import defaultdict
|
|
12
|
+
from typing import Optional, Union, Tuple
|
|
13
|
+
|
|
14
|
+
import numpy as np
|
|
15
|
+
|
|
16
|
+
from sknetwork.utils.check import check_n_clusters, check_dendrogram
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def reorder_dendrogram(dendrogram: np.ndarray) -> np.ndarray:
    """Reorder the dendrogram in non-decreasing order of height.

    Merges are sorted by height, ties being broken by the largest index of the two merged nodes.
    Internal nodes are renumbered so that they match the new order of the rows.

    Parameters
    ----------
    dendrogram :
        Dendrogram (columns 0 and 1 are the merged nodes, column 2 is the height).

    Returns
    -------
    dendrogram_new :
        Reordered copy of the dendrogram (the input array is not modified).
    """
    n_merges = dendrogram.shape[0]
    n_leaves = n_merges + 1

    # lexsort uses the LAST key as primary key: height first, then largest child index.
    sort_keys = np.vstack((np.max(dendrogram[:, :2], axis=1), dendrogram[:, 2])).astype(float)
    permutation = np.lexsort(sort_keys)

    # Fancy indexing returns a copy, so the relabeling below does not touch the input.
    reordered = dendrogram[permutation]

    # Leaves keep their index; the internal node created by old row t gets the index of its new row.
    relabel = np.arange(n_leaves + n_merges)
    relabel[n_leaves + permutation] = n_leaves + np.arange(n_merges)
    for column in (0, 1):
        reordered[:, column] = relabel[reordered[:, column].astype(int)]
    return reordered
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def get_labels(dendrogram: np.ndarray, cluster: dict, sort_clusters: bool, return_dendrogram: bool):
    """Returns the labels from clusters.

    Parameters
    ----------
    dendrogram :
        Dendrogram.
    cluster :
        Dictionary whose values are the lists of nodes forming each cluster.
    sort_clusters :
        If ``True``, labels are assigned in decreasing order of cluster size.
    return_dendrogram :
        If ``True``, also returns the dendrogram formed by the clusters up to the root.

    Returns
    -------
    labels : np.ndarray
        Label of each node.
    dendrogram_new : np.ndarray
        Dendrogram whose leaves are the clusters (only if ``return_dendrogram`` is ``True``).
    """
    n_nodes = len(dendrogram) + 1
    groups = list(cluster.values())
    if sort_clusters:
        group_sizes = np.array([len(group) for group in groups])
        groups = [groups[k] for k in np.argsort(-group_sizes)]

    labels = np.zeros(n_nodes, dtype=int)
    for label, group in enumerate(groups):
        labels[group] = label

    if not return_dendrogram:
        return labels

    # Cluster currently containing each node of the input dendrogram that has not been merged yet.
    node_to_cluster = dict(enumerate(labels))
    sizes = {label: len(group) for label, group in enumerate(groups)}
    merges = []
    next_new_id = len(groups)
    for next_old_id, (left, right, height, _) in enumerate(dendrogram, start=len(labels)):
        left_cluster = node_to_cluster.pop(int(left))
        right_cluster = node_to_cluster.pop(int(right))
        if left_cluster == right_cluster:
            # Merge inside a single cluster: the parent node simply stays in that cluster.
            node_to_cluster[next_old_id] = left_cluster
            continue
        merged_size = sizes.pop(left_cluster) + sizes.pop(right_cluster)
        sizes[next_new_id] = merged_size
        node_to_cluster[next_old_id] = next_new_id
        merges.append([left_cluster, right_cluster, height, merged_size])
        next_new_id += 1
    return labels, np.array(merges)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def cut_straight(dendrogram: np.ndarray, n_clusters: Optional[int] = None, threshold: Optional[float] = None,
                 sort_clusters: bool = True, return_dendrogram: bool = False) \
        -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
    """Cut a dendrogram and return the corresponding clustering.

    Parameters
    ----------
    dendrogram:
        Dendrogram.
    n_clusters :
        Number of clusters (optional).
        The number of clusters can be larger than n_clusters in case of equal heights in the dendrogram.
    threshold :
        Threshold on height (optional).
        If both n_clusters and threshold are ``None``, n_clusters is set to 2.
    sort_clusters :
        If ``True``,  sorts clusters in decreasing order of size.
    return_dendrogram :
        If ``True``, returns the dendrogram formed by the clusters up to the root.
    Returns
    -------
    labels : np.ndarray
        Cluster of each node.
    dendrogram_aggregate : np.ndarray
        Dendrogram starting from clusters (leaves = clusters).

    Example
    -------
    >>> from sknetwork.hierarchy import cut_straight
    >>> dendrogram = np.array([[0, 1, 0, 2], [2, 3, 1, 3]])
    >>> cut_straight(dendrogram)
    array([0, 0, 1])
    """
    check_dendrogram(dendrogram)
    n = dendrogram.shape[0] + 1

    if return_dendrogram:
        height = dendrogram[:, 2]
        # Reorder only when the heights are NOT already non-decreasing,
        # i.e. when some merge is strictly lower than the previous one.
        if np.any(height[1:] < height[:-1]):
            dendrogram = reorder_dendrogram(dendrogram)

    if n_clusters is None:
        # Without a target number of clusters, the threshold alone (if any) drives the cut.
        n_clusters = 2 if threshold is None else n
    else:
        check_n_clusters(n_clusters, n, n_min=1)

    if n_clusters == 1:
        # A single cluster requires every merge: there is no height at position n - 1 to cut at.
        cut = np.inf
    else:
        cut = np.sort(dendrogram[:, 2])[n - n_clusters]
    if threshold is not None:
        cut = max(cut, threshold)

    cluster = {i: [i] for i in range(n)}
    for t in range(n - 1):
        i = int(dendrogram[t][0])
        j = int(dendrogram[t][1])
        # Only merges strictly below the cut are applied, and only between clusters still alive.
        if dendrogram[t][2] < cut and i in cluster and j in cluster:
            cluster[n + t] = cluster.pop(i) + cluster.pop(j)

    return get_labels(dendrogram, cluster, sort_clusters, return_dendrogram)
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def cut_balanced(dendrogram: np.ndarray, max_cluster_size: int = 20, sort_clusters: bool = True,
                 return_dendrogram: bool = False) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
    """Cuts a dendrogram with a constraint on the cluster size and returns the corresponding clustering.

    Merges are replayed in the order of the dendrogram; a merge is applied only if both
    clusters are still available and the resulting cluster does not exceed the maximum size.

    Parameters
    ----------
    dendrogram:
        Dendrogram
    max_cluster_size :
        Maximum size of each cluster (between 2 and the number of nodes).
    sort_clusters :
        If ``True``, sort labels in decreasing order of cluster size.
    return_dendrogram :
        If ``True``, returns the dendrogram formed by the clusters up to the root.
    Returns
    -------
    labels : np.ndarray
        Label of each node.
    dendrogram_aggregate : np.ndarray
        Dendrogram starting from clusters (leaves = clusters).

    Raises
    ------
    ValueError
        If ``max_cluster_size`` is lower than 2 or larger than the number of nodes.

    Example
    -------
    >>> from sknetwork.hierarchy import cut_balanced
    >>> dendrogram = np.array([[0, 1, 0, 2], [2, 3, 1, 3]])
    >>> cut_balanced(dendrogram, 2)
    array([0, 0, 1])
    """
    check_dendrogram(dendrogram)
    n_nodes = dendrogram.shape[0] + 1
    if max_cluster_size > n_nodes or max_cluster_size < 2:
        raise ValueError("The maximum cluster size must be between 2 and the number of nodes.")

    members = {node: [node] for node in range(n_nodes)}
    for step, merge in enumerate(dendrogram):
        left, right = int(merge[0]), int(merge[1])
        # A child that is no longer a key belongs to a subtree whose merge was refused earlier.
        if left not in members or right not in members:
            continue
        if len(members[left]) + len(members[right]) > max_cluster_size:
            continue
        members[n_nodes + step] = members.pop(left) + members.pop(right)

    return get_labels(dendrogram, members, sort_clusters, return_dendrogram)
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def aggregate_dendrogram(dendrogram: np.ndarray, n_clusters: int = 2, return_counts: bool = False) \
        -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
    """Aggregate a dendrogram in order to get a certain number of leaves.
    The leaves in the output dendrogram correspond to subtrees in the input one.

    Parameters
    ----------
    dendrogram:
        The input to aggregate.
    n_clusters:
        Number of clusters (or leaves) to keep.
    return_counts
        If ``True``, returns an array of counts corresponding to the sizes of the merged subtrees.
        The sum of the counts is equal to the number of samples in the input dendrogram.

    Returns
    -------
    new_dendrogram:
        Aggregated dendrogram. The nodes are reindexed from 0.
    counts:
        Size of the subtrees corresponding to each leaf in new_dendrogram.
    """
    n_nodes: int = dendrogram.shape[0] + 1
    check_n_clusters(n_clusters, n_nodes, n_min=1)

    # Keep the last n_clusters - 1 merges; every node they reference is reindexed from 0.
    new_dendrogram = dendrogram[n_nodes - n_clusters:].copy()
    node_indices = np.array(sorted(set(new_dendrogram[:, 0]).union(set(new_dendrogram[:, 1]))))
    new_index = {ix: i for i, ix in enumerate(node_indices)}

    for j in range(2):
        for i in range(new_dendrogram.shape[0]):
            new_dendrogram[i, j] = new_index[new_dendrogram[i, j]]

    if not return_counts:
        return new_dendrogram

    if n_clusters == 1:
        # No merge is kept: the single leaf is the root, which contains every sample.
        return new_dendrogram, np.array([n_nodes], dtype=int)

    leaves = node_indices[:n_clusters].astype(int)
    # A leaf of the aggregated dendrogram that is also a leaf of the input has size 1.
    # Only internal nodes (index >= n_nodes) have a row in the input dendrogram; indexing
    # with ``leaves - n_nodes`` for original leaves would wrap around to unrelated rows.
    counts = np.ones(len(leaves), dtype=int)
    is_subtree = leaves >= n_nodes
    counts[is_subtree] = dendrogram[leaves[is_subtree] - n_nodes, 3]
    return new_dendrogram, counts
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def get_index(tree):
    """Return the largest leaf value of a tree given as nested lists.

    Parameters
    ----------
    tree:
        The tree to be indexed (a leaf value, or a list of subtrees).

    Returns
    -------
    index:
        ``tree`` itself if it is not a list, otherwise the maximum over all its subtrees.
    """
    # isinstance (rather than an exact type comparison) so that list subclasses are traversed too.
    if not isinstance(tree, list):
        return tree
    return np.max([get_index(subtree) for subtree in tree])
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def get_dendrogram(tree, dendrogram=None, index=None, depth=0, size=None, copy_tree=False):
    """Get dendrogram from tree.

    The tree is consumed in place: each processed subtree is replaced by the index of
    the node created for it. Use ``copy_tree=True`` to keep the passed tree unchanged.

    Parameters
    ----------
    tree :
        The initial tree
    dendrogram :
        Intermediary dendrogram for recursive use
    index :
        Intermediary index for recursive use
    depth :
        Current depth for recursive use
    size :
        Current leaf count for recursive use
    copy_tree :
        If ``True``, ensure the passed tree remains unchanged.

    Returns
    -------
    dendrogram :
        The reordered dendrogram
    index :
        The indexing array
    """
    if copy_tree:
        # Work on a private copy, keeping the other arguments instead of silently dropping them.
        return get_dendrogram(copy.deepcopy(tree), dendrogram, index, depth, size)

    if dendrogram is None:
        dendrogram = []
    if index is None:
        index = get_index(tree)
    if size is None:
        # Nodes never recorded explicitly are leaves, of size 1.
        size = defaultdict(lambda: 1)

    if len(tree) <= 1:
        return dendrogram, index

    lengths = np.array([len(t) for t in tree])
    if np.max(lengths) > 1:
        # Collapse the first non-trivial subtree (one level deeper), then process this level again.
        first = np.argwhere(lengths > 1).ravel()[0]
        dendrogram_, index_ = get_dendrogram(tree[first], None, index, depth + 1, size)
        dendrogram += dendrogram_
        return get_dendrogram(tree, dendrogram, index_, depth, size)

    # Every child is a single node: merge them all, two by two, at the same height.
    i = tree.pop()[0]
    j = tree.pop()[0]
    s = size[i] + size[j]
    dendrogram.append([i, j, float(-depth), s])
    index += 1
    while len(tree):
        k = tree.pop()[0]
        # The absorbed child may itself be a merged subtree, so add its actual size.
        s += size[k]
        dendrogram.append([index, k, float(-depth), s])
        index += 1
    size[index] = s
    tree.append(index)
    return dendrogram, index
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
def split_dendrogram(dendrogram: np.ndarray, shape: tuple):
    """Split the dendrogram of a bipartite graph into 2 dendrograms, one for each part.

    Nodes ``0, ..., n1 - 1`` of the input dendrogram are the rows and nodes
    ``n1, ..., n1 + n2 - 1`` are the columns, where ``(n1, n2)`` is the given shape.

    Parameters
    ----------
    dendrogram :
        Dendrogram of the bipartite graph.
    shape :
        Shape of the biadjacency matrix.
    Returns
    -------
    dendrogram_row :
        Dendrogram for the rows.
    dendrogram_col :
        Dendrogram for the columns.
    """
    n_row, n_col = shape
    n_total = n_row + n_col

    def restrict(node_ids: dict) -> list:
        """Replay the merges, keeping those that join two groups containing nodes of one part.

        ``node_ids`` maps each live node of the full dendrogram that contains nodes of the
        part to its index in the dendrogram of that part.
        """
        sizes = {node: 1 for node in node_ids}
        next_id = len(node_ids)
        merges = []
        for step in range(n_total - 1):
            left = dendrogram[step, 0]
            right = dendrogram[step, 1]
            parent = n_total + step
            if left in node_ids and right in node_ids:
                sizes[parent] = sizes.pop(left) + sizes.pop(right)
                node_ids[parent] = next_id
                merges.append([node_ids.pop(left), node_ids.pop(right), dendrogram[step, 2], sizes[parent]])
                next_id += 1
                continue
            # At most one child contains nodes of this part: the parent simply inherits it.
            for child in (left, right):
                if child in node_ids:
                    sizes[parent] = sizes.pop(child)
                    node_ids[parent] = node_ids.pop(child)
                    break
        return merges

    merges_row = restrict({node: node for node in range(n_row)})
    merges_col = restrict({n_row + node: node for node in range(n_col)})
    return np.array(merges_row), np.array(merges_col)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""tests for hierarchy"""
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""Tests for hierarchy API"""
|
|
4
|
+
import unittest
|
|
5
|
+
|
|
6
|
+
from sknetwork.data.test_graphs import *
|
|
7
|
+
from sknetwork.embedding import GSVD
|
|
8
|
+
from sknetwork.hierarchy import *
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TestHierarchyAPI(unittest.TestCase):
    """Check the shape of the dendrogram returned by each hierarchy algorithm."""

    def test_undirected(self):
        """The dendrogram of a graph with n nodes has n - 1 rows and 4 columns."""
        adjacency = test_graph()
        expected_shape = (adjacency.shape[0] - 1, 4)

        for algo in (Paris(), Ward(GSVD(3)), LouvainIteration()):
            self.assertTupleEqual(algo.fit_predict(adjacency).shape, expected_shape)

    def test_disconnected(self):
        """Same check on the disconnected test graph, through ``fit_transform``."""
        adjacency = test_graph_disconnect()
        for algo in (Paris(), Ward(GSVD(3)), LouvainIteration()):
            self.assertEqual(algo.fit_transform(adjacency).shape, (9, 4))
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Created in March 2020
|
|
5
|
+
@author: Quentin Lutz <qlutz@enst.fr>
|
|
6
|
+
@author: Thomas Bonald <tbonald@enst.fr>
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import unittest
|
|
10
|
+
|
|
11
|
+
from sknetwork.data.test_graphs import *
|
|
12
|
+
from sknetwork.hierarchy import LouvainIteration, LouvainHierarchy, Paris
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class TestLouvainHierarchy(unittest.TestCase):
    """Check dendrogram shapes for several algorithm settings and graph types."""

    def test(self):
        """Run each algorithm on an undirected, a directed and a bipartite test graph."""
        algorithms = [
            LouvainIteration(),
            LouvainIteration(resolution=2, depth=1),
            LouvainHierarchy(),
            LouvainHierarchy(tol_aggregation=0.1),
            Paris(),
            Paris(weights='uniform', reorder=False),
        ]
        for algo in algorithms:
            # Fresh inputs for every algorithm.
            for input_matrix in (test_graph(), test_digraph(), test_bigraph()):
                n_row = input_matrix.shape[0]
                self.assertEqual(algo.fit_predict(input_matrix).shape, (n_row - 1, 4))
                if algo.bipartite:
                    # The full dendrogram covers rows and columns together.
                    n_total = sum(input_matrix.shape)
                    self.assertEqual(algo.dendrogram_full_.shape, (n_total - 1, 4))
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Created on March 2019
|
|
5
|
+
@author: Thomas Bonald <bonald@enst.fr>
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import unittest
|
|
9
|
+
|
|
10
|
+
from sknetwork.data.test_graphs import *
|
|
11
|
+
from sknetwork.data import cyclic_graph
|
|
12
|
+
from sknetwork.hierarchy import Paris, LouvainIteration, dasgupta_cost, dasgupta_score, tree_sampling_divergence
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# noinspection PyMissingOrEmptyDocstring
|
|
16
|
+
class TestMetrics(unittest.TestCase):
    """Check the hierarchy metrics against reference values."""

    def setUp(self):
        """Instantiate the algorithms shared by the tests."""
        self.paris = Paris()
        self.louvain_hierarchy = LouvainIteration()

    def test_undirected(self):
        """Metrics on a cycle of 3 nodes, then on the undirected test graph."""
        cycle = cyclic_graph(3)
        tree = self.paris.fit_predict(cycle)
        self.assertAlmostEqual(dasgupta_cost(cycle, tree), 2.666, places=2)
        self.assertAlmostEqual(dasgupta_score(cycle, tree), 0.111, places=2)
        self.assertAlmostEqual(tree_sampling_divergence(cycle, tree), 0.0632, places=3)
        self.assertAlmostEqual(tree_sampling_divergence(cycle, tree, normalized=False), 0.0256, places=3)

        graph = test_graph()
        tree = self.paris.fit_transform(graph)
        self.assertAlmostEqual(dasgupta_cost(graph, tree), 4.26, places=2)
        self.assertAlmostEqual(dasgupta_score(graph, tree), 0.573, places=2)
        self.assertAlmostEqual(tree_sampling_divergence(graph, tree), 0.304, places=2)

        tree = self.louvain_hierarchy.fit_transform(graph)
        self.assertAlmostEqual(dasgupta_cost(graph, tree), 4.43, places=2)
        self.assertAlmostEqual(dasgupta_score(graph, tree), 0.555, places=2)
        self.assertAlmostEqual(tree_sampling_divergence(graph, tree), 0.286, places=2)

    def test_directed(self):
        """Metrics on the directed test graph."""
        graph = test_digraph()

        tree = self.paris.fit_transform(graph)
        self.assertAlmostEqual(dasgupta_score(graph, tree), 0.566, places=2)
        self.assertAlmostEqual(tree_sampling_divergence(graph, tree), 0.318, places=2)

        tree = self.louvain_hierarchy.fit_transform(graph)
        self.assertAlmostEqual(dasgupta_score(graph, tree), 0.55, places=2)
        self.assertAlmostEqual(tree_sampling_divergence(graph, tree), 0.313, places=2)

    def test_disconnected(self):
        """Metrics on the disconnected test graph."""
        graph = test_graph_disconnect()

        tree = self.paris.fit_transform(graph)
        self.assertAlmostEqual(dasgupta_score(graph, tree), 0.682, places=2)
        self.assertAlmostEqual(tree_sampling_divergence(graph, tree), 0.464, places=2)

        tree = self.louvain_hierarchy.fit_transform(graph)
        self.assertAlmostEqual(dasgupta_score(graph, tree), 0.670, places=2)
        self.assertAlmostEqual(tree_sampling_divergence(graph, tree), 0.594, places=2)

    def test_options(self):
        """Metrics with explicit ``weights`` and ``normalized`` options."""
        graph = test_graph()
        tree = self.paris.fit_transform(graph)
        self.assertAlmostEqual(dasgupta_score(graph, tree, weights='degree'), 0.573, places=2)
        self.assertAlmostEqual(tree_sampling_divergence(graph, tree, weights='uniform'), 0.271, places=2)
        self.assertAlmostEqual(tree_sampling_divergence(graph, tree, normalized=False), 0.367, places=2)
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Created on March 2019
|
|
5
|
+
@author: Thomas Bonald <bonald@enst.fr>
|
|
6
|
+
@author: Quentin Lutz <qlutz@enst.fr>
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import unittest
|
|
10
|
+
|
|
11
|
+
from sknetwork.data import karate_club
|
|
12
|
+
from sknetwork.hierarchy import Paris, cut_straight, cut_balanced, aggregate_dendrogram
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# noinspection PyMissingOrEmptyDocstring
|
|
16
|
+
class TestCuts(unittest.TestCase):
    """Check the cuts and the aggregation of the Paris dendrogram of the karate club graph."""

    def setUp(self):
        """Compute the reference dendrogram."""
        self.adjacency = karate_club()
        self.dendrogram = Paris().fit_transform(self.adjacency)

    def _assert_n_labels(self, labels, expected):
        """Assert that ``labels`` contains exactly ``expected`` distinct values."""
        self.assertEqual(len(set(labels)), expected)

    def test_cuts(self):
        """Straight and balanced cuts with their main parameters."""
        self._assert_n_labels(cut_straight(self.dendrogram), 2)
        self._assert_n_labels(cut_straight(self.dendrogram, n_clusters=5), 5)
        self._assert_n_labels(cut_balanced(self.dendrogram, 2), 21)

        labels, new_dendrogram = cut_balanced(self.dendrogram, max_cluster_size=4, return_dendrogram=True)
        self._assert_n_labels(labels, 12)
        self.assertTupleEqual(new_dendrogram.shape, (11, 4))

        # Same balanced cut on a dendrogram that has not been reordered.
        unordered = Paris(reorder=False).fit_predict(self.adjacency)
        self._assert_n_labels(cut_balanced(unordered, 4), 12)

    def test_options(self):
        """Threshold, sorting and cluster size options."""
        self._assert_n_labels(cut_straight(self.dendrogram, threshold=0.5), 7)
        self._assert_n_labels(cut_straight(self.dendrogram, n_clusters=3, threshold=0.5), 3)
        self._assert_n_labels(cut_straight(self.dendrogram, sort_clusters=False), 2)
        self._assert_n_labels(cut_balanced(self.dendrogram, max_cluster_size=2, sort_clusters=False), 21)
        self._assert_n_labels(cut_balanced(self.dendrogram, max_cluster_size=10), 5)

    def test_aggregation(self):
        """Aggregation into 3 leaves yields 2 merges and, on request, 3 counts."""
        aggregated = aggregate_dendrogram(self.dendrogram, n_clusters=3)
        self.assertEqual(len(aggregated), 2)

        aggregated, counts = aggregate_dendrogram(self.dendrogram, n_clusters=3, return_counts=True)
        self.assertEqual(len(aggregated), 2)
        self.assertEqual(len(counts), 3)
|
|
57
|
+
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Created on October 2019
|
|
5
|
+
@author: Nathan de Lara <nathan.delara@polytechnique.org>
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import unittest
|
|
9
|
+
|
|
10
|
+
from sknetwork.data.test_graphs import test_graph, test_digraph, test_bigraph
|
|
11
|
+
from sknetwork.embedding import Spectral
|
|
12
|
+
from sknetwork.hierarchy import Ward
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class TestWard(unittest.TestCase):
    """Shape checks on the dendrograms produced by ``Ward``."""

    def test_options(self):
        """Fit a default and a customised ``Ward`` on the three test graphs.

        The dendrogram must have one row fewer than the input matrix and 4 columns.
        With ``co_cluster`` set, ``dendrogram_full_`` must have one row fewer than
        the sum of both input dimensions.
        """
        algorithms = (Ward(), Ward(embedding_method=Spectral(3), co_cluster=True))
        for algorithm in algorithms:
            for matrix in (test_graph(), test_digraph(), test_bigraph()):
                dendrogram = algorithm.fit_predict(matrix)
                n_row = matrix.shape[0]
                self.assertEqual(dendrogram.shape, (n_row - 1, 4))
                if not algorithm.co_cluster:
                    continue
                n_total = sum(matrix.shape)
                self.assertEqual(algorithm.dendrogram_full_.shape, (n_total - 1, 4))
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Created on October 2019
|
|
5
|
+
@author: Nathan de Lara <nathan.delara@polytechnique.org>
|
|
6
|
+
@author: Thomas Bonald <bonald@enst.fr>
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from typing import Union
|
|
10
|
+
|
|
11
|
+
import numpy as np
|
|
12
|
+
from scipy import sparse
|
|
13
|
+
|
|
14
|
+
from sknetwork.embedding import BaseEmbedding, Spectral
|
|
15
|
+
from sknetwork.clustering.kmeans import get_embedding
|
|
16
|
+
from sknetwork.hierarchy.base import BaseHierarchy
|
|
17
|
+
from sknetwork.utils.check import check_format
|
|
18
|
+
from sknetwork.utils.ward import WardDense
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class Ward(BaseHierarchy):
    """Hierarchical clustering by the Ward method.

    Parameters
    ----------
    embedding_method :
        Embedding method (default = Spectral embedding in dimension 10).
        If ``None``, a new ``Spectral(10)`` instance is created for this object.
    co_cluster :
        If ``True``, co-cluster rows and columns, considered as different nodes (default = ``False``).

    Attributes
    ----------
    dendrogram_ :
        Dendrogram of the graph.
    dendrogram_row_ :
        Dendrogram for the rows, for bipartite graphs.
    dendrogram_col_ :
        Dendrogram for the columns, for bipartite graphs.
    dendrogram_full_ :
        Dendrogram for both rows and columns, indexed in this order, for bipartite graphs.

    Examples
    --------
    >>> from sknetwork.hierarchy import Ward
    >>> from sknetwork.data import karate_club
    >>> ward = Ward()
    >>> adjacency = karate_club()
    >>> dendrogram = ward.fit_transform(adjacency)
    >>> dendrogram.shape
    (33, 4)

    References
    ----------
    * Ward, J. H., Jr. (1963). Hierarchical grouping to optimize an objective function.
      Journal of the American Statistical Association.

    * Murtagh, F., & Contreras, P. (2012). Algorithms for hierarchical clustering: an overview.
      Wiley Interdisciplinary Reviews: Data Mining and Knowledge Discovery.
    """
    def __init__(self, embedding_method: Union[BaseEmbedding, None] = None, co_cluster: bool = False):
        super(Ward, self).__init__()
        # A default written as ``Spectral(10)`` in the signature is built once, when the
        # function is defined, and then shared (and fitted) by every Ward created without
        # an explicit embedding. Build it per instance instead.
        if embedding_method is None:
            embedding_method = Spectral(10)
        self.embedding_method = embedding_method
        self.co_cluster = co_cluster
        # Set by fit() from the second value returned by get_embedding.
        self.bipartite = None

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray]) -> 'Ward':
        """Applies embedding method followed by the Ward algorithm.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.

        Returns
        -------
        self: :class:`Ward`
        """
        # Inherited helper. NOTE(review): presumably resets the fitted attributes; confirm in BaseHierarchy.
        self._init_vars()

        # input
        # The return value is not used here; the original matrix is passed on as given.
        check_format(input_matrix)

        # embedding
        embedding, self.bipartite = get_embedding(input_matrix, self.embedding_method, self.co_cluster)

        # clustering
        ward = WardDense()
        self.dendrogram_ = ward.fit_transform(embedding)

        # output
        if self.co_cluster:
            # NOTE(review): presumably fills the row / column / full dendrograms from the
            # input shape; confirm in BaseHierarchy._split_vars.
            self._split_vars(input_matrix.shape)

        return self
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
"""Module of linear algebra."""
|
|
2
|
+
from sknetwork.linalg.basics import safe_sparse_dot
|
|
3
|
+
from sknetwork.linalg.eig_solver import EigSolver, LanczosEig
|
|
4
|
+
from sknetwork.linalg.laplacian import get_laplacian
|
|
5
|
+
from sknetwork.linalg.normalization import diag_pinv, normalize
|
|
6
|
+
from sknetwork.linalg.operators import Regularizer, Laplacian, Normalizer, CoNeighbor
|
|
7
|
+
from sknetwork.linalg.polynome import Polynome
|
|
8
|
+
from sknetwork.linalg.sparse_lowrank import SparseLR
|
|
9
|
+
from sknetwork.linalg.svd_solver import SVDSolver, LanczosSVD
|