nystrom-ncut 0.0.1__py3-none-any.whl → 0.0.2__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry; it is provided for informational purposes only.
- nystrom_ncut/__init__.py +4 -4
- nystrom_ncut/common.py +20 -0
- nystrom_ncut/ncut_pytorch.py +192 -467
- nystrom_ncut/nystrom.py +4 -2
- nystrom_ncut/propagation_utils.py +15 -57
- nystrom_ncut/visualize_utils.py +9 -98
- {nystrom_ncut-0.0.1.dist-info → nystrom_ncut-0.0.2.dist-info}/METADATA +1 -1
- nystrom_ncut-0.0.2.dist-info/RECORD +11 -0
- nystrom_ncut/new_ncut_pytorch.py +0 -241
- nystrom_ncut-0.0.1.dist-info/RECORD +0 -11
- {nystrom_ncut-0.0.1.dist-info → nystrom_ncut-0.0.2.dist-info}/LICENSE +0 -0
- {nystrom_ncut-0.0.1.dist-info → nystrom_ncut-0.0.2.dist-info}/WHEEL +0 -0
- {nystrom_ncut-0.0.1.dist-info → nystrom_ncut-0.0.2.dist-info}/top_level.txt +0 -0
nystrom_ncut/ncut_pytorch.py
CHANGED
@@ -1,46 +1,113 @@
-# %%
 import logging
-import math
-from typing import Literal
+from typing import Literal, Tuple
 
 import torch
+import torch.nn.functional as Fn
 
 from .nystrom import (
-
+    EigSolverOptions,
+    OnlineKernel,
+    OnlineNystrom,
+    solve_eig,
 )
 from .propagation_utils import (
-    run_subgraph_sampling,
-    propagate_knn,
     affinity_from_features,
+    run_subgraph_sampling,
 )
 
 
-class NCUT:
+DistanceOptions = Literal["cosine", "euclidean", "rbf"]
+
+
+class LaplacianKernel(OnlineKernel):
+    def __init__(
+        self,
+        affinity_focal_gamma: float,
+        distance: DistanceOptions,
+        eig_solver: EigSolverOptions,
+    ):
+        self.affinity_focal_gamma = affinity_focal_gamma
+        self.distance: DistanceOptions = distance
+        self.eig_solver: EigSolverOptions = eig_solver
+
+        # Anchor matrices
+        self.anchor_features: torch.Tensor = None  # [n x d]
+        self.A: torch.Tensor = None  # [n x n]
+        self.Ainv: torch.Tensor = None  # [n x n]
+
+        # Updated matrices
+        self.a_r: torch.Tensor = None  # [n]
+        self.b_r: torch.Tensor = None  # [n]
+
+    def fit(self, features: torch.Tensor) -> None:
+        self.anchor_features = features  # [n x d]
+        self.A = affinity_from_features(
+            self.anchor_features,  # [n x d]
+            affinity_focal_gamma=self.affinity_focal_gamma,
+            distance=self.distance,
+        )  # [n x n]
+        U, L = solve_eig(
+            self.A,
+            num_eig=features.shape[-1] + 1,
+            eig_solver=self.eig_solver,
+        )  # [n x (d + 1)], [d + 1]
+        self.Ainv = U @ torch.diag(1 / L) @ U.mT  # [n x n]
+        self.a_r = torch.sum(self.A, dim=-1)  # [n]
+        self.b_r = torch.zeros_like(self.a_r)  # [n]
+
+    def update(self, features: torch.Tensor) -> torch.Tensor:
+        B = affinity_from_features(
+            self.anchor_features,  # [n x d]
+            features,  # [m x d]
+            affinity_focal_gamma=self.affinity_focal_gamma,
+            distance=self.distance,
+        )  # [n x m]
+        b_r = torch.sum(B, dim=-1)  # [n]
+        b_c = torch.sum(B, dim=-2)  # [m]
+        self.b_r = self.b_r + b_r  # [n]
+
+        rowscale = self.a_r + self.b_r  # [n]
+        colscale = b_c + B.mT @ self.Ainv @ self.b_r  # [m]
+        scale = (rowscale[:, None] * colscale) ** -0.5  # [n x m]
+        return (B * scale).mT  # [m x n]
+
+    def transform(self, features: torch.Tensor = None) -> torch.Tensor:
+        rowscale = self.a_r + self.b_r  # [n]
+        if features is None:
+            B = self.A  # [n x n]
+            colscale = rowscale  # [n]
+        else:
+            B = affinity_from_features(
+                self.anchor_features,  # [n x d]
+                features,  # [m x d]
+                affinity_focal_gamma=self.affinity_focal_gamma,
+                distance=self.distance,
+            )  # [n x m]
+            b_c = torch.sum(B, dim=-2)  # [m]
+            colscale = b_c + B.mT @ self.Ainv @ self.b_r  # [m]
+        scale = (rowscale[:, None] * colscale) ** -0.5  # [n x m]
+        return (B * scale).mT  # [m x n]
+
+
+class NCUT(OnlineNystrom):
     """Nystrom Normalized Cut for large scale graph."""
 
     def __init__(
         self,
         num_eig: int = 100,
-        knn: int = 10,
         affinity_focal_gamma: float = 1.0,
         num_sample: int = 10000,
         sample_method: Literal["farthest", "random"] = "farthest",
-        distance: Literal["cosine", "euclidean", "rbf"] = "cosine",
-        indirect_connection: bool = True,
-        indirect_pca_dim: int = 100,
+        distance: DistanceOptions = "cosine",
+        eig_solver: EigSolverOptions = "svd_lowrank",
+        normalize_features: bool = None,
         device: str = None,
         move_output_to_cpu: bool = False,
-        eig_solver: Literal["svd_lowrank", "lobpcg", "svd", "eigh"] = "svd_lowrank",
-        normalize_features: bool = None,
         matmul_chunk_size: int = 8096,
-        make_orthogonal: bool = False,
-        verbose: bool = False,
     ):
         """
        Args:
             num_eig (int): number of top eigenvectors to return
-            knn (int): number of KNN for propagating eigenvectors from subgraph to full graph,
-                smaller knn result in more sharp eigenvectors.
             affinity_focal_gamma (float): affinity matrix temperature, lower t reduce the not-so-connected edge weights,
                 smaller t result in more sharp eigenvectors.
             num_sample (int): number of samples for Nystrom-like approximation,
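The new `LaplacianKernel` above is the core of the 0.0.2 rework: the running sums `a_r`/`b_r` estimate the node degrees needed for the symmetric normalization A = D^(-1/2) A D^(-1/2) (the same normalization the removed `normalized_affinity_transform` performed explicitly), without ever materializing the full affinity matrix. For W = [[A, B], [B^T, C]] with anchors in A, Nystrom completes the unseen block as C ≈ B^T A^(-1) B, which is exactly what the `colscale = b_c + B.mT @ self.Ainv @ self.b_r` line computes. A minimal numeric sketch of that degree estimate (illustrative only, not part of the package; it uses an exact inverse where the kernel uses a truncated eigendecomposition):

import torch

torch.manual_seed(0)
n, m = 5, 3
X = torch.rand(n + m, 2)
W = torch.exp(-torch.cdist(X, X))  # toy RBF affinity over n anchors + m new points
A, B = W[:n, :n], W[:n, n:]        # anchor block [n x n] and cross block [n x m]

Ainv = torch.linalg.inv(A)         # stands in for the kernel's low-rank Ainv
C_approx = B.T @ Ainv @ B          # Nystrom completion of the unseen [m x m] block
d_exact = W.sum(dim=0)[n:]         # true degrees of the m new points
d_approx = B.sum(dim=0) + C_approx.sum(dim=1)  # = b_c + B.T @ Ainv @ b_r, b_r = B.sum(-1)
print(d_exact)
print(d_approx)                    # close when A captures most of W's spectrum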
@@ -48,140 +115,108 @@ class NCUT:
             sample_method (str): subgraph sampling, ['farthest', 'random'].
                 farthest point sampling is recommended for better Nystrom-approximation accuracy
             distance (str): distance metric for affinity matrix, ['cosine', 'euclidean', 'rbf'].
-            indirect_connection (bool): include indirect connection in the Nystrom-like approximation
-            indirect_pca_dim (int): when compute indirect connection, PCA to reduce the node dimension,
-            device (str): device to use for eigen computation,
-                move to GPU to speeds up a bit (~5x faster)
-            move_output_to_cpu (bool): move output to CPU, set to True if you have memory issue
             eig_solver (str): eigen decompose solver, ['svd_lowrank', 'lobpcg', 'svd', 'eigh'].
             normalize_features (bool): normalize input features before computing affinity matrix,
                 default 'None' is True for cosine distance, False for euclidean distance and rbf
+            device (str): device to use for eigen computation,
+                move to GPU to speeds up a bit (~5x faster)
+            move_output_to_cpu (bool): move output to CPU, set to True if you have memory issue
             matmul_chunk_size (int): chunk size for large-scale matrix multiplication
-            make_orthogonal (bool): make eigenvectors orthogonal post-hoc
-            verbose (bool): progress bar
-
-        Examples:
-            >>> from ncut_pytorch import NCUT
-            >>> import torch
-            >>> features = torch.rand(10000, 100)
-            >>> ncut = NCUT(num_eig=20)
-            >>> ncut.fit(features)
-            >>> eigenvectors, eigenvalues = ncut.transform(features)
-            >>> print(eigenvectors.shape, eigenvalues.shape)
-            >>> # (10000, 20) (20,)
-
-            >>> from ncut_pytorch import eigenvector_to_rgb
-            >>> # use t-SNE or UMAP to convert eigenvectors to RGB
-            >>> X_3d, rgb = eigenvector_to_rgb(eigenvectors, method='tsne_3d')
-            >>> print(X_3d.shape, rgb.shape)
-            >>> # (10000, 3) (10000, 3)
-
-            >>> # transform new features
-            >>> new_features = torch.rand(500, 100)
-            >>> new_eigenvectors, _ = ncut.transform(new_features)
-            >>> print(new_eigenvectors.shape)
-            >>> # (500, 20)
         """
-
+        OnlineNystrom.__init__(
+            self,
+            n_components=num_eig,
+            kernel=LaplacianKernel(affinity_focal_gamma, distance, eig_solver),
+            eig_solver=eig_solver,
+            chunk_size=matmul_chunk_size,
+        )
         self.num_sample = num_sample
-        self.knn = knn
         self.sample_method = sample_method
         self.distance = distance
-        self.affinity_focal_gamma = affinity_focal_gamma
-        self.indirect_connection = indirect_connection
-        self.indirect_pca_dim = indirect_pca_dim
-        self.device = device
-        self.move_output_to_cpu = move_output_to_cpu
-        self.eig_solver = eig_solver
         self.normalize_features = normalize_features
         if self.normalize_features is None:
             if distance in ["cosine"]:
                 self.normalize_features = True
             if distance in ["euclidean", "rbf"]:
                 self.normalize_features = False
+
+        self.device = device
+        self.move_output_to_cpu = move_output_to_cpu
         self.matmul_chunk_size = matmul_chunk_size
-        self.make_orthogonal = make_orthogonal
-        self.verbose = verbose
-
-
-
-
-
-
-    def fit(self,
-            features: torch.Tensor,
-            precomputed_sampled_indices: torch.Tensor = None
-            ):
-        """Fit Nystrom Normalized Cut on the input features.
-        Args:
-            features (torch.Tensor): input features, shape (n_samples, n_features)
-            precomputed_sampled_indices (torch.Tensor): precomputed sampled indices, shape (num_sample,)
-                override the sample_method, if not None
-        Returns:
-            (NCUT): self
-        """
+
+    def _fit_helper(
+        self,
+        features: torch.Tensor,
+        precomputed_sampled_indices: torch.Tensor,
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
+        # move subgraph gpu to speed up
+        original_device = features.device
+        device = original_device if self.device is None else self.device
+
         _n = features.shape[0]
         if self.num_sample >= _n:
             logging.info(
-                f"NCUT nystrom num_sample is larger than number of input samples, nyström approximation is not needed, setting num_sample={_n}
+                f"NCUT nystrom num_sample is larger than number of input samples, nyström approximation is not needed, setting num_sample={_n}"
             )
             self.num_sample = _n
-            self.knn = 1
-
-        # save the eigenvectors solution on the sub-sampled graph, do not propagate to full graph yet
-        self.subgraph_eigen_vector, self.eigen_value, self.subgraph_indices = nystrom_ncut(
-            features,
-            num_eig=self.num_eig,
-            num_sample=self.num_sample,
-            sample_method=self.sample_method,
-            precomputed_sampled_indices=precomputed_sampled_indices,
-            distance=self.distance,
-            affinity_focal_gamma=self.affinity_focal_gamma,
-            indirect_connection=self.indirect_connection,
-            indirect_pca_dim=self.indirect_pca_dim,
-            device=self.device,
-            eig_solver=self.eig_solver,
-            normalize_features=self.normalize_features,
-            matmul_chunk_size=self.matmul_chunk_size,
-            verbose=self.verbose,
-            no_propagation=True,
-            move_output_to_cpu=self.move_output_to_cpu,
-        )
-        self.subgraph_features = features[self.subgraph_indices]
-        return self
 
-
-
+        # check if features dimension greater than num_eig
+        if self.eig_solver in ["svd_lowrank", "lobpcg"]:
+            assert (
+                _n >= self.n_components * 2
+            ), "number of nodes should be greater than 2*num_eig"
+        elif self.eig_solver in ["svd", "eigh"]:
+            assert (
+                _n >= self.n_components
+            ), "number of nodes should be greater than num_eig"
+
+        assert self.distance in ["cosine", "euclidean", "rbf"], "distance should be 'cosine', 'euclidean', 'rbf'"
+
+        if self.normalize_features:
+            # features need to be normalized for affinity matrix computation (cosine distance)
+            features = torch.nn.functional.normalize(features, dim=-1)
+
+        if precomputed_sampled_indices is not None:
+            sampled_indices = precomputed_sampled_indices
+        else:
+            sampled_indices = run_subgraph_sampling(
+                features,
+                num_sample=self.num_sample,
+                sample_method=self.sample_method,
+            )
+        sampled_features = features[sampled_indices].to(device)
+        OnlineNystrom.fit(self, sampled_features)
+
+        _n_not_sampled = _n - len(sampled_features)
+        if _n_not_sampled > 0:
+            unsampled_indices = torch.full((_n,), True).scatter(0, sampled_indices, False)
+            unsampled_features = features[unsampled_indices].to(device)
+            V_unsampled, _ = OnlineNystrom.update(self, unsampled_features)
+        else:
+            unsampled_indices = V_unsampled = None
+        return unsampled_indices, V_unsampled
+
+    def fit(
+        self,
+        features: torch.Tensor,
+        precomputed_sampled_indices: torch.Tensor = None,
+    ):
+        """Fit Nystrom Normalized Cut on the input features.
         Args:
-            features (torch.Tensor):
-
+            features (torch.Tensor): input features, shape (n_samples, n_features)
+            precomputed_sampled_indices (torch.Tensor): precomputed sampled indices, shape (num_sample,)
+                override the sample_method, if not None
         Returns:
-            (
-            (torch.Tensor): eigen_values, sorted in descending order, shape (num_eig,)
+            (NCUT): self
         """
+        NCUT._fit_helper(self, features, precomputed_sampled_indices)
+        return self
 
-
-
-
-        eigen_vector = propagate_knn(
-            self.subgraph_eigen_vector,
-            features,
-            self.subgraph_features,
-            knn,
-            distance=self.distance,
-            chunk_size=self.matmul_chunk_size,
-            device=self.device,
-            use_tqdm=self.verbose,
-            move_output_to_cpu=self.move_output_to_cpu,
-        )
-        if self.make_orthogonal:
-            eigen_vector = gram_schmidt(eigen_vector)
-        return eigen_vector, self.eigen_value
-
-    def fit_transform(self,
-                      features: torch.Tensor,
-                      precomputed_sampled_indices: torch.Tensor = None
-                      ):
+    def fit_transform(
+        self,
+        features: torch.Tensor,
+        precomputed_sampled_indices: torch.Tensor = None,
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
         """
         Args:
             features (torch.Tensor): input features, shape (n_samples, n_features)
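Since the docstring example removed in the first hunk gets no replacement in 0.0.2, here is a hedged usage sketch of the reworked interface. It assumes `NCUT` is still re-exported from the package root (the diff touches `__init__.py` but does not show its contents); shapes follow the signatures in this diff:

import torch
from nystrom_ncut import NCUT  # assumption: re-exported in nystrom_ncut/__init__.py

features = torch.rand(10000, 100)
ncut = NCUT(num_eig=20)

# fit() returns self; fit_transform() returns (eigenvectors, eigenvalues)
eigenvectors, eigenvalues = ncut.fit_transform(features)
print(eigenvectors.shape, eigenvalues.shape)  # torch.Size([10000, 20]) torch.Size([20])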
@@ -192,285 +227,19 @@ class NCUT:
             (torch.Tensor): eigen_vectors, shape (n_samples, num_eig)
             (torch.Tensor): eigen_values, sorted in descending order, shape (num_eig,)
         """
-
-
-
-def nystrom_ncut(
-    features: torch.Tensor,
-    num_eig: int = 100,
-    num_sample: int = 10000,
-    knn: int = 10,
-    sample_method: Literal["farthest", "random"] = "farthest",
-    precomputed_sampled_indices: torch.Tensor = None,
-    distance: Literal["cosine", "euclidean", "rbf"] = "cosine",
-    affinity_focal_gamma: float = 1.0,
-    indirect_connection: bool = True,
-    indirect_pca_dim: int = 100,
-    device: str = None,
-    eig_solver: Literal["svd_lowrank", "lobpcg", "svd", "eigh"] = "svd_lowrank",
-    normalize_features: bool = None,
-    matmul_chunk_size: int = 8096,
-    make_orthogonal: bool = True,
-    verbose: bool = False,
-    no_propagation: bool = False,
-    move_output_to_cpu: bool = False,
-):
-    """PyTorch implementation of Faster Nystrom Normalized cut.
-    Args:
-        features (torch.Tensor): feature matrix, shape (n_samples, n_features)
-        num_eig (int): default 100, number of top eigenvectors to return
-        num_sample (int): default 10000, number of samples for Nystrom-like approximation
-        knn (int): default 10, number of KNN for propagating eigenvectors from subgraph to full graph,
-            smaller knn will result in more sharp eigenvectors,
-        sample_method (str): sample method, 'farthest' (default) or 'random'
-            'farthest' is recommended for better approximation
-        precomputed_sampled_indices (torch.Tensor): precomputed sampled indices, shape (num_sample,)
-            override the sample_method, if not None
-        distance (str): distance metric, 'cosine' (default) or 'euclidean', 'rbf'
-        affinity_focal_gamma (float): affinity matrix parameter, lower t reduce the weak edge weights,
-            resulting in more sharp eigenvectors, default 1.0
-        indirect_connection (bool): include indirect connection in the subgraph, default True
-        indirect_pca_dim (int): default 100, PCA dimension to reduce the node dimension, only applied to
-            the not sampled nodes, not applied to the sampled nodes
-        device (str): device to use for computation, if None, will not change device
-            a good practice is to pass features by CPU since it's usually large,
-            and move subgraph affinity to GPU to speed up eigenvector computation
-        eig_solver (str): eigen decompose solver, 'svd_lowrank' (default), 'lobpcg', 'svd', 'eigh'
-            'svd_lowrank' is recommended for large scale graph, it's the fastest
-            they correspond to torch.svd_lowrank, torch.lobpcg, torch.svd, torch.linalg.eigh
-        normalize_features (bool): normalize input features before computing affinity matrix,
-            default 'None' is True for cosine distance, False for euclidean distance and rbf
-        matmul_chunk_size (int): chunk size for matrix multiplication
-            large matrix multiplication is chunked to reduce memory usage,
-            smaller chunk size will reduce memory usage but slower computation, default 8096
-        make_orthogonal (bool): make eigenvectors orthogonal after propagation, default True
-        verbose (bool): show progress bar when propagating eigenvectors from subgraph to full graph
-        no_propagation (bool): if True, skip the eigenvector propagation step, only return the subgraph eigenvectors
-        move_output_to_cpu (bool): move output to CPU, set to True if you have memory issue
-    Returns:
-        (torch.Tensor): eigenvectors, shape (n_samples, num_eig)
-        (torch.Tensor): eigenvalues, sorted in descending order, shape (num_eig,)
-        (torch.Tensor): sampled_indices used by Nystrom-like approximation subgraph, shape (num_sample,)
-    """
-
-    # check if features dimension greater than num_eig
-    if eig_solver in ["svd_lowrank", "lobpcg"]:
-        assert features.shape[0] > (
-            num_eig * 2
-        ), "number of nodes should be greater than 2*num_eig"
-    if eig_solver in ["svd", "eigh"]:
-        assert (
-            features.shape[0] > num_eig
-        ), "number of nodes should be greater than num_eig"
-
-    assert distance in ["cosine", "euclidean", "rbf"], "distance should be 'cosine', 'euclidean', 'rbf'"
-
-    if normalize_features:
-        # features need to be normalized for affinity matrix computation (cosine distance)
-        features = torch.nn.functional.normalize(features, dim=-1)
-
-    if precomputed_sampled_indices is not None:
-        sampled_indices = precomputed_sampled_indices
-    else:
-        sampled_indices = run_subgraph_sampling(
-            features,
-            num_sample=num_sample,
-            sample_method=sample_method,
-        )
-
-    sampled_features = features[sampled_indices]
-    # move subgraph gpu to speed up
-    original_device = sampled_features.device
-    device = original_device if device is None else device
-    sampled_features = sampled_features.to(device)
-
-    # compute affinity matrix on subgraph
-    A = affinity_from_features(
-        sampled_features,
-        affinity_focal_gamma=affinity_focal_gamma,
-        distance=distance,
-    )
-
-    # check if all nodes are sampled, if so, no need for Nystrom approximation
-    not_sampled = torch.full((features.shape[0],), True)
-    not_sampled[sampled_indices] = False
-    _n_not_sampled = not_sampled.sum()
-
-    if _n_not_sampled == 0:
-        # if sampled all nodes, no need for nyström approximation
-        eigen_vector, eigen_value = ncut(A, num_eig, eig_solver=eig_solver)
-        return eigen_vector, eigen_value, sampled_indices
-
-    # 1) PCA to reduce the node dimension for the not sampled nodes
-    # 2) compute indirect connection on the PC nodes
-    if _n_not_sampled > 0 and indirect_connection:
-        indirect_pca_dim = min(indirect_pca_dim, *features.shape)
-        U, S, V = torch.pca_lowrank(features[not_sampled].T, q=indirect_pca_dim)
-        S = S / math.sqrt(_n_not_sampled)
-        feature_B_T = U @ torch.diag(S)
-        feature_B = feature_B_T.T
-        feature_B = feature_B.to(device)
-
-        B = affinity_from_features(
-            sampled_features,
-            feature_B,
-            affinity_focal_gamma=affinity_focal_gamma,
-            distance=distance,
-            fill_diagonal=False,
-        )
-        # P is 1-hop random walk matrix
-        B_row = B / B.sum(dim=1, keepdim=True)
-        B_col = B / B.sum(dim=0, keepdim=True)
-        P = B_row @ B_col.T
-        P = (P + P.T) / 2
-        # fill diagonal with 0
-        P[torch.arange(P.shape[0]), torch.arange(P.shape[0])] = 0
-        A = A + P
-
-    # compute normalized cut on the subgraph
-    eigen_vector, eigen_value = ncut(A, num_eig, eig_solver=eig_solver)
-    eigen_vector = eigen_vector.to(dtype=features.dtype, device=original_device)
-    eigen_value = eigen_value.to(dtype=features.dtype, device=original_device)
-
-    if no_propagation:
-        return eigen_vector, eigen_value, sampled_indices
-
-    # propagate eigenvectors from subgraph to full graph
-    eigen_vector = propagate_knn(
-        eigen_vector,
-        features,
-        sampled_features,
-        knn,
-        distance=distance,
-        chunk_size=matmul_chunk_size,
-        device=device,
-        use_tqdm=verbose,
-        move_output_to_cpu=move_output_to_cpu,
-    )
-
-    # post-hoc orthogonalization
-    if make_orthogonal:
-        eigen_vector = gram_schmidt(eigen_vector)
-
-    return eigen_vector, eigen_value, sampled_indices
-
-
-def normalized_affinity_transform(D: torch.Tensor, affinity_focal_gamma: float):
-    """Compute Laplacian-normalized affinity matrix from input features.
-
-    Args:
-        features (torch.Tensor): input features, shape (n_samples, n_features)
-        features_B (torch.Tensor, optional): optional, if not None, compute affinity between two features
-        affinity_focal_gamma (float): affinity matrix parameter, lower t reduce the edge weights
-            on weak connections, default 1.0
-        distance (str): distance metric, 'cosine' (default) or 'euclidean', 'rbf'.
-        normalize_features (bool): normalize input features before computing affinity matrix
-
-    Returns:
-        (torch.Tensor): affinity matrix, shape (n_samples, n_samples)
-    """
-    # make sure D is symmetric
-    D = (D + D.T) / 2
-    A = torch.exp(-D / affinity_focal_gamma)
-
-    # symmetrical normalization; A = D^(-1/2) A D^(-1/2)
-    D = A.sum(dim=-1).detach().clone()
-    A /= torch.sqrt(D)[:, None]
-    A /= torch.sqrt(D)[None, :]
-    return A
-
-
-def ncut(
-    A: torch.Tensor,
-    num_eig: int = 100,
-    eig_solver: Literal["svd_lowrank", "lobpcg", "svd", "eigh"] = "svd_lowrank",
-):
-    """PyTorch implementation of Normalized cut without Nystrom-like approximation.
-
-    Args:
-        A (torch.Tensor): affinity matrix, shape (n_samples, n_samples)
-        num_eig (int): number of eigenvectors to return
-        eig_solver (str): eigen decompose solver, ['svd_lowrank', 'lobpcg', 'svd', 'eigh']
-
-    Returns:
-        (torch.Tensor): eigenvectors corresponding to the eigenvalues, shape (n_samples, num_eig)
-        (torch.Tensor): eigenvalues of the eigenvectors, sorted in descending order
-    """
-    # make sure A is symmetric
-    A = (A + A.T) / 2
+        unsampled_indices, V_unsampled = NCUT._fit_helper(self, features, precomputed_sampled_indices)
+        V_sampled, L = OnlineNystrom.transform(self)
 
-
-
-
-
-
-
-
-
-    if eigen_value.min() < 0:
-        logging.warning(
-            "negative eigenvalues detected, please make sure the affinity matrix is positive definite"
-        )
-
-    return eigen_vector, eigen_value
-
-
-def gram_schmidt(matrix):
-    """Orthogonalize a matrix column-wise using the Gram-Schmidt process.
-
-    Args:
-        matrix (torch.Tensor): A matrix to be orthogonalized (m x n).
-            the second dimension is orthogonalized
-    Returns:
-        torch.Tensor: Orthogonalized matrix (m x n).
-    """
-
-    # Get the number of rows (m) and columns (n) of the input matrix
-    m, n = matrix.shape
-
-    # Create an empty matrix to store the orthogonalized columns
-    orthogonal_matrix = torch.zeros((m, n), dtype=matrix.dtype)
-
-    for i in range(n):
-        # Start with the i-th column of the input matrix
-        vec = matrix[:, i]
-
-        for j in range(i):
-            # Subtract the projection of vec onto the j-th orthogonal column
-            proj = torch.dot(orthogonal_matrix[:, j], matrix[:, i]) / torch.dot(
-                orthogonal_matrix[:, j], orthogonal_matrix[:, j]
-            )
-            vec = vec - proj * orthogonal_matrix[:, j]
-
-        # Store the orthogonalized vector
-        orthogonal_matrix[:, i] = vec / torch.norm(vec)
-
-    return orthogonal_matrix
-
-
-def correct_rotation(eigen_vector):
-    # correct the random rotation (flipping sign) of eigenvectors
-    rand_w = torch.ones(
-        eigen_vector.shape[0], device=eigen_vector.device, dtype=eigen_vector.dtype
-    )
-    s = rand_w[None, :] @ eigen_vector
-    s = s.sign()
-    return eigen_vector * s
-
-
-# Multiclass Spectral Clustering, SX Yu, J Shi, 2003
-def _discretisation_eigenvector(eigen_vector):
-    # Function that discretizes rotated eigenvectors
-    n, k = eigen_vector.shape
-
-    # Find the maximum index along each row
-    _, J = torch.max(eigen_vector, dim=1)
-    Y = torch.zeros(n, k, device=eigen_vector.device).scatter_(1, J.unsqueeze(1), 1)
-
-    return Y
+        if unsampled_indices is not None:
+            V = torch.zeros((len(unsampled_indices), self.n_components))
+            V[~unsampled_indices] = V_sampled
+            V[unsampled_indices] = V_unsampled
+        else:
+            V = V_sampled
+        return V, L
 
 
-def kway_ncut(eigen_vectors: torch.Tensor, max_iter=300, return_rotation=False):
+def axis_align(eigen_vectors: torch.Tensor, max_iter=300):
     """Multiclass Spectral Clustering, SX Yu, J Shi, 2003
 
     Args:
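The scatter/mask bookkeeping in the new `fit_transform` (previous hunk) is easy to misread, so this standalone sketch reproduces how the sampled and unsampled eigenvector blocks are stitched back into input order (toy shapes, illustrative only):

import torch

n, k = 6, 2
sampled_indices = torch.tensor([0, 2, 5])
# True where a row was NOT among the Nystrom anchors:
unsampled_indices = torch.full((n,), True).scatter(0, sampled_indices, False)

V_sampled = torch.ones(3, k)     # stands in for OnlineNystrom.transform(self)
V_unsampled = -torch.ones(3, k)  # stands in for OnlineNystrom.update(...)

V = torch.zeros((n, k))
V[~unsampled_indices] = V_sampled    # anchor rows
V[unsampled_indices] = V_unsampled   # rows filled by the online update
print(V)  # rows 0, 2, 5 are +1; rows 1, 3, 4 are -1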
@@ -482,80 +251,36 @@ def kway_ncut(eigen_vectors: torch.Tensor, max_iter=300, return_rotation=False):
     """
     # Normalize eigenvectors
     n, k = eigen_vectors.shape
-
-    eigen_vectors = eigen_vectors / vm.unsqueeze(1)
+    eigen_vectors = Fn.normalize(eigen_vectors, p=2, dim=-1)
 
     # Initialize R matrix with the first column from a random row of EigenVectors
-    R = torch.
-    R[
+    R = torch.empty((k, k), device=eigen_vectors.device)
+    R[0] = eigen_vectors[torch.randint(0, n, (1,))].squeeze()
 
     # Loop to populate R with k orthogonal directions
     c = torch.zeros(n, device=eigen_vectors.device)
-    for
-        c += torch.abs(eigen_vectors @ R[
-
-        R[:, j] = eigen_vectors[i]
+    for i in range(1, k):
+        c += torch.abs(eigen_vectors @ R[i - 1])
+        R[i] = eigen_vectors[torch.argmin(c, dim=0)]
 
     # Iterative optimization loop
-
-
-
+    eps = torch.finfo(torch.float32).eps
+    prev_objective = torch.inf
+    for _ in range(max_iter):
+        # Discretize the projected eigenvectors
+        idx = torch.argmax(eigen_vectors @ R.mT, dim=-1)
+        M = torch.zeros((k, k)).index_add_(0, idx, eigen_vectors)
 
-
-
+        # Compute the NCut value
+        objective = torch.norm(M)
 
-        #
-
+        # Check for convergence
+        if torch.abs(objective - prev_objective) < eps:
+            break
+        prev_objective = objective
 
         # SVD decomposition
-        U, S, Vh = torch.linalg.svd(
-
+        U, S, Vh = torch.linalg.svd(M, full_matrices=False)
+        R = U @ Vh
 
-
-        ncut_value = 2 * (n - torch.sum(S))
-
-        # Check for convergence
-        if torch.abs(ncut_value - last_objective_value) < torch.finfo(
-            torch.float32).eps or nb_iterations_discretisation > max_iter:
-            exit_loop = True
-        else:
-            last_objective_value = ncut_value
-            R = V @ U.T
-
-    if return_rotation:
-        return eigenvectors_discrete, R
-
-    return eigenvectors_discrete
-
-
-def axis_align(eigen_vectors, max_iter=300):
-    return kway_ncut(eigen_vectors, max_iter=max_iter, return_rotation=True)
-
-
-## for backward compatibility ##
-
-try:
-
-    from .propagation_utils import (
-        propagate_nearest,
-        propagate_eigenvectors,
-        quantile_normalize,
-        quantile_min_max,
-        farthest_point_sampling,
-    )
-    from .visualize_utils import (
-        eigenvector_to_rgb,
-        rgb_from_tsne_3d,
-        rgb_from_umap_sphere,
-        rgb_from_tsne_2d,
-        rgb_from_umap_3d,
-        rgb_from_umap_2d,
-        rotate_rgb_cube,
-        convert_to_lab_color,
-        _transform_heatmap,
-        _clean_mask,
-        get_mask,
-    )
-
-except ImportError:
-    print("some of viualization and nystrom_utils are not imported")
+    return Fn.one_hot(idx, num_classes=k).to(torch.float), R