nystrom-ncut 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nystrom_ncut/__init__.py +6 -3
- nystrom_ncut/common.py +7 -16
- nystrom_ncut/distance_utils.py +81 -0
- nystrom_ncut/nystrom/distance_realization.py +9 -15
- nystrom_ncut/nystrom/normalized_cut.py +8 -12
- nystrom_ncut/nystrom/{nystrom.py → nystrom_utils.py} +19 -16
- nystrom_ncut/sampling_utils.py +96 -0
- nystrom_ncut/visualize_utils.py +162 -20
- {nystrom_ncut-0.1.0.dist-info → nystrom_ncut-0.1.2.dist-info}/METADATA +1 -1
- nystrom_ncut-0.1.2.dist-info/RECORD +15 -0
- nystrom_ncut/propagation_utils.py +0 -268
- nystrom_ncut-0.1.0.dist-info/RECORD +0 -14
- {nystrom_ncut-0.1.0.dist-info → nystrom_ncut-0.1.2.dist-info}/LICENSE +0 -0
- {nystrom_ncut-0.1.0.dist-info → nystrom_ncut-0.1.2.dist-info}/WHEEL +0 -0
- {nystrom_ncut-0.1.0.dist-info → nystrom_ncut-0.1.2.dist-info}/top_level.txt +0 -0
nystrom_ncut/__init__.py
CHANGED
@@ -2,13 +2,16 @@ from .nystrom import (
     NCut,
     axis_align,
 )
-from .propagation_utils import (
+from .distance_utils import (
     distance_from_features,
     affinity_from_features,
-    extrapolate_knn,
-    extrapolate_knn_with_subsampling,
+)
+from .sampling_utils import (
+    SampleConfig,
 )
 from .visualize_utils import (
+    extrapolate_knn,
+    extrapolate_knn_with_subsampling,
     rgb_from_tsne_3d,
     rgb_from_umap_sphere,
     rgb_from_tsne_2d,
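The net effect of the `__init__.py` changes: `distance_from_features`/`affinity_from_features` now come from `distance_utils`, `SampleConfig` is exported from the new `sampling_utils`, and the two `extrapolate_knn*` helpers moved into `visualize_utils`. A minimal sketch of the 0.1.2 surface (toy shapes; assumes `fit_transform` keeps the `(eigenvectors, eigenvalues)` return seen in `sampling_utils.py` below):

import torch

from nystrom_ncut import NCut, SampleConfig

features = torch.randn(1000, 64)  # toy input features
nc = NCut(n_components=20, sample_config=SampleConfig(method="fps", num_sample=300))
eigenvectors, eigenvalues = nc.fit_transform(features)  # (1000, 20), (20,)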
nystrom_ncut/common.py
CHANGED
@@ -1,14 +1,10 @@
-from typing import Any, Literal
+from typing import Any

 import numpy as np
 import torch
 import torch.nn.functional as Fn


-DistanceOptions = Literal["cosine", "euclidean", "rbf"]
-SampleOptions = Literal["farthest", "random"]
-
-
 def ceildiv(a: int, b: int) -> int:
     return -(-a // b)

@@ -24,16 +20,7 @@ def lazy_normalize(x: torch.Tensor, n: int = 1000, **normalize_kwargs: Any) -> torch.Tensor:
     return Fn.normalize(x, **normalize_kwargs)


-def to_euclidean(x: torch.Tensor, disttype: DistanceOptions) -> torch.Tensor:
-    if disttype == "cosine":
-        return lazy_normalize(x, p=2, dim=-1)
-    elif disttype == "rbf":
-        return x
-    else:
-        raise ValueError(f"to_euclidean not implemented for disttype {disttype}.")
-
-
-def quantile_min_max(x, q1=0.01, q2=0.99, n_sample=10000):
+def quantile_min_max(x: torch.Tensor, q1: float, q2: float, n_sample: int = 10000):
     if x.shape[0] > n_sample:
         np.random.seed(0)
         random_idx = np.random.choice(x.shape[0], n_sample, replace=False)
@@ -43,7 +30,7 @@ def quantile_min_max(x, q1=0.01, q2=0.99, n_sample=10000):
     return vmin, vmax


-def quantile_normalize(x, q=0.95):
+def quantile_normalize(x: torch.Tensor, q: float = 0.95):
     """normalize each dimension of x to [0, 1], take the q-th quantile; this is robust to outliers
     </br> 1. sort x
     </br> 2. take q-th quantile
@@ -68,3 +55,7 @@ def quantile_normalize(x, q=0.95):
     x = (x - vmin) / (vmax - vmin)
     x = x.clamp(0, 1)
     return x
+
+
+def profile(name: str, t: torch.Tensor) -> None:
+    print(f"{name} --- nan: {t.isnan().any()}, inf: {t.isinf().any()}, max: {t.abs().max()}, min: {t.abs().min()}")
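Note that `quantile_min_max` no longer defaults `q1`/`q2`; call sites (see `_transform_heatmap` below) now pass them explicitly. A short sketch of the updated helpers (toy values):

import torch

from nystrom_ncut.common import quantile_min_max, quantile_normalize

x = torch.randn(50000, 3)
vmin, vmax = quantile_min_max(x.flatten(), 0.01, 0.99)  # quantiles are now explicit
x01 = quantile_normalize(x, q=0.95)                     # robust per-dimension [0, 1] scaling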
nystrom_ncut/distance_utils.py
ADDED
@@ -0,0 +1,81 @@
+from typing import Literal
+
+import torch
+
+from .common import lazy_normalize
+
+
+DistanceOptions = Literal["cosine", "euclidean", "rbf"]
+
+
+def to_euclidean(x: torch.Tensor, disttype: DistanceOptions) -> torch.Tensor:
+    if disttype == "cosine":
+        return lazy_normalize(x, p=2, dim=-1)
+    elif disttype == "rbf":
+        return x
+    else:
+        raise ValueError(f"to_euclidean not implemented for disttype {disttype}.")
+
+
+def distance_from_features(
+    features: torch.Tensor,
+    features_B: torch.Tensor,
+    distance: DistanceOptions,
+):
+    """Compute distance matrix from input features.
+    Args:
+        features (torch.Tensor): input features, shape (n_samples, n_features)
+        features_B (torch.Tensor, optional): optional, if not None, compute distance between two sets of features
+        distance (str): distance metric, 'cosine' (default) or 'euclidean', 'rbf'.
+    Returns:
+        (torch.Tensor): distance matrix, shape (n_samples, n_samples)
+    """
+    # compute distance matrix from input features
+    if distance == "cosine":
+        features = lazy_normalize(features, dim=-1)
+        features_B = lazy_normalize(features_B, dim=-1)
+        D = 1 - features @ features_B.T
+    elif distance == "euclidean":
+        D = torch.cdist(features, features_B, p=2)
+    elif distance == "rbf":
+        D = torch.cdist(features, features_B, p=2) ** 2
+
+        # Outlier-robust scale invariance using quantiles to estimate standard deviation
+        stds = torch.quantile(features, q=torch.tensor((0.158655, 0.841345), device=features.device), dim=0)
+        stds = (stds[1] - stds[0]) / 2
+        D = D / (2 * torch.linalg.norm(stds) ** 2)
+    else:
+        raise ValueError("distance should be 'cosine' or 'euclidean', 'rbf'")
+    return D
+
+
+def affinity_from_features(
+    features: torch.Tensor,
+    features_B: torch.Tensor = None,
+    affinity_focal_gamma: float = 1.0,
+    distance: DistanceOptions = "cosine",
+):
+    """Compute affinity matrix from input features.
+
+    Args:
+        features (torch.Tensor): input features, shape (n_samples, n_features)
+        features_B (torch.Tensor, optional): optional, if not None, compute affinity between two sets of features
+        affinity_focal_gamma (float): affinity matrix parameter, lower values reduce the edge weights
+            on weak connections, default 1.0
+        distance (str): distance metric, 'cosine' (default) or 'euclidean', 'rbf'.
+    Returns:
+        (torch.Tensor): affinity matrix, shape (n_samples, n_samples)
+    """
+    # compute affinity matrix from input features
+
+    # if feature_B is not provided, compute affinity matrix on features x features
+    # if feature_B is provided, compute affinity matrix on features x feature_B
+    features_B = features if features_B is None else features_B
+
+    # compute distance matrix from input features
+    D = distance_from_features(features, features_B, distance)
+
+    # torch.exp makes the affinity matrix positive definite,
+    # lower affinity_focal_gamma reduces the weak edge weights
+    A = torch.exp(-D / affinity_focal_gamma)
+    return A
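The new module keeps the same relationship as before: the affinity is `exp(-D / affinity_focal_gamma)` over the chosen distance. A quick sketch of that identity (toy tensors):

import torch

from nystrom_ncut.distance_utils import affinity_from_features, distance_from_features

X = torch.randn(100, 32)                    # toy features
D = distance_from_features(X, X, "cosine")  # 1 - cosine similarity
A = affinity_from_features(X, affinity_focal_gamma=0.5, distance="cosine")
assert torch.allclose(A, torch.exp(-D / 0.5))  # A = exp(-D / gamma)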
nystrom_ncut/nystrom/distance_realization.py
CHANGED
@@ -1,20 +1,18 @@
-from typing import Tuple
-
 import torch

-from .nystrom import (
+from .nystrom_utils import (
     EigSolverOptions,
     OnlineKernel,
     OnlineNystromSubsampleFit,
     solve_eig,
 )
-from ..common import (
+from ..distance_utils import (
     DistanceOptions,
-    SampleOptions,
-)
-from ..propagation_utils import (
     distance_from_features,
 )
+from ..sampling_utils import (
+    SampleConfig,
+)


 class GramKernel(OnlineKernel):
@@ -100,8 +98,7 @@ class DistanceRealization(OnlineNystromSubsampleFit):
     def __init__(
         self,
         n_components: int = 100,
-        num_sample: int = 10000,
-        sample_method: SampleOptions = "farthest",
+        sample_config: SampleConfig = SampleConfig(),
         distance: DistanceOptions = "cosine",
         eig_solver: EigSolverOptions = "svd_lowrank",
         chunk_size: int = 8192,
@@ -109,9 +106,7 @@ class DistanceRealization(OnlineNystromSubsampleFit):
         """
         Args:
             n_components (int): number of top eigenvectors to return
-            num_sample (int): number of samples for Nystrom-like approximation,
-                reduce only if memory is not enough, increase for better approximation
-            sample_method (str): subgraph sampling, ['farthest', 'random'].
+            sample_config (SampleConfig): subgraph sampling configuration, ['fps', 'random', 'fps_recursive'].
                 farthest point sampling is recommended for better Nystrom-approximation accuracy
             distance (str): distance metric for affinity matrix, ['cosine', 'euclidean', 'rbf'].
             eig_solver (str): eigen decompose solver, ['svd_lowrank', 'lobpcg', 'svd', 'eigh'].
@@ -121,9 +116,8 @@ class DistanceRealization(OnlineNystromSubsampleFit):
             self,
             n_components=n_components,
             kernel=GramKernel(distance, eig_solver),
-            num_sample=num_sample,
             distance=distance,
-            sample_method=sample_method,
+            sample_config=sample_config,
             eig_solver=eig_solver,
             chunk_size=chunk_size,
         )
@@ -138,5 +132,5 @@ class DistanceRealization(OnlineNystromSubsampleFit):
         return V * (L ** 0.5)

     def transform(self, features: torch.Tensor = None) -> torch.Tensor:
-        V, L = OnlineNystromSubsampleFit.transform(features)
+        V, L = OnlineNystromSubsampleFit.transform(self, features)
         return V * (L ** 0.5)
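Besides the config migration, note the bug fix in `transform`: 0.1.0 called the unbound `OnlineNystromSubsampleFit.transform(features)` without `self`, which could not work at runtime; 0.1.2 passes `self` explicitly. A minimal construction sketch under the new API (values illustrative; assumes `fit_transform` follows the same `V * L ** 0.5` convention as `transform` above):

import torch

from nystrom_ncut import SampleConfig
from nystrom_ncut.nystrom.distance_realization import DistanceRealization

dr = DistanceRealization(
    n_components=10,
    sample_config=SampleConfig(method="fps", num_sample=500),
    distance="rbf",
)
points = dr.fit_transform(torch.randn(2000, 64))  # Euclidean realization of the pairwise distances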
nystrom_ncut/nystrom/normalized_cut.py
CHANGED
@@ -1,19 +1,19 @@
 import torch
 import torch.nn.functional as Fn

-from .nystrom import (
+from .nystrom_utils import (
     EigSolverOptions,
     OnlineKernel,
     OnlineNystromSubsampleFit,
     solve_eig,
 )
-from ..common import (
+from ..distance_utils import (
     DistanceOptions,
-    SampleOptions,
-)
-from ..propagation_utils import (
     affinity_from_features,
 )
+from ..sampling_utils import (
+    SampleConfig,
+)


 class LaplacianKernel(OnlineKernel):
@@ -94,8 +94,7 @@ class NCut(OnlineNystromSubsampleFit):
         self,
         n_components: int = 100,
         affinity_focal_gamma: float = 1.0,
-        num_sample: int = 10000,
-        sample_method: SampleOptions = "farthest",
+        sample_config: SampleConfig = SampleConfig(),
         distance: DistanceOptions = "cosine",
         eig_solver: EigSolverOptions = "svd_lowrank",
         chunk_size: int = 8192,
@@ -105,9 +104,7 @@ class NCut(OnlineNystromSubsampleFit):
             n_components (int): number of top eigenvectors to return
             affinity_focal_gamma (float): affinity matrix temperature, lower t reduces the not-so-connected edge weights,
                 smaller t results in sharper eigenvectors.
-            num_sample (int): number of samples for Nystrom-like approximation,
-                reduce only if memory is not enough, increase for better approximation
-            sample_method (str): subgraph sampling, ['farthest', 'random'].
+            sample_config (SampleConfig): subgraph sampling configuration, ['fps', 'random', 'fps_recursive'].
                 farthest point sampling is recommended for better Nystrom-approximation accuracy
             distance (str): distance metric for affinity matrix, ['cosine', 'euclidean', 'rbf'].
             eig_solver (str): eigen decompose solver, ['svd_lowrank', 'lobpcg', 'svd', 'eigh'].
@@ -117,9 +114,8 @@ class NCut(OnlineNystromSubsampleFit):
             self,
             n_components=n_components,
             kernel=LaplacianKernel(affinity_focal_gamma, distance, eig_solver),
-            num_sample=num_sample,
            distance=distance,
-            sample_method=sample_method,
+            sample_config=sample_config,
             eig_solver=eig_solver,
             chunk_size=chunk_size,
         )
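The `NCut` constructor migration in one sketch (hypothetical values):

from nystrom_ncut import NCut, SampleConfig

# 0.1.0: NCut(n_components=50, num_sample=5000, sample_method="farthest")
# 0.1.2:
nc = NCut(
    n_components=50,
    affinity_focal_gamma=0.5,
    sample_config=SampleConfig(method="fps", num_sample=5000),
    distance="cosine",
)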
nystrom_ncut/nystrom/{nystrom.py → nystrom_utils.py}
RENAMED
@@ -1,14 +1,17 @@
+import copy
 import logging
 from typing import Literal, Tuple

 import torch

 from ..common import (
-    DistanceOptions,
-    SampleOptions,
     ceildiv,
 )
-from ..propagation_utils import (
+from ..distance_utils import (
+    DistanceOptions,
+)
+from ..sampling_utils import (
+    SampleConfig,
     run_subgraph_sampling,
 )

@@ -145,9 +148,8 @@ class OnlineNystromSubsampleFit(OnlineNystrom):
         self,
         n_components: int,
         kernel: OnlineKernel,
-        num_sample: int,
         distance: DistanceOptions,
-        sample_method: SampleOptions,
+        sample_config: SampleConfig,
         eig_solver: EigSolverOptions = "svd_lowrank",
         chunk_size: int = 8192,
     ):
@@ -158,9 +160,9 @@ class OnlineNystromSubsampleFit(OnlineNystrom):
             eig_solver=eig_solver,
             chunk_size=chunk_size,
         )
-        self.num_sample: int = num_sample
         self.distance: DistanceOptions = distance
-        self.sample_method: SampleOptions = sample_method
+        self.sample_config: SampleConfig = sample_config
+        self.sample_config._ncut_obj = copy.deepcopy(self)
         self.anchor_indices: torch.Tensor = None

     def _fit_helper(
@@ -169,7 +171,7 @@ class OnlineNystromSubsampleFit(OnlineNystrom):
         precomputed_sampled_indices: torch.Tensor,
     ) -> Tuple[torch.Tensor, torch.Tensor]:
         _n = features.shape[0]
-        if self.num_sample >= _n:
+        if self.sample_config.num_sample >= _n:
             logging.info(
                 f"NCUT nystrom num_sample is larger than number of input samples, nyström approximation is not needed, setting num_sample={_n}"
             )
@@ -180,9 +182,8 @@ class OnlineNystromSubsampleFit(OnlineNystrom):
         else:
             self.anchor_indices = run_subgraph_sampling(
                 features=features,
-                num_sample=self.num_sample,
                 disttype=self.distance,
-                sample_method=self.sample_method,
+                config=self.sample_config,
             )
             sampled_features = features[self.anchor_indices]
             OnlineNystrom.fit(self, sampled_features)
@@ -243,6 +244,7 @@ def solve_eig(
     A: torch.Tensor,
     num_eig: int,
     eig_solver: EigSolverOptions,
+    eig_value_buffer: float = 0.0,
 ) -> Tuple[torch.Tensor, torch.Tensor]:
     """PyTorch implementation of Eigensolver cut without Nystrom-like approximation.

@@ -250,11 +252,13 @@ def solve_eig(
         A (torch.Tensor): input matrix, shape (n_samples, n_samples)
         num_eig (int): number of eigenvectors to return
         eig_solver (str): eigen decompose solver, ['svd_lowrank', 'lobpcg', 'svd', 'eigh']
-
+        eig_value_buffer (float): value added to diagonal to buffer symmetric but non-PSD matrices
     Returns:
         (torch.Tensor): eigenvectors corresponding to the eigenvalues, shape (n_samples, num_eig)
         (torch.Tensor): eigenvalues of the eigenvectors, sorted in descending order
     """
+    A = A + eig_value_buffer * torch.eye(A.shape[0], device=A.device)
+
     # compute eigenvectors
     if eig_solver == "svd_lowrank":  # default
         # only top q eigenvectors, fastest
@@ -272,15 +276,14 @@ def solve_eig(
         raise ValueError(
             "eigen_solver should be 'lobpcg', 'svd_lowrank', 'svd' or 'eigh'"
         )
+    eigen_value = eigen_value - eig_value_buffer

     # sort eigenvectors by eigenvalues, take top (descending order)
-    eigen_value = eigen_value.real
-    eigen_vector = eigen_vector.real
-    eigen_value, indices = torch.topk(eigen_value, k=num_eig, dim=0)
-    eigen_vector = eigen_vector[:, indices]
+    indices = torch.topk(eigen_value.abs(), k=num_eig, dim=0).indices
+    eigen_value, eigen_vector = eigen_value[indices], eigen_vector[:, indices]

     # correct the random rotation (flipping sign) of eigenvectors
-    sign = torch.sum(eigen_vector, dim=0).sign()
+    sign = torch.sum(eigen_vector.real, dim=0).sign()
     sign[sign == 0] = 1.0
     eigen_vector = eigen_vector * sign
     return eigen_vector, eigen_value
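A short sketch of the new `eig_value_buffer` knob: it shifts the diagonal before decomposition (so symmetric but non-PSD inputs stay solvable) and subtracts the shift from the returned eigenvalues (toy symmetric matrix):

import torch

from nystrom_ncut.nystrom.nystrom_utils import solve_eig

M = torch.randn(50, 50)
A = (M + M.T) / 2  # symmetric, possibly indefinite
eigen_vector, eigen_value = solve_eig(A, num_eig=5, eig_solver="eigh", eig_value_buffer=1e-3)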
nystrom_ncut/sampling_utils.py
ADDED
@@ -0,0 +1,96 @@
+import logging
+from dataclasses import dataclass
+from typing import Literal
+
+import torch
+from dgl.geometry import farthest_point_sampler
+
+from .distance_utils import (
+    DistanceOptions,
+    affinity_from_features,
+    to_euclidean,
+)
+
+
+SampleOptions = Literal["random", "fps", "fps_recursive"]
+
+
+@dataclass
+class SampleConfig:
+    method: SampleOptions = "fps"
+    num_sample: int = 10000
+    fps_dim: int = 12
+    n_iter: int = None
+    _ncut_obj: object = None
+
+
+@torch.no_grad()
+def run_subgraph_sampling(
+    features: torch.Tensor,
+    disttype: DistanceOptions,
+    config: SampleConfig,
+    max_draw: int = 1000000,
+):
+    features = features.detach()
+    if config.num_sample >= features.shape[0]:
+        # if too many samples, use all samples and bypass Nystrom-like approximation
+        logging.info(
+            "num_sample is larger than total, bypass Nystrom-like approximation"
+        )
+        sampled_indices = torch.arange(features.shape[0])
+    else:
+        # sample subgraph
+        if config.method == "fps":  # default
+            features = to_euclidean(features, disttype)
+            if config.num_sample > max_draw:
+                logging.warning(
+                    f"num_sample is larger than max_draw, apply farthest point sampling on random sampled {max_draw} samples"
+                )
+                draw_indices = torch.randperm(features.shape[0])[:max_draw]
+                sampled_indices = fpsample(features[draw_indices], config)
+                sampled_indices = draw_indices[sampled_indices]
+            else:
+                sampled_indices = fpsample(features, config)
+
+        elif config.method == "random":  # not recommended
+            sampled_indices = torch.randperm(features.shape[0])[:config.num_sample]
+
+        elif config.method == "fps_recursive":
+            features = to_euclidean(features, disttype)
+            sampled_indices = run_subgraph_sampling(
+                features=features,
+                disttype=disttype,
+                config=SampleConfig(method="fps", num_sample=config.num_sample, fps_dim=config.fps_dim)
+            )
+
+            nc = config._ncut_obj
+
+            A = affinity_from_features(features, affinity_focal_gamma=nc.kernel.affinity_focal_gamma, distance=nc.kernel.distance)
+            R = torch.diag(torch.sum(A, dim=-1) ** -0.5)
+            L = R @ A @ R
+
+            for _ in range(config.n_iter):
+                fps_features, eigenvalues = nc.fit_transform(features, precomputed_sampled_indices=sampled_indices)
+
+                _L = fps_features @ torch.diag(eigenvalues) @ fps_features.mT
+                RE = torch.abs(_L / L - 1)
+
+                print(f"Iteration {_} --- max: {RE.max().item()}, mean: {RE.mean().item()}, min: {RE.min().item()}")
+                fps_features = to_euclidean(fps_features[:, :config.fps_dim], "cosine")
+                sampled_indices = torch.sort(fpsample(fps_features, config)).values
+        else:
+            raise ValueError("config.method should be 'fps', 'random' or 'fps_recursive'")
+    sampled_indices = torch.sort(sampled_indices).values
+    return sampled_indices.to(features.device)
+
+
+def fpsample(
+    features: torch.Tensor,
+    config: SampleConfig,
+):
+    # PCA to reduce the dimension
+    if features.shape[1] > config.fps_dim:
+        U, S, V = torch.pca_lowrank(features, q=config.fps_dim)
+        features = U * S
+
+    return farthest_point_sampler(features[None], config.num_sample)[0]
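A usage sketch of the new sampling entry point (note the new dependency on `dgl` for `farthest_point_sampler`, replacing the old `fpsample` package; shapes illustrative):

import torch

from nystrom_ncut.sampling_utils import SampleConfig, run_subgraph_sampling

features = torch.randn(20000, 64)
config = SampleConfig(method="fps", num_sample=1000, fps_dim=12)
anchor_indices = run_subgraph_sampling(features, disttype="cosine", config=config)
# sorted LongTensor of 1000 anchor row indices into `features`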
nystrom_ncut/visualize_utils.py
CHANGED
@@ -1,26 +1,150 @@
 import logging
-from typing import Any, Callable, Dict, Literal
+from typing import Any, Callable, Dict, Literal

 import numpy as np
 import torch
-import torch.nn.functional as F
+import torch.nn.functional as Fn
 from sklearn.base import BaseEstimator

 from .common import (
+    ceildiv,
     lazy_normalize,
-    to_euclidean,
     quantile_min_max,
     quantile_normalize,
 )
-from .propagation_utils import (
-    affinity_from_features,
+from .distance_utils import (
+    DistanceOptions,
+    to_euclidean,
+    affinity_from_features,
 )
-from .propagation_utils import (
+from .sampling_utils import (
+    SampleConfig,
     run_subgraph_sampling,
-    extrapolate_knn,
 )


+def extrapolate_knn(
+    anchor_features: torch.Tensor,          # [n x d]
+    anchor_output: torch.Tensor,            # [n x d']
+    extrapolation_features: torch.Tensor,   # [m x d]
+    distance: DistanceOptions,
+    knn: int = 10,                          # k
+    affinity_focal_gamma: float = 1.0,
+    chunk_size: int = 8192,
+    device: str = None,
+    move_output_to_cpu: bool = False,
+) -> torch.Tensor:                          # [m x d']
+    """A generic function to propagate new nodes using KNN.
+
+    Args:
+        anchor_features (torch.Tensor): features from subgraph, shape (num_sample, n_features)
+        anchor_output (torch.Tensor): output from subgraph, shape (num_sample, D)
+        extrapolation_features (torch.Tensor): features from new nodes, shape (new_num_samples, n_features)
+        knn (int): number of KNN to propagate eigenvectors
+        distance (str): distance metric, 'cosine' (default) or 'euclidean', 'rbf'
+        chunk_size (int): chunk size for matrix multiplication
+        device (str): device to use for computation, if None, will not change device
+    Returns:
+        torch.Tensor: propagated eigenvectors, shape (new_num_samples, D)
+
+    Examples:
+        >>> old_eigenvectors = torch.randn(3000, 20)
+        >>> old_features = torch.randn(3000, 100)
+        >>> new_features = torch.randn(200, 100)
+        >>> new_eigenvectors = extrapolate_knn(old_features, old_eigenvectors, new_features, "cosine", knn=3)
+        >>> # new_eigenvectors.shape = (200, 20)
+
+    """
+    device = anchor_output.device if device is None else device
+
+    # used in nystrom_ncut
+    # propagate eigen_vector from subgraph to full graph
+    anchor_output = anchor_output.to(device)
+
+    n_chunks = ceildiv(extrapolation_features.shape[0], chunk_size)
+    V_list = []
+    for _v in torch.chunk(extrapolation_features, n_chunks, dim=0):
+        _v = _v.to(device)                                                                   # [_m x d]
+
+        _A = affinity_from_features(anchor_features, _v, affinity_focal_gamma, distance).mT  # [_m x n]
+        if knn is not None:
+            _A, indices = _A.topk(k=knn, dim=-1, largest=True)                               # [_m x k], [_m x k]
+            _anchor_output = anchor_output[indices]                                          # [_m x k x d]
+        else:
+            _anchor_output = anchor_output[None]                                             # [1 x n x d]
+
+        _A = Fn.normalize(_A, p=1, dim=-1)                                                   # [_m x k]
+        _V = (_A[:, None, :] @ _anchor_output).squeeze(1)                                    # [_m x d]
+
+        if move_output_to_cpu:
+            _V = _V.cpu()
+        V_list.append(_V)
+
+    extrapolation_output = torch.cat(V_list, dim=0)
+    return extrapolation_output
+
+
+# wrapper functions for adding new nodes to existing graph
+def extrapolate_knn_with_subsampling(
+    full_features: torch.Tensor,            # [n x d]
+    full_output: torch.Tensor,              # [n x d']
+    extrapolation_features: torch.Tensor,   # [m x d]
+    sample_config: SampleConfig,
+    distance: DistanceOptions,
+    knn: int = 10,                          # k
+    affinity_focal_gamma: float = 1.0,
+    chunk_size: int = 8192,
+    device: str = None,
+    move_output_to_cpu: bool = False,
+) -> torch.Tensor:                          # [m x d']
+    """Propagate eigenvectors to new nodes using KNN. Note: this is equivalent to the class API `NCut.transform(new_features)`, except that the sampling is re-done in this function.
+    Args:
+        full_output (torch.Tensor): eigenvectors from existing nodes, shape (num_sample, num_eig)
+        full_features (torch.Tensor): features from existing nodes, shape (n_samples, n_features)
+        extrapolation_features (torch.Tensor): features from new nodes, shape (n_new_samples, n_features)
+        knn (int): number of KNN to propagate eigenvectors, default 3
+        sample_config (SampleConfig): subgraph sampling configuration
+        chunk_size (int): chunk size for matrix multiplication, default 8192
+        device (str): device to use for computation, if None, will not change device
+    Returns:
+        torch.Tensor: propagated eigenvectors, shape (n_new_samples, num_eig)
+
+    Examples:
+        >>> old_eigenvectors = torch.randn(3000, 20)
+        >>> old_features = torch.randn(3000, 100)
+        >>> new_features = torch.randn(200, 100)
+        >>> new_eigenvectors = extrapolate_knn_with_subsampling(old_features, old_eigenvectors, new_features, SampleConfig(), "cosine", knn=3)
+        >>> # new_eigenvectors.shape = (200, 20)
+    """
+
+    device = full_output.device if device is None else device
+
+    # sample subgraph
+    anchor_indices = run_subgraph_sampling(
+        features=full_features,
+        disttype=distance,
+        config=sample_config,
+    )
+
+    anchor_output = full_output[anchor_indices].to(device)
+    anchor_features = full_features[anchor_indices].to(device)
+    extrapolation_features = extrapolation_features.to(device)
+
+    # propagate eigenvectors from subgraph to new nodes
+    extrapolation_output = extrapolate_knn(
+        anchor_features,
+        anchor_output,
+        extrapolation_features,
+        distance,
+        knn=knn,
+        affinity_focal_gamma=affinity_focal_gamma,
+        chunk_size=chunk_size,
+        device=device,
+        move_output_to_cpu=move_output_to_cpu,
+    )
+    return extrapolation_output
+
+
 def _rgb_with_dimensionality_reduction(
     features: torch.Tensor,
     num_sample: int,
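The core of `extrapolate_knn` is a row-stochastic weighted average over each new node's k nearest anchors; a standalone sketch of that step, mirroring the chunk loop above (toy tensors):

import torch
import torch.nn.functional as Fn

anchor_output = torch.randn(300, 20)  # [n x d'] outputs on the sampled anchors
affinity = torch.rand(8, 300)         # [m x n] new-node-to-anchor affinities
topk_affinity, indices = affinity.topk(k=10, dim=-1)  # keep each row's k nearest anchors
weights = Fn.normalize(topk_affinity, p=1, dim=-1)    # rows sum to 1
new_output = (weights[:, None, :] @ anchor_output[indices]).squeeze(1)  # [m x d']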
@@ -38,9 +162,8 @@ def _rgb_with_dimensionality_reduction(
     if True:
         _subgraph_indices = run_subgraph_sampling(
             features=features,
-            num_sample=10000,
             disttype=disttype,
-            sample_method="farthest",
+            config=SampleConfig(method="fps"),
         )
         features = extrapolate_knn(
             anchor_features=features[_subgraph_indices],
@@ -51,9 +174,8 @@ def _rgb_with_dimensionality_reduction(

     subgraph_indices = run_subgraph_sampling(
         features=features,
-        num_sample=num_sample,
         disttype=disttype,
-        sample_method="farthest",
+        config=SampleConfig(method="fps", num_sample=num_sample),
     )

     _inp = features[subgraph_indices].numpy(force=True)
@@ -334,14 +456,14 @@ def rgb_from_umap_3d(
     return rgb


-def flatten_sphere(X_3d):
-    x = np.arctan2(X_3d[:, 0], X_3d[:, 1])
-    y = -np.arccos(X_3d[:, 2])
-    X_2d = np.stack((x, y), axis=1)
+def flatten_sphere(X_3d: torch.Tensor) -> torch.Tensor:
+    x = torch.atan2(X_3d[:, 0], X_3d[:, 1])
+    y = -torch.acos(X_3d[:, 2])
+    X_2d = torch.stack((x, y), dim=1)
     return X_2d


-def rotate_rgb_cube(rgb, position=1):
+def rotate_rgb_cube(rgb: torch.Tensor, position: int = 1) -> torch.Tensor:
     """rotate RGB cube to different position

     Args:
@@ -365,7 +487,7 @@ def rotate_rgb_cube(rgb, position=1):
     return rgb


-def rgb_from_3d_rgb_cube(X_3d, q=0.95):
+def rgb_from_3d_rgb_cube(X_3d: torch.Tensor, q: float = 0.95) -> torch.Tensor:
     """convert 3D t-SNE to RGB color space
     Args:
         X_3d (torch.Tensor): 3D t-SNE embedding, shape (n_samples, 3)
@@ -383,6 +505,26 @@ def rgb_from_3d_rgb_cube(X_3d, q=0.95):
     return rgb


+def rgb_from_3d_lab_cube(X_3d: torch.Tensor, q: float = 0.95, full_range: bool = True) -> torch.Tensor:
+    from skimage import color
+    X_3d = X_3d - torch.mean(X_3d, dim=0)
+    U, S, VT = torch.linalg.svd(X_3d)
+    X_3d = torch.flip(U[:, :3] * S, dims=(1,))
+
+    AB_scale = 128.0 / torch.quantile(torch.linalg.norm(X_3d[:, 1:], dim=1), q=q, dim=0)
+    L_min, L_max = torch.quantile(X_3d[:, 0], q=torch.tensor(((1 - q) / 2, (1 + q) / 2)), dim=0)
+    L_scale = 100.0 / (L_max - L_min)
+
+    X_3d[:, 0] = X_3d[:, 0] - L_min
+    if full_range:
+        lab = X_3d * torch.tensor((L_scale, AB_scale, AB_scale))
+    else:
+        lab = X_3d * L_scale
+
+    rgb = torch.tensor(color.lab2rgb(lab.numpy(force=True)))
+    return rgb
+
+
 def convert_to_lab_color(rgb, full_range=True):
     from skimage import color
     import copy
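A quick sketch of the new CIELAB mapping (requires scikit-image; input illustrative):

import torch

from nystrom_ncut.visualize_utils import rgb_from_3d_lab_cube

X_3d = torch.randn(500, 3)                # e.g. a 3D t-SNE or UMAP embedding
rgb = rgb_from_3d_lab_cube(X_3d, q=0.95)  # (500, 3) RGB values in [0, 1]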
@@ -401,7 +543,7 @@ def convert_to_lab_color(rgb, full_range=True):
     return lab_rgb


-def rgb_from_2d_colormap(X_2d, q=0.95):
+def rgb_from_2d_colormap(X_2d: torch.Tensor, q: float = 0.95):
     xy = X_2d.clone()
     for i in range(2):
         xy[:, i] = quantile_normalize(xy[:, i], q=q)
@@ -446,7 +588,7 @@ def _transform_heatmap(heatmap, gamma=1.0):
     # large gamma means more focus on the high values, hence smaller mask
     heatmap = 1 / heatmap ** gamma
     # min-max normalization [0, 1]
-    vmin, vmax = quantile_min_max(heatmap.flatten())
+    vmin, vmax = quantile_min_max(heatmap.flatten(), 0.01, 0.99)
     heatmap = (heatmap - vmin) / (vmax - vmin)
     return heatmap
@@ -514,7 +656,7 @@ def get_mask(

     # normalize the eigenvectors to unit norm, to compute cosine similarity
     all_eigvecs = lazy_normalize(all_eigvecs, p=2, dim=-1)
-    prompt_eigvec = F.normalize(prompt_eigvec, p=2, dim=-1)
+    prompt_eigvec = Fn.normalize(prompt_eigvec, p=2, dim=-1)

     # compute the cosine similarity
     cos_sim = all_eigvecs @ prompt_eigvec.unsqueeze(-1)  # (B, H, W, 1)
{nystrom_ncut-0.1.0.dist-info → nystrom_ncut-0.1.2.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: nystrom_ncut
-Version: 0.1.0
+Version: 0.1.2
 Summary: Normalized Cut and Nyström Approximation
 Author-email: Huzheng Yang <huze.yann@gmail.com>, Wentinn Liao <wentinn.liao@gmail.com>
 Project-URL: Documentation, https://github.com/JophiArcana/Nystrom-NCUT/
nystrom_ncut-0.1.2.dist-info/RECORD
ADDED
@@ -0,0 +1,15 @@
+__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+nystrom_ncut/__init__.py,sha256=Wgud0tGaNkK2m_qVU47rXQqKdoR-4ztDXXD9UKzc4c8,488
+nystrom_ncut/common.py,sha256=_PGJoImSk_Fb_5Ri-e_IsFoCcSfbGS8CxYUUHVoNM50,2036
+nystrom_ncut/distance_utils.py,sha256=U1223ri8OuIzj0wjhAUhHWcsEvREDitgz8i1rRlCfj8,3069
+nystrom_ncut/sampling_utils.py,sha256=uoWWSyfttv5fnOSq8KFXomWiNO-THiPPbLXfupnVar0,3444
+nystrom_ncut/visualize_utils.py,sha256=xDlkE5sMXehK5hNz9U1twqgHZVzmV5tf5O9bL96AiaM,22982
+nystrom_ncut/nystrom/__init__.py,sha256=4EpxD3Cmc8Fif4vo8DG-6FpTfCnNanD5zCZxK3WrMwQ,121
+nystrom_ncut/nystrom/distance_realization.py,sha256=9GX_XSISTvsEWUu8bG5AxtlkYYNItFspcH5wXiwSOKY,5789
+nystrom_ncut/nystrom/normalized_cut.py,sha256=ZxFV8Sckp6wtpNyoA15DS7Vfu9QLvzNpwrwY0n9_GNs,6953
+nystrom_ncut/nystrom/nystrom_utils.py,sha256=MEmW5xgOu8u2HCwjFapHAOFFXhoVslBbLG4Cn-mYMDU,12995
+nystrom_ncut-0.1.2.dist-info/LICENSE,sha256=2bm9uFabQZ3Ykb_SaSU_uUbAj2-htc6WJQmS_65qD00,1073
+nystrom_ncut-0.1.2.dist-info/METADATA,sha256=0wsHYtW3cY4Bzq-lH_y_Blazt6YMrwWxsmR7SOHMyzs,6058
+nystrom_ncut-0.1.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+nystrom_ncut-0.1.2.dist-info/top_level.txt,sha256=gM8IWWHYysIRTCvCTcdS4RShOyl9pxpylgSwPUZR2XM,22
+nystrom_ncut-0.1.2.dist-info/RECORD,,
nystrom_ncut/propagation_utils.py
DELETED
@@ -1,268 +0,0 @@
-import logging
-
-import numpy as np
-import torch
-import torch.nn.functional as Fn
-
-from .common import (
-    DistanceOptions,
-    SampleOptions,
-    ceildiv,
-    lazy_normalize,
-    to_euclidean,
-)
-
-
-# @torch.no_grad()
-def run_subgraph_sampling(
-    features: torch.Tensor,
-    num_sample: int,
-    disttype: DistanceOptions,
-    sample_method: SampleOptions,
-    max_draw: int = 1000000,
-):
-    if num_sample >= features.shape[0]:
-        # if too many samples, use all samples and bypass Nystrom-like approximation
-        logging.info(
-            "num_sample is larger than total, bypass Nystrom-like approximation"
-        )
-        sampled_indices = torch.arange(features.shape[0])
-    else:
-        # sample subgraph
-        if sample_method == "farthest":  # default
-            features = to_euclidean(features, disttype)
-            if num_sample > max_draw:
-                logging.warning(
-                    f"num_sample is larger than max_draw, apply farthest point sampling on random sampled {max_draw} samples"
-                )
-                draw_indices = torch.randperm(features.shape[0])[:max_draw]
-                sampled_indices = farthest_point_sampling(
-                    features[draw_indices].detach(),
-                    num_sample=num_sample,
-                )
-                sampled_indices = draw_indices[sampled_indices]
-            else:
-                sampled_indices = farthest_point_sampling(
-                    features.detach(),
-                    num_sample=num_sample,
-                )
-        elif sample_method == "random":  # not recommended
-            sampled_indices = torch.randperm(features.shape[0])[:num_sample]
-        else:
-            raise ValueError("sample_method should be 'farthest' or 'random'")
-    sampled_indices = torch.sort(sampled_indices).values
-    return sampled_indices.to(features.device)
-
-
-def farthest_point_sampling(
-    features: torch.Tensor,
-    num_sample: int = 300,
-    h: int = 9,
-):
-    try:
-        import fpsample
-    except ImportError:
-        raise ImportError(
-            "fpsample import failed, please install `pip install fpsample`"
-        )
-
-    # PCA to reduce the dimension
-    if features.shape[1] > 8:
-        u, s, v = torch.pca_lowrank(features, q=8)
-        features = u @ torch.diag(s)
-
-    h = min(h, int(np.log2(features.shape[0])))
-
-    kdline_fps_samples_idx = fpsample.bucket_fps_kdline_sampling(
-        features.numpy(force=True), num_sample, h
-    ).astype(np.int64)
-    return torch.from_numpy(kdline_fps_samples_idx)
-
-
-def distance_from_features(
-    features: torch.Tensor,
-    features_B: torch.Tensor,
-    distance: DistanceOptions,
-):
-    """Compute affinity matrix from input features.
-    Args:
-        features (torch.Tensor): input features, shape (n_samples, n_features)
-        features_B (torch.Tensor, optional): optional, if not None, compute affinity between two features
-        distance (str): distance metric, 'cosine' (default) or 'euclidean', 'rbf'.
-    Returns:
-        (torch.Tensor): affinity matrix, shape (n_samples, n_samples)
-    """
-    # compute distance matrix from input features
-    if distance == "cosine":
-        features = lazy_normalize(features, dim=-1)
-        features_B = lazy_normalize(features_B, dim=-1)
-        D = 1 - features @ features_B.T
-    elif distance == "euclidean":
-        D = torch.cdist(features, features_B, p=2)
-    elif distance == "rbf":
-        D = torch.cdist(features, features_B, p=2) ** 2
-
-        # Outlier-robust scale invariance using quantiles to estimate standard deviation
-        stds = torch.quantile(features, q=torch.tensor((0.158655, 0.841345), device=features.device), dim=0)
-        stds = (stds[1] - stds[0]) / 2
-        D = D / (2 * torch.linalg.norm(stds) ** 2)
-    else:
-        raise ValueError("distance should be 'cosine' or 'euclidean', 'rbf'")
-    return D
-
-
-def affinity_from_features(
-    features: torch.Tensor,
-    features_B: torch.Tensor = None,
-    affinity_focal_gamma: float = 1.0,
-    distance: DistanceOptions = "cosine",
-):
-    """Compute affinity matrix from input features.
-
-    Args:
-        features (torch.Tensor): input features, shape (n_samples, n_features)
-        features_B (torch.Tensor, optional): optional, if not None, compute affinity between two features
-        affinity_focal_gamma (float): affinity matrix parameter, lower t reduce the edge weights
-            on weak connections, default 1.0
-        distance (str): distance metric, 'cosine' (default) or 'euclidean', 'rbf'.
-    Returns:
-        (torch.Tensor): affinity matrix, shape (n_samples, n_samples)
-    """
-    # compute affinity matrix from input features
-
-    # if feature_B is not provided, compute affinity matrix on features x features
-    # if feature_B is provided, compute affinity matrix on features x feature_B
-    features_B = features if features_B is None else features_B
-
-    # compute distance matrix from input features
-    D = distance_from_features(features, features_B, distance)
-
-    # torch.exp make affinity matrix positive definite,
-    # lower affinity_focal_gamma reduce the weak edge weights
-    A = torch.exp(-D / affinity_focal_gamma)
-    return A
-
-
-def extrapolate_knn(
-    anchor_features: torch.Tensor,          # [n x d]
-    anchor_output: torch.Tensor,            # [n x d']
-    extrapolation_features: torch.Tensor,   # [m x d]
-    distance: DistanceOptions,
-    knn: int = 10,                          # k
-    affinity_focal_gamma: float = 1.0,
-    chunk_size: int = 8192,
-    device: str = None,
-    move_output_to_cpu: bool = False,
-) -> torch.Tensor:                          # [m x d']
-    """A generic function to propagate new nodes using KNN.
-
-    Args:
-        anchor_features (torch.Tensor): features from subgraph, shape (num_sample, n_features)
-        anchor_output (torch.Tensor): output from subgraph, shape (num_sample, D)
-        extrapolation_features (torch.Tensor): features from existing nodes, shape (new_num_samples, n_features)
-        knn (int): number of KNN to propagate eigenvectors
-        distance (str): distance metric, 'cosine' (default) or 'euclidean', 'rbf'
-        chunk_size (int): chunk size for matrix multiplication
-        device (str): device to use for computation, if None, will not change device
-    Returns:
-        torch.Tensor: propagated eigenvectors, shape (new_num_samples, D)
-
-    Examples:
-        >>> old_eigenvectors = torch.randn(3000, 20)
-        >>> old_features = torch.randn(3000, 100)
-        >>> new_features = torch.randn(200, 100)
-        >>> new_eigenvectors = extrapolate_knn(old_features, old_eigenvectors, new_features, knn=3)
-        >>> # new_eigenvectors.shape = (200, 20)
-
-    """
-    device = anchor_output.device if device is None else device
-
-    # used in nystrom_ncut
-    # propagate eigen_vector from subgraph to full graph
-    anchor_output = anchor_output.to(device)
-
-    n_chunks = ceildiv(extrapolation_features.shape[0], chunk_size)
-    V_list = []
-    for _v in torch.chunk(extrapolation_features, n_chunks, dim=0):
-        _v = _v.to(device)                                                                   # [_m x d]
-
-        _A = affinity_from_features(anchor_features, _v, affinity_focal_gamma, distance).mT  # [_m x n]
-        if knn is not None:
-            _A, indices = _A.topk(k=knn, dim=-1, largest=True)                               # [_m x k], [_m x k]
-            _anchor_output = anchor_output[indices]                                          # [_m x k x d]
-        else:
-            _anchor_output = anchor_output[None]                                             # [1 x n x d]
-
-        _A = Fn.normalize(_A, p=1, dim=-1)                                                   # [_m x k]
-        _V = (_A[:, None, :] @ _anchor_output).squeeze(1)                                    # [_m x d]
-
-        if move_output_to_cpu:
-            _V = _V.cpu()
-        V_list.append(_V)
-
-    extrapolation_output = torch.cat(V_list, dim=0)
-    return extrapolation_output
-
-
-# wrapper functions for adding new nodes to existing graph
-def extrapolate_knn_with_subsampling(
-    full_features: torch.Tensor,            # [n x d]
-    full_output: torch.Tensor,              # [n x d']
-    extrapolation_features: torch.Tensor,   # [m x d]
-    num_sample: int,                        # n'
-    sample_method: SampleOptions,
-    distance: DistanceOptions,
-    knn: int = 10,                          # k
-    affinity_focal_gamma: float = 1.0,
-    chunk_size: int = 8192,
-    device: str = None,
-    move_output_to_cpu: bool = False,
-) -> torch.Tensor:                          # [m x d']
-    """Propagate eigenvectors to new nodes using KNN. Note: this is equivalent to the class API `NCUT.tranform(new_features)`, expect for the sampling is re-done in this function.
-    Args:
-        full_output (torch.Tensor): eigenvectors from existing nodes, shape (num_sample, num_eig)
-        full_features (torch.Tensor): features from existing nodes, shape (n_samples, n_features)
-        extrapolation_features (torch.Tensor): features from new nodes, shape (n_new_samples, n_features)
-        knn (int): number of KNN to propagate eigenvectors, default 3
-        num_sample (int): number of samples for subgraph sampling, default 50000
-        sample_method (str): sample method, 'farthest' (default) or 'random'
-        chunk_size (int): chunk size for matrix multiplication, default 8192
-        device (str): device to use for computation, if None, will not change device
-    Returns:
-        torch.Tensor: propagated eigenvectors, shape (n_new_samples, num_eig)
-
-    Examples:
-        >>> old_eigenvectors = torch.randn(3000, 20)
-        >>> old_features = torch.randn(3000, 100)
-        >>> new_features = torch.randn(200, 100)
-        >>> new_eigenvectors = extrapolate_knn_with_subsampling(extrapolation_features,old_eigenvectors,old_features,knn=3,num_sample=,sample_method=,chunk_size=,device=)
-        >>> # new_eigenvectors.shape = (200, 20)
-    """
-
-    device = full_output.device if device is None else device
-
-    # sample subgraph
-    anchor_indices = run_subgraph_sampling(
-        features=full_features,
-        num_sample=num_sample,
-        disttype=distance,
-        sample_method=sample_method,
-    )
-
-    anchor_output = full_output[anchor_indices].to(device)
-    anchor_features = full_features[anchor_indices].to(device)
-    extrapolation_features = extrapolation_features.to(device)
-
-    # propagate eigenvectors from subgraph to new nodes
-    extrapolation_output = extrapolate_knn(
-        anchor_features,
-        anchor_output,
-        extrapolation_features,
-        distance,
-        knn=knn,
-        affinity_focal_gamma=affinity_focal_gamma,
-        chunk_size=chunk_size,
-        device=device,
-        move_output_to_cpu=move_output_to_cpu,
-    )
-    return extrapolation_output
nystrom_ncut-0.1.0.dist-info/RECORD
DELETED
@@ -1,14 +0,0 @@
-__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-nystrom_ncut/__init__.py,sha256=ffExLdGTaPsUweHcYc61Ose6a5A5Tfo9hm48zjEl6ho,441
-nystrom_ncut/common.py,sha256=l5kjF6neukdtbMrt5bE69pN0JM9r-93JYltLlYJuBik,2227
-nystrom_ncut/propagation_utils.py,sha256=79M61iJfp_RWj_xLOn51PHiextWcEWTQ7NWl2T51-3Y,10907
-nystrom_ncut/visualize_utils.py,sha256=uGfBBkETQ8uRJ-UXPWpiXT8KEYMLmEHXPKJMLvN0c34,16543
-nystrom_ncut/nystrom/__init__.py,sha256=4EpxD3Cmc8Fif4vo8DG-6FpTfCnNanD5zCZxK3WrMwQ,121
-nystrom_ncut/nystrom/distance_realization.py,sha256=FGH7VjbtRrSROH0d8OPuCUxLQy5j7Z8BuE4hrSGGZG4,6031
-nystrom_ncut/nystrom/normalized_cut.py,sha256=s9ZS3-tQbWnxAlPc01v9l7fqBhl28lvOalaCO2y-Gd8,7175
-nystrom_ncut/nystrom/nystrom.py,sha256=OV5o9UL9fkrz9HdsD6rXh7MTsenPKrtCNRIczMuDS_4,12779
-nystrom_ncut-0.1.0.dist-info/LICENSE,sha256=2bm9uFabQZ3Ykb_SaSU_uUbAj2-htc6WJQmS_65qD00,1073
-nystrom_ncut-0.1.0.dist-info/METADATA,sha256=wAhGONU0ZM1VWoLqEwtfAs3_GUAt4CItkHq3ISuFyVE,6058
-nystrom_ncut-0.1.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-nystrom_ncut-0.1.0.dist-info/top_level.txt,sha256=gM8IWWHYysIRTCvCTcdS4RShOyl9pxpylgSwPUZR2XM,22
-nystrom_ncut-0.1.0.dist-info/RECORD,,
{nystrom_ncut-0.1.0.dist-info → nystrom_ncut-0.1.2.dist-info}/LICENSE
File without changes
{nystrom_ncut-0.1.0.dist-info → nystrom_ncut-0.1.2.dist-info}/WHEEL
File without changes
{nystrom_ncut-0.1.0.dist-info → nystrom_ncut-0.1.2.dist-info}/top_level.txt
File without changes