nystrom-ncut 0.0.7__tar.gz → 0.0.9__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {nystrom_ncut-0.0.7/src/nystrom_ncut.egg-info → nystrom_ncut-0.0.9}/PKG-INFO +1 -1
- {nystrom_ncut-0.0.7 → nystrom_ncut-0.0.9}/pyproject.toml +1 -1
- nystrom_ncut-0.0.9/src/nystrom_ncut/common.py +61 -0
- {nystrom_ncut-0.0.7 → nystrom_ncut-0.0.9}/src/nystrom_ncut/ncut_pytorch.py +1 -1
- {nystrom_ncut-0.0.7 → nystrom_ncut-0.0.9}/src/nystrom_ncut/nystrom.py +3 -1
- {nystrom_ncut-0.0.7 → nystrom_ncut-0.0.9}/src/nystrom_ncut/propagation_utils.py +10 -66
- {nystrom_ncut-0.0.7 → nystrom_ncut-0.0.9}/src/nystrom_ncut/visualize_utils.py +55 -51
- {nystrom_ncut-0.0.7 → nystrom_ncut-0.0.9/src/nystrom_ncut.egg-info}/PKG-INFO +1 -1
- nystrom_ncut-0.0.7/src/nystrom_ncut/common.py +0 -24
- {nystrom_ncut-0.0.7 → nystrom_ncut-0.0.9}/LICENSE +0 -0
- {nystrom_ncut-0.0.7 → nystrom_ncut-0.0.9}/MANIFEST.in +0 -0
- {nystrom_ncut-0.0.7 → nystrom_ncut-0.0.9}/README.md +0 -0
- {nystrom_ncut-0.0.7 → nystrom_ncut-0.0.9}/requirements.txt +0 -0
- {nystrom_ncut-0.0.7 → nystrom_ncut-0.0.9}/setup.cfg +0 -0
- {nystrom_ncut-0.0.7 → nystrom_ncut-0.0.9}/src/nystrom_ncut/__init__.py +0 -0
- {nystrom_ncut-0.0.7 → nystrom_ncut-0.0.9}/src/nystrom_ncut.egg-info/SOURCES.txt +0 -0
- {nystrom_ncut-0.0.7 → nystrom_ncut-0.0.9}/src/nystrom_ncut.egg-info/dependency_links.txt +0 -0
- {nystrom_ncut-0.0.7 → nystrom_ncut-0.0.9}/src/nystrom_ncut.egg-info/top_level.txt +0 -0
- {nystrom_ncut-0.0.7 → nystrom_ncut-0.0.9}/tests/test.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: nystrom_ncut
|
3
|
-
Version: 0.0.7
|
3
|
+
Version: 0.0.9
|
4
4
|
Summary: Normalized Cut and Nyström Approximation
|
5
5
|
Author-email: Huzheng Yang <huze.yann@gmail.com>, Wentinn Liao <wentinn.liao@gmail.com>
|
6
6
|
Project-URL: Documentation, https://github.com/JophiArcana/Nystrom-NCUT/
|
@@ -0,0 +1,61 @@
|
|
1
|
+
from typing import Any, Literal
|
2
|
+
|
3
|
+
import numpy as np
|
4
|
+
import torch
|
5
|
+
import torch.nn.functional as Fn
|
6
|
+
|
7
|
+
|
8
|
+
DistanceOptions = Literal["cosine", "euclidean", "rbf"]
|
9
|
+
SampleOptions = Literal["farthest", "random"]
|
10
|
+
|
11
|
+
|
12
|
+
def ceildiv(a: int, b: int) -> int:
|
13
|
+
return -(-a // b)
|
14
|
+
|
15
|
+
|
16
|
+
def lazy_normalize(x: torch.Tensor, n: int = 1000, **normalize_kwargs: Any) -> torch.Tensor:
|
17
|
+
numel = np.prod(x.shape[:-1])
|
18
|
+
n = min(n, numel)
|
19
|
+
random_indices = torch.randperm(numel)[:n]
|
20
|
+
_x = x.flatten(0, -2)[random_indices]
|
21
|
+
if torch.allclose(torch.norm(_x, **normalize_kwargs), torch.ones(n, device=x.device)):
|
22
|
+
return x
|
23
|
+
else:
|
24
|
+
return Fn.normalize(x, **normalize_kwargs)
|
25
|
+
|
26
|
+
|
27
|
+
def quantile_min_max(x, q1=0.01, q2=0.99, n_sample=10000):
|
28
|
+
if x.shape[0] > n_sample:
|
29
|
+
np.random.seed(0)
|
30
|
+
random_idx = np.random.choice(x.shape[0], n_sample, replace=False)
|
31
|
+
vmin, vmax = x[random_idx].quantile(q1), x[random_idx].quantile(q2)
|
32
|
+
else:
|
33
|
+
vmin, vmax = x.quantile(q1), x.quantile(q2)
|
34
|
+
return vmin, vmax
|
35
|
+
|
36
|
+
|
37
|
+
def quantile_normalize(x, q=0.95):
|
38
|
+
"""normalize each dimension of x to [0, 1], take 95-th percentage, this robust to outliers
|
39
|
+
</br> 1. sort x
|
40
|
+
</br> 2. take q-th quantile
|
41
|
+
</br> min_value -> (1-q)-th quantile
|
42
|
+
</br> max_value -> q-th quantile
|
43
|
+
</br> 3. normalize
|
44
|
+
</br> x = (x - min_value) / (max_value - min_value)
|
45
|
+
|
46
|
+
Args:
|
47
|
+
x (torch.Tensor): input tensor, shape (n_samples, n_features)
|
48
|
+
normalize each feature to 0-1 range
|
49
|
+
q (float): quantile, default 0.95
|
50
|
+
|
51
|
+
Returns:
|
52
|
+
torch.Tensor: quantile normalized tensor
|
53
|
+
"""
|
54
|
+
# normalize x to 0-1 range, max value is q-th quantile
|
55
|
+
# quantile makes the normalization robust to outliers
|
56
|
+
if isinstance(x, np.ndarray):
|
57
|
+
x = torch.tensor(x)
|
58
|
+
vmax, vmin = quantile_min_max(x, q, 1 - q)
|
59
|
+
x = (x - vmin) / (vmax - vmin)
|
60
|
+
x = x.clamp(0, 1)
|
61
|
+
return x
|
@@ -1,5 +1,4 @@
|
|
1
1
|
import logging
|
2
|
-
from typing import Literal
|
3
2
|
|
4
3
|
import numpy as np
|
5
4
|
import torch
|
@@ -98,7 +97,11 @@ def distance_from_features(
|
|
98
97
|
D = torch.cdist(features, features_B, p=2)
|
99
98
|
elif distance == "rbf":
|
100
99
|
D = torch.cdist(features, features_B, p=2) ** 2
|
101
|
-
|
100
|
+
|
101
|
+
# Outlier-robust scale invariance using quantiles to estimate standard deviation
|
102
|
+
stds = torch.quantile(features, q=torch.tensor((0.158655, 0.841345), device=features.device), dim=0)
|
103
|
+
stds = (stds[1] - stds[0]) / 2
|
104
|
+
D = D / (2 * torch.linalg.norm(stds) ** 2)
|
102
105
|
else:
|
103
106
|
raise ValueError("distance should be 'cosine' or 'euclidean', 'rbf'")
|
104
107
|
return D
|
@@ -178,39 +181,17 @@ def extrapolate_knn(
|
|
178
181
|
V_list = []
|
179
182
|
for _v in torch.chunk(extrapolation_features, n_chunks, dim=0):
|
180
183
|
_v = _v.to(device) # [_m x d]
|
184
|
+
|
181
185
|
_A = affinity_from_features(anchor_features, _v, affinity_focal_gamma, distance).mT # [_m x n]
|
182
186
|
if knn is not None:
|
183
187
|
_A, indices = _A.topk(k=knn, dim=-1, largest=True) # [_m x k], [_m x k]
|
184
188
|
_anchor_output = anchor_output[indices] # [_m x k x d]
|
185
189
|
else:
|
186
190
|
_anchor_output = anchor_output[None] # [1 x n x d]
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
# elif distance == 'euclidean':
|
192
|
-
# _A = - torch.cdist(_v, subgraph_features, p=2)
|
193
|
-
# elif distance == 'rbf':
|
194
|
-
# _A = - torch.cdist(_v, subgraph_features, p=2) ** 2
|
195
|
-
# else:
|
196
|
-
# raise ValueError("distance should be 'cosine' or 'euclidean', 'rbf'")
|
197
|
-
#
|
198
|
-
# # keep topk KNN for each row
|
199
|
-
# topk_sim, topk_idx = _A.topk(knn, dim=-1, largest=True)
|
200
|
-
# row_id = torch.arange(topk_idx.shape[0], device=_A.device)[:, None].expand(
|
201
|
-
# -1, topk_idx.shape[1]
|
202
|
-
# )
|
203
|
-
# _A = torch.sparse_coo_tensor(
|
204
|
-
# torch.stack([row_id, topk_idx], dim=-1).reshape(-1, 2).T,
|
205
|
-
# topk_sim.reshape(-1),
|
206
|
-
# size=(_A.shape[0], _A.shape[1]),
|
207
|
-
# device=_A.device,
|
208
|
-
# )
|
209
|
-
# _A = _A.to_dense().to(dtype=subgraph_output.dtype)
|
210
|
-
# _D = _A.sum(-1)
|
211
|
-
# _A /= _D[:, None]
|
212
|
-
|
213
|
-
_V = (_A[:, None, :] @ _anchor_output).squeeze(1)
|
191
|
+
|
192
|
+
_A = Fn.normalize(_A, p=1, dim=-1) # [_m x k]
|
193
|
+
_V = (_A[:, None, :] @ _anchor_output).squeeze(1) # [_m x d]
|
194
|
+
|
214
195
|
if move_output_to_cpu:
|
215
196
|
_V = _V.cpu()
|
216
197
|
V_list.append(_V)
|
@@ -274,40 +255,3 @@ def extrapolate_knn_with_subsampling(
|
|
274
255
|
device=device
|
275
256
|
)
|
276
257
|
return new_eigenvectors
|
277
|
-
|
278
|
-
|
279
|
-
def quantile_min_max(x, q1=0.01, q2=0.99, n_sample=10000):
|
280
|
-
if x.shape[0] > n_sample:
|
281
|
-
np.random.seed(0)
|
282
|
-
random_idx = np.random.choice(x.shape[0], n_sample, replace=False)
|
283
|
-
vmin, vmax = x[random_idx].quantile(q1), x[random_idx].quantile(q2)
|
284
|
-
else:
|
285
|
-
vmin, vmax = x.quantile(q1), x.quantile(q2)
|
286
|
-
return vmin, vmax
|
287
|
-
|
288
|
-
|
289
|
-
def quantile_normalize(x, q=0.95):
|
290
|
-
"""normalize each dimension of x to [0, 1], take 95-th percentage, this robust to outliers
|
291
|
-
</br> 1. sort x
|
292
|
-
</br> 2. take q-th quantile
|
293
|
-
</br> min_value -> (1-q)-th quantile
|
294
|
-
</br> max_value -> q-th quantile
|
295
|
-
</br> 3. normalize
|
296
|
-
</br> x = (x - min_value) / (max_value - min_value)
|
297
|
-
|
298
|
-
Args:
|
299
|
-
x (torch.Tensor): input tensor, shape (n_samples, n_features)
|
300
|
-
normalize each feature to 0-1 range
|
301
|
-
q (float): quantile, default 0.95
|
302
|
-
|
303
|
-
Returns:
|
304
|
-
torch.Tensor: quantile normalized tensor
|
305
|
-
"""
|
306
|
-
# normalize x to 0-1 range, max value is q-th quantile
|
307
|
-
# quantile makes the normalization robust to outliers
|
308
|
-
if isinstance(x, np.ndarray):
|
309
|
-
x = torch.tensor(x)
|
310
|
-
vmax, vmin = quantile_min_max(x, q, 1 - q)
|
311
|
-
x = (x - vmin) / (vmax - vmin)
|
312
|
-
x = x.clamp(0, 1)
|
313
|
-
return x
|
@@ -7,15 +7,13 @@ import torch.nn.functional as F
|
|
7
7
|
from sklearn.base import BaseEstimator
|
8
8
|
|
9
9
|
from .common import (
|
10
|
-
DistanceOptions,
|
11
10
|
lazy_normalize,
|
11
|
+
quantile_min_max,
|
12
|
+
quantile_normalize,
|
12
13
|
)
|
13
14
|
from .propagation_utils import (
|
14
15
|
run_subgraph_sampling,
|
15
16
|
extrapolate_knn,
|
16
|
-
extrapolate_knn_with_subsampling,
|
17
|
-
quantile_min_max,
|
18
|
-
quantile_normalize
|
19
17
|
)
|
20
18
|
|
21
19
|
|
@@ -28,27 +26,22 @@ def _rgb_with_dimensionality_reduction(
|
|
28
26
|
num_sample: int,
|
29
27
|
metric: Literal["cosine", "euclidean"],
|
30
28
|
rgb_func: Callable[[torch.Tensor, float], torch.Tensor],
|
31
|
-
q: float,
|
32
|
-
|
29
|
+
q: float,
|
30
|
+
knn: int,
|
33
31
|
reduction: Callable[..., BaseEstimator],
|
34
32
|
reduction_dim: int,
|
35
33
|
reduction_kwargs: Dict[str, Any],
|
36
|
-
transform_func: Callable[[torch.Tensor], torch.Tensor]
|
37
|
-
|
34
|
+
transform_func: Callable[[torch.Tensor], torch.Tensor],
|
35
|
+
seed: int,
|
36
|
+
device: str,
|
38
37
|
) -> Tuple[torch.Tensor, torch.Tensor]:
|
39
38
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
features = extrapolate_knn(
|
47
|
-
features[_subgraph_indices],
|
48
|
-
features[_subgraph_indices],
|
49
|
-
features,
|
50
|
-
distance="cosine",
|
51
|
-
)
|
39
|
+
features = extrapolate_knn(
|
40
|
+
features,
|
41
|
+
features,
|
42
|
+
features,
|
43
|
+
distance="cosine",
|
44
|
+
)
|
52
45
|
|
53
46
|
subgraph_indices = run_subgraph_sampling(
|
54
47
|
features,
|
@@ -83,10 +76,10 @@ def rgb_from_tsne_2d(
|
|
83
76
|
num_sample: int = 1000,
|
84
77
|
perplexity: int = 150,
|
85
78
|
metric: Literal["cosine", "euclidean"] = "cosine",
|
86
|
-
device: str = None,
|
87
|
-
seed: int = 0,
|
88
79
|
q: float = 0.95,
|
89
80
|
knn: int = 10,
|
81
|
+
seed: int = 0,
|
82
|
+
device: str = None,
|
90
83
|
):
|
91
84
|
"""
|
92
85
|
Returns:
|
@@ -111,11 +104,13 @@ def rgb_from_tsne_2d(
|
|
111
104
|
num_sample=num_sample,
|
112
105
|
metric=metric,
|
113
106
|
rgb_func=rgb_from_2d_colormap,
|
114
|
-
q=q,
|
115
|
-
|
107
|
+
q=q,
|
108
|
+
knn=knn,
|
116
109
|
reduction=TSNE, reduction_dim=2, reduction_kwargs={
|
117
110
|
"perplexity": perplexity,
|
118
|
-
},
|
111
|
+
}, transform_func=_identity,
|
112
|
+
seed=seed,
|
113
|
+
device=device,
|
119
114
|
)
|
120
115
|
return x2d, rgb
|
121
116
|
|
@@ -125,10 +120,10 @@ def rgb_from_tsne_3d(
|
|
125
120
|
num_sample: int = 1000,
|
126
121
|
perplexity: int = 150,
|
127
122
|
metric: Literal["cosine", "euclidean"] = "cosine",
|
128
|
-
device: str = None,
|
129
|
-
seed: int = 0,
|
130
123
|
q: float = 0.95,
|
131
124
|
knn: int = 10,
|
125
|
+
seed: int = 0,
|
126
|
+
device: str = None,
|
132
127
|
):
|
133
128
|
"""
|
134
129
|
Returns:
|
@@ -153,11 +148,13 @@ def rgb_from_tsne_3d(
|
|
153
148
|
num_sample=num_sample,
|
154
149
|
metric=metric,
|
155
150
|
rgb_func=rgb_from_3d_rgb_cube,
|
156
|
-
q=q,
|
157
|
-
|
151
|
+
q=q,
|
152
|
+
knn=knn,
|
158
153
|
reduction=TSNE, reduction_dim=3, reduction_kwargs={
|
159
154
|
"perplexity": perplexity,
|
160
|
-
},
|
155
|
+
}, transform_func=_identity,
|
156
|
+
seed=seed,
|
157
|
+
device=device,
|
161
158
|
)
|
162
159
|
return x3d, rgb
|
163
160
|
|
@@ -166,10 +163,10 @@ def rgb_from_cosine_tsne_3d(
|
|
166
163
|
features: torch.Tensor,
|
167
164
|
num_sample: int = 1000,
|
168
165
|
perplexity: int = 150,
|
169
|
-
device: str = None,
|
170
|
-
seed: int = 0,
|
171
166
|
q: float = 0.95,
|
172
167
|
knn: int = 10,
|
168
|
+
seed: int = 0,
|
169
|
+
device: str = None
|
173
170
|
):
|
174
171
|
"""
|
175
172
|
Returns:
|
@@ -210,11 +207,13 @@ def rgb_from_cosine_tsne_3d(
|
|
210
207
|
num_sample=num_sample,
|
211
208
|
metric="cosine",
|
212
209
|
rgb_func=rgb_from_cosine,
|
213
|
-
q=q,
|
214
|
-
|
210
|
+
q=q,
|
211
|
+
knn=knn,
|
215
212
|
reduction=TSNE, reduction_dim=3, reduction_kwargs={
|
216
213
|
"perplexity": perplexity,
|
217
|
-
},
|
214
|
+
}, transform_func=_identity,
|
215
|
+
seed=seed,
|
216
|
+
device=device,
|
218
217
|
)
|
219
218
|
return x3d, rgb
|
220
219
|
|
@@ -225,10 +224,10 @@ def rgb_from_umap_2d(
|
|
225
224
|
n_neighbors: int = 150,
|
226
225
|
min_dist: float = 0.1,
|
227
226
|
metric: Literal["cosine", "euclidean"] = "cosine",
|
228
|
-
device: str = None,
|
229
|
-
seed: int = 0,
|
230
227
|
q: float = 0.95,
|
231
228
|
knn: int = 10,
|
229
|
+
seed: int = 0,
|
230
|
+
device: str = None,
|
232
231
|
):
|
233
232
|
"""
|
234
233
|
Returns:
|
@@ -245,12 +244,14 @@ def rgb_from_umap_2d(
|
|
245
244
|
num_sample=num_sample,
|
246
245
|
metric=metric,
|
247
246
|
rgb_func=rgb_from_2d_colormap,
|
248
|
-
q=q,
|
249
|
-
|
247
|
+
q=q,
|
248
|
+
knn=knn,
|
250
249
|
reduction=UMAP, reduction_dim=2, reduction_kwargs={
|
251
250
|
"n_neighbors": n_neighbors,
|
252
251
|
"min_dist": min_dist,
|
253
|
-
},
|
252
|
+
}, transform_func=_identity,
|
253
|
+
seed=seed,
|
254
|
+
device=device,
|
254
255
|
)
|
255
256
|
return x2d, rgb
|
256
257
|
|
@@ -261,10 +262,10 @@ def rgb_from_umap_sphere(
|
|
261
262
|
n_neighbors: int = 150,
|
262
263
|
min_dist: float = 0.1,
|
263
264
|
metric: Literal["cosine", "euclidean"] = "cosine",
|
264
|
-
device: str = None,
|
265
|
-
seed: int = 0,
|
266
265
|
q: float = 0.95,
|
267
266
|
knn: int = 10,
|
267
|
+
seed: int = 0,
|
268
|
+
device: str = None,
|
268
269
|
):
|
269
270
|
"""
|
270
271
|
Returns:
|
@@ -288,14 +289,15 @@ def rgb_from_umap_sphere(
|
|
288
289
|
num_sample=num_sample,
|
289
290
|
metric=metric,
|
290
291
|
rgb_func=rgb_from_3d_rgb_cube,
|
291
|
-
q=q,
|
292
|
-
|
292
|
+
q=q,
|
293
|
+
knn=knn,
|
293
294
|
reduction=UMAP, reduction_dim=2, reduction_kwargs={
|
294
295
|
"n_neighbors": n_neighbors,
|
295
296
|
"min_dist": min_dist,
|
296
297
|
"output_metric": "haversine",
|
297
|
-
},
|
298
|
-
|
298
|
+
}, transform_func=transform_func,
|
299
|
+
seed=seed,
|
300
|
+
device=device,
|
299
301
|
)
|
300
302
|
return x3d, rgb
|
301
303
|
|
@@ -306,10 +308,10 @@ def rgb_from_umap_3d(
|
|
306
308
|
n_neighbors: int = 150,
|
307
309
|
min_dist: float = 0.1,
|
308
310
|
metric: Literal["cosine", "euclidean"] = "cosine",
|
309
|
-
device: str = None,
|
310
|
-
seed: int = 0,
|
311
311
|
q: float = 0.95,
|
312
312
|
knn: int = 10,
|
313
|
+
seed: int = 0,
|
314
|
+
device: str = None,
|
313
315
|
):
|
314
316
|
"""
|
315
317
|
Returns:
|
@@ -326,12 +328,14 @@ def rgb_from_umap_3d(
|
|
326
328
|
num_sample=num_sample,
|
327
329
|
metric=metric,
|
328
330
|
rgb_func=rgb_from_3d_rgb_cube,
|
329
|
-
q=q,
|
330
|
-
|
331
|
+
q=q,
|
332
|
+
knn=knn,
|
331
333
|
reduction=UMAP, reduction_dim=3, reduction_kwargs={
|
332
334
|
"n_neighbors": n_neighbors,
|
333
335
|
"min_dist": min_dist,
|
334
|
-
},
|
336
|
+
}, transform_func=_identity,
|
337
|
+
seed=seed,
|
338
|
+
device=device,
|
335
339
|
)
|
336
340
|
return x3d, rgb
|
337
341
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: nystrom_ncut
|
3
|
-
Version: 0.0.7
|
3
|
+
Version: 0.0.9
|
4
4
|
Summary: Normalized Cut and Nyström Approximation
|
5
5
|
Author-email: Huzheng Yang <huze.yann@gmail.com>, Wentinn Liao <wentinn.liao@gmail.com>
|
6
6
|
Project-URL: Documentation, https://github.com/JophiArcana/Nystrom-NCUT/
|
@@ -1,24 +0,0 @@
|
|
1
|
-
from typing import Any, Literal
|
2
|
-
|
3
|
-
import numpy as np
|
4
|
-
import torch
|
5
|
-
import torch.nn.functional as Fn
|
6
|
-
|
7
|
-
|
8
|
-
DistanceOptions = Literal["cosine", "euclidean", "rbf"]
|
9
|
-
SampleOptions = Literal["farthest", "random"]
|
10
|
-
|
11
|
-
|
12
|
-
def ceildiv(a: int, b: int) -> int:
|
13
|
-
return -(-a // b)
|
14
|
-
|
15
|
-
|
16
|
-
def lazy_normalize(x: torch.Tensor, n: int = 1000, **normalize_kwargs: Any) -> torch.Tensor:
|
17
|
-
numel = np.prod(x.shape[:-1])
|
18
|
-
n = min(n, numel)
|
19
|
-
random_indices = torch.randperm(numel)[:n]
|
20
|
-
_x = x.flatten(0, -2)[random_indices]
|
21
|
-
if torch.allclose(torch.norm(_x, **normalize_kwargs), torch.ones(n, device=x.device)):
|
22
|
-
return x
|
23
|
-
else:
|
24
|
-
return Fn.normalize(x, **normalize_kwargs)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|