nystrom-ncut 0.0.7.tar.gz → 0.0.9.tar.gz

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: nystrom_ncut
-Version: 0.0.7
+Version: 0.0.9
 Summary: Normalized Cut and Nyström Approximation
 Author-email: Huzheng Yang <huze.yann@gmail.com>, Wentinn Liao <wentinn.liao@gmail.com>
 Project-URL: Documentation, https://github.com/JophiArcana/Nystrom-NCUT/
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "nystrom_ncut"
-version = "0.0.7"
+version = "0.0.9"
 authors = [
     { name = "Huzheng Yang", email = "huze.yann@gmail.com" },
     { name = "Wentinn Liao", email = "wentinn.liao@gmail.com" },
@@ -0,0 +1,61 @@
+from typing import Any, Literal
+
+import numpy as np
+import torch
+import torch.nn.functional as Fn
+
+
+DistanceOptions = Literal["cosine", "euclidean", "rbf"]
+SampleOptions = Literal["farthest", "random"]
+
+
+def ceildiv(a: int, b: int) -> int:
+    return -(-a // b)
+
+
+def lazy_normalize(x: torch.Tensor, n: int = 1000, **normalize_kwargs: Any) -> torch.Tensor:
+    numel = np.prod(x.shape[:-1])
+    n = min(n, numel)
+    random_indices = torch.randperm(numel)[:n]
+    _x = x.flatten(0, -2)[random_indices]
+    if torch.allclose(torch.norm(_x, **normalize_kwargs), torch.ones(n, device=x.device)):
+        return x
+    else:
+        return Fn.normalize(x, **normalize_kwargs)
+
+
+def quantile_min_max(x, q1=0.01, q2=0.99, n_sample=10000):
+    if x.shape[0] > n_sample:
+        np.random.seed(0)
+        random_idx = np.random.choice(x.shape[0], n_sample, replace=False)
+        vmin, vmax = x[random_idx].quantile(q1), x[random_idx].quantile(q2)
+    else:
+        vmin, vmax = x.quantile(q1), x.quantile(q2)
+    return vmin, vmax
+
+
+def quantile_normalize(x, q=0.95):
+    """normalize each dimension of x to [0, 1], take 95-th percentage, this robust to outliers
+    </br> 1. sort x
+    </br> 2. take q-th quantile
+    </br>     min_value -> (1-q)-th quantile
+    </br>     max_value -> q-th quantile
+    </br> 3. normalize
+    </br>     x = (x - min_value) / (max_value - min_value)
+
+    Args:
+        x (torch.Tensor): input tensor, shape (n_samples, n_features)
+            normalize each feature to 0-1 range
+        q (float): quantile, default 0.95
+
+    Returns:
+        torch.Tensor: quantile normalized tensor
+    """
+    # normalize x to 0-1 range, max value is q-th quantile
+    # quantile makes the normalization robust to outliers
+    if isinstance(x, np.ndarray):
+        x = torch.tensor(x)
+    vmax, vmin = quantile_min_max(x, q, 1 - q)
+    x = (x - vmin) / (vmax - vmin)
+    x = x.clamp(0, 1)
+    return x
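Note that quantile_normalize calls quantile_min_max with q1=q and q2=1-q and unpacks the result in swapped order, so vmax ends up as the q-th quantile and vmin as the (1-q)-th, matching the docstring. A minimal, self-contained sketch of the behavior (not the package's API; the subsampling and numpy branches are omitted):

    import torch

    def quantile_normalize_sketch(x: torch.Tensor, q: float = 0.95) -> torch.Tensor:
        # Clamp into the [(1-q)-th, q-th] quantile range, then rescale to [0, 1].
        vmin, vmax = x.quantile(1 - q), x.quantile(q)
        return ((x - vmin) / (vmax - vmin)).clamp(0, 1)

    x = torch.randn(1000, 3)
    x[0] = 1e6                        # a single outlier no longer dominates the scaling
    y = quantile_normalize_sketch(x)  # values in [0, 1], insensitive to the outlier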
@@ -1,5 +1,5 @@
 import logging
-from typing import Literal, Tuple
+from typing import Tuple
 
 import torch
 import torch.nn.functional as Fn
@@ -2,7 +2,9 @@ from typing import Literal, Tuple
 
 import torch
 
-from .common import ceildiv
+from .common import (
+    ceildiv,
+)
 
 
 EigSolverOptions = Literal["svd_lowrank", "lobpcg", "svd", "eigh"]
@@ -1,5 +1,4 @@
 import logging
-from typing import Literal
 
 import numpy as np
 import torch
@@ -98,7 +97,11 @@ def distance_from_features(
         D = torch.cdist(features, features_B, p=2)
     elif distance == "rbf":
         D = torch.cdist(features, features_B, p=2) ** 2
-        D = D / (2 * features.var(dim=0).sum())
+
+        # Outlier-robust scale invariance using quantiles to estimate standard deviation
+        stds = torch.quantile(features, q=torch.tensor((0.158655, 0.841345), device=features.device), dim=0)
+        stds = (stds[1] - stds[0]) / 2
+        D = D / (2 * torch.linalg.norm(stds) ** 2)
     else:
         raise ValueError("distance should be 'cosine' or 'euclidean', 'rbf'")
     return D
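The two constants are the standard normal CDF at -1 and +1 (Φ(-1) ≈ 0.158655, Φ(1) ≈ 0.841345), so half the spread between those per-feature quantiles estimates the standard deviation without being skewed by extreme values; 2 * ||stds||^2 then plays the role of the old 2 * features.var(dim=0).sum() as the rbf bandwidth. A quick numerical check of why those constants recover sigma:

    import torch

    # For N(0, 1) data the 15.8655% and 84.1345% quantiles sit near -1 and +1,
    # so half the distance between them approximates the standard deviation.
    g = torch.randn(100_000, 4)
    qs = torch.quantile(g, q=torch.tensor((0.158655, 0.841345)), dim=0)
    sigma = (qs[1] - qs[0]) / 2
    print(sigma)  # ≈ tensor([1., 1., 1., 1.])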
@@ -178,39 +181,17 @@ def extrapolate_knn(
     V_list = []
     for _v in torch.chunk(extrapolation_features, n_chunks, dim=0):
         _v = _v.to(device)  # [_m x d]
+
         _A = affinity_from_features(anchor_features, _v, affinity_focal_gamma, distance).mT  # [_m x n]
         if knn is not None:
             _A, indices = _A.topk(k=knn, dim=-1, largest=True)  # [_m x k], [_m x k]
             _anchor_output = anchor_output[indices]  # [_m x k x d]
         else:
             _anchor_output = anchor_output[None]  # [1 x n x d]
-        _A = Fn.normalize(_A, p=1, dim=-1)
-
-        # if distance == 'cosine':
-        #     _A = _v @ subgraph_features.T
-        # elif distance == 'euclidean':
-        #     _A = - torch.cdist(_v, subgraph_features, p=2)
-        # elif distance == 'rbf':
-        #     _A = - torch.cdist(_v, subgraph_features, p=2) ** 2
-        # else:
-        #     raise ValueError("distance should be 'cosine' or 'euclidean', 'rbf'")
-        #
-        # # keep topk KNN for each row
-        # topk_sim, topk_idx = _A.topk(knn, dim=-1, largest=True)
-        # row_id = torch.arange(topk_idx.shape[0], device=_A.device)[:, None].expand(
-        #     -1, topk_idx.shape[1]
-        # )
-        # _A = torch.sparse_coo_tensor(
-        #     torch.stack([row_id, topk_idx], dim=-1).reshape(-1, 2).T,
-        #     topk_sim.reshape(-1),
-        #     size=(_A.shape[0], _A.shape[1]),
-        #     device=_A.device,
-        # )
-        # _A = _A.to_dense().to(dtype=subgraph_output.dtype)
-        # _D = _A.sum(-1)
-        # _A /= _D[:, None]
-
-        _V = (_A[:, None, :] @ _anchor_output).squeeze(1)
+
+        _A = Fn.normalize(_A, p=1, dim=-1)  # [_m x k]
+        _V = (_A[:, None, :] @ _anchor_output).squeeze(1)  # [_m x d]
+
         if move_output_to_cpu:
             _V = _V.cpu()
         V_list.append(_V)
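After the dead commented-out code is dropped, the extrapolation body is just a row-stochastic weighted average: each query's output is a convex combination of the outputs at its knn nearest anchors. A toy sketch with hypothetical sizes matching the [_m x k] / [_m x k x d] shape comments:

    import torch
    import torch.nn.functional as Fn

    m, k, d = 5, 3, 8                      # hypothetical sizes: queries, neighbors, output dims
    _A = torch.rand(m, k)                  # top-k affinities per query row
    _anchor_output = torch.randn(m, k, d)  # anchor outputs gathered at those k indices

    _A = Fn.normalize(_A, p=1, dim=-1)     # rows sum to 1 -> convex weights
    _V = (_A[:, None, :] @ _anchor_output).squeeze(1)  # [m x d] weighted average
    assert _V.shape == (m, d)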
@@ -274,40 +255,3 @@ def extrapolate_knn_with_subsampling(
         device=device
     )
     return new_eigenvectors
-
-
-def quantile_min_max(x, q1=0.01, q2=0.99, n_sample=10000):
-    if x.shape[0] > n_sample:
-        np.random.seed(0)
-        random_idx = np.random.choice(x.shape[0], n_sample, replace=False)
-        vmin, vmax = x[random_idx].quantile(q1), x[random_idx].quantile(q2)
-    else:
-        vmin, vmax = x.quantile(q1), x.quantile(q2)
-    return vmin, vmax
-
-
-def quantile_normalize(x, q=0.95):
-    """normalize each dimension of x to [0, 1], take 95-th percentage, this robust to outliers
-    </br> 1. sort x
-    </br> 2. take q-th quantile
-    </br>     min_value -> (1-q)-th quantile
-    </br>     max_value -> q-th quantile
-    </br> 3. normalize
-    </br>     x = (x - min_value) / (max_value - min_value)
-
-    Args:
-        x (torch.Tensor): input tensor, shape (n_samples, n_features)
-            normalize each feature to 0-1 range
-        q (float): quantile, default 0.95
-
-    Returns:
-        torch.Tensor: quantile normalized tensor
-    """
-    # normalize x to 0-1 range, max value is q-th quantile
-    # quantile makes the normalization robust to outliers
-    if isinstance(x, np.ndarray):
-        x = torch.tensor(x)
-    vmax, vmin = quantile_min_max(x, q, 1 - q)
-    x = (x - vmin) / (vmax - vmin)
-    x = x.clamp(0, 1)
-    return x
@@ -7,15 +7,13 @@ import torch.nn.functional as F
 from sklearn.base import BaseEstimator
 
 from .common import (
-    DistanceOptions,
     lazy_normalize,
+    quantile_min_max,
+    quantile_normalize,
 )
 from .propagation_utils import (
     run_subgraph_sampling,
     extrapolate_knn,
-    extrapolate_knn_with_subsampling,
-    quantile_min_max,
-    quantile_normalize
 )
 
 
@@ -28,27 +26,22 @@ def _rgb_with_dimensionality_reduction(
     num_sample: int,
     metric: Literal["cosine", "euclidean"],
     rgb_func: Callable[[torch.Tensor, float], torch.Tensor],
-    q: float, knn: int,
-    seed: int, device: str,
+    q: float,
+    knn: int,
     reduction: Callable[..., BaseEstimator],
     reduction_dim: int,
     reduction_kwargs: Dict[str, Any],
-    transform_func: Callable[[torch.Tensor], torch.Tensor] = _identity,
-    pre_smooth: bool = True,
+    transform_func: Callable[[torch.Tensor], torch.Tensor],
+    seed: int,
+    device: str,
 ) -> Tuple[torch.Tensor, torch.Tensor]:
 
-    if pre_smooth:
-        _subgraph_indices = run_subgraph_sampling(
-            features,
-            num_sample,
-            sample_method="farthest",
-        )
-        features = extrapolate_knn(
-            features[_subgraph_indices],
-            features[_subgraph_indices],
-            features,
-            distance="cosine",
-        )
+    features = extrapolate_knn(
+        features,
+        features,
+        features,
+        distance="cosine",
+    )
 
     subgraph_indices = run_subgraph_sampling(
         features,
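The pre_smooth flag and its farthest-point subsample are gone: 0.0.9 always smooths by passing features as both the anchor set and the query set, so every point becomes a kNN-weighted average of other points. A hypothetical stand-in for that step (knn_self_smooth is not the library's API; the real path goes through affinity_from_features inside extrapolate_knn):

    import torch
    import torch.nn.functional as Fn

    def knn_self_smooth(features: torch.Tensor, knn: int = 10) -> torch.Tensor:
        # Replace each row by an affinity-weighted average of its knn nearest rows.
        A = features @ features.mT          # cosine-style affinity (rows assumed normalized)
        w, idx = A.topk(k=knn, dim=-1)      # [n x k] weights and neighbor indices
        w = Fn.normalize(w, p=1, dim=-1)    # convex weights per row
        return (w[:, :, None] * features[idx]).sum(dim=1)

    f = Fn.normalize(torch.randn(100, 16), dim=-1)
    smoothed = knn_self_smooth(f)           # same shape [100 x 16], locally averaged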
@@ -83,10 +76,10 @@ def rgb_from_tsne_2d(
     num_sample: int = 1000,
     perplexity: int = 150,
     metric: Literal["cosine", "euclidean"] = "cosine",
-    device: str = None,
-    seed: int = 0,
     q: float = 0.95,
     knn: int = 10,
+    seed: int = 0,
+    device: str = None,
 ):
     """
     Returns:
@@ -111,11 +104,13 @@ def rgb_from_tsne_2d(
         num_sample=num_sample,
         metric=metric,
         rgb_func=rgb_from_2d_colormap,
-        q=q, knn=knn,
-        seed=seed, device=device,
+        q=q,
+        knn=knn,
         reduction=TSNE, reduction_dim=2, reduction_kwargs={
             "perplexity": perplexity,
-        },
+        }, transform_func=_identity,
+        seed=seed,
+        device=device,
     )
     return x2d, rgb
 
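All six public wrappers below get the same treatment: seed and device move to the end of the signature, and transform_func is now passed explicitly. A usage sketch against the 0.0.9 keyword order (the top-level import path is an assumption; check the package's __init__):

    import torch
    from nystrom_ncut import rgb_from_tsne_2d  # import path assumed

    eigvecs = torch.randn(4096, 20)  # e.g. NCut eigenvectors, one row per pixel/token
    x2d, rgb = rgb_from_tsne_2d(
        eigvecs,
        num_sample=1000,
        perplexity=150,
        q=0.95,
        knn=10,
        seed=0,
        device="cuda" if torch.cuda.is_available() else None,
    )
    # x2d: [n x 2] t-SNE embedding; rgb: [n x 3] colors for visualization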
@@ -125,10 +120,10 @@ def rgb_from_tsne_3d(
     num_sample: int = 1000,
     perplexity: int = 150,
     metric: Literal["cosine", "euclidean"] = "cosine",
-    device: str = None,
-    seed: int = 0,
     q: float = 0.95,
     knn: int = 10,
+    seed: int = 0,
+    device: str = None,
 ):
     """
     Returns:
@@ -153,11 +148,13 @@ def rgb_from_tsne_3d(
         num_sample=num_sample,
         metric=metric,
         rgb_func=rgb_from_3d_rgb_cube,
-        q=q, knn=knn,
-        seed=seed, device=device,
+        q=q,
+        knn=knn,
         reduction=TSNE, reduction_dim=3, reduction_kwargs={
             "perplexity": perplexity,
-        },
+        }, transform_func=_identity,
+        seed=seed,
+        device=device,
     )
     return x3d, rgb
 
@@ -166,10 +163,10 @@ def rgb_from_cosine_tsne_3d(
     features: torch.Tensor,
     num_sample: int = 1000,
     perplexity: int = 150,
-    device: str = None,
-    seed: int = 0,
     q: float = 0.95,
     knn: int = 10,
+    seed: int = 0,
+    device: str = None
 ):
     """
     Returns:
@@ -210,11 +207,13 @@ def rgb_from_cosine_tsne_3d(
         num_sample=num_sample,
         metric="cosine",
         rgb_func=rgb_from_cosine,
-        q=q, knn=knn,
-        seed=seed, device=device,
+        q=q,
+        knn=knn,
         reduction=TSNE, reduction_dim=3, reduction_kwargs={
             "perplexity": perplexity,
-        },
+        }, transform_func=_identity,
+        seed=seed,
+        device=device,
     )
     return x3d, rgb
 
@@ -225,10 +224,10 @@ def rgb_from_umap_2d(
     n_neighbors: int = 150,
     min_dist: float = 0.1,
     metric: Literal["cosine", "euclidean"] = "cosine",
-    device: str = None,
-    seed: int = 0,
     q: float = 0.95,
     knn: int = 10,
+    seed: int = 0,
+    device: str = None,
 ):
     """
     Returns:
@@ -245,12 +244,14 @@ def rgb_from_umap_2d(
         num_sample=num_sample,
         metric=metric,
         rgb_func=rgb_from_2d_colormap,
-        q=q, knn=knn,
-        seed=seed, device=device,
+        q=q,
+        knn=knn,
         reduction=UMAP, reduction_dim=2, reduction_kwargs={
             "n_neighbors": n_neighbors,
             "min_dist": min_dist,
-        },
+        }, transform_func=_identity,
+        seed=seed,
+        device=device,
     )
     return x2d, rgb
 
@@ -261,10 +262,10 @@ def rgb_from_umap_sphere(
     n_neighbors: int = 150,
     min_dist: float = 0.1,
     metric: Literal["cosine", "euclidean"] = "cosine",
-    device: str = None,
-    seed: int = 0,
     q: float = 0.95,
     knn: int = 10,
+    seed: int = 0,
+    device: str = None,
 ):
     """
     Returns:
@@ -288,14 +289,15 @@ def rgb_from_umap_sphere(
         num_sample=num_sample,
         metric=metric,
         rgb_func=rgb_from_3d_rgb_cube,
-        q=q, knn=knn,
-        seed=seed, device=device,
+        q=q,
+        knn=knn,
         reduction=UMAP, reduction_dim=2, reduction_kwargs={
             "n_neighbors": n_neighbors,
             "min_dist": min_dist,
             "output_metric": "haversine",
-        },
-        transform_func=transform_func
+        }, transform_func=transform_func,
+        seed=seed,
+        device=device,
     )
     return x3d, rgb
 
@@ -306,10 +308,10 @@ def rgb_from_umap_3d(
     n_neighbors: int = 150,
     min_dist: float = 0.1,
     metric: Literal["cosine", "euclidean"] = "cosine",
-    device: str = None,
-    seed: int = 0,
     q: float = 0.95,
     knn: int = 10,
+    seed: int = 0,
+    device: str = None,
 ):
     """
     Returns:
@@ -326,12 +328,14 @@ def rgb_from_umap_3d(
         num_sample=num_sample,
         metric=metric,
         rgb_func=rgb_from_3d_rgb_cube,
-        q=q, knn=knn,
-        seed=seed, device=device,
+        q=q,
+        knn=knn,
         reduction=UMAP, reduction_dim=3, reduction_kwargs={
             "n_neighbors": n_neighbors,
             "min_dist": min_dist,
-        },
+        }, transform_func=_identity,
+        seed=seed,
+        device=device,
     )
     return x3d, rgb
 
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: nystrom_ncut
-Version: 0.0.7
+Version: 0.0.9
 Summary: Normalized Cut and Nyström Approximation
 Author-email: Huzheng Yang <huze.yann@gmail.com>, Wentinn Liao <wentinn.liao@gmail.com>
 Project-URL: Documentation, https://github.com/JophiArcana/Nystrom-NCUT/
@@ -1,24 +0,0 @@
-from typing import Any, Literal
-
-import numpy as np
-import torch
-import torch.nn.functional as Fn
-
-
-DistanceOptions = Literal["cosine", "euclidean", "rbf"]
-SampleOptions = Literal["farthest", "random"]
-
-
-def ceildiv(a: int, b: int) -> int:
-    return -(-a // b)
-
-
-def lazy_normalize(x: torch.Tensor, n: int = 1000, **normalize_kwargs: Any) -> torch.Tensor:
-    numel = np.prod(x.shape[:-1])
-    n = min(n, numel)
-    random_indices = torch.randperm(numel)[:n]
-    _x = x.flatten(0, -2)[random_indices]
-    if torch.allclose(torch.norm(_x, **normalize_kwargs), torch.ones(n, device=x.device)):
-        return x
-    else:
-        return Fn.normalize(x, **normalize_kwargs)