nystrom-ncut 0.0.1__py3-none-any.whl → 0.0.2__py3-none-any.whl

nystrom_ncut/nystrom.py CHANGED
@@ -2,6 +2,8 @@ from typing import Literal, Tuple
 
 import torch
 
+from .common import ceildiv
+
 
 EigSolverOptions = Literal["svd_lowrank", "lobpcg", "svd", "eigh"]
 
@@ -75,7 +77,7 @@ class OnlineNystrom:
         return U[:, :self.n_components], L[:self.n_components]  # [n x n_components], [n_components]
 
     def update(self, features: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
-        n_chunks = -(-len(features) // self.chunk_size)
+        n_chunks = ceildiv(len(features), self.chunk_size)
         if n_chunks > 1:
             """ Chunked version """
             chunks = torch.chunk(features, n_chunks, dim=0)
@@ -111,7 +113,7 @@ class OnlineNystrom:
         if features is None:
             VS = self.A @ self.transform_matrix  # [n x n_components]
         else:
-            n_chunks = -(-len(features) // self.chunk_size)
+            n_chunks = ceildiv(len(features), self.chunk_size)
            if n_chunks > 1:
                """ Chunked version """
                chunks = torch.chunk(features, n_chunks, dim=0)
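
Both nystrom.py hunks swap the inline ceiling-division idiom for a ceildiv helper imported from the new nystrom_ncut/common.py, whose source is not part of this diff. A minimal sketch of such a helper, assuming it simply wraps the expression it replaces (hypothetical, not the packaged code):

    import torch

    def ceildiv(a: int, b: int) -> int:
        # Integer ceiling division; behaves exactly like the removed -(-a // b) idiom.
        return -(-a // b)

    # The chunked code paths then pair it with torch.chunk, as in update() above:
    features = torch.randn(10_000, 64)
    chunk_size = 8096
    n_chunks = ceildiv(len(features), chunk_size)      # 2 chunks for 10,000 rows
    chunks = torch.chunk(features, n_chunks, dim=0)
    assert sum(len(c) for c in chunks) == len(features)
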
nystrom_ncut/propagation_utils.py CHANGED
@@ -1,11 +1,12 @@
 import logging
-import math
 from typing import Literal
 
 import numpy as np
 import torch
 import torch.nn.functional as F
 
+from .common import ceildiv, lazy_normalize
+
 
 @torch.no_grad()
 def run_subgraph_sampling(
@@ -60,14 +61,12 @@ def farthest_point_sampling(
     # PCA to reduce the dimension
     if features.shape[1] > 8:
         u, s, v = torch.pca_lowrank(features, q=8)
-        _n = features.shape[0]
-        s /= math.sqrt(_n)
         features = u @ torch.diag(s)
 
     h = min(h, int(np.log2(features.shape[0])))
 
     kdline_fps_samples_idx = fpsample.bucket_fps_kdline_sampling(
-        features.cpu().numpy(), num_sample, h
+        features.numpy(force=True), num_sample, h
     ).astype(np.int64)
     return torch.from_numpy(kdline_fps_samples_idx)
 
@@ -76,26 +75,19 @@ def distance_from_features(
     features: torch.Tensor,
     features_B: torch.Tensor,
     distance: Literal["cosine", "euclidean", "rbf"],
-    fill_diagonal: bool,
 ):
     """Compute affinity matrix from input features.
     Args:
         features (torch.Tensor): input features, shape (n_samples, n_features)
         features_B (torch.Tensor, optional): optional, if not None, compute affinity between two features
-        affinity_focal_gamma (float): affinity matrix parameter, lower t reduce the edge weights
-            on weak connections, default 1.0
         distance (str): distance metric, 'cosine' (default) or 'euclidean', 'rbf'.
-        normalize_features (bool): normalize input features before computing affinity matrix
-
     Returns:
         (torch.Tensor): affinity matrix, shape (n_samples, n_samples)
     """
     # compute distance matrix from input features
     if distance == "cosine":
-        if not check_if_normalized(features):
-            features = F.normalize(features, dim=-1)
-        if not check_if_normalized(features_B):
-            features_B = F.normalize(features_B, dim=-1)
+        features = lazy_normalize(features, dim=-1)
+        features_B = lazy_normalize(features_B, dim=-1)
         D = 1 - features @ features_B.T
     elif distance == "euclidean":
         D = torch.cdist(features, features_B, p=2)
@@ -105,8 +97,6 @@ def distance_from_features(
     else:
         raise ValueError("distance should be 'cosine' or 'euclidean', 'rbf'")
 
-    if fill_diagonal:
-        D[torch.arange(D.shape[0]), torch.arange(D.shape[0])] = 0
     return D
 
 
@@ -115,7 +105,6 @@ def affinity_from_features(
     features_B: torch.Tensor = None,
     affinity_focal_gamma: float = 1.0,
     distance: Literal["cosine", "euclidean", "rbf"] = "cosine",
-    fill_diagonal: bool = True,
 ):
     """Compute affinity matrix from input features.
 
@@ -125,8 +114,6 @@ def affinity_from_features(
         affinity_focal_gamma (float): affinity matrix parameter, lower t reduce the edge weights
             on weak connections, default 1.0
         distance (str): distance metric, 'cosine' (default) or 'euclidean', 'rbf'.
-        normalize_features (bool): normalize input features before computing affinity matrix
-
     Returns:
         (torch.Tensor): affinity matrix, shape (n_samples, n_samples)
     """
@@ -134,12 +121,10 @@ def affinity_from_features(
 
     # if feature_B is not provided, compute affinity matrix on features x features
     # if feature_B is provided, compute affinity matrix on features x feature_B
-    if features_B is not None:
-        assert not fill_diagonal, "fill_diagonal should be False when feature_B is None"
     features_B = features if features_B is None else features_B
 
     # compute distance matrix from input features
-    D = distance_from_features(features, features_B, distance, fill_diagonal)
+    D = distance_from_features(features, features_B, distance)
 
     # torch.exp make affinity matrix positive definite,
     # lower affinity_focal_gamma reduce the weak edge weights
@@ -156,7 +141,6 @@ def propagate_knn(
     affinity_focal_gamma: float = 1.0,
     chunk_size: int = 8096,
     device: str = None,
-    use_tqdm: bool = False,
     move_output_to_cpu: bool = False,
 ):
     """A generic function to propagate new nodes using KNN.
@@ -169,8 +153,6 @@ def propagate_knn(
         distance (str): distance metric, 'cosine' (default) or 'euclidean', 'rbf'
         chunk_size (int): chunk size for matrix multiplication
         device (str): device to use for computation, if None, will not change device
-        use_tqdm (bool): show progress bar when propagating eigenvectors from subgraph to full graph
-
     Returns:
         torch.Tensor: propagated eigenvectors, shape (new_num_samples, D)
 
@@ -197,24 +179,16 @@ def propagate_knn(
     # used in nystrom_ncut
     # propagate eigen_vector from subgraph to full graph
     subgraph_output = subgraph_output.to(device)
-    V_list = []
-    iterator = range(0, inp_features.shape[0], chunk_size)
-    try:
-        assert use_tqdm
-        from tqdm import tqdm
-        iterator = tqdm(iterator, "propagate by KNN")
-    except (AssertionError, ImportError):
-        pass
 
-    subgraph_features = subgraph_features.to(device)
-    for i in iterator:
-        end = min(i + chunk_size, inp_features.shape[0])
-        _v = inp_features[i:end].to(device)
-        _A = affinity_from_features(subgraph_features, _v, affinity_focal_gamma, distance, False).mT
+    n_chunks = ceildiv(inp_features.shape[0], chunk_size)
+    V_list = []
+    for _v in torch.chunk(inp_features, n_chunks, dim=0):
+        _v = _v.to(device)
+        _A = affinity_from_features(subgraph_features, _v, affinity_focal_gamma, distance).mT
 
         if knn is not None:
             mask = torch.full_like(_A, True, dtype=torch.bool)
-            mask[torch.arange(end - i)[:, None], _A.topk(knn, dim=-1, largest=True).indices] = False
+            mask[torch.arange(len(_v))[:, None], _A.topk(knn, dim=-1, largest=True).indices] = False
             _A[mask] = 0.0
             _A = F.normalize(_A, p=1, dim=-1)
 
@@ -238,10 +212,8 @@ def propagate_nearest(
 ):
     device = subgraph_output.device if device is None else device
     if distance == 'cosine':
-        if not check_if_normalized(inp_features):
-            inp_features = F.normalize(inp_features, dim=-1)
-        if not check_if_normalized(subgraph_features):
-            subgraph_features = F.normalize(subgraph_features, dim=-1)
+        inp_features = lazy_normalize(inp_features, dim=-1)
+        subgraph_features = lazy_normalize(subgraph_features, dim=-1)
 
     # used in nystrom_tsne, equivalent to propagate_by_knn with knn=1
     # propagate tSNE from subgraph to full graph
@@ -250,7 +222,7 @@ def propagate_nearest(
     for i in range(0, inp_features.shape[0], chunk_size):
         end = min(i + chunk_size, inp_features.shape[0])
         _v = inp_features[i:end].to(device)
-        _A = -distance_from_features(subgraph_features, _v, distance, False).mT
+        _A = -distance_from_features(subgraph_features, _v, distance).mT
 
         # keep top1 for each row
         top_idx = _A.argmax(dim=-1).cpu()
@@ -273,7 +245,6 @@ def propagate_eigenvectors(
     sample_method: Literal["farthest", "random"],
     chunk_size: int,
     device: str,
-    use_tqdm: bool,
 ):
     """Propagate eigenvectors to new nodes using KNN. Note: this is equivalent to the class API `NCUT.tranform(new_features)`, expect for the sampling is re-done in this function.
     Args:
@@ -285,8 +256,6 @@ def propagate_eigenvectors(
         sample_method (str): sample method, 'farthest' (default) or 'random'
         chunk_size (int): chunk size for matrix multiplication, default 8096
         device (str): device to use for computation, if None, will not change device
-        use_tqdm (bool): show progress bar when propagating eigenvectors from subgraph to full graph
-
     Returns:
         torch.Tensor: propagated eigenvectors, shape (n_new_samples, num_eig)
 
@@ -319,21 +288,10 @@ def propagate_eigenvectors(
         knn=knn,
         chunk_size=chunk_size,
         device=device,
-        use_tqdm=use_tqdm,
     )
-
     return new_eigenvectors
 
 
-def check_if_normalized(x, n=1000):
-    """check if the input tensor is normalized (unit norm)"""
-    n = min(n, x.shape[0])
-    random_indices = torch.randperm(x.shape[0])[:n]
-    _x = x[random_indices]
-    flag = torch.allclose(torch.norm(_x, dim=-1), torch.ones(n, device=x.device))
-    return flag
-
-
 def quantile_min_max(x, q1=0.01, q2=0.99, n_sample=10000):
     if x.shape[0] > n_sample:
         np.random.seed(0)
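
The 0.0.1 helper check_if_normalized (deleted above) and the F.normalize calls it guarded are folded into a single lazy_normalize call, also imported from the new nystrom_ncut/common.py and not shown in this diff. A plausible sketch based on the code it replaces (hypothetical, 2-D inputs only; not the packaged code):

    import torch
    import torch.nn.functional as F

    def lazy_normalize(x: torch.Tensor, n: int = 1000, **normalize_kwargs) -> torch.Tensor:
        # Spot-check up to n random rows, mirroring the deleted check_if_normalized;
        # if they already have unit norm, return x untouched, otherwise normalize.
        n = min(n, x.shape[0])
        idx = torch.randperm(x.shape[0])[:n]
        if torch.allclose(torch.norm(x[idx], dim=-1), torch.ones(n, device=x.device)):
            return x
        return F.normalize(x, **normalize_kwargs)
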
nystrom_ncut/visualize_utils.py CHANGED
@@ -6,11 +6,11 @@ import torch
 import torch.nn.functional as F
 from sklearn.base import BaseEstimator
 
+from .common import lazy_normalize
 from .propagation_utils import (
     run_subgraph_sampling,
     propagate_knn,
     propagate_eigenvectors,
-    check_if_normalized,
     quantile_min_max,
     quantile_normalize
 )
@@ -20,75 +20,6 @@ def _identity(X: torch.Tensor) -> torch.Tensor:
     return X
 
 
-def eigenvector_to_rgb(
-    eigen_vector: torch.Tensor,
-    method: Literal["tsne_2d", "tsne_3d", "umap_sphere", "umap_2d", "umap_3d"] = "tsne_3d",
-    num_sample: int = 1000,
-    perplexity: int = 150,
-    n_neighbors: int = 150,
-    min_distance: float = 0.1,
-    metric: Literal["cosine", "euclidean"] = "cosine",
-    device: str = None,
-    q: float = 0.95,
-    knn: int = 10,
-    seed: int = 0,
-):
-    """Use t-SNE or UMAP to convert eigenvectors (more than 3) to RGB color (3D RGB CUBE).
-
-    Args:
-        eigen_vector (torch.Tensor): eigenvectors, shape (n_samples, num_eig)
-        method (str): method to convert eigenvectors to RGB,
-            choices are: ['tsne_2d', 'tsne_3d', 'umap_sphere', 'umap_2d', 'umap_3d']
-        num_sample (int): number of samples for Nystrom-like approximation, increase for better approximation
-        perplexity (int): perplexity for t-SNE, increase for more global structure
-        n_neighbors (int): number of neighbors for UMAP, increase for more global structure
-        min_distance (float): minimum distance for UMAP
-        metric (str): distance metric, default 'cosine'
-        device (str): device to use for computation, if None, will not change device
-        q (float): quantile for RGB normalization, default 0.95. lower q results in more sharp colors
-        knn (int): number of KNN for propagating eigenvectors from subgraph to full graph,
-            smaller knn result in more sharp colors, default 1. knn>1 will smooth-out the embedding
-            in the t-SNE or UMAP space.
-        seed (int): random seed for t-SNE or UMAP
-
-    Examples:
-        >>> from ncut_pytorch import eigenvector_to_rgb
-        >>> X_3d, rgb = eigenvector_to_rgb(eigenvectors, method='tsne_3d')
-        >>> print(X_3d.shape, rgb.shape)
-        >>> # (10000, 3) (10000, 3)
-
-    Returns:
-        (torch.Tensor): t-SNE or UMAP embedding, shape (n_samples, 2) or (n_samples, 3)
-        (torch.Tensor): RGB color for each data sample, shape (n_samples, 3)
-    """
-    kwargs = {
-        "num_sample": num_sample,
-        "perplexity": perplexity,
-        "n_neighbors": n_neighbors,
-        "min_distance": min_distance,
-        "metric": metric,
-        "device": device,
-        "q": q,
-        "knn": knn,
-        "seed": seed,
-    }
-
-    if method == "tsne_2d":
-        embed, rgb = rgb_from_tsne_2d(eigen_vector, **kwargs)
-    elif method == "tsne_3d":
-        embed, rgb = rgb_from_tsne_3d(eigen_vector, **kwargs)
-    elif method == "umap_sphere":
-        embed, rgb = rgb_from_umap_sphere(eigen_vector, **kwargs)
-    elif method == "umap_2d":
-        embed, rgb = rgb_from_umap_2d(eigen_vector, **kwargs)
-    elif method == "umap_3d":
-        embed, rgb = rgb_from_umap_3d(eigen_vector, **kwargs)
-    else:
-        raise ValueError("method should be 'tsne_2d', 'tsne_3d' or 'umap_sphere'")
-
-    return embed, rgb
-
-
 def _rgb_with_dimensionality_reduction(
     features: torch.Tensor,
     num_sample: int,
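
With the eigenvector_to_rgb dispatcher removed in 0.0.2, callers are presumably expected to invoke the specific rgb_from_* helpers directly. An illustrative call under that assumption (parameter names are taken from the deleted dispatcher and may not match the released 0.0.2 API exactly):

    # Hypothetical usage after the removal of eigenvector_to_rgb.
    import torch
    from nystrom_ncut.visualize_utils import rgb_from_tsne_3d

    eigenvectors = torch.randn(10_000, 20)   # e.g. eigenvectors produced by an NCUT fit
    x3d, rgb = rgb_from_tsne_3d(eigenvectors, num_sample=1000, perplexity=150, q=0.95, knn=10)
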
@@ -126,7 +57,7 @@ def _rgb_with_dimensionality_reduction(
         move_output_to_cpu=True,
     ))
     rgb = rgb_func(X_nd, q)
-    return X_nd.numpy(force=True), rgb
+    return X_nd, rgb
 
 
 def rgb_from_tsne_2d(
@@ -138,7 +69,6 @@ def rgb_from_tsne_2d(
     seed: int = 0,
     q: float = 0.95,
     knn: int = 10,
-    **kwargs: Any,
 ):
     """
     Returns:
@@ -169,7 +99,6 @@ def rgb_from_tsne_2d(
             "perplexity": perplexity,
         },
     )
-
     return x2d, rgb
 
 
@@ -182,7 +111,6 @@ def rgb_from_tsne_3d(
     seed: int = 0,
     q: float = 0.95,
     knn: int = 10,
-    **kwargs: Any,
 ):
     """
     Returns:
@@ -213,7 +141,6 @@ def rgb_from_tsne_3d(
             "perplexity": perplexity,
         },
     )
-
     return x3d, rgb
 
 
@@ -225,7 +152,6 @@ def rgb_from_cosine_tsne_3d(
     seed: int = 0,
     q: float = 0.95,
     knn: int = 10,
-    **kwargs: Any,
 ):
     """
     Returns:
@@ -272,7 +198,6 @@ def rgb_from_cosine_tsne_3d(
             "perplexity": perplexity,
         },
     )
-
     return x3d, rgb
 
 
@@ -286,7 +211,6 @@ def rgb_from_umap_2d(
     seed: int = 0,
     q: float = 0.95,
     knn: int = 10,
-    **kwargs: Any,
 ):
     """
     Returns:
@@ -310,7 +234,6 @@ def rgb_from_umap_2d(
             "min_dist": min_dist,
         },
     )
-
     return x2d, rgb
 
 
@@ -324,7 +247,6 @@ def rgb_from_umap_sphere(
     seed: int = 0,
     q: float = 0.95,
     knn: int = 10,
-    **kwargs: Any,
 ):
     """
     Returns:
@@ -357,7 +279,6 @@ def rgb_from_umap_sphere(
         },
         transform_func=transform_func
     )
-
     return x3d, rgb
 
 
@@ -371,7 +292,6 @@ def rgb_from_umap_3d(
     seed: int = 0,
     q: float = 0.95,
     knn: int = 10,
-    **kwargs: Any,
 ):
     """
     Returns:
@@ -395,7 +315,6 @@ def rgb_from_umap_3d(
             "min_dist": min_dist,
         },
     )
-
     return x3d, rgb
 
 
@@ -417,13 +336,11 @@ def rotate_rgb_cube(rgb, position=1):
         torch.Tensor: RGB color space, shape (n_samples, 3)
     """
     assert position in range(0, 7), "position should be 0, 1, 2, 3, 4, 5, 6"
-    rotation_matrix = torch.tensor(
-        [
-            [0, 1, 0],
-            [0, 0, 1],
-            [1, 0, 0],
-        ]
-    ).float()
+    rotation_matrix = torch.tensor((
+        (0., 1., 0.),
+        (0., 0., 1.),
+        (1., 0., 0.),
+    ))
     n_mul = position % 3
     rotation_matrix = torch.matrix_power(rotation_matrix, n_mul)
     rgb = rgb @ rotation_matrix
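
The rotate_rgb_cube rewrite only changes how the permutation matrix is constructed (float tuples instead of an int list followed by .float()); the matrix itself is the same cyclic channel permutation. A standalone check, independent of the package:

    import torch

    M = torch.tensor((
        (0., 1., 0.),
        (0., 0., 1.),
        (1., 0., 0.),
    ))
    rgb = torch.tensor([[0.2, 0.5, 0.9]])
    print(rgb @ M)  # tensor([[0.9000, 0.2000, 0.5000]]): [r, g, b] -> [b, r, g]
    assert torch.equal(torch.matrix_power(M, 3), torch.eye(3))  # three rotations give the identity
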
@@ -505,7 +422,6 @@ def propagate_rgb_color(
     sample_method: Literal["farthest", "random"] = "farthest",
     chunk_size: int = 8096,
     device: str = None,
-    use_tqdm: bool = False,
 ):
     """Propagate RGB color to new nodes using KNN.
     Args:
@@ -517,8 +433,6 @@ def propagate_rgb_color(
         sample_method (str): sample method, 'farthest' (default) or 'random'
         chunk_size (int): chunk size for matrix multiplication, default 8096
         device (str): device to use for computation, if None, will not change device
-        use_tqdm (bool): show progress bar when propagating RGB color from subgraph to full graph
-
     Returns:
         torch.Tensor: propagated RGB color for each data sample, shape (n_new_samples, 3)
 
@@ -538,7 +452,6 @@ def propagate_rgb_color(
         sample_method=sample_method,
         chunk_size=chunk_size,
         device=device,
-        use_tqdm=use_tqdm,
     )
 
 
@@ -627,9 +540,7 @@ def get_mask(
     """
 
     # normalize the eigenvectors to unit norm, to compute cosine similarity
-    if not check_if_normalized(all_eigvecs.reshape(-1, all_eigvecs.shape[-1])):
-        all_eigvecs = F.normalize(all_eigvecs, p=2, dim=-1)
-
+    all_eigvecs = lazy_normalize(all_eigvecs, p=2, dim=-1)
     prompt_eigvec = F.normalize(prompt_eigvec, p=2, dim=-1)
 
     # compute the cosine similarity
@@ -642,7 +553,7 @@ def get_mask(
     heatmap = _transform_heatmap(heatmap, gamma=gamma)
 
     masks = heatmap > threshold
-    masks = masks.cpu().numpy().astype(np.uint8)
+    masks = masks.numpy(force=True).astype(np.uint8)
 
     if denoise:
         cleaned_masks = []
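
get_mask above (and farthest_point_sampling earlier) now call Tensor.numpy(force=True) instead of chaining .cpu().numpy(). The force=True keyword, added in recent PyTorch releases, detaches, moves to CPU, and converts in one step; a standalone illustration (not from the package):

    import torch

    t = torch.arange(6, dtype=torch.float32).reshape(2, 3).requires_grad_(True)
    a = t.numpy(force=True)          # detaches (and copies off-GPU if needed) before converting
    b = t.detach().cpu().numpy()
    assert (a == b).all()
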
nystrom_ncut-0.0.2.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: nystrom_ncut
-Version: 0.0.1
+Version: 0.0.2
 Summary: Normalized Cut and Nyström Approximation
 Author-email: Huzheng Yang <huze.yann@gmail.com>, Wentinn Liao <wentinn.liao@gmail.com>
 Project-URL: Documentation, https://github.com/JophiArcana/Nystrom-NCUT/
nystrom_ncut-0.0.2.dist-info/RECORD ADDED
@@ -0,0 +1,11 @@
+nystrom_ncut/__init__.py,sha256=Cww-_OsyQHLKpgw_Wh28_tUOvIMMr7Ey8w-tH7v99xQ,452
+nystrom_ncut/common.py,sha256=qdR_JwknT9H1Cv5LopwdwZfORFx-O8MLiRI6ZF1Qohc,558
+nystrom_ncut/ncut_pytorch.py,sha256=Lz0aQwZMOUnxQRvf6m9-eWZG8Zha71sRikp7sDuvNHo,11980
+nystrom_ncut/nystrom.py,sha256=Jo-P-2vnYk8yEZinGZnN3jHMiiB5AueoaLAYK4OmRqE,8604
+nystrom_ncut/propagation_utils.py,sha256=rBya8WnspnvhF_sGDAprIHEmerw_93td7ddRG3lUQHA,12116
+nystrom_ncut/visualize_utils.py,sha256=1-eoF2FlMKJSjqHacuBiJ9IcEcvV-WJkBtKp_PoIg-0,18569
+nystrom_ncut-0.0.2.dist-info/LICENSE,sha256=2bm9uFabQZ3Ykb_SaSU_uUbAj2-htc6WJQmS_65qD00,1073
+nystrom_ncut-0.0.2.dist-info/METADATA,sha256=x040uvrRFlXh9iXvPEyNcymw2rGmkYnCOGp4eIF-pKQ,6058
+nystrom_ncut-0.0.2.dist-info/WHEEL,sha256=A3WOREP4zgxI0fKrHUG8DC8013e3dK3n7a6HDbcEIwE,91
+nystrom_ncut-0.0.2.dist-info/top_level.txt,sha256=j7g_j0S048EvguFFnGgD5Ewd3r2H6klsxd5A4dd-wHw,13
+nystrom_ncut-0.0.2.dist-info/RECORD,,