nystrom-ncut 0.0.2__tar.gz → 0.0.4__tar.gz

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: nystrom_ncut
-Version: 0.0.2
+Version: 0.0.4
 Summary: Normalized Cut and Nyström Approximation
 Author-email: Huzheng Yang <huze.yann@gmail.com>, Wentinn Liao <wentinn.liao@gmail.com>
 Project-URL: Documentation, https://github.com/JophiArcana/Nystrom-NCUT/
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "nystrom_ncut"
-version = "0.0.2"
+version = "0.0.4"
 authors = [
     { name = "Huzheng Yang", email = "huze.yann@gmail.com" },
     { name = "Wentinn Liao", email = "wentinn.liao@gmail.com" },
@@ -94,20 +94,19 @@ class NCUT(OnlineNystrom):
 
     def __init__(
         self,
-        num_eig: int = 100,
+        n_components: int = 100,
         affinity_focal_gamma: float = 1.0,
         num_sample: int = 10000,
         sample_method: Literal["farthest", "random"] = "farthest",
         distance: DistanceOptions = "cosine",
         eig_solver: EigSolverOptions = "svd_lowrank",
         normalize_features: bool = None,
-        device: str = None,
         move_output_to_cpu: bool = False,
-        matmul_chunk_size: int = 8096,
+        chunk_size: int = 8192,
     ):
         """
         Args:
-            num_eig (int): number of top eigenvectors to return
+            n_components (int): number of top eigenvectors to return
             affinity_focal_gamma (float): affinity matrix temperature, lower t reduce the not-so-connected edge weights,
                 smaller t result in more sharp eigenvectors.
             num_sample (int): number of samples for Nystrom-like approximation,
@@ -118,17 +117,15 @@ class NCUT(OnlineNystrom):
             eig_solver (str): eigen decompose solver, ['svd_lowrank', 'lobpcg', 'svd', 'eigh'].
             normalize_features (bool): normalize input features before computing affinity matrix,
                 default 'None' is True for cosine distance, False for euclidean distance and rbf
-            device (str): device to use for eigen computation,
-                move to GPU to speeds up a bit (~5x faster)
             move_output_to_cpu (bool): move output to CPU, set to True if you have memory issue
-            matmul_chunk_size (int): chunk size for large-scale matrix multiplication
+            chunk_size (int): chunk size for large-scale matrix multiplication
         """
         OnlineNystrom.__init__(
             self,
-            n_components=num_eig,
+            n_components=n_components,
             kernel=LaplacianKernel(affinity_focal_gamma, distance, eig_solver),
             eig_solver=eig_solver,
-            chunk_size=matmul_chunk_size,
+            chunk_size=chunk_size,
         )
         self.num_sample = num_sample
         self.sample_method = sample_method
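
For orientation, a minimal usage sketch of the constructor after the rename; the import path follows the package's own test file, and the tensor sizes are illustrative assumptions. Note that the 0.0.2 `device` argument is gone entirely: computation now stays on `features.device`.

    import torch
    from src.nystrom_ncut.ncut_pytorch import NCUT  # path as used in the package's test file

    features = torch.rand(1000, 64)  # [n_samples x n_features], illustrative sizes
    ncut = NCUT(
        n_components=20,        # formerly `num_eig`
        num_sample=300,
        sample_method="random",
        chunk_size=8192,        # formerly `matmul_chunk_size`, default corrected from 8096
    )
    X, eigs = ncut.fit_transform(features)  # eigenvectors [n x 20], eigenvalues [20]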
@@ -140,19 +137,14 @@ class NCUT(OnlineNystrom):
         if distance in ["euclidean", "rbf"]:
             self.normalize_features = False
 
-        self.device = device
         self.move_output_to_cpu = move_output_to_cpu
-        self.matmul_chunk_size = matmul_chunk_size
+        self.chunk_size = chunk_size
 
     def _fit_helper(
         self,
         features: torch.Tensor,
         precomputed_sampled_indices: torch.Tensor,
     ) -> Tuple[torch.Tensor, torch.Tensor]:
-        # move subgraph gpu to speed up
-        original_device = features.device
-        device = original_device if self.device is None else self.device
-
         _n = features.shape[0]
         if self.num_sample >= _n:
             logging.info(
@@ -184,13 +176,13 @@ class NCUT(OnlineNystrom):
                 num_sample=self.num_sample,
                 sample_method=self.sample_method,
             )
-            sampled_features = features[sampled_indices].to(device)
+            sampled_features = features[sampled_indices]
         OnlineNystrom.fit(self, sampled_features)
 
         _n_not_sampled = _n - len(sampled_features)
         if _n_not_sampled > 0:
-            unsampled_indices = torch.full((_n,), True).scatter(0, sampled_indices, False)
-            unsampled_features = features[unsampled_indices].to(device)
+            unsampled_indices = torch.full((_n,), True, device=features.device).scatter_(0, sampled_indices, False)
+            unsampled_features = features[unsampled_indices]
             V_unsampled, _ = OnlineNystrom.update(self, unsampled_features)
         else:
             unsampled_indices = V_unsampled = None
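
The rewritten mask line switches from scatter (which allocates a copy) to the in-place scatter_, and pins the mask to features.device. A standalone sketch of what it computes, with toy values:

    import torch

    n = 10
    sampled_indices = torch.tensor([1, 4, 7])
    # All-True mask, then mark the sampled positions False in place.
    unsampled = torch.full((n,), True).scatter_(0, sampled_indices, False)
    print(unsampled)
    # tensor([ True, False,  True,  True, False,  True,  True, False,  True,  True])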
@@ -231,7 +223,7 @@ class NCUT(OnlineNystrom):
         V_sampled, L = OnlineNystrom.transform(self)
 
         if unsampled_indices is not None:
-            V = torch.zeros((len(unsampled_indices), self.n_components))
+            V = torch.zeros((len(unsampled_indices), self.n_components), device=features.device)
             V[~unsampled_indices] = V_sampled
             V[unsampled_indices] = V_unsampled
         else:
@@ -52,6 +52,18 @@ class OnlineNystrom:
         self.transform_matrix: torch.Tensor = None    # [n x n_components]
         self.LS: torch.Tensor = None                  # [n]
 
+    def _update_to_kernel(self) -> Tuple[torch.Tensor, torch.Tensor]:
+        self.A = self.S = self.kernel.transform()
+        U, L = solve_eig(
+            self.A,
+            num_eig=self.inverse_approximation_dim,
+            eig_solver=self.eig_solver,
+        )                                             # [n x (? + 1)], [? + 1]
+        self.Ahinv_UL = U * (L ** -0.5)               # [n x (? + 1)]
+        self.Ahinv_VT = U.mT                          # [(? + 1) x n]
+        self.Ahinv = self.Ahinv_UL @ self.Ahinv_VT    # [n x n]
+        return U, L
+
     def fit(self, features: torch.Tensor):
         OnlineNystrom.fit_transform(self, features)
         return self
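
The extracted helper caches an approximate inverse square root of the anchor kernel matrix: if A ≈ U diag(L) Uᵀ, then Ahinv = U diag(L^-1/2) Uᵀ ≈ A^-1/2, kept in factored form (Ahinv_UL, Ahinv_VT) so later updates can apply it cheaply. A minimal sketch of the identity, assuming a symmetric positive-definite A and a full eigendecomposition:

    import torch

    torch.manual_seed(0)
    X = torch.randn(6, 6)
    A = X @ X.mT + 6 * torch.eye(6)      # symmetric positive definite
    L, U = torch.linalg.eigh(A)          # A = U diag(L) U^T
    Ahinv = (U * L ** -0.5) @ U.mT       # U diag(L^-1/2) U^T
    # Ahinv @ A @ Ahinv recovers the identity (up to truncation error)
    print(torch.allclose(Ahinv @ A @ Ahinv, torch.eye(6), atol=1e-5))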
@@ -60,17 +72,8 @@ class OnlineNystrom:
         self.anchor_features = features
 
         self.kernel.fit(self.anchor_features)
-        self.A = self.S = self.kernel.transform()     # [n x n]
-
         self.inverse_approximation_dim = max(self.n_components, features.shape[-1]) + 1
-        U, L = solve_eig(
-            self.A,
-            num_eig=self.inverse_approximation_dim,
-            eig_solver=self.eig_solver,
-        )                                             # [n x (? + 1)], [? + 1]
-        self.Ahinv_UL = U * (L ** -0.5)               # [n x (? + 1)]
-        self.Ahinv_VT = U.mT                          # [(? + 1) x n]
-        self.Ahinv = self.Ahinv_UL @ self.Ahinv_VT    # [n x n]
+        U, L = self._update_to_kernel()               # [n x (? + 1)], [? + 1]
 
         self.transform_matrix = (U / L)[:, :self.n_components]    # [n x n_components]
         self.LS = L[:self.n_components]               # [n_components]
@@ -83,6 +86,7 @@ class OnlineNystrom:
             chunks = torch.chunk(features, n_chunks, dim=0)
             for chunk in chunks:
                 self.kernel.update(chunk)
+            self._update_to_kernel()
 
             compressed_BBT = torch.zeros((self.inverse_approximation_dim, self.inverse_approximation_dim))    # [(? + 1) x (? + 1))]
             for i, chunk in enumerate(chunks):
@@ -101,6 +105,7 @@ class OnlineNystrom:
         else:
             """ Unchunked version """
             B = self.kernel.update(features).mT       # [n x m]
+            self._update_to_kernel()
             compressed_B = self.Ahinv_VT @ B          # [indirect_pca_dim x m]
 
         self.S = self.S + self.Ahinv_UL @ (compressed_B @ compressed_B.mT) @ self.Ahinv_UL.mT    # [n x n]
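
For the update itself: with compressed_B = Uᵀ B and Ahinv_UL = U diag(L^-1/2), the accumulated term equals A^-1/2 B Bᵀ A^-1/2 restricted to the span of U, so S maintains the usual Nyström correction S ≈ A + A^-1/2 B Bᵀ A^-1/2, now recomputed against the kernel refreshed by the new _update_to_kernel() call. A small check of that algebra under those definitions:

    import torch

    torch.manual_seed(0)
    n, m = 6, 4
    A = torch.randn(n, n)
    A = A @ A.mT + n * torch.eye(n)               # symmetric positive definite anchor kernel
    B = torch.randn(n, m)                         # anchor-to-new-points kernel block
    L, U = torch.linalg.eigh(A)
    Ahinv_UL, Ahinv_VT = U * L ** -0.5, U.mT
    compressed_B = Ahinv_VT @ B
    term = Ahinv_UL @ (compressed_B @ compressed_B.mT) @ Ahinv_UL.mT
    # Same quantity written directly as A^-1/2 @ B @ B^T @ A^-1/2:
    Ah = (U * L ** -0.5) @ U.mT
    print(torch.allclose(term, Ah @ B @ B.mT @ Ah, atol=1e-5))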
@@ -43,7 +43,7 @@ def run_subgraph_sampling(
         sampled_indices = torch.randperm(features.shape[0])[:num_sample]
     else:
         raise ValueError("sample_method should be 'farthest' or 'random'")
-    return sampled_indices
+    return sampled_indices.to(features.device)
 
 
 def farthest_point_sampling(
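
Moving the returned indices onto features.device matters downstream: scatter_ (used in NCUT._fit_helper above) requires its index tensor on the same device as the tensor it writes into. A sketch of the failure this guards against (hypothetical, needs a CUDA device):

    import torch

    if torch.cuda.is_available():
        mask = torch.full((100,), True, device="cuda")
        cpu_indices = torch.randperm(100)[:10]                # sampled on CPU
        # mask.scatter_(0, cpu_indices, False)                # RuntimeError: device mismatch
        mask.scatter_(0, cpu_indices.to(mask.device), False)  # OK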
@@ -139,7 +139,7 @@ def propagate_knn(
     knn: int = 10,
     distance: Literal["cosine", "euclidean", "rbf"] = "cosine",
     affinity_focal_gamma: float = 1.0,
-    chunk_size: int = 8096,
+    chunk_size: int = 8192,
     device: str = None,
     move_output_to_cpu: bool = False,
 ):
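
The old default 8096 was presumably a typo for 8192 (2 ** 13); the replacement makes the chunk size an exact power of two, and the same correction is applied to every propagate_* signature and docstring below.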
@@ -206,7 +206,7 @@ def propagate_nearest(
     inp_features: torch.Tensor,
     subgraph_features: torch.Tensor,
     distance: Literal["cosine", "euclidean", "rbf"] = "cosine",
-    chunk_size: int = 8096,
+    chunk_size: int = 8192,
     device: str = None,
     move_output_to_cpu: bool = False,
 ):
@@ -254,7 +254,7 @@ def propagate_eigenvectors(
         knn (int): number of KNN to propagate eigenvectors, default 3
         num_sample (int): number of samples for subgraph sampling, default 50000
         sample_method (str): sample method, 'farthest' (default) or 'random'
-        chunk_size (int): chunk size for matrix multiplication, default 8096
+        chunk_size (int): chunk size for matrix multiplication, default 8192
         device (str): device to use for computation, if None, will not change device
     Returns:
         torch.Tensor: propagated eigenvectors, shape (n_new_samples, num_eig)
@@ -420,7 +420,7 @@ def propagate_rgb_color(
     knn: int = 10,
     num_sample: int = 1000,
     sample_method: Literal["farthest", "random"] = "farthest",
-    chunk_size: int = 8096,
+    chunk_size: int = 8192,
     device: str = None,
 ):
     """Propagate RGB color to new nodes using KNN.
@@ -431,7 +431,7 @@ def propagate_rgb_color(
         knn (int): number of KNN to propagate RGB color, default 1
         num_sample (int): number of samples for subgraph sampling, default 50000
         sample_method (str): sample method, 'farthest' (default) or 'random'
-        chunk_size (int): chunk size for matrix multiplication, default 8096
+        chunk_size (int): chunk size for matrix multiplication, default 8192
         device (str): device to use for computation, if None, will not change device
     Returns:
         torch.Tensor: propagated RGB color for each data sample, shape (n_new_samples, 3)
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: nystrom_ncut
-Version: 0.0.2
+Version: 0.0.4
 Summary: Normalized Cut and Nyström Approximation
 Author-email: Huzheng Yang <huze.yann@gmail.com>, Wentinn Liao <wentinn.liao@gmail.com>
 Project-URL: Documentation, https://github.com/JophiArcana/Nystrom-NCUT/
@@ -1,5 +1,7 @@
 import numpy as np
 import torch
+import torch.nn.functional as Fn
+
 from src.nystrom_ncut.ncut_pytorch import NCUT, axis_align
 # from ncut_pytorch.src import rgb_from_umap_sphere
 # from ncut_pytorch.src.new_ncut_pytorch import NewNCUT
@@ -35,28 +37,36 @@ if __name__ == "__main__":
     # # ))
     # raise Exception(
 
-    torch.set_printoptions(precision=12, sci_mode=False, linewidth=400)
-    torch.manual_seed(1212)
-    np.random.seed(1212)
+    torch.set_printoptions(precision=8, sci_mode=False, linewidth=400)
+    torch.set_default_dtype(torch.float64)
+    # torch.manual_seed(1212)
+    # np.random.seed(1212)
 
-    M = torch.rand((10000, 12))
-    # NC = NCUT(num_eig=5, knn=None, verbose=True)
-    kwargs = dict(num_eig=7, sample_method="random")
-    nNC = NCUT(**kwargs)
+    M = torch.rand((200, 12))
+    NC = NCUT(n_components=12, num_sample=80, sample_method="random", chunk_size=20)
 
     torch.manual_seed(1212)
     np.random.seed(1212)
-    nX, neigs = nNC.fit_transform(M)
-    # print(neigs)
-    # print(nX.mT @ nX)
+    X, eigs = NC.fit_transform(M)
+    print(eigs)
+    raise Exception()
 
-    torch.manual_seed(1212)
-    np.random.seed(1212)
+    normalized_M = Fn.normalize(M, p=2, dim=-1)
+    A = torch.exp(-(1 - normalized_M @ normalized_M.mT))
+    R = torch.diag(torch.sum(A, dim=-1) ** -0.5)
+    L = R @ A @ R
+    # print(L)
+    print(X @ torch.diag(eigs) @ X.mT)
+    print(L)
+    print(torch.abs(X @ torch.diag(eigs) @ X.mT / L - 1))
 
-    aX, R = axis_align(nX)
-    print(aX[:3])
-    print(R)
-    print(R @ R.mT)
+    # torch.manual_seed(1212)
+    # np.random.seed(1212)
+    #
+    # aX, R = axis_align(X)
+    # print(aX[:3])
+    # print(R)
+    # print(R @ R.mT)
     raise Exception()
 
 
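The rewritten test block builds a dense reference for the quantity NCUT approximates: with normalized_M = Fn.normalize(M, p=2, dim=-1), it forms the cosine affinity A = exp(-(1 - normalized_M @ normalized_M.mT)), symmetrically normalizes it to L = R @ A @ R where R = diag(rowsum(A) ** -0.5), and compares the spectral reconstruction X @ diag(eigs) @ X.mT against L, which should hold tightly here since n_components=12 matches the feature dimension. Note that the unconditional raise Exception() right after print(eigs) stops the script before that comparison runs, so the reconstruction check below it is dead code, apparently left in mid-debugging.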