nystrom-ncut 0.0.8__tar.gz → 0.0.10__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (22)
  1. {nystrom_ncut-0.0.8/src/nystrom_ncut.egg-info → nystrom_ncut-0.0.10}/PKG-INFO +1 -1
  2. {nystrom_ncut-0.0.8 → nystrom_ncut-0.0.10}/pyproject.toml +1 -1
  3. nystrom_ncut-0.0.10/src/__init__.py +0 -0
  4. {nystrom_ncut-0.0.8 → nystrom_ncut-0.0.10}/src/nystrom_ncut/__init__.py +4 -3
  5. nystrom_ncut-0.0.10/src/nystrom_ncut/common.py +61 -0
  6. nystrom_ncut-0.0.10/src/nystrom_ncut/nystrom/__init__.py +7 -0
  7. nystrom_ncut-0.0.10/src/nystrom_ncut/nystrom/distance_realization.py +127 -0
  8. nystrom_ncut-0.0.8/src/nystrom_ncut/ncut_pytorch.py → nystrom_ncut-0.0.10/src/nystrom_ncut/nystrom/normalized_cut.py +17 -114
  9. {nystrom_ncut-0.0.8/src/nystrom_ncut → nystrom_ncut-0.0.10/src/nystrom_ncut/nystrom}/nystrom.py +104 -1
  10. {nystrom_ncut-0.0.8 → nystrom_ncut-0.0.10}/src/nystrom_ncut/propagation_utils.py +1 -38
  11. {nystrom_ncut-0.0.8 → nystrom_ncut-0.0.10}/src/nystrom_ncut/visualize_utils.py +57 -42
  12. {nystrom_ncut-0.0.8 → nystrom_ncut-0.0.10/src/nystrom_ncut.egg-info}/PKG-INFO +1 -1
  13. {nystrom_ncut-0.0.8 → nystrom_ncut-0.0.10}/src/nystrom_ncut.egg-info/SOURCES.txt +5 -2
  14. {nystrom_ncut-0.0.8 → nystrom_ncut-0.0.10}/src/nystrom_ncut.egg-info/top_level.txt +1 -0
  15. {nystrom_ncut-0.0.8 → nystrom_ncut-0.0.10}/tests/test.py +3 -4
  16. nystrom_ncut-0.0.8/src/nystrom_ncut/common.py +0 -24
  17. {nystrom_ncut-0.0.8 → nystrom_ncut-0.0.10}/LICENSE +0 -0
  18. {nystrom_ncut-0.0.8 → nystrom_ncut-0.0.10}/MANIFEST.in +0 -0
  19. {nystrom_ncut-0.0.8 → nystrom_ncut-0.0.10}/README.md +0 -0
  20. {nystrom_ncut-0.0.8 → nystrom_ncut-0.0.10}/requirements.txt +0 -0
  21. {nystrom_ncut-0.0.8 → nystrom_ncut-0.0.10}/setup.cfg +0 -0
  22. {nystrom_ncut-0.0.8 → nystrom_ncut-0.0.10}/src/nystrom_ncut.egg-info/dependency_links.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: nystrom_ncut
3
- Version: 0.0.8
3
+ Version: 0.0.10
4
4
  Summary: Normalized Cut and Nyström Approximation
5
5
  Author-email: Huzheng Yang <huze.yann@gmail.com>, Wentinn Liao <wentinn.liao@gmail.com>
6
6
  Project-URL: Documentation, https://github.com/JophiArcana/Nystrom-NCUT/
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "nystrom_ncut"
7
- version = "0.0.8"
7
+ version = "0.0.10"
8
8
  authors = [
9
9
  { name = "Huzheng Yang", email = "huze.yann@gmail.com" },
10
10
  { name = "Wentinn Liao", email = "wentinn.liao@gmail.com" },
File without changes
@@ -1,12 +1,13 @@
1
- from .ncut_pytorch import (
2
- NCUT,
1
+ from .nystrom import (
2
+ DistanceRealization,
3
+ NCut,
3
4
  axis_align,
4
5
  )
5
6
  from .propagation_utils import (
7
+ distance_from_features,
6
8
  affinity_from_features,
7
9
  extrapolate_knn_with_subsampling,
8
10
  extrapolate_knn,
9
- quantile_normalize,
10
11
  )
11
12
  from .visualize_utils import (
12
13
  rgb_from_tsne_3d,
@@ -0,0 +1,61 @@
1
+ from typing import Any, Literal
2
+
3
+ import numpy as np
4
+ import torch
5
+ import torch.nn.functional as Fn
6
+
7
+
8
+ DistanceOptions = Literal["cosine", "euclidean", "rbf"]
9
+ SampleOptions = Literal["farthest", "random"]
10
+
11
+
12
def ceildiv(a: int, b: int) -> int:
    """Return ``a / b`` rounded up to the nearest integer.

    Uses floor division on the negated numerator so the result stays an exact
    integer (no float round-trip).

    Args:
        a: numerator.
        b: denominator; a zero denominator raises ``ZeroDivisionError``.

    Returns:
        The ceiling of ``a / b``.
    """
    return -(-a // b)
14
+
15
+
16
def lazy_normalize(x: torch.Tensor, n: int = 1000, **normalize_kwargs: Any) -> torch.Tensor:
    """Normalize ``x`` unless a random sample of its vectors already has unit norm.

    Up to ``n`` vectors along the last axis are sampled; if all of them have
    norm 1 (within ``torch.allclose`` tolerance) ``x`` is returned unchanged,
    otherwise ``Fn.normalize(x, **normalize_kwargs)`` is returned.

    The shortcut is only attempted when the normalization axis is the last
    axis, because the sampled rows are vectors along that axis. For any other
    axis the tensor is always normalized.

    Args:
        x: input tensor.
        n: maximum number of vectors inspected by the unit-norm check.
        **normalize_kwargs: forwarded verbatim to ``Fn.normalize``
            (e.g. ``p``, ``dim``, ``eps``).

    Returns:
        ``x`` itself when the sample is already normalized, else a normalized copy.
    """
    # Mirror Fn.normalize's own defaults so the check tests the same norm that
    # the normalization would enforce.
    p = normalize_kwargs.get("p", 2.0)
    dim = normalize_kwargs.get("dim", 1)

    if isinstance(dim, int) and x.ndim >= 2 and dim % x.ndim == x.ndim - 1:
        rows = x.flatten(0, -2)                                   # [numel x d]
        numel = rows.shape[0]
        n = min(n, numel)
        random_indices = torch.randperm(numel)[:n]
        norms = torch.norm(rows[random_indices], p=p, dim=-1)     # [n]
        # ones_like keeps dtype and device aligned with the input, so the
        # comparison also works for float64 / half tensors.
        if torch.allclose(norms, torch.ones_like(norms)):
            return x
    return Fn.normalize(x, **normalize_kwargs)
25
+
26
+
27
def quantile_min_max(x, q1=0.01, q2=0.99, n_sample=10000):
    """Return the ``q1``- and ``q2``-quantiles of ``x`` as a ``(vmin, vmax)`` pair.

    When ``x`` has more than ``n_sample`` entries along its first axis, the
    quantiles are estimated on a fixed-seed random subset of ``n_sample`` rows
    instead of the full tensor. Both quantiles are taken over all elements
    (no ``dim`` argument is passed to ``quantile``).

    Args:
        x (torch.Tensor): input tensor.
        q1 (float): quantile returned first.
        q2 (float): quantile returned second.
        n_sample (int): maximum number of rows used for the estimate.

    Returns:
        Tuple of 0-dim tensors ``(x.quantile(q1), x.quantile(q2))``.
    """
    if x.shape[0] > n_sample:
        # A private RandomState seeded with 0 draws exactly the indices that
        # `np.random.seed(0); np.random.choice(...)` would, but leaves the
        # process-wide NumPy RNG state untouched.
        rng = np.random.RandomState(0)
        random_idx = rng.choice(x.shape[0], n_sample, replace=False)
        x = x[random_idx]
    return x.quantile(q1), x.quantile(q2)
35
+
36
+
37
def quantile_normalize(x, q=0.95):
    """Rescale ``x`` to the [0, 1] range using quantile bounds, robust to outliers.

    Steps:
        1. obtain the ``(1 - q)``-th and ``q``-th quantiles of ``x`` from
           ``quantile_min_max`` (one global pair of bounds, not one per feature);
        2. map ``x`` linearly so the lower bound becomes 0 and the upper bound 1;
        3. clamp the result to [0, 1].

    Args:
        x (torch.Tensor | np.ndarray): input values; arrays are converted to
            tensors and non-floating inputs are cast to float so that
            quantiles can be computed.
        q (float): upper quantile, default 0.95; the lower one is ``1 - q``.

    Returns:
        torch.Tensor: tensor of the same shape as ``x`` with values in [0, 1].
    """
    if isinstance(x, np.ndarray):
        x = torch.tensor(x)
    if not x.is_floating_point():
        # quantile() is only defined for floating-point tensors.
        x = x.float()

    vmin, vmax = quantile_min_max(x, 1 - q, q)
    span = vmax - vmin
    if span == 0:
        # Both bounds coincide (e.g. constant input): dividing would give
        # 0/0 = NaN. Use the limit of the clamped map instead: values above
        # the bound go to 1, everything else to 0.
        return (x > vmax).to(x.dtype)
    return ((x - vmin) / span).clamp(0, 1)
@@ -0,0 +1,7 @@
1
+ from .distance_realization import (
2
+ DistanceRealization,
3
+ )
4
+ from .normalized_cut import (
5
+ NCut,
6
+ axis_align,
7
+ )
@@ -0,0 +1,127 @@
1
+ import torch
2
+
3
+ from .nystrom import (
4
+ EigSolverOptions,
5
+ OnlineKernel,
6
+ OnlineNystromSubsampleFit,
7
+ solve_eig,
8
+ )
9
+ from ..common import (
10
+ DistanceOptions,
11
+ SampleOptions,
12
+ )
13
+ from ..propagation_utils import (
14
+ distance_from_features,
15
+ )
16
+
17
+
18
class GramKernel(OnlineKernel):
    """Online kernel built from ``-0.5 * distance`` blocks with a mean-centering shift.

    ``fit`` stores the anchor block ``A`` and a low-rank inverse of it;
    ``update`` accumulates row sums from further features; ``update`` and
    ``transform`` both return the anchor-vs-feature block after subtracting
    row/column means and adding back the grand mean (all estimated from the
    accumulated sums).
    NOTE(review): this looks like the double-centering step used to turn
    squared distances into a Gram matrix - confirm what
    ``distance_from_features`` returns.
    """

    def __init__(
        self,
        distance: DistanceOptions,
        eig_solver: EigSolverOptions,
    ):
        self.distance: DistanceOptions = distance
        self.eig_solver: EigSolverOptions = eig_solver

        # Anchor matrices
        self.anchor_features: torch.Tensor = None               # [n x d]
        self.A: torch.Tensor = None                             # [n x n]
        self.Ainv: torch.Tensor = None                          # [n x n]

        # Updated matrices
        self.a_r: torch.Tensor = None                           # [n]
        self.b_r: torch.Tensor = None                           # [n]
        self.matrix_sum: torch.Tensor = torch.zeros(())         # []
        self.n_features: int = None                             # N

    def _cross_block(self, features: torch.Tensor) -> torch.Tensor:
        """Return ``-0.5 * distance(anchors, features)``."""
        return -0.5 * distance_from_features(
            self.anchor_features,                               # [n x d]
            features,                                           # [m x d]
            distance=self.distance,
        )                                                       # [n x m]

    def _center(self, B: torch.Tensor, col_sum: torch.Tensor) -> torch.Tensor:
        """Apply the mean shift to ``B`` using the current accumulated sums and transpose."""
        row_sum = self.a_r + self.b_r                           # [n]
        shift = -(row_sum[:, None] + col_sum) / self.n_features + self.matrix_sum / (self.n_features ** 2)  # [n x m]
        return (B + shift).mT                                   # [m x n]

    def fit(self, features: torch.Tensor) -> None:
        """Store ``features`` as anchors and initialise all accumulated statistics."""
        self.anchor_features = features                         # [n x d]
        self.A = self._cross_block(self.anchor_features)        # [n x n]
        d = features.shape[-1]
        U, L = solve_eig(
            self.A,
            num_eig=d + 1,  # d * (d + 3) // 2 + 1,
            eig_solver=self.eig_solver,
        )                                                       # [n x (d + 1)], [d + 1]
        # Inverse assembled from the d + 1 eigenpairs returned by solve_eig.
        self.Ainv = U @ torch.diag(1 / L) @ U.mT                # [n x n]
        self.a_r = torch.sum(self.A, dim=-1)                    # [n]
        self.b_r = torch.zeros_like(self.a_r)                   # [n]
        self.matrix_sum = torch.sum(self.a_r)                   # []
        self.n_features = features.shape[0]                     # n

    def update(self, features: torch.Tensor) -> torch.Tensor:
        """Fold ``features`` into the running sums and return their centered block [m x n]."""
        B = self._cross_block(features)                         # [n x m]
        b_r = torch.sum(B, dim=-1)                              # [n]
        b_c = torch.sum(B, dim=-2)                              # [m]
        self.b_r = self.b_r + b_r                               # [n]
        self.matrix_sum = (
            torch.sum(self.a_r)
            + 2 * torch.sum(self.b_r)
            + self.Ainv @ self.b_r @ self.b_r
        )                                                       # []
        self.n_features += features.shape[0]                    # N

        # Column sums use the already-updated b_r, as do the row sums in _center.
        col_sum = b_c + B.mT @ self.Ainv @ self.b_r             # [m]
        return self._center(B, col_sum)                         # [m x n]

    def transform(self, features: torch.Tensor = None) -> torch.Tensor:
        """Return the centered block for ``features`` (or for the anchors when ``None``)
        without modifying any accumulated state."""
        if features is None:
            B = self.A                                          # [n x n]
            col_sum = self.a_r + self.b_r                       # [n]
        else:
            B = self._cross_block(features)                     # [n x m]
            b_c = torch.sum(B, dim=-2)                          # [m]
            col_sum = b_c + B.mT @ self.Ainv @ self.b_r         # [m]
        return self._center(B, col_sum)                         # [m x n]
93
+
94
+
95
class DistanceRealization(OnlineNystromSubsampleFit):
    """Nystrom Distance Realization for large scale graph."""

    def __init__(
        self,
        n_components: int = 100,
        num_sample: int = 10000,
        sample_method: SampleOptions = "farthest",
        distance: DistanceOptions = "cosine",
        eig_solver: EigSolverOptions = "svd_lowrank",
        chunk_size: int = 8192,
    ):
        """
        Args:
            n_components (int): number of top eigenvectors to return
            num_sample (int): number of samples for Nystrom-like approximation,
                reduce only if memory is not enough, increase for better approximation
            sample_method (str): subgraph sampling, ['farthest', 'random'].
                farthest point sampling is recommended for better Nystrom-approximation accuracy
            distance (str): distance metric handed to the GramKernel, ['cosine', 'euclidean', 'rbf'].
            eig_solver (str): eigen decompose solver, ['svd_lowrank', 'lobpcg', 'svd', 'eigh'];
                used both by the GramKernel and by the Nystrom base class.
            chunk_size (int): chunk size for large-scale matrix multiplication
        """
        # The same distance / eig_solver configure the kernel; everything else
        # is forwarded unchanged to the subsampling Nystrom base class.
        OnlineNystromSubsampleFit.__init__(
            self,
            n_components=n_components,
            kernel=GramKernel(distance, eig_solver),
            num_sample=num_sample,
            sample_method=sample_method,
            eig_solver=eig_solver,
            chunk_size=chunk_size,
        )
        self.distance: DistanceOptions = distance
@@ -1,22 +1,18 @@
1
- import logging
2
- from typing import Tuple
3
-
4
1
  import torch
5
2
  import torch.nn.functional as Fn
6
3
 
7
- from .common import (
8
- DistanceOptions,
9
- SampleOptions,
10
- )
11
4
  from .nystrom import (
12
5
  EigSolverOptions,
13
6
  OnlineKernel,
14
- OnlineNystrom,
7
+ OnlineNystromSubsampleFit,
15
8
  solve_eig,
16
9
  )
17
- from .propagation_utils import (
10
+ from ..common import (
11
+ DistanceOptions,
12
+ SampleOptions,
13
+ )
14
+ from ..propagation_utils import (
18
15
  affinity_from_features,
19
- run_subgraph_sampling,
20
16
  )
21
17
 
22
18
 
@@ -68,16 +64,16 @@ class LaplacianKernel(OnlineKernel):
68
64
  b_c = torch.sum(B, dim=-2) # [m]
69
65
  self.b_r = self.b_r + b_r # [n]
70
66
 
71
- rowscale = self.a_r + self.b_r # [n]
72
- colscale = b_c + B.mT @ self.Ainv @ self.b_r # [m]
73
- scale = (rowscale[:, None] * colscale) ** -0.5 # [n x m]
67
+ row_sum = self.a_r + self.b_r # [n]
68
+ col_sum = b_c + B.mT @ self.Ainv @ self.b_r # [m]
69
+ scale = (row_sum[:, None] * col_sum) ** -0.5 # [n x m]
74
70
  return (B * scale).mT # [m x n]
75
71
 
76
72
  def transform(self, features: torch.Tensor = None) -> torch.Tensor:
77
- rowscale = self.a_r + self.b_r # [n]
73
+ row_sum = self.a_r + self.b_r # [n]
78
74
  if features is None:
79
75
  B = self.A # [n x n]
80
- colscale = rowscale # [n]
76
+ col_sum = row_sum # [n]
81
77
  else:
82
78
  B = affinity_from_features(
83
79
  self.anchor_features, # [n x d]
@@ -86,12 +82,12 @@ class LaplacianKernel(OnlineKernel):
86
82
  distance=self.distance,
87
83
  ) # [n x m]
88
84
  b_c = torch.sum(B, dim=-2) # [m]
89
- colscale = b_c + B.mT @ self.Ainv @ self.b_r # [m]
90
- scale = (rowscale[:, None] * colscale) ** -0.5 # [n x m]
85
+ col_sum = b_c + B.mT @ self.Ainv @ self.b_r # [m]
86
+ scale = (row_sum[:, None] * col_sum) ** -0.5 # [n x m]
91
87
  return (B * scale).mT # [m x n]
92
88
 
93
89
 
94
- class NCUT(OnlineNystrom):
90
+ class NCut(OnlineNystromSubsampleFit):
95
91
  """Nystrom Normalized Cut for large scale graph."""
96
92
 
97
93
  def __init__(
@@ -102,7 +98,6 @@ class NCUT(OnlineNystrom):
102
98
  sample_method: SampleOptions = "farthest",
103
99
  distance: DistanceOptions = "cosine",
104
100
  eig_solver: EigSolverOptions = "svd_lowrank",
105
- normalize_features: bool = None,
106
101
  chunk_size: int = 8192,
107
102
  ):
108
103
  """
@@ -116,110 +111,18 @@ class NCUT(OnlineNystrom):
116
111
  farthest point sampling is recommended for better Nystrom-approximation accuracy
117
112
  distance (str): distance metric for affinity matrix, ['cosine', 'euclidean', 'rbf'].
118
113
  eig_solver (str): eigen decompose solver, ['svd_lowrank', 'lobpcg', 'svd', 'eigh'].
119
- normalize_features (bool): normalize input features before computing affinity matrix,
120
- default 'None' is True for cosine distance, False for euclidean distance and rbf
121
114
  chunk_size (int): chunk size for large-scale matrix multiplication
122
115
  """
123
- OnlineNystrom.__init__(
116
+ OnlineNystromSubsampleFit.__init__(
124
117
  self,
125
118
  n_components=n_components,
126
119
  kernel=LaplacianKernel(affinity_focal_gamma, distance, eig_solver),
120
+ num_sample=num_sample,
121
+ sample_method=sample_method,
127
122
  eig_solver=eig_solver,
128
123
  chunk_size=chunk_size,
129
124
  )
130
- self.num_sample: int = num_sample
131
- self.sample_method: SampleOptions = sample_method
132
- self.anchor_indices: torch.Tensor = None
133
125
  self.distance: DistanceOptions = distance
134
- self.normalize_features: bool = normalize_features
135
- if self.normalize_features is None:
136
- if distance in ["cosine"]:
137
- self.normalize_features = True
138
- if distance in ["euclidean", "rbf"]:
139
- self.normalize_features = False
140
-
141
- self.chunk_size: int = chunk_size
142
-
143
- def _fit_helper(
144
- self,
145
- features: torch.Tensor,
146
- precomputed_sampled_indices: torch.Tensor,
147
- ) -> Tuple[torch.Tensor, torch.Tensor]:
148
- _n = features.shape[0]
149
- if self.num_sample >= _n:
150
- logging.info(
151
- f"NCUT nystrom num_sample is larger than number of input samples, nyström approximation is not needed, setting num_sample={_n}"
152
- )
153
- self.num_sample = _n
154
-
155
- assert self.distance in ["cosine", "euclidean", "rbf"], "distance should be 'cosine', 'euclidean', 'rbf'"
156
-
157
- if self.normalize_features:
158
- # features need to be normalized for affinity matrix computation (cosine distance)
159
- features = torch.nn.functional.normalize(features, dim=-1)
160
-
161
- if precomputed_sampled_indices is not None:
162
- _sampled_indices = precomputed_sampled_indices
163
- else:
164
- _sampled_indices = run_subgraph_sampling(
165
- features,
166
- self.num_sample,
167
- sample_method=self.sample_method,
168
- )
169
- self.anchor_indices = torch.sort(_sampled_indices).values
170
- sampled_features = features[self.anchor_indices]
171
- OnlineNystrom.fit(self, sampled_features)
172
-
173
- _n_not_sampled = _n - len(sampled_features)
174
- if _n_not_sampled > 0:
175
- unsampled_indices = torch.full((_n,), True, device=features.device).scatter_(0, self.anchor_indices, False)
176
- unsampled_features = features[unsampled_indices]
177
- V_unsampled, _ = OnlineNystrom.update(self, unsampled_features)
178
- else:
179
- unsampled_indices = V_unsampled = None
180
- return unsampled_indices, V_unsampled
181
-
182
- def fit(
183
- self,
184
- features: torch.Tensor,
185
- precomputed_sampled_indices: torch.Tensor = None,
186
- ):
187
- """Fit Nystrom Normalized Cut on the input features.
188
- Args:
189
- features (torch.Tensor): input features, shape (n_samples, n_features)
190
- precomputed_sampled_indices (torch.Tensor): precomputed sampled indices, shape (num_sample,)
191
- override the sample_method, if not None
192
- Returns:
193
- (NCUT): self
194
- """
195
- NCUT._fit_helper(self, features, precomputed_sampled_indices)
196
- return self
197
-
198
- def fit_transform(
199
- self,
200
- features: torch.Tensor,
201
- precomputed_sampled_indices: torch.Tensor = None,
202
- ) -> Tuple[torch.Tensor, torch.Tensor]:
203
- """
204
- Args:
205
- features (torch.Tensor): input features, shape (n_samples, n_features)
206
- precomputed_sampled_indices (torch.Tensor): precomputed sampled indices, shape (num_sample,)
207
- override the sample_method, if not None
208
-
209
- Returns:
210
- (torch.Tensor): eigen_vectors, shape (n_samples, num_eig)
211
- (torch.Tensor): eigen_values, sorted in descending order, shape (num_eig,)
212
- """
213
- unsampled_indices, V_unsampled = NCUT._fit_helper(self, features, precomputed_sampled_indices)
214
- V_sampled, L = OnlineNystrom.transform(self)
215
-
216
- if unsampled_indices is not None:
217
- V = torch.zeros((len(unsampled_indices), self.n_components), device=features.device)
218
- V[~unsampled_indices] = V_sampled
219
- V[unsampled_indices] = V_unsampled
220
- else:
221
- V = V_sampled
222
- return V, L
223
126
 
224
127
 
225
128
  def axis_align(eigen_vectors: torch.Tensor, max_iter=300):
@@ -1,8 +1,15 @@
1
+ import logging
1
2
  from typing import Literal, Tuple
2
3
 
3
4
  import torch
4
5
 
5
- from .common import ceildiv
6
+ from ..common import (
7
+ SampleOptions,
8
+ ceildiv,
9
+ )
10
+ from ..propagation_utils import (
11
+ run_subgraph_sampling,
12
+ )
6
13
 
7
14
 
8
15
  EigSolverOptions = Literal["svd_lowrank", "lobpcg", "svd", "eigh"]
@@ -132,6 +139,102 @@ class OnlineNystrom:
132
139
  return VS, self.LS # [m x n_components], [n_components]
133
140
 
134
141
 
142
class OnlineNystromSubsampleFit(OnlineNystrom):
    """OnlineNystrom variant that fits on a subsample (anchors) of the input and
    then feeds the remaining rows through ``OnlineNystrom.update``."""

    def __init__(
        self,
        n_components: int,
        kernel: OnlineKernel,
        num_sample: int,
        sample_method: SampleOptions,
        eig_solver: EigSolverOptions = "svd_lowrank",
        chunk_size: int = 8192,
    ):
        OnlineNystrom.__init__(
            self,
            n_components=n_components,
            kernel=kernel,
            eig_solver=eig_solver,
            chunk_size=chunk_size,
        )
        self.num_sample: int = num_sample
        self.sample_method: SampleOptions = sample_method
        self.anchor_indices: torch.Tensor = None

    def _fit_helper(
        self,
        features: torch.Tensor,
        precomputed_sampled_indices: torch.Tensor,
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """Select anchors, fit on them, and update with every non-anchor row.

        Returns:
            (unsampled_indices, V_unsampled): boolean mask of shape (n_samples,)
            marking the non-anchor rows and the eigenvectors returned for them
            by ``OnlineNystrom.update``; both are ``None`` when every row is an anchor.
        """
        _n = features.shape[0]
        if self.num_sample >= _n:
            logging.info(
                f"NCUT nystrom num_sample is larger than number of input samples, nyström approximation is not needed, setting num_sample={_n}"
            )
            # NOTE(review): this permanently lowers num_sample on the instance,
            # so a later fit on a larger input keeps the reduced value.
            self.num_sample = _n

        if precomputed_sampled_indices is not None:
            # fit_transform writes the anchor rows back through a boolean mask,
            # which fills rows in ascending index order; the anchors therefore
            # have to be sorted, exactly like the output of run_subgraph_sampling.
            # They must also live on the features' device for scatter_ below.
            self.anchor_indices = torch.sort(
                torch.as_tensor(precomputed_sampled_indices, device=features.device)
            ).values
        else:
            self.anchor_indices = run_subgraph_sampling(
                features,
                self.num_sample,
                sample_method=self.sample_method,
            )
        sampled_features = features[self.anchor_indices]
        OnlineNystrom.fit(self, sampled_features)

        _n_not_sampled = _n - len(sampled_features)
        if _n_not_sampled > 0:
            # True everywhere except at the anchor positions.
            unsampled_indices = torch.full((_n,), True, device=features.device).scatter_(0, self.anchor_indices, False)
            unsampled_features = features[unsampled_indices]
            V_unsampled, _ = OnlineNystrom.update(self, unsampled_features)
        else:
            unsampled_indices = V_unsampled = None
        return unsampled_indices, V_unsampled

    def fit(
        self,
        features: torch.Tensor,
        precomputed_sampled_indices: torch.Tensor = None,
    ):
        """Fit the Nystrom approximation on the input features.
        Args:
            features (torch.Tensor): input features, shape (n_samples, n_features)
            precomputed_sampled_indices (torch.Tensor): precomputed sampled indices, shape (num_sample,)
                override the sample_method, if not None
        Returns:
            self
        """
        OnlineNystromSubsampleFit._fit_helper(self, features, precomputed_sampled_indices)
        return self

    def fit_transform(
        self,
        features: torch.Tensor,
        precomputed_sampled_indices: torch.Tensor = None,
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Args:
            features (torch.Tensor): input features, shape (n_samples, n_features)
            precomputed_sampled_indices (torch.Tensor): precomputed sampled indices, shape (num_sample,)
                override the sample_method, if not None

        Returns:
            (torch.Tensor): eigen_vectors, shape (n_samples, num_eig)
            (torch.Tensor): eigen_values, sorted in descending order, shape (num_eig,)
        """
        unsampled_indices, V_unsampled = OnlineNystromSubsampleFit._fit_helper(self, features, precomputed_sampled_indices)
        V_sampled, L = OnlineNystrom.transform(self)

        if unsampled_indices is None:
            return V_sampled, L

        # Allocate with the eigenvectors' dtype: masked assignment requires
        # source and destination dtypes to match.
        V = torch.zeros(
            (len(unsampled_indices), self.n_components),
            dtype=V_sampled.dtype,
            device=features.device,
        )
        V[~unsampled_indices] = V_sampled
        V[unsampled_indices] = V_unsampled
        return V, L
236
+
237
+
135
238
  def solve_eig(
136
239
  A: torch.Tensor,
137
240
  num_eig: int,
@@ -1,5 +1,4 @@
1
1
  import logging
2
- from typing import Literal
3
2
 
4
3
  import numpy as np
5
4
  import torch
@@ -48,6 +47,7 @@ def run_subgraph_sampling(
48
47
  sampled_indices = torch.randperm(features.shape[0])[:num_sample]
49
48
  else:
50
49
  raise ValueError("sample_method should be 'farthest' or 'random'")
50
+ sampled_indices = torch.sort(sampled_indices).values
51
51
  return sampled_indices.to(features.device)
52
52
 
53
53
 
@@ -256,40 +256,3 @@ def extrapolate_knn_with_subsampling(
256
256
  device=device
257
257
  )
258
258
  return new_eigenvectors
259
-
260
-
261
- def quantile_min_max(x, q1=0.01, q2=0.99, n_sample=10000):
262
- if x.shape[0] > n_sample:
263
- np.random.seed(0)
264
- random_idx = np.random.choice(x.shape[0], n_sample, replace=False)
265
- vmin, vmax = x[random_idx].quantile(q1), x[random_idx].quantile(q2)
266
- else:
267
- vmin, vmax = x.quantile(q1), x.quantile(q2)
268
- return vmin, vmax
269
-
270
-
271
- def quantile_normalize(x, q=0.95):
272
- """normalize each dimension of x to [0, 1], take 95-th percentage, this robust to outliers
273
- </br> 1. sort x
274
- </br> 2. take q-th quantile
275
- </br> min_value -> (1-q)-th quantile
276
- </br> max_value -> q-th quantile
277
- </br> 3. normalize
278
- </br> x = (x - min_value) / (max_value - min_value)
279
-
280
- Args:
281
- x (torch.Tensor): input tensor, shape (n_samples, n_features)
282
- normalize each feature to 0-1 range
283
- q (float): quantile, default 0.95
284
-
285
- Returns:
286
- torch.Tensor: quantile normalized tensor
287
- """
288
- # normalize x to 0-1 range, max value is q-th quantile
289
- # quantile makes the normalization robust to outliers
290
- if isinstance(x, np.ndarray):
291
- x = torch.tensor(x)
292
- vmax, vmin = quantile_min_max(x, q, 1 - q)
293
- x = (x - vmin) / (vmax - vmin)
294
- x = x.clamp(0, 1)
295
- return x
@@ -7,15 +7,13 @@ import torch.nn.functional as F
7
7
  from sklearn.base import BaseEstimator
8
8
 
9
9
  from .common import (
10
- DistanceOptions,
11
10
  lazy_normalize,
11
+ quantile_min_max,
12
+ quantile_normalize,
12
13
  )
13
14
  from .propagation_utils import (
14
15
  run_subgraph_sampling,
15
16
  extrapolate_knn,
16
- extrapolate_knn_with_subsampling,
17
- quantile_min_max,
18
- quantile_normalize
19
17
  )
20
18
 
21
19
 
@@ -28,19 +26,25 @@ def _rgb_with_dimensionality_reduction(
28
26
  num_sample: int,
29
27
  metric: Literal["cosine", "euclidean"],
30
28
  rgb_func: Callable[[torch.Tensor, float], torch.Tensor],
31
- q: float, knn: int,
32
- seed: int, device: str,
29
+ q: float,
30
+ knn: int,
33
31
  reduction: Callable[..., BaseEstimator],
34
32
  reduction_dim: int,
35
33
  reduction_kwargs: Dict[str, Any],
36
- transform_func: Callable[[torch.Tensor], torch.Tensor] = _identity,
37
- pre_smooth: bool = True,
34
+ transform_func: Callable[[torch.Tensor], torch.Tensor],
35
+ seed: int,
36
+ device: str,
38
37
  ) -> Tuple[torch.Tensor, torch.Tensor]:
39
38
 
40
- if pre_smooth:
41
- features = extrapolate_knn(
42
- features,
39
+ if True:
40
+ _subgraph_indices = run_subgraph_sampling(
43
41
  features,
42
+ num_sample=10000,
43
+ sample_method="farthest",
44
+ )
45
+ features = extrapolate_knn(
46
+ features[_subgraph_indices],
47
+ features[_subgraph_indices],
44
48
  features,
45
49
  distance="cosine",
46
50
  )
@@ -78,10 +82,10 @@ def rgb_from_tsne_2d(
78
82
  num_sample: int = 1000,
79
83
  perplexity: int = 150,
80
84
  metric: Literal["cosine", "euclidean"] = "cosine",
81
- device: str = None,
82
- seed: int = 0,
83
85
  q: float = 0.95,
84
86
  knn: int = 10,
87
+ seed: int = 0,
88
+ device: str = None,
85
89
  ):
86
90
  """
87
91
  Returns:
@@ -106,11 +110,13 @@ def rgb_from_tsne_2d(
106
110
  num_sample=num_sample,
107
111
  metric=metric,
108
112
  rgb_func=rgb_from_2d_colormap,
109
- q=q, knn=knn,
110
- seed=seed, device=device,
113
+ q=q,
114
+ knn=knn,
111
115
  reduction=TSNE, reduction_dim=2, reduction_kwargs={
112
116
  "perplexity": perplexity,
113
- },
117
+ }, transform_func=_identity,
118
+ seed=seed,
119
+ device=device,
114
120
  )
115
121
  return x2d, rgb
116
122
 
@@ -120,10 +126,10 @@ def rgb_from_tsne_3d(
120
126
  num_sample: int = 1000,
121
127
  perplexity: int = 150,
122
128
  metric: Literal["cosine", "euclidean"] = "cosine",
123
- device: str = None,
124
- seed: int = 0,
125
129
  q: float = 0.95,
126
130
  knn: int = 10,
131
+ seed: int = 0,
132
+ device: str = None,
127
133
  ):
128
134
  """
129
135
  Returns:
@@ -148,11 +154,13 @@ def rgb_from_tsne_3d(
148
154
  num_sample=num_sample,
149
155
  metric=metric,
150
156
  rgb_func=rgb_from_3d_rgb_cube,
151
- q=q, knn=knn,
152
- seed=seed, device=device,
157
+ q=q,
158
+ knn=knn,
153
159
  reduction=TSNE, reduction_dim=3, reduction_kwargs={
154
160
  "perplexity": perplexity,
155
- },
161
+ }, transform_func=_identity,
162
+ seed=seed,
163
+ device=device,
156
164
  )
157
165
  return x3d, rgb
158
166
 
@@ -161,10 +169,10 @@ def rgb_from_cosine_tsne_3d(
161
169
  features: torch.Tensor,
162
170
  num_sample: int = 1000,
163
171
  perplexity: int = 150,
164
- device: str = None,
165
- seed: int = 0,
166
172
  q: float = 0.95,
167
173
  knn: int = 10,
174
+ seed: int = 0,
175
+ device: str = None
168
176
  ):
169
177
  """
170
178
  Returns:
@@ -205,11 +213,13 @@ def rgb_from_cosine_tsne_3d(
205
213
  num_sample=num_sample,
206
214
  metric="cosine",
207
215
  rgb_func=rgb_from_cosine,
208
- q=q, knn=knn,
209
- seed=seed, device=device,
216
+ q=q,
217
+ knn=knn,
210
218
  reduction=TSNE, reduction_dim=3, reduction_kwargs={
211
219
  "perplexity": perplexity,
212
- },
220
+ }, transform_func=_identity,
221
+ seed=seed,
222
+ device=device,
213
223
  )
214
224
  return x3d, rgb
215
225
 
@@ -220,10 +230,10 @@ def rgb_from_umap_2d(
220
230
  n_neighbors: int = 150,
221
231
  min_dist: float = 0.1,
222
232
  metric: Literal["cosine", "euclidean"] = "cosine",
223
- device: str = None,
224
- seed: int = 0,
225
233
  q: float = 0.95,
226
234
  knn: int = 10,
235
+ seed: int = 0,
236
+ device: str = None,
227
237
  ):
228
238
  """
229
239
  Returns:
@@ -240,12 +250,14 @@ def rgb_from_umap_2d(
240
250
  num_sample=num_sample,
241
251
  metric=metric,
242
252
  rgb_func=rgb_from_2d_colormap,
243
- q=q, knn=knn,
244
- seed=seed, device=device,
253
+ q=q,
254
+ knn=knn,
245
255
  reduction=UMAP, reduction_dim=2, reduction_kwargs={
246
256
  "n_neighbors": n_neighbors,
247
257
  "min_dist": min_dist,
248
- },
258
+ }, transform_func=_identity,
259
+ seed=seed,
260
+ device=device,
249
261
  )
250
262
  return x2d, rgb
251
263
 
@@ -256,10 +268,10 @@ def rgb_from_umap_sphere(
256
268
  n_neighbors: int = 150,
257
269
  min_dist: float = 0.1,
258
270
  metric: Literal["cosine", "euclidean"] = "cosine",
259
- device: str = None,
260
- seed: int = 0,
261
271
  q: float = 0.95,
262
272
  knn: int = 10,
273
+ seed: int = 0,
274
+ device: str = None,
263
275
  ):
264
276
  """
265
277
  Returns:
@@ -283,14 +295,15 @@ def rgb_from_umap_sphere(
283
295
  num_sample=num_sample,
284
296
  metric=metric,
285
297
  rgb_func=rgb_from_3d_rgb_cube,
286
- q=q, knn=knn,
287
- seed=seed, device=device,
298
+ q=q,
299
+ knn=knn,
288
300
  reduction=UMAP, reduction_dim=2, reduction_kwargs={
289
301
  "n_neighbors": n_neighbors,
290
302
  "min_dist": min_dist,
291
303
  "output_metric": "haversine",
292
- },
293
- transform_func=transform_func
304
+ }, transform_func=transform_func,
305
+ seed=seed,
306
+ device=device,
294
307
  )
295
308
  return x3d, rgb
296
309
 
@@ -301,10 +314,10 @@ def rgb_from_umap_3d(
301
314
  n_neighbors: int = 150,
302
315
  min_dist: float = 0.1,
303
316
  metric: Literal["cosine", "euclidean"] = "cosine",
304
- device: str = None,
305
- seed: int = 0,
306
317
  q: float = 0.95,
307
318
  knn: int = 10,
319
+ seed: int = 0,
320
+ device: str = None,
308
321
  ):
309
322
  """
310
323
  Returns:
@@ -321,12 +334,14 @@ def rgb_from_umap_3d(
321
334
  num_sample=num_sample,
322
335
  metric=metric,
323
336
  rgb_func=rgb_from_3d_rgb_cube,
324
- q=q, knn=knn,
325
- seed=seed, device=device,
337
+ q=q,
338
+ knn=knn,
326
339
  reduction=UMAP, reduction_dim=3, reduction_kwargs={
327
340
  "n_neighbors": n_neighbors,
328
341
  "min_dist": min_dist,
329
- },
342
+ }, transform_func=_identity,
343
+ seed=seed,
344
+ device=device,
330
345
  )
331
346
  return x3d, rgb
332
347
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: nystrom_ncut
3
- Version: 0.0.8
3
+ Version: 0.0.10
4
4
  Summary: Normalized Cut and Nyström Approximation
5
5
  Author-email: Huzheng Yang <huze.yann@gmail.com>, Wentinn Liao <wentinn.liao@gmail.com>
6
6
  Project-URL: Documentation, https://github.com/JophiArcana/Nystrom-NCUT/
@@ -3,14 +3,17 @@ MANIFEST.in
3
3
  README.md
4
4
  pyproject.toml
5
5
  requirements.txt
6
+ src/__init__.py
6
7
  src/nystrom_ncut/__init__.py
7
8
  src/nystrom_ncut/common.py
8
- src/nystrom_ncut/ncut_pytorch.py
9
- src/nystrom_ncut/nystrom.py
10
9
  src/nystrom_ncut/propagation_utils.py
11
10
  src/nystrom_ncut/visualize_utils.py
12
11
  src/nystrom_ncut.egg-info/PKG-INFO
13
12
  src/nystrom_ncut.egg-info/SOURCES.txt
14
13
  src/nystrom_ncut.egg-info/dependency_links.txt
15
14
  src/nystrom_ncut.egg-info/top_level.txt
15
+ src/nystrom_ncut/nystrom/__init__.py
16
+ src/nystrom_ncut/nystrom/distance_realization.py
17
+ src/nystrom_ncut/nystrom/normalized_cut.py
18
+ src/nystrom_ncut/nystrom/nystrom.py
16
19
  tests/test.py
@@ -1,10 +1,9 @@
1
1
  import numpy as np
2
2
  import torch
3
- import torch.nn.functional as Fn
4
3
  from matplotlib import pyplot as plt
5
4
 
6
- from src.nystrom_ncut.ncut_pytorch import NCUT, axis_align, affinity_from_features
7
- from ncut_pytorch import NCUT as OldNCUT
5
+ from src.nystrom_ncut import NCut, affinity_from_features
6
+
8
7
  # from ncut_pytorch.src import rgb_from_umap_sphere
9
8
  # from ncut_pytorch.src.new_ncut_pytorch import NewNCUT
10
9
 
@@ -73,7 +72,7 @@ if __name__ == "__main__":
73
72
  def print_re(re):
74
73
  print(f"max: {re.max().item()}, mean: {re.mean().item()}, min: {re.min().item()}")
75
74
 
76
- nc0 = NCUT(n_components=n_components, num_sample=num_sample, distance=distance, eig_solver=eig_solver)
75
+ nc0 = NCut(n_components=n_components, num_sample=num_sample, distance=distance, eig_solver=eig_solver)
77
76
  X0, eigs0 = nc0.fit_transform(M)
78
77
 
79
78
  re0 = rel_error(X0, eigs0)
@@ -1,24 +0,0 @@
1
- from typing import Any, Literal
2
-
3
- import numpy as np
4
- import torch
5
- import torch.nn.functional as Fn
6
-
7
-
8
- DistanceOptions = Literal["cosine", "euclidean", "rbf"]
9
- SampleOptions = Literal["farthest", "random"]
10
-
11
-
12
- def ceildiv(a: int, b: int) -> int:
13
- return -(-a // b)
14
-
15
-
16
- def lazy_normalize(x: torch.Tensor, n: int = 1000, **normalize_kwargs: Any) -> torch.Tensor:
17
- numel = np.prod(x.shape[:-1])
18
- n = min(n, numel)
19
- random_indices = torch.randperm(numel)[:n]
20
- _x = x.flatten(0, -2)[random_indices]
21
- if torch.allclose(torch.norm(_x, **normalize_kwargs), torch.ones(n, device=x.device)):
22
- return x
23
- else:
24
- return Fn.normalize(x, **normalize_kwargs)
File without changes
File without changes
File without changes
File without changes