nystrom-ncut 0.0.8__py3-none-any.whl → 0.0.10__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- __init__.py +0 -0
- nystrom_ncut/__init__.py +4 -3
- nystrom_ncut/common.py +37 -0
- nystrom_ncut/nystrom/__init__.py +7 -0
- nystrom_ncut/nystrom/distance_realization.py +127 -0
- nystrom_ncut/{ncut_pytorch.py → nystrom/normalized_cut.py} +17 -114
- nystrom_ncut/{nystrom.py → nystrom/nystrom.py} +104 -1
- nystrom_ncut/propagation_utils.py +1 -38
- nystrom_ncut/visualize_utils.py +57 -42
- {nystrom_ncut-0.0.8.dist-info → nystrom_ncut-0.0.10.dist-info}/METADATA +1 -1
- nystrom_ncut-0.0.10.dist-info/RECORD +14 -0
- {nystrom_ncut-0.0.8.dist-info → nystrom_ncut-0.0.10.dist-info}/top_level.txt +1 -0
- nystrom_ncut-0.0.8.dist-info/RECORD +0 -11
- {nystrom_ncut-0.0.8.dist-info → nystrom_ncut-0.0.10.dist-info}/LICENSE +0 -0
- {nystrom_ncut-0.0.8.dist-info → nystrom_ncut-0.0.10.dist-info}/WHEEL +0 -0
__init__.py
ADDED
File without changes
|
nystrom_ncut/__init__.py
CHANGED
@@ -1,12 +1,13 @@
|
|
1
|
-
from .
|
2
|
-
|
1
|
+
from .nystrom import (
|
2
|
+
DistanceRealization,
|
3
|
+
NCut,
|
3
4
|
axis_align,
|
4
5
|
)
|
5
6
|
from .propagation_utils import (
|
7
|
+
distance_from_features,
|
6
8
|
affinity_from_features,
|
7
9
|
extrapolate_knn_with_subsampling,
|
8
10
|
extrapolate_knn,
|
9
|
-
quantile_normalize,
|
10
11
|
)
|
11
12
|
from .visualize_utils import (
|
12
13
|
rgb_from_tsne_3d,
|
nystrom_ncut/common.py
CHANGED
@@ -22,3 +22,40 @@ def lazy_normalize(x: torch.Tensor, n: int = 1000, **normalize_kwargs: Any) -> t
|
|
22
22
|
return x
|
23
23
|
else:
|
24
24
|
return Fn.normalize(x, **normalize_kwargs)
|
25
|
+
|
26
|
+
|
27
|
+
def quantile_min_max(x, q1=0.01, q2=0.99, n_sample=10000):
|
28
|
+
if x.shape[0] > n_sample:
|
29
|
+
np.random.seed(0)
|
30
|
+
random_idx = np.random.choice(x.shape[0], n_sample, replace=False)
|
31
|
+
vmin, vmax = x[random_idx].quantile(q1), x[random_idx].quantile(q2)
|
32
|
+
else:
|
33
|
+
vmin, vmax = x.quantile(q1), x.quantile(q2)
|
34
|
+
return vmin, vmax
|
35
|
+
|
36
|
+
|
37
|
+
def quantile_normalize(x, q=0.95):
|
38
|
+
"""normalize each dimension of x to [0, 1], take 95-th percentage, this robust to outliers
|
39
|
+
</br> 1. sort x
|
40
|
+
</br> 2. take q-th quantile
|
41
|
+
</br> min_value -> (1-q)-th quantile
|
42
|
+
</br> max_value -> q-th quantile
|
43
|
+
</br> 3. normalize
|
44
|
+
</br> x = (x - min_value) / (max_value - min_value)
|
45
|
+
|
46
|
+
Args:
|
47
|
+
x (torch.Tensor): input tensor, shape (n_samples, n_features)
|
48
|
+
normalize each feature to 0-1 range
|
49
|
+
q (float): quantile, default 0.95
|
50
|
+
|
51
|
+
Returns:
|
52
|
+
torch.Tensor: quantile normalized tensor
|
53
|
+
"""
|
54
|
+
# normalize x to 0-1 range, max value is q-th quantile
|
55
|
+
# quantile makes the normalization robust to outliers
|
56
|
+
if isinstance(x, np.ndarray):
|
57
|
+
x = torch.tensor(x)
|
58
|
+
vmax, vmin = quantile_min_max(x, q, 1 - q)
|
59
|
+
x = (x - vmin) / (vmax - vmin)
|
60
|
+
x = x.clamp(0, 1)
|
61
|
+
return x
|
@@ -0,0 +1,127 @@
|
|
1
|
+
import torch
|
2
|
+
|
3
|
+
from .nystrom import (
|
4
|
+
EigSolverOptions,
|
5
|
+
OnlineKernel,
|
6
|
+
OnlineNystromSubsampleFit,
|
7
|
+
solve_eig,
|
8
|
+
)
|
9
|
+
from ..common import (
|
10
|
+
DistanceOptions,
|
11
|
+
SampleOptions,
|
12
|
+
)
|
13
|
+
from ..propagation_utils import (
|
14
|
+
distance_from_features,
|
15
|
+
)
|
16
|
+
|
17
|
+
|
18
|
+
class GramKernel(OnlineKernel):
|
19
|
+
def __init__(
|
20
|
+
self,
|
21
|
+
distance: DistanceOptions,
|
22
|
+
eig_solver: EigSolverOptions,
|
23
|
+
):
|
24
|
+
self.distance: DistanceOptions = distance
|
25
|
+
self.eig_solver: EigSolverOptions = eig_solver
|
26
|
+
|
27
|
+
# Anchor matrices
|
28
|
+
self.anchor_features: torch.Tensor = None # [n x d]
|
29
|
+
self.A: torch.Tensor = None # [n x n]
|
30
|
+
self.Ainv: torch.Tensor = None # [n x n]
|
31
|
+
|
32
|
+
# Updated matrices
|
33
|
+
self.a_r: torch.Tensor = None # [n]
|
34
|
+
self.b_r: torch.Tensor = None # [n]
|
35
|
+
self.matrix_sum: torch.Tensor = torch.zeros(()) # []
|
36
|
+
self.n_features: int = None # N
|
37
|
+
|
38
|
+
def fit(self, features: torch.Tensor) -> None:
|
39
|
+
self.anchor_features = features # [n x d]
|
40
|
+
self.A = -0.5 * distance_from_features(
|
41
|
+
self.anchor_features, # [n x d]
|
42
|
+
self.anchor_features,
|
43
|
+
distance=self.distance,
|
44
|
+
) # [n x n]
|
45
|
+
d = features.shape[-1]
|
46
|
+
U, L = solve_eig(
|
47
|
+
self.A,
|
48
|
+
num_eig=d + 1, # d * (d + 3) // 2 + 1,
|
49
|
+
eig_solver=self.eig_solver,
|
50
|
+
) # [n x (d + 1)], [d + 1]
|
51
|
+
self.Ainv = U @ torch.diag(1 / L) @ U.mT # [n x n]
|
52
|
+
self.a_r = torch.sum(self.A, dim=-1) # [n]
|
53
|
+
self.b_r = torch.zeros_like(self.a_r) # [n]
|
54
|
+
self.matrix_sum = torch.sum(self.a_r) # []
|
55
|
+
self.n_features = features.shape[0] # n
|
56
|
+
|
57
|
+
def update(self, features: torch.Tensor) -> torch.Tensor:
|
58
|
+
B = -0.5 * distance_from_features(
|
59
|
+
self.anchor_features, # [n x d]
|
60
|
+
features, # [m x d]
|
61
|
+
distance=self.distance,
|
62
|
+
) # [n x m]
|
63
|
+
b_r = torch.sum(B, dim=-1) # [n]
|
64
|
+
b_c = torch.sum(B, dim=-2) # [m]
|
65
|
+
self.b_r = self.b_r + b_r # [n]
|
66
|
+
self.matrix_sum = (
|
67
|
+
torch.sum(self.a_r)
|
68
|
+
+ 2 * torch.sum(self.b_r)
|
69
|
+
+ self.Ainv @ self.b_r @ self.b_r
|
70
|
+
) # []
|
71
|
+
self.n_features += features.shape[0] # N
|
72
|
+
|
73
|
+
row_sum = self.a_r + self.b_r # [n]
|
74
|
+
col_sum = b_c + B.mT @ self.Ainv @ self.b_r # [m]
|
75
|
+
shift = -(row_sum[:, None] + col_sum) / self.n_features + self.matrix_sum / (self.n_features ** 2) # [n x m]
|
76
|
+
return (B + shift).mT # [m x n]
|
77
|
+
|
78
|
+
def transform(self, features: torch.Tensor = None) -> torch.Tensor:
|
79
|
+
row_sum = self.a_r + self.b_r
|
80
|
+
if features is None:
|
81
|
+
B = self.A # [n x n]
|
82
|
+
col_sum = row_sum # [n]
|
83
|
+
else:
|
84
|
+
B = -0.5 * distance_from_features(
|
85
|
+
self.anchor_features,
|
86
|
+
features,
|
87
|
+
distance=self.distance,
|
88
|
+
)
|
89
|
+
b_c = torch.sum(B, dim=-2) # [m]
|
90
|
+
col_sum = b_c + B.mT @ self.Ainv @ self.b_r # [m]
|
91
|
+
shift = -(row_sum[:, None] + col_sum) / self.n_features + self.matrix_sum / (self.n_features ** 2) # [n x m]
|
92
|
+
return (B + shift).mT # [m x n]
|
93
|
+
|
94
|
+
|
95
|
+
class DistanceRealization(OnlineNystromSubsampleFit):
|
96
|
+
"""Nystrom Distance Realization for large scale graph."""
|
97
|
+
|
98
|
+
def __init__(
|
99
|
+
self,
|
100
|
+
n_components: int = 100,
|
101
|
+
num_sample: int = 10000,
|
102
|
+
sample_method: SampleOptions = "farthest",
|
103
|
+
distance: DistanceOptions = "cosine",
|
104
|
+
eig_solver: EigSolverOptions = "svd_lowrank",
|
105
|
+
chunk_size: int = 8192,
|
106
|
+
):
|
107
|
+
"""
|
108
|
+
Args:
|
109
|
+
n_components (int): number of top eigenvectors to return
|
110
|
+
num_sample (int): number of samples for Nystrom-like approximation,
|
111
|
+
reduce only if memory is not enough, increase for better approximation
|
112
|
+
sample_method (str): subgraph sampling, ['farthest', 'random'].
|
113
|
+
farthest point sampling is recommended for better Nystrom-approximation accuracy
|
114
|
+
distance (str): distance metric for affinity matrix, ['cosine', 'euclidean', 'rbf'].
|
115
|
+
eig_solver (str): eigen decompose solver, ['svd_lowrank', 'lobpcg', 'svd', 'eigh'].
|
116
|
+
chunk_size (int): chunk size for large-scale matrix multiplication
|
117
|
+
"""
|
118
|
+
OnlineNystromSubsampleFit.__init__(
|
119
|
+
self,
|
120
|
+
n_components=n_components,
|
121
|
+
kernel=GramKernel(distance, eig_solver),
|
122
|
+
num_sample=num_sample,
|
123
|
+
sample_method=sample_method,
|
124
|
+
eig_solver=eig_solver,
|
125
|
+
chunk_size=chunk_size,
|
126
|
+
)
|
127
|
+
self.distance: DistanceOptions = distance
|
@@ -1,22 +1,18 @@
|
|
1
|
-
import logging
|
2
|
-
from typing import Tuple
|
3
|
-
|
4
1
|
import torch
|
5
2
|
import torch.nn.functional as Fn
|
6
3
|
|
7
|
-
from .common import (
|
8
|
-
DistanceOptions,
|
9
|
-
SampleOptions,
|
10
|
-
)
|
11
4
|
from .nystrom import (
|
12
5
|
EigSolverOptions,
|
13
6
|
OnlineKernel,
|
14
|
-
|
7
|
+
OnlineNystromSubsampleFit,
|
15
8
|
solve_eig,
|
16
9
|
)
|
17
|
-
from
|
10
|
+
from ..common import (
|
11
|
+
DistanceOptions,
|
12
|
+
SampleOptions,
|
13
|
+
)
|
14
|
+
from ..propagation_utils import (
|
18
15
|
affinity_from_features,
|
19
|
-
run_subgraph_sampling,
|
20
16
|
)
|
21
17
|
|
22
18
|
|
@@ -68,16 +64,16 @@ class LaplacianKernel(OnlineKernel):
|
|
68
64
|
b_c = torch.sum(B, dim=-2) # [m]
|
69
65
|
self.b_r = self.b_r + b_r # [n]
|
70
66
|
|
71
|
-
|
72
|
-
|
73
|
-
scale = (
|
67
|
+
row_sum = self.a_r + self.b_r # [n]
|
68
|
+
col_sum = b_c + B.mT @ self.Ainv @ self.b_r # [m]
|
69
|
+
scale = (row_sum[:, None] * col_sum) ** -0.5 # [n x m]
|
74
70
|
return (B * scale).mT # [m x n]
|
75
71
|
|
76
72
|
def transform(self, features: torch.Tensor = None) -> torch.Tensor:
|
77
|
-
|
73
|
+
row_sum = self.a_r + self.b_r # [n]
|
78
74
|
if features is None:
|
79
75
|
B = self.A # [n x n]
|
80
|
-
|
76
|
+
col_sum = row_sum # [n]
|
81
77
|
else:
|
82
78
|
B = affinity_from_features(
|
83
79
|
self.anchor_features, # [n x d]
|
@@ -86,12 +82,12 @@ class LaplacianKernel(OnlineKernel):
|
|
86
82
|
distance=self.distance,
|
87
83
|
) # [n x m]
|
88
84
|
b_c = torch.sum(B, dim=-2) # [m]
|
89
|
-
|
90
|
-
scale = (
|
85
|
+
col_sum = b_c + B.mT @ self.Ainv @ self.b_r # [m]
|
86
|
+
scale = (row_sum[:, None] * col_sum) ** -0.5 # [n x m]
|
91
87
|
return (B * scale).mT # [m x n]
|
92
88
|
|
93
89
|
|
94
|
-
class
|
90
|
+
class NCut(OnlineNystromSubsampleFit):
|
95
91
|
"""Nystrom Normalized Cut for large scale graph."""
|
96
92
|
|
97
93
|
def __init__(
|
@@ -102,7 +98,6 @@ class NCUT(OnlineNystrom):
|
|
102
98
|
sample_method: SampleOptions = "farthest",
|
103
99
|
distance: DistanceOptions = "cosine",
|
104
100
|
eig_solver: EigSolverOptions = "svd_lowrank",
|
105
|
-
normalize_features: bool = None,
|
106
101
|
chunk_size: int = 8192,
|
107
102
|
):
|
108
103
|
"""
|
@@ -116,110 +111,18 @@ class NCUT(OnlineNystrom):
|
|
116
111
|
farthest point sampling is recommended for better Nystrom-approximation accuracy
|
117
112
|
distance (str): distance metric for affinity matrix, ['cosine', 'euclidean', 'rbf'].
|
118
113
|
eig_solver (str): eigen decompose solver, ['svd_lowrank', 'lobpcg', 'svd', 'eigh'].
|
119
|
-
normalize_features (bool): normalize input features before computing affinity matrix,
|
120
|
-
default 'None' is True for cosine distance, False for euclidean distance and rbf
|
121
114
|
chunk_size (int): chunk size for large-scale matrix multiplication
|
122
115
|
"""
|
123
|
-
|
116
|
+
OnlineNystromSubsampleFit.__init__(
|
124
117
|
self,
|
125
118
|
n_components=n_components,
|
126
119
|
kernel=LaplacianKernel(affinity_focal_gamma, distance, eig_solver),
|
120
|
+
num_sample=num_sample,
|
121
|
+
sample_method=sample_method,
|
127
122
|
eig_solver=eig_solver,
|
128
123
|
chunk_size=chunk_size,
|
129
124
|
)
|
130
|
-
self.num_sample: int = num_sample
|
131
|
-
self.sample_method: SampleOptions = sample_method
|
132
|
-
self.anchor_indices: torch.Tensor = None
|
133
125
|
self.distance: DistanceOptions = distance
|
134
|
-
self.normalize_features: bool = normalize_features
|
135
|
-
if self.normalize_features is None:
|
136
|
-
if distance in ["cosine"]:
|
137
|
-
self.normalize_features = True
|
138
|
-
if distance in ["euclidean", "rbf"]:
|
139
|
-
self.normalize_features = False
|
140
|
-
|
141
|
-
self.chunk_size: int = chunk_size
|
142
|
-
|
143
|
-
def _fit_helper(
|
144
|
-
self,
|
145
|
-
features: torch.Tensor,
|
146
|
-
precomputed_sampled_indices: torch.Tensor,
|
147
|
-
) -> Tuple[torch.Tensor, torch.Tensor]:
|
148
|
-
_n = features.shape[0]
|
149
|
-
if self.num_sample >= _n:
|
150
|
-
logging.info(
|
151
|
-
f"NCUT nystrom num_sample is larger than number of input samples, nyström approximation is not needed, setting num_sample={_n}"
|
152
|
-
)
|
153
|
-
self.num_sample = _n
|
154
|
-
|
155
|
-
assert self.distance in ["cosine", "euclidean", "rbf"], "distance should be 'cosine', 'euclidean', 'rbf'"
|
156
|
-
|
157
|
-
if self.normalize_features:
|
158
|
-
# features need to be normalized for affinity matrix computation (cosine distance)
|
159
|
-
features = torch.nn.functional.normalize(features, dim=-1)
|
160
|
-
|
161
|
-
if precomputed_sampled_indices is not None:
|
162
|
-
_sampled_indices = precomputed_sampled_indices
|
163
|
-
else:
|
164
|
-
_sampled_indices = run_subgraph_sampling(
|
165
|
-
features,
|
166
|
-
self.num_sample,
|
167
|
-
sample_method=self.sample_method,
|
168
|
-
)
|
169
|
-
self.anchor_indices = torch.sort(_sampled_indices).values
|
170
|
-
sampled_features = features[self.anchor_indices]
|
171
|
-
OnlineNystrom.fit(self, sampled_features)
|
172
|
-
|
173
|
-
_n_not_sampled = _n - len(sampled_features)
|
174
|
-
if _n_not_sampled > 0:
|
175
|
-
unsampled_indices = torch.full((_n,), True, device=features.device).scatter_(0, self.anchor_indices, False)
|
176
|
-
unsampled_features = features[unsampled_indices]
|
177
|
-
V_unsampled, _ = OnlineNystrom.update(self, unsampled_features)
|
178
|
-
else:
|
179
|
-
unsampled_indices = V_unsampled = None
|
180
|
-
return unsampled_indices, V_unsampled
|
181
|
-
|
182
|
-
def fit(
|
183
|
-
self,
|
184
|
-
features: torch.Tensor,
|
185
|
-
precomputed_sampled_indices: torch.Tensor = None,
|
186
|
-
):
|
187
|
-
"""Fit Nystrom Normalized Cut on the input features.
|
188
|
-
Args:
|
189
|
-
features (torch.Tensor): input features, shape (n_samples, n_features)
|
190
|
-
precomputed_sampled_indices (torch.Tensor): precomputed sampled indices, shape (num_sample,)
|
191
|
-
override the sample_method, if not None
|
192
|
-
Returns:
|
193
|
-
(NCUT): self
|
194
|
-
"""
|
195
|
-
NCUT._fit_helper(self, features, precomputed_sampled_indices)
|
196
|
-
return self
|
197
|
-
|
198
|
-
def fit_transform(
|
199
|
-
self,
|
200
|
-
features: torch.Tensor,
|
201
|
-
precomputed_sampled_indices: torch.Tensor = None,
|
202
|
-
) -> Tuple[torch.Tensor, torch.Tensor]:
|
203
|
-
"""
|
204
|
-
Args:
|
205
|
-
features (torch.Tensor): input features, shape (n_samples, n_features)
|
206
|
-
precomputed_sampled_indices (torch.Tensor): precomputed sampled indices, shape (num_sample,)
|
207
|
-
override the sample_method, if not None
|
208
|
-
|
209
|
-
Returns:
|
210
|
-
(torch.Tensor): eigen_vectors, shape (n_samples, num_eig)
|
211
|
-
(torch.Tensor): eigen_values, sorted in descending order, shape (num_eig,)
|
212
|
-
"""
|
213
|
-
unsampled_indices, V_unsampled = NCUT._fit_helper(self, features, precomputed_sampled_indices)
|
214
|
-
V_sampled, L = OnlineNystrom.transform(self)
|
215
|
-
|
216
|
-
if unsampled_indices is not None:
|
217
|
-
V = torch.zeros((len(unsampled_indices), self.n_components), device=features.device)
|
218
|
-
V[~unsampled_indices] = V_sampled
|
219
|
-
V[unsampled_indices] = V_unsampled
|
220
|
-
else:
|
221
|
-
V = V_sampled
|
222
|
-
return V, L
|
223
126
|
|
224
127
|
|
225
128
|
def axis_align(eigen_vectors: torch.Tensor, max_iter=300):
|
@@ -1,8 +1,15 @@
|
|
1
|
+
import logging
|
1
2
|
from typing import Literal, Tuple
|
2
3
|
|
3
4
|
import torch
|
4
5
|
|
5
|
-
from
|
6
|
+
from ..common import (
|
7
|
+
SampleOptions,
|
8
|
+
ceildiv,
|
9
|
+
)
|
10
|
+
from ..propagation_utils import (
|
11
|
+
run_subgraph_sampling,
|
12
|
+
)
|
6
13
|
|
7
14
|
|
8
15
|
EigSolverOptions = Literal["svd_lowrank", "lobpcg", "svd", "eigh"]
|
@@ -132,6 +139,102 @@ class OnlineNystrom:
|
|
132
139
|
return VS, self.LS # [m x n_components], [n_components]
|
133
140
|
|
134
141
|
|
142
|
+
class OnlineNystromSubsampleFit(OnlineNystrom):
|
143
|
+
def __init__(
|
144
|
+
self,
|
145
|
+
n_components: int,
|
146
|
+
kernel: OnlineKernel,
|
147
|
+
num_sample: int,
|
148
|
+
sample_method: SampleOptions,
|
149
|
+
eig_solver: EigSolverOptions = "svd_lowrank",
|
150
|
+
chunk_size: int = 8192,
|
151
|
+
):
|
152
|
+
OnlineNystrom.__init__(
|
153
|
+
self,
|
154
|
+
n_components=n_components,
|
155
|
+
kernel=kernel,
|
156
|
+
eig_solver=eig_solver,
|
157
|
+
chunk_size=chunk_size,
|
158
|
+
)
|
159
|
+
self.num_sample: int = num_sample
|
160
|
+
self.sample_method: SampleOptions = sample_method
|
161
|
+
self.anchor_indices: torch.Tensor = None
|
162
|
+
|
163
|
+
def _fit_helper(
|
164
|
+
self,
|
165
|
+
features: torch.Tensor,
|
166
|
+
precomputed_sampled_indices: torch.Tensor,
|
167
|
+
) -> Tuple[torch.Tensor, torch.Tensor]:
|
168
|
+
_n = features.shape[0]
|
169
|
+
if self.num_sample >= _n:
|
170
|
+
logging.info(
|
171
|
+
f"NCUT nystrom num_sample is larger than number of input samples, nyström approximation is not needed, setting num_sample={_n}"
|
172
|
+
)
|
173
|
+
self.num_sample = _n
|
174
|
+
|
175
|
+
if precomputed_sampled_indices is not None:
|
176
|
+
self.anchor_indices = precomputed_sampled_indices
|
177
|
+
else:
|
178
|
+
self.anchor_indices = run_subgraph_sampling(
|
179
|
+
features,
|
180
|
+
self.num_sample,
|
181
|
+
sample_method=self.sample_method,
|
182
|
+
)
|
183
|
+
sampled_features = features[self.anchor_indices]
|
184
|
+
OnlineNystrom.fit(self, sampled_features)
|
185
|
+
|
186
|
+
_n_not_sampled = _n - len(sampled_features)
|
187
|
+
if _n_not_sampled > 0:
|
188
|
+
unsampled_indices = torch.full((_n,), True, device=features.device).scatter_(0, self.anchor_indices, False)
|
189
|
+
unsampled_features = features[unsampled_indices]
|
190
|
+
V_unsampled, _ = OnlineNystrom.update(self, unsampled_features)
|
191
|
+
else:
|
192
|
+
unsampled_indices = V_unsampled = None
|
193
|
+
return unsampled_indices, V_unsampled
|
194
|
+
|
195
|
+
def fit(
|
196
|
+
self,
|
197
|
+
features: torch.Tensor,
|
198
|
+
precomputed_sampled_indices: torch.Tensor = None,
|
199
|
+
):
|
200
|
+
"""Fit Nystrom Normalized Cut on the input features.
|
201
|
+
Args:
|
202
|
+
features (torch.Tensor): input features, shape (n_samples, n_features)
|
203
|
+
precomputed_sampled_indices (torch.Tensor): precomputed sampled indices, shape (num_sample,)
|
204
|
+
override the sample_method, if not None
|
205
|
+
Returns:
|
206
|
+
(NCut): self
|
207
|
+
"""
|
208
|
+
OnlineNystromSubsampleFit._fit_helper(self, features, precomputed_sampled_indices)
|
209
|
+
return self
|
210
|
+
|
211
|
+
def fit_transform(
|
212
|
+
self,
|
213
|
+
features: torch.Tensor,
|
214
|
+
precomputed_sampled_indices: torch.Tensor = None,
|
215
|
+
) -> Tuple[torch.Tensor, torch.Tensor]:
|
216
|
+
"""
|
217
|
+
Args:
|
218
|
+
features (torch.Tensor): input features, shape (n_samples, n_features)
|
219
|
+
precomputed_sampled_indices (torch.Tensor): precomputed sampled indices, shape (num_sample,)
|
220
|
+
override the sample_method, if not None
|
221
|
+
|
222
|
+
Returns:
|
223
|
+
(torch.Tensor): eigen_vectors, shape (n_samples, num_eig)
|
224
|
+
(torch.Tensor): eigen_values, sorted in descending order, shape (num_eig,)
|
225
|
+
"""
|
226
|
+
unsampled_indices, V_unsampled = OnlineNystromSubsampleFit._fit_helper(self, features, precomputed_sampled_indices)
|
227
|
+
V_sampled, L = OnlineNystrom.transform(self)
|
228
|
+
|
229
|
+
if unsampled_indices is not None:
|
230
|
+
V = torch.zeros((len(unsampled_indices), self.n_components), device=features.device)
|
231
|
+
V[~unsampled_indices] = V_sampled
|
232
|
+
V[unsampled_indices] = V_unsampled
|
233
|
+
else:
|
234
|
+
V = V_sampled
|
235
|
+
return V, L
|
236
|
+
|
237
|
+
|
135
238
|
def solve_eig(
|
136
239
|
A: torch.Tensor,
|
137
240
|
num_eig: int,
|
@@ -1,5 +1,4 @@
|
|
1
1
|
import logging
|
2
|
-
from typing import Literal
|
3
2
|
|
4
3
|
import numpy as np
|
5
4
|
import torch
|
@@ -48,6 +47,7 @@ def run_subgraph_sampling(
|
|
48
47
|
sampled_indices = torch.randperm(features.shape[0])[:num_sample]
|
49
48
|
else:
|
50
49
|
raise ValueError("sample_method should be 'farthest' or 'random'")
|
50
|
+
sampled_indices = torch.sort(sampled_indices).values
|
51
51
|
return sampled_indices.to(features.device)
|
52
52
|
|
53
53
|
|
@@ -256,40 +256,3 @@ def extrapolate_knn_with_subsampling(
|
|
256
256
|
device=device
|
257
257
|
)
|
258
258
|
return new_eigenvectors
|
259
|
-
|
260
|
-
|
261
|
-
def quantile_min_max(x, q1=0.01, q2=0.99, n_sample=10000):
|
262
|
-
if x.shape[0] > n_sample:
|
263
|
-
np.random.seed(0)
|
264
|
-
random_idx = np.random.choice(x.shape[0], n_sample, replace=False)
|
265
|
-
vmin, vmax = x[random_idx].quantile(q1), x[random_idx].quantile(q2)
|
266
|
-
else:
|
267
|
-
vmin, vmax = x.quantile(q1), x.quantile(q2)
|
268
|
-
return vmin, vmax
|
269
|
-
|
270
|
-
|
271
|
-
def quantile_normalize(x, q=0.95):
|
272
|
-
"""normalize each dimension of x to [0, 1], take 95-th percentage, this robust to outliers
|
273
|
-
</br> 1. sort x
|
274
|
-
</br> 2. take q-th quantile
|
275
|
-
</br> min_value -> (1-q)-th quantile
|
276
|
-
</br> max_value -> q-th quantile
|
277
|
-
</br> 3. normalize
|
278
|
-
</br> x = (x - min_value) / (max_value - min_value)
|
279
|
-
|
280
|
-
Args:
|
281
|
-
x (torch.Tensor): input tensor, shape (n_samples, n_features)
|
282
|
-
normalize each feature to 0-1 range
|
283
|
-
q (float): quantile, default 0.95
|
284
|
-
|
285
|
-
Returns:
|
286
|
-
torch.Tensor: quantile normalized tensor
|
287
|
-
"""
|
288
|
-
# normalize x to 0-1 range, max value is q-th quantile
|
289
|
-
# quantile makes the normalization robust to outliers
|
290
|
-
if isinstance(x, np.ndarray):
|
291
|
-
x = torch.tensor(x)
|
292
|
-
vmax, vmin = quantile_min_max(x, q, 1 - q)
|
293
|
-
x = (x - vmin) / (vmax - vmin)
|
294
|
-
x = x.clamp(0, 1)
|
295
|
-
return x
|
nystrom_ncut/visualize_utils.py
CHANGED
@@ -7,15 +7,13 @@ import torch.nn.functional as F
|
|
7
7
|
from sklearn.base import BaseEstimator
|
8
8
|
|
9
9
|
from .common import (
|
10
|
-
DistanceOptions,
|
11
10
|
lazy_normalize,
|
11
|
+
quantile_min_max,
|
12
|
+
quantile_normalize,
|
12
13
|
)
|
13
14
|
from .propagation_utils import (
|
14
15
|
run_subgraph_sampling,
|
15
16
|
extrapolate_knn,
|
16
|
-
extrapolate_knn_with_subsampling,
|
17
|
-
quantile_min_max,
|
18
|
-
quantile_normalize
|
19
17
|
)
|
20
18
|
|
21
19
|
|
@@ -28,19 +26,25 @@ def _rgb_with_dimensionality_reduction(
|
|
28
26
|
num_sample: int,
|
29
27
|
metric: Literal["cosine", "euclidean"],
|
30
28
|
rgb_func: Callable[[torch.Tensor, float], torch.Tensor],
|
31
|
-
q: float,
|
32
|
-
|
29
|
+
q: float,
|
30
|
+
knn: int,
|
33
31
|
reduction: Callable[..., BaseEstimator],
|
34
32
|
reduction_dim: int,
|
35
33
|
reduction_kwargs: Dict[str, Any],
|
36
|
-
transform_func: Callable[[torch.Tensor], torch.Tensor]
|
37
|
-
|
34
|
+
transform_func: Callable[[torch.Tensor], torch.Tensor],
|
35
|
+
seed: int,
|
36
|
+
device: str,
|
38
37
|
) -> Tuple[torch.Tensor, torch.Tensor]:
|
39
38
|
|
40
|
-
if
|
41
|
-
|
42
|
-
features,
|
39
|
+
if True:
|
40
|
+
_subgraph_indices = run_subgraph_sampling(
|
43
41
|
features,
|
42
|
+
num_sample=10000,
|
43
|
+
sample_method="farthest",
|
44
|
+
)
|
45
|
+
features = extrapolate_knn(
|
46
|
+
features[_subgraph_indices],
|
47
|
+
features[_subgraph_indices],
|
44
48
|
features,
|
45
49
|
distance="cosine",
|
46
50
|
)
|
@@ -78,10 +82,10 @@ def rgb_from_tsne_2d(
|
|
78
82
|
num_sample: int = 1000,
|
79
83
|
perplexity: int = 150,
|
80
84
|
metric: Literal["cosine", "euclidean"] = "cosine",
|
81
|
-
device: str = None,
|
82
|
-
seed: int = 0,
|
83
85
|
q: float = 0.95,
|
84
86
|
knn: int = 10,
|
87
|
+
seed: int = 0,
|
88
|
+
device: str = None,
|
85
89
|
):
|
86
90
|
"""
|
87
91
|
Returns:
|
@@ -106,11 +110,13 @@ def rgb_from_tsne_2d(
|
|
106
110
|
num_sample=num_sample,
|
107
111
|
metric=metric,
|
108
112
|
rgb_func=rgb_from_2d_colormap,
|
109
|
-
q=q,
|
110
|
-
|
113
|
+
q=q,
|
114
|
+
knn=knn,
|
111
115
|
reduction=TSNE, reduction_dim=2, reduction_kwargs={
|
112
116
|
"perplexity": perplexity,
|
113
|
-
},
|
117
|
+
}, transform_func=_identity,
|
118
|
+
seed=seed,
|
119
|
+
device=device,
|
114
120
|
)
|
115
121
|
return x2d, rgb
|
116
122
|
|
@@ -120,10 +126,10 @@ def rgb_from_tsne_3d(
|
|
120
126
|
num_sample: int = 1000,
|
121
127
|
perplexity: int = 150,
|
122
128
|
metric: Literal["cosine", "euclidean"] = "cosine",
|
123
|
-
device: str = None,
|
124
|
-
seed: int = 0,
|
125
129
|
q: float = 0.95,
|
126
130
|
knn: int = 10,
|
131
|
+
seed: int = 0,
|
132
|
+
device: str = None,
|
127
133
|
):
|
128
134
|
"""
|
129
135
|
Returns:
|
@@ -148,11 +154,13 @@ def rgb_from_tsne_3d(
|
|
148
154
|
num_sample=num_sample,
|
149
155
|
metric=metric,
|
150
156
|
rgb_func=rgb_from_3d_rgb_cube,
|
151
|
-
q=q,
|
152
|
-
|
157
|
+
q=q,
|
158
|
+
knn=knn,
|
153
159
|
reduction=TSNE, reduction_dim=3, reduction_kwargs={
|
154
160
|
"perplexity": perplexity,
|
155
|
-
},
|
161
|
+
}, transform_func=_identity,
|
162
|
+
seed=seed,
|
163
|
+
device=device,
|
156
164
|
)
|
157
165
|
return x3d, rgb
|
158
166
|
|
@@ -161,10 +169,10 @@ def rgb_from_cosine_tsne_3d(
|
|
161
169
|
features: torch.Tensor,
|
162
170
|
num_sample: int = 1000,
|
163
171
|
perplexity: int = 150,
|
164
|
-
device: str = None,
|
165
|
-
seed: int = 0,
|
166
172
|
q: float = 0.95,
|
167
173
|
knn: int = 10,
|
174
|
+
seed: int = 0,
|
175
|
+
device: str = None
|
168
176
|
):
|
169
177
|
"""
|
170
178
|
Returns:
|
@@ -205,11 +213,13 @@ def rgb_from_cosine_tsne_3d(
|
|
205
213
|
num_sample=num_sample,
|
206
214
|
metric="cosine",
|
207
215
|
rgb_func=rgb_from_cosine,
|
208
|
-
q=q,
|
209
|
-
|
216
|
+
q=q,
|
217
|
+
knn=knn,
|
210
218
|
reduction=TSNE, reduction_dim=3, reduction_kwargs={
|
211
219
|
"perplexity": perplexity,
|
212
|
-
},
|
220
|
+
}, transform_func=_identity,
|
221
|
+
seed=seed,
|
222
|
+
device=device,
|
213
223
|
)
|
214
224
|
return x3d, rgb
|
215
225
|
|
@@ -220,10 +230,10 @@ def rgb_from_umap_2d(
|
|
220
230
|
n_neighbors: int = 150,
|
221
231
|
min_dist: float = 0.1,
|
222
232
|
metric: Literal["cosine", "euclidean"] = "cosine",
|
223
|
-
device: str = None,
|
224
|
-
seed: int = 0,
|
225
233
|
q: float = 0.95,
|
226
234
|
knn: int = 10,
|
235
|
+
seed: int = 0,
|
236
|
+
device: str = None,
|
227
237
|
):
|
228
238
|
"""
|
229
239
|
Returns:
|
@@ -240,12 +250,14 @@ def rgb_from_umap_2d(
|
|
240
250
|
num_sample=num_sample,
|
241
251
|
metric=metric,
|
242
252
|
rgb_func=rgb_from_2d_colormap,
|
243
|
-
q=q,
|
244
|
-
|
253
|
+
q=q,
|
254
|
+
knn=knn,
|
245
255
|
reduction=UMAP, reduction_dim=2, reduction_kwargs={
|
246
256
|
"n_neighbors": n_neighbors,
|
247
257
|
"min_dist": min_dist,
|
248
|
-
},
|
258
|
+
}, transform_func=_identity,
|
259
|
+
seed=seed,
|
260
|
+
device=device,
|
249
261
|
)
|
250
262
|
return x2d, rgb
|
251
263
|
|
@@ -256,10 +268,10 @@ def rgb_from_umap_sphere(
|
|
256
268
|
n_neighbors: int = 150,
|
257
269
|
min_dist: float = 0.1,
|
258
270
|
metric: Literal["cosine", "euclidean"] = "cosine",
|
259
|
-
device: str = None,
|
260
|
-
seed: int = 0,
|
261
271
|
q: float = 0.95,
|
262
272
|
knn: int = 10,
|
273
|
+
seed: int = 0,
|
274
|
+
device: str = None,
|
263
275
|
):
|
264
276
|
"""
|
265
277
|
Returns:
|
@@ -283,14 +295,15 @@ def rgb_from_umap_sphere(
|
|
283
295
|
num_sample=num_sample,
|
284
296
|
metric=metric,
|
285
297
|
rgb_func=rgb_from_3d_rgb_cube,
|
286
|
-
q=q,
|
287
|
-
|
298
|
+
q=q,
|
299
|
+
knn=knn,
|
288
300
|
reduction=UMAP, reduction_dim=2, reduction_kwargs={
|
289
301
|
"n_neighbors": n_neighbors,
|
290
302
|
"min_dist": min_dist,
|
291
303
|
"output_metric": "haversine",
|
292
|
-
},
|
293
|
-
|
304
|
+
}, transform_func=transform_func,
|
305
|
+
seed=seed,
|
306
|
+
device=device,
|
294
307
|
)
|
295
308
|
return x3d, rgb
|
296
309
|
|
@@ -301,10 +314,10 @@ def rgb_from_umap_3d(
|
|
301
314
|
n_neighbors: int = 150,
|
302
315
|
min_dist: float = 0.1,
|
303
316
|
metric: Literal["cosine", "euclidean"] = "cosine",
|
304
|
-
device: str = None,
|
305
|
-
seed: int = 0,
|
306
317
|
q: float = 0.95,
|
307
318
|
knn: int = 10,
|
319
|
+
seed: int = 0,
|
320
|
+
device: str = None,
|
308
321
|
):
|
309
322
|
"""
|
310
323
|
Returns:
|
@@ -321,12 +334,14 @@ def rgb_from_umap_3d(
|
|
321
334
|
num_sample=num_sample,
|
322
335
|
metric=metric,
|
323
336
|
rgb_func=rgb_from_3d_rgb_cube,
|
324
|
-
q=q,
|
325
|
-
|
337
|
+
q=q,
|
338
|
+
knn=knn,
|
326
339
|
reduction=UMAP, reduction_dim=3, reduction_kwargs={
|
327
340
|
"n_neighbors": n_neighbors,
|
328
341
|
"min_dist": min_dist,
|
329
|
-
},
|
342
|
+
}, transform_func=_identity,
|
343
|
+
seed=seed,
|
344
|
+
device=device,
|
330
345
|
)
|
331
346
|
return x3d, rgb
|
332
347
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: nystrom_ncut
|
3
|
-
Version: 0.0.
|
3
|
+
Version: 0.0.10
|
4
4
|
Summary: Normalized Cut and Nyström Approximation
|
5
5
|
Author-email: Huzheng Yang <huze.yann@gmail.com>, Wentinn Liao <wentinn.liao@gmail.com>
|
6
6
|
Project-URL: Documentation, https://github.com/JophiArcana/Nystrom-NCUT/
|
@@ -0,0 +1,14 @@
|
|
1
|
+
__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
+
nystrom_ncut/__init__.py,sha256=JKfF6atok5T9V692RhlhgeRO5a2cN-bfAVa9irmTLfs,463
|
3
|
+
nystrom_ncut/common.py,sha256=RMPQvg9R2s7V-q7zAStN9YCZt7gpc5Ut-KSKtvELBQ4,1934
|
4
|
+
nystrom_ncut/propagation_utils.py,sha256=WeWKxRBm01ITILMgjsit5_fCe9oW1kJOPmAjjcmliMo,10340
|
5
|
+
nystrom_ncut/visualize_utils.py,sha256=Z_bcoxwmWpTxhQ_yoAXqTnYDf269IuT0b0Sm2EVQpRw,17422
|
6
|
+
nystrom_ncut/nystrom/__init__.py,sha256=4EpxD3Cmc8Fif4vo8DG-6FpTfCnNanD5zCZxK3WrMwQ,121
|
7
|
+
nystrom_ncut/nystrom/distance_realization.py,sha256=8AWUlZKZEPfhQHxYTZt0uzKedVp8ZB1wb__7M2Fy-Eo,5529
|
8
|
+
nystrom_ncut/nystrom/normalized_cut.py,sha256=_U3zrbe6V-5TQ4uWmqckxs2JTIhygQlnRDTFBI1ghD4,7194
|
9
|
+
nystrom_ncut/nystrom/nystrom.py,sha256=VJPA17I8cVvjILUABJjkVA5kkXbTmHDyrtcWvu5xs-0,12571
|
10
|
+
nystrom_ncut-0.0.10.dist-info/LICENSE,sha256=2bm9uFabQZ3Ykb_SaSU_uUbAj2-htc6WJQmS_65qD00,1073
|
11
|
+
nystrom_ncut-0.0.10.dist-info/METADATA,sha256=sqs2WHdNbJeT5zvlq_WWHHRvHTz1mHVbDL3PsE1NMBI,6059
|
12
|
+
nystrom_ncut-0.0.10.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
13
|
+
nystrom_ncut-0.0.10.dist-info/top_level.txt,sha256=gM8IWWHYysIRTCvCTcdS4RShOyl9pxpylgSwPUZR2XM,22
|
14
|
+
nystrom_ncut-0.0.10.dist-info/RECORD,,
|
@@ -1,11 +0,0 @@
|
|
1
|
-
nystrom_ncut/__init__.py,sha256=Vlc_iAlfvTNUiJXpZLWUOaL2Q-YqZqgr7WoG6cVnD0g,439
|
2
|
-
nystrom_ncut/common.py,sha256=G6w_8_BfBUMc6r8WFgA0NH4K6am7AzZCSdrQEVjra7U,671
|
3
|
-
nystrom_ncut/ncut_pytorch.py,sha256=-SKs9AdkafJSGkeYt4LwhbKZr8oq9JA5caAqjiVDAzU,11220
|
4
|
-
nystrom_ncut/nystrom.py,sha256=-l26oiJ0oPReSGlMlYV3gftszgFdAAHAi7OFtGPZ4Ic,8802
|
5
|
-
nystrom_ncut/propagation_utils.py,sha256=0d2VhT0JrLRurd44hZbnxBvBh-QscPKxtV7VrwYtTdo,11569
|
6
|
-
nystrom_ncut/visualize_utils.py,sha256=jDjuyZ9rdd25jqrPObJgK8zCLHc3Oms0fQnaIetHk-U,17112
|
7
|
-
nystrom_ncut-0.0.8.dist-info/LICENSE,sha256=2bm9uFabQZ3Ykb_SaSU_uUbAj2-htc6WJQmS_65qD00,1073
|
8
|
-
nystrom_ncut-0.0.8.dist-info/METADATA,sha256=zQpx3REOOckpJSuc7N6UNpXZoqgsM5UoFWV6__DuaRQ,6058
|
9
|
-
nystrom_ncut-0.0.8.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
10
|
-
nystrom_ncut-0.0.8.dist-info/top_level.txt,sha256=j7g_j0S048EvguFFnGgD5Ewd3r2H6klsxd5A4dd-wHw,13
|
11
|
-
nystrom_ncut-0.0.8.dist-info/RECORD,,
|
File without changes
|
File without changes
|