nystrom-ncut 0.0.8__py3-none-any.whl → 0.0.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- __init__.py +0 -0
- nystrom_ncut/__init__.py +4 -3
- nystrom_ncut/common.py +37 -0
- nystrom_ncut/nystrom/__init__.py +7 -0
- nystrom_ncut/nystrom/distance_realization.py +127 -0
- nystrom_ncut/{ncut_pytorch.py → nystrom/normalized_cut.py} +17 -114
- nystrom_ncut/{nystrom.py → nystrom/nystrom.py} +104 -1
- nystrom_ncut/propagation_utils.py +1 -38
- nystrom_ncut/visualize_utils.py +57 -42
- {nystrom_ncut-0.0.8.dist-info → nystrom_ncut-0.0.10.dist-info}/METADATA +1 -1
- nystrom_ncut-0.0.10.dist-info/RECORD +14 -0
- {nystrom_ncut-0.0.8.dist-info → nystrom_ncut-0.0.10.dist-info}/top_level.txt +1 -0
- nystrom_ncut-0.0.8.dist-info/RECORD +0 -11
- {nystrom_ncut-0.0.8.dist-info → nystrom_ncut-0.0.10.dist-info}/LICENSE +0 -0
- {nystrom_ncut-0.0.8.dist-info → nystrom_ncut-0.0.10.dist-info}/WHEEL +0 -0
__init__.py
ADDED
File without changes
nystrom_ncut/__init__.py
CHANGED
@@ -1,12 +1,13 @@
-from .ncut_pytorch import (
-    NCUT,
+from .nystrom import (
+    DistanceRealization,
+    NCut,
     axis_align,
 )
 from .propagation_utils import (
+    distance_from_features,
     affinity_from_features,
     extrapolate_knn_with_subsampling,
     extrapolate_knn,
-    quantile_normalize,
 )
 from .visualize_utils import (
     rgb_from_tsne_3d,
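In practice the top-level import surface changes as follows (the 0.0.8 names are inferred from the rename hunks further below, so treat the "before" lines as a best guess):

    # 0.0.8 (inferred)
    # from nystrom_ncut import NCUT, quantile_normalize
    # 0.0.10
    from nystrom_ncut import DistanceRealization, NCut, axis_align
    from nystrom_ncut.common import quantile_normalize  # no longer re-exported at top level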
nystrom_ncut/common.py
CHANGED
@@ -22,3 +22,40 @@ def lazy_normalize(x: torch.Tensor, n: int = 1000, **normalize_kwargs: Any) -> torch.Tensor:
         return x
     else:
         return Fn.normalize(x, **normalize_kwargs)
+
+
+def quantile_min_max(x, q1=0.01, q2=0.99, n_sample=10000):
+    if x.shape[0] > n_sample:
+        np.random.seed(0)
+        random_idx = np.random.choice(x.shape[0], n_sample, replace=False)
+        vmin, vmax = x[random_idx].quantile(q1), x[random_idx].quantile(q2)
+    else:
+        vmin, vmax = x.quantile(q1), x.quantile(q2)
+    return vmin, vmax
+
+
+def quantile_normalize(x, q=0.95):
+    """normalize each dimension of x to [0, 1], take 95-th percentage, this robust to outliers
+    </br> 1. sort x
+    </br> 2. take q-th quantile
+    </br> min_value -> (1-q)-th quantile
+    </br> max_value -> q-th quantile
+    </br> 3. normalize
+    </br> x = (x - min_value) / (max_value - min_value)
+
+    Args:
+        x (torch.Tensor): input tensor, shape (n_samples, n_features)
+            normalize each feature to 0-1 range
+        q (float): quantile, default 0.95
+
+    Returns:
+        torch.Tensor: quantile normalized tensor
+    """
+    # normalize x to 0-1 range, max value is q-th quantile
+    # quantile makes the normalization robust to outliers
+    if isinstance(x, np.ndarray):
+        x = torch.tensor(x)
+    vmax, vmin = quantile_min_max(x, q, 1 - q)
+    x = (x - vmin) / (vmax - vmin)
+    x = x.clamp(0, 1)
+    return x
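Note: quantile_min_max and quantile_normalize move from propagation_utils.py into common.py in 0.0.10 (the matching removal appears further below). A minimal usage sketch against the new location, on synthetic data:

    import torch
    from nystrom_ncut.common import quantile_normalize

    features = torch.randn(10_000, 3) * 5.0           # synthetic values with heavy tails
    normalized = quantile_normalize(features, q=0.95)
    # values are rescaled by the 0.05/0.95 quantiles and clamped, so:
    print(normalized.min().item(), normalized.max().item())  # 0.0, 1.0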
nystrom_ncut/nystrom/distance_realization.py
ADDED
@@ -0,0 +1,127 @@
+import torch
+
+from .nystrom import (
+    EigSolverOptions,
+    OnlineKernel,
+    OnlineNystromSubsampleFit,
+    solve_eig,
+)
+from ..common import (
+    DistanceOptions,
+    SampleOptions,
+)
+from ..propagation_utils import (
+    distance_from_features,
+)
+
+
+class GramKernel(OnlineKernel):
+    def __init__(
+        self,
+        distance: DistanceOptions,
+        eig_solver: EigSolverOptions,
+    ):
+        self.distance: DistanceOptions = distance
+        self.eig_solver: EigSolverOptions = eig_solver
+
+        # Anchor matrices
+        self.anchor_features: torch.Tensor = None  # [n x d]
+        self.A: torch.Tensor = None  # [n x n]
+        self.Ainv: torch.Tensor = None  # [n x n]
+
+        # Updated matrices
+        self.a_r: torch.Tensor = None  # [n]
+        self.b_r: torch.Tensor = None  # [n]
+        self.matrix_sum: torch.Tensor = torch.zeros(())  # []
+        self.n_features: int = None  # N
+
+    def fit(self, features: torch.Tensor) -> None:
+        self.anchor_features = features  # [n x d]
+        self.A = -0.5 * distance_from_features(
+            self.anchor_features,  # [n x d]
+            self.anchor_features,
+            distance=self.distance,
+        )  # [n x n]
+        d = features.shape[-1]
+        U, L = solve_eig(
+            self.A,
+            num_eig=d + 1,  # d * (d + 3) // 2 + 1,
+            eig_solver=self.eig_solver,
+        )  # [n x (d + 1)], [d + 1]
+        self.Ainv = U @ torch.diag(1 / L) @ U.mT  # [n x n]
+        self.a_r = torch.sum(self.A, dim=-1)  # [n]
+        self.b_r = torch.zeros_like(self.a_r)  # [n]
+        self.matrix_sum = torch.sum(self.a_r)  # []
+        self.n_features = features.shape[0]  # n
+
+    def update(self, features: torch.Tensor) -> torch.Tensor:
+        B = -0.5 * distance_from_features(
+            self.anchor_features,  # [n x d]
+            features,  # [m x d]
+            distance=self.distance,
+        )  # [n x m]
+        b_r = torch.sum(B, dim=-1)  # [n]
+        b_c = torch.sum(B, dim=-2)  # [m]
+        self.b_r = self.b_r + b_r  # [n]
+        self.matrix_sum = (
+            torch.sum(self.a_r)
+            + 2 * torch.sum(self.b_r)
+            + self.Ainv @ self.b_r @ self.b_r
+        )  # []
+        self.n_features += features.shape[0]  # N
+
+        row_sum = self.a_r + self.b_r  # [n]
+        col_sum = b_c + B.mT @ self.Ainv @ self.b_r  # [m]
+        shift = -(row_sum[:, None] + col_sum) / self.n_features + self.matrix_sum / (self.n_features ** 2)  # [n x m]
+        return (B + shift).mT  # [m x n]
+
+    def transform(self, features: torch.Tensor = None) -> torch.Tensor:
+        row_sum = self.a_r + self.b_r
+        if features is None:
+            B = self.A  # [n x n]
+            col_sum = row_sum  # [n]
+        else:
+            B = -0.5 * distance_from_features(
+                self.anchor_features,
+                features,
+                distance=self.distance,
+            )
+            b_c = torch.sum(B, dim=-2)  # [m]
+            col_sum = b_c + B.mT @ self.Ainv @ self.b_r  # [m]
+        shift = -(row_sum[:, None] + col_sum) / self.n_features + self.matrix_sum / (self.n_features ** 2)  # [n x m]
+        return (B + shift).mT  # [m x n]
+
+
+class DistanceRealization(OnlineNystromSubsampleFit):
+    """Nystrom Distance Realization for large scale graph."""
+
+    def __init__(
+        self,
+        n_components: int = 100,
+        num_sample: int = 10000,
+        sample_method: SampleOptions = "farthest",
+        distance: DistanceOptions = "cosine",
+        eig_solver: EigSolverOptions = "svd_lowrank",
+        chunk_size: int = 8192,
+    ):
+        """
+        Args:
+            n_components (int): number of top eigenvectors to return
+            num_sample (int): number of samples for Nystrom-like approximation,
+                reduce only if memory is not enough, increase for better approximation
+            sample_method (str): subgraph sampling, ['farthest', 'random'].
+                farthest point sampling is recommended for better Nystrom-approximation accuracy
+            distance (str): distance metric for affinity matrix, ['cosine', 'euclidean', 'rbf'].
+            eig_solver (str): eigen decompose solver, ['svd_lowrank', 'lobpcg', 'svd', 'eigh'].
+            chunk_size (int): chunk size for large-scale matrix multiplication
+        """
+        OnlineNystromSubsampleFit.__init__(
+            self,
+            n_components=n_components,
+            kernel=GramKernel(distance, eig_solver),
+            num_sample=num_sample,
+            sample_method=sample_method,
+            eig_solver=eig_solver,
+            chunk_size=chunk_size,
+        )
+        self.distance: DistanceOptions = distance
nystrom_ncut/{ncut_pytorch.py → nystrom/normalized_cut.py}
RENAMED
@@ -1,22 +1,18 @@
-import logging
-from typing import Tuple
-
 import torch
 import torch.nn.functional as Fn
 
-from .common import (
-    DistanceOptions,
-    SampleOptions,
-)
 from .nystrom import (
     EigSolverOptions,
     OnlineKernel,
-    OnlineNystrom,
+    OnlineNystromSubsampleFit,
     solve_eig,
 )
-from .propagation_utils import (
+from ..common import (
+    DistanceOptions,
+    SampleOptions,
+)
+from ..propagation_utils import (
     affinity_from_features,
-    run_subgraph_sampling,
 )
 
 
@@ -68,16 +64,16 @@ class LaplacianKernel(OnlineKernel):
         b_c = torch.sum(B, dim=-2)  # [m]
         self.b_r = self.b_r + b_r  # [n]
 
-
-
-        scale = (
+        row_sum = self.a_r + self.b_r  # [n]
+        col_sum = b_c + B.mT @ self.Ainv @ self.b_r  # [m]
+        scale = (row_sum[:, None] * col_sum) ** -0.5  # [n x m]
         return (B * scale).mT  # [m x n]
 
     def transform(self, features: torch.Tensor = None) -> torch.Tensor:
-
+        row_sum = self.a_r + self.b_r  # [n]
         if features is None:
             B = self.A  # [n x n]
-
+            col_sum = row_sum  # [n]
         else:
             B = affinity_from_features(
                 self.anchor_features,  # [n x d]
@@ -86,12 +82,12 @@ class LaplacianKernel(OnlineKernel):
                 distance=self.distance,
             )  # [n x m]
             b_c = torch.sum(B, dim=-2)  # [m]
-
-        scale = (
+            col_sum = b_c + B.mT @ self.Ainv @ self.b_r  # [m]
+        scale = (row_sum[:, None] * col_sum) ** -0.5  # [n x m]
         return (B * scale).mT  # [m x n]
 
 
-class NCUT(OnlineNystrom):
+class NCut(OnlineNystromSubsampleFit):
     """Nystrom Normalized Cut for large scale graph."""
 
     def __init__(
@@ -102,7 +98,6 @@ class NCUT(OnlineNystrom):
         sample_method: SampleOptions = "farthest",
         distance: DistanceOptions = "cosine",
         eig_solver: EigSolverOptions = "svd_lowrank",
-        normalize_features: bool = None,
         chunk_size: int = 8192,
     ):
         """
@@ -116,110 +111,18 @@ class NCUT(OnlineNystrom):
                 farthest point sampling is recommended for better Nystrom-approximation accuracy
             distance (str): distance metric for affinity matrix, ['cosine', 'euclidean', 'rbf'].
             eig_solver (str): eigen decompose solver, ['svd_lowrank', 'lobpcg', 'svd', 'eigh'].
-            normalize_features (bool): normalize input features before computing affinity matrix,
-                default 'None' is True for cosine distance, False for euclidean distance and rbf
             chunk_size (int): chunk size for large-scale matrix multiplication
         """
-        OnlineNystrom.__init__(
+        OnlineNystromSubsampleFit.__init__(
             self,
             n_components=n_components,
             kernel=LaplacianKernel(affinity_focal_gamma, distance, eig_solver),
+            num_sample=num_sample,
+            sample_method=sample_method,
             eig_solver=eig_solver,
             chunk_size=chunk_size,
         )
-        self.num_sample: int = num_sample
-        self.sample_method: SampleOptions = sample_method
-        self.anchor_indices: torch.Tensor = None
         self.distance: DistanceOptions = distance
-        self.normalize_features: bool = normalize_features
-        if self.normalize_features is None:
-            if distance in ["cosine"]:
-                self.normalize_features = True
-            if distance in ["euclidean", "rbf"]:
-                self.normalize_features = False
-
-        self.chunk_size: int = chunk_size
-
-    def _fit_helper(
-        self,
-        features: torch.Tensor,
-        precomputed_sampled_indices: torch.Tensor,
-    ) -> Tuple[torch.Tensor, torch.Tensor]:
-        _n = features.shape[0]
-        if self.num_sample >= _n:
-            logging.info(
-                f"NCUT nystrom num_sample is larger than number of input samples, nyström approximation is not needed, setting num_sample={_n}"
-            )
-            self.num_sample = _n
-
-        assert self.distance in ["cosine", "euclidean", "rbf"], "distance should be 'cosine', 'euclidean', 'rbf'"
-
-        if self.normalize_features:
-            # features need to be normalized for affinity matrix computation (cosine distance)
-            features = torch.nn.functional.normalize(features, dim=-1)
-
-        if precomputed_sampled_indices is not None:
-            _sampled_indices = precomputed_sampled_indices
-        else:
-            _sampled_indices = run_subgraph_sampling(
-                features,
-                self.num_sample,
-                sample_method=self.sample_method,
-            )
-        self.anchor_indices = torch.sort(_sampled_indices).values
-        sampled_features = features[self.anchor_indices]
-        OnlineNystrom.fit(self, sampled_features)
-
-        _n_not_sampled = _n - len(sampled_features)
-        if _n_not_sampled > 0:
-            unsampled_indices = torch.full((_n,), True, device=features.device).scatter_(0, self.anchor_indices, False)
-            unsampled_features = features[unsampled_indices]
-            V_unsampled, _ = OnlineNystrom.update(self, unsampled_features)
-        else:
-            unsampled_indices = V_unsampled = None
-        return unsampled_indices, V_unsampled
-
-    def fit(
-        self,
-        features: torch.Tensor,
-        precomputed_sampled_indices: torch.Tensor = None,
-    ):
-        """Fit Nystrom Normalized Cut on the input features.
-        Args:
-            features (torch.Tensor): input features, shape (n_samples, n_features)
-            precomputed_sampled_indices (torch.Tensor): precomputed sampled indices, shape (num_sample,)
-                override the sample_method, if not None
-        Returns:
-            (NCUT): self
-        """
-        NCUT._fit_helper(self, features, precomputed_sampled_indices)
-        return self
-
-    def fit_transform(
-        self,
-        features: torch.Tensor,
-        precomputed_sampled_indices: torch.Tensor = None,
-    ) -> Tuple[torch.Tensor, torch.Tensor]:
-        """
-        Args:
-            features (torch.Tensor): input features, shape (n_samples, n_features)
-            precomputed_sampled_indices (torch.Tensor): precomputed sampled indices, shape (num_sample,)
-                override the sample_method, if not None
-
-        Returns:
-            (torch.Tensor): eigen_vectors, shape (n_samples, num_eig)
-            (torch.Tensor): eigen_values, sorted in descending order, shape (num_eig,)
-        """
-        unsampled_indices, V_unsampled = NCUT._fit_helper(self, features, precomputed_sampled_indices)
-        V_sampled, L = OnlineNystrom.transform(self)
-
-        if unsampled_indices is not None:
-            V = torch.zeros((len(unsampled_indices), self.n_components), device=features.device)
-            V[~unsampled_indices] = V_sampled
-            V[unsampled_indices] = V_unsampled
-        else:
-            V = V_sampled
-        return V, L
 
 
 def axis_align(eigen_vectors: torch.Tensor, max_iter=300):
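The public surface of the renamed class is unchanged apart from the NCUT → NCut spelling and the dropped normalize_features flag. A minimal usage sketch against the 0.0.10 signatures shown above, on synthetic data:

    import torch
    from nystrom_ncut import NCut

    features = torch.randn(20_000, 768)           # synthetic input features
    ncut = NCut(n_components=20, num_sample=10_000, distance="cosine")
    eigenvectors, eigenvalues = ncut.fit_transform(features)
    print(eigenvectors.shape, eigenvalues.shape)  # (20000, 20), (20,)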
nystrom_ncut/{nystrom.py → nystrom/nystrom.py}
RENAMED
@@ -1,8 +1,15 @@
+import logging
 from typing import Literal, Tuple
 
 import torch
 
-from
+from ..common import (
+    SampleOptions,
+    ceildiv,
+)
+from ..propagation_utils import (
+    run_subgraph_sampling,
+)
 
 
 EigSolverOptions = Literal["svd_lowrank", "lobpcg", "svd", "eigh"]
@@ -132,6 +139,102 @@ class OnlineNystrom:
         return VS, self.LS  # [m x n_components], [n_components]
 
 
+class OnlineNystromSubsampleFit(OnlineNystrom):
+    def __init__(
+        self,
+        n_components: int,
+        kernel: OnlineKernel,
+        num_sample: int,
+        sample_method: SampleOptions,
+        eig_solver: EigSolverOptions = "svd_lowrank",
+        chunk_size: int = 8192,
+    ):
+        OnlineNystrom.__init__(
+            self,
+            n_components=n_components,
+            kernel=kernel,
+            eig_solver=eig_solver,
+            chunk_size=chunk_size,
+        )
+        self.num_sample: int = num_sample
+        self.sample_method: SampleOptions = sample_method
+        self.anchor_indices: torch.Tensor = None
+
+    def _fit_helper(
+        self,
+        features: torch.Tensor,
+        precomputed_sampled_indices: torch.Tensor,
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
+        _n = features.shape[0]
+        if self.num_sample >= _n:
+            logging.info(
+                f"NCUT nystrom num_sample is larger than number of input samples, nyström approximation is not needed, setting num_sample={_n}"
+            )
+            self.num_sample = _n
+
+        if precomputed_sampled_indices is not None:
+            self.anchor_indices = precomputed_sampled_indices
+        else:
+            self.anchor_indices = run_subgraph_sampling(
+                features,
+                self.num_sample,
+                sample_method=self.sample_method,
+            )
+        sampled_features = features[self.anchor_indices]
+        OnlineNystrom.fit(self, sampled_features)
+
+        _n_not_sampled = _n - len(sampled_features)
+        if _n_not_sampled > 0:
+            unsampled_indices = torch.full((_n,), True, device=features.device).scatter_(0, self.anchor_indices, False)
+            unsampled_features = features[unsampled_indices]
+            V_unsampled, _ = OnlineNystrom.update(self, unsampled_features)
+        else:
+            unsampled_indices = V_unsampled = None
+        return unsampled_indices, V_unsampled
+
+    def fit(
+        self,
+        features: torch.Tensor,
+        precomputed_sampled_indices: torch.Tensor = None,
+    ):
+        """Fit Nystrom Normalized Cut on the input features.
+        Args:
+            features (torch.Tensor): input features, shape (n_samples, n_features)
+            precomputed_sampled_indices (torch.Tensor): precomputed sampled indices, shape (num_sample,)
+                override the sample_method, if not None
+        Returns:
+            (NCut): self
+        """
+        OnlineNystromSubsampleFit._fit_helper(self, features, precomputed_sampled_indices)
+        return self
+
+    def fit_transform(
+        self,
+        features: torch.Tensor,
+        precomputed_sampled_indices: torch.Tensor = None,
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
+        """
+        Args:
+            features (torch.Tensor): input features, shape (n_samples, n_features)
+            precomputed_sampled_indices (torch.Tensor): precomputed sampled indices, shape (num_sample,)
+                override the sample_method, if not None
+
+        Returns:
+            (torch.Tensor): eigen_vectors, shape (n_samples, num_eig)
+            (torch.Tensor): eigen_values, sorted in descending order, shape (num_eig,)
+        """
+        unsampled_indices, V_unsampled = OnlineNystromSubsampleFit._fit_helper(self, features, precomputed_sampled_indices)
+        V_sampled, L = OnlineNystrom.transform(self)
+
+        if unsampled_indices is not None:
+            V = torch.zeros((len(unsampled_indices), self.n_components), device=features.device)
+            V[~unsampled_indices] = V_sampled
+            V[unsampled_indices] = V_unsampled
+        else:
+            V = V_sampled
+        return V, L
+
+
 def solve_eig(
     A: torch.Tensor,
     num_eig: int,
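Both NCut and DistanceRealization now inherit this subsample-then-extrapolate fit from OnlineNystromSubsampleFit; only the kernel differs. As the code above shows, precomputed_sampled_indices pins the Nyström anchors and bypasses sample_method. A hedged sketch:

    import torch
    from nystrom_ncut import NCut

    features = torch.randn(5_000, 64)
    anchors = torch.randperm(5_000)[:1_000]  # caller-chosen anchor rows
    ncut = NCut(n_components=10, num_sample=1_000)
    V, L = ncut.fit_transform(features, precomputed_sampled_indices=anchors)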
nystrom_ncut/propagation_utils.py
CHANGED
@@ -1,5 +1,4 @@
 import logging
-from typing import Literal
 
 import numpy as np
 import torch
@@ -48,6 +47,7 @@ def run_subgraph_sampling(
         sampled_indices = torch.randperm(features.shape[0])[:num_sample]
     else:
         raise ValueError("sample_method should be 'farthest' or 'random'")
+    sampled_indices = torch.sort(sampled_indices).values
     return sampled_indices.to(features.device)
 
 
@@ -256,40 +256,3 @@ def extrapolate_knn_with_subsampling(
         device=device
     )
     return new_eigenvectors
-
-
-def quantile_min_max(x, q1=0.01, q2=0.99, n_sample=10000):
-    if x.shape[0] > n_sample:
-        np.random.seed(0)
-        random_idx = np.random.choice(x.shape[0], n_sample, replace=False)
-        vmin, vmax = x[random_idx].quantile(q1), x[random_idx].quantile(q2)
-    else:
-        vmin, vmax = x.quantile(q1), x.quantile(q2)
-    return vmin, vmax
-
-
-def quantile_normalize(x, q=0.95):
-    """normalize each dimension of x to [0, 1], take 95-th percentage, this robust to outliers
-    </br> 1. sort x
-    </br> 2. take q-th quantile
-    </br> min_value -> (1-q)-th quantile
-    </br> max_value -> q-th quantile
-    </br> 3. normalize
-    </br> x = (x - min_value) / (max_value - min_value)
-
-    Args:
-        x (torch.Tensor): input tensor, shape (n_samples, n_features)
-            normalize each feature to 0-1 range
-        q (float): quantile, default 0.95
-
-    Returns:
-        torch.Tensor: quantile normalized tensor
-    """
-    # normalize x to 0-1 range, max value is q-th quantile
-    # quantile makes the normalization robust to outliers
-    if isinstance(x, np.ndarray):
-        x = torch.tensor(x)
-    vmax, vmin = quantile_min_max(x, q, 1 - q)
-    x = (x - vmin) / (vmax - vmin)
-    x = x.clamp(0, 1)
-    return x
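One behavioral change worth noting: run_subgraph_sampling now sorts the sampled indices before returning them, for either sample method. A quick check, assuming the internal module path shown in the file list above:

    import torch
    from nystrom_ncut.propagation_utils import run_subgraph_sampling

    x = torch.randn(1_000, 8)
    idx = run_subgraph_sampling(x, 100, sample_method="random")
    assert torch.all(idx[:-1] <= idx[1:])  # indices come back sorted in 0.0.10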
nystrom_ncut/visualize_utils.py
CHANGED
@@ -7,15 +7,13 @@ import torch.nn.functional as F
 from sklearn.base import BaseEstimator
 
 from .common import (
-    DistanceOptions,
     lazy_normalize,
+    quantile_min_max,
+    quantile_normalize,
 )
 from .propagation_utils import (
     run_subgraph_sampling,
     extrapolate_knn,
-    extrapolate_knn_with_subsampling,
-    quantile_min_max,
-    quantile_normalize
 )
 
 
@@ -28,19 +26,25 @@ def _rgb_with_dimensionality_reduction(
     num_sample: int,
     metric: Literal["cosine", "euclidean"],
     rgb_func: Callable[[torch.Tensor, float], torch.Tensor],
-    q: float,
-
+    q: float,
+    knn: int,
     reduction: Callable[..., BaseEstimator],
     reduction_dim: int,
     reduction_kwargs: Dict[str, Any],
-    transform_func: Callable[[torch.Tensor], torch.Tensor]
-
+    transform_func: Callable[[torch.Tensor], torch.Tensor],
+    seed: int,
+    device: str,
 ) -> Tuple[torch.Tensor, torch.Tensor]:
 
-    if
-
-        features,
+    if True:
+        _subgraph_indices = run_subgraph_sampling(
             features,
+            num_sample=10000,
+            sample_method="farthest",
+        )
+        features = extrapolate_knn(
+            features[_subgraph_indices],
+            features[_subgraph_indices],
             features,
             distance="cosine",
         )
@@ -78,10 +82,10 @@ def rgb_from_tsne_2d(
     num_sample: int = 1000,
     perplexity: int = 150,
     metric: Literal["cosine", "euclidean"] = "cosine",
-    device: str = None,
-    seed: int = 0,
     q: float = 0.95,
     knn: int = 10,
+    seed: int = 0,
+    device: str = None,
 ):
     """
     Returns:
@@ -106,11 +110,13 @@ def rgb_from_tsne_2d(
         num_sample=num_sample,
         metric=metric,
         rgb_func=rgb_from_2d_colormap,
-        q=q,
-
+        q=q,
+        knn=knn,
         reduction=TSNE, reduction_dim=2, reduction_kwargs={
             "perplexity": perplexity,
-        },
+        }, transform_func=_identity,
+        seed=seed,
+        device=device,
     )
     return x2d, rgb
 
@@ -120,10 +126,10 @@ def rgb_from_tsne_3d(
     num_sample: int = 1000,
     perplexity: int = 150,
     metric: Literal["cosine", "euclidean"] = "cosine",
-    device: str = None,
-    seed: int = 0,
     q: float = 0.95,
     knn: int = 10,
+    seed: int = 0,
+    device: str = None,
 ):
     """
     Returns:
@@ -148,11 +154,13 @@ def rgb_from_tsne_3d(
         num_sample=num_sample,
         metric=metric,
         rgb_func=rgb_from_3d_rgb_cube,
-        q=q,
-
+        q=q,
+        knn=knn,
         reduction=TSNE, reduction_dim=3, reduction_kwargs={
             "perplexity": perplexity,
-        },
+        }, transform_func=_identity,
+        seed=seed,
+        device=device,
     )
     return x3d, rgb
 
@@ -161,10 +169,10 @@ def rgb_from_cosine_tsne_3d(
     features: torch.Tensor,
     num_sample: int = 1000,
     perplexity: int = 150,
-    device: str = None,
-    seed: int = 0,
     q: float = 0.95,
     knn: int = 10,
+    seed: int = 0,
+    device: str = None
 ):
     """
     Returns:
@@ -205,11 +213,13 @@ def rgb_from_cosine_tsne_3d(
         num_sample=num_sample,
         metric="cosine",
         rgb_func=rgb_from_cosine,
-        q=q,
-
+        q=q,
+        knn=knn,
         reduction=TSNE, reduction_dim=3, reduction_kwargs={
             "perplexity": perplexity,
-        },
+        }, transform_func=_identity,
+        seed=seed,
+        device=device,
    )
     return x3d, rgb
 
@@ -220,10 +230,10 @@ def rgb_from_umap_2d(
     n_neighbors: int = 150,
     min_dist: float = 0.1,
     metric: Literal["cosine", "euclidean"] = "cosine",
-    device: str = None,
-    seed: int = 0,
     q: float = 0.95,
     knn: int = 10,
+    seed: int = 0,
+    device: str = None,
 ):
     """
     Returns:
@@ -240,12 +250,14 @@ def rgb_from_umap_2d(
         num_sample=num_sample,
         metric=metric,
         rgb_func=rgb_from_2d_colormap,
-        q=q,
-
+        q=q,
+        knn=knn,
         reduction=UMAP, reduction_dim=2, reduction_kwargs={
             "n_neighbors": n_neighbors,
             "min_dist": min_dist,
-        },
+        }, transform_func=_identity,
+        seed=seed,
+        device=device,
     )
     return x2d, rgb
 
@@ -256,10 +268,10 @@ def rgb_from_umap_sphere(
     n_neighbors: int = 150,
     min_dist: float = 0.1,
     metric: Literal["cosine", "euclidean"] = "cosine",
-    device: str = None,
-    seed: int = 0,
     q: float = 0.95,
     knn: int = 10,
+    seed: int = 0,
+    device: str = None,
 ):
     """
     Returns:
@@ -283,14 +295,15 @@ def rgb_from_umap_sphere(
         num_sample=num_sample,
         metric=metric,
         rgb_func=rgb_from_3d_rgb_cube,
-        q=q,
-
+        q=q,
+        knn=knn,
         reduction=UMAP, reduction_dim=2, reduction_kwargs={
             "n_neighbors": n_neighbors,
             "min_dist": min_dist,
             "output_metric": "haversine",
-        },
-
+        }, transform_func=transform_func,
+        seed=seed,
+        device=device,
     )
     return x3d, rgb
 
@@ -301,10 +314,10 @@ def rgb_from_umap_3d(
     n_neighbors: int = 150,
     min_dist: float = 0.1,
     metric: Literal["cosine", "euclidean"] = "cosine",
-    device: str = None,
-    seed: int = 0,
     q: float = 0.95,
     knn: int = 10,
+    seed: int = 0,
+    device: str = None,
 ):
     """
     Returns:
@@ -321,12 +334,14 @@ def rgb_from_umap_3d(
         num_sample=num_sample,
         metric=metric,
         rgb_func=rgb_from_3d_rgb_cube,
-        q=q,
-
+        q=q,
+        knn=knn,
         reduction=UMAP, reduction_dim=3, reduction_kwargs={
             "n_neighbors": n_neighbors,
             "min_dist": min_dist,
-        },
+        }, transform_func=_identity,
+        seed=seed,
+        device=device,
     )
     return x3d, rgb
 
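All rgb_from_* helpers share the same reordering: q and knn now precede seed and device, and transform_func/seed/device are forwarded explicitly. A hedged usage sketch (requires scikit-learn's TSNE; shapes are illustrative):

    import torch
    from nystrom_ncut import NCut, rgb_from_tsne_3d

    eigenvectors, _ = NCut(n_components=20).fit_transform(torch.randn(5_000, 64))
    x3d, rgb = rgb_from_tsne_3d(eigenvectors, num_sample=1000, perplexity=150, seed=0)
    print(rgb.shape)  # (5000, 3), values in [0, 1] after quantile normalization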
{nystrom_ncut-0.0.8.dist-info → nystrom_ncut-0.0.10.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: nystrom_ncut
-Version: 0.0.8
+Version: 0.0.10
 Summary: Normalized Cut and Nyström Approximation
 Author-email: Huzheng Yang <huze.yann@gmail.com>, Wentinn Liao <wentinn.liao@gmail.com>
 Project-URL: Documentation, https://github.com/JophiArcana/Nystrom-NCUT/
nystrom_ncut-0.0.10.dist-info/RECORD
ADDED
@@ -0,0 +1,14 @@
+__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+nystrom_ncut/__init__.py,sha256=JKfF6atok5T9V692RhlhgeRO5a2cN-bfAVa9irmTLfs,463
+nystrom_ncut/common.py,sha256=RMPQvg9R2s7V-q7zAStN9YCZt7gpc5Ut-KSKtvELBQ4,1934
+nystrom_ncut/propagation_utils.py,sha256=WeWKxRBm01ITILMgjsit5_fCe9oW1kJOPmAjjcmliMo,10340
+nystrom_ncut/visualize_utils.py,sha256=Z_bcoxwmWpTxhQ_yoAXqTnYDf269IuT0b0Sm2EVQpRw,17422
+nystrom_ncut/nystrom/__init__.py,sha256=4EpxD3Cmc8Fif4vo8DG-6FpTfCnNanD5zCZxK3WrMwQ,121
+nystrom_ncut/nystrom/distance_realization.py,sha256=8AWUlZKZEPfhQHxYTZt0uzKedVp8ZB1wb__7M2Fy-Eo,5529
+nystrom_ncut/nystrom/normalized_cut.py,sha256=_U3zrbe6V-5TQ4uWmqckxs2JTIhygQlnRDTFBI1ghD4,7194
+nystrom_ncut/nystrom/nystrom.py,sha256=VJPA17I8cVvjILUABJjkVA5kkXbTmHDyrtcWvu5xs-0,12571
+nystrom_ncut-0.0.10.dist-info/LICENSE,sha256=2bm9uFabQZ3Ykb_SaSU_uUbAj2-htc6WJQmS_65qD00,1073
+nystrom_ncut-0.0.10.dist-info/METADATA,sha256=sqs2WHdNbJeT5zvlq_WWHHRvHTz1mHVbDL3PsE1NMBI,6059
+nystrom_ncut-0.0.10.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+nystrom_ncut-0.0.10.dist-info/top_level.txt,sha256=gM8IWWHYysIRTCvCTcdS4RShOyl9pxpylgSwPUZR2XM,22
+nystrom_ncut-0.0.10.dist-info/RECORD,,
@@ -1,11 +0,0 @@
|
|
1
|
-
nystrom_ncut/__init__.py,sha256=Vlc_iAlfvTNUiJXpZLWUOaL2Q-YqZqgr7WoG6cVnD0g,439
|
2
|
-
nystrom_ncut/common.py,sha256=G6w_8_BfBUMc6r8WFgA0NH4K6am7AzZCSdrQEVjra7U,671
|
3
|
-
nystrom_ncut/ncut_pytorch.py,sha256=-SKs9AdkafJSGkeYt4LwhbKZr8oq9JA5caAqjiVDAzU,11220
|
4
|
-
nystrom_ncut/nystrom.py,sha256=-l26oiJ0oPReSGlMlYV3gftszgFdAAHAi7OFtGPZ4Ic,8802
|
5
|
-
nystrom_ncut/propagation_utils.py,sha256=0d2VhT0JrLRurd44hZbnxBvBh-QscPKxtV7VrwYtTdo,11569
|
6
|
-
nystrom_ncut/visualize_utils.py,sha256=jDjuyZ9rdd25jqrPObJgK8zCLHc3Oms0fQnaIetHk-U,17112
|
7
|
-
nystrom_ncut-0.0.8.dist-info/LICENSE,sha256=2bm9uFabQZ3Ykb_SaSU_uUbAj2-htc6WJQmS_65qD00,1073
|
8
|
-
nystrom_ncut-0.0.8.dist-info/METADATA,sha256=zQpx3REOOckpJSuc7N6UNpXZoqgsM5UoFWV6__DuaRQ,6058
|
9
|
-
nystrom_ncut-0.0.8.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
10
|
-
nystrom_ncut-0.0.8.dist-info/top_level.txt,sha256=j7g_j0S048EvguFFnGgD5Ewd3r2H6klsxd5A4dd-wHw,13
|
11
|
-
nystrom_ncut-0.0.8.dist-info/RECORD,,
|
{nystrom_ncut-0.0.8.dist-info → nystrom_ncut-0.0.10.dist-info}/LICENSE
File without changes
{nystrom_ncut-0.0.8.dist-info → nystrom_ncut-0.0.10.dist-info}/WHEEL
File without changes