nystrom-ncut 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nystrom_ncut/common.py +18 -5
- nystrom_ncut/distance_utils.py +54 -32
- nystrom_ncut/nystrom/__init__.py +0 -3
- nystrom_ncut/nystrom/distance_realization.py +8 -9
- nystrom_ncut/nystrom/normalized_cut.py +51 -47
- nystrom_ncut/nystrom/nystrom_utils.py +78 -69
- nystrom_ncut/sampling_utils.py +64 -51
- nystrom_ncut/visualize_utils.py +31 -43
- {nystrom_ncut-0.2.1.dist-info → nystrom_ncut-0.3.0.dist-info}/METADATA +1 -1
- nystrom_ncut-0.3.0.dist-info/RECORD +18 -0
- {nystrom_ncut-0.2.1.dist-info → nystrom_ncut-0.3.0.dist-info}/WHEEL +1 -1
- nystrom_ncut-0.2.1.dist-info/RECORD +0 -18
- {nystrom_ncut-0.2.1.dist-info → nystrom_ncut-0.3.0.dist-info}/LICENSE +0 -0
- {nystrom_ncut-0.2.1.dist-info → nystrom_ncut-0.3.0.dist-info}/top_level.txt +0 -0
nystrom_ncut/common.py
CHANGED
@@ -12,8 +12,8 @@ def ceildiv(a: int, b: int) -> int:
 def lazy_normalize(x: torch.Tensor, n: int = 1000, **normalize_kwargs: Any) -> torch.Tensor:
     numel = np.prod(x.shape[:-1])
     n = min(n, numel)
-    random_indices = torch.randperm(numel)[:n]
-    _x = x.
+    random_indices = torch.randperm(numel, device=x.device)[:n]
+    _x = x.view((-1, x.shape[-1]))[random_indices]
     if torch.allclose(torch.norm(_x, **normalize_kwargs), torch.ones(n, device=x.device)):
         return x
     else:
@@ -21,13 +21,14 @@ def lazy_normalize(x: torch.Tensor, n: int = 1000, **normalize_kwargs: Any) -> torch.Tensor:
 
 
 def quantile_min_max(x: torch.Tensor, q1: float, q2: float, n_sample: int = 10000):
-
+    x = x.flatten()
+    if len(x) > n_sample:
         np.random.seed(0)
-        random_idx = np.random.choice(x
+        random_idx = np.random.choice(len(x), n_sample, replace=False)
         vmin, vmax = x[random_idx].quantile(q1), x[random_idx].quantile(q2)
     else:
         vmin, vmax = x.quantile(q1), x.quantile(q2)
-    return vmin, vmax
+    return vmin.item(), vmax.item()
 
 
 def quantile_normalize(x: torch.Tensor, q: float = 0.95):
@@ -57,5 +58,17 @@ def quantile_normalize(x: torch.Tensor, q: float = 0.95):
     return x
 
 
+class default_device:
+    def __init__(self, device: torch.device):
+        self._device = device
+
+    def __enter__(self):
+        self._original_device = torch.get_default_device()
+        torch.set_default_device(self._device)
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        torch.set_default_device(self._original_device)
+
+
 def profile(name: str, t: torch.Tensor) -> None:
     print(f"{name} --- nan: {t.isnan().any()}, inf: {t.isinf().any()}, max: {t.abs().max()}, min: {t.abs().min()}")
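The `default_device` context manager added here lets later utilities (e.g. `subsample_features`) allocate tensors such as `torch.arange(...)` on the input's device without passing `device=` everywhere. A minimal usage sketch, assuming PyTorch >= 2.3 (where `torch.get_default_device` exists) and the import path implied by this diff:

import torch

from nystrom_ncut.common import default_device  # import path assumed from this diff

x = torch.randn(100, 8)  # move x to "cuda" and the block below follows it
with default_device(x.device):
    indices = torch.arange(x.shape[0])  # no explicit device=, still lands on x.device
assert indices.device.type == x.device.type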
nystrom_ncut/distance_utils.py
CHANGED
@@ -1,61 +1,71 @@
-
+import collections
+from typing import List, Literal, OrderedDict
 
 import torch
 
 from .common import lazy_normalize
 
 
-DistanceOptions = Literal["cosine", "euclidean"
+DistanceOptions = Literal["cosine", "euclidean"]
+AffinityOptions = Literal["cosine", "rbf", "laplacian"]
 
+# noinspection PyTypeChecker
+DISTANCE_TO_AFFINITY: OrderedDict[DistanceOptions, List[AffinityOptions]] = collections.OrderedDict([
+    ("cosine", ["cosine"]),
+    ("euclidean", ["rbf", "laplacian"]),
+])
+# noinspection PyTypeChecker
+AFFINITY_TO_DISTANCE: OrderedDict[AffinityOptions, DistanceOptions] = collections.OrderedDict(sum([
+    [(affinity_type, distance_type) for affinity_type in affinity_types]
+    for distance_type, affinity_types in DISTANCE_TO_AFFINITY.items()
+], start=[]))
 
-
-
+
+
+def to_euclidean(x: torch.Tensor, distance_type: DistanceOptions) -> torch.Tensor:
+    if distance_type == "cosine":
         return lazy_normalize(x, p=2, dim=-1)
-    elif
+    elif distance_type == "euclidean":
         return x
     else:
-        raise ValueError(f"to_euclidean not implemented for
+        raise ValueError(f"to_euclidean not implemented for distance_type {distance_type}.")
 
 
 def distance_from_features(
     features: torch.Tensor,
     features_B: torch.Tensor,
-
+    distance_type: DistanceOptions,
 ):
-    """Compute
+    """Compute distance matrix from input features.
     Args:
         features (torch.Tensor): input features, shape (n_samples, n_features)
         features_B (torch.Tensor, optional): optional, if not None, compute affinity between two features
-
+        distance_type (str): distance metric, 'cosine' (default) or 'euclidean', 'rbf'.
     Returns:
         (torch.Tensor): affinity matrix, shape (n_samples, n_samples)
     """
     # compute distance matrix from input features
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        D = D / (torch.linalg.norm(stds) ** 2)
-    else:
-        raise ValueError("distance should be 'cosine' or 'euclidean', 'rbf'")
-    return D
+    shape: torch.Size = features.shape[:-2]
+    features = features.view((-1, *features.shape[-2:]))
+    features_B = features_B.view((-1, *features_B.shape[-2:]))
+
+    match distance_type:
+        case "cosine":
+            features = lazy_normalize(features, dim=-1)
+            features_B = lazy_normalize(features_B, dim=-1)
+            D = 1 - features @ features_B.mT
+        case "euclidean":
+            D = torch.cdist(features, features_B, p=2)
+        case _:
+            raise ValueError("Distance should be 'cosine' or 'euclidean'")
+    return D.view((*shape, *D.shape[-2:]))
 
 
 def affinity_from_features(
     features: torch.Tensor,
     features_B: torch.Tensor = None,
     affinity_focal_gamma: float = 1.0,
-
+    affinity_type: AffinityOptions = "cosine",
 ):
     """Compute affinity matrix from input features.
 
@@ -64,7 +74,7 @@ def affinity_from_features(
         features_B (torch.Tensor, optional): optional, if not None, compute affinity between two features
         affinity_focal_gamma (float): affinity matrix parameter, lower t reduce the edge weights
             on weak connections, default 1.0
-
+        affinity_type (str): distance metric, 'cosine' (default) or 'euclidean'.
     Returns:
         (torch.Tensor): affinity matrix, shape (n_samples, n_samples)
     """
@@ -75,9 +85,21 @@ def affinity_from_features(
     features_B = features if features_B is None else features_B
 
     # compute distance matrix from input features
-    D = distance_from_features(features, features_B,
+    D = distance_from_features(features, features_B, AFFINITY_TO_DISTANCE[affinity_type])
 
-    # torch.exp make affinity matrix positive definite,
     # lower affinity_focal_gamma reduce the weak edge weights
-
+    match affinity_type:
+        case "cosine" | "laplacian":
+            A = torch.exp(-D / affinity_focal_gamma)  # [... x n x n]
+        case "rbf":
+            # Outlier-robust scale invariance using quantiles to estimate standard deviation
+            c = 2.0
+            p = torch.erf(torch.tensor((-c, c), device=features.device) * (2 ** -0.5))
+            stds = torch.nanquantile(features, q=(p + 1) / 2, dim=-2)  # [2 x ... x d]
+            stds = (stds[1] - stds[0]) / (2 * c)  # [... x d]
+            D = 0.5 * (D / torch.norm(stds, dim=-1)[..., None, None]) ** 2
+            A = torch.exp(-D / affinity_focal_gamma)
+        case _:
+            raise ValueError("Affinity should be 'cosine', 'rbf', or 'laplacian'")
+
     return A
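The new "rbf" branch estimates the per-dimension scale from quantiles instead of a sample standard deviation: with c = 2, `torch.erf(c / sqrt(2))` is the Gaussian probability mass within c standard deviations, so the quantiles at (p + 1) / 2 bracket roughly the mean plus or minus 2 sigma, and their gap divided by 2c recovers sigma while ignoring outliers and NaNs. A small sketch of the resulting API, assuming the module paths shown in this diff:

import torch

from nystrom_ncut.distance_utils import AFFINITY_TO_DISTANCE, affinity_from_features

features = torch.randn(256, 32)

# each affinity type resolves to the distance it is built on
assert AFFINITY_TO_DISTANCE["rbf"] == "euclidean"
assert AFFINITY_TO_DISTANCE["cosine"] == "cosine"

A = affinity_from_features(features, affinity_type="rbf", affinity_focal_gamma=1.0)
print(A.shape)  # torch.Size([256, 256]), entries in (0, 1]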
nystrom_ncut/nystrom/__init__.py
CHANGED

nystrom_ncut/nystrom/distance_realization.py
CHANGED
@@ -18,10 +18,10 @@ from ..sampling_utils import (
 class GramKernel(OnlineKernel):
     def __init__(
         self,
-
+        distance_type: DistanceOptions,
         eig_solver: EigSolverOptions,
     ):
-        self.
+        self.distance_type: DistanceOptions = distance_type
         self.eig_solver: EigSolverOptions = eig_solver
 
         # Anchor matrices
@@ -40,7 +40,7 @@ class GramKernel(OnlineKernel):
         self.A = -0.5 * distance_from_features(
             self.anchor_features,  # [n x d]
             self.anchor_features,
-
+            distance_type=self.distance_type,
         )  # [n x n]
         d = features.shape[-1]
         U, L = solve_eig(
@@ -58,7 +58,7 @@ class GramKernel(OnlineKernel):
         B = -0.5 * distance_from_features(
             self.anchor_features,  # [n x d]
             features,  # [m x d]
-
+            distance_type=self.distance_type,
         )  # [n x m]
         b_r = torch.sum(B, dim=-1)  # [n]
         b_c = torch.sum(B, dim=-2)  # [m]
@@ -84,7 +84,7 @@ class GramKernel(OnlineKernel):
         B = -0.5 * distance_from_features(
             self.anchor_features,
             features,
-
+            distance_type=self.distance_type,
         )
         b_c = torch.sum(B, dim=-2)  # [m]
         col_sum = b_c + B.mT @ self.Ainv @ self.b_r  # [m]
@@ -98,7 +98,7 @@ class DistanceRealization(OnlineNystromSubsampleFit):
     def __init__(
         self,
         n_components: int = 100,
-
+        distance_type: DistanceOptions = "cosine",
        sample_config: SampleConfig = SampleConfig(),
         eig_solver: EigSolverOptions = "svd_lowrank",
         chunk_size: int = 8192,
@@ -115,13 +115,12 @@ class DistanceRealization(OnlineNystromSubsampleFit):
         OnlineNystromSubsampleFit.__init__(
             self,
             n_components=n_components,
-            kernel=GramKernel(
-
+            kernel=GramKernel(distance_type, eig_solver),
+            distance_type=distance_type,
             sample_config=sample_config,
             eig_solver=eig_solver,
             chunk_size=chunk_size,
         )
-        self.distance: DistanceOptions = distance
 
     def fit_transform(
         self,
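The user-visible change in this file is the rename from `distance` to `distance_type`, threaded through `GramKernel` and up to `OnlineNystromSubsampleFit`. A constructor sketch under that assumption (import path taken from this diff; `method="fps"` sampling additionally requires pytorch3d):

import torch

from nystrom_ncut.nystrom.distance_realization import DistanceRealization
from nystrom_ncut.sampling_utils import SampleConfig

dr = DistanceRealization(
    n_components=10,
    distance_type="cosine",  # renamed from `distance` in 0.2.1
    sample_config=SampleConfig(method="fps", num_sample=256),
    eig_solver="svd_lowrank",
)
embedding = dr.fit_transform(torch.randn(1024, 64))  # Euclidean realization of the cosine metric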
nystrom_ncut/nystrom/normalized_cut.py
CHANGED
@@ -8,7 +8,8 @@ from .nystrom_utils import (
     solve_eig,
 )
 from ..distance_utils import (
-
+    AffinityOptions,
+    AFFINITY_TO_DISTANCE,
     affinity_from_features,
 )
 from ..sampling_utils import (
@@ -20,80 +21,83 @@ class LaplacianKernel(OnlineKernel):
     def __init__(
         self,
         affinity_focal_gamma: float,
-
+        affinity_type: AffinityOptions,
         adaptive_scaling: bool,
         eig_solver: EigSolverOptions,
     ):
         self.affinity_focal_gamma = affinity_focal_gamma
-        self.
+        self.affinity_type: AffinityOptions = affinity_type
         self.adaptive_scaling: bool = adaptive_scaling
         self.eig_solver: EigSolverOptions = eig_solver
 
         # Anchor matrices
-        self.anchor_features: torch.Tensor = None
-        self.
-        self.
+        self.anchor_features: torch.Tensor = None  # [... x n x d]
+        self.anchor_mask: torch.Tensor = None
+        self.A: torch.Tensor = None  # [... x n x n]
+        self.Ainv: torch.Tensor = None  # [... x n x n]
 
         # Updated matrices
-        self.a_r: torch.Tensor = None
-        self.b_r: torch.Tensor = None
+        self.a_r: torch.Tensor = None  # [... x n]
+        self.b_r: torch.Tensor = None  # [... x n]
 
     def fit(self, features: torch.Tensor) -> None:
-        self.anchor_features = features
-        self.
-
+        self.anchor_features = features  # [... x n x d]
+        self.anchor_mask = torch.all(torch.isnan(self.anchor_features), dim=-1)  # [... x n]
+
+        self.A = torch.nan_to_num(affinity_from_features(
+            self.anchor_features,  # [... x n x d]
             affinity_focal_gamma=self.affinity_focal_gamma,
-
-        )
+            affinity_type=self.affinity_type,
+        ), nan=0.0)  # [... x n x n]
         d = features.shape[-1]
         U, L = solve_eig(
             self.A,
             num_eig=d + 1,  # d * (d + 3) // 2 + 1,
             eig_solver=self.eig_solver,
-        )
-        self.Ainv = U @ torch.
-        self.a_r = torch.sum(self.A, dim=-1)
-        self.b_r = torch.zeros_like(self.a_r)
+        )  # [... x n x (d + 1)], [... x (d + 1)]
+        self.Ainv = U @ torch.diag_embed(1 / L) @ U.mT  # [... x n x n]
+        self.a_r = torch.where(self.anchor_mask, torch.inf, torch.sum(self.A, dim=-1))  # [... x n]
+        self.b_r = torch.zeros_like(self.a_r)  # [... x n]
 
     def _affinity(self, features: torch.Tensor) -> torch.Tensor:
-        B = affinity_from_features(
-            self.anchor_features,
-            features,
+        B = torch.where(self.anchor_mask[..., None], 0.0, affinity_from_features(
+            self.anchor_features,  # [... x n x d]
+            features,  # [... x m x d]
             affinity_focal_gamma=self.affinity_focal_gamma,
-
-        )
+            affinity_type=self.affinity_type,
+        ))  # [... x n x m]
         if self.adaptive_scaling:
             diagonal = (
-                einops.rearrange(B, "n m -> m 1 n")
-                @ self.Ainv
-                @ einops.rearrange(B, "n m -> m n 1")
-            ).squeeze(
-            adaptive_scale = diagonal ** -0.5
-            B = B * adaptive_scale
-        return B
+                einops.rearrange(B, "... n m -> ... m 1 n")  # [... x m x 1 x n]
+                @ self.Ainv  # [... x n x n]
+                @ einops.rearrange(B, "... n m -> ... m n 1")  # [... x m x n x 1]
+            ).squeeze(-2, -1)  # [... x m]
+            adaptive_scale = diagonal ** -0.5  # [... x m]
+            B = B * adaptive_scale[..., None, :]
+        return B  # [... x n x m]
 
     def update(self, features: torch.Tensor) -> torch.Tensor:
-        B = self._affinity(features)
-        b_r = torch.sum(B, dim=-1)
-        b_c = torch.sum(B, dim=-2)
-        self.b_r = self.b_r + b_r
+        B = self._affinity(features)  # [... x n x m]
+        b_r = torch.sum(torch.nan_to_num(B, nan=0.0), dim=-1)  # [... x n]
+        b_c = torch.sum(B, dim=-2)  # [... x m]
+        self.b_r = self.b_r + b_r  # [... x n]
 
-        row_sum = self.a_r + self.b_r
-        col_sum = b_c + B.mT @ self.Ainv @ self.b_r
-        scale = (row_sum[:, None] * col_sum) ** -0.5
-        return (B * scale).mT
+        row_sum = self.a_r + self.b_r  # [... x n]
+        col_sum = b_c + (B.mT @ (self.Ainv @ self.b_r[..., None]))[..., 0]  # [... x m]
+        scale = (row_sum[..., :, None] * col_sum[..., None, :]) ** -0.5  # [... x n x m]
+        return (B * scale).mT  # [... x m x n]
 
     def transform(self, features: torch.Tensor = None) -> torch.Tensor:
-        row_sum = self.a_r + self.b_r
+        row_sum = self.a_r + self.b_r  # [... x n]
         if features is None:
-            B = self.A
-            col_sum = row_sum
+            B = self.A  # [... x n x n]
+            col_sum = row_sum  # [... x n]
         else:
             B = self._affinity(features)
-            b_c = torch.sum(B, dim=-2)
-            col_sum = b_c + B.mT @ self.Ainv @ self.b_r
-        scale = (row_sum[:, None] * col_sum) ** -0.5
-        return (B * scale).mT
+            b_c = torch.sum(B, dim=-2)  # [... x m]
+            col_sum = b_c + (B.mT @ (self.Ainv @ self.b_r[..., None]))[..., 0]  # [... x m]
+        scale = (row_sum[..., :, None] * col_sum[..., None, :]) ** -0.5  # [... x n x m]
+        return (B * scale).mT  # [... x m x n]
 
 
 class NCut(OnlineNystromSubsampleFit):
@@ -103,7 +107,7 @@ class NCut(OnlineNystromSubsampleFit):
         self,
         n_components: int = 100,
         affinity_focal_gamma: float = 1.0,
-
+        affinity_type: AffinityOptions = "cosine",
         adaptive_scaling: bool = False,
         sample_config: SampleConfig = SampleConfig(),
         eig_solver: EigSolverOptions = "svd_lowrank",
@@ -124,8 +128,8 @@ class NCut(OnlineNystromSubsampleFit):
         OnlineNystromSubsampleFit.__init__(
             self,
             n_components=n_components,
-            kernel=LaplacianKernel(affinity_focal_gamma,
-
+            kernel=LaplacianKernel(affinity_focal_gamma, affinity_type, adaptive_scaling, eig_solver),
+            distance_type=AFFINITY_TO_DISTANCE[affinity_type],
             sample_config=sample_config,
             eig_solver=eig_solver,
             chunk_size=chunk_size,
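For `NCut` the rename is `affinity_type`, and the `distance_type` used for subsampling is now derived via `AFFINITY_TO_DISTANCE` rather than passed separately. A usage sketch, assuming `NCut` is still re-exported at the package root as in 0.2.x (the RECORD diff below shows `nystrom_ncut/__init__.py` unchanged) and that pytorch3d is available for fps sampling:

import torch

from nystrom_ncut import NCut
from nystrom_ncut.sampling_utils import SampleConfig

ncut = NCut(
    n_components=20,
    affinity_focal_gamma=0.5,
    affinity_type="rbf",  # new name; one of "cosine" | "rbf" | "laplacian"
    sample_config=SampleConfig(method="fps", num_sample=1000),
)
eigenvectors = ncut.fit_transform(torch.rand(4096, 64))  # [4096 x 20]
eigenvalues = ncut.eigenvalues_  # [20], descending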
nystrom_ncut/nystrom/nystrom_utils.py
CHANGED
@@ -25,15 +25,15 @@ EigSolverOptions = Literal["svd_lowrank", "lobpcg", "svd", "eigh"]
 
 class OnlineKernel:
     @abstractmethod
-    def fit(self, features: torch.Tensor) -> "OnlineKernel":  # [n x d]
+    def fit(self, features: torch.Tensor) -> "OnlineKernel":  # [... x n x d]
         """"""
 
     @abstractmethod
-    def update(self, features: torch.Tensor) -> torch.Tensor:  # [m x d] -> [m x n]
+    def update(self, features: torch.Tensor) -> torch.Tensor:  # [... x m x d] -> [... x m x n]
         """"""
 
     @abstractmethod
-    def transform(self, features: torch.Tensor = None) -> torch.Tensor:  # [m x d] -> [m x n]
+    def transform(self, features: torch.Tensor = None) -> torch.Tensor:  # [... x m x d] -> [... x m x n]
         """"""
 
 
@@ -54,20 +54,21 @@ class OnlineNystrom(TorchTransformerMixin):
         self.n_components: int = n_components
         self.kernel: OnlineKernel = kernel
         self.eig_solver: EigSolverOptions = eig_solver
+        self.shape: torch.Size = None  # ...
 
         self.chunk_size = chunk_size
 
         # Anchor matrices
-        self.anchor_features: torch.Tensor = None  # [n x d]
-        self.A: torch.Tensor = None  # [n x n]
-        self.Ahinv: torch.Tensor = None  # [n x n]
-        self.Ahinv_UL: torch.Tensor = None  # [n x indirect_pca_dim]
-        self.Ahinv_VT: torch.Tensor = None  # [indirect_pca_dim x n]
+        self.anchor_features: torch.Tensor = None  # [... x n x d]
+        self.A: torch.Tensor = None  # [... x n x n]
+        self.Ahinv: torch.Tensor = None  # [... x n x n]
+        self.Ahinv_UL: torch.Tensor = None  # [... x n x indirect_pca_dim]
+        self.Ahinv_VT: torch.Tensor = None  # [... x indirect_pca_dim x n]
 
         # Updated matrices
-        self.S: torch.Tensor = None  # [n x n]
-        self.transform_matrix: torch.Tensor = None  # [n x n_components]
-        self.eigenvalues_: torch.Tensor = None  # [n]
+        self.S: torch.Tensor = None  # [... x n x n]
+        self.transform_matrix: torch.Tensor = None  # [... x n x n_components]
+        self.eigenvalues_: torch.Tensor = None  # [... x n]
 
     def _update_to_kernel(self, d: int) -> Tuple[torch.Tensor, torch.Tensor]:
         self.A = self.S = self.kernel.transform()
@@ -75,10 +76,10 @@ class OnlineNystrom(TorchTransformerMixin):
             self.A,
             num_eig=d + 1,  # d * (d + 3) // 2 + 1,
             eig_solver=self.eig_solver,
-        )  # [n x (? + 1)], [? + 1]
-        self.Ahinv_UL = U * (L ** -0.5)
-        self.Ahinv_VT = U.mT  # [(? + 1) x n]
-        self.Ahinv = self.Ahinv_UL @ self.Ahinv_VT  # [n x n]
+        )  # [... x n x (? + 1)], [... x (? + 1)]
+        self.Ahinv_UL = U * (L[..., None, :] ** -0.5)  # [... x n x (? + 1)]
+        self.Ahinv_VT = U.mT  # [... x (? + 1) x n]
+        self.Ahinv = self.Ahinv_UL @ self.Ahinv_VT  # [... x n x n]
         return U, L
 
     def fit(self, features: torch.Tensor) -> "OnlineNystrom":
@@ -89,64 +90,63 @@ class OnlineNystrom(TorchTransformerMixin):
         self.anchor_features = features
 
         self.kernel.fit(self.anchor_features)
-        U, L = self._update_to_kernel(features.shape[-1])  # [n x (d + 1)], [d + 1]
+        U, L = self._update_to_kernel(features.shape[-1])  # [... x n x (d + 1)], [... x (d + 1)]
 
-        self.transform_matrix = (U / L)[:, :self.n_components]
-        self.eigenvalues_ = L[:self.n_components]
-        return U[:, :self.n_components]
+        self.transform_matrix = (U / L[..., None, :])[..., :, :self.n_components]  # [... x n x n_components]
+        self.eigenvalues_ = L[..., :self.n_components]  # [... x n_components]
+        return U[..., :, :self.n_components]  # [... x n x n_components]
 
     def update(self, features: torch.Tensor) -> torch.Tensor:
         d = features.shape[-1]
-        n_chunks = ceildiv(
+        n_chunks = ceildiv(features.shape[-2], self.chunk_size)
         if n_chunks > 1:
             """ Chunked version """
-            chunks = torch.chunk(features, n_chunks, dim
+            chunks = torch.chunk(features, n_chunks, dim=-2)
             for chunk in chunks:
                 self.kernel.update(chunk)
             self._update_to_kernel(d)
 
-            compressed_BBT = 0.0  # [(? + 1) x (? + 1))]
+            compressed_BBT = 0.0  # [... x (? + 1) x (? + 1))]
             for chunk in chunks:
-                _B = self.kernel.transform(chunk).mT  # [n x _m]
-                _compressed_B = self.Ahinv_VT @ _B  # [(? + 1) x _m]
-
-
-
-                self.
+                _B = self.kernel.transform(chunk).mT  # [... x n x _m]
+                _compressed_B = self.Ahinv_VT @ _B  # [... x (? + 1) x _m]
+                _compressed_B = torch.nan_to_num(_compressed_B, nan=0.0)
+                compressed_BBT = compressed_BBT + _compressed_B @ _compressed_B.mT  # [... x (? + 1) x (? + 1)]
+            self.S = self.S + self.Ahinv_UL @ compressed_BBT @ self.Ahinv_UL.mT  # [... x n x n]
+            US, self.eigenvalues_ = solve_eig(self.S, self.n_components, self.eig_solver)  # [... x n x n_components], [... x n_components]
+            self.transform_matrix = self.Ahinv @ US * (self.eigenvalues_[..., None, :] ** -0.5)  # [... x n x n_components]
 
             VS = []
             for chunk in chunks:
-                VS.append(self.kernel.transform(chunk) @ self.transform_matrix)  # [_m x n_components]
-            VS = torch.cat(VS, dim
-            return VS  # [m x n_components]
+                VS.append(self.kernel.transform(chunk) @ self.transform_matrix)  # [... x _m x n_components]
+            VS = torch.cat(VS, dim=-2)
+            return VS  # [... x m x n_components]
         else:
             """ Unchunked version """
-            B = self.kernel.update(features).mT  # [n x m]
+            B = self.kernel.update(features).mT  # [... x n x m]
             self._update_to_kernel(d)
-            compressed_B = self.Ahinv_VT @ B  # [
+            compressed_B = self.Ahinv_VT @ B  # [... x (? + 1) x m]
+            compressed_B = torch.nan_to_num(compressed_B, nan=0.0)
 
-            self.S = self.S + self.Ahinv_UL @ (compressed_B @ compressed_B.mT) @ self.Ahinv_UL.mT  # [n x n]
-            US, self.eigenvalues_ = solve_eig(self.S, self.n_components, self.eig_solver)  # [n x n_components], [n_components]
-            self.transform_matrix = self.Ahinv @ US * (self.eigenvalues_ ** -0.5)
+            self.S = self.S + self.Ahinv_UL @ (compressed_B @ compressed_B.mT) @ self.Ahinv_UL.mT  # [... x n x n]
+            US, self.eigenvalues_ = solve_eig(self.S, self.n_components, self.eig_solver)  # [... x n x n_components], [... x n_components]
+            self.transform_matrix = self.Ahinv @ US * (self.eigenvalues_[..., None, :] ** -0.5)  # [... x n x n_components]
 
-            return B.mT @ self.transform_matrix  # [m x n_components]
+            return B.mT @ self.transform_matrix  # [... x m x n_components]
 
-    def transform(self, features: torch.Tensor
-
-
+    def transform(self, features: torch.Tensor) -> torch.Tensor:
+        n_chunks = ceildiv(features.shape[-2], self.chunk_size)
+        if n_chunks > 1:
+            """ Chunked version """
+            chunks = torch.chunk(features, n_chunks, dim=-2)
+            VS = []
+            for chunk in chunks:
+                VS.append(self.kernel.transform(chunk) @ self.transform_matrix)  # [... x _m x n_components]
+            VS = torch.cat(VS, dim=-2)
         else:
-
-
-
-            chunks = torch.chunk(features, n_chunks, dim=0)
-            VS = []
-            for chunk in chunks:
-                VS.append(self.kernel.transform(chunk) @ self.transform_matrix)  # [_m x n_components]
-            VS = torch.cat(VS, dim=0)
-        else:
-            """ Unchunked version """
-            VS = self.kernel.transform(features) @ self.transform_matrix  # [m x n_components]
-        return VS  # [m x n_components]
+            """ Unchunked version """
+            VS = self.kernel.transform(features) @ self.transform_matrix  # [... x m x n_components]
+        return VS  # [... x m x n_components]
 
 
 class OnlineNystromSubsampleFit(OnlineNystrom):
@@ -154,7 +154,7 @@ class OnlineNystromSubsampleFit(OnlineNystrom):
         self,
         n_components: int,
         kernel: OnlineKernel,
-
+        distance_type: DistanceOptions,
         sample_config: SampleConfig,
         eig_solver: EigSolverOptions = "svd_lowrank",
         chunk_size: int = 8192,
@@ -166,7 +166,7 @@ class OnlineNystromSubsampleFit(OnlineNystrom):
             eig_solver=eig_solver,
             chunk_size=chunk_size,
         )
-        self.
+        self.distance_type: DistanceOptions = distance_type
         self.sample_config: SampleConfig = sample_config
         self.sample_config._ncut_obj = copy.deepcopy(self)
         self.anchor_indices: torch.Tensor = None
@@ -176,7 +176,7 @@ class OnlineNystromSubsampleFit(OnlineNystrom):
         features: torch.Tensor,
         precomputed_sampled_indices: torch.Tensor,
     ) -> Tuple[torch.Tensor, torch.Tensor]:
-        _n = features.shape[
+        _n = features.shape[-2]
         if self.sample_config.num_sample >= _n:
             logging.info(
                 f"NCUT nystrom num_sample is larger than number of input samples, nyström approximation is not needed, setting num_sample={_n}"
@@ -188,16 +188,17 @@ class OnlineNystromSubsampleFit(OnlineNystrom):
         else:
             self.anchor_indices = subsample_features(
                 features=features,
-
+                distance_type=self.distance_type,
                 config=self.sample_config,
             )
-        sampled_features = features[self.anchor_indices]
+        sampled_features = torch.gather(features, -2, self.anchor_indices[..., None].expand([-1] * self.anchor_indices.ndim + [features.shape[-1]]))
         OnlineNystrom.fit(self, sampled_features)
 
-        _n_not_sampled = _n -
+        _n_not_sampled = _n - self.anchor_indices.shape[-1]
         if _n_not_sampled > 0:
-
-
+            unsampled_mask = torch.full(features.shape[:-1], True, device=features.device).scatter_(-1, self.anchor_indices, False)
+            unsampled_indices = torch.where(unsampled_mask)[-1].view((*features.shape[:-2], -1))
+            unsampled_features = torch.gather(features, -2, unsampled_indices[..., None].expand([-1] * unsampled_indices.ndim + [features.shape[-1]]))
            V_unsampled = OnlineNystrom.update(self, unsampled_features)
         else:
             unsampled_indices = V_unsampled = None
@@ -235,12 +236,12 @@ class OnlineNystromSubsampleFit(OnlineNystrom):
         (torch.Tensor): eigen_values, sorted in descending order, shape (num_eig,)
         """
         unsampled_indices, V_unsampled = OnlineNystromSubsampleFit._fit_helper(self, features, precomputed_sampled_indices)
-        V_sampled = OnlineNystrom.transform(self)
+        V_sampled = OnlineNystrom.transform(self, self.anchor_features)
 
         if unsampled_indices is not None:
-            V = torch.zeros((
-
-
+            V = torch.zeros((*features.shape[:-1], self.n_components), device=features.device)
+            for (indices, _V) in [(self.anchor_indices, V_sampled), (unsampled_indices, V_unsampled)]:
+                V.scatter_(-2, indices[..., None].expand([-1] * indices.ndim + [self.n_components]), _V)
         else:
             V = V_sampled
         return V
@@ -263,12 +264,16 @@ def solve_eig(
     (torch.Tensor): eigenvectors corresponding to the eigenvalues, shape (n_samples, num_eig)
     (torch.Tensor): eigenvalues of the eigenvectors, sorted in descending order
     """
-
+    shape: torch.Size = A.shape[:-2]
+    A = A.view((-1, *A.shape[-2:]))
+    bsz: int = A.shape[0]
+
+    A = A + eig_value_buffer * torch.eye(A.shape[-1], device=A.device)
 
     # compute eigenvectors
     if eig_solver == "svd_lowrank":  # default
         # only top q eigenvectors, fastest
-        eigen_vector, eigen_value, _ = torch.svd_lowrank(A, q=num_eig)
+        eigen_vector, eigen_value, _ = torch.svd_lowrank(A, q=num_eig)  # complex: [(...) x N x D], [(...) x D]
     elif eig_solver == "lobpcg":
         # only top k eigenvectors, fast
         eigen_value, eigen_vector = torch.lobpcg(A, k=num_eig)
@@ -285,11 +290,15 @@ def solve_eig(
     eigen_value = eigen_value - eig_value_buffer
 
     # sort eigenvectors by eigenvalues, take top (descending order)
-    indices = torch.topk(eigen_value.abs(), k=num_eig, dim
-    eigen_value
+    indices = torch.topk(eigen_value.abs(), k=num_eig, dim=-1).indices  # int: [(...) x S]
+    eigen_value = eigen_value[torch.arange(bsz)[:, None], indices]  # complex: [(...) x S]
+    eigen_vector = eigen_vector[torch.arange(bsz)[:, None], :, indices].mT  # complex: [(...) x N x S]
 
     # correct the random rotation (flipping sign) of eigenvectors
-    sign = torch.sum(eigen_vector.real, dim=
+    sign = torch.sign(torch.sum(eigen_vector.real, dim=-2, keepdim=True))  # float: [(...) x 1 x S]
     sign[sign == 0] = 1.0
     eigen_vector = eigen_vector * sign
+
+    eigen_value = eigen_value.view((*shape, *eigen_value.shape[-1:]))  # complex: [... x S]
+    eigen_vector = eigen_vector.view((*shape, *eigen_vector.shape[-2:]))  # complex: [... x N x S]
     return eigen_vector, eigen_value
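All the batched bookkeeping above serves one identity: once `solve_eig` factors the anchor block A into U diag(L) U^T, out-of-sample rows extend as V = B^T U diag(1/L), which is exactly what `transform_matrix = U / L` implements. A library-independent numerical sketch (illustrative RBF kernel with unit lengthscale):

import torch

torch.manual_seed(0)
X = torch.randn(64, 4)
K = torch.exp(-torch.cdist(X, X) ** 2)  # full PSD kernel over all 64 points
A, B = K[:32, :32], K[:32, 32:]  # anchor block and anchor-to-extra block

L, U = torch.linalg.eigh(A)  # eigenvalues in ascending order
U_top, L_top = U[:, -5:], L[-5:]  # keep the top 5 eigenpairs
V_extra = B.mT @ (U_top / L_top)  # Nystrom extension to the 32 extra points
approx = torch.cat([U_top, V_extra], dim=0)  # [64 x 5] approximate eigenvectors of K
print(approx.shape)  # torch.Size([64, 5])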
nystrom_ncut/sampling_utils.py
CHANGED
@@ -1,17 +1,22 @@
-import logging
 from dataclasses import dataclass
 from typing import Literal
 
 import torch
 from pytorch3d.ops import sample_farthest_points
 
+from .common import (
+    default_device,
+)
 from .distance_utils import (
     DistanceOptions,
     to_euclidean,
 )
+from .transformer import (
+    TorchTransformerMixin,
+)
 
 
-SampleOptions = Literal["random", "fps", "fps_recursive"]
+SampleOptions = Literal["full", "random", "fps", "fps_recursive"]
 
 
 @dataclass
@@ -20,69 +25,77 @@ class SampleConfig:
     num_sample: int = 10000
     fps_dim: int = 12
     n_iter: int = None
-    _ncut_obj:
+    _ncut_obj: TorchTransformerMixin = None
 
 
 @torch.no_grad()
 def subsample_features(
     features: torch.Tensor,
-
+    distance_type: DistanceOptions,
     config: SampleConfig,
-    max_draw: int = 1000000,
 ):
-    features = features.detach()
-
-
-
-            "num_sample is larger than total, bypass Nystrom-like approximation"
-        )
-        sampled_indices = torch.arange(features.shape[0])
-    else:
-        # sample subgraph
-        if config.method == "fps":  # default
-            features = to_euclidean(features, disttype)
-            if config.num_sample > max_draw:
-                logging.warning(
-                    f"num_sample is larger than max_draw, apply farthest point sampling on random sampled {max_draw} samples"
-                )
-                draw_indices = torch.randperm(features.shape[0])[:max_draw]
-                sampled_indices = fpsample(features[draw_indices], config)
-                sampled_indices = draw_indices[sampled_indices]
-            else:
-                sampled_indices = fpsample(features, config)
-
-        elif config.method == "random":  # not recommended
-            sampled_indices = torch.randperm(features.shape[0])[:config.num_sample]
-
-        elif config.method == "fps_recursive":
-            features = to_euclidean(features, disttype)
-            sampled_indices = subsample_features(
-                features=features,
-                disttype=disttype,
-                config=SampleConfig(method="fps", num_sample=config.num_sample, fps_dim=config.fps_dim)
-            )
-            nc = config._ncut_obj
-            for _ in range(config.n_iter):
-                fps_features, eigenvalues = nc.fit_transform(features, precomputed_sampled_indices=sampled_indices)
-
-                fps_features = to_euclidean(fps_features[:, :config.fps_dim], "cosine")
-                sampled_indices = torch.sort(fpsample(fps_features, config)).values
+    features = features.detach()  # float: [... x n x d]
+    with default_device(features.device):
+        if config.method == "full" or config.num_sample >= features.shape[0]:
+            sampled_indices = torch.arange(features.shape[-2]).expand(features.shape[:-1])  # int: [... x n]
         else:
-
-
-
+            # sample
+            match config.method:
+                case "fps":  # default
+                    sampled_indices = fpsample(to_euclidean(features, distance_type), config)
+
+                case "random":  # not recommended
+                    mask = torch.all(torch.isfinite(features), dim=-1)  # bool: [... x n]
+                    weights = mask.to(torch.float) + torch.rand(mask.shape)  # float: [... x n]
+                    sampled_indices = torch.topk(weights, k=config.num_sample, dim=-1).indices  # int: [... x num_sample]
+
+                case "fps_recursive":
+                    features = to_euclidean(features, distance_type)  # float: [... x n x d]
+                    sampled_indices = subsample_features(
+                        features=features,
+                        distance_type=distance_type,
+                        config=SampleConfig(method="fps", num_sample=config.num_sample, fps_dim=config.fps_dim)
+                    )  # int: [... x num_sample]
+                    nc = config._ncut_obj
+                    for _ in range(config.n_iter):
+                        fps_features, eigenvalues = nc.fit_transform(features, precomputed_sampled_indices=sampled_indices)
+
+                        fps_features = to_euclidean(fps_features[:, :config.fps_dim], "cosine")
+                        sampled_indices = torch.sort(fpsample(fps_features, config), dim=-1).values
+
+                case _:
+                    raise ValueError("sample_method should be 'farthest' or 'random'")
+        sampled_indices = torch.sort(sampled_indices, dim=-1).values
+    return sampled_indices
 
 
 def fpsample(
     features: torch.Tensor,
     config: SampleConfig,
 ):
-
-
-
-
+    shape = features.shape[:-2]  # ...
+    features = features.view((-1, *features.shape[-2:]))  # [(...) x n x d]
+    bsz = features.shape[0]
+
+    mask = torch.all(torch.isfinite(features), dim=-1)  # bool: [(...) x n]
+    count = torch.sum(mask, dim=-1)  # int: [(...)]
+    order = torch.topk(mask.to(torch.int), k=torch.max(count).item(), dim=-1).indices  # int: [(...) x max_count]
+
+    features = torch.nan_to_num(features[torch.arange(bsz)[:, None], order], nan=0.0)  # float: [(...) x max_count x d]
+    if features.shape[-1] > config.fps_dim:
+        U, S, V = torch.pca_lowrank(features, q=config.fps_dim)  # float: [(...) x max_count x fps_dim], [(...) x fps_dim], [(...) x fps_dim x fps_dim]
+        features = U * S[..., None, :]  # float: [(...) x max_count x fps_dim]
 
     try:
-
+        sample_indices = sample_farthest_points(
+            features, lengths=count, K=config.num_sample
+        )[1]  # int: [(...) x num_sample]
     except RuntimeError:
-
+        original_device = features.device
+        alternative_device = "cuda" if original_device == "cpu" else "cpu"
+        sample_indices = sample_farthest_points(
+            features.to(alternative_device), lengths=count.to(alternative_device), K=config.num_sample
+        )[1].to(original_device)  # int: [(...) x num_sample]
+    sample_indices = torch.gather(order, 1, sample_indices)  # int: [(...) x num_sample]
+
+    return sample_indices.view((*shape, *sample_indices.shape[-1:]))  # int: [... x num_sample]
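`fpsample` now tolerates NaN-padded rows: finite rows are counted, reordered to the front, and passed to pytorch3d's `sample_farthest_points` via `lengths=`, with the chosen indices mapped back through `order`. A sketch (pytorch3d required; shapes follow the comments in the diff):

import torch

from nystrom_ncut.sampling_utils import SampleConfig, subsample_features

features = torch.randn(500, 32)
features[450:] = torch.nan  # padding rows, masked out during farthest point sampling

indices = subsample_features(
    features,
    distance_type="cosine",
    config=SampleConfig(method="fps", num_sample=64, fps_dim=12),
)
print(indices.shape)  # torch.Size([64]), sorted along the last dim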
nystrom_ncut/visualize_utils.py
CHANGED
@@ -1,5 +1,4 @@
-import
-from typing import Any, Callable, Dict, Literal, Union
+from typing import Any, Callable, Dict, Union
 
 import numpy as np
 import torch
@@ -13,7 +12,8 @@ from .common import (
     quantile_normalize,
 )
 from .distance_utils import (
-
+    AffinityOptions,
+    AFFINITY_TO_DISTANCE,
     to_euclidean,
     affinity_from_features,
 )
@@ -27,7 +27,7 @@ def extrapolate_knn(
     anchor_features: torch.Tensor,  # [n x d]
     anchor_output: torch.Tensor,  # [n x d']
     extrapolation_features: torch.Tensor,  # [m x d]
-
+    affinity_type: AffinityOptions,
     knn: int = 10,  # k
     affinity_focal_gamma: float = 1.0,
     chunk_size: int = 8192,
@@ -41,7 +41,7 @@ def extrapolate_knn(
     anchor_output (torch.Tensor): output from subgraph, shape (num_sample, D)
     extrapolation_features (torch.Tensor): features from existing nodes, shape (new_num_samples, n_features)
     knn (int): number of KNN to propagate eigenvectors
-
+    affinity_type (str): distance metric, 'cosine' (default) or 'euclidean', 'rbf'
     chunk_size (int): chunk size for matrix multiplication
     device (str): device to use for computation, if None, will not change device
     Returns:
@@ -66,7 +66,7 @@ def extrapolate_knn(
     for _v in torch.chunk(extrapolation_features, n_chunks, dim=0):
         _v = _v.to(device)  # [_m x d]
 
-        _A = affinity_from_features(anchor_features, _v, affinity_focal_gamma,
+        _A = affinity_from_features(anchor_features, _v, affinity_focal_gamma, affinity_type).mT  # [_m x n]
         if knn is not None:
             _A, indices = _A.topk(k=knn, dim=-1, largest=True)  # [_m x k], [_m x k]
             _anchor_output = anchor_output[indices]  # [_m x k x d]
@@ -90,7 +90,7 @@ def extrapolate_knn_with_subsampling(
     full_output: torch.Tensor,  # [n x d']
     extrapolation_features: torch.Tensor,  # [m x d]
     sample_config: SampleConfig,
-
+    affinity_type: AffinityOptions,
     knn: int = 10,  # k
     affinity_focal_gamma: float = 1.0,
     chunk_size: int = 8192,
@@ -122,7 +122,7 @@ def extrapolate_knn_with_subsampling(
     # sample subgraph
     anchor_indices = subsample_features(
         features=full_features,
-
+        distance_type=AFFINITY_TO_DISTANCE[affinity_type],
        config=sample_config,
     )
 
@@ -135,7 +135,7 @@ def extrapolate_knn_with_subsampling(
         anchor_features,
         anchor_output,
         extrapolation_features,
-
+        affinity_type,
         knn=knn,
         affinity_focal_gamma=affinity_focal_gamma,
         chunk_size=chunk_size,
@@ -148,7 +148,7 @@ def _rgb_with_dimensionality_reduction(
     features: torch.Tensor,
     num_sample: int,
-
+    affinity_type: AffinityOptions,
     rgb_func: Callable[[torch.Tensor, float], torch.Tensor],
     q: float,
     knn: int,
@@ -162,26 +162,26 @@ def _rgb_with_dimensionality_reduction(
     if True:
         _subgraph_indices = subsample_features(
             features=features,
-
+            distance_type=AFFINITY_TO_DISTANCE[affinity_type],
             config=SampleConfig(method="fps"),
         )
         features = extrapolate_knn(
             anchor_features=features[_subgraph_indices],
             anchor_output=features[_subgraph_indices],
             extrapolation_features=features,
-
+            affinity_type=affinity_type,
         )
 
     subgraph_indices = subsample_features(
         features=features,
-
+        distance_type=AFFINITY_TO_DISTANCE[affinity_type],
         config=SampleConfig(method="fps", num_sample=num_sample),
     )
 
     _inp = features[subgraph_indices].numpy(force=True)
     _subgraph_embed = torch.tensor(reduction(
         n_components=reduction_dim,
-        metric=
+        metric=AFFINITY_TO_DISTANCE[affinity_type],
         random_state=seed,
         **reduction_kwargs
     ).fit_transform(_inp), device=features.device, dtype=features.dtype)
@@ -190,7 +190,7 @@ def _rgb_with_dimensionality_reduction(
         features[subgraph_indices],
         _subgraph_embed,
         features,
-
+        affinity_type,
         knn=knn,
         device=device,
         move_output_to_cpu=True
@@ -201,7 +201,7 @@
 def rgb_from_tsne_2d(
     features: torch.Tensor,
     num_sample: int = 1000,
-
+    affinity_type: AffinityOptions = "cosine",
     perplexity: int = 150,
     q: float = 0.95,
     knn: int = 10,
@@ -220,16 +220,12 @@ def rgb_from_tsne_2d(
         "sklearn import failed, please install `pip install scikit-learn`"
     )
     num_sample = min(num_sample, features.shape[0])
-
-        logging.warning(
-            f"perplexity is larger than num_sample, set perplexity to {num_sample // 2}"
-        )
-        perplexity = num_sample // 2
+    perplexity = min(perplexity, num_sample // 2)
 
     rgb = _rgb_with_dimensionality_reduction(
         features=features,
         num_sample=num_sample,
-
+        affinity_type=affinity_type,
        rgb_func=rgb_from_2d_colormap,
         q=q,
         knn=knn,
@@ -245,7 +241,7 @@
 def rgb_from_tsne_3d(
     features: torch.Tensor,
     num_sample: int = 1000,
-
+    affinity_type: AffinityOptions = "cosine",
     perplexity: int = 150,
     q: float = 0.95,
     knn: int = 10,
@@ -264,16 +260,12 @@ def rgb_from_tsne_3d(
         "sklearn import failed, please install `pip install scikit-learn`"
     )
     num_sample = min(num_sample, features.shape[0])
-
-        logging.warning(
-            f"perplexity is larger than num_sample, set perplexity to {num_sample // 2}"
-        )
-        perplexity = num_sample // 2
+    perplexity = min(perplexity, num_sample // 2)
 
     rgb = _rgb_with_dimensionality_reduction(
         features=features,
         num_sample=num_sample,
-
+        affinity_type=affinity_type,
         rgb_func=rgb_from_3d_rgb_cube,
         q=q,
         knn=knn,
@@ -289,7 +281,7 @@
 def rgb_from_euclidean_tsne_3d(
     features: torch.Tensor,
     num_sample: int = 1000,
-
+    affinity_type: AffinityOptions = "cosine",
     perplexity: int = 150,
     q: float = 0.95,
     knn: int = 10,
@@ -308,19 +300,15 @@ def rgb_from_euclidean_tsne_3d(
         "sklearn import failed, please install `pip install scikit-learn`"
     )
     num_sample = min(num_sample, features.shape[0])
-
-        logging.warning(
-            f"perplexity is larger than num_sample, set perplexity to {num_sample // 2}"
-        )
-        perplexity = num_sample // 2
+    perplexity = min(perplexity, num_sample // 2)
 
     def rgb_func(X_3d: torch.Tensor, q: float) -> torch.Tensor:
-        return rgb_from_3d_rgb_cube(to_euclidean(X_3d,
+        return rgb_from_3d_rgb_cube(to_euclidean(X_3d, AFFINITY_TO_DISTANCE[affinity_type]), q=q)
 
     rgb = _rgb_with_dimensionality_reduction(
         features=features,
         num_sample=num_sample,
-
+        affinity_type=affinity_type,
         rgb_func=rgb_func,
         q=q,
         knn=knn,
@@ -336,7 +324,7 @@
 def rgb_from_umap_2d(
     features: torch.Tensor,
     num_sample: int = 1000,
-
+    affinity_type: AffinityOptions = "cosine",
     n_neighbors: int = 150,
     min_dist: float = 0.1,
     q: float = 0.95,
@@ -357,7 +345,7 @@ def rgb_from_umap_2d(
     rgb = _rgb_with_dimensionality_reduction(
         features=features,
         num_sample=num_sample,
-
+        affinity_type=affinity_type,
         rgb_func=rgb_from_2d_colormap,
         q=q,
         knn=knn,
@@ -374,7 +362,7 @@
 def rgb_from_umap_sphere(
     features: torch.Tensor,
     num_sample: int = 1000,
-
+    affinity_type: AffinityOptions = "cosine",
     n_neighbors: int = 150,
     min_dist: float = 0.1,
     q: float = 0.95,
@@ -402,7 +390,7 @@ def rgb_from_umap_sphere(
     rgb = _rgb_with_dimensionality_reduction(
         features=features,
         num_sample=num_sample,
-
+        affinity_type=affinity_type,
         rgb_func=rgb_func,
         q=q,
         knn=knn,
@@ -420,7 +408,7 @@
 def rgb_from_umap_3d(
     features: torch.Tensor,
     num_sample: int = 1000,
-
+    affinity_type: AffinityOptions = "cosine",
     n_neighbors: int = 150,
     min_dist: float = 0.1,
     q: float = 0.95,
@@ -441,7 +429,7 @@ def rgb_from_umap_3d(
     rgb = _rgb_with_dimensionality_reduction(
         features=features,
         num_sample=num_sample,
-
+        affinity_type=affinity_type,
         rgb_func=rgb_from_3d_rgb_cube,
         q=q,
         knn=knn,
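Downstream, every `rgb_from_*` helper now takes `affinity_type` in place of the old distance argument, and the perplexity warning became a silent clamp. A sketch (scikit-learn required; the helper is assumed, as in 0.2.x, to return one RGB triple per input row):

import torch

from nystrom_ncut.visualize_utils import rgb_from_tsne_3d

eigenvectors = torch.randn(2048, 20)  # e.g. NCut output
rgb = rgb_from_tsne_3d(
    eigenvectors,
    num_sample=300,
    affinity_type="cosine",  # replaces the old distance argument
    perplexity=150,  # values above num_sample // 2 are clamped silently
)
print(rgb.shape)  # expected: torch.Size([2048, 3])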
{nystrom_ncut-0.2.1.dist-info → nystrom_ncut-0.3.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: nystrom_ncut
-Version: 0.2.1
+Version: 0.3.0
 Summary: Normalized Cut and Nyström Approximation
 Author-email: Huzheng Yang <huze.yann@gmail.com>, Wentinn Liao <wentinn.liao@gmail.com>
 Project-URL: Documentation, https://github.com/JophiArcana/Nystrom-NCUT/
nystrom_ncut-0.3.0.dist-info/RECORD
ADDED
@@ -0,0 +1,18 @@
+__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+nystrom_ncut/__init__.py,sha256=tKq9-2QRNFetckHY77qAaKEMjMCYTYcorS2f74aNtvk,540
+nystrom_ncut/common.py,sha256=eie19AHTMk6AGTxNnYq1UcFkHJVimeywAUYryXwaiHk,2428
+nystrom_ncut/distance_utils.py,sha256=pJA8NcIKyS7-YDpRGOkc7mwBQQEsYVemdkHiTjyU4n8,4300
+nystrom_ncut/sampling_utils.py,sha256=6lP8F6gftl4mgkavPsD7Vuk4erj4RtgILPhcj3YqLXk,4840
+nystrom_ncut/visualize_utils.py,sha256=Sfi_kKpvFFzBFoJnbo-pQpH2jhs-A6tH64SV_WGoq58,22740
+nystrom_ncut/nystrom/__init__.py,sha256=1aUXK87g4cXRXqNt6XkZsfyauw1-yv3sv0NmdmkWo-8,42
+nystrom_ncut/nystrom/distance_realization.py,sha256=RTI1_Q8fCUGAPSbXaVuNA-2B-11CEAfy2CwKWPJj6xQ,5830
+nystrom_ncut/nystrom/normalized_cut.py,sha256=jB_QALMY3l5CFfZPsrOFpEaquTrJP17muTrDZXxzUA8,7177
+nystrom_ncut/nystrom/nystrom_utils.py,sha256=hksDO8uuAb9xKoA1ZafGwXDlQN_gZJn_qHscaSoO8JE,14120
+nystrom_ncut/transformer/__init__.py,sha256=jjXjcNp3LrxeF6mqG9VY5k3asrqaY6bXzJz6wTpH78Q,105
+nystrom_ncut/transformer/axis_align.py,sha256=6LTR-syJ-f4pcbnMexFmFNn1QADDhH5gka6979YBRrI,3549
+nystrom_ncut/transformer/transformer_mixin.py,sha256=YAjrDWTL5Hjnk9J2OsoxvtwT2N0u8IdgMSx0rRFmZzE,1653
+nystrom_ncut-0.3.0.dist-info/LICENSE,sha256=2bm9uFabQZ3Ykb_SaSU_uUbAj2-htc6WJQmS_65qD00,1073
+nystrom_ncut-0.3.0.dist-info/METADATA,sha256=lhxicufu5Eo9HQsUiS_K-CzocemOeNravAaIXeCtriM,6058
+nystrom_ncut-0.3.0.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
+nystrom_ncut-0.3.0.dist-info/top_level.txt,sha256=gM8IWWHYysIRTCvCTcdS4RShOyl9pxpylgSwPUZR2XM,22
+nystrom_ncut-0.3.0.dist-info/RECORD,,
nystrom_ncut-0.2.1.dist-info/RECORD
REMOVED
@@ -1,18 +0,0 @@
-__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-nystrom_ncut/__init__.py,sha256=tKq9-2QRNFetckHY77qAaKEMjMCYTYcorS2f74aNtvk,540
-nystrom_ncut/common.py,sha256=_PGJoImSk_Fb_5Ri-e_IsFoCcSfbGS8CxYUUHVoNM50,2036
-nystrom_ncut/distance_utils.py,sha256=p-pYdpRrJsIhzxM_IxUqja7N8okngx52WGXD9pu_Aec,3129
-nystrom_ncut/sampling_utils.py,sha256=oMmhFcd_N_D15Ht7F0rCGPSgLeitJszAKMD3ICKwHNU,3105
-nystrom_ncut/visualize_utils.py,sha256=d3VXjzJPZPPyUMg_b8hKLQoBaRWvutu6u7l36S2gmIM,23007
-nystrom_ncut/nystrom/__init__.py,sha256=lAoO00i4FG5xqGKDO_OYcSvO4qPK64x_X_hDNBvuLUc,105
-nystrom_ncut/nystrom/distance_realization.py,sha256=InajllGtRVnLVlZoipZNbHFTGHaTs3zxizKe3kI2Los,5815
-nystrom_ncut/nystrom/normalized_cut.py,sha256=2ocwc4U3A6GGFs0cuL0DO1yNvt59SJ3uDtj00U0foPM,5906
-nystrom_ncut/nystrom/nystrom_utils.py,sha256=5w-2GAMb7b6ArZdPEnAnKPFFrsbHSfC-S78cvrR6O20,12806
-nystrom_ncut/transformer/__init__.py,sha256=jjXjcNp3LrxeF6mqG9VY5k3asrqaY6bXzJz6wTpH78Q,105
-nystrom_ncut/transformer/axis_align.py,sha256=6LTR-syJ-f4pcbnMexFmFNn1QADDhH5gka6979YBRrI,3549
-nystrom_ncut/transformer/transformer_mixin.py,sha256=YAjrDWTL5Hjnk9J2OsoxvtwT2N0u8IdgMSx0rRFmZzE,1653
-nystrom_ncut-0.2.1.dist-info/LICENSE,sha256=2bm9uFabQZ3Ykb_SaSU_uUbAj2-htc6WJQmS_65qD00,1073
-nystrom_ncut-0.2.1.dist-info/METADATA,sha256=l5t4vEFtPANsQY8PK0YHDJ1tw6dZUulU5daxX9T8QC0,6058
-nystrom_ncut-0.2.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-nystrom_ncut-0.2.1.dist-info/top_level.txt,sha256=gM8IWWHYysIRTCvCTcdS4RShOyl9pxpylgSwPUZR2XM,22
-nystrom_ncut-0.2.1.dist-info/RECORD,,
{nystrom_ncut-0.2.1.dist-info → nystrom_ncut-0.3.0.dist-info}/LICENSE
File without changes

{nystrom_ncut-0.2.1.dist-info → nystrom_ncut-0.3.0.dist-info}/top_level.txt
File without changes