PyPI - nystrom-ncut - Versions diffs - 0.2.2__py3-none-any.whl → 0.3.1__py3-none-any.whl - Mend

nystrom-ncut 0.2.2__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

nystrom_ncut/common.py +18 -5
nystrom_ncut/distance_utils.py +54 -32
nystrom_ncut/nystrom/__init__.py +0 -3
nystrom_ncut/nystrom/distance_realization.py +8 -9
nystrom_ncut/nystrom/normalized_cut.py +51 -47
nystrom_ncut/nystrom/nystrom_utils.py +78 -70
nystrom_ncut/sampling_utils.py +64 -51
nystrom_ncut/transformer/axis_align.py +58 -47
nystrom_ncut/transformer/transformer_mixin.py +0 -2
nystrom_ncut/visualize_utils.py +31 -43
{nystrom_ncut-0.2.2.dist-info → nystrom_ncut-0.3.1.dist-info}/METADATA +1 -1
nystrom_ncut-0.3.1.dist-info/RECORD +18 -0
{nystrom_ncut-0.2.2.dist-info → nystrom_ncut-0.3.1.dist-info}/WHEEL +1 -1
nystrom_ncut-0.2.2.dist-info/RECORD +0 -18
{nystrom_ncut-0.2.2.dist-info → nystrom_ncut-0.3.1.dist-info}/LICENSE +0 -0
{nystrom_ncut-0.2.2.dist-info → nystrom_ncut-0.3.1.dist-info}/top_level.txt +0 -0

nystrom_ncut/nystrom/nystrom_utils.py CHANGED Viewed

@@ -25,15 +25,15 @@ EigSolverOptions = Literal["svd_lowrank", "lobpcg", "svd", "eigh"]
 class OnlineKernel:
     @abstractmethod
-    def fit(self, features: torch.Tensor) -> "OnlineKernel":                # [n x d]
+    def fit(self, features: torch.Tensor) -> "OnlineKernel":                # [... x n x d]
         """"""
     @abstractmethod
-    def update(self, features: torch.Tensor) -> torch.Tensor:               # [m x d] -> [m x n]
+    def update(self, features: torch.Tensor) -> torch.Tensor:               # [... x m x d] -> [... x m x n]
         """"""
     @abstractmethod
-    def transform(self, features: torch.Tensor = None) -> torch.Tensor:     # [m x d] -> [m x n]
+    def transform(self, features: torch.Tensor = None) -> torch.Tensor:     # [... x m x d] -> [... x m x n]
         """"""
@@ -54,20 +54,21 @@ class OnlineNystrom(TorchTransformerMixin):
         self.n_components: int = n_components
         self.kernel: OnlineKernel = kernel
         self.eig_solver: EigSolverOptions = eig_solver
+        self.shape: torch.Size = None               # ...
         self.chunk_size = chunk_size
         # Anchor matrices
-        self.anchor_features: torch.Tensor = None   # [n x d]
-        self.A: torch.Tensor = None                 # [n x n]
-        self.Ahinv: torch.Tensor = None             # [n x n]
-        self.Ahinv_UL: torch.Tensor = None          # [n x indirect_pca_dim]
-        self.Ahinv_VT: torch.Tensor = None          # [indirect_pca_dim x n]
+        self.anchor_features: torch.Tensor = None   # [... x n x d]
+        self.A: torch.Tensor = None                 # [... x n x n]
+        self.Ahinv: torch.Tensor = None             # [... x n x n]
+        self.Ahinv_UL: torch.Tensor = None          # [... x n x indirect_pca_dim]
+        self.Ahinv_VT: torch.Tensor = None          # [... x indirect_pca_dim x n]
         # Updated matrices
-        self.S: torch.Tensor = None                 # [n x n]
-        self.transform_matrix: torch.Tensor = None  # [n x n_components]
-        self.eigenvalues_: torch.Tensor = None      # [n]
+        self.S: torch.Tensor = None                 # [... x n x n]
+        self.transform_matrix: torch.Tensor = None  # [... x n x n_components]
+        self.eigenvalues_: torch.Tensor = None      # [... x n]
     def _update_to_kernel(self, d: int) -> Tuple[torch.Tensor, torch.Tensor]:
         self.A = self.S = self.kernel.transform()
@@ -75,10 +76,10 @@ class OnlineNystrom(TorchTransformerMixin):
             self.A,
             num_eig=d + 1,  # d * (d + 3) // 2 + 1,
             eig_solver=self.eig_solver,
-        )                                                                                           # [n x (? + 1)], [? + 1]
-        self.Ahinv_UL = U * (L ** -0.5)                                                             # [n x (? + 1)]
-        self.Ahinv_VT = U.mT                                                                        # [(? + 1) x n]
-        self.Ahinv = self.Ahinv_UL @ self.Ahinv_VT                                                  # [n x n]
+        )                                                                                           # [... x n x (? + 1)], [... x (? + 1)]
+        self.Ahinv_UL = U * (L[..., None, :] ** -0.5)                                               # [... x n x (? + 1)]
+        self.Ahinv_VT = U.mT                                                                        # [... x (? + 1) x n]
+        self.Ahinv = self.Ahinv_UL @ self.Ahinv_VT                                                  # [... x n x n]
         return U, L
     def fit(self, features: torch.Tensor) -> "OnlineNystrom":
@@ -89,65 +90,63 @@ class OnlineNystrom(TorchTransformerMixin):
         self.anchor_features = features
         self.kernel.fit(self.anchor_features)
-        U, L = self._update_to_kernel(features.shape[-1])                                           # [n x (d + 1)], [d + 1]
+        U, L = self._update_to_kernel(features.shape[-1])                                           # [... x n x (d + 1)], [... x (d + 1)]
-        self.transform_matrix = (U / L)[:, :self.n_components]                                      # [n x n_components]
-        self.eigenvalues_ = L[:self.n_components]                                                   # [n_components]
-        self.is_fitted = True
-        return U[:, :self.n_components]                                                             # [n x n_components]
+        self.transform_matrix = (U / L[..., None, :])[..., :, :self.n_components]                   # [... x n x n_components]
+        self.eigenvalues_ = L[..., :self.n_components]                                              # [... x n_components]
+        return U[..., :, :self.n_components]                                                        # [... x n x n_components]
     def update(self, features: torch.Tensor) -> torch.Tensor:
         d = features.shape[-1]
-        n_chunks = ceildiv(len(features), self.chunk_size)
+        n_chunks = ceildiv(features.shape[-2], self.chunk_size)
         if n_chunks > 1:
             """ Chunked version """
-            chunks = torch.chunk(features, n_chunks, dim=0)
+            chunks = torch.chunk(features, n_chunks, dim=-2)
             for chunk in chunks:
                 self.kernel.update(chunk)
             self._update_to_kernel(d)
-            compressed_BBT = 0.0                                                                    # [(? + 1) x (? + 1))]
+            compressed_BBT = 0.0                                                                    # [... x (? + 1) x (? + 1))]
             for chunk in chunks:
-                _B = self.kernel.transform(chunk).mT                                                # [n x _m]
-                _compressed_B = self.Ahinv_VT @ _B                                                  # [(? + 1) x _m]
-                compressed_BBT = compressed_BBT + _compressed_B @ _compressed_B.mT                  # [(? + 1) x (? + 1)]
-            self.S = self.S + self.Ahinv_UL @ compressed_BBT @ self.Ahinv_UL.mT                     # [n x n]
-            US, self.eigenvalues_ = solve_eig(self.S, self.n_components, self.eig_solver)           # [n x n_components], [n_components]
-            self.transform_matrix = self.Ahinv @ US * (self.eigenvalues_ ** -0.5)                   # [n x n_components]
+                _B = self.kernel.transform(chunk).mT                                                # [... x n x _m]
+                _compressed_B = self.Ahinv_VT @ _B                                                  # [... x (? + 1) x _m]
+                _compressed_B = torch.nan_to_num(_compressed_B, nan=0.0)
+                compressed_BBT = compressed_BBT + _compressed_B @ _compressed_B.mT                  # [... x (? + 1) x (? + 1)]
+            self.S = self.S + self.Ahinv_UL @ compressed_BBT @ self.Ahinv_UL.mT                     # [... x n x n]
+            US, self.eigenvalues_ = solve_eig(self.S, self.n_components, self.eig_solver)           # [... x n x n_components], [... x n_components]
+            self.transform_matrix = self.Ahinv @ US * (self.eigenvalues_[..., None, :] ** -0.5)     # [... x n x n_components]
             VS = []
             for chunk in chunks:
-                VS.append(self.kernel.transform(chunk) @ self.transform_matrix)                     # [_m x n_components]
-            VS = torch.cat(VS, dim=0)
-            return VS                                                                               # [m x n_components]
+                VS.append(self.kernel.transform(chunk) @ self.transform_matrix)                     # [... x _m x n_components]
+            VS = torch.cat(VS, dim=-2)
+            return VS                                                                               # [... x m x n_components]
         else:
             """ Unchunked version """
-            B = self.kernel.update(features).mT                                                     # [n x m]
+            B = self.kernel.update(features).mT                                                     # [... x n x m]
             self._update_to_kernel(d)
-            compressed_B = self.Ahinv_VT @ B                                                        # [indirect_pca_dim x m]
+            compressed_B = self.Ahinv_VT @ B                                                        # [... x (? + 1) x m]
+            compressed_B = torch.nan_to_num(compressed_B, nan=0.0)
-            self.S = self.S + self.Ahinv_UL @ (compressed_B @ compressed_B.mT) @ self.Ahinv_UL.mT   # [n x n]
-            US, self.eigenvalues_ = solve_eig(self.S, self.n_components, self.eig_solver)           # [n x n_components], [n_components]
-            self.transform_matrix = self.Ahinv @ US * (self.eigenvalues_ ** -0.5)                   # [n x n_components]
+            self.S = self.S + self.Ahinv_UL @ (compressed_B @ compressed_B.mT) @ self.Ahinv_UL.mT   # [... x n x n]
+            US, self.eigenvalues_ = solve_eig(self.S, self.n_components, self.eig_solver)           # [... x n x n_components], [... x n_components]
+            self.transform_matrix = self.Ahinv @ US * (self.eigenvalues_[..., None, :] ** -0.5)     # [... x n x n_components]
-            return B.mT @ self.transform_matrix                                                     # [m x n_components]
+            return B.mT @ self.transform_matrix                                                     # [... x m x n_components]
-    def transform(self, features: torch.Tensor = None) -> torch.Tensor:
-        if features is None:
-            VS = self.A @ self.transform_matrix                                                     # [n x n_components]
+    def transform(self, features: torch.Tensor) -> torch.Tensor:
+        n_chunks = ceildiv(features.shape[-2], self.chunk_size)
+        if n_chunks > 1:
+            """ Chunked version """
+            chunks = torch.chunk(features, n_chunks, dim=-2)
+            VS = []
+            for chunk in chunks:
+                VS.append(self.kernel.transform(chunk) @ self.transform_matrix)                     # [... x _m x n_components]
+            VS = torch.cat(VS, dim=-2)
         else:
-            n_chunks = ceildiv(len(features), self.chunk_size)
-            if n_chunks > 1:
-                """ Chunked version """
-                chunks = torch.chunk(features, n_chunks, dim=0)
-                VS = []
-                for chunk in chunks:
-                    VS.append(self.kernel.transform(chunk) @ self.transform_matrix)                 # [_m x n_components]
-                VS = torch.cat(VS, dim=0)
-            else:
-                """ Unchunked version """
-                VS = self.kernel.transform(features) @ self.transform_matrix                        # [m x n_components]
-        return VS                                                                                   # [m x n_components]
+            """ Unchunked version """
+            VS = self.kernel.transform(features) @ self.transform_matrix                            # [... x m x n_components]
+        return VS                                                                                   # [... x m x n_components]
 class OnlineNystromSubsampleFit(OnlineNystrom):
@@ -155,7 +154,7 @@ class OnlineNystromSubsampleFit(OnlineNystrom):
         self,
         n_components: int,
         kernel: OnlineKernel,
-        distance: DistanceOptions,
+        distance_type: DistanceOptions,
         sample_config: SampleConfig,
         eig_solver: EigSolverOptions = "svd_lowrank",
         chunk_size: int = 8192,
@@ -167,7 +166,7 @@ class OnlineNystromSubsampleFit(OnlineNystrom):
             eig_solver=eig_solver,
             chunk_size=chunk_size,
         )
-        self.distance: DistanceOptions = distance
+        self.distance_type: DistanceOptions = distance_type
         self.sample_config: SampleConfig = sample_config
         self.sample_config._ncut_obj = copy.deepcopy(self)
         self.anchor_indices: torch.Tensor = None
@@ -177,7 +176,7 @@ class OnlineNystromSubsampleFit(OnlineNystrom):
         features: torch.Tensor,
         precomputed_sampled_indices: torch.Tensor,
     ) -> Tuple[torch.Tensor, torch.Tensor]:
-        _n = features.shape[0]
+        _n = features.shape[-2]
         if self.sample_config.num_sample >= _n:
             logging.info(
                 f"NCUT nystrom num_sample is larger than number of input samples, nyström approximation is not needed, setting num_sample={_n}"
@@ -189,16 +188,17 @@ class OnlineNystromSubsampleFit(OnlineNystrom):
         else:
             self.anchor_indices = subsample_features(
                 features=features,
-                disttype=self.distance,
+                distance_type=self.distance_type,
                 config=self.sample_config,
             )
-        sampled_features = features[self.anchor_indices]
+        sampled_features = torch.gather(features, -2, self.anchor_indices[..., None].expand([-1] * self.anchor_indices.ndim + [features.shape[-1]]))
         OnlineNystrom.fit(self, sampled_features)
-        _n_not_sampled = _n - len(sampled_features)
+        _n_not_sampled = _n - self.anchor_indices.shape[-1]
         if _n_not_sampled > 0:
-            unsampled_indices = torch.full((_n,), True, device=features.device).scatter_(0, self.anchor_indices, False)
-            unsampled_features = features[unsampled_indices]
+            unsampled_mask = torch.full(features.shape[:-1], True, device=features.device).scatter_(-1, self.anchor_indices, False)
+            unsampled_indices = torch.where(unsampled_mask)[-1].view((*features.shape[:-2], -1))
+            unsampled_features = torch.gather(features, -2, unsampled_indices[..., None].expand([-1] * unsampled_indices.ndim + [features.shape[-1]]))
             V_unsampled = OnlineNystrom.update(self, unsampled_features)
         else:
             unsampled_indices = V_unsampled = None
@@ -236,12 +236,12 @@ class OnlineNystromSubsampleFit(OnlineNystrom):
             (torch.Tensor): eigen_values, sorted in descending order, shape (num_eig,)
         """
         unsampled_indices, V_unsampled = OnlineNystromSubsampleFit._fit_helper(self, features, precomputed_sampled_indices)
-        V_sampled = OnlineNystrom.transform(self)
+        V_sampled = OnlineNystrom.transform(self, self.anchor_features)
         if unsampled_indices is not None:
-            V = torch.zeros((len(unsampled_indices), self.n_components), device=features.device)
-            V[~unsampled_indices] = V_sampled
-            V[unsampled_indices] = V_unsampled
+            V = torch.zeros((*features.shape[:-1], self.n_components), device=features.device)
+            for (indices, _V) in [(self.anchor_indices, V_sampled), (unsampled_indices, V_unsampled)]:
+                V.scatter_(-2, indices[..., None].expand([-1] * indices.ndim + [self.n_components]), _V)
         else:
             V = V_sampled
         return V
@@ -264,12 +264,16 @@ def solve_eig(
         (torch.Tensor): eigenvectors corresponding to the eigenvalues, shape (n_samples, num_eig)
         (torch.Tensor): eigenvalues of the eigenvectors, sorted in descending order
     """
-    A = A + eig_value_buffer * torch.eye(A.shape[0], device=A.device)
+    shape: torch.Size = A.shape[:-2]
+    A = A.view((-1, *A.shape[-2:]))
+    bsz: int = A.shape[0]
+    A = A + eig_value_buffer * torch.eye(A.shape[-1], device=A.device)
     # compute eigenvectors
     if eig_solver == "svd_lowrank":  # default
         # only top q eigenvectors, fastest
-        eigen_vector, eigen_value, _ = torch.svd_lowrank(A, q=num_eig)
+        eigen_vector, eigen_value, _ = torch.svd_lowrank(A, q=num_eig)              # complex: [(...) x N x D], [(...) x D]
     elif eig_solver == "lobpcg":
         # only top k eigenvectors, fast
         eigen_value, eigen_vector = torch.lobpcg(A, k=num_eig)
@@ -286,11 +290,15 @@ def solve_eig(
     eigen_value = eigen_value - eig_value_buffer
     # sort eigenvectors by eigenvalues, take top (descending order)
-    indices = torch.topk(eigen_value.abs(), k=num_eig, dim=0).indices
-    eigen_value, eigen_vector = eigen_value[indices], eigen_vector[:, indices]
+    indices = torch.topk(eigen_value.abs(), k=num_eig, dim=-1).indices              # int: [(...) x S]
+    eigen_value = eigen_value[torch.arange(bsz)[:, None], indices]                  # complex: [(...) x S]
+    eigen_vector = eigen_vector[torch.arange(bsz)[:, None], :, indices].mT          # complex: [(...) x N x S]
     # correct the random rotation (flipping sign) of eigenvectors
-    sign = torch.sum(eigen_vector.real, dim=0).sign()
+    sign = torch.sign(torch.sum(eigen_vector.real, dim=-2, keepdim=True))           # float: [(...) x 1 x S]
     sign[sign == 0] = 1.0
     eigen_vector = eigen_vector * sign
+    eigen_value = eigen_value.view((*shape, *eigen_value.shape[-1:]))               # complex: [... x S]
+    eigen_vector = eigen_vector.view((*shape, *eigen_vector.shape[-2:]))            # complex: [... x N x S]
     return eigen_vector, eigen_value

nystrom_ncut/sampling_utils.py CHANGED Viewed

@@ -1,17 +1,22 @@
-import logging
 from dataclasses import dataclass
 from typing import Literal
 import torch
 from pytorch3d.ops import sample_farthest_points
+from .common import (
+    default_device,
+)
 from .distance_utils import (
     DistanceOptions,
     to_euclidean,
 )
+from .transformer import (
+    TorchTransformerMixin,
+)
-SampleOptions = Literal["random", "fps", "fps_recursive"]
+SampleOptions = Literal["full", "random", "fps", "fps_recursive"]
 @dataclass
@@ -20,69 +25,77 @@ class SampleConfig:
     num_sample: int = 10000
     fps_dim: int = 12
     n_iter: int = None
-    _ncut_obj: object = None
+    _ncut_obj: TorchTransformerMixin = None
 @torch.no_grad()
 def subsample_features(
     features: torch.Tensor,
-    disttype: DistanceOptions,
+    distance_type: DistanceOptions,
     config: SampleConfig,
-    max_draw: int = 1000000,
 ):
-    features = features.detach()
-    if config.num_sample >= features.shape[0]:
-        # if too many samples, use all samples and bypass Nystrom-like approximation
-        logging.info(
-            "num_sample is larger than total, bypass Nystrom-like approximation"
-        )
-        sampled_indices = torch.arange(features.shape[0])
-    else:
-        # sample subgraph
-        if config.method == "fps":  # default
-            features = to_euclidean(features, disttype)
-            if config.num_sample > max_draw:
-                logging.warning(
-                    f"num_sample is larger than max_draw, apply farthest point sampling on random sampled {max_draw} samples"
-                )
-                draw_indices = torch.randperm(features.shape[0])[:max_draw]
-                sampled_indices = fpsample(features[draw_indices], config)
-                sampled_indices = draw_indices[sampled_indices]
-            else:
-                sampled_indices = fpsample(features, config)
-        elif config.method == "random":  # not recommended
-            sampled_indices = torch.randperm(features.shape[0])[:config.num_sample]
-        elif config.method == "fps_recursive":
-            features = to_euclidean(features, disttype)
-            sampled_indices = subsample_features(
-                features=features,
-                disttype=disttype,
-                config=SampleConfig(method="fps", num_sample=config.num_sample, fps_dim=config.fps_dim)
-            )
-            nc = config._ncut_obj
-            for _ in range(config.n_iter):
-                fps_features, eigenvalues = nc.fit_transform(features, precomputed_sampled_indices=sampled_indices)
-                fps_features = to_euclidean(fps_features[:, :config.fps_dim], "cosine")
-                sampled_indices = torch.sort(fpsample(fps_features, config)).values
+    features = features.detach()                                                                        # float: [... x n x d]
+    with default_device(features.device):
+        if config.method == "full" or config.num_sample >= features.shape[0]:
+            sampled_indices = torch.arange(features.shape[-2]).expand(features.shape[:-1])              # int: [... x n]
         else:
-            raise ValueError("sample_method should be 'farthest' or 'random'")
-        sampled_indices = torch.sort(sampled_indices).values
-    return sampled_indices.to(features.device)
+            # sample
+            match config.method:
+                case "fps":  # default
+                    sampled_indices = fpsample(to_euclidean(features, distance_type), config)
+                case "random":  # not recommended
+                    mask = torch.all(torch.isfinite(features), dim=-1)                                  # bool: [... x n]
+                    weights = mask.to(torch.float) + torch.rand(mask.shape)                             # float: [... x n]
+                    sampled_indices = torch.topk(weights, k=config.num_sample, dim=-1).indices          # int: [... x num_sample]
+                case "fps_recursive":
+                    features = to_euclidean(features, distance_type)                                    # float: [... x n x d]
+                    sampled_indices = subsample_features(
+                        features=features,
+                        distance_type=distance_type,
+                        config=SampleConfig(method="fps", num_sample=config.num_sample, fps_dim=config.fps_dim)
+                    )                                                                                   # int: [... x num_sample]
+                    nc = config._ncut_obj
+                    for _ in range(config.n_iter):
+                        fps_features, eigenvalues = nc.fit_transform(features, precomputed_sampled_indices=sampled_indices)
+                        fps_features = to_euclidean(fps_features[:, :config.fps_dim], "cosine")
+                        sampled_indices = torch.sort(fpsample(fps_features, config), dim=-1).values
+                case _:
+                    raise ValueError("sample_method should be 'farthest' or 'random'")
+            sampled_indices = torch.sort(sampled_indices, dim=-1).values
+        return sampled_indices
 def fpsample(
     features: torch.Tensor,
     config: SampleConfig,
 ):
-    # PCA to reduce the dimension
-    if features.shape[1] > config.fps_dim:
-        U, S, V = torch.pca_lowrank(features, q=config.fps_dim)
-        features = U * S
+    shape = features.shape[:-2]                                                         # ...
+    features = features.view((-1, *features.shape[-2:]))                                # [(...) x n x d]
+    bsz = features.shape[0]
+    mask = torch.all(torch.isfinite(features), dim=-1)                                  # bool: [(...) x n]
+    count = torch.sum(mask, dim=-1)                                                     # int: [(...)]
+    order = torch.topk(mask.to(torch.int), k=torch.max(count).item(), dim=-1).indices   # int: [(...) x max_count]
+    features = torch.nan_to_num(features[torch.arange(bsz)[:, None], order], nan=0.0)   # float: [(...) x max_count x d]
+    if features.shape[-1] > config.fps_dim:
+        U, S, V = torch.pca_lowrank(features, q=config.fps_dim)                         # float: [(...) x max_count x fps_dim], [(...) x fps_dim], [(...) x fps_dim x fps_dim]
+        features = U * S[..., None, :]                                                  # float: [(...) x max_count x fps_dim]
     try:
-        return sample_farthest_points(features[None], K=config.num_sample)[1][0]
+        sample_indices = sample_farthest_points(
+            features, lengths=count, K=config.num_sample
+        )[1]                                                                            # int: [(...) x num_sample]
     except RuntimeError:
-        return sample_farthest_points(features[None].cpu(), K=config.num_sample)[1][0].to(features.device)
+        original_device = features.device
+        alternative_device = "cuda" if original_device == "cpu" else "cpu"
+        sample_indices = sample_farthest_points(
+            features.to(alternative_device), lengths=count.to(alternative_device), K=config.num_sample
+        )[1].to(original_device)                                                        # int: [(...) x num_sample]
+    sample_indices = torch.gather(order, 1, sample_indices)                             # int: [(...) x num_sample]
+    return sample_indices.view((*shape, *sample_indices.shape[-1:]))                    # int: [... x num_sample]

nystrom_ncut/transformer/axis_align.py CHANGED Viewed

@@ -3,6 +3,9 @@ from typing import Literal
 import torch
 import torch.nn.functional as Fn
+from ..common import (
+    default_device,
+)
 from .transformer_mixin import (
     TorchTransformerMixin,
 )
@@ -27,51 +30,59 @@ class AxisAlign(TorchTransformerMixin):
     def fit(self, X: torch.Tensor) -> "AxisAlign":
         # Normalize eigenvectors
-        n, d = X.shape
-        normalized_X = Fn.normalize(X, p=2, dim=-1)
-        # Initialize R matrix with the first column from a random row of EigenVectors
-        self.R = torch.empty((d, d), device=X.device)
-        self.R[0] = normalized_X[torch.randint(0, n, (), device=X.device)]
-        # Loop to populate R with k orthogonal directions
-        c = torch.zeros((n,), device=X.device)
-        for i in range(1, d):
-            c += torch.abs(normalized_X @ self.R[i - 1])
-            self.R[i] = normalized_X[torch.argmin(c, dim=0)]
-        # Iterative optimization loop
-        idx, prev_objective = None, torch.inf
-        for _ in range(self.max_iter):
-            # Discretize the projected eigenvectors
-            idx = torch.argmax(normalized_X @ self.R.mT, dim=-1)
-            M = torch.zeros((d, d), device=X.device).index_add_(0, idx, normalized_X)
-            # Check for convergence
-            objective = torch.norm(M)
-            if torch.abs(objective - prev_objective) < torch.finfo(torch.float32).eps:
-                break
-            prev_objective = objective
-            # SVD decomposition to compute the next R
-            U, S, Vh = torch.linalg.svd(M, full_matrices=False)
-            self.R = U @ Vh
-        # Permute the rotation matrix so the dimensions are sorted in descending cluster significance
-        if self.sort_method == "count":
-            sort_metric = torch.bincount(idx, minlength=d)
-        elif self.sort_method == "norm":
-            rotated_X = X @ self.R.mT
-            sort_metric = torch.linalg.norm(rotated_X, dim=0)
-        elif self.sort_method == "marginal_norm":
-            rotated_X = X @ self.R.mT
-            sort_metric = torch.zeros((d,), device=X.device).index_add_(0, idx, rotated_X[range(n), idx] ** 2)
-        else:
-            raise ValueError(f"Invalid sort method {self.sort_method}.")
-        self.R = self.R[torch.argsort(sort_metric, dim=0, descending=True)]
-        self.is_fitted = True
-        return self
+        with default_device(X.device):
+            d = X.shape[-1]
+            normalized_X = Fn.normalize(X, p=2, dim=-1)                                                         # float: [... x n x d]
+            # Initialize R matrix with the first column from a random row of EigenVectors
+            def get_idx(idx: torch.Tensor) -> torch.Tensor:
+                return torch.gather(normalized_X, -2, idx[..., None, None].expand([-1] * (X.ndim - 2) + [1, d]))[..., 0, :]
+            self.R = torch.empty((*X.shape[:-2], d, d))                                                         # float: [... x d x d]
+            mask = torch.all(torch.isfinite(normalized_X), dim=-1)                                              # bool: [... x n]
+            start_idx = torch.argmax(mask.to(torch.float) + torch.rand(mask.shape), dim=-1)                     # int: [...]
+            self.R[..., 0, :] = get_idx(start_idx)
+            # Loop to populate R with k orthogonal directions
+            c = torch.zeros(X.shape[:-1])                                                                       # float: [... x n]
+            for i in range(1, d):
+                c += torch.abs(normalized_X @ self.R[..., i - 1, :, None])[..., 0]
+                self.R[..., i, :] = get_idx(torch.argmin(c.nan_to_num(nan=torch.inf), dim=-1))
+            # Iterative optimization loop
+            normalized_X = torch.nan_to_num(normalized_X, nan=0.0)
+            idx, prev_objective = None, torch.inf
+            for _ in range(self.max_iter):
+                # Discretize the projected eigenvectors
+                idx = torch.argmax(normalized_X @ self.R.mT, dim=-1)                                                    # int: [... x n]
+                M = torch.sum((idx[..., None] == torch.arange(d))[..., None] * normalized_X[..., :, None, :], dim=-3)   # float: [... x d x d]
+                # Check for convergence
+                objective = torch.norm(M)
+                if torch.abs(objective - prev_objective) < torch.finfo(torch.float32).eps:
+                    break
+                prev_objective = objective
+                # SVD decomposition to compute the next R
+                U, S, Vh = torch.linalg.svd(M, full_matrices=False)
+                self.R = U @ Vh
+            # Permute the rotation matrix so the dimensions are sorted in descending cluster significance
+            match self.sort_method:
+                case "count":
+                    sort_metric = torch.sum((idx[..., None] == torch.arange(d)), dim=-2)
+                case "norm":
+                    rotated_X = torch.nan_to_num(X @ self.R.mT, nan=0.0)
+                    sort_metric = torch.linalg.norm(rotated_X, dim=-2)
+                case "marginal_norm":
+                    rotated_X = torch.nan_to_num(X @ self.R.mT, nan=0.0)
+                    sort_metric = torch.sum((idx[..., None] == torch.arange(d)) * (torch.gather(rotated_X, -1, idx[..., None]) ** 2), dim=-2)
+                case _:
+                    raise ValueError(f"Invalid sort method {self.sort_method}.")
+            order = torch.argsort(sort_metric, dim=-1, descending=True)
+            self.R = torch.gather(self.R, -2, order[..., None].expand([-1] * order.ndim + [d]))
+            return self
     def transform(self, X: torch.Tensor, normalize: bool = True, hard: bool = False) -> torch.Tensor:
         """
@@ -83,9 +94,9 @@ class AxisAlign(TorchTransformerMixin):
             torch.Tensor: Discretized eigenvectors, shape (n, k), each row is a one-hot vector.
         """
         if normalize:
-            X = Fn.normalize(X, p=2, dim=1)
+            X = Fn.normalize(X, p=2, dim=-1)
         rotated_X = X @ self.R.mT
-        return torch.argmax(rotated_X, dim=1) if hard else rotated_X
+        return torch.argmax(rotated_X, dim=-1) if hard else rotated_X
     def fit_transform(self, X: torch.Tensor, normalize: bool = True, hard: bool = False) -> torch.Tensor:
         return self.fit(X).transform(X, normalize=normalize, hard=hard)

nystrom_ncut/transformer/transformer_mixin.py CHANGED Viewed

@@ -36,8 +36,6 @@ class TorchTransformerMixin:
     >>> transformer.fit_transform(X)
     array([1, 1, 1])
     """
-    def __init__(self):
-        self.is_fitted: bool = False
     @abstractmethod
     def fit(self, X: torch.Tensor, **fit_kwargs: Any) -> "TorchTransformerMixin":

nystrom-ncut 0.2.2__py3-none-any.whl → 0.3.1__py3-none-any.whl

nystrom-ncut 0.2.2__py3-none-any.whl → 0.3.1__py3-none-any.whl