PyPI - ncut-pytorch - Versions diffs - 2.0.0.dev1__tar.gz → 2.0.0.dev2__tar.gz - Mend

ncut-pytorch 2.0.0.dev1__tar.gz → 2.0.0.dev2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38) hide show

{ncut_pytorch-2.0.0.dev1/ncut_pytorch.egg-info → ncut_pytorch-2.0.0.dev2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ncut_pytorch
-Version: 2.0.0.dev1
+Version: 2.0.0.dev2
 Summary: Normalized Cut and Nyström Approximation
 Author-email: Huzheng Yang <huze.yann@gmail.com>
 License-Expression: MIT
@@ -18,11 +18,12 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: torch~=2.0
 Requires-Dist: fpsample>=0.2.0
-Requires-Dist: torchvision>=0.15.0
 Requires-Dist: pytorch-lightning~=2.0
 Requires-Dist: pillow
 Requires-Dist: numpy
 Requires-Dist: tqdm
+Provides-Extra: vision
+Requires-Dist: torchvision>=0.15.0; extra == "vision"
 Dynamic: license-file

ncut_pytorch-2.0.0.dev2/ncut_pytorch/dino/__init__.py ADDED Viewed

@@ -0,0 +1,4 @@
+from .api import hires_dino_256
+from .api import hires_dino_512
+from .api import hires_dino_1024
+from .api import hires_dinov2

ncut_pytorch-2.0.0.dev2/ncut_pytorch/dino/api.py ADDED Viewed

@@ -0,0 +1,50 @@
+from typing import Tuple
+from torchvision import transforms
+from .hires_dino import hires_dino
+from .hires_dino import HighResDINO
+from .transform import get_input_transform
+def hires_dino_256() -> Tuple[HighResDINO, transforms.Compose]:
+    model = hires_dino(dino_name="dino_vitb8",
+                    stride=6,
+                    shift_dists=[1, 2, 3],
+                    flip_transforms=True,
+                    chunk_size=6,
+                    feature_resolution=256)
+    transform = get_input_transform(resize=256)
+    return model, transform
+def hires_dino_512() -> Tuple[HighResDINO, transforms.Compose]:
+    model = hires_dino(dino_name="dino_vitb8",
+                    stride=6,
+                    shift_dists=[1, 2, 3],
+                    flip_transforms=True,
+                    chunk_size=4,
+                    feature_resolution=512)
+    transform = get_input_transform(resize=512)
+    return model, transform
+def hires_dino_1024() -> Tuple[HighResDINO, transforms.Compose]:
+    model = hires_dino(dino_name="dino_vitb8",
+                    stride=6,
+                    shift_dists=[1, 2, 3],
+                    flip_transforms=True,
+                    chunk_size=1,
+                    feature_resolution=1024)
+    transform = get_input_transform(resize=1024)
+    return model, transform
+def hires_dinov2() -> Tuple[HighResDINO, transforms.Compose]:
+    model = hires_dino(dino_name="dinov2_vitb14_reg",
+                    stride=6,
+                    shift_dists=[1, 2, 3],
+                    flip_transforms=True,
+                    chunk_size=1,
+                    feature_resolution=1008)
+    transform = get_input_transform(resize=1008)
+    return model, transform

{ncut_pytorch-2.0.0.dev1 → ncut_pytorch-2.0.0.dev2}/ncut_pytorch/dino/hires_dino.py RENAMED Viewed

@@ -28,10 +28,12 @@ class HighResDINO(nn.Module):
     def __init__(
         self,
         dino_name: DINONameOptions,
-        stride: int = 6,
-        dtype: torch.dtype | int = torch.float32,
+        stride: int = 5,
+        dtype: torch.dtype | int = torch.float16,
         track_grad: bool = False,
-        attention_mask_ratio: float = 0.25,
+        attention_mask_ratio: float = 0.1,
+        chunk_size: int = 4,
+        feature_resolution: int = 1024,
     ) -> None:
         super().__init__()
@@ -60,7 +62,9 @@ class HighResDINO(nn.Module):
         self.feat_dim: int = feat
         self.n_heads: int = 6
         self.n_register_tokens = 4
+        self.chunk_size = chunk_size
+        self.feature_resolution = feature_resolution
         # mask out some of the attention Keys (K) to save compute
         self.attention_mask_ratio = attention_mask_ratio
@@ -142,7 +146,7 @@ class HighResDINO(nn.Module):
         attn_block = final_block.attn  # type: ignore
         # hilariously this also works for dino i.e we can patch dino's attn block forward to
         # use the memeory efficienty attn like in dinov2
-        attn_block.forward = MethodType(Patch._fix_mem_eff_attn(self.attention_mask_ratio), attn_block)
+        attn_block.forward = MethodType(Patch._fix_attn_masking(self.attention_mask_ratio), attn_block)
         if "dinov2" in dino_name:
             final_block.forward = MethodType(Patch._fix_block_forward_dv2(), final_block)  # type: ignore
             dino_model.forward_feats_attn = MethodType(  # type: ignore
@@ -152,7 +156,7 @@ class HighResDINO(nn.Module):
             for i, blk in enumerate(dino_model.blocks):
                 blk.forward = MethodType(Patch._fix_block_forward_dino(), blk)
                 attn_block = blk.attn
-                attn_block.forward = MethodType(Patch._fix_mem_eff_attn(self.attention_mask_ratio), attn_block)
+                attn_block.forward = MethodType(Patch._fix_attn_masking(self.attention_mask_ratio), attn_block)
             final_block.forward = MethodType(Patch._fix_block_forward_dino(), final_block)  # type: ignore
             dino_model.forward_feats_attn = MethodType(  # type: ignore
                 Patch._add_new_forward_features_dino(), dino_model
@@ -161,7 +165,7 @@ class HighResDINO(nn.Module):
             for i, blk in enumerate(dino_model.blocks):
                 blk.forward = MethodType(Patch._fix_block_forward_dino(), blk)
                 attn_block = blk.attn
-                attn_block.forward = MethodType(Patch._fix_mem_eff_attn(self.attention_mask_ratio), attn_block)
+                attn_block.forward = MethodType(Patch._fix_attn_masking(self.attention_mask_ratio), attn_block)
             final_block.forward = MethodType(Patch._fix_block_forward_dino(), final_block)  # type: ignore
             dino_model.forward_feats_attn = MethodType(  # type: ignore
                 Patch._add_new_forward_features_vit(), dino_model
@@ -244,8 +248,8 @@ class HighResDINO(nn.Module):
         out_feature_img: torch.Tensor = torch.zeros(
             1,
             c,
-            img_h,
-            img_w,
+            self.feature_resolution,
+            self.feature_resolution,
             device=x.device,
             dtype=self.dtype,
             requires_grad=self.track_grad,
@@ -264,7 +268,15 @@ class HighResDINO(nn.Module):
                 mode=self.interpolation_mode,
             )
             inverted: torch.Tensor = inv_transform(full_size)
-            out_feature_img += inverted
+            # resize the inverted feature map to the output resolution
+            out = F.interpolate(
+                inverted,
+                (self.feature_resolution, self.feature_resolution),
+                mode=self.interpolation_mode
+            )
+            out_feature_img += out
         n_imgs: int = feature_batch.shape[0]
         mean = out_feature_img / n_imgs
@@ -275,13 +287,11 @@ class HighResDINO(nn.Module):
         self,
         x: torch.Tensor,
         attn_choice: AttentionOptions = "none",
-        chunk_size: int = 6,
     ) -> torch.Tensor:
         """Feed input img $x through network and get high-res features.
         :param x: unbatched image tensor, (c, h, w)
         :param attn_choice: choice of attention, "none" or "q", "k", "v", "o"
-        :param chunk_size: number of images to process in one chunk, default 6, in case of OOM
         :return: upsampled features, (c, h, w)
         """
         if self.dtype != torch.float32:  # cast (i.e to f16)
@@ -291,8 +301,8 @@ class HighResDINO(nn.Module):
         N_imgs = img_batch.shape[0]
         all_features = []
-        for i in range(0, N_imgs, chunk_size):
-            _img_batch = img_batch[i:i+chunk_size]
+        for i in range(0, N_imgs, self.chunk_size):
+            _img_batch = img_batch[i:i+self.chunk_size]
             out_dict = self.dinov2.forward_feats_attn(_img_batch, None, attn_choice)  # type: ignore
             if attn_choice != "none":
                 feats, attn = out_dict["x_norm_patchtokens"], out_dict["x_patchattn"]
@@ -314,23 +324,24 @@ class HighResDINO(nn.Module):
         self,
         x: torch.Tensor,
         attn_choice: AttentionOptions = "none",
-        chunk_size: int = 6,
+        move_to_cpu: bool = True,
     ) -> torch.Tensor:
         """Feed input img $x through network and get low and high res features.
         :param x: batched image tensor, (b, c, h, w)
         :param attn_choice: choice of attention, "none" or "q", "k", "v", "o"
-        :param chunk_size: number of images to process in one chunk, default 6, in case of OOM
         :return: upsampled features, (b, c, h, w)
         :rtype: torch.Tensor
         """
         upsampled_features = []
         for i in range(x.shape[0]):
             if self.track_grad:
-                out = self._forward_one_image(x[i], attn_choice, chunk_size)
+                out = self._forward_one_image(x[i], attn_choice)
             else:
                 with torch.no_grad():
-                    out = self._forward_one_image(x[i], attn_choice, chunk_size)
+                    out = self._forward_one_image(x[i], attn_choice)
+                    if move_to_cpu:
+                        out = out.cpu()
             upsampled_features.append(out)
         upsampled_features = torch.stack(upsampled_features, dim=0)
         return upsampled_features
@@ -338,13 +349,28 @@ class HighResDINO(nn.Module):
 def hires_dino(dino_name: DINONameOptions = "dino_vitb8",
                stride: int = 6,
-               attention_mask_ratio: float = 0.1,
-               shift_dists: List[int] = [1, 2],
+               shift_dists: List[int] = [1, 2, 3],
                flip_transforms: bool = True,
+               attention_mask_ratio: float = 0.1,
                dtype: torch.dtype | int = torch.float16,
-               track_grad: bool = False) -> HighResDINO:
-    model = HighResDINO(dino_name, stride, dtype, track_grad, attention_mask_ratio)
+               track_grad: bool = False,
+               chunk_size: int = 4,
+               feature_resolution: int = 512
+               ) -> HighResDINO:
+    """
+    Args:
+        dino_name: name of the DINO model to use
+        stride: stride size of the tokenization, smaller is better but slower
+        shift_dists: pixel shifts for multiple image transformations, more shifts means more crispy features
+        flip_transforms: whether to use flip transforms, remove positional features
+        attention_mask_ratio: ratio of attention keys to mask out
+        dtype: data type of the model
+        track_grad: whether to track gradients
+        chunk_size: number of images to process in one batch, in case of OOM
+        feature_resolution: resolution of the output features
+    """
+    model = HighResDINO(dino_name, stride, dtype, track_grad, attention_mask_ratio, chunk_size, feature_resolution)
     fwd_shift, inv_shift = get_shift_transforms(shift_dists)
     if flip_transforms:  # add flip transforms
@@ -359,34 +385,3 @@ def hires_dino(dino_name: DINONameOptions = "dino_vitb8",
     return model
-# ==================== MODEL PRESETS ====================
-# stride: stride size of the tokenization, the original model use patch size as stride size
-#         reduce stride size to get more tokens and crispy features
-# shift_dists: pixel shifts for multiple image transformations, more shifts means more crispy features
-# flip_transforms: whether to use flip transforms, remove positional features
-def hires_dino_small() -> HighResDINO:
-    return hires_dino(dino_name="dino_vitb8",
-                    stride=6,
-                    shift_dists=[1, 2, 3],
-                    flip_transforms=True)
-def hires_dino_base() -> HighResDINO:
-    return hires_dino(dino_name="dino_vitb8",
-                    stride=4,
-                    shift_dists=[1, 2, 3],
-                    flip_transforms=True)
-def hires_dino_large() -> HighResDINO:
-    return hires_dino(dino_name="dino_vitb8",
-                    stride=3,
-                    shift_dists=[1, 2, 3],
-                    flip_transforms=True)
-def hires_dinov2() -> HighResDINO:
-    return hires_dino(dino_name="dinov2_vitb14_reg",
-                    stride=4,
-                    shift_dists=[1, 2, 3],
-                    flip_transforms=True)

{ncut_pytorch-2.0.0.dev1 → ncut_pytorch-2.0.0.dev2}/ncut_pytorch/dino/patch.py RENAMED Viewed

@@ -161,7 +161,7 @@ class Patch:
         return forward
     @staticmethod
-    def _fix_mem_eff_attn(attention_mask_ratio: float = 0.25) -> Callable:
+    def _fix_attn_masking(attention_mask_ratio: float) -> Callable:
         """Replaces normal 'forward()' method of the memory efficient attention layer (block.attn)
         in the Dv2 model with an optional early return with attention. Used if xformers used.
@@ -196,8 +196,8 @@ class Patch:
                                                 k.transpose(1, 2),
                                                 v.transpose(1, 2),
                                                 attn_mask=attn_bias)
             x = x.transpose(1, 2)
             to_append: torch.Tensor
             if attn_choice != "none":
                 to_append = get_qkvo_per_head(q, k, v, x, attn_choice, self.attn_drop)

{ncut_pytorch-2.0.0.dev1 → ncut_pytorch-2.0.0.dev2}/ncut_pytorch/ncut.py RENAMED Viewed

@@ -109,9 +109,9 @@ class Ncut:
     def fit_transform(self, X: torch.Tensor) -> torch.Tensor:
         return self.fit(X).transform(X)
-    def __new__(cls, X: torch.Tensor = None, **kwargs):
+    def __new__(cls, X: torch.Tensor = None, n_eig: int = 100, track_grad: bool = False, d_gamma: float = 0.1, device: str = 'auto', **kwargs):
         if X is not None:
-            eigvec, eigval = ncut_fn(X, **kwargs)  # function-like behavior
+            eigvec, eigval = ncut_fn(X, n_eig=n_eig, track_grad=track_grad, d_gamma=d_gamma, device=device, **kwargs)  # function-like behavior
             return eigvec
         return super().__new__(cls)  # normal class instantiation

ncut_pytorch-2.0.0.dev2/ncut_pytorch/ncuts/ncut_click.py ADDED Viewed

@@ -0,0 +1,232 @@
+# %%
+from sympy import Q
+import torch
+from ncut_pytorch.utils.gamma import find_gamma_by_degree_after_fps, find_gamma_by_degree
+from ncut_pytorch.utils.math_utils import get_affinity, normalize_affinity, svd_lowrank, correct_rotation
+from ncut_pytorch.utils.sample_utils import farthest_point_sampling, auto_divice
+from .ncut_kway import kway_ncut
+from .ncut_nystrom import _nystrom_propagate
+from .ncut_nystrom import _plain_ncut
+from .ncut_nystrom import _NYSTROM_CONFIG
+def ncut_click_prompt(
+        X: torch.Tensor,
+        fg_indices: torch.Tensor,
+        bg_indices: torch.Tensor = None,
+        click_weight: float = 0.5,
+        bg_weight: float = 0.1,
+        n_eig: int = 2,
+        track_grad: bool = False,
+        d_gamma: float = 0.1,
+        device: str = 'auto',
+        gamma: float = None,
+        no_propagation: bool = False,
+        **kwargs,
+) -> tuple[torch.Tensor, torch.Tensor] | tuple[torch.Tensor, torch.Tensor, torch.Tensor, float]:
+    _config = _NYSTROM_CONFIG.copy()
+    _config.update(kwargs)
+    # use GPU if available
+    device = auto_divice(X.device, device)
+    # skip pytorch gradient computation if track_grad is False
+    prev_grad_state = torch.is_grad_enabled()
+    torch.set_grad_enabled(track_grad)
+    if bg_indices is None:
+        bg_indices = torch.tensor([], dtype=torch.long)
+    # subsample for nystrom approximation
+    nystrom_indices = farthest_point_sampling(X, n_sample=_config['n_sample'], device=device)
+    nystrom_indices = torch.tensor(nystrom_indices, dtype=torch.long)
+    # remove fg and bg from fps_idx
+    nystrom_indices = nystrom_indices[~torch.isin(nystrom_indices, torch.cat([fg_indices, bg_indices]))]
+    # add fg and bg to fps_idx
+    nystrom_indices = torch.cat([fg_indices, bg_indices, nystrom_indices])
+    fg_indices = torch.arange(len(fg_indices))
+    bg_indices = torch.arange(len(bg_indices)) + len(fg_indices)
+    n_fgbg = len(fg_indices) + len(bg_indices)
+    nystrom_X = X[nystrom_indices].to(device)
+    # find optimal gamma for affinity matrix
+    if gamma is None:
+        gamma = find_gamma_by_degree_after_fps(nystrom_X, d_gamma)
+    # compute Ncut on the nystrom sampled subgraph
+    A = get_affinity(nystrom_X, gamma=gamma)
+    A = normalize_affinity(A)
+    # modify the affinity from the clicks
+    X_click = 1 * A[fg_indices].mean(0)
+    if len(bg_indices) > 0:
+        X_click = X_click - bg_weight * A[bg_indices].mean(0)
+    X_click = X_click * A.shape[0]
+    # gamma2 = find_gamma_by_degree(X_click.unsqueeze(1), d_gamma)
+    # A_click = get_affinity(X_click.unsqueeze(1), gamma=gamma2)
+    A_click = get_affinity(X_click.unsqueeze(1), gamma=0.5)
+    # A_click = - torch.cdist(X_click.unsqueeze(1), X_click.unsqueeze(1))
+    A_click = normalize_affinity(A_click)
+    _A = click_weight * A_click + (1 - click_weight) * A
+    # _A = _A[n_fgbg:, n_fgbg:]
+    # nystrom_indices = nystrom_indices[n_fgbg:]
+    # nystrom_X = nystrom_X[n_fgbg:]
+    nystrom_eigvec, eigval = _plain_ncut(_A, n_eig)
+    if no_propagation:
+        torch.set_grad_enabled(prev_grad_state)
+        return nystrom_eigvec, eigval, nystrom_indices, gamma
+    # propagate eigenvectors from subgraph to full graph
+    eigvec = _nystrom_propagate(
+        nystrom_eigvec,
+        X,
+        nystrom_X,
+        n_neighbors=_config['n_neighbors'],
+        n_sample=_config['n_sample2'],
+        gamma=gamma,
+        chunk_size=_config['matmul_chunk_size'],
+        device=device,
+        move_output_to_cpu=_config['move_output_to_cpu'],
+        track_grad=track_grad,
+    )
+    torch.set_grad_enabled(prev_grad_state)
+    return eigvec, eigval
+def get_mask_and_heatmap(eigvecs, fg_indices, n_cluster=2, device='auto'):
+    device = auto_divice(eigvecs.device, device)
+    eigvecs = eigvecs[:, :n_cluster]
+    eigvecs = kway_ncut(eigvecs, device=device)
+    # find which cluster is the foreground
+    fg_eigvecs = eigvecs[fg_indices]
+    fg_idx = fg_eigvecs.mean(0).argmax().item()
+    bg_idx = 1 if fg_idx == 0 else 0
+    # discretize the eigvecs
+    mask = eigvecs.argmax(dim=-1) == fg_idx
+    heatmap = eigvecs[:, fg_idx] - eigvecs[:, bg_idx]
+    return mask, heatmap
+from ncut_pytorch.utils.math_utils import keep_topk_per_row
+def ncut_click_prompt_cached(
+        nystrom_indices: torch.Tensor,
+        gamma: float,
+        X: torch.Tensor,
+        fg_indices: torch.Tensor,
+        bg_indices: torch.Tensor = None,
+        click_weight: float = 0.5,
+        bg_weight: float = 0.1,
+        n_eig: int = 2,
+        track_grad: bool = False,
+        device: str = 'auto',
+        **kwargs,
+) -> tuple[torch.Tensor, torch.Tensor] | tuple[torch.Tensor, torch.Tensor, torch.Tensor, float]:
+    _config = _NYSTROM_CONFIG.copy()
+    _config.update(kwargs)
+    # use GPU if available
+    device = auto_divice(X.device, device)
+    # skip pytorch gradient computation if track_grad is False
+    prev_grad_state = torch.is_grad_enabled()
+    torch.set_grad_enabled(track_grad)
+    if bg_indices is None:
+        bg_indices = torch.tensor([], dtype=torch.long)
+    # subsample for nystrom approximation
+    nystrom_indices = torch.tensor(nystrom_indices, dtype=torch.long)
+    # add fg and bg to fps_idx
+    nystrom_indices = torch.cat([fg_indices, bg_indices, nystrom_indices])
+    fg_indices = torch.arange(len(fg_indices))
+    bg_indices = torch.arange(len(bg_indices)) + len(fg_indices)
+    n_fgbg = len(fg_indices) + len(bg_indices)
+    nystrom_X = X[nystrom_indices].to(device)
+    # compute Ncut on the nystrom sampled subgraph
+    A = get_affinity(nystrom_X, gamma=gamma)
+    A = normalize_affinity(A)
+    # modify the affinity from the clicks
+    X_click = 1 * A[fg_indices].mean(0)
+    if len(bg_indices) > 0:
+        X_click = X_click - bg_weight * A[bg_indices].mean(0)
+    X_click = X_click * A.shape[0]
+    A_click = get_affinity(X_click.unsqueeze(1), gamma=0.5)
+    A_click = normalize_affinity(A_click)
+    _A = click_weight * A_click + (1 - click_weight) * A
+    _A = _A[n_fgbg:, n_fgbg:]
+    nystrom_indices = nystrom_indices[n_fgbg:]
+    nystrom_X = nystrom_X[n_fgbg:]
+    nystrom_eigvec, eigval = _plain_ncut(_A, n_eig)
+    torch.set_grad_enabled(prev_grad_state)
+    return nystrom_eigvec, eigval
+def _build_nystrom_graph(
+        X: torch.Tensor,
+        nystrom_X: torch.Tensor,
+        gamma: float = 1.0,
+        device: str = 'auto',
+        **kwargs,
+):
+    """propagate output from nystrom sampled nodes to all nodes,
+    use a weighted sum of the nearest neighbors to propagate the output.
+    Args:
+        nystrom_out (torch.Tensor): output from nystrom sampled nodes, shape (m, D)
+        X (torch.Tensor): input features for all nodes, shape (N, D)
+        nystrom_X (torch.Tensor): input features from nystrom sampled nodes, shape (m, D)
+        gamma (float): affinity parameter, default 1.0
+        track_grad (bool): keep track of pytorch gradients, default False
+        device (str): device to use for computation, if 'auto', will detect GPU automatically
+        _config (dict): configuration for nystrom approximation, default _NYSTROM_CONFIG
+    Returns:
+        torch.Tensor: output propagated by nearest neighbors, shape (N, D)
+    """
+    _config = _NYSTROM_CONFIG.copy()
+    _config.update(kwargs)
+    device = auto_divice(X.device, device)
+    nystrom_X = nystrom_X.to(device)
+    all_outs = []
+    n_chunk = _config['matmul_chunk_size']
+    n_neighbors = _config['n_neighbors']
+    cached_weights = torch.zeros((X.shape[0], nystrom_X.shape[0]),
+                                 device=device, dtype=X.dtype)
+    for i in range(0, X.shape[0], n_chunk):
+        end = min(i + n_chunk, X.shape[0])
+        _Ai = get_affinity(X[i:end].to(device), nystrom_X, gamma=gamma)
+        _Ai, _indices = keep_topk_per_row(_Ai, n_neighbors)  # (n, n_neighbors)
+        row_indices = torch.arange(i, end).unsqueeze(1).expand(-1, n_neighbors)  # shape (N, 10)
+        cached_weights[row_indices, _indices] = _Ai
+        print((cached_weights[i] > 0).sum())
+    return cached_weights

{ncut_pytorch-2.0.0.dev1 → ncut_pytorch-2.0.0.dev2}/ncut_pytorch/ncuts/ncut_kway.py RENAMED Viewed

@@ -1,10 +1,12 @@
+from re import U
 import torch
 import torch.nn.functional as F
 from ncut_pytorch.utils.sample_utils import farthest_point_sampling
+from ncut_pytorch.utils.sample_utils import auto_divice
-def kway_ncut(eigvec: torch.Tensor, **kwargs):
+def kway_ncut(eigvec: torch.Tensor, device: str = 'auto', **kwargs):
     """
     Args:
         eigvec (torch.Tensor): eigenvectors from Ncut output, shape (n, k)
@@ -13,14 +15,16 @@ def kway_ncut(eigvec: torch.Tensor, **kwargs):
             eigvec.argmax(dim=1) is the cluster assignment.
             eigvec.argmax(dim=0) is the cluster centroids.
     """
-    R = axis_align(eigvec, **kwargs)
+    # __check_input_tensor(eigvec)
+    R = axis_align(eigvec, device=device, **kwargs)
     eigvec = F.normalize(eigvec, dim=1)
     eigvec = eigvec @ R
     return eigvec
 @torch.no_grad()
-def axis_align(eigvec: torch.Tensor, max_iter=1000, n_sample=10240):
+def axis_align(eigvec: torch.Tensor, device: str = 'auto', max_iter=1000, n_sample=10240):
     """Multiclass Spectral Clustering, SX Yu, J Shi, 2003
     Args:
@@ -33,16 +37,20 @@ def axis_align(eigvec: torch.Tensor, max_iter=1000, n_sample=10240):
     # subsample the eigenvectors, to speed up the computation
     n, k = eigvec.shape
-    n_sample = max(n_sample, k)
-    sample_idx = farthest_point_sampling(eigvec, n_sample)
+    sample_idx = farthest_point_sampling(eigvec, n_sample, device=device)
     eigvec = eigvec[sample_idx]
     eigvec = F.normalize(eigvec, dim=1)
     # Initialize R matrix with the first column from Farthest Point Sampling
-    _sample_idx = farthest_point_sampling(eigvec, k)
+    _sample_idx = farthest_point_sampling(eigvec, k, device=device)
     R = eigvec[_sample_idx].T
+    original_device = eigvec.device
+    device = auto_divice(original_device, device)
+    eigvec = eigvec.to(device=device)
+    R = R.to(device=device)
     # Iterative optimization loop
     last_objective_value = 0
     exit_loop = False
@@ -54,12 +62,13 @@ def axis_align(eigvec: torch.Tensor, max_iter=1000, n_sample=10240):
         # Discretize the projected eigenvectors
         _eigenvectors_continuous = eigvec @ R
         _eigenvectors_discrete = _onehot_discretize(_eigenvectors_continuous)
-        _eigenvectors_discrete = _eigenvectors_discrete.to(device=eigvec.device, dtype=eigvec.dtype)
+        _eigenvectors_discrete = _eigenvectors_discrete.to(device=device, dtype=eigvec.dtype)
         # SVD decomposition
         _out = _eigenvectors_discrete.T @ eigvec
         U, S, Vh = torch.linalg.svd(_out, full_matrices=False)
         V = Vh.T
+        # U, S, V = svd_lowrank(_out, 100)
         # Compute the Ncut value
         ncut_value = 2 * (n - torch.sum(S))
@@ -71,7 +80,8 @@ def axis_align(eigvec: torch.Tensor, max_iter=1000, n_sample=10240):
         else:
             last_objective_value = ncut_value
             R = V @ U.T
+    R = R.to(device=original_device)
     return R

{ncut_pytorch-2.0.0.dev1 → ncut_pytorch-2.0.0.dev2}/ncut_pytorch/ncuts/ncut_nystrom.py RENAMED Viewed

@@ -12,7 +12,6 @@ _NYSTROM_CONFIG = {
     'n_sample2': 1024,  # number of samples for eigenvector propagation, 1024 is large enough for most cases
     'n_neighbors': 10,  # number of neighbors for eigenvector propagation, 10 is large enough for most cases
     'matmul_chunk_size': 16384,  # chunk size for matrix multiplication, larger chunk size is faster but requires more memory
-    'sample_method': "farthest",  # sample method for nystrom approximation, 'farthest' is FPS(Farthest Point Sampling)
     'move_output_to_cpu': True,  # if True, will move output to cpu, which saves memory but loses gradients
 }
@@ -102,17 +101,6 @@ def _plain_ncut(
         A: torch.Tensor,
         n_eig: int = 100,
 ):
-    """Normalized Cut.
-    Args:
-        A (torch.Tensor): affinity matrix, shape (N, N)
-        n_eig (int): number of eigenvectors to return
-    Returns:
-        (torch.Tensor): eigenvectors corresponding to the eigenvalues, shape (N, n_eig)
-        (torch.Tensor): eigenvalues of the eigenvectors, sorted in descending order
-    """
     # normalization; A = D^(-1/2) A D^(-1/2)
     A = normalize_affinity(A)
@@ -120,6 +108,11 @@ def _plain_ncut(
     # correct the random rotation (flipping sign) of eigenvectors
     eigvec = correct_rotation(eigvec)
+    assert not torch.any(torch.isnan(eigvec)), "eigvec contains NaN"
+    assert not torch.any(torch.isinf(eigvec)), "eigvec contains Inf"
+    assert not torch.any(torch.isnan(eigval)), "eigval contains NaN"
+    assert not torch.any(torch.isinf(eigval)), "eigval contains Inf"
     return eigvec, eigval

ncut-pytorch 2.0.0.dev1__tar.gz → 2.0.0.dev2__tar.gz

ncut-pytorch 2.0.0.dev1__tar.gz → 2.0.0.dev2__tar.gz