nystrom-ncut 0.0.1__tar.gz → 0.0.3__tar.gz

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: nystrom_ncut
- Version: 0.0.1
+ Version: 0.0.3
  Summary: Normalized Cut and Nyström Approximation
  Author-email: Huzheng Yang <huze.yann@gmail.com>, Wentinn Liao <wentinn.liao@gmail.com>
  Project-URL: Documentation, https://github.com/JophiArcana/Nystrom-NCUT/
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

  [project]
  name = "nystrom_ncut"
- version = "0.0.1"
+ version = "0.0.3"
  authors = [
      { name = "Huzheng Yang", email = "huze.yann@gmail.com" },
      { name = "Wentinn Liao", email = "wentinn.liao@gmail.com" },
@@ -3,4 +3,5 @@ scikit-learn
  umap-learn
  fpsample>=0.3.2
  pycolormap-2d
- tqdm
+ tqdm
+ torch
@@ -1,4 +1,7 @@
- from .ncut_pytorch import NCUT
+ from .ncut_pytorch import (
+     NCUT,
+     axis_align,
+ )
  from .propagation_utils import (
      affinity_from_features,
      propagate_eigenvectors,
@@ -6,7 +9,6 @@ from .propagation_utils import (
      quantile_normalize,
  )
  from .visualize_utils import (
-     eigenvector_to_rgb,
      rgb_from_tsne_3d,
      rgb_from_umap_sphere,
      rgb_from_tsne_2d,
@@ -18,5 +20,3 @@ from .visualize_utils import (
      propagate_rgb_color,
      get_mask,
  )
- from .ncut_pytorch import nystrom_ncut, ncut
- from .ncut_pytorch import kway_ncut, axis_align
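With this change, the public surface is the `NCUT` class plus `axis_align`; the 0.0.1 functional aliases (`nystrom_ncut`, `ncut`, `kway_ncut`) and `eigenvector_to_rgb` are no longer re-exported. A minimal sketch of the new entry points (the feature tensor is illustrative; `fit_transform` is assumed to accept a bare feature tensor, as the hunks below show):

    import torch
    from nystrom_ncut import NCUT, axis_align

    features = torch.randn(1000, 32)                      # illustrative input
    V, L = NCUT(n_components=20).fit_transform(features)  # eigenvectors, eigenvalues
    onehot, R = axis_align(V)                             # discretized assignments, rotation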
@@ -0,0 +1,20 @@
+ from typing import Any
+
+ import numpy as np
+ import torch
+ import torch.nn.functional as Fn
+
+
+ def ceildiv(a: int, b: int) -> int:
+     return -(-a // b)
+
+
+ def lazy_normalize(x: torch.Tensor, n: int = 1000, **normalize_kwargs: Any) -> torch.Tensor:
+     numel = np.prod(x.shape[:-1])
+     n = min(n, numel)
+     random_indices = torch.randperm(numel)[:n]
+     _x = x.flatten(0, -2)[random_indices]
+     if torch.allclose(torch.norm(_x, **normalize_kwargs), torch.ones(n, device=x.device)):
+         return x
+     else:
+         return Fn.normalize(x, **normalize_kwargs)
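The new `common.py` gathers two helpers used throughout the package: `ceildiv`, ceiling division written as negated floor division, and `lazy_normalize`, which replaces the removed `check_if_normalized` by probing up to `n` random rows and skipping normalization when they already have unit norm. A small sketch of the semantics (the `nystrom_ncut.common` import path is inferred from this diff):

    import torch
    import torch.nn.functional as Fn
    from nystrom_ncut.common import ceildiv, lazy_normalize  # path assumed from this diff

    assert ceildiv(10, 3) == 4                # -(-10 // 3) == 4
    assert ceildiv(9, 3) == 3

    x = Fn.normalize(torch.randn(100, 8), dim=-1)
    y = lazy_normalize(x, dim=-1)             # sampled rows already unit-norm,
    assert y is x                             # so the input is returned untouched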
@@ -2,6 +2,7 @@ import logging
  from typing import Literal, Tuple

  import torch
+ import torch.nn.functional as Fn

  from .nystrom import (
      EigSolverOptions,
@@ -44,7 +45,6 @@ class LaplacianKernel(OnlineKernel):
              self.anchor_features, # [n x d]
              affinity_focal_gamma=self.affinity_focal_gamma,
              distance=self.distance,
-             fill_diagonal=False,
          ) # [n x n]
          U, L = solve_eig(
              self.A,
@@ -61,7 +61,6 @@ class LaplacianKernel(OnlineKernel):
              features, # [m x d]
              affinity_focal_gamma=self.affinity_focal_gamma,
              distance=self.distance,
-             fill_diagonal=False,
          ) # [n x m]
          b_r = torch.sum(B, dim=-1) # [n]
          b_c = torch.sum(B, dim=-2) # [m]
@@ -83,7 +82,6 @@ class LaplacianKernel(OnlineKernel):
              features, # [m x d]
              affinity_focal_gamma=self.affinity_focal_gamma,
              distance=self.distance,
-             fill_diagonal=False,
          ) # [n x m]
          b_c = torch.sum(B, dim=-2) # [m]
          colscale = b_c + B.mT @ self.Ainv @ self.b_r # [m]
@@ -91,25 +89,24 @@ class LaplacianKernel(OnlineKernel):
          return (B * scale).mT # [m x n]


- class NewNCUT(OnlineNystrom):
+ class NCUT(OnlineNystrom):
      """Nystrom Normalized Cut for large scale graph."""

      def __init__(
          self,
-         num_eig: int = 100,
+         n_components: int = 100,
          affinity_focal_gamma: float = 1.0,
          num_sample: int = 10000,
          sample_method: Literal["farthest", "random"] = "farthest",
          distance: DistanceOptions = "cosine",
          eig_solver: EigSolverOptions = "svd_lowrank",
          normalize_features: bool = None,
-         device: str = None,
          move_output_to_cpu: bool = False,
-         matmul_chunk_size: int = 8096,
+         chunk_size: int = 8192,
      ):
          """
          Args:
-             num_eig (int): number of top eigenvectors to return
+             n_components (int): number of top eigenvectors to return
              affinity_focal_gamma (float): affinity matrix temperature, lower t reduces the not-so-connected edge weights,
                  smaller t results in sharper eigenvectors.
              num_sample (int): number of samples for Nystrom-like approximation,
@@ -120,17 +117,15 @@ class NewNCUT(OnlineNystrom):
              eig_solver (str): eigen decompose solver, ['svd_lowrank', 'lobpcg', 'svd', 'eigh'].
              normalize_features (bool): normalize input features before computing affinity matrix,
                  default 'None' is True for cosine distance, False for euclidean distance and rbf
-             device (str): device to use for eigen computation,
-                 move to GPU to speeds up a bit (~5x faster)
              move_output_to_cpu (bool): move output to CPU, set to True if you have memory issue
-             matmul_chunk_size (int): chunk size for large-scale matrix multiplication
+             chunk_size (int): chunk size for large-scale matrix multiplication
          """
          OnlineNystrom.__init__(
              self,
-             n_components=num_eig,
+             n_components=n_components,
              kernel=LaplacianKernel(affinity_focal_gamma, distance, eig_solver),
              eig_solver=eig_solver,
-             chunk_size=matmul_chunk_size,
+             chunk_size=chunk_size,
          )
          self.num_sample = num_sample
          self.sample_method = sample_method
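Taken together, the constructor changes amount to a small migration for callers: `num_eig` becomes `n_components`, `matmul_chunk_size` becomes `chunk_size` (with the default corrected from 8096 to the power of two 8192), and the `device` argument is dropped, so computation now follows the device of the input tensor. A hedged before/after sketch:

    from nystrom_ncut import NCUT

    # 0.0.1 (per this diff): NCUT(num_eig=50, matmul_chunk_size=8096, device="cuda")
    # 0.0.3: move the features tensor to the desired device instead
    model = NCUT(n_components=50, chunk_size=8192)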
@@ -142,19 +137,14 @@ class NewNCUT(OnlineNystrom):
          if distance in ["euclidean", "rbf"]:
              self.normalize_features = False

-         self.device = device
          self.move_output_to_cpu = move_output_to_cpu
-         self.matmul_chunk_size = matmul_chunk_size
+         self.chunk_size = chunk_size

      def _fit_helper(
          self,
          features: torch.Tensor,
          precomputed_sampled_indices: torch.Tensor,
      ) -> Tuple[torch.Tensor, torch.Tensor]:
-         # move subgraph gpu to speed up
-         original_device = features.device
-         device = original_device if self.device is None else self.device
-
          _n = features.shape[0]
          if self.num_sample >= _n:
              logging.info(
@@ -186,13 +176,13 @@ class NewNCUT(OnlineNystrom):
              num_sample=self.num_sample,
              sample_method=self.sample_method,
          )
-         sampled_features = features[sampled_indices].to(device)
+         sampled_features = features[sampled_indices]
          OnlineNystrom.fit(self, sampled_features)

          _n_not_sampled = _n - len(sampled_features)
          if _n_not_sampled > 0:
-             unsampled_indices = torch.full((_n,), True).scatter(0, sampled_indices, False)
-             unsampled_features = features[unsampled_indices].to(device)
+             unsampled_indices = torch.full((_n,), True, device=features.device).scatter_(0, sampled_indices, False)
+             unsampled_features = features[unsampled_indices]
              V_unsampled, _ = OnlineNystrom.update(self, unsampled_features)
          else:
              unsampled_indices = V_unsampled = None
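The rewritten mask construction is worth noting: it allocates an all-True vector on the features' device and uses the in-place `scatter_` (rather than the out-of-place `scatter`) to mark sampled positions False, yielding a boolean index of the unsampled rows. The idiom in isolation:

    import torch

    n = 10
    sampled_indices = torch.tensor([0, 3, 7])
    unsampled = torch.full((n,), True).scatter_(0, sampled_indices, False)
    # tensor([False,  True,  True, False,  True,  True,  True, False,  True,  True])
    assert unsampled.sum() == n - len(sampled_indices)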
@@ -211,7 +201,7 @@ class NewNCUT(OnlineNystrom):
          Returns:
              (NCUT): self
          """
-         NewNCUT._fit_helper(self, features, precomputed_sampled_indices)
+         NCUT._fit_helper(self, features, precomputed_sampled_indices)
          return self

      def fit_transform(
@@ -229,13 +219,60 @@ class NewNCUT(OnlineNystrom):
              (torch.Tensor): eigen_vectors, shape (n_samples, num_eig)
              (torch.Tensor): eigen_values, sorted in descending order, shape (num_eig,)
          """
-         unsampled_indices, V_unsampled = NewNCUT._fit_helper(self, features, precomputed_sampled_indices)
+         unsampled_indices, V_unsampled = NCUT._fit_helper(self, features, precomputed_sampled_indices)
          V_sampled, L = OnlineNystrom.transform(self)

          if unsampled_indices is not None:
-             V = torch.zeros((len(unsampled_indices), self.n_components))
+             V = torch.zeros((len(unsampled_indices), self.n_components), device=features.device)
              V[~unsampled_indices] = V_sampled
              V[unsampled_indices] = V_unsampled
          else:
              V = V_sampled
          return V, L
+
+
+ def axis_align(eigen_vectors: torch.Tensor, max_iter=300):
+     """Multiclass Spectral Clustering, SX Yu, J Shi, 2003
+
+     Args:
+         eigen_vectors (torch.Tensor): continuous eigenvectors from NCUT, shape (n, k)
+         max_iter (int, optional): Maximum number of iterations.
+
+     Returns:
+         torch.Tensor: Discretized eigenvectors, shape (n, k), each row is a one-hot vector.
+     """
+     # Normalize eigenvectors
+     n, k = eigen_vectors.shape
+     eigen_vectors = Fn.normalize(eigen_vectors, p=2, dim=-1)
+
+     # Initialize R matrix with the first column from a random row of EigenVectors
+     R = torch.empty((k, k), device=eigen_vectors.device)
+     R[0] = eigen_vectors[torch.randint(0, n, (1,))].squeeze()
+
+     # Loop to populate R with k orthogonal directions
+     c = torch.zeros(n, device=eigen_vectors.device)
+     for i in range(1, k):
+         c += torch.abs(eigen_vectors @ R[i - 1])
+         R[i] = eigen_vectors[torch.argmin(c, dim=0)]
+
+     # Iterative optimization loop
+     eps = torch.finfo(torch.float32).eps
+     prev_objective = torch.inf
+     for _ in range(max_iter):
+         # Discretize the projected eigenvectors
+         idx = torch.argmax(eigen_vectors @ R.mT, dim=-1)
+         M = torch.zeros((k, k)).index_add_(0, idx, eigen_vectors)
+
+         # Compute the NCut value
+         objective = torch.norm(M)
+
+         # Check for convergence
+         if torch.abs(objective - prev_objective) < eps:
+             break
+         prev_objective = objective
+
+         # SVD decomposition
+         U, S, Vh = torch.linalg.svd(M, full_matrices=False)
+         R = U @ Vh
+
+     return Fn.one_hot(idx, num_classes=k).to(torch.float), R
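With `axis_align` now exported beside `NCUT`, the intended pipeline is: compute continuous eigenvectors with `fit_transform`, then discretize them with the Yu–Shi rotation. A hedged end-to-end sketch (sizes are illustrative; `precomputed_sampled_indices` is assumed to default to None):

    import torch
    from nystrom_ncut import NCUT, axis_align

    features = torch.randn(5000, 64)
    V, L = NCUT(n_components=10, num_sample=1000).fit_transform(features)
    onehot, R = axis_align(V, max_iter=300)   # (5000, 10) one-hot rows, (10, 10) rotation
    labels = onehot.argmax(dim=-1)            # hard cluster id per sample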
@@ -2,6 +2,8 @@ from typing import Literal, Tuple

  import torch

+ from .common import ceildiv
+

  EigSolverOptions = Literal["svd_lowrank", "lobpcg", "svd", "eigh"]

@@ -75,7 +77,7 @@ class OnlineNystrom:
          return U[:, :self.n_components], L[:self.n_components] # [n x n_components], [n_components]

      def update(self, features: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
-         n_chunks = -(-len(features) // self.chunk_size)
+         n_chunks = ceildiv(len(features), self.chunk_size)
          if n_chunks > 1:
              """ Chunked version """
              chunks = torch.chunk(features, n_chunks, dim=0)
@@ -111,7 +113,7 @@ class OnlineNystrom:
          if features is None:
              VS = self.A @ self.transform_matrix # [n x n_components]
          else:
-             n_chunks = -(-len(features) // self.chunk_size)
+             n_chunks = ceildiv(len(features), self.chunk_size)
              if n_chunks > 1:
                  """ Chunked version """
                  chunks = torch.chunk(features, n_chunks, dim=0)
@@ -1,11 +1,12 @@
  import logging
- import math
  from typing import Literal

  import numpy as np
  import torch
  import torch.nn.functional as F

+ from .common import ceildiv, lazy_normalize
+

  @torch.no_grad()
  def run_subgraph_sampling(
@@ -42,7 +43,7 @@ def run_subgraph_sampling(
          sampled_indices = torch.randperm(features.shape[0])[:num_sample]
      else:
          raise ValueError("sample_method should be 'farthest' or 'random'")
-     return sampled_indices
+     return sampled_indices.to(features.device)


  def farthest_point_sampling(
@@ -60,14 +61,12 @@ def farthest_point_sampling(
      # PCA to reduce the dimension
      if features.shape[1] > 8:
          u, s, v = torch.pca_lowrank(features, q=8)
-         _n = features.shape[0]
-         s /= math.sqrt(_n)
          features = u @ torch.diag(s)

      h = min(h, int(np.log2(features.shape[0])))

      kdline_fps_samples_idx = fpsample.bucket_fps_kdline_sampling(
-         features.cpu().numpy(), num_sample, h
+         features.numpy(force=True), num_sample, h
      ).astype(np.int64)
      return torch.from_numpy(kdline_fps_samples_idx)
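`Tensor.numpy(force=True)` (available since PyTorch 1.13) folds the detach and CPU-transfer steps into one call, so the FPS entry point no longer assumes the tensor is already a grad-free CPU tensor. Roughly the equivalence:

    import torch

    t = torch.randn(4, 3, requires_grad=True)
    a = t.numpy(force=True)           # works: detaches and moves to CPU as needed
    b = t.detach().cpu().numpy()      # the manual spelling this replaces
    assert (a == b).all()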
 
@@ -76,26 +75,19 @@ def distance_from_features(
      features: torch.Tensor,
      features_B: torch.Tensor,
      distance: Literal["cosine", "euclidean", "rbf"],
-     fill_diagonal: bool,
  ):
      """Compute distance matrix from input features.
      Args:
          features (torch.Tensor): input features, shape (n_samples, n_features)
          features_B (torch.Tensor, optional): optional, if not None, compute affinity between two features
-         affinity_focal_gamma (float): affinity matrix parameter, lower t reduce the edge weights
-             on weak connections, default 1.0
          distance (str): distance metric, 'cosine' (default) or 'euclidean', 'rbf'.
-         normalize_features (bool): normalize input features before computing affinity matrix
-
      Returns:
          (torch.Tensor): distance matrix, shape (n_samples, n_samples)
      """
      # compute distance matrix from input features
      if distance == "cosine":
-         if not check_if_normalized(features):
-             features = F.normalize(features, dim=-1)
-         if not check_if_normalized(features_B):
-             features_B = F.normalize(features_B, dim=-1)
+         features = lazy_normalize(features, dim=-1)
+         features_B = lazy_normalize(features_B, dim=-1)
          D = 1 - features @ features_B.T
      elif distance == "euclidean":
          D = torch.cdist(features, features_B, p=2)
@@ -105,8 +97,6 @@ def distance_from_features(
      else:
          raise ValueError("distance should be 'cosine' or 'euclidean', 'rbf'")

-     if fill_diagonal:
-         D[torch.arange(D.shape[0]), torch.arange(D.shape[0])] = 0
      return D


@@ -115,7 +105,6 @@ def affinity_from_features(
      features_B: torch.Tensor = None,
      affinity_focal_gamma: float = 1.0,
      distance: Literal["cosine", "euclidean", "rbf"] = "cosine",
-     fill_diagonal: bool = True,
  ):
      """Compute affinity matrix from input features.

@@ -125,8 +114,6 @@ def affinity_from_features(
          affinity_focal_gamma (float): affinity matrix parameter, lower t reduce the edge weights
              on weak connections, default 1.0
          distance (str): distance metric, 'cosine' (default) or 'euclidean', 'rbf'.
-         normalize_features (bool): normalize input features before computing affinity matrix
-
      Returns:
          (torch.Tensor): affinity matrix, shape (n_samples, n_samples)
      """
@@ -134,12 +121,10 @@ def affinity_from_features(

      # if feature_B is not provided, compute affinity matrix on features x features
      # if feature_B is provided, compute affinity matrix on features x feature_B
-     if features_B is not None:
-         assert not fill_diagonal, "fill_diagonal should be False when feature_B is None"
      features_B = features if features_B is None else features_B

      # compute distance matrix from input features
-     D = distance_from_features(features, features_B, distance, fill_diagonal)
+     D = distance_from_features(features, features_B, distance)

      # torch.exp makes the affinity matrix positive definite;
      # lower affinity_focal_gamma reduces the weak edge weights
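With `fill_diagonal` gone, the square (features × features) and rectangular (features × features_B) cases go through the identical code path. A short sketch against the exported function, using the signature shown in the hunks above:

    import torch
    from nystrom_ncut import affinity_from_features

    X, Y = torch.randn(100, 16), torch.randn(30, 16)
    A_sq = affinity_from_features(X, affinity_focal_gamma=1.0, distance="cosine")
    A_rect = affinity_from_features(X, Y, affinity_focal_gamma=1.0, distance="cosine")
    assert A_sq.shape == (100, 100) and A_rect.shape == (100, 30)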
@@ -154,9 +139,8 @@ def propagate_knn(
      knn: int = 10,
      distance: Literal["cosine", "euclidean", "rbf"] = "cosine",
      affinity_focal_gamma: float = 1.0,
-     chunk_size: int = 8096,
+     chunk_size: int = 8192,
      device: str = None,
-     use_tqdm: bool = False,
      move_output_to_cpu: bool = False,
  ):
      """A generic function to propagate new nodes using KNN.
@@ -169,8 +153,6 @@ def propagate_knn(
          distance (str): distance metric, 'cosine' (default) or 'euclidean', 'rbf'
          chunk_size (int): chunk size for matrix multiplication
          device (str): device to use for computation, if None, will not change device
-         use_tqdm (bool): show progress bar when propagating eigenvectors from subgraph to full graph
-
      Returns:
          torch.Tensor: propagated eigenvectors, shape (new_num_samples, D)

@@ -197,24 +179,16 @@ def propagate_knn(
      # used in nystrom_ncut
      # propagate eigen_vector from subgraph to full graph
      subgraph_output = subgraph_output.to(device)
-     V_list = []
-     iterator = range(0, inp_features.shape[0], chunk_size)
-     try:
-         assert use_tqdm
-         from tqdm import tqdm
-         iterator = tqdm(iterator, "propagate by KNN")
-     except (AssertionError, ImportError):
-         pass

-     subgraph_features = subgraph_features.to(device)
-     for i in iterator:
-         end = min(i + chunk_size, inp_features.shape[0])
-         _v = inp_features[i:end].to(device)
-         _A = affinity_from_features(subgraph_features, _v, affinity_focal_gamma, distance, False).mT
+     n_chunks = ceildiv(inp_features.shape[0], chunk_size)
+     V_list = []
+     for _v in torch.chunk(inp_features, n_chunks, dim=0):
+         _v = _v.to(device)
+         _A = affinity_from_features(subgraph_features, _v, affinity_focal_gamma, distance).mT

          if knn is not None:
              mask = torch.full_like(_A, True, dtype=torch.bool)
-             mask[torch.arange(end - i)[:, None], _A.topk(knn, dim=-1, largest=True).indices] = False
+             mask[torch.arange(len(_v))[:, None], _A.topk(knn, dim=-1, largest=True).indices] = False
              _A[mask] = 0.0
              _A = F.normalize(_A, p=1, dim=-1)
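The rewritten loop swaps manual start/end index arithmetic (and the optional tqdm wrapper) for `torch.chunk` over `ceildiv(n, chunk_size)` pieces, with `len(_v)` giving each chunk's true row count. The chunking idiom in isolation:

    import torch

    def ceildiv(a: int, b: int) -> int:
        return -(-a // b)

    inp = torch.randn(20000, 8)
    n_chunks = ceildiv(inp.shape[0], 8192)            # 3
    for _v in torch.chunk(inp, n_chunks, dim=0):
        print(len(_v))                                # 6667, 6667, 6666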
 
@@ -232,16 +206,14 @@ def propagate_nearest(
      inp_features: torch.Tensor,
      subgraph_features: torch.Tensor,
      distance: Literal["cosine", "euclidean", "rbf"] = "cosine",
-     chunk_size: int = 8096,
+     chunk_size: int = 8192,
      device: str = None,
      move_output_to_cpu: bool = False,
  ):
      device = subgraph_output.device if device is None else device
      if distance == 'cosine':
-         if not check_if_normalized(inp_features):
-             inp_features = F.normalize(inp_features, dim=-1)
-         if not check_if_normalized(subgraph_features):
-             subgraph_features = F.normalize(subgraph_features, dim=-1)
+         inp_features = lazy_normalize(inp_features, dim=-1)
+         subgraph_features = lazy_normalize(subgraph_features, dim=-1)

      # used in nystrom_tsne, equivalent to propagate_by_knn with knn=1
      # propagate tSNE from subgraph to full graph
@@ -250,7 +222,7 @@ def propagate_nearest(
      for i in range(0, inp_features.shape[0], chunk_size):
          end = min(i + chunk_size, inp_features.shape[0])
          _v = inp_features[i:end].to(device)
-         _A = -distance_from_features(subgraph_features, _v, distance, False).mT
+         _A = -distance_from_features(subgraph_features, _v, distance).mT

          # keep top1 for each row
          top_idx = _A.argmax(dim=-1).cpu()
@@ -273,7 +245,6 @@ def propagate_eigenvectors(
      sample_method: Literal["farthest", "random"],
      chunk_size: int,
      device: str,
-     use_tqdm: bool,
  ):
      """Propagate eigenvectors to new nodes using KNN. Note: this is equivalent to the class API `NCUT.transform(new_features)`, except that the sampling is re-done in this function.
      Args:
@@ -283,10 +254,8 @@ def propagate_eigenvectors(
          knn (int): number of KNN to propagate eigenvectors, default 3
          num_sample (int): number of samples for subgraph sampling, default 50000
          sample_method (str): sample method, 'farthest' (default) or 'random'
-         chunk_size (int): chunk size for matrix multiplication, default 8096
+         chunk_size (int): chunk size for matrix multiplication, default 8192
          device (str): device to use for computation, if None, will not change device
-         use_tqdm (bool): show progress bar when propagating eigenvectors from subgraph to full graph
-
      Returns:
          torch.Tensor: propagated eigenvectors, shape (n_new_samples, num_eig)

@@ -319,21 +288,10 @@ def propagate_eigenvectors(
          knn=knn,
          chunk_size=chunk_size,
          device=device,
-         use_tqdm=use_tqdm,
      )
-
      return new_eigenvectors


- def check_if_normalized(x, n=1000):
-     """check if the input tensor is normalized (unit norm)"""
-     n = min(n, x.shape[0])
-     random_indices = torch.randperm(x.shape[0])[:n]
-     _x = x[random_indices]
-     flag = torch.allclose(torch.norm(_x, dim=-1), torch.ones(n, device=x.device))
-     return flag
-
-
  def quantile_min_max(x, q1=0.01, q2=0.99, n_sample=10000):
      if x.shape[0] > n_sample:
          np.random.seed(0)