PyPI - nystrom-ncut - Versions diffs - 0.0.9__tar.gz → 0.0.10__tar.gz - Mend

nystrom-ncut 0.0.9 (tar.gz) → 0.0.10 (tar.gz)

Files changed (21)

{nystrom_ncut-0.0.9/src/nystrom_ncut.egg-info → nystrom_ncut-0.0.10}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: nystrom_ncut
-Version: 0.0.9
+Version: 0.0.10
 Summary: Normalized Cut and Nyström Approximation
 Author-email: Huzheng Yang <huze.yann@gmail.com>, Wentinn Liao <wentinn.liao@gmail.com>
 Project-URL: Documentation, https://github.com/JophiArcana/Nystrom-NCUT/

{nystrom_ncut-0.0.9 → nystrom_ncut-0.0.10}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "nystrom_ncut"
-version = "0.0.9"
+version = "0.0.10"
 authors = [
     { name = "Huzheng Yang", email = "huze.yann@gmail.com" },
     { name = "Wentinn Liao", email = "wentinn.liao@gmail.com" },

nystrom_ncut-0.0.10/src/__init__.py ADDED Viewed

File without changes

{nystrom_ncut-0.0.9 → nystrom_ncut-0.0.10}/src/nystrom_ncut/__init__.py RENAMED Viewed

@@ -1,12 +1,13 @@
-from .ncut_pytorch import (
-    NCUT,
+from .nystrom import (
+    DistanceRealization,
+    NCut,
     axis_align,
 )
 from .propagation_utils import (
+    distance_from_features,
     affinity_from_features,
     extrapolate_knn_with_subsampling,
     extrapolate_knn,
-    quantile_normalize,
 )
 from .visualize_utils import (
     rgb_from_tsne_3d,

nystrom_ncut-0.0.10/src/nystrom_ncut/nystrom/__init__.py ADDED Viewed

@@ -0,0 +1,7 @@
+from .distance_realization import (
+    DistanceRealization,
+)
+from .normalized_cut import (
+    NCut,
+    axis_align,
+)

nystrom_ncut-0.0.10/src/nystrom_ncut/nystrom/distance_realization.py ADDED Viewed

@@ -0,0 +1,127 @@
+import torch
+from .nystrom import (
+    EigSolverOptions,
+    OnlineKernel,
+    OnlineNystromSubsampleFit,
+    solve_eig,
+)
+from ..common import (
+    DistanceOptions,
+    SampleOptions,
+)
+from ..propagation_utils import (
+    distance_from_features,
+)
+class GramKernel(OnlineKernel):
+    def __init__(
+        self,
+        distance: DistanceOptions,
+        eig_solver: EigSolverOptions,
+    ):
+        self.distance: DistanceOptions = distance
+        self.eig_solver: EigSolverOptions = eig_solver
+        # Anchor matrices
+        self.anchor_features: torch.Tensor = None               # [n x d]
+        self.A: torch.Tensor = None                             # [n x n]
+        self.Ainv: torch.Tensor = None                          # [n x n]
+        # Updated matrices
+        self.a_r: torch.Tensor = None                           # [n]
+        self.b_r: torch.Tensor = None                           # [n]
+        self.matrix_sum: torch.Tensor = torch.zeros(())         # []
+        self.n_features: int = None                             # N
+    def fit(self, features: torch.Tensor) -> None:
+        self.anchor_features = features                         # [n x d]
+        self.A = -0.5 * distance_from_features(
+            self.anchor_features,                               # [n x d]
+            self.anchor_features,
+            distance=self.distance,
+        )                                                       # [n x n]
+        d = features.shape[-1]
+        U, L = solve_eig(
+            self.A,
+            num_eig=d + 1,  # d * (d + 3) // 2 + 1,
+            eig_solver=self.eig_solver,
+        )                                                       # [n x (d + 1)], [d + 1]
+        self.Ainv = U @ torch.diag(1 / L) @ U.mT                # [n x n]
+        self.a_r = torch.sum(self.A, dim=-1)                    # [n]
+        self.b_r = torch.zeros_like(self.a_r)                   # [n]
+        self.matrix_sum = torch.sum(self.a_r)                   # []
+        self.n_features = features.shape[0]                     # n
+    def update(self, features: torch.Tensor) -> torch.Tensor:
+        B = -0.5 * distance_from_features(
+            self.anchor_features,                               # [n x d]
+            features,                                           # [m x d]
+            distance=self.distance,
+        )                                                       # [n x m]
+        b_r = torch.sum(B, dim=-1)                              # [n]
+        b_c = torch.sum(B, dim=-2)                              # [m]
+        self.b_r = self.b_r + b_r                               # [n]
+        self.matrix_sum = (
+            torch.sum(self.a_r)
+            + 2 * torch.sum(self.b_r)
+            + self.Ainv @ self.b_r @ self.b_r
+        )                                                       # []
+        self.n_features += features.shape[0]                    # N
+        row_sum = self.a_r + self.b_r                           # [n]
+        col_sum = b_c + B.mT @ self.Ainv @ self.b_r             # [m]
+        shift = -(row_sum[:, None] + col_sum) / self.n_features + self.matrix_sum / (self.n_features ** 2)  # [n x m]
+        return (B + shift).mT                                   # [m x n]
+    def transform(self, features: torch.Tensor = None) -> torch.Tensor:
+        row_sum = self.a_r + self.b_r
+        if features is None:
+            B = self.A                                          # [n x n]
+            col_sum = row_sum                                   # [n]
+        else:
+            B = -0.5 * distance_from_features(
+                self.anchor_features,
+                features,
+                distance=self.distance,
+            )
+            b_c = torch.sum(B, dim=-2)                          # [m]
+            col_sum = b_c + B.mT @ self.Ainv @ self.b_r         # [m]
+        shift = -(row_sum[:, None] + col_sum) / self.n_features + self.matrix_sum / (self.n_features ** 2)  # [n x m]
+        return (B + shift).mT                                   # [m x n]
+class DistanceRealization(OnlineNystromSubsampleFit):
+    """Nystrom Distance Realization for large scale graph."""
+    def __init__(
+        self,
+        n_components: int = 100,
+        num_sample: int = 10000,
+        sample_method: SampleOptions = "farthest",
+        distance: DistanceOptions = "cosine",
+        eig_solver: EigSolverOptions = "svd_lowrank",
+        chunk_size: int = 8192,
+    ):
+        """
+        Args:
+            n_components (int): number of top eigenvectors to return
+            num_sample (int): number of samples for Nystrom-like approximation,
+                reduce only if memory is not enough, increase for better approximation
+            sample_method (str): subgraph sampling, ['farthest', 'random'].
+                farthest point sampling is recommended for better Nystrom-approximation accuracy
+            distance (str): distance metric for affinity matrix, ['cosine', 'euclidean', 'rbf'].
+            eig_solver (str): eigen decompose solver, ['svd_lowrank', 'lobpcg', 'svd', 'eigh'].
+            chunk_size (int): chunk size for large-scale matrix multiplication
+        """
+        OnlineNystromSubsampleFit.__init__(
+            self,
+            n_components=n_components,
+            kernel=GramKernel(distance, eig_solver),
+            num_sample=num_sample,
+            sample_method=sample_method,
+            eig_solver=eig_solver,
+            chunk_size=chunk_size,
+        )
+        self.distance: DistanceOptions = distance

nystrom_ncut-0.0.9/src/nystrom_ncut/ncut_pytorch.py → nystrom_ncut-0.0.10/src/nystrom_ncut/nystrom/normalized_cut.py RENAMED Viewed

@@ -1,22 +1,18 @@
-import logging
-from typing import Tuple
 import torch
 import torch.nn.functional as Fn
-from .common import (
-    DistanceOptions,
-    SampleOptions,
-)
 from .nystrom import (
     EigSolverOptions,
     OnlineKernel,
-    OnlineNystrom,
+    OnlineNystromSubsampleFit,
     solve_eig,
 )
-from .propagation_utils import (
+from ..common import (
+    DistanceOptions,
+    SampleOptions,
+)
+from ..propagation_utils import (
     affinity_from_features,
-    run_subgraph_sampling,
 )
@@ -68,16 +64,16 @@ class LaplacianKernel(OnlineKernel):
         b_c = torch.sum(B, dim=-2)                              # [m]
         self.b_r = self.b_r + b_r                               # [n]
-        rowscale = self.a_r + self.b_r                          # [n]
-        colscale = b_c + B.mT @ self.Ainv @ self.b_r            # [m]
-        scale = (rowscale[:, None] * colscale) ** -0.5          # [n x m]
+        row_sum = self.a_r + self.b_r                           # [n]
+        col_sum = b_c + B.mT @ self.Ainv @ self.b_r             # [m]
+        scale = (row_sum[:, None] * col_sum) ** -0.5            # [n x m]
         return (B * scale).mT                                   # [m x n]
     def transform(self, features: torch.Tensor = None) -> torch.Tensor:
-        rowscale = self.a_r + self.b_r                          # [n]
+        row_sum = self.a_r + self.b_r                           # [n]
         if features is None:
             B = self.A                                          # [n x n]
-            colscale = rowscale                                 # [n]
+            col_sum = row_sum                                   # [n]
         else:
             B = affinity_from_features(
                 self.anchor_features,                           # [n x d]
@@ -86,12 +82,12 @@ class LaplacianKernel(OnlineKernel):
                 distance=self.distance,
             )                                                   # [n x m]
             b_c = torch.sum(B, dim=-2)                          # [m]
-            colscale = b_c + B.mT @ self.Ainv @ self.b_r        # [m]
-        scale = (rowscale[:, None] * colscale) ** -0.5          # [n x m]
+            col_sum = b_c + B.mT @ self.Ainv @ self.b_r         # [m]
+        scale = (row_sum[:, None] * col_sum) ** -0.5            # [n x m]
         return (B * scale).mT                                   # [m x n]
-class NCUT(OnlineNystrom):
+class NCut(OnlineNystromSubsampleFit):
     """Nystrom Normalized Cut for large scale graph."""
     def __init__(
@@ -102,7 +98,6 @@ class NCUT(OnlineNystrom):
         sample_method: SampleOptions = "farthest",
         distance: DistanceOptions = "cosine",
         eig_solver: EigSolverOptions = "svd_lowrank",
-        normalize_features: bool = None,
         chunk_size: int = 8192,
     ):
         """
@@ -116,110 +111,18 @@ class NCUT(OnlineNystrom):
                 farthest point sampling is recommended for better Nystrom-approximation accuracy
             distance (str): distance metric for affinity matrix, ['cosine', 'euclidean', 'rbf'].
             eig_solver (str): eigen decompose solver, ['svd_lowrank', 'lobpcg', 'svd', 'eigh'].
-            normalize_features (bool): normalize input features before computing affinity matrix,
-                default 'None' is True for cosine distance, False for euclidean distance and rbf
             chunk_size (int): chunk size for large-scale matrix multiplication
         """
-        OnlineNystrom.__init__(
+        OnlineNystromSubsampleFit.__init__(
             self,
             n_components=n_components,
             kernel=LaplacianKernel(affinity_focal_gamma, distance, eig_solver),
+            num_sample=num_sample,
+            sample_method=sample_method,
             eig_solver=eig_solver,
             chunk_size=chunk_size,
         )
-        self.num_sample: int = num_sample
-        self.sample_method: SampleOptions = sample_method
-        self.anchor_indices: torch.Tensor = None
         self.distance: DistanceOptions = distance
-        self.normalize_features: bool = normalize_features
-        if self.normalize_features is None:
-            if distance in ["cosine"]:
-                self.normalize_features = True
-            if distance in ["euclidean", "rbf"]:
-                self.normalize_features = False
-        self.chunk_size: int = chunk_size
-    def _fit_helper(
-        self,
-        features: torch.Tensor,
-        precomputed_sampled_indices: torch.Tensor,
-    ) -> Tuple[torch.Tensor, torch.Tensor]:
-        _n = features.shape[0]
-        if self.num_sample >= _n:
-            logging.info(
-                f"NCUT nystrom num_sample is larger than number of input samples, nyström approximation is not needed, setting num_sample={_n}"
-            )
-            self.num_sample = _n
-        assert self.distance in ["cosine", "euclidean", "rbf"], "distance should be 'cosine', 'euclidean', 'rbf'"
-        if self.normalize_features:
-            # features need to be normalized for affinity matrix computation (cosine distance)
-            features = torch.nn.functional.normalize(features, dim=-1)
-        if precomputed_sampled_indices is not None:
-            _sampled_indices = precomputed_sampled_indices
-        else:
-            _sampled_indices = run_subgraph_sampling(
-                features,
-                self.num_sample,
-                sample_method=self.sample_method,
-            )
-        self.anchor_indices = torch.sort(_sampled_indices).values
-        sampled_features = features[self.anchor_indices]
-        OnlineNystrom.fit(self, sampled_features)
-        _n_not_sampled = _n - len(sampled_features)
-        if _n_not_sampled > 0:
-            unsampled_indices = torch.full((_n,), True, device=features.device).scatter_(0, self.anchor_indices, False)
-            unsampled_features = features[unsampled_indices]
-            V_unsampled, _ = OnlineNystrom.update(self, unsampled_features)
-        else:
-            unsampled_indices = V_unsampled = None
-        return unsampled_indices, V_unsampled
-    def fit(
-        self,
-        features: torch.Tensor,
-        precomputed_sampled_indices: torch.Tensor = None,
-    ):
-        """Fit Nystrom Normalized Cut on the input features.
-        Args:
-            features (torch.Tensor): input features, shape (n_samples, n_features)
-            precomputed_sampled_indices (torch.Tensor): precomputed sampled indices, shape (num_sample,)
-                override the sample_method, if not None
-        Returns:
-            (NCUT): self
-        """
-        NCUT._fit_helper(self, features, precomputed_sampled_indices)
-        return self
-    def fit_transform(
-        self,
-        features: torch.Tensor,
-        precomputed_sampled_indices: torch.Tensor = None,
-    ) -> Tuple[torch.Tensor, torch.Tensor]:
-        """
-        Args:
-            features (torch.Tensor): input features, shape (n_samples, n_features)
-            precomputed_sampled_indices (torch.Tensor): precomputed sampled indices, shape (num_sample,)
-                override the sample_method, if not None
-        Returns:
-            (torch.Tensor): eigen_vectors, shape (n_samples, num_eig)
-            (torch.Tensor): eigen_values, sorted in descending order, shape (num_eig,)
-        """
-        unsampled_indices, V_unsampled = NCUT._fit_helper(self, features, precomputed_sampled_indices)
-        V_sampled, L = OnlineNystrom.transform(self)
-        if unsampled_indices is not None:
-            V = torch.zeros((len(unsampled_indices), self.n_components), device=features.device)
-            V[~unsampled_indices] = V_sampled
-            V[unsampled_indices] = V_unsampled
-        else:
-            V = V_sampled
-        return V, L
 def axis_align(eigen_vectors: torch.Tensor, max_iter=300):

{nystrom_ncut-0.0.9/src/nystrom_ncut → nystrom_ncut-0.0.10/src/nystrom_ncut/nystrom}/nystrom.py RENAMED Viewed

@@ -1,10 +1,15 @@
+import logging
 from typing import Literal, Tuple
 import torch
-from .common import (
+from ..common import (
+    SampleOptions,
     ceildiv,
 )
+from ..propagation_utils import (
+    run_subgraph_sampling,
+)
 EigSolverOptions = Literal["svd_lowrank", "lobpcg", "svd", "eigh"]
@@ -134,6 +139,102 @@ class OnlineNystrom:
         return VS, self.LS                                                                          # [m x n_components], [n_components]
+class OnlineNystromSubsampleFit(OnlineNystrom):
+    def __init__(
+        self,
+        n_components: int,
+        kernel: OnlineKernel,
+        num_sample: int,
+        sample_method: SampleOptions,
+        eig_solver: EigSolverOptions = "svd_lowrank",
+        chunk_size: int = 8192,
+    ):
+        OnlineNystrom.__init__(
+            self,
+            n_components=n_components,
+            kernel=kernel,
+            eig_solver=eig_solver,
+            chunk_size=chunk_size,
+        )
+        self.num_sample: int = num_sample
+        self.sample_method: SampleOptions = sample_method
+        self.anchor_indices: torch.Tensor = None
+    def _fit_helper(
+        self,
+        features: torch.Tensor,
+        precomputed_sampled_indices: torch.Tensor,
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
+        _n = features.shape[0]
+        if self.num_sample >= _n:
+            logging.info(
+                f"NCUT nystrom num_sample is larger than number of input samples, nyström approximation is not needed, setting num_sample={_n}"
+            )
+            self.num_sample = _n
+        if precomputed_sampled_indices is not None:
+            self.anchor_indices = precomputed_sampled_indices
+        else:
+            self.anchor_indices = run_subgraph_sampling(
+                features,
+                self.num_sample,
+                sample_method=self.sample_method,
+            )
+        sampled_features = features[self.anchor_indices]
+        OnlineNystrom.fit(self, sampled_features)
+        _n_not_sampled = _n - len(sampled_features)
+        if _n_not_sampled > 0:
+            unsampled_indices = torch.full((_n,), True, device=features.device).scatter_(0, self.anchor_indices, False)
+            unsampled_features = features[unsampled_indices]
+            V_unsampled, _ = OnlineNystrom.update(self, unsampled_features)
+        else:
+            unsampled_indices = V_unsampled = None
+        return unsampled_indices, V_unsampled
+    def fit(
+        self,
+        features: torch.Tensor,
+        precomputed_sampled_indices: torch.Tensor = None,
+    ):
+        """Fit Nystrom Normalized Cut on the input features.
+        Args:
+            features (torch.Tensor): input features, shape (n_samples, n_features)
+            precomputed_sampled_indices (torch.Tensor): precomputed sampled indices, shape (num_sample,)
+                override the sample_method, if not None
+        Returns:
+            (NCut): self
+        """
+        OnlineNystromSubsampleFit._fit_helper(self, features, precomputed_sampled_indices)
+        return self
+    def fit_transform(
+        self,
+        features: torch.Tensor,
+        precomputed_sampled_indices: torch.Tensor = None,
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
+        """
+        Args:
+            features (torch.Tensor): input features, shape (n_samples, n_features)
+            precomputed_sampled_indices (torch.Tensor): precomputed sampled indices, shape (num_sample,)
+                override the sample_method, if not None
+        Returns:
+            (torch.Tensor): eigen_vectors, shape (n_samples, num_eig)
+            (torch.Tensor): eigen_values, sorted in descending order, shape (num_eig,)
+        """
+        unsampled_indices, V_unsampled = OnlineNystromSubsampleFit._fit_helper(self, features, precomputed_sampled_indices)
+        V_sampled, L = OnlineNystrom.transform(self)
+        if unsampled_indices is not None:
+            V = torch.zeros((len(unsampled_indices), self.n_components), device=features.device)
+            V[~unsampled_indices] = V_sampled
+            V[unsampled_indices] = V_unsampled
+        else:
+            V = V_sampled
+        return V, L
 def solve_eig(
     A: torch.Tensor,
     num_eig: int,

{nystrom_ncut-0.0.9 → nystrom_ncut-0.0.10}/src/nystrom_ncut/propagation_utils.py RENAMED Viewed

@@ -47,6 +47,7 @@ def run_subgraph_sampling(
             sampled_indices = torch.randperm(features.shape[0])[:num_sample]
         else:
             raise ValueError("sample_method should be 'farthest' or 'random'")
+        sampled_indices = torch.sort(sampled_indices).values
     return sampled_indices.to(features.device)

{nystrom_ncut-0.0.9 → nystrom_ncut-0.0.10}/src/nystrom_ncut/visualize_utils.py RENAMED Viewed

@@ -36,12 +36,18 @@ def _rgb_with_dimensionality_reduction(
     device: str,
 ) -> Tuple[torch.Tensor, torch.Tensor]:
-    features = extrapolate_knn(
-        features,
-        features,
-        features,
-        distance="cosine",
-    )
+    if True:
+        _subgraph_indices = run_subgraph_sampling(
+            features,
+            num_sample=10000,
+            sample_method="farthest",
+        )
+        features = extrapolate_knn(
+            features[_subgraph_indices],
+            features[_subgraph_indices],
+            features,
+            distance="cosine",
+        )
     subgraph_indices = run_subgraph_sampling(
         features,

{nystrom_ncut-0.0.9 → nystrom_ncut-0.0.10/src/nystrom_ncut.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: nystrom_ncut
-Version: 0.0.9
+Version: 0.0.10
 Summary: Normalized Cut and Nyström Approximation
 Author-email: Huzheng Yang <huze.yann@gmail.com>, Wentinn Liao <wentinn.liao@gmail.com>
 Project-URL: Documentation, https://github.com/JophiArcana/Nystrom-NCUT/

{nystrom_ncut-0.0.9 → nystrom_ncut-0.0.10}/src/nystrom_ncut.egg-info/SOURCES.txt RENAMED Viewed

@@ -3,14 +3,17 @@ MANIFEST.in
 README.md
 pyproject.toml
 requirements.txt
+src/__init__.py
 src/nystrom_ncut/__init__.py
 src/nystrom_ncut/common.py
-src/nystrom_ncut/ncut_pytorch.py
-src/nystrom_ncut/nystrom.py
 src/nystrom_ncut/propagation_utils.py
 src/nystrom_ncut/visualize_utils.py
 src/nystrom_ncut.egg-info/PKG-INFO
 src/nystrom_ncut.egg-info/SOURCES.txt
 src/nystrom_ncut.egg-info/dependency_links.txt
 src/nystrom_ncut.egg-info/top_level.txt
+src/nystrom_ncut/nystrom/__init__.py
+src/nystrom_ncut/nystrom/distance_realization.py
+src/nystrom_ncut/nystrom/normalized_cut.py
+src/nystrom_ncut/nystrom/nystrom.py
 tests/test.py

{nystrom_ncut-0.0.9 → nystrom_ncut-0.0.10}/src/nystrom_ncut.egg-info/top_level.txt RENAMED Viewed

@@ -1 +1,2 @@
+__init__
 nystrom_ncut

{nystrom_ncut-0.0.9 → nystrom_ncut-0.0.10}/tests/test.py RENAMED Viewed

@@ -1,10 +1,9 @@
 import numpy as np
 import torch
-import torch.nn.functional as Fn
 from matplotlib import pyplot as plt
-from src.nystrom_ncut.ncut_pytorch import NCUT, axis_align, affinity_from_features
-from ncut_pytorch import NCUT as OldNCUT
+from src.nystrom_ncut import NCut, affinity_from_features
 # from ncut_pytorch.src import rgb_from_umap_sphere
 # from ncut_pytorch.src.new_ncut_pytorch import NewNCUT
@@ -73,7 +72,7 @@ if __name__ == "__main__":
     def print_re(re):
         print(f"max: {re.max().item()}, mean: {re.mean().item()}, min: {re.min().item()}")
-    nc0 = NCUT(n_components=n_components, num_sample=num_sample, distance=distance, eig_solver=eig_solver)
+    nc0 = NCut(n_components=n_components, num_sample=num_sample, distance=distance, eig_solver=eig_solver)
     X0, eigs0 = nc0.fit_transform(M)
     re0 = rel_error(X0, eigs0)

{nystrom_ncut-0.0.9 → nystrom_ncut-0.0.10}/LICENSE RENAMED Viewed

File without changes

{nystrom_ncut-0.0.9 → nystrom_ncut-0.0.10}/MANIFEST.in RENAMED Viewed

File without changes

{nystrom_ncut-0.0.9 → nystrom_ncut-0.0.10}/README.md RENAMED Viewed

File without changes

{nystrom_ncut-0.0.9 → nystrom_ncut-0.0.10}/requirements.txt RENAMED Viewed

File without changes

{nystrom_ncut-0.0.9 → nystrom_ncut-0.0.10}/setup.cfg RENAMED Viewed

File without changes

{nystrom_ncut-0.0.9 → nystrom_ncut-0.0.10}/src/nystrom_ncut/common.py RENAMED Viewed

File without changes

{nystrom_ncut-0.0.9 → nystrom_ncut-0.0.10}/src/nystrom_ncut.egg-info/dependency_links.txt RENAMED Viewed

File without changes

nystrom-ncut 0.0.9__tar.gz → 0.0.10__tar.gz

nystrom-ncut 0.0.9 (tar.gz) → 0.0.10 (tar.gz)