nystrom-ncut 0.0.2__tar.gz → 0.0.4__tar.gz

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: nystrom_ncut
-Version: 0.0.2
+Version: 0.0.4
 Summary: Normalized Cut and Nyström Approximation
 Author-email: Huzheng Yang <huze.yann@gmail.com>, Wentinn Liao <wentinn.liao@gmail.com>
 Project-URL: Documentation, https://github.com/JophiArcana/Nystrom-NCUT/
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "nystrom_ncut"
-version = "0.0.2"
+version = "0.0.4"
 authors = [
     { name = "Huzheng Yang", email = "huze.yann@gmail.com" },
     { name = "Wentinn Liao", email = "wentinn.liao@gmail.com" },
@@ -94,20 +94,19 @@ class NCUT(OnlineNystrom):
 
     def __init__(
         self,
-        num_eig: int = 100,
+        n_components: int = 100,
         affinity_focal_gamma: float = 1.0,
         num_sample: int = 10000,
         sample_method: Literal["farthest", "random"] = "farthest",
         distance: DistanceOptions = "cosine",
         eig_solver: EigSolverOptions = "svd_lowrank",
         normalize_features: bool = None,
-        device: str = None,
         move_output_to_cpu: bool = False,
-        matmul_chunk_size: int = 8096,
+        chunk_size: int = 8192,
     ):
         """
         Args:
-            num_eig (int): number of top eigenvectors to return
+            n_components (int): number of top eigenvectors to return
             affinity_focal_gamma (float): affinity matrix temperature, lower t reduce the not-so-connected edge weights,
                 smaller t result in more sharp eigenvectors.
             num_sample (int): number of samples for Nystrom-like approximation,
@@ -118,17 +117,15 @@ class NCUT(OnlineNystrom):
             eig_solver (str): eigen decompose solver, ['svd_lowrank', 'lobpcg', 'svd', 'eigh'].
             normalize_features (bool): normalize input features before computing affinity matrix,
                 default 'None' is True for cosine distance, False for euclidean distance and rbf
-            device (str): device to use for eigen computation,
-                move to GPU to speeds up a bit (~5x faster)
             move_output_to_cpu (bool): move output to CPU, set to True if you have memory issue
-            matmul_chunk_size (int): chunk size for large-scale matrix multiplication
+            chunk_size (int): chunk size for large-scale matrix multiplication
         """
         OnlineNystrom.__init__(
             self,
-            n_components=num_eig,
+            n_components=n_components,
             kernel=LaplacianKernel(affinity_focal_gamma, distance, eig_solver),
             eig_solver=eig_solver,
-            chunk_size=matmul_chunk_size,
+            chunk_size=chunk_size,
         )
         self.num_sample = num_sample
         self.sample_method = sample_method
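
For orientation, a minimal usage sketch of the constructor after the rename; the import path follows the package's own test file, and the tensor sizes are illustrative assumptions. Note that the 0.0.2 `device` argument is gone entirely: computation now stays on `features.device`.

    import torch
    from src.nystrom_ncut.ncut_pytorch import NCUT  # path as used in the package's test file

    features = torch.rand(1000, 64)  # [n_samples x n_features], illustrative sizes
    ncut = NCUT(
        n_components=20,        # formerly `num_eig`
        num_sample=300,
        sample_method="random",
        chunk_size=8192,        # formerly `matmul_chunk_size`, default corrected from 8096
    )
    X, eigs = ncut.fit_transform(features)  # eigenvectors [n x 20], eigenvalues [20]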
@@ -140,19 +137,14 @@ class NCUT(OnlineNystrom):
         if distance in ["euclidean", "rbf"]:
             self.normalize_features = False
 
-        self.device = device
         self.move_output_to_cpu = move_output_to_cpu
-        self.matmul_chunk_size = matmul_chunk_size
+        self.chunk_size = chunk_size
 
     def _fit_helper(
         self,
         features: torch.Tensor,
         precomputed_sampled_indices: torch.Tensor,
     ) -> Tuple[torch.Tensor, torch.Tensor]:
-        # move subgraph gpu to speed up
-        original_device = features.device
-        device = original_device if self.device is None else self.device
-
         _n = features.shape[0]
         if self.num_sample >= _n:
             logging.info(
@@ -184,13 +176,13 @@ class NCUT(OnlineNystrom):
                 num_sample=self.num_sample,
                 sample_method=self.sample_method,
             )
-            sampled_features = features[sampled_indices].to(device)
+            sampled_features = features[sampled_indices]
         OnlineNystrom.fit(self, sampled_features)
 
         _n_not_sampled = _n - len(sampled_features)
         if _n_not_sampled > 0:
-            unsampled_indices = torch.full((_n,), True).scatter(0, sampled_indices, False)
-            unsampled_features = features[unsampled_indices].to(device)
+            unsampled_indices = torch.full((_n,), True, device=features.device).scatter_(0, sampled_indices, False)
+            unsampled_features = features[unsampled_indices]
             V_unsampled, _ = OnlineNystrom.update(self, unsampled_features)
         else:
             unsampled_indices = V_unsampled = None
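
The rewritten mask line switches from scatter (which allocates a copy) to the in-place scatter_, and pins the mask to features.device. A standalone sketch of what it computes, with toy values:

    import torch

    n = 10
    sampled_indices = torch.tensor([1, 4, 7])
    # All-True mask, then mark the sampled positions False in place.
    unsampled = torch.full((n,), True).scatter_(0, sampled_indices, False)
    print(unsampled)
    # tensor([ True, False,  True,  True, False,  True,  True, False,  True,  True])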
@@ -231,7 +223,7 @@ class NCUT(OnlineNystrom):
         V_sampled, L = OnlineNystrom.transform(self)
 
         if unsampled_indices is not None:
-            V = torch.zeros((len(unsampled_indices), self.n_components))
+            V = torch.zeros((len(unsampled_indices), self.n_components), device=features.device)
             V[~unsampled_indices] = V_sampled
             V[unsampled_indices] = V_unsampled
         else:
@@ -52,6 +52,18 @@ class OnlineNystrom:
         self.transform_matrix: torch.Tensor = None    # [n x n_components]
         self.LS: torch.Tensor = None                  # [n]
 
+    def _update_to_kernel(self) -> Tuple[torch.Tensor, torch.Tensor]:
+        self.A = self.S = self.kernel.transform()
+        U, L = solve_eig(
+            self.A,
+            num_eig=self.inverse_approximation_dim,
+            eig_solver=self.eig_solver,
+        )                                             # [n x (? + 1)], [? + 1]
+        self.Ahinv_UL = U * (L ** -0.5)               # [n x (? + 1)]
+        self.Ahinv_VT = U.mT                          # [(? + 1) x n]
+        self.Ahinv = self.Ahinv_UL @ self.Ahinv_VT    # [n x n]
+        return U, L
+
     def fit(self, features: torch.Tensor):
         OnlineNystrom.fit_transform(self, features)
         return self
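
The extracted helper caches an approximate inverse square root of the anchor kernel matrix: if A ≈ U diag(L) Uᵀ, then Ahinv = U diag(L^-1/2) Uᵀ ≈ A^-1/2, kept in factored form (Ahinv_UL, Ahinv_VT) so later updates can apply it cheaply. A minimal sketch of the identity, assuming a symmetric positive-definite A and a full eigendecomposition:

    import torch

    torch.manual_seed(0)
    X = torch.randn(6, 6)
    A = X @ X.mT + 6 * torch.eye(6)      # symmetric positive definite
    L, U = torch.linalg.eigh(A)          # A = U diag(L) U^T
    Ahinv = (U * L ** -0.5) @ U.mT       # U diag(L^-1/2) U^T
    # Ahinv @ A @ Ahinv recovers the identity (up to truncation error)
    print(torch.allclose(Ahinv @ A @ Ahinv, torch.eye(6), atol=1e-5))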
@@ -60,17 +72,8 @@ class OnlineNystrom:
         self.anchor_features = features
 
         self.kernel.fit(self.anchor_features)
-        self.A = self.S = self.kernel.transform()     # [n x n]
-
         self.inverse_approximation_dim = max(self.n_components, features.shape[-1]) + 1
-        U, L = solve_eig(
-            self.A,
-            num_eig=self.inverse_approximation_dim,
-            eig_solver=self.eig_solver,
-        )                                             # [n x (? + 1)], [? + 1]
-        self.Ahinv_UL = U * (L ** -0.5)               # [n x (? + 1)]
-        self.Ahinv_VT = U.mT                          # [(? + 1) x n]
-        self.Ahinv = self.Ahinv_UL @ self.Ahinv_VT    # [n x n]
+        U, L = self._update_to_kernel()               # [n x (? + 1)], [? + 1]
 
         self.transform_matrix = (U / L)[:, :self.n_components]    # [n x n_components]
         self.LS = L[:self.n_components]               # [n_components]
@@ -83,6 +86,7 @@ class OnlineNystrom:
             chunks = torch.chunk(features, n_chunks, dim=0)
             for chunk in chunks:
                 self.kernel.update(chunk)
+            self._update_to_kernel()
 
             compressed_BBT = torch.zeros((self.inverse_approximation_dim, self.inverse_approximation_dim))    # [(? + 1) x (? + 1))]
             for i, chunk in enumerate(chunks):
@@ -101,6 +105,7 @@ class OnlineNystrom:
         else:
             """ Unchunked version """
             B = self.kernel.update(features).mT       # [n x m]
+            self._update_to_kernel()
             compressed_B = self.Ahinv_VT @ B          # [indirect_pca_dim x m]
 
         self.S = self.S + self.Ahinv_UL @ (compressed_B @ compressed_B.mT) @ self.Ahinv_UL.mT    # [n x n]
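
For the update itself: with compressed_B = Uᵀ B and Ahinv_UL = U diag(L^-1/2), the accumulated term equals A^-1/2 B Bᵀ A^-1/2 restricted to the span of U, so S maintains the usual Nyström correction S ≈ A + A^-1/2 B Bᵀ A^-1/2, now recomputed against the kernel refreshed by the new _update_to_kernel() call. A small check of that algebra under those definitions:

    import torch

    torch.manual_seed(0)
    n, m = 6, 4
    A = torch.randn(n, n)
    A = A @ A.mT + n * torch.eye(n)               # symmetric positive definite anchor kernel
    B = torch.randn(n, m)                         # anchor-to-new-points kernel block
    L, U = torch.linalg.eigh(A)
    Ahinv_UL, Ahinv_VT = U * L ** -0.5, U.mT
    compressed_B = Ahinv_VT @ B
    term = Ahinv_UL @ (compressed_B @ compressed_B.mT) @ Ahinv_UL.mT
    # Same quantity written directly as A^-1/2 @ B @ B^T @ A^-1/2:
    Ah = (U * L ** -0.5) @ U.mT
    print(torch.allclose(term, Ah @ B @ B.mT @ Ah, atol=1e-5))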
@@ -43,7 +43,7 @@ def run_subgraph_sampling(
         sampled_indices = torch.randperm(features.shape[0])[:num_sample]
     else:
         raise ValueError("sample_method should be 'farthest' or 'random'")
-    return sampled_indices
+    return sampled_indices.to(features.device)
 
 
 def farthest_point_sampling(
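
Moving the returned indices onto features.device matters downstream: scatter_ (used in NCUT._fit_helper above) requires its index tensor on the same device as the tensor it writes into. A sketch of the failure this guards against (hypothetical, needs a CUDA device):

    import torch

    if torch.cuda.is_available():
        mask = torch.full((100,), True, device="cuda")
        cpu_indices = torch.randperm(100)[:10]                # sampled on CPU
        # mask.scatter_(0, cpu_indices, False)                # RuntimeError: device mismatch
        mask.scatter_(0, cpu_indices.to(mask.device), False)  # OK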
@@ -139,7 +139,7 @@ def propagate_knn(
     knn: int = 10,
     distance: Literal["cosine", "euclidean", "rbf"] = "cosine",
     affinity_focal_gamma: float = 1.0,
-    chunk_size: int = 8096,
+    chunk_size: int = 8192,
     device: str = None,
     move_output_to_cpu: bool = False,
 ):
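
The old default 8096 was presumably a typo for 8192 (2 ** 13); the replacement makes the chunk size an exact power of two, and the same correction is applied to every propagate_* signature and docstring below.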
@@ -206,7 +206,7 @@ def propagate_nearest(
     inp_features: torch.Tensor,
     subgraph_features: torch.Tensor,
     distance: Literal["cosine", "euclidean", "rbf"] = "cosine",
-    chunk_size: int = 8096,
+    chunk_size: int = 8192,
     device: str = None,
     move_output_to_cpu: bool = False,
 ):
@@ -254,7 +254,7 @@ def propagate_eigenvectors(
         knn (int): number of KNN to propagate eigenvectors, default 3
         num_sample (int): number of samples for subgraph sampling, default 50000
         sample_method (str): sample method, 'farthest' (default) or 'random'
-        chunk_size (int): chunk size for matrix multiplication, default 8096
+        chunk_size (int): chunk size for matrix multiplication, default 8192
         device (str): device to use for computation, if None, will not change device
     Returns:
         torch.Tensor: propagated eigenvectors, shape (n_new_samples, num_eig)
@@ -420,7 +420,7 @@ def propagate_rgb_color(
     knn: int = 10,
     num_sample: int = 1000,
     sample_method: Literal["farthest", "random"] = "farthest",
-    chunk_size: int = 8096,
+    chunk_size: int = 8192,
     device: str = None,
 ):
     """Propagate RGB color to new nodes using KNN.
@@ -431,7 +431,7 @@ def propagate_rgb_color(
         knn (int): number of KNN to propagate RGB color, default 1
         num_sample (int): number of samples for subgraph sampling, default 50000
         sample_method (str): sample method, 'farthest' (default) or 'random'
-        chunk_size (int): chunk size for matrix multiplication, default 8096
+        chunk_size (int): chunk size for matrix multiplication, default 8192
         device (str): device to use for computation, if None, will not change device
     Returns:
         torch.Tensor: propagated RGB color for each data sample, shape (n_new_samples, 3)
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: nystrom_ncut
-Version: 0.0.2
+Version: 0.0.4
 Summary: Normalized Cut and Nyström Approximation
 Author-email: Huzheng Yang <huze.yann@gmail.com>, Wentinn Liao <wentinn.liao@gmail.com>
 Project-URL: Documentation, https://github.com/JophiArcana/Nystrom-NCUT/
@@ -1,5 +1,7 @@
 import numpy as np
 import torch
+import torch.nn.functional as Fn
+
 from src.nystrom_ncut.ncut_pytorch import NCUT, axis_align
 # from ncut_pytorch.src import rgb_from_umap_sphere
 # from ncut_pytorch.src.new_ncut_pytorch import NewNCUT
@@ -35,28 +37,36 @@ if __name__ == "__main__":
     # # ))
     # raise Exception(
 
-    torch.set_printoptions(precision=12, sci_mode=False, linewidth=400)
-    torch.manual_seed(1212)
-    np.random.seed(1212)
+    torch.set_printoptions(precision=8, sci_mode=False, linewidth=400)
+    torch.set_default_dtype(torch.float64)
+    # torch.manual_seed(1212)
+    # np.random.seed(1212)
 
-    M = torch.rand((10000, 12))
-    # NC = NCUT(num_eig=5, knn=None, verbose=True)
-    kwargs = dict(num_eig=7, sample_method="random")
-    nNC = NCUT(**kwargs)
+    M = torch.rand((200, 12))
+    NC = NCUT(n_components=12, num_sample=80, sample_method="random", chunk_size=20)
 
     torch.manual_seed(1212)
     np.random.seed(1212)
-    nX, neigs = nNC.fit_transform(M)
-    # print(neigs)
-    # print(nX.mT @ nX)
+    X, eigs = NC.fit_transform(M)
+    print(eigs)
+    raise Exception()
 
-    torch.manual_seed(1212)
-    np.random.seed(1212)
+    normalized_M = Fn.normalize(M, p=2, dim=-1)
+    A = torch.exp(-(1 - normalized_M @ normalized_M.mT))
+    R = torch.diag(torch.sum(A, dim=-1) ** -0.5)
+    L = R @ A @ R
+    # print(L)
+    print(X @ torch.diag(eigs) @ X.mT)
+    print(L)
+    print(torch.abs(X @ torch.diag(eigs) @ X.mT / L - 1))
 
-    aX, R = axis_align(nX)
-    print(aX[:3])
-    print(R)
-    print(R @ R.mT)
+    # torch.manual_seed(1212)
+    # np.random.seed(1212)
+    #
+    # aX, R = axis_align(X)
+    # print(aX[:3])
+    # print(R)
+    # print(R @ R.mT)
     raise Exception()
 
 
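The rewritten test block builds a dense reference for the quantity NCUT approximates: with normalized_M = Fn.normalize(M, p=2, dim=-1), it forms the cosine affinity A = exp(-(1 - normalized_M @ normalized_M.mT)), symmetrically normalizes it to L = R @ A @ R where R = diag(rowsum(A) ** -0.5), and compares the spectral reconstruction X @ diag(eigs) @ X.mT against L, which should hold tightly here since n_components=12 matches the feature dimension. Note that the unconditional raise Exception() right after print(eigs) stops the script before that comparison runs, so the reconstruction check below it is dead code, apparently left in mid-debugging.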