PyPI - nystrom-ncut - Versions diffs - 0.0.7__tar.gz → 0.0.9__tar.gz - Mend

nystrom-ncut 0.0.7tar.gz → 0.0.9tar.gz

Files changed (19) hide show

{nystrom_ncut-0.0.7/src/nystrom_ncut.egg-info → nystrom_ncut-0.0.9}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: nystrom_ncut
-Version: 0.0.7
+Version: 0.0.9
 Summary: Normalized Cut and Nyström Approximation
 Author-email: Huzheng Yang <huze.yann@gmail.com>, Wentinn Liao <wentinn.liao@gmail.com>
 Project-URL: Documentation, https://github.com/JophiArcana/Nystrom-NCUT/

{nystrom_ncut-0.0.7 → nystrom_ncut-0.0.9}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "nystrom_ncut"
-version = "0.0.7"
+version = "0.0.9"
 authors = [
     { name = "Huzheng Yang", email = "huze.yann@gmail.com" },
     { name = "Wentinn Liao", email = "wentinn.liao@gmail.com" },

nystrom_ncut-0.0.9/src/nystrom_ncut/common.py ADDED Viewed

@@ -0,0 +1,61 @@
+from typing import Any, Literal
+import numpy as np
+import torch
+import torch.nn.functional as Fn
+DistanceOptions = Literal["cosine", "euclidean", "rbf"]
+SampleOptions = Literal["farthest", "random"]
+def ceildiv(a: int, b: int) -> int:
+    return -(-a // b)
+def lazy_normalize(x: torch.Tensor, n: int = 1000, **normalize_kwargs: Any) -> torch.Tensor:
+    numel = np.prod(x.shape[:-1])
+    n = min(n, numel)
+    random_indices = torch.randperm(numel)[:n]
+    _x = x.flatten(0, -2)[random_indices]
+    if torch.allclose(torch.norm(_x, **normalize_kwargs), torch.ones(n, device=x.device)):
+        return x
+    else:
+        return Fn.normalize(x, **normalize_kwargs)
+def quantile_min_max(x, q1=0.01, q2=0.99, n_sample=10000):
+    if x.shape[0] > n_sample:
+        np.random.seed(0)
+        random_idx = np.random.choice(x.shape[0], n_sample, replace=False)
+        vmin, vmax = x[random_idx].quantile(q1), x[random_idx].quantile(q2)
+    else:
+        vmin, vmax = x.quantile(q1), x.quantile(q2)
+    return vmin, vmax
+def quantile_normalize(x, q=0.95):
+    """normalize each dimension of x to [0, 1], take 95-th percentage, this robust to outliers
+        </br> 1. sort x
+        </br> 2. take q-th quantile
+        </br>     min_value -> (1-q)-th quantile
+        </br>     max_value -> q-th quantile
+        </br> 3. normalize
+        </br> x = (x - min_value) / (max_value - min_value)
+    Args:
+        x (torch.Tensor): input tensor, shape (n_samples, n_features)
+            normalize each feature to 0-1 range
+        q (float): quantile, default 0.95
+    Returns:
+        torch.Tensor: quantile normalized tensor
+    """
+    # normalize x to 0-1 range, max value is q-th quantile
+    # quantile makes the normalization robust to outliers
+    if isinstance(x, np.ndarray):
+        x = torch.tensor(x)
+    vmax, vmin = quantile_min_max(x, q, 1 - q)
+    x = (x - vmin) / (vmax - vmin)
+    x = x.clamp(0, 1)
+    return x

{nystrom_ncut-0.0.7 → nystrom_ncut-0.0.9}/src/nystrom_ncut/ncut_pytorch.py RENAMED Viewed

@@ -1,5 +1,5 @@
 import logging
-from typing import Literal, Tuple
+from typing import Tuple
 import torch
 import torch.nn.functional as Fn

{nystrom_ncut-0.0.7 → nystrom_ncut-0.0.9}/src/nystrom_ncut/nystrom.py RENAMED Viewed

@@ -2,7 +2,9 @@ from typing import Literal, Tuple
 import torch
-from .common import ceildiv
+from .common import (
+    ceildiv,
+)
 EigSolverOptions = Literal["svd_lowrank", "lobpcg", "svd", "eigh"]

{nystrom_ncut-0.0.7 → nystrom_ncut-0.0.9}/src/nystrom_ncut/propagation_utils.py RENAMED Viewed

@@ -1,5 +1,4 @@
 import logging
-from typing import Literal
 import numpy as np
 import torch
@@ -98,7 +97,11 @@ def distance_from_features(
         D = torch.cdist(features, features_B, p=2)
     elif distance == "rbf":
         D = torch.cdist(features, features_B, p=2) ** 2
-        D = D / (2 * features.var(dim=0).sum())
+        # Outlier-robust scale invariance using quantiles to estimate standard deviation
+        stds = torch.quantile(features, q=torch.tensor((0.158655, 0.841345), device=features.device), dim=0)
+        stds = (stds[1] - stds[0]) / 2
+        D = D / (2 * torch.linalg.norm(stds) ** 2)
     else:
         raise ValueError("distance should be 'cosine' or 'euclidean', 'rbf'")
     return D
@@ -178,39 +181,17 @@ def extrapolate_knn(
     V_list = []
     for _v in torch.chunk(extrapolation_features, n_chunks, dim=0):
         _v = _v.to(device)                                                                              # [_m x d]
         _A = affinity_from_features(anchor_features, _v, affinity_focal_gamma, distance).mT             # [_m x n]
         if knn is not None:
             _A, indices = _A.topk(k=knn, dim=-1, largest=True)                                          # [_m x k], [_m x k]
             _anchor_output = anchor_output[indices]                                                     # [_m x k x d]
         else:
             _anchor_output = anchor_output[None]                                                        # [1 x n x d]
-        _A = Fn.normalize(_A, p=1, dim=-1)
-        # if distance == 'cosine':
-        #     _A = _v @ subgraph_features.T
-        # elif distance == 'euclidean':
-        #     _A = - torch.cdist(_v, subgraph_features, p=2)
-        # elif distance == 'rbf':
-        #     _A = - torch.cdist(_v, subgraph_features, p=2) ** 2
-        # else:
-        #     raise ValueError("distance should be 'cosine' or 'euclidean', 'rbf'")
-        #
-        # # keep topk KNN for each row
-        # topk_sim, topk_idx = _A.topk(knn, dim=-1, largest=True)
-        # row_id = torch.arange(topk_idx.shape[0], device=_A.device)[:, None].expand(
-        #     -1, topk_idx.shape[1]
-        # )
-        # _A = torch.sparse_coo_tensor(
-        #     torch.stack([row_id, topk_idx], dim=-1).reshape(-1, 2).T,
-        #     topk_sim.reshape(-1),
-        #     size=(_A.shape[0], _A.shape[1]),
-        #     device=_A.device,
-        # )
-        # _A = _A.to_dense().to(dtype=subgraph_output.dtype)
-        # _D = _A.sum(-1)
-        # _A /= _D[:, None]
-        _V = (_A[:, None, :] @ _anchor_output).squeeze(1)
+        _A = Fn.normalize(_A, p=1, dim=-1)                                                              # [_m x k]
+        _V = (_A[:, None, :] @ _anchor_output).squeeze(1)                                               # [_m x d]
         if move_output_to_cpu:
             _V = _V.cpu()
         V_list.append(_V)
@@ -274,40 +255,3 @@ def extrapolate_knn_with_subsampling(
         device=device
     )
     return new_eigenvectors
-def quantile_min_max(x, q1=0.01, q2=0.99, n_sample=10000):
-    if x.shape[0] > n_sample:
-        np.random.seed(0)
-        random_idx = np.random.choice(x.shape[0], n_sample, replace=False)
-        vmin, vmax = x[random_idx].quantile(q1), x[random_idx].quantile(q2)
-    else:
-        vmin, vmax = x.quantile(q1), x.quantile(q2)
-    return vmin, vmax
-def quantile_normalize(x, q=0.95):
-    """normalize each dimension of x to [0, 1], take 95-th percentage, this robust to outliers
-        </br> 1. sort x
-        </br> 2. take q-th quantile
-        </br>     min_value -> (1-q)-th quantile
-        </br>     max_value -> q-th quantile
-        </br> 3. normalize
-        </br> x = (x - min_value) / (max_value - min_value)
-    Args:
-        x (torch.Tensor): input tensor, shape (n_samples, n_features)
-            normalize each feature to 0-1 range
-        q (float): quantile, default 0.95
-    Returns:
-        torch.Tensor: quantile normalized tensor
-    """
-    # normalize x to 0-1 range, max value is q-th quantile
-    # quantile makes the normalization robust to outliers
-    if isinstance(x, np.ndarray):
-        x = torch.tensor(x)
-    vmax, vmin = quantile_min_max(x, q, 1 - q)
-    x = (x - vmin) / (vmax - vmin)
-    x = x.clamp(0, 1)
-    return x

{nystrom_ncut-0.0.7 → nystrom_ncut-0.0.9}/src/nystrom_ncut/visualize_utils.py RENAMED Viewed

@@ -7,15 +7,13 @@ import torch.nn.functional as F
 from sklearn.base import BaseEstimator
 from .common import (
-    DistanceOptions,
     lazy_normalize,
+    quantile_min_max,
+    quantile_normalize,
 )
 from .propagation_utils import (
     run_subgraph_sampling,
     extrapolate_knn,
-    extrapolate_knn_with_subsampling,
-    quantile_min_max,
-    quantile_normalize
 )
@@ -28,27 +26,22 @@ def _rgb_with_dimensionality_reduction(
     num_sample: int,
     metric: Literal["cosine", "euclidean"],
     rgb_func: Callable[[torch.Tensor, float], torch.Tensor],
-    q: float, knn: int,
-    seed: int, device: str,
+    q: float,
+    knn: int,
     reduction: Callable[..., BaseEstimator],
     reduction_dim: int,
     reduction_kwargs: Dict[str, Any],
-    transform_func: Callable[[torch.Tensor], torch.Tensor] = _identity,
-    pre_smooth: bool = True,
+    transform_func: Callable[[torch.Tensor], torch.Tensor],
+    seed: int,
+    device: str,
 ) -> Tuple[torch.Tensor, torch.Tensor]:
-    if pre_smooth:
-        _subgraph_indices = run_subgraph_sampling(
-            features,
-            num_sample,
-            sample_method="farthest",
-        )
-        features = extrapolate_knn(
-            features[_subgraph_indices],
-            features[_subgraph_indices],
-            features,
-            distance="cosine",
-        )
+    features = extrapolate_knn(
+        features,
+        features,
+        features,
+        distance="cosine",
+    )
     subgraph_indices = run_subgraph_sampling(
         features,
@@ -83,10 +76,10 @@ def rgb_from_tsne_2d(
     num_sample: int = 1000,
     perplexity: int = 150,
     metric: Literal["cosine", "euclidean"] = "cosine",
-    device: str = None,
-    seed: int = 0,
     q: float = 0.95,
     knn: int = 10,
+    seed: int = 0,
+    device: str = None,
 ):
     """
     Returns:
@@ -111,11 +104,13 @@ def rgb_from_tsne_2d(
         num_sample=num_sample,
         metric=metric,
         rgb_func=rgb_from_2d_colormap,
-        q=q, knn=knn,
-        seed=seed, device=device,
+        q=q,
+        knn=knn,
         reduction=TSNE, reduction_dim=2, reduction_kwargs={
             "perplexity": perplexity,
-        },
+        }, transform_func=_identity,
+        seed=seed,
+        device=device,
     )
     return x2d, rgb
@@ -125,10 +120,10 @@ def rgb_from_tsne_3d(
     num_sample: int = 1000,
     perplexity: int = 150,
     metric: Literal["cosine", "euclidean"] = "cosine",
-    device: str = None,
-    seed: int = 0,
     q: float = 0.95,
     knn: int = 10,
+    seed: int = 0,
+    device: str = None,
 ):
     """
     Returns:
@@ -153,11 +148,13 @@ def rgb_from_tsne_3d(
         num_sample=num_sample,
         metric=metric,
         rgb_func=rgb_from_3d_rgb_cube,
-        q=q, knn=knn,
-        seed=seed, device=device,
+        q=q,
+        knn=knn,
         reduction=TSNE, reduction_dim=3, reduction_kwargs={
             "perplexity": perplexity,
-        },
+        }, transform_func=_identity,
+        seed=seed,
+        device=device,
     )
     return x3d, rgb
@@ -166,10 +163,10 @@ def rgb_from_cosine_tsne_3d(
     features: torch.Tensor,
     num_sample: int = 1000,
     perplexity: int = 150,
-    device: str = None,
-    seed: int = 0,
     q: float = 0.95,
     knn: int = 10,
+    seed: int = 0,
+    device: str = None
 ):
     """
     Returns:
@@ -210,11 +207,13 @@ def rgb_from_cosine_tsne_3d(
         num_sample=num_sample,
         metric="cosine",
         rgb_func=rgb_from_cosine,
-        q=q, knn=knn,
-        seed=seed, device=device,
+        q=q,
+        knn=knn,
         reduction=TSNE, reduction_dim=3, reduction_kwargs={
             "perplexity": perplexity,
-        },
+        }, transform_func=_identity,
+        seed=seed,
+        device=device,
     )
     return x3d, rgb
@@ -225,10 +224,10 @@ def rgb_from_umap_2d(
     n_neighbors: int = 150,
     min_dist: float = 0.1,
     metric: Literal["cosine", "euclidean"] = "cosine",
-    device: str = None,
-    seed: int = 0,
     q: float = 0.95,
     knn: int = 10,
+    seed: int = 0,
+    device: str = None,
 ):
     """
     Returns:
@@ -245,12 +244,14 @@ def rgb_from_umap_2d(
         num_sample=num_sample,
         metric=metric,
         rgb_func=rgb_from_2d_colormap,
-        q=q, knn=knn,
-        seed=seed, device=device,
+        q=q,
+        knn=knn,
         reduction=UMAP, reduction_dim=2, reduction_kwargs={
             "n_neighbors": n_neighbors,
             "min_dist": min_dist,
-        },
+        }, transform_func=_identity,
+        seed=seed,
+        device=device,
     )
     return x2d, rgb
@@ -261,10 +262,10 @@ def rgb_from_umap_sphere(
     n_neighbors: int = 150,
     min_dist: float = 0.1,
     metric: Literal["cosine", "euclidean"] = "cosine",
-    device: str = None,
-    seed: int = 0,
     q: float = 0.95,
     knn: int = 10,
+    seed: int = 0,
+    device: str = None,
 ):
     """
     Returns:
@@ -288,14 +289,15 @@ def rgb_from_umap_sphere(
         num_sample=num_sample,
         metric=metric,
         rgb_func=rgb_from_3d_rgb_cube,
-        q=q, knn=knn,
-        seed=seed, device=device,
+        q=q,
+        knn=knn,
         reduction=UMAP, reduction_dim=2, reduction_kwargs={
             "n_neighbors": n_neighbors,
             "min_dist": min_dist,
             "output_metric": "haversine",
-        },
-        transform_func=transform_func
+        }, transform_func=transform_func,
+        seed=seed,
+        device=device,
     )
     return x3d, rgb
@@ -306,10 +308,10 @@ def rgb_from_umap_3d(
     n_neighbors: int = 150,
     min_dist: float = 0.1,
     metric: Literal["cosine", "euclidean"] = "cosine",
-    device: str = None,
-    seed: int = 0,
     q: float = 0.95,
     knn: int = 10,
+    seed: int = 0,
+    device: str = None,
 ):
     """
     Returns:
@@ -326,12 +328,14 @@ def rgb_from_umap_3d(
         num_sample=num_sample,
         metric=metric,
         rgb_func=rgb_from_3d_rgb_cube,
-        q=q, knn=knn,
-        seed=seed, device=device,
+        q=q,
+        knn=knn,
         reduction=UMAP, reduction_dim=3, reduction_kwargs={
             "n_neighbors": n_neighbors,
             "min_dist": min_dist,
-        },
+        }, transform_func=_identity,
+        seed=seed,
+        device=device,
     )
     return x3d, rgb

{nystrom_ncut-0.0.7 → nystrom_ncut-0.0.9/src/nystrom_ncut.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: nystrom_ncut
-Version: 0.0.7
+Version: 0.0.9
 Summary: Normalized Cut and Nyström Approximation
 Author-email: Huzheng Yang <huze.yann@gmail.com>, Wentinn Liao <wentinn.liao@gmail.com>
 Project-URL: Documentation, https://github.com/JophiArcana/Nystrom-NCUT/

nystrom_ncut-0.0.7/src/nystrom_ncut/common.py DELETED Viewed

@@ -1,24 +0,0 @@
-from typing import Any, Literal
-import numpy as np
-import torch
-import torch.nn.functional as Fn
-DistanceOptions = Literal["cosine", "euclidean", "rbf"]
-SampleOptions = Literal["farthest", "random"]
-def ceildiv(a: int, b: int) -> int:
-    return -(-a // b)
-def lazy_normalize(x: torch.Tensor, n: int = 1000, **normalize_kwargs: Any) -> torch.Tensor:
-    numel = np.prod(x.shape[:-1])
-    n = min(n, numel)
-    random_indices = torch.randperm(numel)[:n]
-    _x = x.flatten(0, -2)[random_indices]
-    if torch.allclose(torch.norm(_x, **normalize_kwargs), torch.ones(n, device=x.device)):
-        return x
-    else:
-        return Fn.normalize(x, **normalize_kwargs)