fast-ballmapper 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fast_ballmapper/__init__.py +37 -0
- fast_ballmapper/_validation.py +91 -0
- fast_ballmapper/backends/__init__.py +1 -0
- fast_ballmapper/backends/_ball_tree.py +88 -0
- fast_ballmapper/backends/_faiss.py +531 -0
- fast_ballmapper/coloring.py +71 -0
- fast_ballmapper/faiss.py +61 -0
- fast_ballmapper/graph.py +39 -0
- fast_ballmapper/landmarks.py +336 -0
- fast_ballmapper/plotting/__init__.py +5 -0
- fast_ballmapper/plotting/_common.py +28 -0
- fast_ballmapper/plotting/matplotlib.py +77 -0
- fast_ballmapper/plotting/plotly.py +138 -0
- fast_ballmapper/py.typed +0 -0
- fast_ballmapper-0.0.1.dist-info/METADATA +434 -0
- fast_ballmapper-0.0.1.dist-info/RECORD +19 -0
- fast_ballmapper-0.0.1.dist-info/WHEEL +5 -0
- fast_ballmapper-0.0.1.dist-info/licenses/LICENSE +21 -0
- fast_ballmapper-0.0.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""Fast construction and analysis of Ball Mapper graphs."""
|
|
2
|
+
|
|
3
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
4
|
+
|
|
5
|
+
from fast_ballmapper.coloring import (
|
|
6
|
+
color_by_density,
|
|
7
|
+
color_by_entropy,
|
|
8
|
+
color_by_function,
|
|
9
|
+
color_by_mode,
|
|
10
|
+
color_by_size,
|
|
11
|
+
)
|
|
12
|
+
from fast_ballmapper.faiss import FaissConfig
|
|
13
|
+
from fast_ballmapper.graph import build_mapper, compute_edge_overlaps
|
|
14
|
+
from fast_ballmapper.landmarks import (
|
|
15
|
+
build_cover,
|
|
16
|
+
compute_landmarks,
|
|
17
|
+
compute_landmarks_fps,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
# Resolve the installed distribution's version from its metadata. When the
# package is imported without installed metadata (for example straight from a
# source checkout), fall back to a hard-coded string.
try:
    __version__ = version("fast-ballmapper")
except PackageNotFoundError:  # pragma: no cover - source checkout fallback
    # Keep this in sync with the released distribution version; it previously
    # advertised "0.1.0" although the packaged release is 0.0.1.
    __version__ = "0.0.1"
|
|
24
|
+
|
|
25
|
+
__all__ = [
|
|
26
|
+
"FaissConfig",
|
|
27
|
+
"build_cover",
|
|
28
|
+
"build_mapper",
|
|
29
|
+
"color_by_density",
|
|
30
|
+
"color_by_entropy",
|
|
31
|
+
"color_by_function",
|
|
32
|
+
"color_by_mode",
|
|
33
|
+
"color_by_size",
|
|
34
|
+
"compute_edge_overlaps",
|
|
35
|
+
"compute_landmarks",
|
|
36
|
+
"compute_landmarks_fps",
|
|
37
|
+
]
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
"""Validation helpers shared by Ball Mapper algorithms."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections.abc import Mapping
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def validate_point_cloud(x: np.ndarray, eps: float) -> np.ndarray:
    """Validate a point cloud and ball radius, returning the cloud as an array.

    Parameters
    ----------
    x
        Array-like converted with ``np.asarray``. It must be 2D, have a
        numeric dtype, contain only finite values, and have at least one
        feature column whenever it has at least one row.
    eps
        Ball radius. It must be a finite, non-negative scalar.

    Returns
    -------
    np.ndarray
        The result of ``np.asarray(x)``.

    Raises
    ------
    ValueError
        If ``x`` is not 2D, has rows but no feature columns, contains
        non-finite values, or if ``eps`` is not a finite, non-negative scalar.
    TypeError
        If ``x`` does not have a numeric dtype.
    """
    x = np.asarray(x)

    if x.ndim != 2:
        raise ValueError("x must be a 2D array with shape (n_samples, n_features).")
    if x.shape[0] > 0 and x.shape[1] == 0:
        raise ValueError("x must contain at least one feature.")
    # The dtype check runs before ``np.isfinite`` so that non-numeric arrays
    # get the dedicated TypeError below.
    if not np.issubdtype(x.dtype, np.number):
        raise TypeError("x must contain numeric values.")
    if not np.all(np.isfinite(x)):
        raise ValueError("x must contain only finite values.")

    # ``np.isscalar`` also accepts strings and complex numbers, for which
    # ``np.isfinite`` or the ordering comparison raises TypeError. Treat those
    # as an invalid radius so callers always get the documented ValueError.
    try:
        eps_is_valid = bool(np.isscalar(eps) and np.isfinite(eps) and eps >= 0)
    except TypeError:
        eps_is_valid = False
    if not eps_is_valid:
        raise ValueError("eps must be a finite, non-negative scalar.")

    return x
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def validate_leaf_size(leaf_size: int) -> None:
    """Ensure ``leaf_size`` is an integer strictly greater than zero.

    Accepts Python ``int`` and NumPy integer scalars; anything else, or a
    value that is zero or negative, raises ``ValueError``.
    """
    is_integer = isinstance(leaf_size, (int, np.integer))
    if is_integer and leaf_size > 0:
        return
    raise ValueError("leaf_size must be a positive integer.")
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def normalize_backend_options(
    method: str,
    metric: str,
    leaf_size: int,
    metric_kwargs: Mapping[str, Any] | None,
) -> tuple[str, str, dict[str, Any]]:
    """Canonicalize the backend selection and reject invalid combinations.

    The method name is lower-cased with hyphens turned into underscores, and
    the metric name is lower-cased. ``metric_kwargs`` is copied into a fresh
    ``dict`` (``None`` becomes an empty dict).

    Returns the tuple ``(method_key, metric_key, metric_kwargs_copy)``.

    Raises ``TypeError`` when ``method`` or ``metric`` is not a string, and
    ``ValueError`` when the method is unknown, or when ``method='faiss'`` is
    combined with an unsupported metric or with non-empty ``metric_kwargs``.
    For ``ball_tree`` the leaf size is checked through ``validate_leaf_size``.
    """
    if not isinstance(method, str):
        raise TypeError("method must be a string.")
    if not isinstance(metric, str):
        raise TypeError("metric must be a string.")

    backend = method.lower().replace("-", "_")
    distance = metric.lower()
    extra_kwargs = dict(metric_kwargs or {})

    # ``ball_tree`` is the documented spelling; ``balltree`` is tolerated as
    # an alias and folded into it here.
    backend = {"balltree": "ball_tree"}.get(backend, backend)

    if backend == "ball_tree":
        validate_leaf_size(leaf_size)
        return backend, distance, extra_kwargs

    if backend != "faiss":
        raise ValueError("method must be 'ball_tree' or 'faiss'.")

    # Remaining checks apply to the FAISS backend only.
    if distance not in ("euclidean", "cosine"):
        raise ValueError(
            "For method='faiss', metric must be 'euclidean' or 'cosine'."
        )
    if extra_kwargs:
        raise ValueError("metric_kwargs is supported only with method='ball_tree'.")

    return backend, distance, extra_kwargs
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def validate_start_index(start_index: int | None, n_samples: int) -> None:
    """Check an optional start index against the number of samples.

    ``None`` is accepted without further checks. Otherwise the value must be
    a Python or NumPy integer (``TypeError`` if not) lying in
    ``[0, n_samples)`` (``IndexError`` if not).
    """
    if start_index is None:
        return

    if not isinstance(start_index, (int, np.integer)):
        raise TypeError("start_index must be an integer or None.")

    position = int(start_index)
    if position < 0 or position >= n_samples:
        raise IndexError("start_index is out of bounds for x.")
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def lexicographically_smallest_index(x: np.ndarray) -> int:
    """Return the index of the row that sorts first by column 0, then 1, ...

    ``np.lexsort`` treats its *last* key as the primary one, so the columns
    are handed over in reverse order to make column 0 the most significant.
    """
    reversed_columns = x.T[::-1]
    row_order = np.lexsort(reversed_columns)
    return int(row_order[0])
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def validate_cosine_points(points: np.ndarray) -> None:
    """Raise ``ValueError`` if any row of ``points`` has a norm of zero.

    Cosine distance divides by the vector norm, so it is undefined for such
    rows.
    """
    row_norms = np.linalg.norm(points, axis=1)
    if (row_norms == 0).any():
        raise ValueError("Cosine distance is undefined for zero vectors.")
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Internal nearest-neighbor backends for fast_ballmapper."""
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
"""scikit-learn BallTree backend."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections.abc import Mapping, Sequence
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
|
|
10
|
+
try:
|
|
11
|
+
from sklearn.neighbors import BallTree
|
|
12
|
+
except ImportError: # pragma: no cover - dependency is required by project metadata
|
|
13
|
+
BallTree = None # type: ignore[assignment,misc]
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def create_ball_tree(
    x: np.ndarray,
    metric: str,
    leaf_size: int,
    metric_kwargs: Mapping[str, Any],
):
    """Build a scikit-learn ``BallTree`` over ``x``.

    ``metric`` and ``leaf_size`` are passed as keyword arguments and every
    entry of ``metric_kwargs`` is forwarded as an additional keyword.

    Raises ``ImportError`` when scikit-learn could not be imported at module
    load time (``BallTree`` is then ``None``).
    """
    if BallTree is not None:
        return BallTree(x, metric=metric, leaf_size=leaf_size, **metric_kwargs)
    raise ImportError("scikit-learn is required for method='ball_tree'.")
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def compute_landmarks_ball_tree(
    x: np.ndarray,
    eps: float,
    metric: str,
    leaf_size: int,
    metric_kwargs: Mapping[str, Any] | None = None,
) -> tuple[list[int], list[np.ndarray]]:
    """Compute greedy landmarks and covers with BallTree radius queries.

    Rows of ``x`` are visited in index order. The first row that no earlier
    ball has covered becomes a landmark, and every row returned by the radius
    query around it is marked as covered.

    Parameters
    ----------
    x
        Point cloud; one row per point.
    eps
        Radius passed to ``tree.query_radius``.
    metric, leaf_size, metric_kwargs
        Forwarded to ``create_ball_tree``; ``metric_kwargs`` is copied and
        ``None`` is treated as empty.

    Returns
    -------
    tuple[list[int], list[np.ndarray]]
        The landmark row indices, and for each landmark the array of row
        indices returned by its radius query (same order as the landmarks).
    """
    tree = create_ball_tree(x, metric, leaf_size, dict(metric_kwargs or {}))
    uncovered = np.ones(x.shape[0], dtype=bool)
    landmarks: list[int] = []
    cover: list[np.ndarray] = []

    while np.any(uncovered):
        # ``argmax`` on a boolean mask yields the first ``True`` position,
        # i.e. the lowest-index row that is still uncovered.
        landmark_index = int(np.argmax(uncovered))
        landmarks.append(landmark_index)

        query_point = x[landmark_index : landmark_index + 1]
        point_indices = tree.query_radius(query_point, eps)[0]
        cover.append(point_indices)

        uncovered[point_indices] = False
        # Guarantee progress: if the radius query ever fails to return the
        # landmark itself (e.g. a self-distance that is not exactly within
        # ``eps``), the same row would be selected forever.
        uncovered[landmark_index] = False

    return landmarks, cover
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def distances_to_all(
    x: np.ndarray,
    tree,
    point_index: int,
) -> np.ndarray:
    """Compute distances from row ``point_index`` to all rows of ``x``.

    The tree is queried for as many neighbours as ``x`` has rows. The
    distances it returns are then scattered back through the returned
    indices, so entry ``i`` of the result is the distance to row ``i``.
    """
    n_points = x.shape[0]
    query_row = x[point_index : point_index + 1]

    neighbor_distances, neighbor_indices = tree.query(
        query_row, k=n_points, return_distance=True
    )

    # The query reports neighbours in its own order; writing through the
    # index array restores the original row order.
    by_row = np.empty(n_points, dtype=float)
    by_row[neighbor_indices[0]] = neighbor_distances[0]
    return by_row
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def build_cover_ball_tree(
    x: np.ndarray,
    landmarks: Sequence[int],
    eps: float,
    metric: str = "euclidean",
    leaf_size: int = 40,
    metric_kwargs: Mapping[str, Any] | None = None,
    *,
    tree=None,
) -> list[np.ndarray]:
    """Collect, for each given landmark, the rows within ``eps`` of it.

    A prebuilt ``tree`` may be supplied; otherwise one is created with
    ``create_ball_tree`` from ``metric``, ``leaf_size`` and a copy of
    ``metric_kwargs``. The result has one index array per landmark, in the
    order the landmarks were given.
    """
    search_tree = tree
    if search_tree is None:
        search_tree = create_ball_tree(
            x, metric, leaf_size, dict(metric_kwargs or {})
        )

    balls: list[np.ndarray] = []
    for landmark in landmarks:
        # Slice rather than index so the query stays a 2D, single-row array.
        center = x[landmark : landmark + 1]
        balls.append(search_tree.query_radius(center, eps)[0])
    return balls
|