fast-ballmapper 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,37 @@
1
+ """Fast construction and analysis of Ball Mapper graphs."""
2
+
3
+ from importlib.metadata import PackageNotFoundError, version
4
+
5
+ from fast_ballmapper.coloring import (
6
+ color_by_density,
7
+ color_by_entropy,
8
+ color_by_function,
9
+ color_by_mode,
10
+ color_by_size,
11
+ )
12
+ from fast_ballmapper.faiss import FaissConfig
13
+ from fast_ballmapper.graph import build_mapper, compute_edge_overlaps
14
+ from fast_ballmapper.landmarks import (
15
+ build_cover,
16
+ compute_landmarks,
17
+ compute_landmarks_fps,
18
+ )
19
+
20
# Resolve the package version from the installed distribution metadata.
try:
    __version__ = version("fast-ballmapper")
except PackageNotFoundError:  # pragma: no cover - source checkout fallback
    # No distribution metadata is available (for example when running from an
    # uninstalled source checkout). Keep this fallback in sync with the
    # released version so the reported value never names a version that was
    # not published.
    __version__ = "0.0.1"
24
+
25
+ __all__ = [
26
+ "FaissConfig",
27
+ "build_cover",
28
+ "build_mapper",
29
+ "color_by_density",
30
+ "color_by_entropy",
31
+ "color_by_function",
32
+ "color_by_mode",
33
+ "color_by_size",
34
+ "compute_edge_overlaps",
35
+ "compute_landmarks",
36
+ "compute_landmarks_fps",
37
+ ]
@@ -0,0 +1,91 @@
1
+ """Validation helpers shared by Ball Mapper algorithms."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import Mapping
6
+ from typing import Any
7
+
8
+ import numpy as np
9
+
10
+
11
def validate_point_cloud(x: np.ndarray, eps: float) -> np.ndarray:
    """Validate and normalize point-cloud inputs.

    Parameters
    ----------
    x
        Array-like converted with ``np.asarray``. It must be 2D with shape
        ``(n_samples, n_features)``, have a numeric dtype, and contain only
        finite values. An array with rows but zero feature columns is
        rejected.
    eps
        Scalar that must be real, finite, and non-negative.

    Returns
    -------
    np.ndarray
        ``x`` converted to an array.

    Raises
    ------
    ValueError
        If ``x`` is not 2D, has rows but no features, or contains non-finite
        values; or if ``eps`` is not a scalar, is non-finite, or is negative.
    TypeError
        If ``x`` has a non-numeric dtype, or ``eps`` is a scalar of a
        non-real type (for example ``str`` or ``complex``).
    """
    x = np.asarray(x)

    if x.ndim != 2:
        raise ValueError("x must be a 2D array with shape (n_samples, n_features).")
    if x.shape[0] > 0 and x.shape[1] == 0:
        raise ValueError("x must contain at least one feature.")
    if not np.issubdtype(x.dtype, np.number):
        raise TypeError("x must contain numeric values.")
    if not np.all(np.isfinite(x)):
        raise ValueError("x must contain only finite values.")

    if not np.isscalar(eps):
        raise ValueError("eps must be a finite, non-negative scalar.")
    # ``np.isscalar`` also accepts strings, bytes and complex numbers. Those
    # used to fail inside ``np.isfinite`` or the ``<`` comparison with a
    # message that never mentioned ``eps``; reject them explicitly instead.
    # Booleans stay accepted because they passed the original checks.
    if not isinstance(eps, (int, float, np.integer, np.floating, np.bool_)):
        raise TypeError(
            f"eps must be a real number, got {type(eps).__name__}."
        )
    if not np.isfinite(eps) or eps < 0:
        raise ValueError("eps must be a finite, non-negative scalar.")

    return x
27
+
28
+
29
def validate_leaf_size(leaf_size: int) -> None:
    """Check that ``leaf_size`` is usable as a BallTree leaf size.

    Raises
    ------
    ValueError
        If ``leaf_size`` is not a Python or NumPy integer, or is not
        strictly positive.
    """
    is_integer = isinstance(leaf_size, (int, np.integer))
    if is_integer and leaf_size > 0:
        return
    raise ValueError("leaf_size must be a positive integer.")
33
+
34
+
35
def normalize_backend_options(
    method: str,
    metric: str,
    leaf_size: int,
    metric_kwargs: Mapping[str, Any] | None,
) -> tuple[str, str, dict[str, Any]]:
    """Canonicalize backend settings and refuse unsupported combinations.

    Parameters
    ----------
    method
        Backend name, matched case-insensitively with ``-`` treated as ``_``.
        ``"balltree"`` is accepted as an alias of ``"ball_tree"``.
    metric
        Metric name; it is lower-cased.
    leaf_size
        Validated only when the backend is ``"ball_tree"``.
    metric_kwargs
        Optional extra metric arguments; copied into a new ``dict``.

    Returns
    -------
    tuple[str, str, dict[str, Any]]
        The normalized method key, the lower-cased metric, and the copied
        metric keyword arguments.

    Raises
    ------
    TypeError
        If ``method`` or ``metric`` is not a string.
    ValueError
        If the method is unknown, if ``leaf_size`` is invalid for
        ``"ball_tree"``, or if ``"faiss"`` is combined with an unsupported
        metric or with any metric keyword arguments.
    """
    if not isinstance(method, str):
        raise TypeError("method must be a string.")
    if not isinstance(metric, str):
        raise TypeError("metric must be a string.")

    backend = method.lower().replace("-", "_")
    distance = metric.lower()
    extra_kwargs = dict(metric_kwargs) if metric_kwargs else {}

    # "ball_tree" is the documented snake_case spelling; "balltree" is
    # tolerated as a forgiving alias.
    backend = "ball_tree" if backend == "balltree" else backend

    if backend not in ("ball_tree", "faiss"):
        raise ValueError("method must be 'ball_tree' or 'faiss'.")

    if backend == "faiss":
        if distance not in ("euclidean", "cosine"):
            raise ValueError(
                "For method='faiss', metric must be 'euclidean' or 'cosine'."
            )
        if extra_kwargs:
            raise ValueError("metric_kwargs is supported only with method='ball_tree'.")
    else:
        validate_leaf_size(leaf_size)

    return backend, distance, extra_kwargs
70
+
71
+
72
def validate_start_index(start_index: int | None, n_samples: int) -> None:
    """Check an optional farthest-point-sampling start index.

    ``None`` is always accepted. Any other value must be a Python or NumPy
    integer in the half-open range ``[0, n_samples)``; negative indices are
    rejected rather than wrapped.

    Raises
    ------
    TypeError
        If ``start_index`` is neither ``None`` nor an integer.
    IndexError
        If ``start_index`` lies outside ``[0, n_samples)``.
    """
    if start_index is not None:
        if not isinstance(start_index, (int, np.integer)):
            raise TypeError("start_index must be an integer or None.")
        position = int(start_index)
        if position < 0 or position >= n_samples:
            raise IndexError("start_index is out of bounds for x.")
80
+
81
+
82
def lexicographically_smallest_index(x: np.ndarray) -> int:
    """Return the index of the lexicographically smallest row of ``x``.

    Rows are compared on feature 0 first, then feature 1, and so on.
    """
    n_features = x.shape[1]
    # ``np.lexsort`` treats the LAST key as the primary one, so the columns
    # are supplied from the last feature down to feature 0.
    sort_keys = tuple(x[:, feature] for feature in reversed(range(n_features)))
    ordering = np.lexsort(sort_keys)
    return int(ordering[0])
86
+
87
+
88
def validate_cosine_points(points: np.ndarray) -> None:
    """Ensure no row of ``points`` is a zero vector.

    Raises
    ------
    ValueError
        If any row has a norm (taken along axis 1) exactly equal to zero,
        since cosine distance is undefined for such a vector.
    """
    row_norms = np.linalg.norm(points, axis=1)
    has_zero_vector = np.any(row_norms == 0)
    if has_zero_vector:
        raise ValueError("Cosine distance is undefined for zero vectors.")
@@ -0,0 +1 @@
1
+ """Internal nearest-neighbor backends for fast_ballmapper."""
@@ -0,0 +1,88 @@
1
+ """scikit-learn BallTree backend."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import Mapping, Sequence
6
+ from typing import Any
7
+
8
+ import numpy as np
9
+
10
+ try:
11
+ from sklearn.neighbors import BallTree
12
+ except ImportError: # pragma: no cover - dependency is required by project metadata
13
+ BallTree = None # type: ignore[assignment,misc]
14
+
15
+
16
def create_ball_tree(
    x: np.ndarray,
    metric: str,
    leaf_size: int,
    metric_kwargs: Mapping[str, Any],
):
    """Construct a scikit-learn ``BallTree`` over ``x``.

    Parameters
    ----------
    x
        Data passed to ``BallTree`` as its first argument.
    metric
        Forwarded as the ``metric`` keyword.
    leaf_size
        Forwarded as the ``leaf_size`` keyword.
    metric_kwargs
        Additional keyword arguments expanded into the ``BallTree`` call.

    Returns
    -------
    The newly built ``BallTree`` instance.

    Raises
    ------
    ImportError
        If scikit-learn could not be imported when this module was loaded.
    """
    if BallTree is not None:
        return BallTree(x, leaf_size=leaf_size, metric=metric, **metric_kwargs)
    # The module-level import guard left ``BallTree`` as ``None``.
    raise ImportError("scikit-learn is required for method='ball_tree'.")
31
+
32
+
33
def compute_landmarks_ball_tree(
    x: np.ndarray,
    eps: float,
    metric: str,
    leaf_size: int,
    metric_kwargs: Mapping[str, Any] | None = None,
) -> tuple[list[int], list[np.ndarray]]:
    """Compute greedy landmarks and covers with BallTree radius queries.

    The lowest-index row that is still uncovered becomes the next landmark.
    Every row returned by a radius-``eps`` query around that landmark is then
    marked as covered, and the process repeats until no row is uncovered.

    Parameters
    ----------
    x
        Point cloud; rows are indexed along axis 0.
    eps
        Radius passed to ``tree.query_radius``.
    metric, leaf_size, metric_kwargs
        Forwarded to ``create_ball_tree`` to build the tree over ``x``.

    Returns
    -------
    tuple[list[int], list[np.ndarray]]
        The landmark row indices in selection order, and for each landmark
        the index array returned by its radius query.
    """
    tree = create_ball_tree(x, metric, leaf_size, dict(metric_kwargs or {}))
    n_samples = x.shape[0]
    uncovered = np.ones(n_samples, dtype=bool)
    landmarks: list[int] = []
    cover: list[np.ndarray] = []

    # Rows before ``scan_start`` are known to be covered: each landmark is the
    # first uncovered row and covered rows never revert, so the search for the
    # next landmark can resume right after the previous one instead of
    # rescanning the whole mask on every iteration.
    scan_start = 0
    while scan_start < n_samples:
        remaining = uncovered[scan_start:]
        offset = int(np.argmax(remaining))
        if not remaining[offset]:
            # ``argmax`` returns 0 on an all-False slice: nothing is left.
            break

        landmark_index = scan_start + offset
        landmarks.append(landmark_index)
        point_indices = tree.query_radius(x[landmark_index : landmark_index + 1], eps)[
            0
        ]
        cover.append(point_indices)
        uncovered[point_indices] = False
        # Guarantee progress even if the radius query did not return the
        # landmark itself (e.g. a custom metric with a non-zero self-distance);
        # otherwise the same row would be selected forever.
        uncovered[landmark_index] = False
        scan_start = landmark_index + 1

    return landmarks, cover
56
+
57
+
58
def distances_to_all(
    x: np.ndarray,
    tree,
    point_index: int,
) -> np.ndarray:
    """Compute distances from row ``point_index`` to every row of ``x``.

    Parameters
    ----------
    x
        Point cloud; row ``point_index`` is used as the query point.
    tree
        Object exposing ``query(X, k=..., return_distance=True)`` that returns
        ``(distances, indices)`` arrays with one row per query point.
        Presumably the tree built over ``x`` - verify against callers.
    point_index
        Row of ``x`` to measure from.

    Returns
    -------
    np.ndarray
        Float array of length ``x.shape[0]`` in which entry ``i`` holds the
        distance reported for neighbor index ``i``.
    """
    n_samples = x.shape[0]
    query_point = x[point_index : point_index + 1]

    # Ask for all ``n_samples`` neighbors; the answer arrives in the tree's
    # neighbor order, not in row order.
    neighbor_distances, neighbor_indices = tree.query(
        query_point,
        k=n_samples,
        return_distance=True,
    )

    # Scatter each distance back to the slot of the row it belongs to.
    by_row = np.empty(n_samples, dtype=float)
    by_row[neighbor_indices[0]] = neighbor_distances[0]
    return by_row
72
+
73
+
74
def build_cover_ball_tree(
    x: np.ndarray,
    landmarks: Sequence[int],
    eps: float,
    metric: str = "euclidean",
    leaf_size: int = 40,
    metric_kwargs: Mapping[str, Any] | None = None,
    *,
    tree=None,
) -> list[np.ndarray]:
    """Collect the epsilon-ball around each fixed landmark using a BallTree.

    Parameters
    ----------
    x
        Point cloud; landmarks index its rows.
    landmarks
        Row indices of ``x`` used as ball centers.
    eps
        Radius passed to ``tree.query_radius``.
    metric, leaf_size, metric_kwargs
        Used only to build a tree over ``x`` when ``tree`` is ``None``.
    tree
        Optional prebuilt tree exposing ``query_radius``; when given, no new
        tree is created.

    Returns
    -------
    list[np.ndarray]
        One entry per landmark, in the order given: the first element of the
        radius-query result for that landmark's row.
    """
    search_tree = tree
    if search_tree is None:
        search_tree = create_ball_tree(x, metric, leaf_size, dict(metric_kwargs or {}))

    balls = []
    for landmark in landmarks:
        # Slice instead of indexing so the query point keeps its 2D shape.
        center = x[landmark : landmark + 1]
        balls.append(search_tree.query_radius(center, eps)[0])
    return balls