fast-ballmapper 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. fast_ballmapper-0.0.1/CHANGELOG.md +12 -0
  2. fast_ballmapper-0.0.1/LICENSE +21 -0
  3. fast_ballmapper-0.0.1/MANIFEST.in +4 -0
  4. fast_ballmapper-0.0.1/PKG-INFO +434 -0
  5. fast_ballmapper-0.0.1/README.md +394 -0
  6. fast_ballmapper-0.0.1/examples/basic_usage.py +26 -0
  7. fast_ballmapper-0.0.1/examples/faiss_usage.py +17 -0
  8. fast_ballmapper-0.0.1/pyproject.toml +76 -0
  9. fast_ballmapper-0.0.1/setup.cfg +4 -0
  10. fast_ballmapper-0.0.1/src/fast_ballmapper/__init__.py +37 -0
  11. fast_ballmapper-0.0.1/src/fast_ballmapper/_validation.py +91 -0
  12. fast_ballmapper-0.0.1/src/fast_ballmapper/backends/__init__.py +1 -0
  13. fast_ballmapper-0.0.1/src/fast_ballmapper/backends/_ball_tree.py +88 -0
  14. fast_ballmapper-0.0.1/src/fast_ballmapper/backends/_faiss.py +531 -0
  15. fast_ballmapper-0.0.1/src/fast_ballmapper/coloring.py +71 -0
  16. fast_ballmapper-0.0.1/src/fast_ballmapper/faiss.py +61 -0
  17. fast_ballmapper-0.0.1/src/fast_ballmapper/graph.py +39 -0
  18. fast_ballmapper-0.0.1/src/fast_ballmapper/landmarks.py +336 -0
  19. fast_ballmapper-0.0.1/src/fast_ballmapper/plotting/__init__.py +5 -0
  20. fast_ballmapper-0.0.1/src/fast_ballmapper/plotting/_common.py +28 -0
  21. fast_ballmapper-0.0.1/src/fast_ballmapper/plotting/matplotlib.py +77 -0
  22. fast_ballmapper-0.0.1/src/fast_ballmapper/plotting/plotly.py +138 -0
  23. fast_ballmapper-0.0.1/src/fast_ballmapper/py.typed +0 -0
  24. fast_ballmapper-0.0.1/src/fast_ballmapper.egg-info/PKG-INFO +434 -0
  25. fast_ballmapper-0.0.1/src/fast_ballmapper.egg-info/SOURCES.txt +34 -0
  26. fast_ballmapper-0.0.1/src/fast_ballmapper.egg-info/dependency_links.txt +1 -0
  27. fast_ballmapper-0.0.1/src/fast_ballmapper.egg-info/requires.txt +25 -0
  28. fast_ballmapper-0.0.1/src/fast_ballmapper.egg-info/top_level.txt +1 -0
  29. fast_ballmapper-0.0.1/tests/test_build_cover.py +101 -0
  30. fast_ballmapper-0.0.1/tests/test_coloring.py +25 -0
  31. fast_ballmapper-0.0.1/tests/test_faiss_backend.py +362 -0
  32. fast_ballmapper-0.0.1/tests/test_faiss_integration.py +133 -0
  33. fast_ballmapper-0.0.1/tests/test_graph.py +27 -0
  34. fast_ballmapper-0.0.1/tests/test_landmarks.py +79 -0
  35. fast_ballmapper-0.0.1/tests/test_plotting.py +23 -0
  36. fast_ballmapper-0.0.1/tests/test_public_api.py +21 -0
@@ -0,0 +1,12 @@
1
+ # Changelog
2
+
3
+ ## 0.0.1
4
+
5
+ - Reorganized the project into a PyPI-ready `src` layout.
6
+ - Renamed the distribution to `fast-ballmapper` and the import package to
7
+ `fast_ballmapper`.
8
+ - Converted the public API and internal Python identifiers to snake_case.
9
+ - Split landmark selection, backends, graph construction, coloring, and plotting
10
+ into focused modules.
11
+ - Made FAISS, Matplotlib, and Plotly optional extras.
12
+ - Added automated tests and package build metadata.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 John Rick Dolor Manzanares, Jay-Anne Baga Bulauan
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,4 @@
1
+ include LICENSE
2
+ include README.md
3
+ include CHANGELOG.md
4
+ recursive-include examples *.py
@@ -0,0 +1,434 @@
1
+ Metadata-Version: 2.4
2
+ Name: fast-ballmapper
3
+ Version: 0.0.1
4
+ Summary: Fast Ball Mapper construction with BallTree and optional FAISS backends
5
+ Author: John Rick Dolor Manzanares, Jay-Anne Baga Bulauan
6
+ License-Expression: MIT
7
+ Keywords: ball-mapper,topological-data-analysis,point-cloud,networkx
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Science/Research
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Programming Language :: Python :: 3.13
15
+ Classifier: Topic :: Scientific/Engineering :: Mathematics
16
+ Requires-Python: >=3.10
17
+ Description-Content-Type: text/markdown
18
+ License-File: LICENSE
19
+ Requires-Dist: networkx>=3.0
20
+ Requires-Dist: numpy>=1.23
21
+ Requires-Dist: scikit-learn>=1.2
22
+ Provides-Extra: faiss
23
+ Requires-Dist: faiss-cpu>=1.7.4; extra == "faiss"
24
+ Provides-Extra: faiss-gpu
25
+ Requires-Dist: faiss-gpu; extra == "faiss-gpu"
26
+ Provides-Extra: plot
27
+ Requires-Dist: matplotlib>=3.7; extra == "plot"
28
+ Requires-Dist: plotly>=5.0; extra == "plot"
29
+ Provides-Extra: all
30
+ Requires-Dist: faiss-cpu>=1.7.4; extra == "all"
31
+ Requires-Dist: matplotlib>=3.7; extra == "all"
32
+ Requires-Dist: plotly>=5.0; extra == "all"
33
+ Provides-Extra: dev
34
+ Requires-Dist: build>=1.2; extra == "dev"
35
+ Requires-Dist: pytest>=8.0; extra == "dev"
36
+ Requires-Dist: pytest-cov>=5.0; extra == "dev"
37
+ Requires-Dist: ruff>=0.6; extra == "dev"
38
+ Requires-Dist: twine>=5.0; extra == "dev"
39
+ Dynamic: license-file
40
+
41
+ # fast-ballmapper
42
+
43
+ `fast-ballmapper` builds Ball Mapper graph summaries of high-dimensional point
44
+ clouds. It supports scikit-learn's BallTree, optional FAISS acceleration,
45
+ deterministic farthest-point sampling, NetworkX graph construction, node
46
+ coloring, and optional Matplotlib or Plotly visualization.
47
+
48
+ The package is designed around two use cases:
49
+
50
+ 1. exact Ball Mapper cover construction using BallTree or exhaustive FAISS
51
+ indexes;
52
+ 2. experimental approximate range-query construction using configurable FAISS
53
+ indexes.
54
+
55
+ ## Installation
56
+
57
+ Core package:
58
+
59
+ ```bash
60
+ pip install fast-ballmapper
61
+ ```
62
+
63
+ With plotting support:
64
+
65
+ ```bash
66
+ pip install "fast-ballmapper[plot]"
67
+ ```
68
+
69
+ With CPU FAISS support:
70
+
71
+ ```bash
72
+ pip install "fast-ballmapper[faiss]"
73
+ ```
74
+
75
+ With GPU FAISS support:
76
+
77
+ ```bash
78
+ pip install "fast-ballmapper[faiss-gpu]"
79
+ ```
80
+
81
+ For development from a source checkout (editable install with development tools, plotting, and CPU FAISS support):
82
+
83
+ ```bash
84
+ python -m pip install -e ".[dev,plot,faiss]"
85
+ pytest
86
+ ```
87
+
88
+ The PyPI distribution name contains a hyphen, while the Python import package
89
+ uses an underscore:
90
+
91
+ ```text
92
+ pip install fast-ballmapper
93
+ import fast_ballmapper
94
+ ```
95
+
96
+ ### GPU FAISS support
97
+
98
+ GPU support is optional. The `faiss` extra installs the CPU FAISS package. To
99
+ use GPU execution, install the `faiss-gpu` extra or a GPU-enabled FAISS build
100
+ that matches your CUDA version and platform.
101
+
102
+ The package can request GPU execution through `FaissConfig(device="gpu")` or
103
+ `FaissConfig(device="auto")`. If GPU support is unavailable and
104
+ `gpu_fallback_to_cpu=True`, the backend falls back to CPU execution.
105
+
106
+ ## Minimal example
107
+
108
+ ```python
109
+ import numpy as np
110
+
111
+ from fast_ballmapper import build_mapper, compute_landmarks
112
+
113
+ rng = np.random.default_rng(42)
114
+ x = rng.random((500, 2))
115
+
116
+ landmarks, cover = compute_landmarks(
117
+ x,
118
+ eps=0.1,
119
+ method="ball_tree",
120
+ metric="euclidean",
121
+ leaf_size=40,
122
+ )
123
+
124
+ graph = build_mapper(cover)
125
+
126
+ print(f"Landmarks: {len(landmarks)}")
127
+ print(f"Edges: {graph.number_of_edges()}")
128
+ ```
129
+
130
+ ## Fixed-landmark cover construction
131
+
132
+ The function `compute_landmarks` selects landmarks and constructs their cover.
133
+ The function `build_cover` only constructs cover sets around an already chosen
134
+ landmark collection.
135
+
136
+ This is useful for comparing different backends or approximate FAISS
137
+ configurations on the same landmark set.
138
+
139
+ ```python
140
+ import numpy as np
141
+
142
+ from fast_ballmapper import FaissConfig, build_cover, compute_landmarks
143
+
144
+ rng = np.random.default_rng(42)
145
+ x = rng.random((1000, 8)).astype("float32")
146
+
147
+ landmarks, exact_cover = compute_landmarks(
148
+ x,
149
+ eps=0.25,
150
+ method="faiss",
151
+ metric="euclidean",
152
+ faiss_config=FaissConfig(factory="Flat"),
153
+ )
154
+
155
+ approximate_cover = build_cover(
156
+ x,
157
+ landmarks,
158
+ eps=0.25,
159
+ method="faiss",
160
+ metric="euclidean",
161
+ faiss_config=FaissConfig(
162
+ factory="IVF64,Flat",
163
+ search_params={"nprobe": 8},
164
+ ),
165
+ )
166
+ ```
167
+
168
+ Here, the landmark set is fixed. Therefore, differences between `exact_cover`
169
+ and `approximate_cover` come from the range-query backend rather than from
170
+ different landmark choices.
171
+
172
+ ## Farthest-point sampling
173
+
174
+ ```python
175
+ from fast_ballmapper import compute_landmarks_fps
176
+
177
+ landmarks, cover = compute_landmarks_fps(
178
+ x,
179
+ eps=0.1,
180
+ start_index=None,
181
+ method="ball_tree",
182
+ metric="euclidean",
183
+ leaf_size=40,
184
+ metric_kwargs=None,
185
+ )
186
+ ```
187
+
188
+ When `start_index` is `None`, the lexicographically smallest point is selected
189
+ first, making the result deterministic. Landmark selection and cover queries use
190
+ the same backend and distance.
191
+
192
+ For FAISS, farthest-point sampling uses an exact Flat index. Approximate FAISS
193
+ indexes are not used for farthest-point sampling because the algorithm requires
194
+ distances from each selected landmark to every data point.
195
+
196
+ ## BallTree metrics
197
+
198
+ Any metric supported by scikit-learn's BallTree can be passed through `metric`.
199
+ Parameterized metrics use `metric_kwargs`:
200
+
201
+ ```python
202
+ landmarks, cover = compute_landmarks_fps(
203
+ x,
204
+ eps=0.2,
205
+ metric="minkowski",
206
+ metric_kwargs={"p": 3},
207
+ )
208
+ ```
209
+
210
+ BallTree is useful when metric flexibility is important.
211
+
212
+ ## FAISS backend
213
+
214
+ FAISS supports Euclidean and cosine distances in this package:
215
+
216
+ ```python
217
+ from fast_ballmapper import compute_landmarks
218
+
219
+ landmarks, cover = compute_landmarks(
220
+ x,
221
+ eps=0.1,
222
+ method="faiss",
223
+ metric="euclidean",
224
+ )
225
+ ```
226
+
227
+ For cosine distance, zero vectors are rejected because cosine distance is
228
+ undefined for them.
229
+
230
+ By default, the FAISS backend uses a Flat index:
231
+
232
+ ```python
233
+ from fast_ballmapper import FaissConfig
234
+
235
+ config = FaissConfig(factory="Flat")
236
+ ```
237
+
238
+ A Flat FAISS index is exhaustive. It compares the query with every indexed
239
+ vector, so it accelerates distance computation without changing the Ball Mapper
240
+ range sets, apart from finite-precision effects near the threshold.
241
+
242
+ ### Configurable FAISS indexes
243
+
244
+ The FAISS backend can be configured using `FaissConfig`:
245
+
246
+ ```python
247
+ from fast_ballmapper import FaissConfig, compute_landmarks
248
+
249
+ config = FaissConfig(
250
+ factory="IVF256,Flat",
251
+ search_params={"nprobe": 16},
252
+ )
253
+
254
+ landmarks, cover = compute_landmarks(
255
+ x,
256
+ eps=0.1,
257
+ method="faiss",
258
+ metric="euclidean",
259
+ faiss_config=config,
260
+ )
261
+ ```
262
+
263
+ The `factory` argument is a FAISS index-factory string. Examples include:
264
+
265
+ ```python
266
+ FaissConfig(factory="Flat")
267
+ FaissConfig(factory="IVF256,Flat", search_params={"nprobe": 16})
268
+ FaissConfig(factory="HNSW32", search_params={"efSearch": 64})
269
+ FaissConfig(factory="SQ8")
270
+ FaissConfig(factory="IVF256,SQ8", search_params={"nprobe": 16})
271
+ FaissConfig(factory="IVF256,PQ16x4", query_mode="knn", candidate_k=1024)
272
+ ```
273
+
274
+ Approximate indexes may change the computed cover. Non-exhaustive indexes can
275
+ omit true ball members, while compressed indexes can also introduce points whose
276
+ exact distance lies outside the ball. For controlled comparisons, use
277
+ `build_cover` with a fixed landmark set.
278
+
279
+ ### Candidate search and exact verification
280
+
281
+ Some FAISS configurations use a candidate-limited `k`-nearest-neighbour search
282
+ instead of native range search:
283
+
284
+ ```python
285
+ config = FaissConfig(
286
+ factory="IVF256,PQ16x4",
287
+ search_params={"nprobe": 16},
288
+ query_mode="knn",
289
+ candidate_k=1024,
290
+ )
291
+ ```
292
+
293
+ Exact verification can be enabled to recompute exact distances for candidates
294
+ and remove points outside the requested radius:
295
+
296
+ ```python
297
+ config = FaissConfig(
298
+ factory="IVF256,Flat",
299
+ search_params={"nprobe": 16},
300
+ query_mode="knn",
301
+ candidate_k=1024,
302
+ exact_verify=True,
303
+ )
304
+ ```
305
+
306
+ Exact verification prevents false-positive ball memberships among the examined
307
+ candidates, but it cannot recover true members that were never included in the
308
+ candidate set.
309
+
310
+ ### Optional GPU execution
311
+
312
+ GPU execution can be requested through `FaissConfig`:
313
+
314
+ ```python
315
+ config = FaissConfig(
316
+ factory="Flat",
317
+ device="gpu",
318
+ gpu_device=0,
319
+ gpu_fallback_to_cpu=True,
320
+ )
321
+ ```
322
+
323
+ Set `device="auto"` to use the GPU only when a GPU-enabled FAISS build and a
324
+ visible GPU are available:
325
+
326
+ ```python
327
+ config = FaissConfig(
328
+ factory="Flat",
329
+ device="auto",
330
+ )
331
+ ```
332
+
333
+ The default is `device="cpu"` for reproducibility.
334
+
335
+ ## Graph construction and coloring
336
+
337
+ ```python
338
+ from fast_ballmapper import (
339
+ build_mapper,
340
+ color_by_density,
341
+ color_by_entropy,
342
+ color_by_function,
343
+ color_by_mode,
344
+ color_by_size,
345
+ )
346
+
347
+ graph = build_mapper(cover)
348
+
349
+ sizes = color_by_size(cover)
350
+ density = color_by_density(cover)
351
+ mean_first_coordinate = color_by_function(x[:, 0], cover)
352
+ ```
353
+
354
+ ## Matplotlib visualization
355
+
356
+ ```python
357
+ import matplotlib.pyplot as plt
358
+
359
+ from fast_ballmapper.plotting.matplotlib import add_colorbar, draw_ball_mapper
360
+
361
+ fig, ax = plt.subplots(figsize=(8, 6))
362
+
363
+ _, nodes = draw_ball_mapper(
364
+ graph,
365
+ colors=sizes,
366
+ sizes=sizes,
367
+ layout="spring",
368
+ node_scale=500,
369
+ ax=ax,
370
+ )
371
+
372
+ add_colorbar(nodes, ax, label="Ball size")
373
+ plt.show()
374
+ ```
375
+
376
+ ## Plotly visualization
377
+
378
+ ```python
379
+ from fast_ballmapper.plotting.plotly import draw_ball_mapper_plotly
380
+
381
+ figure = draw_ball_mapper_plotly(
382
+ graph,
383
+ cover,
384
+ colorings={"Ball size": sizes, "Density": density},
385
+ sizes=sizes,
386
+ show=True,
387
+ )
388
+ ```
389
+
390
+ Use `export_html="ball_mapper.html"` to create a standalone interactive HTML
391
+ file.
392
+
393
+ ## Experiments
394
+
395
+ Research scripts can be placed in the top-level `experiments/` directory.
396
+ These scripts are not part of the public package API.
397
+
398
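+ If your checkout includes `experiments/run_fixed_landmarks.py` (the `experiments/` directory is not shipped in this source distribution), a fixed-landmark approximate FAISS experiment can be run with: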
399
+
400
+ ```bash
401
+ python experiments/run_fixed_landmarks.py \
402
+ --n 5000 \
403
+ --d 32 \
404
+ --seed 0 \
405
+ --target-ball-size 30
406
+ ```
407
+
408
+ The fixed-landmark design first selects landmarks exactly and then compares
409
+ different range-query backends around the same landmark set. This separates
410
+ range-query approximation errors from changes in landmark selection.
411
+
412
+ ## Public API
413
+
414
+ Core functions and configuration objects are exported directly from
415
+ `fast_ballmapper`:
416
+
417
+ * `FaissConfig`
418
+ * `compute_landmarks`
419
+ * `compute_landmarks_fps`
420
+ * `build_cover`
421
+ * `build_mapper`
422
+ * `compute_edge_overlaps`
423
+ * `color_by_function`
424
+ * `color_by_mode`
425
+ * `color_by_entropy`
426
+ * `color_by_size`
427
+ * `color_by_density`
428
+
429
+ Plotting functions live in their optional submodules so importing the core
430
+ package does not import Matplotlib or Plotly.
431
+
432
+ ## License
433
+
434
+ MIT