hyperview 0.2.0__tar.gz → 0.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. {hyperview-0.2.0 → hyperview-0.3.1}/.gitignore +3 -1
  2. {hyperview-0.2.0 → hyperview-0.3.1}/LICENSE +1 -1
  3. {hyperview-0.2.0 → hyperview-0.3.1}/PKG-INFO +7 -6
  4. {hyperview-0.2.0 → hyperview-0.3.1}/README.md +6 -5
  5. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/_version.py +2 -2
  6. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/api.py +26 -18
  7. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/cli.py +73 -25
  8. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/core/dataset.py +353 -185
  9. hyperview-0.3.1/src/hyperview/core/selection.py +309 -0
  10. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/embeddings/__init__.py +2 -3
  11. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/embeddings/engine.py +63 -2
  12. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/embeddings/pipelines.py +108 -39
  13. hyperview-0.3.1/src/hyperview/embeddings/projection.py +467 -0
  14. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/embeddings/providers/lancedb_providers.py +178 -0
  15. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/server/app.py +157 -31
  16. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/server/static/404/index.html +1 -1
  17. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/server/static/404.html +1 -1
  18. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/server/static/__next.__PAGE__.txt +2 -2
  19. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/server/static/__next._full.txt +2 -2
  20. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/server/static/__next._head.txt +1 -1
  21. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/server/static/__next._index.txt +1 -1
  22. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/server/static/__next._tree.txt +1 -1
  23. hyperview-0.3.1/src/hyperview/server/static/_next/static/chunks/077b38561d6ea80d.js +13 -0
  24. hyperview-0.3.1/src/hyperview/server/static/_next/static/chunks/6ab4c63fd83a6bdc.js +1 -0
  25. hyperview-0.3.1/src/hyperview/server/static/_next/static/chunks/6adcb3a43c287a0a.js +407 -0
  26. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/server/static/_not-found/__next._full.txt +1 -1
  27. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/server/static/_not-found/__next._head.txt +1 -1
  28. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/server/static/_not-found/__next._index.txt +1 -1
  29. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/server/static/_not-found/__next._not-found.__PAGE__.txt +1 -1
  30. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/server/static/_not-found/__next._not-found.txt +1 -1
  31. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/server/static/_not-found/__next._tree.txt +1 -1
  32. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/server/static/_not-found/index.html +1 -1
  33. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/server/static/_not-found/index.txt +1 -1
  34. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/server/static/index.html +1 -1
  35. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/server/static/index.txt +2 -2
  36. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/storage/backend.py +2 -1
  37. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/storage/lancedb_backend.py +226 -23
  38. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/storage/memory_backend.py +35 -6
  39. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/storage/schema.py +53 -13
  40. hyperview-0.2.0/src/hyperview/core/selection.py +0 -53
  41. hyperview-0.2.0/src/hyperview/embeddings/projection.py +0 -267
  42. hyperview-0.2.0/src/hyperview/server/static/_next/static/chunks/4543baba6321cb86.js +0 -301
  43. hyperview-0.2.0/src/hyperview/server/static/_next/static/chunks/7f11a0afb44e4703.js +0 -13
  44. hyperview-0.2.0/src/hyperview/server/static/_next/static/chunks/80cd550edf03d788.js +0 -1
  45. {hyperview-0.2.0 → hyperview-0.3.1}/pyproject.toml +0 -0
  46. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/__init__.py +0 -0
  47. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/core/__init__.py +0 -0
  48. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/core/sample.py +0 -0
  49. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/embeddings/compute.py +0 -0
  50. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/embeddings/providers/__init__.py +0 -0
  51. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/server/__init__.py +0 -0
  52. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/server/static/_next/static/chunks/462c5e072cd14e02.css +0 -0
  53. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/server/static/_next/static/chunks/567993cf36cd4ab1.js +0 -0
  54. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/server/static/_next/static/chunks/86c1fc4cf542f408.js +0 -0
  55. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/server/static/_next/static/chunks/a6dad97d9634a72d.js +0 -0
  56. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/server/static/_next/static/chunks/a6dad97d9634a72d.js.map +0 -0
  57. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/server/static/_next/static/chunks/e954ba82c0a04100.js +0 -0
  58. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/server/static/_next/static/chunks/f29dd35a99c216ea.js +0 -0
  59. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/server/static/_next/static/chunks/turbopack-cb59e03a04a579d1.js +0 -0
  60. {hyperview-0.2.0/src/hyperview/server/static/_next/static/u9HWgMoM1R5w0owC62Blr → hyperview-0.3.1/src/hyperview/server/static/_next/static/gMy4JPL2K0MjiU7F71me_}/_buildManifest.js +0 -0
  61. {hyperview-0.2.0/src/hyperview/server/static/_next/static/u9HWgMoM1R5w0owC62Blr → hyperview-0.3.1/src/hyperview/server/static/_next/static/gMy4JPL2K0MjiU7F71me_}/_clientMiddlewareManifest.json +0 -0
  62. {hyperview-0.2.0/src/hyperview/server/static/_next/static/u9HWgMoM1R5w0owC62Blr → hyperview-0.3.1/src/hyperview/server/static/_next/static/gMy4JPL2K0MjiU7F71me_}/_ssgManifest.js +0 -0
  63. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/server/static/_next/static/media/1bffadaabf893a1e-s.7cd81963.woff2 +0 -0
  64. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/server/static/_next/static/media/2bbe8d2671613f1f-s.76dcb0b2.woff2 +0 -0
  65. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/server/static/_next/static/media/2c55a0e60120577a-s.2a48534a.woff2 +0 -0
  66. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/server/static/_next/static/media/5476f68d60460930-s.c995e352.woff2 +0 -0
  67. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/server/static/_next/static/media/83afe278b6a6bb3c-s.p.3a6ba036.woff2 +0 -0
  68. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/server/static/_next/static/media/9c72aa0f40e4eef8-s.18a48cbc.woff2 +0 -0
  69. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/server/static/_next/static/media/ad66f9afd8947f86-s.7a40eb73.woff2 +0 -0
  70. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/storage/__init__.py +0 -0
  71. {hyperview-0.2.0 → hyperview-0.3.1}/src/hyperview/storage/config.py +0 -0
@@ -77,7 +77,9 @@ AGENTS.md
77
77
  .specstory/
78
78
 
79
79
  # Deployment repo (managed as a separate nested git repository)
80
- deploy/
80
+ hyper-models/
81
+ hyperview-spaces/
82
+ eval/
81
83
 
82
84
  # Generated version file (hatch-vcs)
83
85
  src/hyperview/_version.py
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2025 Matin Mahmood
3
+ Copyright (c) 2025 Hyper3Labs
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hyperview
3
- Version: 0.2.0
3
+ Version: 0.3.1
4
4
  Summary: Open-source dataset curation with hyperbolic embeddings visualization
5
5
  Project-URL: Homepage, https://github.com/Hyper3Labs/HyperView
6
6
  Project-URL: Documentation, https://github.com/Hyper3Labs/HyperView#readme
@@ -48,7 +48,7 @@ Description-Content-Type: text/markdown
48
48
 
49
49
  > **Open-source dataset curation + embedding visualization (Euclidean + Poincaré disk)**
50
50
 
51
- [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/Hyper3Labs/HyperView) [![Open in HF Spaces](https://huggingface.co/datasets/huggingface/badges/resolve/main/open-in-hf-spaces-sm.svg)](https://huggingface.co/spaces/hyper3labs/HyperView) [![Discord](https://img.shields.io/badge/Discord-hyper%C2%B3labs-5865F2?logo=discord&logoColor=white)](https://discord.gg/Az7k4Ure)
51
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/Hyper3Labs/HyperView) [![Open in HF Spaces](https://huggingface.co/datasets/huggingface/badges/resolve/main/open-in-hf-spaces-sm.svg)](https://huggingface.co/spaces/hyper3labs/HyperView) [![Discord](https://img.shields.io/badge/Discord-hyper%C2%B3labs-5865F2?logo=discord&logoColor=white)](https://discord.gg/Za3rBkTPSf)
52
52
 
53
53
  <p align="center">
54
54
  <a href="https://huggingface.co/spaces/hyper3labs/HyperView" target="_blank">
@@ -63,7 +63,7 @@ Description-Content-Type: text/markdown
63
63
  ## Features
64
64
 
65
65
  - **Dual-Panel UI**: Image grid + scatter plot with bidirectional selection
66
- - **Euclidean/Poincaré Toggle**: Switch between standard 2D UMAP and Poincaré disk visualization
66
+ - **Multi-Layout Visualizations**: Explore Euclidean, Poincare, and spherical layouts in 2D or 3D with UMAP or PCA projections
67
67
  - **HuggingFace Integration**: Load datasets directly from HuggingFace Hub
68
68
  - **Fast Embeddings**: Uses EmbedAnything for CLIP-based image embeddings
69
69
 
@@ -94,14 +94,15 @@ hyperview \
94
94
  --label-key label \
95
95
  --samples 500 \
96
96
  --model openai/clip-vit-base-patch32 \
97
- --geometry both
97
+ --layout euclidean \
98
+ --layout poincare
98
99
  ```
99
100
 
100
101
  This will:
101
102
  1. Use dataset `cifar10_demo`
102
103
  2. Load up to 500 samples from CIFAR-10
103
104
  3. Compute CLIP embeddings
104
- 4. Generate Euclidean and Poincaré visualizations
105
+ 4. Generate Euclidean and Poincare visualizations
105
106
  5. Start the server at **http://127.0.0.1:6262**
106
107
 
107
108
  You can also launch with explicit dataset/model/projection args:
@@ -116,7 +117,7 @@ hyperview \
116
117
  --samples 1000 \
117
118
  --model openai/clip-vit-base-patch32 \
118
119
  --method umap \
119
- --geometry euclidean
120
+ --layout euclidean
120
121
  ```
121
122
 
122
123
  ### Python API
@@ -2,7 +2,7 @@
2
2
 
3
3
  > **Open-source dataset curation + embedding visualization (Euclidean + Poincaré disk)**
4
4
 
5
- [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/Hyper3Labs/HyperView) [![Open in HF Spaces](https://huggingface.co/datasets/huggingface/badges/resolve/main/open-in-hf-spaces-sm.svg)](https://huggingface.co/spaces/hyper3labs/HyperView) [![Discord](https://img.shields.io/badge/Discord-hyper%C2%B3labs-5865F2?logo=discord&logoColor=white)](https://discord.gg/Az7k4Ure)
5
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/Hyper3Labs/HyperView) [![Open in HF Spaces](https://huggingface.co/datasets/huggingface/badges/resolve/main/open-in-hf-spaces-sm.svg)](https://huggingface.co/spaces/hyper3labs/HyperView) [![Discord](https://img.shields.io/badge/Discord-hyper%C2%B3labs-5865F2?logo=discord&logoColor=white)](https://discord.gg/Za3rBkTPSf)
6
6
 
7
7
  <p align="center">
8
8
  <a href="https://huggingface.co/spaces/hyper3labs/HyperView" target="_blank">
@@ -17,7 +17,7 @@
17
17
  ## Features
18
18
 
19
19
  - **Dual-Panel UI**: Image grid + scatter plot with bidirectional selection
20
- - **Euclidean/Poincaré Toggle**: Switch between standard 2D UMAP and Poincaré disk visualization
20
+ - **Multi-Layout Visualizations**: Explore Euclidean, Poincare, and spherical layouts in 2D or 3D with UMAP or PCA projections
21
21
  - **HuggingFace Integration**: Load datasets directly from HuggingFace Hub
22
22
  - **Fast Embeddings**: Uses EmbedAnything for CLIP-based image embeddings
23
23
 
@@ -48,14 +48,15 @@ hyperview \
48
48
  --label-key label \
49
49
  --samples 500 \
50
50
  --model openai/clip-vit-base-patch32 \
51
- --geometry both
51
+ --layout euclidean \
52
+ --layout poincare
52
53
  ```
53
54
 
54
55
  This will:
55
56
  1. Use dataset `cifar10_demo`
56
57
  2. Load up to 500 samples from CIFAR-10
57
58
  3. Compute CLIP embeddings
58
- 4. Generate Euclidean and Poincaré visualizations
59
+ 4. Generate Euclidean and Poincare visualizations
59
60
  5. Start the server at **http://127.0.0.1:6262**
60
61
 
61
62
  You can also launch with explicit dataset/model/projection args:
@@ -70,7 +71,7 @@ hyperview \
70
71
  --samples 1000 \
71
72
  --model openai/clip-vit-base-patch32 \
72
73
  --method umap \
73
- --geometry euclidean
74
+ --layout euclidean
74
75
  ```
75
76
 
76
77
  ### Python API
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.2.0'
32
- __version_tuple__ = version_tuple = (0, 2, 0)
31
+ __version__ = version = '0.3.1'
32
+ __version_tuple__ = version_tuple = (0, 3, 1)
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -7,6 +7,7 @@ import threading
7
7
  import time
8
8
  import webbrowser
9
9
  from dataclasses import dataclass
10
+ from importlib.util import find_spec
10
11
  from urllib.error import URLError
11
12
  from urllib.request import Request, urlopen
12
13
  from uuid import uuid4
@@ -55,6 +56,16 @@ def _read_health(url: str, timeout_s: float) -> _HealthResponse:
55
56
  )
56
57
 
57
58
 
59
+ def _resolve_default_launch_layout(dataset: Dataset) -> str:
60
+ spaces = dataset.list_spaces()
61
+
62
+ if any(space.geometry not in ("hyperboloid", "hypersphere") for space in spaces):
63
+ return "euclidean:2d"
64
+ if any(space.geometry == "hypersphere" for space in spaces):
65
+ return "spherical:3d"
66
+ return "poincare:2d"
67
+
68
+
58
69
  class Session:
59
70
  """A session for the HyperView visualizer."""
60
71
 
@@ -228,9 +239,9 @@ def launch(
228
239
  """Launch the HyperView visualization server.
229
240
 
230
241
  Note:
231
- HyperView's UI needs at least one 2D layout. If layouts are missing but
232
- embedding spaces exist, this function will compute a default layout
233
- automatically (Euclidean if any Euclidean space exists, otherwise Poincaré).
242
+ HyperView needs at least one visualization to display. If no layouts
243
+ exist yet but embedding spaces do, this function computes one default
244
+ layout automatically.
234
245
 
235
246
  Args:
236
247
  dataset: The dataset to visualize.
@@ -318,26 +329,26 @@ def launch(
318
329
  "port or stop the process listening on that port."
319
330
  )
320
331
 
321
- # The frontend requires 2D coords from /api/embeddings.
322
- # Ensure at least one layout exists; do not auto-generate optional geometries.
323
332
  layouts = dataset.list_layouts()
324
333
  spaces = dataset.list_spaces()
325
334
 
326
- if not spaces:
335
+ if not layouts and not spaces:
327
336
  raise ValueError(
328
- "HyperView launch requires 2D projections for the UI. "
329
- "No projections or embedding spaces were found. "
337
+ "HyperView launch requires at least one visualization or embedding space. "
338
+ "No visualizations or embedding spaces were found. "
330
339
  "Call `dataset.compute_embeddings()` and `dataset.compute_visualization()` "
331
- "before `hv.launch()`."
340
+ "or `dataset.set_coords()` before `hv.launch()`."
332
341
  )
333
342
 
334
343
  if not layouts:
335
- has_euclidean_space = any(s.geometry != "hyperboloid" for s in spaces)
336
- default_geometry = "euclidean" if has_euclidean_space else "poincare"
344
+ default_layout = _resolve_default_launch_layout(dataset)
337
345
 
338
- print(f"No layouts found. Computing {default_geometry} visualization...")
346
+ print(f"No visualizations found. Computing {default_layout} visualization...")
339
347
  # Let compute_visualization pick the most appropriate default space.
340
- dataset.compute_visualization(space_key=None, geometry=default_geometry)
348
+ dataset.compute_visualization(
349
+ space_key=None,
350
+ layout=default_layout,
351
+ )
341
352
 
342
353
  session = Session(dataset, host, port)
343
354
 
@@ -390,9 +401,6 @@ def _is_colab() -> bool:
390
401
  """Check if running inside a Google Colab notebook runtime."""
391
402
  if os.environ.get("COLAB_RELEASE_TAG"):
392
403
  return True
393
- try:
394
- import google.colab # type: ignore[import-not-found]
395
-
404
+ if find_spec("google.colab") is not None:
396
405
  return True
397
- except ImportError:
398
- return False
406
+ return False
@@ -5,6 +5,7 @@ from __future__ import annotations
5
5
  import argparse
6
6
 
7
7
  from hyperview import Dataset, launch
8
+ from hyperview.core.dataset import parse_visualization_layout
8
9
 
9
10
 
10
11
  def _build_parser() -> argparse.ArgumentParser:
@@ -38,6 +39,12 @@ def _build_parser() -> argparse.ArgumentParser:
38
39
  default=None,
39
40
  help="HuggingFace split to use (required with --hf-dataset)",
40
41
  )
42
+ parser.add_argument(
43
+ "--hf-config",
44
+ type=str,
45
+ default=None,
46
+ help="Optional HuggingFace subset/configuration to use",
47
+ )
41
48
  parser.add_argument(
42
49
  "--image-key",
43
50
  type=str,
@@ -72,6 +79,14 @@ def _build_parser() -> argparse.ArgumentParser:
72
79
  default=None,
73
80
  help="Maximum number of ingested samples (omit to load all)",
74
81
  )
82
+ parser.add_argument(
83
+ "--hf-streaming",
84
+ action="store_true",
85
+ help=(
86
+ "Stream HuggingFace rows instead of materializing the full split first. "
87
+ "Useful for loading subsets without eager full-split downloads."
88
+ ),
89
+ )
75
90
  parser.add_argument(
76
91
  "--shuffle",
77
92
  action="store_true",
@@ -83,6 +98,15 @@ def _build_parser() -> argparse.ArgumentParser:
83
98
  default=42,
84
99
  help="Random seed used when --shuffle is enabled (default: 42)",
85
100
  )
101
+ parser.add_argument(
102
+ "--hf-shuffle-buffer-size",
103
+ type=int,
104
+ default=1000,
105
+ help=(
106
+ "Shuffle buffer size used with --hf-streaming and --shuffle. "
107
+ "Streaming shuffle is approximate and trades larger buffers for more read-ahead."
108
+ ),
109
+ )
86
110
 
87
111
  parser.add_argument(
88
112
  "--model",
@@ -95,17 +119,20 @@ def _build_parser() -> argparse.ArgumentParser:
95
119
  )
96
120
  parser.add_argument(
97
121
  "--method",
98
- choices=["umap"],
122
+ choices=["umap", "pca"],
99
123
  default="umap",
100
- help="Projection method (currently only 'umap')",
124
+ help="Projection method: 'umap' (default) or 'pca'",
101
125
  )
102
126
  parser.add_argument(
103
- "--geometry",
104
- choices=["auto", "euclidean", "poincare", "both"],
105
- default="both",
127
+ "--layout",
128
+ action="append",
129
+ dest="layouts",
130
+ metavar="GEOMETRY[:2d|3d]",
106
131
  help=(
107
- "Layout geometry to compute when embeddings are computed. "
108
- "auto chooses based on embedding geometry; both computes both layouts."
132
+ "Visualization layout to compute. Repeat this flag to request multiple layouts, "
133
+ "for example '--layout euclidean --layout spherical'. "
134
+ "Omitting the suffix defaults to 2D for euclidean/poincare and 3D for spherical. "
135
+ "If omitted, HyperView picks one sensible default layout for the selected embedding space."
109
136
  ),
110
137
  )
111
138
  parser.add_argument(
@@ -162,6 +189,23 @@ def _build_parser() -> argparse.ArgumentParser:
162
189
 
163
190
 
164
191
  def _validate_args(parser: argparse.ArgumentParser, args: argparse.Namespace) -> None:
192
+ if args.layouts:
193
+ canonical_layouts: list[str] = []
194
+ seen_layouts: set[str] = set()
195
+ for layout_spec in args.layouts:
196
+ try:
197
+ geometry, layout_dimension = parse_visualization_layout(layout_spec)
198
+ except ValueError as exc:
199
+ parser.error(str(exc))
200
+
201
+ canonical_layout = f"{geometry}:{layout_dimension}d"
202
+ if canonical_layout in seen_layouts:
203
+ continue
204
+ seen_layouts.add(canonical_layout)
205
+ canonical_layouts.append(canonical_layout)
206
+
207
+ args.layouts = canonical_layouts
208
+
165
209
  if args.hf_dataset and args.images_dir:
166
210
  parser.error("Use either --hf-dataset or --images-dir, not both.")
167
211
 
@@ -181,6 +225,8 @@ def _validate_args(parser: argparse.ArgumentParser, args: argparse.Namespace) ->
181
225
  parser.error("--split is required when using --hf-dataset.")
182
226
  if not args.image_key:
183
227
  parser.error("--image-key is required when using --hf-dataset.")
228
+ if args.hf_shuffle_buffer_size < 1:
229
+ parser.error("--hf-shuffle-buffer-size must be at least 1.")
184
230
 
185
231
 
186
232
  def _print_ingestion_result(added: int, skipped: int) -> None:
@@ -191,9 +237,11 @@ def _print_ingestion_result(added: int, skipped: int) -> None:
191
237
 
192
238
 
193
239
  def _ingest_huggingface(dataset: Dataset, args: argparse.Namespace, dataset_name: str) -> None:
194
- print(f"Loading HuggingFace dataset {dataset_name}...")
240
+ config_suffix = f" [{args.hf_config}]" if args.hf_config else ""
241
+ print(f"Loading HuggingFace dataset {dataset_name}{config_suffix}...")
195
242
  added, skipped = dataset.add_from_huggingface(
196
243
  dataset_name,
244
+ config=args.hf_config,
197
245
  split=args.split,
198
246
  image_key=args.image_key,
199
247
  label_key=args.label_key,
@@ -201,6 +249,8 @@ def _ingest_huggingface(dataset: Dataset, args: argparse.Namespace, dataset_name
201
249
  max_samples=args.samples,
202
250
  shuffle=args.shuffle,
203
251
  seed=args.seed,
252
+ streaming=args.hf_streaming,
253
+ shuffle_buffer_size=args.hf_shuffle_buffer_size,
204
254
  )
205
255
  _print_ingestion_result(added, skipped)
206
256
 
@@ -228,37 +278,35 @@ def _prepare_dataset(args: argparse.Namespace) -> Dataset:
228
278
  return dataset
229
279
 
230
280
 
231
- def _resolve_geometry_targets(
281
+ def _resolve_default_layouts(
232
282
  dataset: Dataset,
233
- geometry: str,
234
283
  space_key: str | None,
235
284
  ) -> list[str]:
236
- if geometry == "both":
237
- return ["euclidean", "poincare"]
238
-
239
- if geometry in ("euclidean", "poincare"):
240
- return [geometry]
241
-
242
- if space_key is None:
243
- return ["euclidean"]
244
-
245
285
  spaces = dataset.list_spaces()
246
286
  selected = next((space for space in spaces if space.space_key == space_key), None)
247
- if selected is not None and selected.geometry == "hyperboloid":
248
- return ["poincare"]
249
287
 
250
- return ["euclidean"]
288
+ if selected is not None:
289
+ if selected.geometry == "hyperboloid":
290
+ return ["poincare:2d"]
291
+ if selected.geometry == "hypersphere":
292
+ return ["spherical:3d"]
293
+ return ["euclidean:2d"]
251
294
 
295
+ if any(space.geometry not in ("hyperboloid", "hypersphere") for space in spaces):
296
+ return ["euclidean:2d"]
297
+ if any(space.geometry == "hypersphere" for space in spaces):
298
+ return ["spherical:3d"]
299
+ return ["poincare:2d"]
252
300
 
253
301
  def _compute_layouts(dataset: Dataset, args: argparse.Namespace, space_key: str | None) -> None:
254
- targets = _resolve_geometry_targets(dataset, args.geometry, space_key)
302
+ target_layouts = args.layouts or _resolve_default_layouts(dataset, space_key)
255
303
 
256
304
  print("Computing visualizations...")
257
- for target_geometry in targets:
305
+ for target_layout in target_layouts:
258
306
  dataset.compute_visualization(
259
307
  space_key=space_key,
260
308
  method=args.method,
261
- geometry=target_geometry,
309
+ layout=target_layout,
262
310
  n_neighbors=args.n_neighbors,
263
311
  min_dist=args.min_dist,
264
312
  metric=args.metric,