hyperview 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. hyperview-0.1.0/.gitignore +76 -0
  2. hyperview-0.1.0/LICENSE +21 -0
  3. hyperview-0.1.0/PKG-INFO +165 -0
  4. hyperview-0.1.0/README.md +119 -0
  5. hyperview-0.1.0/pyproject.toml +103 -0
  6. hyperview-0.1.0/src/hyperview/__init__.py +14 -0
  7. hyperview-0.1.0/src/hyperview/_version.py +34 -0
  8. hyperview-0.1.0/src/hyperview/api.py +398 -0
  9. hyperview-0.1.0/src/hyperview/cli.py +167 -0
  10. hyperview-0.1.0/src/hyperview/core/__init__.py +6 -0
  11. hyperview-0.1.0/src/hyperview/core/dataset.py +701 -0
  12. hyperview-0.1.0/src/hyperview/core/sample.py +95 -0
  13. hyperview-0.1.0/src/hyperview/core/selection.py +53 -0
  14. hyperview-0.1.0/src/hyperview/embeddings/__init__.py +31 -0
  15. hyperview-0.1.0/src/hyperview/embeddings/compute.py +89 -0
  16. hyperview-0.1.0/src/hyperview/embeddings/engine.py +330 -0
  17. hyperview-0.1.0/src/hyperview/embeddings/pipelines.py +203 -0
  18. hyperview-0.1.0/src/hyperview/embeddings/projection.py +267 -0
  19. hyperview-0.1.0/src/hyperview/embeddings/providers/__init__.py +7 -0
  20. hyperview-0.1.0/src/hyperview/embeddings/providers/lancedb_providers.py +196 -0
  21. hyperview-0.1.0/src/hyperview/server/__init__.py +5 -0
  22. hyperview-0.1.0/src/hyperview/server/app.py +399 -0
  23. hyperview-0.1.0/src/hyperview/storage/__init__.py +19 -0
  24. hyperview-0.1.0/src/hyperview/storage/backend.py +196 -0
  25. hyperview-0.1.0/src/hyperview/storage/config.py +71 -0
  26. hyperview-0.1.0/src/hyperview/storage/lancedb_backend.py +432 -0
  27. hyperview-0.1.0/src/hyperview/storage/memory_backend.py +279 -0
  28. hyperview-0.1.0/src/hyperview/storage/schema.py +283 -0
@@ -0,0 +1,76 @@
1
+ # Python
2
+ __pycache__/
3
+ *.pyc
4
+ *.pyo
5
+ *.pyd
6
+ .Python
7
+ env/
8
+ venv/
9
+ .venv/
10
+ /uv.lock
11
+ *.egg-info/
12
+ .pytest_cache/
13
+ .coverage
14
+ htmlcov/
15
+
16
+ # Jupyter Notebooks
17
+ .ipynb_checkpoints
18
+
19
+ # macOS
20
+ .DS_Store
21
+
22
+ # VS Code
23
+ .vscode/
24
+
25
+ # Generated assets
26
+ assets/demo_animation_frames/
27
+ *.gif
28
+
29
+ # Frontend
30
+ frontend/node_modules/
31
+ frontend/.next/
32
+ frontend/out/
33
+
34
+ # Bundled frontend in Python package (built in CI during release)
35
+ src/hyperview/server/static/
36
+
37
+ # Python package build
38
+ dist/
39
+ build/
40
+ *.egg-info/
41
+
42
+ # Data cache
43
+ *.hf/
44
+ .cache/
45
+
46
+ # external repo (https://github.com/Hyper3Labs/hyper-scatter)
47
+ hyper-scatter/
48
+
49
+ # nohup
50
+ nohup.out
51
+ frontend/nohup.out
52
+
53
+ # Local logs / tool artifacts
54
+ .hyperview-*.log
55
+ .hyperview-*.pid
56
+ .playwright-mcp/
57
+ frontend/tsconfig.tsbuildinfo
58
+
59
+ # Hyperbolic model zoo (kept as a separate repo)
60
+ hyper_model_zoo/
61
+ hyper_models/
62
+ scripts_ignored/
63
+
64
+ # AI Context (Agent files)
65
+ .claude/
66
+ context/
67
+ CLAUDE.md
68
+ TASKS.md
69
+ TESTS.md
70
+ AGENTS.md
71
+ **/AGENTS.md
72
+ .github/agents/
73
+ .specstory/
74
+
75
+ # Generated version file (hatch-vcs)
76
+ src/hyperview/_version.py
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Matin Mahmood
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,165 @@
1
+ Metadata-Version: 2.4
2
+ Name: hyperview
3
+ Version: 0.1.0
4
+ Summary: Open-source dataset curation with hyperbolic embeddings visualization
5
+ Project-URL: Homepage, https://github.com/Hyper3Labs/HyperView
6
+ Project-URL: Documentation, https://github.com/Hyper3Labs/HyperView#readme
7
+ Project-URL: Repository, https://github.com/Hyper3Labs/HyperView
8
+ Project-URL: Issues, https://github.com/Hyper3Labs/HyperView/issues
9
+ Author: hyper3labs
10
+ License: MIT
11
+ License-File: LICENSE
12
+ Keywords: curation,dataset,embeddings,hyperbolic,machine-learning,visualization
13
+ Classifier: Development Status :: 3 - Alpha
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Intended Audience :: Science/Research
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
+ Classifier: Topic :: Scientific/Engineering :: Visualization
23
+ Requires-Python: >=3.10
24
+ Requires-Dist: aiofiles>=25.1.0
25
+ Requires-Dist: datasets>=4.5.0
26
+ Requires-Dist: embed-anything>=0.7.0
27
+ Requires-Dist: fastapi>=0.128.0
28
+ Requires-Dist: hyper-models>=0.1.0
29
+ Requires-Dist: lancedb>=0.26.1
30
+ Requires-Dist: numpy<2.4,>=1.26.4
31
+ Requires-Dist: pillow>=12.1.0
32
+ Requires-Dist: pyarrow>=22.0.0
33
+ Requires-Dist: pydantic>=2.12.5
34
+ Requires-Dist: umap-learn>=0.5.11
35
+ Requires-Dist: uvicorn[standard]>=0.40.0
36
+ Provides-Extra: dev
37
+ Requires-Dist: httpx>=0.28.1; extra == 'dev'
38
+ Requires-Dist: pytest-asyncio>=1.3.0; extra == 'dev'
39
+ Requires-Dist: pytest>=9.0.2; extra == 'dev'
40
+ Requires-Dist: ruff>=0.14.13; extra == 'dev'
41
+ Provides-Extra: ml
42
+ Requires-Dist: timm>=1.0.0; extra == 'ml'
43
+ Requires-Dist: torch>=2.9.1; extra == 'ml'
44
+ Requires-Dist: torchvision>=0.24.1; extra == 'ml'
45
+ Description-Content-Type: text/markdown
46
+
47
+ # HyperView
48
+
49
+ > **Open-source dataset curation + embedding visualization (Euclidean + Poincaré disk)**
50
+
51
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/Hyper3Labs/HyperView)
52
+
53
+ <p align="center">
54
+ <a href="https://youtu.be/XLaa8FHSQtc" target="_blank">
55
+ <img src="assets/screenshot.png" alt="HyperView Screenshot" width="100%">
56
+ </a>
57
+ <br>
58
+ <a href="https://youtu.be/XLaa8FHSQtc" target="_blank">Watch the Demo Video</a>
59
+ </p>
60
+
61
+ ---
62
+
63
+ ## Features
64
+
65
+ - **Dual-Panel UI**: Image grid + scatter plot with bidirectional selection
66
+ - **Euclidean/Poincaré Toggle**: Switch between standard 2D UMAP and Poincaré disk visualization
67
+ - **HuggingFace Integration**: Load datasets directly from HuggingFace Hub
68
+ - **Fast Embeddings**: Uses EmbedAnything for CLIP-based image embeddings
69
+
70
+ ## Quick Start
71
+
72
+ **Docs:** [docs/datasets.md](docs/datasets.md) · [docs/colab.md](docs/colab.md) · [CONTRIBUTING.md](CONTRIBUTING.md) · [TESTS.md](TESTS.md)
73
+
74
+ ### Installation
75
+
76
+ ```bash
77
+ git clone https://github.com/Hyper3Labs/HyperView.git
78
+ cd HyperView
79
+
80
+ # Install with uv
81
+ uv venv .venv
82
+ source .venv/bin/activate
83
+ uv pip install -e ".[dev]"
84
+ ```
85
+
86
+ ### Run the Demo
87
+
88
+ ```bash
89
+ hyperview demo --samples 500
90
+ ```
91
+
92
+ This will:
93
+ 1. Load 500 samples from CIFAR-100
94
+ 2. Compute CLIP embeddings
95
+ 3. Generate Euclidean and Poincaré visualizations
96
+ 4. Start the server at **http://127.0.0.1:6262**
97
+
98
+ ### Python API
99
+
100
+ ```python
101
+ import hyperview as hv
102
+
103
+ # Create dataset
104
+ dataset = hv.Dataset("my_dataset")
105
+
106
+ # Load from HuggingFace
107
+ dataset.add_from_huggingface(
108
+ "uoft-cs/cifar100",
109
+ split="train",
110
+ max_samples=1000
111
+ )
112
+
113
+ # Or load from local directory
114
+ # dataset.add_images_dir("/path/to/images", label_from_folder=True)
115
+
116
+ # Compute embeddings and visualization
117
+ dataset.compute_embeddings(model="openai/clip-vit-base-patch32")
118
+ dataset.compute_visualization()
119
+
120
+ # Launch the UI
121
+ hv.launch(dataset) # Opens http://127.0.0.1:6262
122
+ ```
123
+
124
+ ### Google Colab
125
+
126
+ See [docs/colab.md](docs/colab.md) for a fast Colab smoke test and notebook-friendly launch behavior.
127
+
128
+ ### Save and Load Datasets
129
+
130
+ ```python
131
+ # Save dataset with embeddings
132
+ dataset.save("my_dataset.json")
133
+
134
+ # Load later
135
+ dataset = hv.Dataset.load("my_dataset.json")
136
+ hv.launch(dataset)
137
+ ```
138
+
139
+ ## Why Hyperbolic?
140
+
141
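+ Traditional Euclidean embeddings struggle with hierarchical data. In Euclidean space, volume grows only polynomially with radius ($r^d$), while the number of nodes in a hierarchy grows exponentially with depth. This mismatch causes **Representation Collapse**, where minority classes get crushed together.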
142
+
143
+ **Hyperbolic space** (Poincaré disk) has exponential volume growth ($e^r$), naturally preserving hierarchical structure and keeping rare classes distinct.
144
+
145
+ <p align="center">
146
+ <img src="assets/hyperview_infographic.png" alt="Euclidean vs Hyperbolic" width="100%">
147
+ </p>
148
+
149
+ ## Contributing
150
+
151
+ Development setup, frontend hot-reload, and backend API notes live in [CONTRIBUTING.md](CONTRIBUTING.md).
152
+
153
+ ## Related projects
154
+
155
+ - **hyper-scatter**: High-performance WebGL scatterplot engine (Euclidean + Poincaré) used by the frontend: https://github.com/Hyper3Labs/hyper-scatter
156
+ - **hyper-models**: Non-Euclidean model zoo + ONNX exports (e.g. for hyperbolic VLM experiments): https://github.com/Hyper3Labs/hyper-models
157
+
158
+ ## References
159
+
160
+ - [Poincaré Embeddings for Learning Hierarchical Representations](https://arxiv.org/abs/1705.08039) (Nickel & Kiela, 2017)
161
+ - [Hyperbolic Neural Networks](https://arxiv.org/abs/1805.09112) (Ganea et al., 2018)
162
+
163
+ ## License
164
+
165
+ MIT License - see [LICENSE](LICENSE) for details.
@@ -0,0 +1,119 @@
1
+ # HyperView
2
+
3
+ > **Open-source dataset curation + embedding visualization (Euclidean + Poincaré disk)**
4
+
5
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/Hyper3Labs/HyperView)
6
+
7
+ <p align="center">
8
+ <a href="https://youtu.be/XLaa8FHSQtc" target="_blank">
9
+ <img src="assets/screenshot.png" alt="HyperView Screenshot" width="100%">
10
+ </a>
11
+ <br>
12
+ <a href="https://youtu.be/XLaa8FHSQtc" target="_blank">Watch the Demo Video</a>
13
+ </p>
14
+
15
+ ---
16
+
17
+ ## Features
18
+
19
+ - **Dual-Panel UI**: Image grid + scatter plot with bidirectional selection
20
+ - **Euclidean/Poincaré Toggle**: Switch between standard 2D UMAP and Poincaré disk visualization
21
+ - **HuggingFace Integration**: Load datasets directly from HuggingFace Hub
22
+ - **Fast Embeddings**: Uses EmbedAnything for CLIP-based image embeddings
23
+
24
+ ## Quick Start
25
+
26
+ **Docs:** [docs/datasets.md](docs/datasets.md) · [docs/colab.md](docs/colab.md) · [CONTRIBUTING.md](CONTRIBUTING.md) · [TESTS.md](TESTS.md)
27
+
28
+ ### Installation
29
+
30
+ ```bash
31
+ git clone https://github.com/Hyper3Labs/HyperView.git
32
+ cd HyperView
33
+
34
+ # Install with uv
35
+ uv venv .venv
36
+ source .venv/bin/activate
37
+ uv pip install -e ".[dev]"
38
+ ```
39
+
40
+ ### Run the Demo
41
+
42
+ ```bash
43
+ hyperview demo --samples 500
44
+ ```
45
+
46
+ This will:
47
+ 1. Load 500 samples from CIFAR-100
48
+ 2. Compute CLIP embeddings
49
+ 3. Generate Euclidean and Poincaré visualizations
50
+ 4. Start the server at **http://127.0.0.1:6262**
51
+
52
+ ### Python API
53
+
54
+ ```python
55
+ import hyperview as hv
56
+
57
+ # Create dataset
58
+ dataset = hv.Dataset("my_dataset")
59
+
60
+ # Load from HuggingFace
61
+ dataset.add_from_huggingface(
62
+ "uoft-cs/cifar100",
63
+ split="train",
64
+ max_samples=1000
65
+ )
66
+
67
+ # Or load from local directory
68
+ # dataset.add_images_dir("/path/to/images", label_from_folder=True)
69
+
70
+ # Compute embeddings and visualization
71
+ dataset.compute_embeddings(model="openai/clip-vit-base-patch32")
72
+ dataset.compute_visualization()
73
+
74
+ # Launch the UI
75
+ hv.launch(dataset) # Opens http://127.0.0.1:6262
76
+ ```
77
+
78
+ ### Google Colab
79
+
80
+ See [docs/colab.md](docs/colab.md) for a fast Colab smoke test and notebook-friendly launch behavior.
81
+
82
+ ### Save and Load Datasets
83
+
84
+ ```python
85
+ # Save dataset with embeddings
86
+ dataset.save("my_dataset.json")
87
+
88
+ # Load later
89
+ dataset = hv.Dataset.load("my_dataset.json")
90
+ hv.launch(dataset)
91
+ ```
92
+
93
+ ## Why Hyperbolic?
94
+
95
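+ Traditional Euclidean embeddings struggle with hierarchical data. In Euclidean space, volume grows only polynomially with radius ($r^d$), while the number of nodes in a hierarchy grows exponentially with depth. This mismatch causes **Representation Collapse**, where minority classes get crushed together.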
96
+
97
+ **Hyperbolic space** (Poincaré disk) has exponential volume growth ($e^r$), naturally preserving hierarchical structure and keeping rare classes distinct.
98
+
99
+ <p align="center">
100
+ <img src="assets/hyperview_infographic.png" alt="Euclidean vs Hyperbolic" width="100%">
101
+ </p>
102
+
103
+ ## Contributing
104
+
105
+ Development setup, frontend hot-reload, and backend API notes live in [CONTRIBUTING.md](CONTRIBUTING.md).
106
+
107
+ ## Related projects
108
+
109
+ - **hyper-scatter**: High-performance WebGL scatterplot engine (Euclidean + Poincaré) used by the frontend: https://github.com/Hyper3Labs/hyper-scatter
110
+ - **hyper-models**: Non-Euclidean model zoo + ONNX exports (e.g. for hyperbolic VLM experiments): https://github.com/Hyper3Labs/hyper-models
111
+
112
+ ## References
113
+
114
+ - [Poincaré Embeddings for Learning Hierarchical Representations](https://arxiv.org/abs/1705.08039) (Nickel & Kiela, 2017)
115
+ - [Hyperbolic Neural Networks](https://arxiv.org/abs/1805.09112) (Ganea et al., 2018)
116
+
117
+ ## License
118
+
119
+ MIT License - see [LICENSE](LICENSE) for details.
@@ -0,0 +1,103 @@
1
+ [project]
2
+ name = "hyperview"
3
+ dynamic = ["version"]
4
+ description = "Open-source dataset curation with hyperbolic embeddings visualization"
5
+ readme = "README.md"
6
+ license = { text = "MIT" }
7
+ requires-python = ">=3.10"
8
+ authors = [
9
+ { name = "hyper3labs" }
10
+ ]
11
+ keywords = ["embeddings", "visualization", "hyperbolic", "dataset", "curation", "machine-learning"]
12
+ classifiers = [
13
+ "Development Status :: 3 - Alpha",
14
+ "Intended Audience :: Developers",
15
+ "Intended Audience :: Science/Research",
16
+ "License :: OSI Approved :: MIT License",
17
+ "Programming Language :: Python :: 3",
18
+ "Programming Language :: Python :: 3.10",
19
+ "Programming Language :: Python :: 3.11",
20
+ "Programming Language :: Python :: 3.12",
21
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
22
+ "Topic :: Scientific/Engineering :: Visualization",
23
+ ]
24
+
25
+ dependencies = [
26
+ "fastapi>=0.128.0",
27
+ "uvicorn[standard]>=0.40.0",
28
+ "embed-anything>=0.7.0",
29
+ "hyper-models>=0.1.0", # PyPI package: https://pypi.org/project/hyper-models/
30
+ "numpy>=1.26.4,<2.4",
31
+ "umap-learn>=0.5.11",
32
+ "pillow>=12.1.0",
33
+ "pydantic>=2.12.5",
34
+ "aiofiles>=25.1.0",
35
+ "datasets>=4.5.0",
36
+ "lancedb>=0.26.1",
37
+ "pyarrow>=22.0.0",
38
+ ]
39
+
40
+ [project.optional-dependencies]
41
+ dev = [
42
+ "pytest>=9.0.2",
43
+ "pytest-asyncio>=1.3.0",
44
+ "httpx>=0.28.1",
45
+ "ruff>=0.14.13",
46
+ ]
47
+ ml = [
48
+ "torch>=2.9.1",
49
+ "torchvision>=0.24.1",
50
+ "timm>=1.0.0",
51
+ ]
52
+
53
+ [project.scripts]
54
+ hyperview = "hyperview.cli:main"
55
+
56
+ [project.urls]
57
+ Homepage = "https://github.com/Hyper3Labs/HyperView"
58
+ Documentation = "https://github.com/Hyper3Labs/HyperView#readme"
59
+ Repository = "https://github.com/Hyper3Labs/HyperView"
60
+ Issues = "https://github.com/Hyper3Labs/HyperView/issues"
61
+
62
+ [build-system]
63
+ requires = ["hatchling", "hatch-vcs"]
64
+ build-backend = "hatchling.build"
65
+
66
+ [tool.hatch.metadata]
67
+ allow-direct-references = true
68
+
69
+ [tool.hatch.version]
70
+ source = "vcs"
71
+
72
+ [tool.hatch.build.hooks.vcs]
73
+ version-file = "src/hyperview/_version.py"
74
+
75
+ [tool.hatch.build.targets.wheel]
76
+ packages = ["src/hyperview"]
77
+ # Include frontend static assets (pre-built before packaging)
78
+ artifacts = ["src/hyperview/server/static/**"]
79
+
80
+ [tool.hatch.build.targets.sdist]
81
+ include = [
82
+ "/src",
83
+ "/README.md",
84
+ "/LICENSE",
85
+ ]
86
+
87
+ [tool.ruff]
88
+ line-length = 100
89
+ target-version = "py310"
90
+
91
+ [tool.ruff.lint]
92
+ select = ["E", "F", "I", "N", "W", "UP"]
93
+ ignore = ["E501"]
94
+
95
+ [tool.pytest.ini_options]
96
+ asyncio_mode = "auto"
97
+ testpaths = ["tests"]
98
+
99
+ [tool.uv.workspace]
100
+ members = ["hyper_models"]
101
+
102
+ [tool.uv.sources]
103
+ hyper-models = { workspace = true }
@@ -0,0 +1,14 @@
1
+ """HyperView - Open-source dataset curation with hyperbolic embeddings visualization."""
2
+
3
+ from . import _version as _version
4
+ from . import api as _api
5
+
6
+ Dataset = _api.Dataset
7
+ launch = _api.launch
8
+ __version__ = _version.__version__
9
+
10
+ __all__ = [
11
+ "Dataset",
12
+ "launch",
13
+ "__version__",
14
+ ]
@@ -0,0 +1,34 @@
1
+ # file generated by setuptools-scm
2
+ # don't change, don't track in version control
3
+
4
+ __all__ = [
5
+ "__version__",
6
+ "__version_tuple__",
7
+ "version",
8
+ "version_tuple",
9
+ "__commit_id__",
10
+ "commit_id",
11
+ ]
12
+
13
+ TYPE_CHECKING = False
14
+ if TYPE_CHECKING:
15
+ from typing import Tuple
16
+ from typing import Union
17
+
18
+ VERSION_TUPLE = Tuple[Union[int, str], ...]
19
+ COMMIT_ID = Union[str, None]
20
+ else:
21
+ VERSION_TUPLE = object
22
+ COMMIT_ID = object
23
+
24
+ version: str
25
+ __version__: str
26
+ __version_tuple__: VERSION_TUPLE
27
+ version_tuple: VERSION_TUPLE
28
+ commit_id: COMMIT_ID
29
+ __commit_id__: COMMIT_ID
30
+
31
+ __version__ = version = '0.1.0'
32
+ __version_tuple__ = version_tuple = (0, 1, 0)
33
+
34
+ __commit_id__ = commit_id = None