arrowspace_tuner 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,57 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ lint:
11
+ name: ruff + mypy
12
+ runs-on: ubuntu-latest
13
+ strategy:
14
+ matrix:
15
+ python-version: ["3.12", "3.13"]
16
+
17
+ steps:
18
+ - uses: actions/checkout@v4
19
+
20
+ - name: Install uv
21
+ uses: astral-sh/setup-uv@v3
22
+ with:
23
+ version: "latest"
24
+
25
+ - name: Set up Python ${{ matrix.python-version }}
26
+ run: uv python install ${{ matrix.python-version }}
27
+
28
+ - name: Install dependencies (project itself not installed — lint only)
29
+ run: uv sync --extra dev --no-install-project
30
+
31
+ - name: Lint (ruff)
32
+ run: uv run ruff check src
33
+
34
+ - name: Type check (mypy)
35
+ run: uv run mypy src
36
+
37
+ test:
38
+ name: pytest
39
+ runs-on: ubuntu-latest
40
+ needs: lint
41
+
42
+ steps:
43
+ - uses: actions/checkout@v4
44
+
45
+ - name: Install uv
46
+ uses: astral-sh/setup-uv@v3
47
+ with:
48
+ version: "latest"
49
+
50
+ - name: Set up Python 3.12
51
+ run: uv python install 3.12
52
+
53
+ - name: Install project + dev dependencies
54
+ run: uv sync --extra dev
55
+
56
+ - name: Run tests
57
+ run: uv run pytest -v --cov=arrowspace_tuner --cov-report=term-missing
@@ -0,0 +1,26 @@
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # Virtual environments
10
+ .venv
11
+
12
+ # Test artefacts
13
+ .coverage
14
+ htmlcov/
15
+ .pytest_cache/
16
+
17
+ # Local experiment outputs — keep folder, ignore contents
18
+ results/*
19
+ !results/.gitkeep
20
+
21
+ # Local data
22
+ data/
23
+
24
+ .ruff_cache/
25
+ .mypy_cache/
26
+ ruff_errors.txt
@@ -0,0 +1 @@
1
+ 3.13
@@ -0,0 +1,26 @@
1
+ # Changelog
2
+
3
+ All notable changes to `arrowspace_tuner` are documented here.
4
+ Format follows [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
5
+ Versioning follows [Semantic Versioning](https://semver.org/).
6
+
7
+ ---
8
+
9
+ ## [0.1.0] — 2026-04-29
10
+
11
+ Initial release.
12
+
13
+ ### Added
14
+
15
+ - `EpsTuner` — main public class for hyperparameter discovery over `eps`, `k`, `tau`
16
+ - `arrowspace_tuner.optuna()` — one-liner convenience API: `aspace, gl = arrowspace_tuner.optuna(embeddings)`
17
+ - `StudyConfig` / `BuildParams` — typed dataclasses for power-user configuration
18
+ - Query-free spectral objective: weighted composite of MRR-Top0 proxy, Fiedler value, and lambda variance
19
+ - Optuna TPE sampler with pruning on degenerate graphs (NNZ ≤ N, disconnected, flat spectrum)
20
+ - `sample_n` subsampling: 33x speedup on 50k corpus with identical best params (validated)
21
+ - `storage` parameter for SQLite-backed persistence and parallel/resumed runs
22
+ - `tuner.save_report()` — saves `trials.csv`, `best_params.json`, and Plotly HTML plots
23
+ - `[report]` optional extra (pandas + plotly) — kept out of hard dependencies
24
+ - `py.typed` marker — PEP 561 compliant, full mypy strict mode
25
+ - Comprehensive test suite: `test_objective.py`, `test_tuner.py`, `conftest.py`
26
+ - CI workflow: pytest + ruff + mypy on every push and pull request
@@ -0,0 +1,56 @@
1
+ # Contributing
2
+
3
+ Thank you for contributing to **arrowspace-tuner**!
4
+
5
+ ## Commit Convention
6
+
7
+ We use [Conventional Commits](https://www.conventionalcommits.org/). Every commit message must follow this format:
8
+
9
+ ```
10
+ <type>: <short summary>
11
+
12
+ [optional body]
13
+ ```
14
+
15
+ ### Types
16
+
17
+ | Type | When to use |
18
+ |---|---|
19
+ | `feat` | New feature or behaviour |
20
+ | `fix` | Bug fix |
21
+ | `test` | Adding or fixing tests |
22
+ | `refactor` | Code change with no behaviour change |
23
+ | `chore` | Tooling, CI, dependencies, repo hygiene |
24
+ | `docs` | Documentation only |
25
+ | `perf` | Performance improvement |
26
+
27
+ ### Examples
28
+
29
+ ```
30
+ feat: add early-stopping to EpsTuner.fit()
31
+ fix: catch BaseException around ArrowSpace .build() for Rust panics
32
+ test: add degenerate-corpus fixture for pruning paths
33
+ chore: update .gitignore, remove .coverage artefact
34
+ docs: add quickstart section to README
35
+ ```
36
+
37
+ ### Rules
38
+
39
+ - Summary line ≤ 72 characters
40
+ - Use the imperative mood: "add", not "added" or "adds"
41
+ - Body explains **why**, not what (the diff shows the what)
42
+ - Reference issues/PRs in the body: `Fixes #12`
43
+
44
+ ## Branch Names
45
+
46
+ ```
47
+ feat/<short-description>
48
+ fix/<short-description>
49
+ chore/<short-description>
50
+ ```
51
+
52
+ ## Pull Requests
53
+
54
+ - All PRs must pass CI (pytest + ruff + mypy) before merging
55
+ - Squash-merge into `main`
56
+ - PR title must follow the same conventional commit format
@@ -0,0 +1,13 @@
1
+ Copyright 2026 Tommaso Moriondo
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
@@ -0,0 +1,154 @@
1
+ Metadata-Version: 2.4
2
+ Name: arrowspace_tuner
3
+ Version: 0.2.0
4
+ Summary: Hyperparameter discovery (eps auto-tuning) for ArrowSpace via Optuna.
5
+ Project-URL: Homepage, https://github.com/Genefold/arrowspace_tuner
6
+ Project-URL: Repository, https://github.com/Genefold/arrowspace_tuner.git
7
+ Author-email: Tommaso Moriondo <moriondotommaso@gmail.com>
8
+ License: Apache-2.0
9
+ License-File: LICENSE
10
+ Keywords: arrowspace,graph-laplacian,hyperparameter-tuning,optuna,spectral-analysis,vector-search
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
18
+ Classifier: Typing :: Typed
19
+ Requires-Python: >=3.12
20
+ Requires-Dist: arrowspace>=0.26.0
21
+ Requires-Dist: numpy>=2.4.4
22
+ Requires-Dist: optuna>=4.8.0
23
+ Requires-Dist: scipy>=1.17.1
24
+ Provides-Extra: dev
25
+ Requires-Dist: mypy>=1.15; extra == 'dev'
26
+ Requires-Dist: pandas>=3.0.0; extra == 'dev'
27
+ Requires-Dist: plotly>=6.7.0; extra == 'dev'
28
+ Requires-Dist: pytest-cov>=5.0; extra == 'dev'
29
+ Requires-Dist: pytest>=8.0; extra == 'dev'
30
+ Requires-Dist: ruff>=0.9; extra == 'dev'
31
+ Provides-Extra: report
32
+ Requires-Dist: pandas>=3.0.0; extra == 'report'
33
+ Requires-Dist: plotly>=6.7.0; extra == 'report'
34
+ Description-Content-Type: text/markdown
35
+
36
+ # arrowspace_tuner
37
+
38
+ [![CI](https://github.com/Genefold/arrowspace_tuner/actions/workflows/ci.yml/badge.svg)](https://github.com/Genefold/arrowspace_tuner/actions/workflows/ci.yml)
39
+ [![PyPI](https://img.shields.io/pypi/v/arrowspace-tuner)](https://pypi.org/project/arrowspace-tuner/)
40
+ [![Python](https://img.shields.io/pypi/pyversions/arrowspace-tuner)](https://pypi.org/project/arrowspace-tuner/)
41
+ [![License](https://img.shields.io/badge/license-Apache--2.0-blue)](LICENSE)
42
+
43
+ Hyperparameter discovery for [ArrowSpace](https://github.com/tuned-org-uk/arrowspace-rs) — automatically finds the best `eps`, `k`, and `tau` for your corpus using a query-free spectral objective.
44
+
45
+ ## Why
46
+
47
+ ArrowSpace's retrieval quality depends on three graph-construction parameters:
48
+
49
+ | Parameter | What it controls |
50
+ |---|---|
51
+ | `eps` | Neighbourhood radius for graph edges |
52
+ | `k` | Number of nearest neighbours per node |
53
+ | `tau` | Search temperature (exploration vs. exploitation) |
54
+
55
+ Setting these by hand is tedious and corpus-dependent. `arrowspace_tuner` uses [Optuna](https://optuna.org/) and a label-free spectral MRR proxy to find them automatically in minutes.
56
+
57
+ ## Install
58
+
59
+ ```bash
60
+ # Core (no pandas/plotly)
61
+ pip install arrowspace-tuner
62
+
63
+ # With HTML/CSV reporting
64
+ pip install "arrowspace-tuner[report]"
65
+ ```
66
+
67
+ ## Quickstart
68
+
69
+ ```python
70
+ import numpy as np
71
+ import arrowspace_tuner as arrowspace
72
+
73
+ embeddings = np.load("corpus.npy") # shape (N, D) float64
74
+
75
+ # One-liner: auto-discover eps, k, tau — runs in ~15 min on 50k corpus
76
+ aspace, gl = arrowspace.optuna(embeddings)
77
+
78
+ # Search as normal
79
+ results = aspace.search(query_embedding, gl, tau=0.8)
80
+ ```
81
+
82
+ ## Power-user API
83
+
84
+ ```python
85
+ from arrowspace_tuner import EpsTuner
86
+
87
+ tuner = EpsTuner(
88
+ n_trials = 15,
89
+ sample_n = 5_000, # 33x faster: explore on 5k, final build on full corpus
90
+ eps_low = 0.8, # narrow bounds if you know your corpus geometry
91
+ eps_high = 2.5,
92
+ k_low = 15,
93
+ k_high = 40,
94
+ tau_low = 0.05,
95
+ tau_high = 0.5,
96
+ n_probe = 50,
97
+ storage = "sqlite:///tune.db", # resume interrupted runs
98
+ )
99
+
100
+ aspace, gl = tuner.fit(embeddings)
101
+
102
+ print(tuner.best_params) # {"eps": 1.615, "k": 38, "tau": 0.114}
103
+ print(tuner.best_score) # 2.138
104
+ print(tuner.best_fiedler) # 0.718 — graph connectivity health
105
+ print(tuner.best_mrr_proxy) # 2.896 — retrieval coherence proxy
106
+
107
+ # Save CSV + HTML plots (requires [report] extra)
108
+ tuner.save_report(out_dir="results")
109
+ ```
110
+
111
+ ## Speed
112
+
113
+ The dominant cost is building the ArrowSpace graph on N vectors. With `sample_n`:
114
+
115
+ | Setting | Per trial | 15 trials | Notes |
116
+ |---|---|---|---|
117
+ | `sample_n=50_000` | ~23 min | ~5.8h | baseline |
118
+ | `sample_n=5_000` | ~1.5 min | **~27 min** | **33x faster, same best params** |
119
+
120
+ The final build after the study always uses the full corpus.
121
+
122
+ ## Objective
123
+
124
+ The objective is a weighted composite of three spectral signals — no ground-truth labels required:
125
+
126
+ ```
127
+ score = 0.70 * mrr_top0_spectral # retrieval coherence
128
+ + 0.20 * log1p(fiedler) # graph connectivity health
129
+ + 0.10 * log1p(var_lambda) # spectral richness
130
+ ```
131
+
132
+ ## Parallel runs
133
+
134
+ Optuna + SQLite lets you run multiple workers simultaneously:
135
+
136
+ ```bash
137
+ # Terminal 1
138
+ python -m arrowspace_tuner --storage sqlite:///tune.db --trials 15
139
+
140
+ # Terminal 2 (simultaneously)
141
+ python -m arrowspace_tuner --storage sqlite:///tune.db --trials 15
142
+ ```
143
+
144
+ ## Requirements
145
+
146
+ - Python ≥ 3.12
147
+ - `arrowspace >= 0.26.0`
148
+ - `optuna >= 4.8.0`
149
+ - `scipy >= 1.17.1`
150
+ - `numpy >= 2.4.4`
151
+
152
+ ## License
153
+
154
+ Apache-2.0 — see [LICENSE](LICENSE).
@@ -0,0 +1,119 @@
1
+ # arrowspace_tuner
2
+
3
+ [![CI](https://github.com/Genefold/arrowspace_tuner/actions/workflows/ci.yml/badge.svg)](https://github.com/Genefold/arrowspace_tuner/actions/workflows/ci.yml)
4
+ [![PyPI](https://img.shields.io/pypi/v/arrowspace-tuner)](https://pypi.org/project/arrowspace-tuner/)
5
+ [![Python](https://img.shields.io/pypi/pyversions/arrowspace-tuner)](https://pypi.org/project/arrowspace-tuner/)
6
+ [![License](https://img.shields.io/badge/license-Apache--2.0-blue)](LICENSE)
7
+
8
+ Hyperparameter discovery for [ArrowSpace](https://github.com/tuned-org-uk/arrowspace-rs) — automatically finds the best `eps`, `k`, and `tau` for your corpus using a query-free spectral objective.
9
+
10
+ ## Why
11
+
12
+ ArrowSpace's retrieval quality depends on three graph-construction parameters:
13
+
14
+ | Parameter | What it controls |
15
+ |---|---|
16
+ | `eps` | Neighbourhood radius for graph edges |
17
+ | `k` | Number of nearest neighbours per node |
18
+ | `tau` | Search temperature (exploration vs. exploitation) |
19
+
20
+ Setting these by hand is tedious and corpus-dependent. `arrowspace_tuner` uses [Optuna](https://optuna.org/) and a label-free spectral MRR proxy to find them automatically in minutes.
21
+
22
+ ## Install
23
+
24
+ ```bash
25
+ # Core (no pandas/plotly)
26
+ pip install arrowspace-tuner
27
+
28
+ # With HTML/CSV reporting
29
+ pip install "arrowspace-tuner[report]"
30
+ ```
31
+
32
+ ## Quickstart
33
+
34
+ ```python
35
+ import numpy as np
36
+ import arrowspace_tuner as arrowspace
37
+
38
+ embeddings = np.load("corpus.npy") # shape (N, D) float64
39
+
40
+ # One-liner: auto-discover eps, k, tau — runs in ~15 min on 50k corpus
41
+ aspace, gl = arrowspace.optuna(embeddings)
42
+
43
+ # Search as normal
44
+ results = aspace.search(query_embedding, gl, tau=0.8)
45
+ ```
46
+
47
+ ## Power-user API
48
+
49
+ ```python
50
+ from arrowspace_tuner import EpsTuner
51
+
52
+ tuner = EpsTuner(
53
+ n_trials = 15,
54
+ sample_n = 5_000, # 33x faster: explore on 5k, final build on full corpus
55
+ eps_low = 0.8, # narrow bounds if you know your corpus geometry
56
+ eps_high = 2.5,
57
+ k_low = 15,
58
+ k_high = 40,
59
+ tau_low = 0.05,
60
+ tau_high = 0.5,
61
+ n_probe = 50,
62
+ storage = "sqlite:///tune.db", # resume interrupted runs
63
+ )
64
+
65
+ aspace, gl = tuner.fit(embeddings)
66
+
67
+ print(tuner.best_params) # {"eps": 1.615, "k": 38, "tau": 0.114}
68
+ print(tuner.best_score) # 2.138
69
+ print(tuner.best_fiedler) # 0.718 — graph connectivity health
70
+ print(tuner.best_mrr_proxy) # 2.896 — retrieval coherence proxy
71
+
72
+ # Save CSV + HTML plots (requires [report] extra)
73
+ tuner.save_report(out_dir="results")
74
+ ```
75
+
76
+ ## Speed
77
+
78
+ The dominant cost is building the ArrowSpace graph on N vectors. With `sample_n`:
79
+
80
+ | Setting | Per trial | 15 trials | Notes |
81
+ |---|---|---|---|
82
+ | `sample_n=50_000` | ~23 min | ~5.8h | baseline |
83
+ | `sample_n=5_000` | ~1.5 min | **~27 min** | **33x faster, same best params** |
84
+
85
+ The final build after the study always uses the full corpus.
86
+
87
+ ## Objective
88
+
89
+ The objective is a weighted composite of three spectral signals — no ground-truth labels required:
90
+
91
+ ```
92
+ score = 0.70 * mrr_top0_spectral # retrieval coherence
93
+ + 0.20 * log1p(fiedler) # graph connectivity health
94
+ + 0.10 * log1p(var_lambda) # spectral richness
95
+ ```
96
+
97
+ ## Parallel runs
98
+
99
+ Optuna + SQLite lets you run multiple workers simultaneously:
100
+
101
+ ```bash
102
+ # Terminal 1
103
+ python -m arrowspace_tuner --storage sqlite:///tune.db --trials 15
104
+
105
+ # Terminal 2 (simultaneously)
106
+ python -m arrowspace_tuner --storage sqlite:///tune.db --trials 15
107
+ ```
108
+
109
+ ## Requirements
110
+
111
+ - Python ≥ 3.12
112
+ - `arrowspace >= 0.26.0`
113
+ - `optuna >= 4.8.0`
114
+ - `scipy >= 1.17.1`
115
+ - `numpy >= 2.4.4`
116
+
117
+ ## License
118
+
119
+ Apache-2.0 — see [LICENSE](LICENSE).
File without changes
@@ -0,0 +1,110 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "arrowspace_tuner"
7
+ version = "0.2.0"
8
+ description = "Hyperparameter discovery (eps auto-tuning) for ArrowSpace via Optuna."
9
+ readme = "README.md"
10
+ license = { text = "Apache-2.0" }
11
+ authors = [
12
+ { name = "Tommaso Moriondo", email = "moriondotommaso@gmail.com" },
13
+ ]
14
+ requires-python = ">=3.12"
15
+ keywords = [
16
+ "vector-search",
17
+ "spectral-analysis",
18
+ "hyperparameter-tuning",
19
+ "optuna",
20
+ "arrowspace",
21
+ "graph-laplacian",
22
+ ]
23
+ classifiers = [
24
+ "Development Status :: 3 - Alpha",
25
+ "Intended Audience :: Developers",
26
+ "Intended Audience :: Science/Research",
27
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
28
+ "Programming Language :: Python :: 3",
29
+ "Programming Language :: Python :: 3.12",
30
+ "Programming Language :: Python :: 3.13",
31
+ "Typing :: Typed",
32
+ ]
33
+
34
+ # ── Hard dependencies ──────────────────────────────────────────────────────
35
+ # These are always required: the Rust wheel, Optuna, SciPy for Fiedler, NumPy.
36
+ # plotly and pandas are NOT here — they are opt-in via [report].
37
+ dependencies = [
38
+ "arrowspace>=0.26.0",
39
+ "numpy>=2.4.4",
40
+ "optuna>=4.8.0",
41
+ "scipy>=1.17.1",
42
+ ]
43
+
44
+ # ── Optional extras ─────────────────────────────────────────────────────────
45
+ [project.optional-dependencies]
46
+
47
+ # pip install arrowspace_tuner[report]
48
+ # Needed for tuner.save_report() and all HTML/CSV output from reporter.py
49
+ report = [
50
+ "plotly>=6.7.0",
51
+ "pandas>=3.0.0",
52
+ ]
53
+
54
+ # pip install arrowspace_tuner[dev]
55
+ # Full dev environment: testing + linting + type checking
56
+ dev = [
57
+ "pytest>=8.0",
58
+ "pytest-cov>=5.0",
59
+ "ruff>=0.9",
60
+ "mypy>=1.15",
61
+ "plotly>=6.7.0", # needed to test reporter.py
62
+ "pandas>=3.0.0",
63
+ ]
64
+
65
+ [project.urls]
66
+ Homepage = "https://github.com/Genefold/arrowspace_tuner"
67
+ Repository = "https://github.com/Genefold/arrowspace_tuner.git"
68
+
69
+ # ── Hatchling config ───────────────────────────────────────────────────────────
70
+ [tool.hatch.build.targets.wheel]
71
+ packages = ["src/arrowspace_tuner"]
72
+ exclude = [
73
+ "tests/",
74
+ "notebooks/",
75
+ "docs/",
76
+ ".github/",
77
+ "*.db",
78
+ "*.sqlite",
79
+ "*.ipynb",
80
+ ".ruff_cache/",
81
+ ".mypy_cache/",
82
+ ".pytest_cache/",
83
+ "dist/",
84
+ "*.egg-info/",
85
+ ]
86
+
87
+ # ── Ruff ──────────────────────────────────────────────────────────────────
88
+ [tool.ruff]
89
+ line-length = 100
90
+ target-version = "py312"
91
+
92
+ [tool.ruff.lint]
93
+ select = ["E", "F", "I", "UP", "ANN"]
94
+ ignore = ["ANN101"]
95
+
96
+ # ── Mypy ────────────────────────────────────────────────────────────────────
97
+ [tool.mypy]
98
+ python_version = "3.12"
99
+ strict = true
100
+ ignore_missing_imports = true # arrowspace has no stubs
101
+
102
+ # ── Pytest ─────────────────────────────────────────────────────────────────
103
+ [tool.pytest.ini_options]
104
+ testpaths = ["tests"]
105
+ addopts = "--cov=arrowspace_tuner --cov-report=term-missing"
106
+
107
+ [dependency-groups]
108
+ dev = [
109
+ "twine>=6.2.0",
110
+ ]
File without changes
@@ -0,0 +1,83 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ scripts/test_eval.py
4
+ ====================
5
+ Run the arrowspace_tuner optimisation pipeline on the CVE .npy corpus.
6
+
7
+ uv run python scripts/test_eval.py \
8
+ --data data/cve_embs/cve1999-2025.npy \
9
+ --n 50000 \
10
+ --trials 20 \
11
+ --seed 42
12
+ """
13
+ from __future__ import annotations
14
+
15
+ import argparse
16
+ import logging
17
+
18
+ import numpy as np
19
+
20
+ from arrowspace_tuner.tuner import EpsTuner
21
+
22
+ logging.basicConfig(
23
+ level=logging.INFO,
24
+ format="[%(asctime)s] %(levelname)s %(name)s | %(message)s",
25
+ )
26
+ log = logging.getLogger(__name__)
27
+
28
+
29
def load_npy(path: str, n: int, seed: int) -> np.ndarray:
    """Load a ``.npy`` matrix and return a row-normalised random subsample.

    The array at ``path`` is read with ``np.load``. At most ``n`` distinct rows
    are drawn without replacement using ``np.random.default_rng(seed)``; the
    chosen indices are sorted so the rows keep their on-disk order. The
    selected rows are cast to ``float64`` and divided by their L2 norm.

    Args:
        path: Location of the ``.npy`` file.
        n: Maximum number of rows to keep; capped at the number of rows in
            the file.
        seed: Seed for the row-sampling random generator.

    Returns:
        A ``float64`` array with ``min(n, rows)`` rows.

    Raises:
        ValueError: If ``n`` is smaller than 1, or the loaded array is not
            two-dimensional, or it has no rows.
    """
    # Reject bad sample sizes up front instead of failing inside NumPy (n < 0)
    # or silently producing an empty matrix (n == 0).
    if n < 1:
        raise ValueError(f"n must be a positive integer, got {n}")

    log.info("Loading %s …", path)
    X = np.load(path)
    log.info(" full shape : %s dtype=%s", X.shape, X.dtype)

    # The per-row norm below uses axis=1, so anything but a matrix is an error.
    if X.ndim != 2:
        raise ValueError(f"expected a 2-D array in {path!r}, got shape {X.shape}")
    if len(X) == 0:
        raise ValueError(f"{path!r} contains no rows")

    n = min(n, len(X))
    rng = np.random.default_rng(seed)
    idx = rng.choice(len(X), size=n, replace=False)
    idx.sort()

    X = X[idx].astype(np.float64)
    norms = np.linalg.norm(X, axis=1, keepdims=True)
    # Clip the norms so all-zero rows do not cause a division by zero.
    X = X / np.clip(norms, 1e-12, None)
    log.info(" subsample : %s (L2-normalised)", X.shape)
    return X
42
+
43
+
44
def main() -> None:
    """Run the tuning pipeline from the command line.

    Parses the CLI options, loads an L2-normalised subsample of the corpus
    with ``load_npy``, runs ``EpsTuner.fit`` on it, prints the best result and
    writes the report to the directory given by ``--out-dir``.
    """
    parser = argparse.ArgumentParser(
        description="Run the arrowspace_tuner optimisation pipeline on a .npy corpus.",
    )
    parser.add_argument(
        "--data",
        default="data/cve_embs/cve1999-2025.npy",
        help="path to the .npy embedding matrix",
    )
    parser.add_argument("--n", type=int, default=5000, help="number of rows to subsample")
    parser.add_argument("--trials", type=int, default=20, help="number of Optuna trials")
    parser.add_argument("--seed", type=int, default=54, help="seed for sampling and the tuner")
    parser.add_argument(
        "--out-dir",
        default="results",
        help="directory the report is written to (default: results)",
    )

    args = parser.parse_args()

    embeddings = load_npy(args.data, args.n, args.seed)

    tuner = EpsTuner(
        n_trials=args.trials,
        sample_n=None,  # already subsampled above
        seed=args.seed,
        study_name="cve_arrowspace_fstar",
        storage=None,
    )

    log.info("Starting | n=%d trials=%d seed=%d", len(embeddings), args.trials, args.seed)

    aspace, gl = tuner.fit(embeddings)

    print("\n=== Best result ===")
    print(f" F** : {tuner.best_score:.8f}")
    print(f" eps : {tuner.best_params['eps']:.5f}")
    print(f" k : {tuner.best_params['k']}")
    print(f" tau : {tuner.best_params['tau']:.4f}")
    print(f" fiedler : {tuner.best_fiedler}")
    print(f" var_lambda : {tuner.best_var_lambda}")
    print(f" mrr_proxy : {tuner.best_mrr_proxy}")
    print(f"\n ArrowSpace : {aspace}")
    print(f" Graph : {gl}")

    tuner.save_report(out_dir=args.out_dir)
    log.info("Report saved to %s/", args.out_dir)
81
+
82
+ if __name__ == "__main__":
83
+ main()
@@ -0,0 +1,40 @@
1
+ """
2
+ arrowspace_tuner — hyperparameter discovery for ArrowSpace.
3
+
4
+ Quickstart
5
+ ----------
6
+ import numpy as np
7
+ import arrowspace_tuner as arrowspace
8
+
9
+ embeddings = np.load("corpus.npy")
10
+
11
+ # one-liner: auto-discover eps, k, tau
12
+ aspace, gl = arrowspace.optuna(embeddings)
13
+
14
+ # power-user: full control + post-run inspection
15
+ from arrowspace_tuner import EpsTuner
16
+
17
+ tuner = EpsTuner(n_trials=100, sample_n=10_000, eps_low=0.5, eps_high=3.0)
18
+ aspace, gl = tuner.fit(embeddings)
19
+ print(tuner.best_params) # {"eps": 1.2, "k": 14, "tau": 0.8}
20
+ print(tuner.best_score)
21
+ tuner.save_report() # requires pip install arrowspace-tuner[report]
22
+ """
23
+ from .api import optuna
24
+
25
+ # Power-user exports: config dataclasses for advanced customisation
26
+ from .core import BuildParams, StudyConfig
27
+ from .tuner import EpsTuner
28
+
29
# Keep in sync with `version` in pyproject.toml.
__version__ = "0.2.0"
30
+
31
+ __all__ = [
32
+ # primary public API
33
+ "optuna",
34
+ "EpsTuner",
35
+ # config — for power users
36
+ "BuildParams",
37
+ "StudyConfig",
38
+ # version
39
+ "__version__",
40
+ ]