hypara 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. hypara-0.1.0/LICENSE +21 -0
  2. hypara-0.1.0/MANIFEST.in +9 -0
  3. hypara-0.1.0/PKG-INFO +264 -0
  4. hypara-0.1.0/README.md +235 -0
  5. hypara-0.1.0/configs/full.json +17 -0
  6. hypara-0.1.0/configs/smoke.json +11 -0
  7. hypara-0.1.0/optimizers/hill_climb/main.py +161 -0
  8. hypara-0.1.0/optimizers/hill_climb/manifest.json +4 -0
  9. hypara-0.1.0/optimizers/random_search/main.py +73 -0
  10. hypara-0.1.0/optimizers/random_search/manifest.json +4 -0
  11. hypara-0.1.0/pyproject.toml +51 -0
  12. hypara-0.1.0/setup.cfg +4 -0
  13. hypara-0.1.0/src/hypara/__init__.py +4 -0
  14. hypara-0.1.0/src/hypara/cli.py +192 -0
  15. hypara-0.1.0/src/hypara/event_log.py +44 -0
  16. hypara-0.1.0/src/hypara/metrics.py +153 -0
  17. hypara-0.1.0/src/hypara/problems/__init__.py +0 -0
  18. hypara-0.1.0/src/hypara/problems/_util.py +30 -0
  19. hypara-0.1.0/src/hypara/problems/base.py +62 -0
  20. hypara-0.1.0/src/hypara/problems/conditional_knobs.py +91 -0
  21. hypara-0.1.0/src/hypara/problems/cost_aware.py +58 -0
  22. hypara-0.1.0/src/hypara/problems/dispatch_policy.py +86 -0
  23. hypara-0.1.0/src/hypara/problems/image_pipeline.py +82 -0
  24. hypara-0.1.0/src/hypara/problems/multi_fidelity.py +70 -0
  25. hypara-0.1.0/src/hypara/problems/noisy_lab.py +52 -0
  26. hypara-0.1.0/src/hypara/problems/rag_pipeline.py +90 -0
  27. hypara-0.1.0/src/hypara/problems/rugged_trap.py +54 -0
  28. hypara-0.1.0/src/hypara/problems/smooth_hill.py +43 -0
  29. hypara-0.1.0/src/hypara/problems/sparse_needle.py +59 -0
  30. hypara-0.1.0/src/hypara/protocol.py +92 -0
  31. hypara-0.1.0/src/hypara/registry.py +42 -0
  32. hypara-0.1.0/src/hypara/runner.py +248 -0
  33. hypara-0.1.0/src/hypara/space.py +224 -0
  34. hypara-0.1.0/src/hypara/transport.py +116 -0
  35. hypara-0.1.0/src/hypara.egg-info/PKG-INFO +264 -0
  36. hypara-0.1.0/src/hypara.egg-info/SOURCES.txt +43 -0
  37. hypara-0.1.0/src/hypara.egg-info/dependency_links.txt +1 -0
  38. hypara-0.1.0/src/hypara.egg-info/entry_points.txt +2 -0
  39. hypara-0.1.0/src/hypara.egg-info/requires.txt +5 -0
  40. hypara-0.1.0/src/hypara.egg-info/top_level.txt +1 -0
  41. hypara-0.1.0/tests/test_problems.py +76 -0
  42. hypara-0.1.0/tests/test_runner.py +144 -0
  43. hypara-0.1.0/tests/test_space.py +100 -0
  44. hypara-0.1.0/tests/test_stage2.py +106 -0
  45. hypara-0.1.0/tests/test_transport.py +98 -0
hypara-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 jun76
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,9 @@
1
+ include LICENSE
2
+ include README.md
3
+ include pyproject.toml
4
+ recursive-include optimizers *.py *.json
5
+ recursive-include configs *.json
6
+ recursive-include tests *.py
7
+ prune docs
8
+ prune results
9
+ global-exclude __pycache__ *.pyc
hypara-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,264 @@
1
+ Metadata-Version: 2.4
2
+ Name: hypara
3
+ Version: 0.1.0
4
+ Summary: Benchmark harness for black-box optimizers that speak an ask/tell JSON Lines protocol
5
+ Author-email: jun76 <jun76.main@gmail.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/jun76/hypara
8
+ Project-URL: Repository, https://github.com/jun76/hypara
9
+ Project-URL: Issues, https://github.com/jun76/hypara/issues
10
+ Keywords: benchmark,optimization,black-box,hyperparameter-tuning,ask-tell,llm
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Operating System :: OS Independent
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Scientific/Engineering
20
+ Classifier: Topic :: Software Development :: Testing
21
+ Requires-Python: >=3.11
22
+ Description-Content-Type: text/markdown
23
+ License-File: LICENSE
24
+ Provides-Extra: dev
25
+ Requires-Dist: pytest>=8; extra == "dev"
26
+ Requires-Dist: build>=1; extra == "dev"
27
+ Requires-Dist: twine>=5; extra == "dev"
28
+ Dynamic: license-file
29
+
30
+ # hypara
31
+
32
+ A benchmark harness for measuring how well an optimizer searches an **unknown
33
+ black-box evaluation function**.
34
+
35
+ hypara is deliberately not about solving famous problems (TSP, knapsack, bin
36
+ packing) where a strong off-the-shelf solver wins. Each problem ships a
37
+ natural-language description, a mixed search space, and a *hidden* evaluator
38
+ whose shape changes with the instance seed. To score well an optimizer has to
39
+ read the description, reason about the space, and adapt its strategy from the
40
+ evaluation history within a limited budget.
41
+
42
+ Optimizers are **language-agnostic external processes**: they talk to the
43
+ runner over a stdin/stdout JSON Lines protocol, so an optimizer can be written
44
+ in Python, Rust, Go, TypeScript, or any executable.
45
+
46
+ ## Install
47
+
48
+ ```bash
49
+ pip install hypara
50
+ ```
51
+
52
+ For development (tests + build tooling):
53
+
54
+ ```bash
55
+ pip install -e .[dev]
56
+ python -m pytest
57
+ ```
58
+
59
+ ## Quickstart
60
+
61
+ List the built-in problems:
62
+
63
+ ```bash
64
+ hypara list
65
+ ```
66
+
67
+ Write a minimal optimizer. Create `my_opt/manifest.json`:
68
+
69
+ ```json
70
+ {"name": "my_opt", "command": ["python", "main.py"]}
71
+ ```
72
+
73
+ and `my_opt/main.py`:
74
+
75
+ ```python
76
+ import json, random, sys
77
+
78
+ space = []
79
+ rng = random.Random()
80
+
81
+ def send(msg):
82
+ sys.stdout.write(json.dumps(msg) + "\n")
83
+ sys.stdout.flush()
84
+
85
+ for line in sys.stdin:
86
+ msg = json.loads(line)
87
+ t = msg.get("type")
88
+ if t == "init":
89
+ space = msg["problem"]["space"]
90
+ rng = random.Random(msg.get("optimizer_seed"))
91
+ send({"type": "ready"})
92
+ elif t == "ask":
93
+ # propose a candidate; here, a trivial random pick for each unconditional param
94
+ cand = {}
95
+ for p in space:
96
+ if p.get("condition") is not None:
97
+ continue
98
+ if p["type"] == "categorical":
99
+ cand[p["name"]] = rng.choice(p["choices"])
100
+ elif p["type"] == "bool":
101
+ cand[p["name"]] = rng.random() < 0.5
102
+ else:
103
+ lo, hi = p["low"], p["high"]
104
+ v = rng.uniform(lo, hi)
105
+ cand[p["name"]] = int(round(v)) if p["type"] == "int" else v
106
+ send({"type": "propose", "candidate": cand})
107
+ elif t == "tell":
108
+ pass # inspect msg["score"], msg["valid"], msg["remaining"] to adapt
109
+ elif t == "finish":
110
+ break
111
+ ```
112
+
113
+ Run it against one problem, then aggregate:
114
+
115
+ ```bash
116
+ hypara run --problem smooth_hill --optimizer ./my_opt --seed 1
117
+ ```
118
+
119
+ The source repository also includes two reference optimizers
120
+ (`optimizers/random_search`, `optimizers/hill_climb`) and ready-made suite
121
+ configs (`configs/smoke.json`, `configs/full.json`):
122
+
123
+ ```bash
124
+ hypara suite --config configs/smoke.json
125
+ hypara report --dir results/smoke-YYYYmmdd-HHMMSS
126
+ ```
127
+
128
+ ## Built-in problems
129
+
130
+ All problems are single-objective maximization problems with an achievable
131
+ maximum near 1.0. The hidden landscape is reseeded per run, so memorizing an
132
+ instance does not help.
133
+
134
+ | Problem | What it tests |
135
+ |---|---|
136
+ | `smooth_hill` | Smooth unimodal surface; local search should win. |
137
+ | `rugged_trap` | Multimodal with a decoy hill; needs restarts / exploration. |
138
+ | `conditional_knobs` | A categorical choice switches which knobs exist. |
139
+ | `noisy_lab` | Additive gaussian noise; beware chasing lucky readings. |
140
+ | `multi_fidelity` | Cheap biased low-fidelity vs. expensive true high-fidelity. |
141
+ | `sparse_needle` | One hidden combination scores high; weak partial-match signal. |
142
+ | `cost_aware` | The candidate's own `samples` knob drives its evaluation cost. |
143
+ | `rag_pipeline` | Surrogate RAG tuning (chunking, top_k, reranker interactions). |
144
+ | `image_pipeline` | Surrogate diffusion tuning; steps drive quality and cost. |
145
+ | `dispatch_policy` | Surrogate delivery policy; balance, batching, mild noise. |
146
+
147
+ ## Protocol
148
+
149
+ The runner launches the optimizer as a child process (working directory = the
150
+ optimizer's directory; if `command[0]` is `"python"` it is replaced with the
151
+ runner's own interpreter). Messages are one JSON object per line: runner →
152
+ optimizer on stdin, optimizer → runner on stdout. **Optimizer stdout is
153
+ protocol-only; write debug output to stderr** (the runner saves it to
154
+ `optimizer.stderr.log`). Receivers ignore unknown keys. `NaN`/`Infinity` must
155
+ not be sent. Current `protocol_version` is `1`.
156
+
157
+ ### Messages and turn-taking
158
+
159
+ | Direction | `type` | Reply |
160
+ |---|---|---|
161
+ | runner → optimizer | `init` | `ready` (once) |
162
+ | runner → optimizer | `ask` | `propose` (once) |
163
+ | runner → optimizer | `tell` | none |
164
+ | runner → optimizer | `finish` | none; exit promptly |
165
+
166
+ Only one `ask` is outstanding at a time. The `init` reply may take up to 30s,
167
+ each `ask` reply up to 60s by default; overruns end the run as
168
+ `optimizer_timeout`. A crash, an unparseable line, or an out-of-order message
169
+ ends the run as `failed`. The best-so-far is recorded in every case.
170
+
171
+ **init** (runner → optimizer):
172
+
173
+ ```json
174
+ {"type": "init", "protocol_version": 1, "run_id": "smooth_hill--my_opt--s1",
175
+ "problem": {
176
+ "description": "natural-language prompt",
177
+ "space": [ ...param specs (below)... ],
178
+ "objective": "maximize",
179
+ "budget": {"evaluations": 100, "cost_limit": null, "time_limit_sec": 300.0},
180
+ "fidelities": null
181
+ },
182
+ "optimizer_seed": 12345}
183
+ ```
184
+
185
+ `budget` always has at least one of `evaluations` or `cost_limit` non-null.
186
+ `fidelities`, when non-null, is ordered low→high (last entry = top fidelity).
187
+
188
+ **ready / propose** (optimizer → runner):
189
+
190
+ ```json
191
+ {"type": "ready"}
192
+ {"type": "propose", "candidate": {"x0": 0.5, "algo": "alpha"}, "fidelity": "low"}
193
+ ```
194
+
195
+ `fidelity` is optional; omitted/null means top fidelity. Sending a non-null
196
+ `fidelity` to a problem with no fidelities is invalid.
197
+
198
+ **tell** (runner → optimizer):
199
+
200
+ ```json
201
+ {"type": "tell", "candidate_id": "c-0007", "candidate": {"x0": 0.5},
202
+ "valid": true, "score": 0.73, "cost": 1.0, "fidelity": null, "error": null,
203
+ "remaining": {"evaluations": 92, "cost": null, "time_sec": 291.3}}
204
+ ```
205
+
206
+ When invalid: `valid: false`, `score: null`, and `error` gives the reason.
207
+
208
+ **finish** (runner → optimizer): `{"type": "finish", "reason": "budget_exhausted"}`
209
+ (`reason` is `budget_exhausted` or `time_limit`).
210
+
211
+ ### Search space
212
+
213
+ ```json
214
+ [
215
+ {"name": "lr", "type": "float", "low": 1e-4, "high": 1.0, "log": true},
216
+ {"name": "layers", "type": "int", "low": 1, "high": 12},
217
+ {"name": "opt", "type": "categorical", "choices": ["sgd", "adam"]},
218
+ {"name": "warmup", "type": "bool"},
219
+ {"name": "warmup_steps", "type": "int", "low": 10, "high": 1000,
220
+ "condition": {"param": "warmup", "equals": [true]}}
221
+ ]
222
+ ```
223
+
224
+ - Types: `float`, `int`, `categorical`, `bool`. Bounds `low`/`high` are
225
+ inclusive; `log: true` hints a log scale.
226
+ - A param with `condition` is **active** only when
227
+ `candidate[condition.param]` is in `equals`. Conditioning is one level deep
228
+ (the parent must be unconditional).
229
+
230
+ A candidate is validated by the runner: it must be a JSON object containing
231
+ **exactly** the active params (no unknown keys, no inactive params, none
232
+ missing), each of the right type and within range.
233
+
234
+ ### Budget rules
235
+
236
+ - A valid evaluation consumes the evaluator's `cost` (may depend on the
237
+ candidate/fidelity); the `evaluations` axis always consumes 1.
238
+ - **An invalid proposal still consumes budget** (1 evaluation, cost 1.0), so
239
+ spamming invalid candidates cannot mine the space for free.
240
+ - The stop check runs before each `ask`, so the final evaluation may slightly
241
+ overshoot `cost_limit`.
242
+ - For problems with `fidelities`, **only top-fidelity evaluations count toward
243
+ `best_score`**; lower fidelities are available as history but not scored.
244
+
245
+ ## Metrics
246
+
247
+ `hypara report` recomputes everything from the saved logs. Per run: best
248
+ score, best candidate, best-so-far curve (over evaluations or cumulative
249
+ cost), valid rate, status, wall time. Aggregated per (problem, optimizer):
250
+ mean best, a baseline-relative normalized best and normalized anytime AUC
251
+ (0 = baseline median, 1 = best observed for that problem), and an overall
252
+ mean across problems.
253
+
254
+ ## Adding a problem
255
+
256
+ Implement `Problem` under `src/hypara/problems/` and register it in
257
+ `src/hypara/registry.py`. Keep the description and the evaluator's actual
258
+ behavior in sync — the point of the benchmark is that reading the description
259
+ helps. The shared invariants in `tests/test_problems.py` (finite scores,
260
+ determinism given a seed, instance-seed sensitivity) apply automatically.
261
+
262
+ ## License
263
+
264
+ MIT. See [LICENSE](LICENSE).
hypara-0.1.0/README.md ADDED
@@ -0,0 +1,235 @@
1
+ # hypara
2
+
3
+ A benchmark harness for measuring how well an optimizer searches an **unknown
4
+ black-box evaluation function**.
5
+
6
+ hypara is deliberately not about solving famous problems (TSP, knapsack, bin
7
+ packing) where a strong off-the-shelf solver wins. Each problem ships a
8
+ natural-language description, a mixed search space, and a *hidden* evaluator
9
+ whose shape changes with the instance seed. To score well an optimizer has to
10
+ read the description, reason about the space, and adapt its strategy from the
11
+ evaluation history within a limited budget.
12
+
13
+ Optimizers are **language-agnostic external processes**: they talk to the
14
+ runner over a stdin/stdout JSON Lines protocol, so an optimizer can be written
15
+ in Python, Rust, Go, TypeScript, or any executable.
16
+
17
+ ## Install
18
+
19
+ ```bash
20
+ pip install hypara
21
+ ```
22
+
23
+ For development (tests + build tooling):
24
+
25
+ ```bash
26
+ pip install -e .[dev]
27
+ python -m pytest
28
+ ```
29
+
30
+ ## Quickstart
31
+
32
+ List the built-in problems:
33
+
34
+ ```bash
35
+ hypara list
36
+ ```
37
+
38
+ Write a minimal optimizer. Create `my_opt/manifest.json`:
39
+
40
+ ```json
41
+ {"name": "my_opt", "command": ["python", "main.py"]}
42
+ ```
43
+
44
+ and `my_opt/main.py`:
45
+
46
+ ```python
47
+ import json, random, sys
48
+
49
+ space = []
50
+ rng = random.Random()
51
+
52
+ def send(msg):
53
+ sys.stdout.write(json.dumps(msg) + "\n")
54
+ sys.stdout.flush()
55
+
56
+ for line in sys.stdin:
57
+ msg = json.loads(line)
58
+ t = msg.get("type")
59
+ if t == "init":
60
+ space = msg["problem"]["space"]
61
+ rng = random.Random(msg.get("optimizer_seed"))
62
+ send({"type": "ready"})
63
+ elif t == "ask":
64
+ # propose a candidate; here, a trivial random pick for each unconditional param
65
+ cand = {}
66
+ for p in space:
67
+ if p.get("condition") is not None:
68
+ continue
69
+ if p["type"] == "categorical":
70
+ cand[p["name"]] = rng.choice(p["choices"])
71
+ elif p["type"] == "bool":
72
+ cand[p["name"]] = rng.random() < 0.5
73
+ else:
74
+ lo, hi = p["low"], p["high"]
75
+ v = rng.uniform(lo, hi)
76
+ cand[p["name"]] = int(round(v)) if p["type"] == "int" else v
77
+ send({"type": "propose", "candidate": cand})
78
+ elif t == "tell":
79
+ pass # inspect msg["score"], msg["valid"], msg["remaining"] to adapt
80
+ elif t == "finish":
81
+ break
82
+ ```
83
+
84
+ Run it against one problem, then aggregate:
85
+
86
+ ```bash
87
+ hypara run --problem smooth_hill --optimizer ./my_opt --seed 1
88
+ ```
89
+
90
+ The source repository also includes two reference optimizers
91
+ (`optimizers/random_search`, `optimizers/hill_climb`) and ready-made suite
92
+ configs (`configs/smoke.json`, `configs/full.json`):
93
+
94
+ ```bash
95
+ hypara suite --config configs/smoke.json
96
+ hypara report --dir results/smoke-YYYYmmdd-HHMMSS
97
+ ```
98
+
99
+ ## Built-in problems
100
+
101
+ All problems are single-objective maximization problems with an achievable
102
+ maximum near 1.0. The hidden landscape is reseeded per run, so memorizing an
103
+ instance does not help.
104
+
105
+ | Problem | What it tests |
106
+ |---|---|
107
+ | `smooth_hill` | Smooth unimodal surface; local search should win. |
108
+ | `rugged_trap` | Multimodal with a decoy hill; needs restarts / exploration. |
109
+ | `conditional_knobs` | A categorical choice switches which knobs exist. |
110
+ | `noisy_lab` | Additive gaussian noise; beware chasing lucky readings. |
111
+ | `multi_fidelity` | Cheap biased low-fidelity vs. expensive true high-fidelity. |
112
+ | `sparse_needle` | One hidden combination scores high; weak partial-match signal. |
113
+ | `cost_aware` | The candidate's own `samples` knob drives its evaluation cost. |
114
+ | `rag_pipeline` | Surrogate RAG tuning (chunking, top_k, reranker interactions). |
115
+ | `image_pipeline` | Surrogate diffusion tuning; steps drive quality and cost. |
116
+ | `dispatch_policy` | Surrogate delivery policy; balance, batching, mild noise. |
117
+
118
+ ## Protocol
119
+
120
+ The runner launches the optimizer as a child process (working directory = the
121
+ optimizer's directory; if `command[0]` is `"python"` it is replaced with the
122
+ runner's own interpreter). Messages are one JSON object per line: runner →
123
+ optimizer on stdin, optimizer → runner on stdout. **Optimizer stdout is
124
+ protocol-only; write debug output to stderr** (the runner saves it to
125
+ `optimizer.stderr.log`). Receivers ignore unknown keys. `NaN`/`Infinity` must
126
+ not be sent. Current `protocol_version` is `1`.
127
+
128
+ ### Messages and turn-taking
129
+
130
+ | Direction | `type` | Reply |
131
+ |---|---|---|
132
+ | runner → optimizer | `init` | `ready` (once) |
133
+ | runner → optimizer | `ask` | `propose` (once) |
134
+ | runner → optimizer | `tell` | none |
135
+ | runner → optimizer | `finish` | none; exit promptly |
136
+
137
+ Only one `ask` is outstanding at a time. The `init` reply may take up to 30s,
138
+ each `ask` reply up to 60s by default; overruns end the run as
139
+ `optimizer_timeout`. A crash, an unparseable line, or an out-of-order message
140
+ ends the run as `failed`. The best-so-far is recorded in every case.
141
+
142
+ **init** (runner → optimizer):
143
+
144
+ ```json
145
+ {"type": "init", "protocol_version": 1, "run_id": "smooth_hill--my_opt--s1",
146
+ "problem": {
147
+ "description": "natural-language prompt",
148
+ "space": [ ...param specs (below)... ],
149
+ "objective": "maximize",
150
+ "budget": {"evaluations": 100, "cost_limit": null, "time_limit_sec": 300.0},
151
+ "fidelities": null
152
+ },
153
+ "optimizer_seed": 12345}
154
+ ```
155
+
156
+ `budget` always has at least one of `evaluations` or `cost_limit` non-null.
157
+ `fidelities`, when non-null, is ordered low→high (last entry = top fidelity).
158
+
159
+ **ready / propose** (optimizer → runner):
160
+
161
+ ```json
162
+ {"type": "ready"}
163
+ {"type": "propose", "candidate": {"x0": 0.5, "algo": "alpha"}, "fidelity": "low"}
164
+ ```
165
+
166
+ `fidelity` is optional; omitted/null means top fidelity. Sending a non-null
167
+ `fidelity` to a problem with no fidelities is invalid.
168
+
169
+ **tell** (runner → optimizer):
170
+
171
+ ```json
172
+ {"type": "tell", "candidate_id": "c-0007", "candidate": {"x0": 0.5},
173
+ "valid": true, "score": 0.73, "cost": 1.0, "fidelity": null, "error": null,
174
+ "remaining": {"evaluations": 92, "cost": null, "time_sec": 291.3}}
175
+ ```
176
+
177
+ When invalid: `valid: false`, `score: null`, and `error` gives the reason.
178
+
179
+ **finish** (runner → optimizer): `{"type": "finish", "reason": "budget_exhausted"}`
180
+ (`reason` is `budget_exhausted` or `time_limit`).
181
+
182
+ ### Search space
183
+
184
+ ```json
185
+ [
186
+ {"name": "lr", "type": "float", "low": 1e-4, "high": 1.0, "log": true},
187
+ {"name": "layers", "type": "int", "low": 1, "high": 12},
188
+ {"name": "opt", "type": "categorical", "choices": ["sgd", "adam"]},
189
+ {"name": "warmup", "type": "bool"},
190
+ {"name": "warmup_steps", "type": "int", "low": 10, "high": 1000,
191
+ "condition": {"param": "warmup", "equals": [true]}}
192
+ ]
193
+ ```
194
+
195
+ - Types: `float`, `int`, `categorical`, `bool`. Bounds `low`/`high` are
196
+ inclusive; `log: true` hints a log scale.
197
+ - A param with `condition` is **active** only when
198
+ `candidate[condition.param]` is in `equals`. Conditioning is one level deep
199
+ (the parent must be unconditional).
200
+
201
+ A candidate is validated by the runner: it must be a JSON object containing
202
+ **exactly** the active params (no unknown keys, no inactive params, none
203
+ missing), each of the right type and within range.
204
+
205
+ ### Budget rules
206
+
207
+ - A valid evaluation consumes the evaluator's `cost` (may depend on the
208
+ candidate/fidelity); the `evaluations` axis always consumes 1.
209
+ - **An invalid proposal still consumes budget** (1 evaluation, cost 1.0), so
210
+ spamming invalid candidates cannot mine the space for free.
211
+ - The stop check runs before each `ask`, so the final evaluation may slightly
212
+ overshoot `cost_limit`.
213
+ - For problems with `fidelities`, **only top-fidelity evaluations count toward
214
+ `best_score`**; lower fidelities are available as history but not scored.
215
+
216
+ ## Metrics
217
+
218
+ `hypara report` recomputes everything from the saved logs. Per run: best
219
+ score, best candidate, best-so-far curve (over evaluations or cumulative
220
+ cost), valid rate, status, wall time. Aggregated per (problem, optimizer):
221
+ mean best, a baseline-relative normalized best and normalized anytime AUC
222
+ (0 = baseline median, 1 = best observed for that problem), and an overall
223
+ mean across problems.
224
+
225
+ ## Adding a problem
226
+
227
+ Implement `Problem` under `src/hypara/problems/` and register it in
228
+ `src/hypara/registry.py`. Keep the description and the evaluator's actual
229
+ behavior in sync — the point of the benchmark is that reading the description
230
+ helps. The shared invariants in `tests/test_problems.py` (finite scores,
231
+ determinism given a seed, instance-seed sensitivity) apply automatically.
232
+
233
+ ## License
234
+
235
+ MIT. See [LICENSE](LICENSE).
@@ -0,0 +1,17 @@
1
+ {
2
+ "name": "full",
3
+ "problems": [
4
+ "smooth_hill",
5
+ "rugged_trap",
6
+ "conditional_knobs",
7
+ "noisy_lab",
8
+ "multi_fidelity",
9
+ "sparse_needle",
10
+ "cost_aware",
11
+ "rag_pipeline",
12
+ "image_pipeline",
13
+ "dispatch_policy"
14
+ ],
15
+ "optimizers": ["optimizers/random_search", "optimizers/hill_climb"],
16
+ "seeds": [1, 2, 3, 4, 5, 6, 7, 8]
17
+ }
@@ -0,0 +1,11 @@
1
+ {
2
+ "name": "smoke",
3
+ "problems": ["smooth_hill", "rugged_trap", "conditional_knobs"],
4
+ "optimizers": ["optimizers/random_search", "optimizers/hill_climb"],
5
+ "seeds": [1, 2, 3],
6
+ "budget_overrides": {
7
+ "smooth_hill": {"evaluations": 40},
8
+ "rugged_trap": {"evaluations": 40},
9
+ "conditional_knobs": {"evaluations": 40}
10
+ }
11
+ }