livepilot 1.17.1 → 1.17.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -155,6 +155,9 @@ def build_capability_state(
155
155
  )
156
156
 
157
157
  # ── web ──────────────────────────────────────────────────────────
158
+ # Server-side outbound HTTP capability. True when the MCP host can
159
+ # reach an arbitrary public URL. Does NOT imply curated research
160
+ # corpora are installed — see the ``research`` domain below.
158
161
  web_reasons: list[str] = []
159
162
  if not web_ok:
160
163
  web_reasons.append("web_unavailable")
@@ -166,6 +169,21 @@ def build_capability_state(
166
169
  reasons=web_reasons,
167
170
  )
168
171
 
172
+ # ── flucoma ──────────────────────────────────────────────────────
173
+ # Optional dependency (the ``flucoma`` Python package). Emitted
174
+ # unconditionally so consumers can distinguish "probed and missing"
175
+ # from "probe not run yet".
176
+ flucoma_reasons: list[str] = []
177
+ if not flucoma_ok:
178
+ flucoma_reasons.append("flucoma_not_installed")
179
+ domains["flucoma"] = CapabilityDomain(
180
+ name="flucoma",
181
+ available=flucoma_ok,
182
+ confidence=0.9 if flucoma_ok else 0.0,
183
+ mode="available" if flucoma_ok else "unavailable",
184
+ reasons=flucoma_reasons,
185
+ )
186
+
169
187
  # ── research (composite) ────────────────────────────────────────
170
188
  research_reasons: list[str] = []
171
189
  research_sources = 0
@@ -0,0 +1,62 @@
1
+ """Explicit degradation signalling for engines that fall back to synthesized data.
2
+
3
+ Before PR-B, several engines silently substituted defaults when a data
4
+ source failed — ``song_brain`` injected ``tempo=120.0, track_count=0``
5
+ on session-fetch failure, and ``preview_studio`` compiled variants
6
+ against an empty-but-valid kernel when the caller didn't supply one.
7
+ Downstream consumers had no way to tell synthesized data from real
8
+ data, so polished outputs were returned as if they were real.
9
+
10
+ ``DegradationInfo`` is the shared payload engines attach to their
11
+ responses whenever they substitute fallback values. Consumers can
12
+ inspect ``is_degraded``, ``reasons``, and ``substituted_fields`` to
13
+ decide whether to trust the response or re-try the operation.
14
+
15
+ Usage::
16
+
17
+ from mcp_server.runtime.degradation import DegradationInfo
18
+
19
+ deg = DegradationInfo()
20
+ try:
21
+ data = fetch_real_data()
22
+ except Exception:
23
+ data = FALLBACK_DATA
24
+ deg = DegradationInfo(
25
+ is_degraded=True,
26
+ reasons=["data_source_unreachable"],
27
+ substituted_fields=["tempo", "track_count"],
28
+ )
29
+ return {..., "degradation": deg.to_dict()}
30
+ """
31
+
32
+ from __future__ import annotations
33
+
34
+ from dataclasses import dataclass, field
35
+
36
+
37
@dataclass
class DegradationInfo:
    """Payload reporting that an engine response contains fallback values.

    Attributes:
        is_degraded: ``True`` if at least one field of the response holds
            a synthesized/default value; ``False`` if the response is
            fully backed by real data sources.
        reasons: Short machine-readable tokens explaining why the
            fallback happened (e.g. ``"session_fetch_failed"``,
            ``"empty_kernel_fallback"``). Deliberately open-ended.
        substituted_fields: Names of the top-level response fields whose
            values were produced by the fallback path rather than the
            real source.
    """

    is_degraded: bool = False
    reasons: list[str] = field(default_factory=list)
    substituted_fields: list[str] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Return a plain-dict snapshot; both lists are shallow copies."""
        payload: dict = {"is_degraded": self.is_degraded}
        payload["reasons"] = [*self.reasons]
        payload["substituted_fields"] = [*self.substituted_fields]
        return payload
@@ -7,6 +7,9 @@ Tools:
7
7
 
8
8
  from __future__ import annotations
9
9
 
10
+ import importlib.util
11
+ import logging
12
+ import urllib.request
10
13
  from typing import Optional
11
14
 
12
15
  from fastmcp import Context
@@ -14,13 +17,55 @@ from fastmcp import Context
14
17
  from ..server import mcp
15
18
  from ..memory.technique_store import TechniqueStore
16
19
  from .capability_state import build_capability_state
17
- import logging
18
20
 
19
21
  logger = logging.getLogger(__name__)
20
22
 
21
23
  _memory_store = TechniqueStore()
22
24
 
23
25
 
26
+ # ── Capability probes ──────────────────────────────────────────────────
27
+ #
28
+ # These helpers are module-level so tests can monkeypatch them directly.
29
+
30
+
31
def _probe_web(timeout: float = 0.5) -> bool:
    """Check server-side outbound HTTP reachability.

    Sends a HEAD request to ``https://api.github.com`` through stdlib
    ``urllib.request`` with a ``timeout``-second limit. A ``True`` result
    only says that this request succeeded; it says nothing about whether
    curated research corpora are installed.

    Args:
        timeout: Value passed as the ``timeout`` argument of ``urlopen``.

    Returns:
        ``True`` when the response exposes a ``status`` in the range
        200–399. ``False`` when the status is missing or outside that
        range, or when the request raises any exception (the exception is
        logged at debug level and swallowed).
    """
    head_request = urllib.request.Request("https://api.github.com", method="HEAD")
    try:
        with urllib.request.urlopen(head_request, timeout=timeout) as response:
            code = getattr(response, "status", None)
            if code is None:
                return False
            return 200 <= code < 400
    except Exception as exc:  # noqa: BLE001 — deliberate catch-all, probe must not raise
        logger.debug("_probe_web failed: %s", exc)
        return False
52
+
53
+
54
+ def _probe_flucoma() -> bool:
55
+ """Check whether the ``flucoma`` Python package is importable.
56
+
57
+ Uses ``importlib.util.find_spec`` so no import side-effects fire
58
+ (matching the pattern already used for optional capability probes
59
+ elsewhere in the codebase). Returns False if the package is missing
60
+ or if the spec lookup itself raises.
61
+ """
62
+ try:
63
+ return importlib.util.find_spec("flucoma") is not None
64
+ except Exception as exc: # noqa: BLE001
65
+ logger.debug("_probe_flucoma failed: %s", exc)
66
+ return False
67
+
68
+
24
69
  @mcp.tool()
25
70
  def get_capability_state(ctx: Context) -> dict:
26
71
  """Probe the runtime and return a capability state snapshot.
@@ -59,9 +104,13 @@ def get_capability_state(ctx: Context) -> dict:
59
104
  logger.debug("get_capability_state failed: %s", exc)
60
105
  memory_ok = False
61
106
 
62
- # ── Web / FluCoMa not probed live, default to False ───────────
63
- web_ok = False
64
- flucoma_ok = False
107
+ # ── Web — actually probe outbound HTTP egress ───────────────────
108
+ # Scoped to server-side outbound HTTP reachability; does NOT imply
109
+ # a curated research corpus is installed (see ``research`` domain).
110
+ web_ok = _probe_web()
111
+
112
+ # ── FluCoMa — optional import via find_spec (no side effects) ───
113
+ flucoma_ok = _probe_flucoma()
65
114
 
66
115
  state = build_capability_state(
67
116
  session_ok=session_ok,
@@ -9,6 +9,7 @@ from __future__ import annotations
9
9
 
10
10
  from fastmcp import Context
11
11
 
12
+ from ..runtime.degradation import DegradationInfo
12
13
  from ..server import mcp
13
14
  from . import builder
14
15
  from .models import SongBrain
@@ -55,6 +56,12 @@ def _fetch_session_data(ctx: Context) -> dict:
55
56
  - composition_analysis: from musical intelligence section inference
56
57
  - role_graph: from semantic move resolvers (track role inference)
57
58
  - recent_moves: from session-scoped action ledger
59
+
60
+ On session-fetch failure the fallback session_info shape is injected
61
+ (``tempo=120.0, track_count=0``) and a ``DegradationInfo`` is attached
62
+ under the ``_degradation`` key so callers can tell synthesized data
63
+ from real data. ``_fetch_session_data`` never raises — it always
64
+ returns a dict with the expected keys.
58
65
  """
59
66
  ableton = _get_ableton(ctx)
60
67
  data: dict = {
@@ -66,12 +73,19 @@ def _fetch_session_data(ctx: Context) -> dict:
66
73
  "role_graph": {},
67
74
  "recent_moves": [],
68
75
  }
76
+ degradation = DegradationInfo()
69
77
 
70
78
  try:
71
79
  data["session_info"] = ableton.send_command("get_session_info", {})
72
80
  except Exception as exc:
73
81
  logger.debug("_fetch_session_data failed: %s", exc)
74
82
  data["session_info"] = {"tempo": 120.0, "track_count": 0}
83
+ degradation.is_degraded = True
84
+ if "session_fetch_failed" not in degradation.reasons:
85
+ degradation.reasons.append("session_fetch_failed")
86
+ for fld in ("tempo", "track_count"):
87
+ if fld not in degradation.substituted_fields:
88
+ degradation.substituted_fields.append(fld)
75
89
 
76
90
  try:
77
91
  matrix = ableton.send_command("get_scene_matrix")
@@ -135,6 +149,10 @@ def _fetch_session_data(ctx: Context) -> dict:
135
149
  except Exception as exc:
136
150
  logger.debug("_fetch_session_data failed: %s", exc)
137
151
 
152
+ # Attach the degradation signal so build_song_brain can surface it.
153
+ # Under a reserved key (leading underscore) so it never collides with
154
+ # a real session data field.
155
+ data["_degradation"] = degradation
138
156
  return data
139
157
 
140
158
 
@@ -180,10 +198,15 @@ def build_song_brain(ctx: Context) -> dict:
180
198
  )
181
199
  _set_brain(ctx, brain)
182
200
 
201
+ # Surface the degradation payload so callers can distinguish a
202
+ # tempo=120 / track_count=0 synthesized response from a real one.
203
+ degradation = data.get("_degradation") or DegradationInfo()
204
+
183
205
  return {
184
206
  **brain.to_dict(),
185
207
  "summary": brain.summary,
186
208
  "capability": cap.to_dict(),
209
+ "degradation": degradation.to_dict(),
187
210
  }
188
211
 
189
212
 
@@ -19,12 +19,16 @@ from .models import TimbralFingerprint
19
19
 
20
20
  # ── Band-based brightness / warmth mapping ──────────────────────────────
21
21
  #
22
- # The M4L analyzer returns an 8-band spectrum by default. When a full
23
- # spectrum dict is passed, we look for these band keys in order. If the
24
- # raw {freq: magnitude} shape is passed instead, we fall back to a coarser
25
- # low/mid/high split.
22
+ # Two upstream producers feed this extractor with different band schemas:
23
+ # 1. get_master_spectrum (M4L analyzer) — v1.16+: 9 bands (sub_low,
24
+ # sub, low, low_mid, mid, high_mid, high, presence, air);
25
+ # pre-v1.16: 8 bands (no sub_low).
26
+ # 2. analyze_spectrum_offline — 8 bands with legacy names
27
+ # (sub, low, low_mid, mid, high_mid, high, very_high, ultra).
28
+ # We index the union of both name sets below; `_band_energy` uses dict.get
29
+ # so missing bands simply return 0 without complaint.
26
30
 
27
- _BANDS = ("sub", "low", "low_mid", "mid", "high_mid", "high", "very_high", "ultra")
31
+ _BANDS = ("sub_low", "sub", "low", "low_mid", "mid", "high_mid", "high", "presence", "air", "very_high", "ultra")
28
32
 
29
33
 
30
34
  def _band_energy(spectrum: Optional[dict], band: str) -> float:
@@ -55,9 +59,11 @@ def extract_timbre_fingerprint(
55
59
  Inputs are all optional — the function degrades gracefully when only
56
60
  some dimensions are measurable.
57
61
 
58
- spectrum: either {sub, low, low_mid, mid, high_mid, high, very_high, ultra}
59
- or {"bands": {...}} the 8-band shape returned by get_master_spectrum /
60
- analyze_spectrum_offline. Missing bands default to 0.
62
+ spectrum: either the 9-band shape from get_master_spectrum
63
+ ({sub_low, sub, low, low_mid, mid, high_mid, high, presence, air}),
64
+ the legacy 8-band shape from analyze_spectrum_offline
65
+ ({sub, low, low_mid, mid, high_mid, high, very_high, ultra}),
66
+ or {"bands": {...}} wrapping either. Missing bands default to 0.
61
67
  loudness: {"rms": float, "peak": float, "lufs": float, "lra": float} —
62
68
  output shape from analyze_loudness.
63
69
  spectral_shape: FluCoMa descriptors when available — {"centroid", "flatness",
@@ -35,6 +35,12 @@ from .taste import (
35
35
  compute_taste_fit,
36
36
  get_taste_profile,
37
37
  )
38
+ from .iteration import (
39
+ iterate_toward_goal_engine,
40
+ iterate_toward_goal_engine_async,
41
+ IterationResult,
42
+ IterationStep,
43
+ )
38
44
 
39
45
  __all__ = [
40
46
  "QUALITY_DIMENSIONS", "MEASURABLE_PROXIES",
@@ -49,4 +55,8 @@ __all__ = [
49
55
  "analyze_outcome_history",
50
56
  "compute_taste_fit",
51
57
  "get_taste_profile",
58
+ "iterate_toward_goal_engine",
59
+ "iterate_toward_goal_engine_async",
60
+ "IterationResult",
61
+ "IterationStep",
52
62
  ]
@@ -0,0 +1,344 @@
1
+ """Iteration engine — closes the evaluation loop by running experiments
2
+ repeatedly against a compiled GoalVector until threshold or timeout.
3
+
4
+ Pure-python: takes callables for experiment create/run/commit/discard so
5
+ tests can substitute in-memory fakes without an Ableton connection. The
6
+ callables may be sync or async — the engine uses `iterate_toward_goal_engine`
7
+ (sync) for the former and `iterate_toward_goal_engine_async` for the latter.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import inspect
12
+ from dataclasses import dataclass, field
13
+ from typing import Any, Awaitable, Callable, Optional, Union
14
+
15
+
16
@dataclass
class IterationStep:
    """Record of a single pass through the outer loop (one experiment)."""

    iteration: int
    experiment_id: str
    winner_branch_id: Optional[str]
    winner_score: float
    threshold_met: bool
    note: str = ""

    def to_dict(self) -> dict:
        """Serialize every field into a plain dict, in declaration order."""
        field_names = (
            "iteration",
            "experiment_id",
            "winner_branch_id",
            "winner_score",
            "threshold_met",
            "note",
        )
        return {name: getattr(self, name) for name in field_names}
35
+
36
+
37
@dataclass
class IterationResult:
    """Outcome of one iterate_toward_goal run.

    ``status`` takes one of these values:
      - "committed" — a winner hit threshold, was committed permanently
      - "exhausted" — max_iterations reached, committed best-so-far
        (on_timeout=commit_best)
      - "timeout_no_commit" — max_iterations reached, no commit
        (on_timeout=discard_on_timeout)
      - "no_candidates" — caller provided empty candidate_move_sets
      - "error" — unrecoverable error; see ``reason``
    """

    status: str
    iterations_run: int
    committed_experiment_id: Optional[str]
    committed_branch_id: Optional[str]
    final_score: float
    steps: list[IterationStep] = field(default_factory=list)
    reason: str = ""

    def to_dict(self) -> dict:
        """Serialize the result; each step is expanded via its ``to_dict``."""
        serialized_steps = []
        for step in self.steps:
            serialized_steps.append(step.to_dict())
        return dict(
            status=self.status,
            iterations_run=self.iterations_run,
            committed_experiment_id=self.committed_experiment_id,
            committed_branch_id=self.committed_branch_id,
            final_score=self.final_score,
            steps=serialized_steps,
            reason=self.reason,
        )
66
+
67
+
68
def iterate_toward_goal_engine(
    candidate_move_sets: list,
    threshold: float,
    max_iterations: int,
    create_experiment_fn: Callable[[list], str],
    run_experiment_fn: Callable[[str], Any],
    commit_fn: Callable[[str, str], dict],
    discard_fn: Callable[[str], dict],
    on_timeout: str = "commit_best",
) -> IterationResult:
    """Run experiments repeatedly until winner_score >= threshold or timeout.

    Pure orchestration — all I/O happens through the injected callbacks.
    This loop only chooses commit vs discard; it never issues undo calls
    itself.

    Dispatch:
      - If none of the four callbacks is a coroutine function, the
        synchronous core is executed directly (no event loop involved).
      - Otherwise the async core is driven to completion with
        ``asyncio.run``.

    Args:
        candidate_move_sets: One entry per experiment to try, in order.
        threshold: Score at or above which a winner is committed.
        max_iterations: Upper bound on the number of experiments run.
        create_experiment_fn: Called with one candidate entry; returns
            the experiment id.
        run_experiment_fn: Called with the experiment id; returns a
            ``(winner_branch_id, winner_score)`` pair.
        commit_fn: Called with ``(experiment_id, branch_id)``.
        discard_fn: Called with an experiment id.
        on_timeout: ``"commit_best"`` commits the best-so-far experiment
            when the loop ends without meeting the threshold; any other
            value discards it.

    Returns:
        The ``IterationResult`` produced by the selected core.

    Raises:
        RuntimeError: If any callback is a coroutine function and this
            function is called from inside a running event loop.
            ``asyncio.run`` cannot nest, so callers in async code must
            await ``iterate_toward_goal_engine_async`` instead.
    """
    import asyncio

    engine_kwargs = {
        "candidate_move_sets": candidate_move_sets,
        "threshold": threshold,
        "max_iterations": max_iterations,
        "create_experiment_fn": create_experiment_fn,
        "run_experiment_fn": run_experiment_fn,
        "commit_fn": commit_fn,
        "discard_fn": discard_fn,
        "on_timeout": on_timeout,
    }

    any_async = any(
        inspect.iscoroutinefunction(fn)
        for fn in (create_experiment_fn, run_experiment_fn, commit_fn, discard_fn)
    )
    if not any_async:
        # Plain callables: run the sync core and skip event-loop setup.
        return _iterate_sync_core(**engine_kwargs)

    # Check for a running loop BEFORE creating the coroutine, so a nested
    # call fails with an actionable message and leaves no never-awaited
    # coroutine object behind.
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        loop_is_running = False
    else:
        loop_is_running = True
    if loop_is_running:
        raise RuntimeError(
            "iterate_toward_goal_engine cannot drive async callbacks from "
            "inside a running event loop; await "
            "iterate_toward_goal_engine_async instead"
        )

    return asyncio.run(_iterate_async_core(**engine_kwargs))
122
+
123
+
124
async def iterate_toward_goal_engine_async(
    candidate_move_sets: list,
    threshold: float,
    max_iterations: int,
    create_experiment_fn: Callable[[list], Any],
    run_experiment_fn: Callable[[str], Any],
    commit_fn: Callable[[str, str], Any],
    discard_fn: Callable[[str], Any],
    on_timeout: str = "commit_best",
) -> IterationResult:
    """Awaitable entry point for callers that already run in an event loop.

    Forwards every argument unchanged to the async core and returns its
    ``IterationResult``.
    """
    forwarded = {
        "candidate_move_sets": candidate_move_sets,
        "threshold": threshold,
        "max_iterations": max_iterations,
        "create_experiment_fn": create_experiment_fn,
        "run_experiment_fn": run_experiment_fn,
        "commit_fn": commit_fn,
        "discard_fn": discard_fn,
        "on_timeout": on_timeout,
    }
    outcome = await _iterate_async_core(**forwarded)
    return outcome
145
+
146
+
147
+ # ── Internal cores ─────────────────────────────────────────────────────────
148
+
149
def _iterate_sync_core(
    candidate_move_sets,
    threshold,
    max_iterations,
    create_experiment_fn,
    run_experiment_fn,
    commit_fn,
    discard_fn,
    on_timeout,
) -> IterationResult:
    """Synchronous experiment loop: try candidates until one meets threshold.

    For each of the first ``min(max_iterations, len(candidate_move_sets))``
    candidates (never fewer than zero), creates an experiment, runs it,
    and then either commits it (threshold met), keeps it as best-so-far
    (discarding the previous best), or discards it.

    Args:
        candidate_move_sets: Sequence of candidates, tried in order.
        threshold: Minimum winner score that triggers an immediate commit.
        max_iterations: Cap on the number of experiments; values <= 0
            mean no experiment is run.
        create_experiment_fn: ``candidate -> experiment_id``.
        run_experiment_fn: ``experiment_id -> (winner_branch_id, winner_score)``.
        commit_fn: ``(experiment_id, branch_id) -> Any``; result is ignored.
        discard_fn: ``experiment_id -> Any``; result is ignored.
        on_timeout: ``"commit_best"`` commits the best-so-far experiment
            when the loop ends without meeting threshold; any other value
            discards it.

    Returns:
        An ``IterationResult`` with status ``"no_candidates"``,
        ``"committed"``, ``"exhausted"`` or ``"timeout_no_commit"``.
    """
    if not candidate_move_sets:
        return IterationResult(
            status="no_candidates",
            iterations_run=0,
            committed_experiment_id=None,
            committed_branch_id=None,
            final_score=0.0,
            reason="candidate_move_sets is empty",
        )

    steps: list[IterationStep] = []
    best_score = -1.0
    best_exp_id: Optional[str] = None
    best_branch_id: Optional[str] = None
    # Clamp at zero so a negative max_iterations cannot surface as a
    # negative iterations_run / "max_iterations=-N" in the result.
    n = max(0, min(max_iterations, len(candidate_move_sets)))

    for i, move_ids in enumerate(candidate_move_sets[:n]):
        exp_id = create_experiment_fn(move_ids)
        winner_branch_id, winner_score = run_experiment_fn(exp_id)

        met = winner_score >= threshold and winner_branch_id is not None
        steps.append(IterationStep(
            iteration=i,
            experiment_id=exp_id,
            winner_branch_id=winner_branch_id,
            winner_score=winner_score,
            threshold_met=met,
            note=(
                f"committed on iteration {i}" if met
                else f"below threshold (need {threshold}, got {winner_score})"
            ),
        ))

        if met:
            # Discard any prior best-so-far before committing the new winner —
            # otherwise the old non-winning experiment leaks in the store.
            if best_exp_id is not None and best_exp_id != exp_id:
                discard_fn(best_exp_id)
            commit_fn(exp_id, winner_branch_id)
            return IterationResult(
                status="committed",
                iterations_run=i + 1,
                committed_experiment_id=exp_id,
                committed_branch_id=winner_branch_id,
                final_score=winner_score,
                steps=steps,
                reason=f"threshold {threshold} met on iteration {i}",
            )

        if winner_branch_id is not None and winner_score > best_score:
            # Supersede previous best-so-far. It's now stale, free the slot.
            if best_exp_id is not None:
                discard_fn(best_exp_id)
            best_score = winner_score
            best_exp_id = exp_id
            best_branch_id = winner_branch_id
        else:
            discard_fn(exp_id)

    # best_exp_id and best_branch_id are always assigned together, and the
    # loop treats any non-None id as valid, so test against None here too
    # (a truthiness test would mishandle an empty-string id).
    has_best = best_exp_id is not None and best_branch_id is not None

    if on_timeout == "commit_best" and has_best:
        commit_fn(best_exp_id, best_branch_id)
        return IterationResult(
            status="exhausted",
            iterations_run=n,
            committed_experiment_id=best_exp_id,
            committed_branch_id=best_branch_id,
            final_score=best_score,
            steps=steps,
            reason=(
                f"max_iterations={n} reached, threshold {threshold} never met; "
                f"committed best-so-far with score {best_score}"
            ),
        )

    if best_exp_id is not None:
        discard_fn(best_exp_id)
    return IterationResult(
        status="timeout_no_commit",
        iterations_run=n,
        committed_experiment_id=None,
        committed_branch_id=None,
        # best_score is still the -1.0 sentinel when nothing qualified.
        final_score=max(best_score, 0.0),
        steps=steps,
        reason=f"max_iterations={n} reached, policy={on_timeout}, no commit issued",
    )
245
+
246
+
247
async def _iterate_async_core(
    candidate_move_sets,
    threshold,
    max_iterations,
    create_experiment_fn,
    run_experiment_fn,
    commit_fn,
    discard_fn,
    on_timeout,
) -> IterationResult:
    """Async experiment loop: try candidates until one meets threshold.

    Every callback result is awaited only if it is awaitable, so sync and
    async callbacks can be mixed freely. For each of the first
    ``min(max_iterations, len(candidate_move_sets))`` candidates (never
    fewer than zero), creates an experiment, runs it, and then either
    commits it (threshold met), keeps it as best-so-far (discarding the
    previous best), or discards it.

    Args:
        candidate_move_sets: Sequence of candidates, tried in order.
        threshold: Minimum winner score that triggers an immediate commit.
        max_iterations: Cap on the number of experiments; values <= 0
            mean no experiment is run.
        create_experiment_fn: ``candidate -> experiment_id`` (or awaitable).
        run_experiment_fn: ``experiment_id -> (winner_branch_id,
            winner_score)`` (or awaitable of that pair).
        commit_fn: ``(experiment_id, branch_id) -> Any``; result is ignored.
        discard_fn: ``experiment_id -> Any``; result is ignored.
        on_timeout: ``"commit_best"`` commits the best-so-far experiment
            when the loop ends without meeting threshold; any other value
            discards it.

    Returns:
        An ``IterationResult`` with status ``"no_candidates"``,
        ``"committed"``, ``"exhausted"`` or ``"timeout_no_commit"``.
    """
    if not candidate_move_sets:
        return IterationResult(
            status="no_candidates",
            iterations_run=0,
            committed_experiment_id=None,
            committed_branch_id=None,
            final_score=0.0,
            reason="candidate_move_sets is empty",
        )

    async def _maybe_await(value):
        """Await ``value`` when it is awaitable; otherwise pass it through."""
        if inspect.isawaitable(value):
            return await value
        return value

    steps: list[IterationStep] = []
    best_score = -1.0
    best_exp_id: Optional[str] = None
    best_branch_id: Optional[str] = None
    # Clamp at zero so a negative max_iterations cannot surface as a
    # negative iterations_run / "max_iterations=-N" in the result.
    n = max(0, min(max_iterations, len(candidate_move_sets)))

    for i, move_ids in enumerate(candidate_move_sets[:n]):
        exp_id = await _maybe_await(create_experiment_fn(move_ids))
        winner_branch_id, winner_score = await _maybe_await(run_experiment_fn(exp_id))

        met = winner_score >= threshold and winner_branch_id is not None
        steps.append(IterationStep(
            iteration=i,
            experiment_id=exp_id,
            winner_branch_id=winner_branch_id,
            winner_score=winner_score,
            threshold_met=met,
            note=(
                f"committed on iteration {i}" if met
                else f"below threshold (need {threshold}, got {winner_score})"
            ),
        ))

        if met:
            # Drop the stale best-so-far first so it does not outlive the
            # experiment that is about to be committed.
            if best_exp_id is not None and best_exp_id != exp_id:
                await _maybe_await(discard_fn(best_exp_id))
            await _maybe_await(commit_fn(exp_id, winner_branch_id))
            return IterationResult(
                status="committed",
                iterations_run=i + 1,
                committed_experiment_id=exp_id,
                committed_branch_id=winner_branch_id,
                final_score=winner_score,
                steps=steps,
                reason=f"threshold {threshold} met on iteration {i}",
            )

        if winner_branch_id is not None and winner_score > best_score:
            # New best-so-far supersedes the previous one.
            if best_exp_id is not None:
                await _maybe_await(discard_fn(best_exp_id))
            best_score = winner_score
            best_exp_id = exp_id
            best_branch_id = winner_branch_id
        else:
            await _maybe_await(discard_fn(exp_id))

    # best_exp_id and best_branch_id are always assigned together, and the
    # loop treats any non-None id as valid, so test against None here too
    # (a truthiness test would mishandle an empty-string id).
    has_best = best_exp_id is not None and best_branch_id is not None

    if on_timeout == "commit_best" and has_best:
        await _maybe_await(commit_fn(best_exp_id, best_branch_id))
        return IterationResult(
            status="exhausted",
            iterations_run=n,
            committed_experiment_id=best_exp_id,
            committed_branch_id=best_branch_id,
            final_score=best_score,
            steps=steps,
            reason=(
                f"max_iterations={n} reached, threshold {threshold} never met; "
                f"committed best-so-far with score {best_score}"
            ),
        )

    if best_exp_id is not None:
        await _maybe_await(discard_fn(best_exp_id))
    return IterationResult(
        status="timeout_no_commit",
        iterations_run=n,
        committed_experiment_id=None,
        committed_branch_id=None,
        # best_score is still the -1.0 sentinel when nothing qualified.
        final_score=max(best_score, 0.0),
        steps=steps,
        reason=f"max_iterations={n} reached, policy={on_timeout}, no commit issued",
    )