livepilot 1.17.1 → 1.17.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,481 @@
1
+ """Iteration engine — closes the evaluation loop by running experiments
2
+ repeatedly against a compiled GoalVector until threshold or timeout.
3
+
4
+ Pure-python: takes callables for experiment create/run/commit/discard so
5
+ tests can substitute in-memory fakes without an Ableton connection. The
6
+ callables may be sync or async — the engine uses `iterate_toward_goal_engine`
7
+ (sync) for the former and `iterate_toward_goal_engine_async` for the latter.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import inspect
12
+ from dataclasses import dataclass, field
13
+ from typing import Any, Awaitable, Callable, Optional, Union
14
+
15
+
16
@dataclass
class IterationStep:
    """Record of a single outer-loop iteration — one experiment's worth of work."""
    iteration: int
    experiment_id: str
    winner_branch_id: Optional[str]
    winner_score: float
    threshold_met: bool
    note: str = ""

    def to_dict(self) -> dict:
        """Serialize this step to a plain, JSON-friendly dict."""
        names = (
            "iteration",
            "experiment_id",
            "winner_branch_id",
            "winner_score",
            "threshold_met",
            "note",
        )
        return {name: getattr(self, name) for name in names}
35
+
36
+
37
@dataclass
class IterationResult:
    """Terminal outcome of iterate_toward_goal.

    status values:
    - "committed" — a winner hit threshold AND commit succeeded (steps_ok>0, steps_failed==0)
    - "committed_with_errors" — commit applied some steps but not all (steps_ok>0 AND steps_failed>0)
    - "commit_failed" — commit was attempted but applied zero steps (steps_ok==0 OR committed:false)
    - "exhausted" — max_iterations reached, committed best-so-far cleanly (on_timeout=commit_best)
    - "timeout_no_commit" — max_iterations reached, no commit (on_timeout=discard_on_timeout)
    - "no_candidates" — caller provided empty candidate_move_sets

    commit_result holds the raw dict returned by commit_fn so callers can
    inspect it. It is set whenever commit_fn ran — success or not — and is
    None when no commit was ever attempted.
    """
    status: str
    iterations_run: int
    committed_experiment_id: Optional[str]
    committed_branch_id: Optional[str]
    final_score: float
    steps: list[IterationStep] = field(default_factory=list)
    reason: str = ""
    commit_result: Optional[dict] = None

    def to_dict(self) -> dict:
        """Serialize to a plain dict; commit_result appears only when set."""
        payload = {
            name: getattr(self, name)
            for name in (
                "status",
                "iterations_run",
                "committed_experiment_id",
                "committed_branch_id",
                "final_score",
            )
        }
        payload["steps"] = [step.to_dict() for step in self.steps]
        payload["reason"] = self.reason
        if self.commit_result is not None:
            payload["commit_result"] = self.commit_result
        return payload
75
+
76
+
77
+ def _classify_commit_result(result: Any) -> str:
78
+ """Inspect a commit_fn return value and classify into an IterationResult
79
+ status. Conservative: any failure signal produces 'commit_failed', any
80
+ partial signal produces 'committed_with_errors', only clean success
81
+ produces 'committed'.
82
+
83
+ Known failure signals:
84
+ - {"committed": False, ...}
85
+ - {"status": "failed", ...}
86
+ - {"ok": False, ...}
87
+ - {"error": ...} present at top level (unless committed explicitly True)
88
+ - {"steps_ok": 0, ...}
89
+
90
+ Known partial signals:
91
+ - {"status": "committed_with_errors", ...}
92
+ - {"steps_failed": N, "steps_ok": M>0} where N>0
93
+ """
94
+ if not isinstance(result, dict):
95
+ # Non-dict returns: trust the caller but don't confirm partial/error.
96
+ return "committed"
97
+
98
+ # Hard failure signals
99
+ if result.get("committed") is False:
100
+ return "commit_failed"
101
+ if result.get("ok") is False:
102
+ return "commit_failed"
103
+ if result.get("status") == "failed":
104
+ return "commit_failed"
105
+ steps_ok = result.get("steps_ok")
106
+ steps_failed = result.get("steps_failed")
107
+ if steps_ok == 0 and (steps_failed is None or steps_failed > 0):
108
+ return "commit_failed"
109
+
110
+ # Partial success
111
+ if result.get("status") == "committed_with_errors":
112
+ return "committed_with_errors"
113
+ if (
114
+ isinstance(steps_failed, int) and steps_failed > 0
115
+ and isinstance(steps_ok, int) and steps_ok > 0
116
+ ):
117
+ return "committed_with_errors"
118
+
119
+ # Otherwise: clean success
120
+ return "committed"
121
+
122
+
123
def iterate_toward_goal_engine(
    candidate_move_sets: list,
    threshold: float,
    max_iterations: int,
    create_experiment_fn: Callable[[list], str],
    run_experiment_fn: Callable[[str], Any],
    commit_fn: Callable[[str, str], dict],
    discard_fn: Callable[[str], dict],
    on_timeout: str = "commit_best",
) -> IterationResult:
    """Run experiments repeatedly until winner_score >= threshold or timeout.

    Pure orchestration — all I/O happens through the injected callbacks,
    which may be sync or async; coroutines are awaited when reached. This
    keeps the engine reusable by both the sync test suite and the async
    MCP tool wrapper.

    See the module docstring for the full contract. Invariant: never issues
    raw undo calls — per-branch undo is the responsibility of
    run_experiment_fn; this loop only chooses commit vs discard.
    """
    import asyncio

    forwarded = dict(
        candidate_move_sets=candidate_move_sets,
        threshold=threshold,
        max_iterations=max_iterations,
        create_experiment_fn=create_experiment_fn,
        run_experiment_fn=run_experiment_fn,
        commit_fn=commit_fn,
        discard_fn=discard_fn,
        on_timeout=on_timeout,
    )

    # Route through asyncio only when at least one callback is a coroutine
    # function; otherwise take the plain sync path to avoid event-loop
    # overhead in tests.
    callbacks = (create_experiment_fn, run_experiment_fn, commit_fn, discard_fn)
    if any(inspect.iscoroutinefunction(cb) for cb in callbacks):
        return asyncio.run(_iterate_async_core(**forwarded))
    return _iterate_sync_core(**forwarded)
177
+
178
+
179
async def iterate_toward_goal_engine_async(
    candidate_move_sets: list,
    threshold: float,
    max_iterations: int,
    create_experiment_fn: Callable[[list], Any],
    run_experiment_fn: Callable[[str], Any],
    commit_fn: Callable[[str, str], Any],
    discard_fn: Callable[[str], Any],
    on_timeout: str = "commit_best",
) -> IterationResult:
    """Async variant — used by the MCP tool wrapper which has async callbacks."""
    forwarded = {
        "candidate_move_sets": candidate_move_sets,
        "threshold": threshold,
        "max_iterations": max_iterations,
        "create_experiment_fn": create_experiment_fn,
        "run_experiment_fn": run_experiment_fn,
        "commit_fn": commit_fn,
        "discard_fn": discard_fn,
        "on_timeout": on_timeout,
    }
    return await _iterate_async_core(**forwarded)
200
+
201
+
202
+ # ── Internal cores ─────────────────────────────────────────────────────────
203
+
204
def _iterate_sync_core(
    candidate_move_sets,
    threshold,
    max_iterations,
    create_experiment_fn,
    run_experiment_fn,
    commit_fn,
    discard_fn,
    on_timeout,
) -> IterationResult:
    """Synchronous inner loop behind iterate_toward_goal_engine.

    Mirrors _iterate_async_core statement-for-statement (minus awaits);
    keep the two in sync when editing. The order of commit_fn/discard_fn
    calls is observable to callers and must not be reordered.
    """
    # Guard: nothing to try — report without touching any callback.
    if not candidate_move_sets:
        return IterationResult(
            status="no_candidates",
            iterations_run=0,
            committed_experiment_id=None,
            committed_branch_id=None,
            final_score=0.0,
            reason="candidate_move_sets is empty",
        )

    steps: list[IterationStep] = []
    # best_score starts below any real score so the first valid winner
    # always becomes best-so-far (scores are assumed >= 0 by the caller).
    best_score = -1.0
    best_exp_id: Optional[str] = None
    best_branch_id: Optional[str] = None
    # One iteration per candidate set, capped by max_iterations.
    n = min(max_iterations, len(candidate_move_sets))

    for i in range(n):
        move_ids = candidate_move_sets[i]
        exp_id = create_experiment_fn(move_ids)
        winner_branch_id, winner_score = run_experiment_fn(exp_id)

        # Threshold only counts when an actual winning branch exists.
        met = winner_score >= threshold and winner_branch_id is not None
        steps.append(IterationStep(
            iteration=i,
            experiment_id=exp_id,
            winner_branch_id=winner_branch_id,
            winner_score=winner_score,
            threshold_met=met,
            note=(
                f"committed on iteration {i}" if met
                else f"below threshold (need {threshold}, got {winner_score})"
            ),
        ))

        if met:
            # Discard any prior best-so-far before committing the new winner —
            # otherwise the old non-winning experiment leaks in the store.
            if best_exp_id is not None and best_exp_id != exp_id:
                discard_fn(best_exp_id)
            commit_payload = commit_fn(exp_id, winner_branch_id)
            commit_status = _classify_commit_result(commit_payload)
            # Surface the raw payload only when it is a dict (see IterationResult).
            commit_dict = commit_payload if isinstance(commit_payload, dict) else None
            if commit_status == "commit_failed":
                return IterationResult(
                    status="commit_failed",
                    iterations_run=i + 1,
                    committed_experiment_id=None,
                    committed_branch_id=None,
                    final_score=winner_score,
                    steps=steps,
                    reason=(
                        f"threshold {threshold} met on iteration {i} but commit "
                        f"applied no steps; see commit_result"
                    ),
                    commit_result=commit_dict,
                )
            return IterationResult(
                status=commit_status,  # "committed" or "committed_with_errors"
                iterations_run=i + 1,
                committed_experiment_id=exp_id,
                committed_branch_id=winner_branch_id,
                final_score=winner_score,
                steps=steps,
                reason=(
                    f"threshold {threshold} met on iteration {i}"
                    if commit_status == "committed"
                    else f"threshold {threshold} met on iteration {i}; "
                    f"commit applied with partial failures (see commit_result)"
                ),
                commit_result=commit_dict,
            )

        if winner_branch_id is not None and winner_score > best_score:
            # Supersede previous best-so-far. It's now stale, free the slot.
            if best_exp_id is not None:
                discard_fn(best_exp_id)
            best_score = winner_score
            best_exp_id = exp_id
            best_branch_id = winner_branch_id
        else:
            # Not an improvement (or no winner at all): drop this experiment.
            discard_fn(exp_id)

    # Loop exhausted without meeting threshold — apply the timeout policy.
    if on_timeout == "commit_best" and best_exp_id and best_branch_id:
        commit_payload = commit_fn(best_exp_id, best_branch_id)
        commit_status = _classify_commit_result(commit_payload)
        commit_dict = commit_payload if isinstance(commit_payload, dict) else None
        if commit_status == "commit_failed":
            return IterationResult(
                status="commit_failed",
                iterations_run=n,
                committed_experiment_id=None,
                committed_branch_id=None,
                final_score=best_score,
                steps=steps,
                reason=(
                    f"max_iterations={n} reached; commit_best selected best-so-far "
                    f"(score {best_score}) but the commit applied no steps; "
                    f"see commit_result"
                ),
                commit_result=commit_dict,
            )
        return IterationResult(
            # A partial commit on timeout reports committed_with_errors,
            # not exhausted, so callers see the degraded outcome.
            status="exhausted" if commit_status == "committed" else "committed_with_errors",
            iterations_run=n,
            committed_experiment_id=best_exp_id,
            committed_branch_id=best_branch_id,
            final_score=best_score,
            steps=steps,
            reason=(
                f"max_iterations={n} reached, threshold {threshold} never met; "
                f"committed best-so-far with score {best_score}"
                + ("" if commit_status == "committed" else " (partial commit — see commit_result)")
            ),
            commit_result=commit_dict,
        )

    # Policy is discard_on_timeout (or no best exists): clean up and report.
    if best_exp_id:
        discard_fn(best_exp_id)
    return IterationResult(
        status="timeout_no_commit",
        iterations_run=n,
        committed_experiment_id=None,
        committed_branch_id=None,
        # Clamp the -1.0 sentinel so a no-winner run reports 0.0.
        final_score=max(best_score, 0.0),
        steps=steps,
        reason=f"max_iterations={n} reached, policy={on_timeout}, no commit issued",
    )
341
+
342
+
343
async def _iterate_async_core(
    candidate_move_sets,
    threshold,
    max_iterations,
    create_experiment_fn,
    run_experiment_fn,
    commit_fn,
    discard_fn,
    on_timeout,
) -> IterationResult:
    """Async inner loop behind both public engine entry points.

    Mirrors _iterate_sync_core statement-for-statement; keep the two in
    sync when editing. Callbacks may be sync or async — every call site
    goes through _maybe_await so plain return values pass straight through.
    The order of commit_fn/discard_fn calls is observable and must not be
    reordered.
    """
    # Guard: nothing to try — report without touching any callback.
    if not candidate_move_sets:
        return IterationResult(
            status="no_candidates",
            iterations_run=0,
            committed_experiment_id=None,
            committed_branch_id=None,
            final_score=0.0,
            reason="candidate_move_sets is empty",
        )

    async def _maybe_await(value):
        # Awaits coroutines/futures from async callbacks; passes sync
        # callback results through unchanged.
        if inspect.isawaitable(value):
            return await value
        return value

    steps: list[IterationStep] = []
    # Sentinel below any real score so the first valid winner wins the slot.
    best_score = -1.0
    best_exp_id: Optional[str] = None
    best_branch_id: Optional[str] = None
    # One iteration per candidate set, capped by max_iterations.
    n = min(max_iterations, len(candidate_move_sets))

    for i in range(n):
        move_ids = candidate_move_sets[i]
        exp_id = await _maybe_await(create_experiment_fn(move_ids))
        winner_branch_id, winner_score = await _maybe_await(run_experiment_fn(exp_id))

        # Threshold only counts when an actual winning branch exists.
        met = winner_score >= threshold and winner_branch_id is not None
        steps.append(IterationStep(
            iteration=i,
            experiment_id=exp_id,
            winner_branch_id=winner_branch_id,
            winner_score=winner_score,
            threshold_met=met,
            note=(
                f"committed on iteration {i}" if met
                else f"below threshold (need {threshold}, got {winner_score})"
            ),
        ))

        if met:
            # Discard stale best-so-far before committing the new winner so
            # the superseded experiment doesn't leak in the store.
            if best_exp_id is not None and best_exp_id != exp_id:
                await _maybe_await(discard_fn(best_exp_id))
            commit_payload = await _maybe_await(commit_fn(exp_id, winner_branch_id))
            commit_status = _classify_commit_result(commit_payload)
            # Surface the raw payload only when it is a dict (see IterationResult).
            commit_dict = commit_payload if isinstance(commit_payload, dict) else None
            if commit_status == "commit_failed":
                return IterationResult(
                    status="commit_failed",
                    iterations_run=i + 1,
                    committed_experiment_id=None,
                    committed_branch_id=None,
                    final_score=winner_score,
                    steps=steps,
                    reason=(
                        f"threshold {threshold} met on iteration {i} but commit "
                        f"applied no steps; see commit_result"
                    ),
                    commit_result=commit_dict,
                )
            return IterationResult(
                status=commit_status,  # "committed" or "committed_with_errors"
                iterations_run=i + 1,
                committed_experiment_id=exp_id,
                committed_branch_id=winner_branch_id,
                final_score=winner_score,
                steps=steps,
                reason=(
                    f"threshold {threshold} met on iteration {i}"
                    if commit_status == "committed"
                    else f"threshold {threshold} met on iteration {i}; "
                    f"commit applied with partial failures (see commit_result)"
                ),
                commit_result=commit_dict,
            )

        if winner_branch_id is not None and winner_score > best_score:
            # Supersede previous best-so-far; the old one is stale, free it.
            if best_exp_id is not None:
                await _maybe_await(discard_fn(best_exp_id))
            best_score = winner_score
            best_exp_id = exp_id
            best_branch_id = winner_branch_id
        else:
            # Not an improvement (or no winner at all): drop this experiment.
            await _maybe_await(discard_fn(exp_id))

    # Loop exhausted without meeting threshold — apply the timeout policy.
    if on_timeout == "commit_best" and best_exp_id and best_branch_id:
        commit_payload = await _maybe_await(commit_fn(best_exp_id, best_branch_id))
        commit_status = _classify_commit_result(commit_payload)
        commit_dict = commit_payload if isinstance(commit_payload, dict) else None
        if commit_status == "commit_failed":
            return IterationResult(
                status="commit_failed",
                iterations_run=n,
                committed_experiment_id=None,
                committed_branch_id=None,
                final_score=best_score,
                steps=steps,
                reason=(
                    f"max_iterations={n} reached; commit_best selected best-so-far "
                    f"(score {best_score}) but the commit applied no steps; "
                    f"see commit_result"
                ),
                commit_result=commit_dict,
            )
        return IterationResult(
            # A partial commit on timeout reports committed_with_errors,
            # not exhausted, so callers see the degraded outcome.
            status="exhausted" if commit_status == "committed" else "committed_with_errors",
            iterations_run=n,
            committed_experiment_id=best_exp_id,
            committed_branch_id=best_branch_id,
            final_score=best_score,
            steps=steps,
            reason=(
                f"max_iterations={n} reached, threshold {threshold} never met; "
                f"committed best-so-far with score {best_score}"
                + ("" if commit_status == "committed" else " (partial commit — see commit_result)")
            ),
            commit_result=commit_dict,
        )

    # Policy is discard_on_timeout (or no best exists): clean up and report.
    if best_exp_id:
        await _maybe_await(discard_fn(best_exp_id))
    return IterationResult(
        status="timeout_no_commit",
        iterations_run=n,
        committed_experiment_id=None,
        committed_branch_id=None,
        # Clamp the -1.0 sentinel so a no-winner run reports 0.0.
        final_score=max(best_score, 0.0),
        steps=steps,
        reason=f"max_iterations={n} reached, policy={on_timeout}, no commit issued",
    )
@@ -240,9 +240,9 @@ def evaluate_move(
240
240
  Takes before/after sonic snapshots and the active GoalVector.
241
241
  Returns a score and keep/undo recommendation.
242
242
 
243
- Snapshots should contain: spectrum (8-band dict), rms, peak.
244
- Get these from get_master_spectrum + get_master_rms before and after
245
- making changes.
243
+ Snapshots should contain: spectrum (9-band dict sub_low → air, or
244
+ 8-band from pre-v1.16 .amxd builds), rms, peak. Get these from
245
+ get_master_spectrum + get_master_rms before and after making changes.
246
246
 
247
247
  Hard rules enforce undo when:
248
248
  - No measurable improvement (delta <= 0)
@@ -471,3 +471,194 @@ def route_request(
471
471
 
472
472
  plan = conductor.classify_request(request)
473
473
  return plan.to_dict()
474
+
475
+
476
+ # ── iterate_toward_goal (closed evaluation loop) ──────────────────────
477
+
478
+
479
@mcp.tool()
async def iterate_toward_goal(
    ctx: Context,
    goal_vector: dict | str,
    candidate_move_sets: list,
    threshold: float = 0.70,
    max_iterations: int = 3,
    on_timeout: str = "commit_best",
    render_verify: bool = False,
) -> dict:
    """Close the evaluation loop: run experiments until threshold or timeout.

    Each iteration creates an experiment from one candidate_move_sets entry,
    runs all branches (which auto-undo per-branch via the experiment engine),
    and checks the top-ranked branch's score against the GoalVector. If score
    >= threshold, commit that branch permanently and stop. Otherwise discard
    the experiment and try the next candidate set. On timeout, commit the
    best-so-far (on_timeout='commit_best') or commit nothing
    (on_timeout='discard_on_timeout').

    Args:
        goal_vector: Compiled GoalVector dict (from compile_goal_vector) or
            JSON string. Provides the scoring target passed through to the
            evaluation scorer inside each run_experiment call.
        candidate_move_sets: List of move_id lists — one per iteration.
            Example: [["make_punchier", "widen_stereo"], ["tighten_low_end"]].
            Iteration 0 tries the first list, iteration 1 the second, etc.
            If shorter than max_iterations, iteration stops when exhausted.
        threshold: Winner score required to commit early. 0.0–1.0. Default 0.70.
        max_iterations: Hard cap on outer-loop iterations. Default 3.
        on_timeout: "commit_best" (commit highest-scoring experiment at end)
            or "discard_on_timeout" (no commit if threshold never met).
        render_verify: When True each branch captures + analyzes audio
            (~6s extra per branch). Default False.

    Returns: IterationResult dict with status, iterations_run,
        committed_experiment_id, committed_branch_id, final_score, steps,
        reason.

    Safety: Only commits when threshold_met OR (on_timeout='commit_best' AND
    best-so-far exists). Never double-undoes — per-branch undo is handled
    inside run_experiment; this tool only issues commit or discard.
    """
    # Deferred imports keep module import light and avoid cycles with the
    # experiment/evaluation packages.
    import time as _time
    from ..branches import seed_from_move_id
    from ..experiment import engine as exp_engine
    from ..experiment.tools import (
        _capture_snapshot,
        _capture_snapshot_with_render_verify,
    )
    from ..semantic_moves import registry, compiler
    from ..evaluation.policy import classify_branch_outcome
    from ._agent_os_engine import iterate_toward_goal_engine_async

    gv_dict = _parse_json_param(goal_vector, "goal_vector")

    # Validate shape early: a list of lists of move_id strings.
    if not isinstance(candidate_move_sets, list) or not all(
        isinstance(s, list) and all(isinstance(m, str) for m in s)
        for s in candidate_move_sets
    ):
        return {
            "error": (
                "candidate_move_sets must be a list of lists of move_id strings"
            )
        }

    ableton = _get_ableton(ctx)
    bridge = ctx.lifespan_context.get("m4l")
    mcp_registry = ctx.lifespan_context.get("mcp_dispatch", {})

    # Pre-validate the GoalVector once — the eval_fn closure reuses this.
    goal = engine.validate_goal_vector(
        request_text=gv_dict.get("request_text", "iterate_toward_goal"),
        targets=gv_dict.get("targets", {}),
        protect=gv_dict.get("protect", {}),
        mode=gv_dict.get("mode", "improve"),
        aggression=float(gv_dict.get("aggression", 0.5)),
        research_mode=gv_dict.get("research_mode", "none"),
    )

    # ── Callbacks wire the pure-logic engine to real experiment I/O ──

    async def _create(move_ids: list[str]) -> str:
        # create_experiment_fn: build an experiment (one branch per seed)
        # and hand back its id. Second-resolution timestamp in kernel_id —
        # NOTE(review): two creates in the same second share a kernel_id;
        # presumably acceptable, confirm against exp_engine.
        seeds = [seed_from_move_id(mid) for mid in move_ids]
        kernel_id = f"iter_kern_{int(_time.time())}"
        exp = exp_engine.create_experiment_from_seeds(
            request_text=gv_dict.get("request_text", "iterate_toward_goal"),
            seeds=seeds,
            kernel_id=kernel_id,
        )
        return exp.experiment_id

    async def _run(experiment_id: str):
        # run_experiment_fn: execute every pending branch, evaluate each
        # against the GoalVector, and return (top_branch_id, top_score).
        experiment = exp_engine.get_experiment(experiment_id)
        if experiment is None:
            return None, 0.0

        # render_verify trades ~6s per branch for an audio capture+analysis.
        if render_verify:
            capture_fn = lambda: _capture_snapshot_with_render_verify(ctx, 2.0)
        else:
            capture_fn = lambda: _capture_snapshot(ctx)

        for branch in experiment.branches:
            if branch.status != "pending":
                continue

            # Compile plan from semantic move when branch doesn't carry one
            if branch.compiled_plan is None and branch.move_id:
                move = registry.get_move(branch.move_id)
                if move is None:
                    branch.status = "failed"
                    continue
                session_info = ableton.send_command("get_session_info")
                kernel = {"session_info": session_info, "mode": "explore"}
                plan = compiler.compile(move, kernel)
                branch.compiled_plan = plan.to_dict()

            # Still no plan (no move_id either): nothing to run.
            if branch.compiled_plan is None:
                branch.status = "failed"
                continue

            await exp_engine.run_branch_async(
                branch=branch,
                ableton=ableton,
                compiled_plan=branch.compiled_plan,
                capture_fn=capture_fn,
                bridge=bridge,
                mcp_registry=mcp_registry,
                ctx=ctx,
            )

            def eval_fn(before, after):
                # Score the before/after snapshots against the pre-validated
                # goal, then map through the shared outcome policy.
                score_result = engine.compute_evaluation_score(goal, before, after)
                outcome = classify_branch_outcome(
                    score=score_result.get("score", 0.0),
                    # Protection violation is inferred from the scorer's notes
                    # text — NOTE(review): string matching on "protected" is
                    # fragile; confirm the scorer has no structured flag.
                    protection_violated=not score_result.get("keep_change", True)
                    and "protected" in " ".join(score_result.get("notes", [])).lower(),
                    measurable_count=0,
                    target_count=0,
                    goal_progress=score_result.get("goal_progress", 0.0),
                    exploration_rules=False,
                )
                return {
                    "score": outcome.score,
                    "keep_change": outcome.keep_change,
                    "status": outcome.status,
                    "note": outcome.note,
                    "dimension_changes": score_result.get("dimension_changes", {}),
                }

            exp_engine.evaluate_branch(branch, eval_fn)
            # Map the evaluation verdict onto branch status; anything other
            # than keep/undo leaves the status as evaluate_branch set it.
            if branch.evaluation and branch.evaluation.get("status") == "keep":
                branch.status = "evaluated"
            elif branch.evaluation and branch.evaluation.get("status") == "undo":
                branch.status = "rejected"

        ranked = experiment.ranked_branches()
        if not ranked:
            return None, 0.0
        top = ranked[0]
        return top.branch_id, float(top.score or 0.0)

    async def _commit(experiment_id: str, branch_id: str) -> dict:
        # commit_fn: permanently apply the winning branch's changes.
        return await exp_engine.commit_branch_async(
            exp_engine.get_experiment(experiment_id),
            branch_id,
            ableton,
            bridge=bridge,
            mcp_registry=mcp_registry,
            ctx=ctx,
        )

    async def _discard(experiment_id: str) -> dict:
        # discard_fn: drop a non-winning experiment (sync call, async shim).
        return exp_engine.discard_experiment(experiment_id)

    # Delegate the loop/commit/discard policy to the pure engine.
    result = await iterate_toward_goal_engine_async(
        candidate_move_sets=candidate_move_sets,
        threshold=float(threshold),
        max_iterations=int(max_iterations),
        create_experiment_fn=_create,
        run_experiment_fn=_run,
        commit_fn=_commit,
        discard_fn=_discard,
        on_timeout=on_timeout,
    )
    return result.to_dict()