livepilot 1.12.2 → 1.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/CHANGELOG.md +219 -0
  2. package/README.md +7 -7
  3. package/m4l_device/LivePilot_Analyzer.amxd +0 -0
  4. package/m4l_device/livepilot_bridge.js +1 -1
  5. package/mcp_server/__init__.py +1 -1
  6. package/mcp_server/branches/__init__.py +34 -0
  7. package/mcp_server/branches/types.py +286 -0
  8. package/mcp_server/composer/__init__.py +10 -1
  9. package/mcp_server/composer/branch_producer.py +349 -0
  10. package/mcp_server/composer/tools.py +58 -1
  11. package/mcp_server/evaluation/policy.py +227 -2
  12. package/mcp_server/experiment/engine.py +47 -11
  13. package/mcp_server/experiment/models.py +112 -8
  14. package/mcp_server/experiment/tools.py +502 -38
  15. package/mcp_server/memory/taste_graph.py +84 -11
  16. package/mcp_server/persistence/taste_store.py +21 -5
  17. package/mcp_server/runtime/session_kernel.py +46 -0
  18. package/mcp_server/runtime/tools.py +29 -3
  19. package/mcp_server/server.py +1 -0
  20. package/mcp_server/synthesis_brain/__init__.py +53 -0
  21. package/mcp_server/synthesis_brain/adapters/__init__.py +34 -0
  22. package/mcp_server/synthesis_brain/adapters/analog.py +273 -0
  23. package/mcp_server/synthesis_brain/adapters/base.py +86 -0
  24. package/mcp_server/synthesis_brain/adapters/drift.py +271 -0
  25. package/mcp_server/synthesis_brain/adapters/meld.py +261 -0
  26. package/mcp_server/synthesis_brain/adapters/operator.py +292 -0
  27. package/mcp_server/synthesis_brain/adapters/wavetable.py +364 -0
  28. package/mcp_server/synthesis_brain/engine.py +91 -0
  29. package/mcp_server/synthesis_brain/models.py +121 -0
  30. package/mcp_server/synthesis_brain/timbre.py +194 -0
  31. package/mcp_server/synthesis_brain/tools.py +231 -0
  32. package/mcp_server/tools/_conductor.py +144 -0
  33. package/mcp_server/wonder_mode/engine.py +324 -0
  34. package/mcp_server/wonder_mode/tools.py +153 -1
  35. package/package.json +2 -2
  36. package/remote_script/LivePilot/__init__.py +1 -1
  37. package/server.json +3 -3
@@ -16,6 +16,7 @@ from typing import Optional
16
16
  from fastmcp import Context
17
17
 
18
18
  from ..server import mcp
19
+ from ..branches import BranchSeed
19
20
  from . import engine
20
21
  from .models import BranchSnapshot
21
22
  import logging
@@ -28,7 +29,11 @@ def _get_ableton(ctx: Context):
28
29
 
29
30
 
30
31
  def _capture_snapshot(ctx: Context) -> BranchSnapshot:
31
- """Capture current session state as a BranchSnapshot."""
32
+ """Capture current session state as a BranchSnapshot (fast path).
33
+
34
+ Uses live meters + spectral cache. No audio rendering. Called when
35
+ render_verify is off (default) — adds no latency to branch trials.
36
+ """
32
37
  ableton = _get_ableton(ctx)
33
38
  spectral = ctx.lifespan_context.get("spectral")
34
39
 
@@ -57,24 +62,187 @@ def _capture_snapshot(ctx: Context) -> BranchSnapshot:
57
62
  return snapshot
58
63
 
59
64
 
65
+ def _capture_snapshot_with_render_verify(
66
+ ctx: Context, duration_seconds: float = 2.0,
67
+ ) -> BranchSnapshot:
68
+ """Capture state AND render audio for fingerprint extraction (PR4).
69
+
70
+ Runs the fast-path snapshot first, then additionally:
71
+ 1. capture_audio duration_seconds seconds from master
72
+ 2. analyze_loudness on the captured file
73
+ 3. analyze_spectrum_offline on the captured file
74
+ 4. extract_timbre_fingerprint from spectrum + loudness
75
+
76
+ Attaches capture_path, loudness, spectral_shape, and fingerprint to
77
+ the snapshot. When any stage fails (bridge unavailable, analyzer
78
+ missing, etc.), that stage's field is left None and a debug log is
79
+ emitted — render-verify degrades gracefully to the fast-path snapshot.
80
+
81
+ Expected added latency: duration_seconds (capture) + ~1-2s (offline
82
+ analysis). For a 2-branch experiment with 2s captures, that's
83
+ ~8-10s of overhead vs the default path.
84
+ """
85
+ snapshot = _capture_snapshot(ctx)
86
+
87
+ ableton = _get_ableton(ctx)
88
+ bridge = ctx.lifespan_context.get("m4l")
89
+
90
+ # Step 1: capture_audio is a bridge command — route via bridge.send_command
91
+ # if available, else fall back to ableton TCP which doesn't support it.
92
+ capture_path = None
93
+ if bridge is not None:
94
+ try:
95
+ maybe = bridge.send_command("capture_audio", float(duration_seconds), "master", "")
96
+ # bridge.send_command may return awaitable or plain dict.
97
+ import inspect
98
+ if inspect.isawaitable(maybe):
99
+ # We're in a sync context here — best effort, skip await.
100
+ # Render-verify from within sync capture_fn is the compromise;
101
+ # the async variant wires through from run_branch_async which
102
+ # does have await. Use the fast-path capture only.
103
+ logger.debug("capture_audio returned awaitable in sync context; skipping render-verify for this snapshot")
104
+ return snapshot
105
+ if isinstance(maybe, dict):
106
+ capture_path = maybe.get("file_path") or maybe.get("path") or maybe.get("filename")
107
+ except Exception as exc:
108
+ logger.debug("render-verify capture_audio failed: %s", exc)
109
+ if not capture_path:
110
+ return snapshot # graceful degrade — caller still gets fast-path data
111
+ snapshot.capture_path = capture_path
112
+
113
+ # Step 2-3: offline loudness + spectrum analysis (MCP tools, sync wrappers)
114
+ try:
115
+ from ..tools.analyzer import analyze_loudness as _analyze_loudness
116
+ loud = _analyze_loudness(capture_path)
117
+ if isinstance(loud, dict) and "error" not in loud:
118
+ snapshot.loudness = loud
119
+ except Exception as exc:
120
+ logger.debug("render-verify analyze_loudness failed: %s", exc)
121
+
122
+ try:
123
+ from ..tools.analyzer import analyze_spectrum_offline as _analyze_spectrum
124
+ spec = _analyze_spectrum(capture_path)
125
+ if isinstance(spec, dict) and "error" not in spec:
126
+ snapshot.spectral_shape = {
127
+ "centroid": spec.get("centroid_hz"),
128
+ "flatness": spec.get("spectral_flatness"),
129
+ "rolloff": spec.get("rolloff_hz"),
130
+ "bandwidth": spec.get("bandwidth_hz"),
131
+ # Back-map the 5-band balance into the 8-band keys our
132
+ # fingerprint extractor expects. Coarse mapping:
133
+ "bands": _map_5band_to_8band(spec.get("band_balance", {})),
134
+ }
135
+ except Exception as exc:
136
+ logger.debug("render-verify analyze_spectrum_offline failed: %s", exc)
137
+
138
+ # Step 4: build fingerprint from what we got
139
+ try:
140
+ from ..synthesis_brain import extract_timbre_fingerprint
141
+ fp = extract_timbre_fingerprint(
142
+ spectrum=(snapshot.spectral_shape or {}).get("bands"),
143
+ loudness=snapshot.loudness,
144
+ spectral_shape=snapshot.spectral_shape,
145
+ )
146
+ snapshot.fingerprint = fp.to_dict()
147
+ except Exception as exc:
148
+ logger.debug("render-verify extract_timbre_fingerprint failed: %s", exc)
149
+
150
+ return snapshot
151
+
152
+
153
+ def _map_5band_to_8band(b5: dict) -> dict:
154
+ """Adapt analyze_spectrum_offline's 5-band balance to the 8-band shape
155
+ extract_timbre_fingerprint expects.
156
+
157
+ 5-band: sub_60hz, low_250hz, mid_2khz, high_8khz, air_16khz
158
+ 8-band: sub, low, low_mid, mid, high_mid, high, very_high, ultra
159
+ """
160
+ if not isinstance(b5, dict):
161
+ return {}
162
+ # Conservative mapping — split each 5-band bucket across the 8-band shape.
163
+ return {
164
+ "sub": float(b5.get("sub_60hz", 0.0) or 0.0),
165
+ "low": float(b5.get("low_250hz", 0.0) or 0.0) * 0.6,
166
+ "low_mid": float(b5.get("low_250hz", 0.0) or 0.0) * 0.4,
167
+ "mid": float(b5.get("mid_2khz", 0.0) or 0.0) * 0.6,
168
+ "high_mid": float(b5.get("mid_2khz", 0.0) or 0.0) * 0.4,
169
+ "high": float(b5.get("high_8khz", 0.0) or 0.0) * 0.6,
170
+ "very_high": float(b5.get("high_8khz", 0.0) or 0.0) * 0.4,
171
+ "ultra": float(b5.get("air_16khz", 0.0) or 0.0),
172
+ }
173
+
174
+
60
175
  @mcp.tool()
61
176
  def create_experiment(
62
177
  ctx: Context,
63
178
  request_text: str,
64
179
  move_ids: Optional[list] = None,
65
180
  limit: int = 3,
181
+ seeds: Optional[list] = None,
182
+ compiled_plans: Optional[list] = None,
66
183
  ) -> dict:
67
184
  """Create an experiment set to compare multiple approaches.
68
185
 
69
- If move_ids is provided, creates one branch per move.
70
- Otherwise, uses propose_next_best_move to find candidates.
186
+ Three input modes (in priority order):
71
187
 
72
- request_text: what the user wants (e.g., "make this punchier")
73
- move_ids: specific moves to try (e.g., ["make_punchier", "tighten_low_end"])
74
- limit: max branches when auto-proposing (default 3)
188
+ 1. seeds (PR3+): a list of BranchSeed dicts. Each seed becomes one branch.
189
+ compiled_plans (optional parallel list) attaches pre-compiled plans
190
+ for freeform / synthesis / composer producers. Seed dict shape:
191
+ {seed_id, source, move_id, hypothesis, protected_qualities,
192
+ affected_scope, distinctness_reason, risk_label, novelty_label,
193
+ analytical_only}
194
+ Missing fields default per BranchSeed. This is the canonical path
195
+ for producers that have already done their own selection work.
196
+
197
+ 2. move_ids: legacy path — one semantic_move seed per move_id.
198
+ Unchanged behavior; internally delegates to the seeds path.
199
+
200
+ 3. Auto-proposal: neither seeds nor move_ids provided. Scans the semantic
201
+ move registry by keyword overlap with request_text and takes the top
202
+ ``limit`` moves (default 3).
75
203
 
76
204
  Returns: experiment set with branch IDs ready for run_experiment.
77
205
  """
206
+ # ── Mode 1: seeds provided ──────────────────────────────────────────
207
+ if seeds:
208
+ rehydrated: list[BranchSeed] = []
209
+ for i, s in enumerate(seeds):
210
+ if isinstance(s, BranchSeed):
211
+ rehydrated.append(s)
212
+ elif isinstance(s, dict):
213
+ try:
214
+ rehydrated.append(BranchSeed(**s))
215
+ except TypeError as exc:
216
+ return {"error": f"seeds[{i}] invalid: {exc}"}
217
+ else:
218
+ return {
219
+ "error": (
220
+ f"seeds[{i}] must be dict or BranchSeed, "
221
+ f"got {type(s).__name__}"
222
+ )
223
+ }
224
+
225
+ if compiled_plans is not None and len(compiled_plans) != len(rehydrated):
226
+ return {
227
+ "error": (
228
+ f"compiled_plans length ({len(compiled_plans)}) must match "
229
+ f"seeds length ({len(rehydrated)})"
230
+ )
231
+ }
232
+
233
+ ableton = _get_ableton(ctx)
234
+ ableton.send_command("get_session_info")
235
+ kernel_id = f"kern_{int(time.time())}"
236
+
237
+ experiment = engine.create_experiment_from_seeds(
238
+ request_text=request_text,
239
+ seeds=rehydrated,
240
+ kernel_id=kernel_id,
241
+ compiled_plans=compiled_plans,
242
+ )
243
+ return experiment.to_dict()
244
+
245
+ # ── Mode 2/3: legacy move_ids path ──────────────────────────────────
78
246
  if not move_ids:
79
247
  # Auto-propose moves from the registry
80
248
  from ..semantic_moves import registry
@@ -119,19 +287,46 @@ def create_experiment(
119
287
  async def run_experiment(
120
288
  ctx: Context,
121
289
  experiment_id: str,
290
+ exploration_rules: bool = False,
291
+ render_verify: bool = False,
292
+ render_duration_seconds: float = 2.0,
122
293
  ) -> dict:
123
294
  """Run all pending branches in an experiment.
124
295
 
125
296
  For each branch:
126
297
  1. Compile the semantic move against current session
298
+ (skipped when branch.compiled_plan is already set — PR3+)
127
299
  2. Capture before state
128
- 3. Execute the compiled plan (through the async router — v1.10.3 truth)
300
+ 3. Execute the compiled plan (through the async router)
129
301
  4. Capture after state
130
302
  5. Undo all successful steps (revert to checkpoint)
131
- 6. Evaluate the branch
303
+ 6. Evaluate the branch and classify its outcome via evaluation.policy
132
304
  7. Record per-step results on branch.execution_log
133
305
 
134
306
  Branches run sequentially (Ableton has linear undo).
307
+
308
+ exploration_rules: when True, branches that fail technical gates
309
+ (score < 0.40, non-positive measurable delta) are classified as
310
+ "interesting_but_failed" instead of "failed" — they stay in the
311
+ experiment for audit but don't appear in the ranking. Protection
312
+ violations STILL force undo regardless of this flag — that's a
313
+ safety invariant, not a taste judgment.
314
+
315
+ render_verify (PR4/v2): when True, each branch also captures audio
316
+ before and after execution, analyzes spectrum + loudness offline,
317
+ extracts a TimbralFingerprint, and attaches the before/after
318
+ fingerprint + diff to the branch snapshots. The diff is fed into
319
+ classify_branch_outcome as real measurable evidence — the
320
+ classifier no longer relies on meter heuristics alone. Default
321
+ False preserves speed; opt in when you want the classifier to
322
+ respond to spectral movement, not just track-meter drops.
323
+
324
+ render_duration_seconds: capture length per snapshot when
325
+ render_verify is on. Default 2.0 seconds. Each branch adds
326
+ ~2 * duration_seconds of capture time plus ~1-2s of offline
327
+ analysis — a 3-branch experiment at 2s adds ~15-18s.
328
+
329
+ Default render_verify=False preserves pre-PR4 behavior exactly.
135
330
  """
136
331
  experiment = engine.get_experiment(experiment_id)
137
332
  if not experiment:
@@ -149,51 +344,186 @@ async def run_experiment(
149
344
  if branch.status != "pending":
150
345
  continue
151
346
 
152
- # Compile the move
153
- move = registry.get_move(branch.move_id)
154
- if not move:
155
- branch.status = "failed"
156
- branch.score = 0.0
157
- branch.evaluation = {"error": f"Move {branch.move_id} not found"}
158
- results.append(branch.to_dict())
159
- continue
160
-
161
- session_info = ableton.send_command("get_session_info")
162
- kernel = {"session_info": session_info, "mode": "explore"}
163
- plan = compiler.compile(move, kernel)
164
- compiled_dict = plan.to_dict()
347
+ # PR3: respect a pre-existing compiled_plan on the branch (freeform /
348
+ # synthesis / composer producers bring their own). Only compile from
349
+ # move_id when the branch arrived without a plan — which requires a
350
+ # semantic_move seed (or a legacy move-only branch).
351
+ compiled_dict = branch.compiled_plan
352
+
353
+ if compiled_dict is None:
354
+ # Analytical-only branches short-circuit — no plan to run.
355
+ # Marked with status="analytical" so ranked_branches()
356
+ # (which only surfaces "evaluated") excludes them, and
357
+ # commit_experiment refuses to re-apply them.
358
+ if branch.seed is not None and branch.seed.analytical_only:
359
+ branch.status = "analytical"
360
+ branch.score = 0.0
361
+ branch.evaluation = {
362
+ "score": 0.0,
363
+ "keep_change": False,
364
+ "status": "analytical",
365
+ "note": "analytical_only branch — no execution path",
366
+ }
367
+ results.append(branch.to_dict())
368
+ continue
369
+
370
+ if not branch.move_id:
371
+ branch.status = "failed"
372
+ branch.score = 0.0
373
+ branch.evaluation = {
374
+ "error": (
375
+ "Branch has no compiled_plan and no move_id — "
376
+ "freeform producers must pre-populate compiled_plan"
377
+ )
378
+ }
379
+ results.append(branch.to_dict())
380
+ continue
381
+
382
+ # Compile from semantic move
383
+ move = registry.get_move(branch.move_id)
384
+ if not move:
385
+ branch.status = "failed"
386
+ branch.score = 0.0
387
+ branch.evaluation = {"error": f"Move {branch.move_id} not found"}
388
+ results.append(branch.to_dict())
389
+ continue
390
+
391
+ session_info = ableton.send_command("get_session_info")
392
+ kernel = {"session_info": session_info, "mode": "explore"}
393
+ plan = compiler.compile(move, kernel)
394
+ compiled_dict = plan.to_dict()
395
+
396
+ # Pick the capture function — render-verify mode captures audio
397
+ # and extracts a TimbralFingerprint, adding latency but giving
398
+ # classify_branch_outcome real measurable evidence.
399
+ if render_verify:
400
+ capture_fn = lambda: _capture_snapshot_with_render_verify(
401
+ ctx, duration_seconds=render_duration_seconds,
402
+ )
403
+ else:
404
+ capture_fn = lambda: _capture_snapshot(ctx)
165
405
 
166
406
  # Run the branch through the async router
167
407
  await engine.run_branch_async(
168
408
  branch=branch,
169
409
  ableton=ableton,
170
410
  compiled_plan=compiled_dict,
171
- capture_fn=lambda: _capture_snapshot(ctx),
411
+ capture_fn=capture_fn,
172
412
  bridge=bridge,
173
413
  mcp_registry=mcp_registry,
174
414
  ctx=ctx,
175
415
  )
176
416
 
177
- # Evaluate
417
+ # Evaluate — score via the inline heuristic, then classify via
418
+ # evaluation.policy for a unified keep/undo/interesting_but_failed
419
+ # decision (PR7).
420
+ from ..evaluation.policy import classify_branch_outcome
421
+
178
422
  def eval_fn(before, after):
179
- # Simple heuristic evaluation when spectral data isn't available
423
+ # Simple heuristic evaluation when spectral data isn't available.
424
+ # protection_violated is rough — derived from whether any track
425
+ # went silent (signal lost on a track = protection violation).
180
426
  score = 0.5 # Neutral
427
+ protection_violated = False
428
+ lost_tracks = 0
429
+
181
430
  if before.get("track_meters") and after.get("track_meters"):
182
- # Check all tracks still alive
183
431
  before_alive = sum(1 for t in before["track_meters"] if t.get("level", 0) > 0)
184
432
  after_alive = sum(1 for t in after["track_meters"] if t.get("level", 0) > 0)
185
- if after_alive >= before_alive:
433
+ lost_tracks = max(0, before_alive - after_alive)
434
+ if lost_tracks == 0:
186
435
  score += 0.1
187
436
  else:
188
- score -= 0.2 # Lost a track
437
+ score -= 0.2
438
+ # A track going silent is a protection violation — always
439
+ # undo regardless of exploration mode.
440
+ protection_violated = True
189
441
 
190
442
  if before.get("spectrum") and after.get("spectrum"):
191
- # Spectral balance improvement heuristic
192
- score += 0.1 # Bonus for having spectral data
193
-
194
- return {"score": round(score, 3), "keep_change": score > 0.45}
443
+ score += 0.1 # presence-of-data bonus
444
+
445
+ score = round(score, 3)
446
+
447
+ # PR4 — fingerprint diff to feed the classifier when render-verify
448
+ # is on. When both before/after have fingerprints, compute the
449
+ # per-dimension diff via synthesis_brain.diff_fingerprint and let
450
+ # classify_branch_outcome derive real measurable_count + goal_progress
451
+ # from it. Much stronger evidence than the meter heuristic alone.
452
+ fingerprint_diff = None
453
+ timbral_target = None
454
+ before_fp = before.get("fingerprint")
455
+ after_fp = after.get("fingerprint")
456
+ if before_fp and after_fp:
457
+ try:
458
+ from ..synthesis_brain import diff_fingerprint, TimbralFingerprint
459
+ before_obj = TimbralFingerprint(**{
460
+ k: v for k, v in before_fp.items()
461
+ if k in TimbralFingerprint.__dataclass_fields__
462
+ })
463
+ after_obj = TimbralFingerprint(**{
464
+ k: v for k, v in after_fp.items()
465
+ if k in TimbralFingerprint.__dataclass_fields__
466
+ })
467
+ fingerprint_diff = diff_fingerprint(before_obj, after_obj)
468
+ except Exception as exc:
469
+ logger.debug("fingerprint diff failed: %s", exc)
470
+
471
+ # If the branch's seed was a synthesis seed with a timbral target
472
+ # in its producer_payload, score diff in that target's direction.
473
+ if branch.seed is not None and branch.seed.source == "synthesis":
474
+ target_hint = (branch.seed.producer_payload or {}).get("timbral_target")
475
+ if isinstance(target_hint, dict):
476
+ timbral_target = target_hint
477
+
478
+ outcome = classify_branch_outcome(
479
+ score=score,
480
+ protection_violated=protection_violated,
481
+ measurable_count=0,
482
+ target_count=0,
483
+ goal_progress=0.0,
484
+ exploration_rules=exploration_rules,
485
+ fingerprint_diff=fingerprint_diff,
486
+ timbral_target=timbral_target,
487
+ )
488
+
489
+ result_eval = {
490
+ "score": outcome.score,
491
+ "keep_change": outcome.keep_change,
492
+ "status": outcome.status,
493
+ "failure_reasons": outcome.failure_reasons,
494
+ "note": outcome.note,
495
+ "lost_tracks": lost_tracks,
496
+ }
497
+ # Surface fingerprint evidence on the evaluation dict so
498
+ # compare_experiments can show per-branch spectral deltas.
499
+ if fingerprint_diff is not None:
500
+ result_eval["fingerprint_diff"] = fingerprint_diff
501
+ result_eval["fingerprint_before"] = before_fp
502
+ result_eval["fingerprint_after"] = after_fp
503
+ return result_eval
195
504
 
196
505
  engine.evaluate_branch(branch, eval_fn)
506
+
507
+ # Promote the classified status onto the branch. ranked_branches()
508
+ # only surfaces status="evaluated", so branches the classifier
509
+ # rejected ("undo") or retained for audit ("interesting_but_failed")
510
+ # are both correctly excluded from winner recommendations.
511
+ # Without this mapping, a branch the hard-rule classifier explicitly
512
+ # rejected could still win a ranking and be re-applied by commit.
513
+ if branch.evaluation and branch.evaluation.get("status"):
514
+ status = branch.evaluation["status"]
515
+ if status == "keep":
516
+ branch.status = "evaluated"
517
+ elif status == "interesting_but_failed":
518
+ branch.status = "interesting_but_failed"
519
+ elif status == "undo":
520
+ # Undo-classified branches had their steps rolled back by
521
+ # run_branch_async's undo pass; they must NOT be eligible
522
+ # winners. "rejected" is a terminal branch status distinct
523
+ # from "failed" (execution failed) and distinct from
524
+ # "interesting_but_failed" (exploration-mode retention).
525
+ branch.status = "rejected"
526
+
197
527
  results.append(branch.to_dict())
198
528
 
199
529
  return {
@@ -221,6 +551,30 @@ def compare_experiments(
221
551
  return {"error": f"Experiment {experiment_id} not found"}
222
552
 
223
553
  ranked = experiment.ranked_branches()
554
+
555
+ # Surface non-winning branch categories separately. None of these are
556
+ # candidates for commit — ranked_branches() filters them out — but the
557
+ # user sees what was tried.
558
+ interesting_failed = [
559
+ b for b in experiment.branches if b.status == "interesting_but_failed"
560
+ ]
561
+ rejected = [
562
+ b for b in experiment.branches if b.status == "rejected"
563
+ ]
564
+ analytical = [
565
+ b for b in experiment.branches if b.status == "analytical"
566
+ ]
567
+
568
+ def _audit_row(b):
569
+ return {
570
+ "branch_id": b.branch_id,
571
+ "name": b.name,
572
+ "move_id": b.move_id,
573
+ "score": b.score,
574
+ "summary": b.compiled_plan.get("summary", "") if b.compiled_plan else "",
575
+ "evaluation": b.evaluation,
576
+ }
577
+
224
578
  return {
225
579
  "experiment_id": experiment_id,
226
580
  "request": experiment.request_text,
@@ -228,16 +582,14 @@ def compare_experiments(
228
582
  "ranking": [
229
583
  {
230
584
  "rank": i + 1,
231
- "branch_id": b.branch_id,
232
- "name": b.name,
233
- "move_id": b.move_id,
234
- "score": b.score,
235
- "summary": b.compiled_plan.get("summary", "") if b.compiled_plan else "",
236
- "evaluation": b.evaluation,
585
+ **_audit_row(b),
237
586
  }
238
587
  for i, b in enumerate(ranked)
239
588
  ],
240
589
  "winner": ranked[0].to_dict() if ranked else None,
590
+ "interesting_but_failed": [_audit_row(b) for b in interesting_failed],
591
+ "rejected": [_audit_row(b) for b in rejected],
592
+ "analytical": [_audit_row(b) for b in analytical],
241
593
  }
242
594
 
243
595
 
@@ -258,11 +610,110 @@ async def commit_experiment(
258
610
  if not experiment:
259
611
  return {"error": f"Experiment {experiment_id} not found"}
260
612
 
613
+ # Refuse to commit branches the classifier rejected or that were
614
+ # analytical-only. Those statuses exist specifically so callers
615
+ # can't route them into re-application, and ranked_branches()
616
+ # already excludes them — so reaching commit with such a branch
617
+ # means the caller is bypassing the ranking layer.
618
+ target = experiment.get_branch(branch_id)
619
+ if target is None:
620
+ return {"error": f"Branch {branch_id} not found"}
621
+ if target.status in ("rejected", "analytical", "failed"):
622
+ return {
623
+ "error": (
624
+ f"Cannot commit branch with status '{target.status}'. "
625
+ f"'rejected' = hard-rule classifier rolled back; "
626
+ f"'analytical' = no executable plan; "
627
+ f"'failed' = zero steps applied successfully. "
628
+ f"Use compare_experiments to see eligible winners "
629
+ f"(only status='evaluated' branches are ranking candidates)."
630
+ ),
631
+ "branch_id": branch_id,
632
+ "branch_status": target.status,
633
+ }
634
+
261
635
  ableton = _get_ableton(ctx)
262
636
  bridge = ctx.lifespan_context.get("m4l")
263
637
  mcp_registry = ctx.lifespan_context.get("mcp_dispatch", {})
264
638
 
265
- return await engine.commit_branch_async(
639
+ # PR3 — composer winner escalation. When the winning branch came from
640
+ # the composer producer, the plan we auditioned was a lightweight
641
+ # scaffold (set_tempo + create_midi_track + create_scene/set_scene_name).
642
+ # Commit should deliver a populated session, not an empty skeleton.
643
+ # Re-run ComposerEngine.compose() on the intent captured in the seed's
644
+ # producer_payload, replace the branch's compiled_plan with the full
645
+ # resolved plan, then commit through the normal async router.
646
+ #
647
+ # When escalation fails (missing intent, zero resolved layers, etc.),
648
+ # fall back to committing the scaffold. Users get tracks + scenes
649
+ # they can populate manually, which is better than an error.
650
+ escalation_info = None
651
+ if (
652
+ target.seed is not None
653
+ and target.seed.source == "composer"
654
+ and target.seed.producer_payload
655
+ ):
656
+ try:
657
+ from ..composer import escalate_composer_branch
658
+ splice_client = ctx.lifespan_context.get("splice_client")
659
+ # browser_client only present on servers with live browser wiring;
660
+ # pass None defensively.
661
+ browser_client = ctx.lifespan_context.get("browser_client")
662
+ search_roots = ctx.lifespan_context.get("sample_search_roots") or []
663
+
664
+ escalation_info = await escalate_composer_branch(
665
+ producer_payload=target.seed.producer_payload,
666
+ search_roots=search_roots,
667
+ splice_client=splice_client,
668
+ browser_client=browser_client,
669
+ )
670
+
671
+ if escalation_info.get("ok"):
672
+ # Swap the compiled_plan for the fully resolved one before
673
+ # commit_branch_async runs it. Keep the old scaffold on the
674
+ # evaluation dict for audit.
675
+ old_plan = target.compiled_plan or {}
676
+ new_plan = {
677
+ "steps": escalation_info["plan"],
678
+ "step_count": escalation_info["step_count"],
679
+ "summary": (
680
+ f"Composer escalated: {escalation_info['layer_count']} "
681
+ f"layers, {escalation_info['step_count']} steps "
682
+ f"({len(escalation_info['resolved_samples'])} samples resolved)"
683
+ ),
684
+ }
685
+ target.compiled_plan = new_plan
686
+ if target.evaluation is None:
687
+ target.evaluation = {}
688
+ target.evaluation["composer_escalation"] = {
689
+ "escalated": True,
690
+ "scaffold_step_count": old_plan.get("step_count", 0),
691
+ "resolved_step_count": escalation_info["step_count"],
692
+ "layer_count": escalation_info["layer_count"],
693
+ "resolved_samples": escalation_info["resolved_samples"],
694
+ "warnings": escalation_info.get("warnings", []),
695
+ }
696
+ else:
697
+ # Record the fallback reason on evaluation so compare /
698
+ # commit responses carry explicit provenance.
699
+ if target.evaluation is None:
700
+ target.evaluation = {}
701
+ target.evaluation["composer_escalation"] = {
702
+ "escalated": False,
703
+ "error": escalation_info.get("error", "unknown"),
704
+ "warnings": escalation_info.get("warnings", []),
705
+ "fallback": "scaffold_plan",
706
+ }
707
+ except Exception as exc:
708
+ if target.evaluation is None:
709
+ target.evaluation = {}
710
+ target.evaluation["composer_escalation"] = {
711
+ "escalated": False,
712
+ "error": f"escalation raised: {exc}",
713
+ "fallback": "scaffold_plan",
714
+ }
715
+
716
+ commit_result = await engine.commit_branch_async(
266
717
  experiment,
267
718
  branch_id,
268
719
  ableton,
@@ -271,6 +722,19 @@ async def commit_experiment(
271
722
  ctx=ctx,
272
723
  )
273
724
 
725
+ # Surface escalation details on the commit response so the caller
726
+ # sees whether a scaffold or resolved plan was applied.
727
+ if escalation_info is not None and isinstance(commit_result, dict):
728
+ commit_result["composer_escalation"] = {
729
+ "escalated": bool(escalation_info.get("ok")),
730
+ "step_count": escalation_info.get("step_count"),
731
+ "layer_count": escalation_info.get("layer_count"),
732
+ "error": escalation_info.get("error"),
733
+ "warnings": escalation_info.get("warnings", []),
734
+ }
735
+
736
+ return commit_result
737
+
274
738
 
275
739
  @mcp.tool()
276
740
  def discard_experiment(