livepilot 1.13.0 → 1.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -120,6 +120,74 @@ class BranchOutcome:
         return asdict(self)
 
 
+def derive_goal_progress_from_fingerprint(
+    fingerprint_diff: dict,
+    target: Optional[dict] = None,
+) -> tuple[float, int]:
+    """Derive (goal_progress, measurable_count) from a fingerprint diff.
+
+    PR4 wiring: TimbralFingerprint dimensions (brightness, warmth, bite,
+    softness, instability, width, texture_density, movement, polish) are
+    effectively a goal vector. When a branch has before/after fingerprints
+    extracted from actual captured audio, the per-dimension diff IS the
+    measurable evidence classify_branch_outcome needs to make a real
+    decision — no reason to fall back to the heuristic score alone.
+
+    fingerprint_diff: output of synthesis_brain.diff_fingerprint(before, after).
+        Shape: {"brightness": float, "warmth": float, ...}
+    target: optional TimbralFingerprint dict ({"brightness": 0.3, ...}).
+        When provided, goal_progress counts only dimensions the target
+        cared about (non-zero target value). When None, every dimension
+        with a non-trivial diff counts as a target.
+
+    Returns:
+        (goal_progress, measurable_count) tuple ready to feed into
+        classify_branch_outcome. goal_progress is signed (positive =
+        branch moved in the intended direction; negative = moved away).
+        measurable_count is how many dimensions had a readable diff.
+    """
+    if not fingerprint_diff:
+        return (0.0, 0)
+
+    # Epsilon — diffs this small are noise, not signal.
+    eps = 0.02
+    progress = 0.0
+    count = 0
+
+    # If target is provided, score each dimension by
+    #     sign(target) * diff
+    # so moving in the target's direction counts positive, regardless
+    # of target magnitude. When no target, count any non-trivial diff
+    # in either direction as progress (a branch that "moves" at all
+    # is evidence the producer did something).
+    if target:
+        for dim, delta in fingerprint_diff.items():
+            if not isinstance(delta, (int, float)):
+                continue
+            if abs(delta) < eps:
+                continue
+            target_val = target.get(dim, 0.0)
+            if abs(target_val) < eps:
+                continue  # target didn't care about this dimension
+            count += 1
+            # Normalize: sign(target) * delta, scaled so each dimension
+            # contributes at most 1.0 to progress.
+            direction = 1.0 if target_val > 0 else -1.0
+            progress += direction * max(-1.0, min(1.0, delta))
+    else:
+        for dim, delta in fingerprint_diff.items():
+            if not isinstance(delta, (int, float)):
+                continue
+            if abs(delta) < eps:
+                continue
+            count += 1
+            # Without a target, we can't tell "good" from "bad" movement.
+            # Count as weakly positive — branch did something measurable.
+            progress += abs(max(-1.0, min(1.0, delta))) * 0.5
+
+    return (round(progress, 3), count)
+
+
 def classify_branch_outcome(
     score: float,
     *,
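For orientation, a small worked example of the helper added above; the diff and target values are invented, and eps refers to the 0.02 threshold in the function body:

    diff = {"brightness": 0.3, "warmth": -0.05, "movement": 0.01}

    # With a target: "movement" is below eps and ignored, "warmth" is ignored
    # because the target value for it is 0.0, "brightness" moves 0.3 toward a
    # positive target.
    derive_goal_progress_from_fingerprint(diff, target={"brightness": 0.5})
    # -> (0.3, 1)

    # Without a target: every non-trivial dimension counts, unsigned, at half weight.
    derive_goal_progress_from_fingerprint(diff)
    # -> (0.175, 2)   i.e. 0.5 * (0.3 + 0.05), two measurable dimensions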
@@ -128,6 +196,8 @@ def classify_branch_outcome(
     target_count: int = 0,
     goal_progress: float = 0.0,
     exploration_rules: bool = False,
+    fingerprint_diff: Optional[dict] = None,
+    timbral_target: Optional[dict] = None,
 ) -> BranchOutcome:
     """Classify a branch's terminal status from a score + optional hard-rule inputs.
 
@@ -143,9 +213,37 @@ def classify_branch_outcome(
         protection violations still force undo (safety invariant);
         all other failures downgrade to "interesting_but_failed".
 
+    PR4 additions (optional):
+        fingerprint_diff: output of synthesis_brain.diff_fingerprint
+            between before/after snapshots. When provided AND no caller-
+            supplied measurable_count/goal_progress were passed (both 0),
+            the classifier derives them from the diff — so the dimensions
+            of the TimbralFingerprint become the goal vector.
+        timbral_target: optional target fingerprint dict. Scores diff in
+            the target's direction (moving brighter counts positive when
+            target.brightness > 0). Omit when the branch had no specific
+            target; dimensions with non-trivial movement still contribute
+            measurable_count but progress is unsigned magnitude * 0.5.
+
     Returns a BranchOutcome that callers can plug into branch.score /
     .status / .evaluation without further interpretation.
     """
+    # PR4 — derive measurable evidence from fingerprint diff when the
+    # caller didn't supply their own. Keeps back-compat for existing
+    # callers that compute their own measurable inputs.
+    if (
+        fingerprint_diff
+        and measurable_count == 0
+        and abs(goal_progress) < 1e-6
+    ):
+        derived_progress, derived_count = derive_goal_progress_from_fingerprint(
+            fingerprint_diff, target=timbral_target,
+        )
+        goal_progress = derived_progress
+        measurable_count = derived_count
+        # target_count should also reflect the derived dimensions so the
+        # hard-rule path treats this as a genuinely measurable outcome.
+        target_count = max(target_count, derived_count)
     keep_change, failures = apply_hard_rules(
         goal_progress=goal_progress,
         collateral_damage=0.0,  # not threaded here — branch lifecycle doesn't compute it yet
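A sketch of the new call path through the classifier; every argument value here is invented, and the keyword names are the ones visible in this diff:

    outcome = classify_branch_outcome(
        score=0.62,
        protection_violated=False,
        measurable_count=0,      # both zero -> classifier derives evidence itself
        target_count=0,
        goal_progress=0.0,
        exploration_rules=False,
        fingerprint_diff={"brightness": 0.25, "warmth": -0.1},
        timbral_target={"brightness": 0.4},
    )
    # derive_goal_progress_from_fingerprint returns (0.25, 1), so the hard-rule
    # path runs with goal_progress=0.25, measurable_count=1, target_count=1.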
@@ -23,12 +23,37 @@ from ..branches import BranchSeed
 
 @dataclass
 class BranchSnapshot:
-    """Captured state before or after a branch experiment."""
+    """Captured state before or after a branch experiment.
+
+    Pre-PR4 fields (spectrum / rms / peak / track_meters) stay the same —
+    they remain the fast-path evidence when render-verify isn't available
+    or wasn't opted in.
+
+    PR4 adds render-based fields that are populated only when the
+    experiment runs with render_verify=True:
+
+    capture_path: path to the captured audio file (useful for re-analysis
+        or user audition of the branch output).
+    loudness: {lufs, lra, rms, peak, crest} from analyze_loudness.
+    spectral_shape: {centroid, flatness, rolloff, crest} from FluCoMa or
+        the offline analyzer.
+    fingerprint: TimbralFingerprint.to_dict() extracted from the
+        captured audio.
+
+    The fingerprint is what classify_branch_outcome reads to derive a
+    real goal_progress + measurable_count instead of relying on the
+    inline meter-based heuristic alone.
+    """
     spectrum: Optional[dict] = None
     rms: Optional[float] = None
     peak: Optional[float] = None
     track_meters: Optional[list] = None
     timestamp_ms: int = 0
+    # PR4 — render-based evidence (opt-in via render_verify flag)
+    capture_path: Optional[str] = None
+    loudness: Optional[dict] = None
+    spectral_shape: Optional[dict] = None
+    fingerprint: Optional[dict] = None  # TimbralFingerprint.to_dict()
 
     def to_dict(self) -> dict:
         d = {}
@@ -40,6 +65,14 @@ class BranchSnapshot:
             d["peak"] = self.peak
         if self.track_meters is not None:
             d["track_meters"] = self.track_meters
+        if self.capture_path is not None:
+            d["capture_path"] = self.capture_path
+        if self.loudness is not None:
+            d["loudness"] = self.loudness
+        if self.spectral_shape is not None:
+            d["spectral_shape"] = self.spectral_shape
+        if self.fingerprint is not None:
+            d["fingerprint"] = self.fingerprint
         d["timestamp_ms"] = self.timestamp_ms
         return d
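A rough illustration of the render-verify snapshot shape; the values and the capture path are invented, and the pre-PR4 fields (spectrum, rms, peak, track_meters) are serialized exactly as before, so they are omitted here:

    snap = BranchSnapshot(
        timestamp_ms=1700000000000,
        capture_path="/tmp/branch_after.wav",   # hypothetical path
        loudness={"lufs": -16.4, "lra": 3.2, "rms": -18.2, "peak": -6.1, "crest": 12.1},
        fingerprint={"brightness": 0.4, "warmth": 0.1},
    )
    snap.to_dict()
    # -> {"capture_path": "/tmp/branch_after.wav", "loudness": {...},
    #     "fingerprint": {...}, "timestamp_ms": 1700000000000}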
 
@@ -152,6 +185,12 @@ class ExperimentBranch:
         d["analytical_only"] = (
             self.seed.analytical_only or self.compiled_plan is None
         )
+        # Shortcut to the seed's producer_payload so downstream callers
+        # (composer winner-commit, synthesis re-target, provenance logs)
+        # don't have to reach into d["seed"]["producer_payload"] every
+        # time. The full seed dict is still available for producers
+        # that need other fields.
+        d["producer_payload"] = dict(self.seed.producer_payload or {})
         return d
 
 
@@ -29,7 +29,11 @@ def _get_ableton(ctx: Context):
 
 
 def _capture_snapshot(ctx: Context) -> BranchSnapshot:
-    """Capture current session state as a BranchSnapshot."""
+    """Capture current session state as a BranchSnapshot (fast path).
+
+    Uses live meters + spectral cache. No audio rendering. Called when
+    render_verify is off (default) — adds no latency to branch trials.
+    """
     ableton = _get_ableton(ctx)
     spectral = ctx.lifespan_context.get("spectral")
 
@@ -58,6 +62,116 @@ def _capture_snapshot(ctx: Context) -> BranchSnapshot:
     return snapshot
 
 
+def _capture_snapshot_with_render_verify(
+    ctx: Context, duration_seconds: float = 2.0,
+) -> BranchSnapshot:
+    """Capture state AND render audio for fingerprint extraction (PR4).
+
+    Runs the fast-path snapshot first, then additionally:
+      1. capture_audio duration_seconds seconds from master
+      2. analyze_loudness on the captured file
+      3. analyze_spectrum_offline on the captured file
+      4. extract_timbre_fingerprint from spectrum + loudness
+
+    Attaches capture_path, loudness, spectral_shape, and fingerprint to
+    the snapshot. When any stage fails (bridge unavailable, analyzer
+    missing, etc.), that stage's field is left None and a debug log is
+    emitted — render-verify degrades gracefully to the fast-path snapshot.
+
+    Expected added latency: duration_seconds (capture) + ~1-2s (offline
+    analysis). For a 2-branch experiment with 2s captures, that's
+    ~8-10s of overhead vs the default path.
+    """
+    snapshot = _capture_snapshot(ctx)
+
+    ableton = _get_ableton(ctx)
+    bridge = ctx.lifespan_context.get("m4l")
+
+    # Step 1: capture_audio is a bridge command — route via bridge.send_command
+    # if available, else fall back to ableton TCP which doesn't support it.
+    capture_path = None
+    if bridge is not None:
+        try:
+            maybe = bridge.send_command("capture_audio", float(duration_seconds), "master", "")
+            # bridge.send_command may return awaitable or plain dict.
+            import inspect
+            if inspect.isawaitable(maybe):
+                # We're in a sync context here — best effort, skip await.
+                # Render-verify from within sync capture_fn is the compromise;
+                # the async variant wires through from run_branch_async which
+                # does have await. Use the fast-path capture only.
+                logger.debug("capture_audio returned awaitable in sync context; skipping render-verify for this snapshot")
+                return snapshot
+            if isinstance(maybe, dict):
+                capture_path = maybe.get("file_path") or maybe.get("path") or maybe.get("filename")
+        except Exception as exc:
+            logger.debug("render-verify capture_audio failed: %s", exc)
+    if not capture_path:
+        return snapshot  # graceful degrade — caller still gets fast-path data
+    snapshot.capture_path = capture_path
+
+    # Step 2-3: offline loudness + spectrum analysis (MCP tools, sync wrappers)
+    try:
+        from ..tools.analyzer import analyze_loudness as _analyze_loudness
+        loud = _analyze_loudness(capture_path)
+        if isinstance(loud, dict) and "error" not in loud:
+            snapshot.loudness = loud
+    except Exception as exc:
+        logger.debug("render-verify analyze_loudness failed: %s", exc)
+
+    try:
+        from ..tools.analyzer import analyze_spectrum_offline as _analyze_spectrum
+        spec = _analyze_spectrum(capture_path)
+        if isinstance(spec, dict) and "error" not in spec:
+            snapshot.spectral_shape = {
+                "centroid": spec.get("centroid_hz"),
+                "flatness": spec.get("spectral_flatness"),
+                "rolloff": spec.get("rolloff_hz"),
+                "bandwidth": spec.get("bandwidth_hz"),
+                # Back-map the 5-band balance into the 8-band keys our
+                # fingerprint extractor expects. Coarse mapping:
+                "bands": _map_5band_to_8band(spec.get("band_balance", {})),
+            }
+    except Exception as exc:
+        logger.debug("render-verify analyze_spectrum_offline failed: %s", exc)
+
+    # Step 4: build fingerprint from what we got
+    try:
+        from ..synthesis_brain import extract_timbre_fingerprint
+        fp = extract_timbre_fingerprint(
+            spectrum=(snapshot.spectral_shape or {}).get("bands"),
+            loudness=snapshot.loudness,
+            spectral_shape=snapshot.spectral_shape,
+        )
+        snapshot.fingerprint = fp.to_dict()
+    except Exception as exc:
+        logger.debug("render-verify extract_timbre_fingerprint failed: %s", exc)
+
+    return snapshot
+
+
+def _map_5band_to_8band(b5: dict) -> dict:
+    """Adapt analyze_spectrum_offline's 5-band balance to the 8-band shape
+    extract_timbre_fingerprint expects.
+
+    5-band: sub_60hz, low_250hz, mid_2khz, high_8khz, air_16khz
+    8-band: sub, low, low_mid, mid, high_mid, high, very_high, ultra
+    """
+    if not isinstance(b5, dict):
+        return {}
+    # Conservative mapping — split each 5-band bucket across the 8-band shape.
+    return {
+        "sub": float(b5.get("sub_60hz", 0.0) or 0.0),
+        "low": float(b5.get("low_250hz", 0.0) or 0.0) * 0.6,
+        "low_mid": float(b5.get("low_250hz", 0.0) or 0.0) * 0.4,
+        "mid": float(b5.get("mid_2khz", 0.0) or 0.0) * 0.6,
+        "high_mid": float(b5.get("mid_2khz", 0.0) or 0.0) * 0.4,
+        "high": float(b5.get("high_8khz", 0.0) or 0.0) * 0.6,
+        "very_high": float(b5.get("high_8khz", 0.0) or 0.0) * 0.4,
+        "ultra": float(b5.get("air_16khz", 0.0) or 0.0),
+    }
+
+
 @mcp.tool()
 def create_experiment(
     ctx: Context,
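To make the back-mapping concrete, a worked example with an invented band balance (floating-point results shown rounded):

    _map_5band_to_8band({
        "sub_60hz": 0.20, "low_250hz": 0.30, "mid_2khz": 0.25,
        "high_8khz": 0.15, "air_16khz": 0.10,
    })
    # -> {"sub": 0.20, "low": 0.18, "low_mid": 0.12, "mid": 0.15,
    #     "high_mid": 0.10, "high": 0.09, "very_high": 0.06, "ultra": 0.10}
    # Each shared bucket is simply split 60/40 across its two 8-band neighbours.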
@@ -174,6 +288,8 @@ async def run_experiment(
     ctx: Context,
     experiment_id: str,
     exploration_rules: bool = False,
+    render_verify: bool = False,
+    render_duration_seconds: float = 2.0,
 ) -> dict:
     """Run all pending branches in an experiment.
 
@@ -189,14 +305,28 @@ async def run_experiment(
 
     Branches run sequentially (Ableton has linear undo).
 
-    exploration_rules (PR7): when True, branches that fail technical gates
-    (score < 0.40, non-positive measurable delta) are classified as
-    "interesting_but_failed" instead of "failed" — they stay in the
-    experiment for audit but don't appear in the ranking. Protection
-    violations STILL force undo regardless of this flag — that's a safety
-    invariant, not a taste judgment.
-
-    Default False preserves pre-PR7 behavior exactly.
+    exploration_rules: when True, branches that fail technical gates
+        (score < 0.40, non-positive measurable delta) are classified as
+        "interesting_but_failed" instead of "failed" — they stay in the
+        experiment for audit but don't appear in the ranking. Protection
+        violations STILL force undo regardless of this flag — that's a
+        safety invariant, not a taste judgment.
+
+    render_verify (PR4/v2): when True, each branch also captures audio
+        before and after execution, analyzes spectrum + loudness offline,
+        extracts a TimbralFingerprint, and attaches the before/after
+        fingerprint + diff to the branch snapshots. The diff is fed into
+        classify_branch_outcome as real measurable evidence — the
+        classifier no longer relies on meter heuristics alone. Default
+        False preserves speed; opt in when you want the classifier to
+        respond to spectral movement, not just track-meter drops.
+
+    render_duration_seconds: capture length per snapshot when
+        render_verify is on. Default 2.0 seconds. Each branch adds
+        ~2 * duration_seconds of capture time plus ~1-2s of offline
+        analysis — a 3-branch experiment at 2s adds ~15-18s.
+
+    Default render_verify=False preserves pre-PR4 behavior exactly.
     """
     experiment = engine.get_experiment(experiment_id)
     if not experiment:
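A sketch of opting in from the server side; the experiment id is a placeholder, and awaiting the coroutine directly like this assumes in-process use, whereas an MCP client would pass the same fields as tool arguments:

    result = await run_experiment(
        ctx,
        experiment_id="exp-123",        # hypothetical id
        exploration_rules=False,
        render_verify=True,             # capture audio + extract fingerprints
        render_duration_seconds=2.0,
    )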
@@ -263,12 +393,22 @@ async def run_experiment(
         plan = compiler.compile(move, kernel)
         compiled_dict = plan.to_dict()
 
+        # Pick the capture function — render-verify mode captures audio
+        # and extracts a TimbralFingerprint, adding latency but giving
+        # classify_branch_outcome real measurable evidence.
+        if render_verify:
+            capture_fn = lambda: _capture_snapshot_with_render_verify(
+                ctx, duration_seconds=render_duration_seconds,
+            )
+        else:
+            capture_fn = lambda: _capture_snapshot(ctx)
+
         # Run the branch through the async router
         await engine.run_branch_async(
             branch=branch,
             ableton=ableton,
             compiled_plan=compiled_dict,
-            capture_fn=lambda: _capture_snapshot(ctx),
+            capture_fn=capture_fn,
             bridge=bridge,
             mcp_registry=mcp_registry,
             ctx=ctx,
@@ -303,19 +443,50 @@ async def run_experiment(
                 score += 0.1  # presence-of-data bonus
 
             score = round(score, 3)
+
+            # PR4 — fingerprint diff to feed the classifier when render-verify
+            # is on. When both before/after have fingerprints, compute the
+            # per-dimension diff via synthesis_brain.diff_fingerprint and let
+            # classify_branch_outcome derive real measurable_count + goal_progress
+            # from it. Much stronger evidence than the meter heuristic alone.
+            fingerprint_diff = None
+            timbral_target = None
+            before_fp = before.get("fingerprint")
+            after_fp = after.get("fingerprint")
+            if before_fp and after_fp:
+                try:
+                    from ..synthesis_brain import diff_fingerprint, TimbralFingerprint
+                    before_obj = TimbralFingerprint(**{
+                        k: v for k, v in before_fp.items()
+                        if k in TimbralFingerprint.__dataclass_fields__
+                    })
+                    after_obj = TimbralFingerprint(**{
+                        k: v for k, v in after_fp.items()
+                        if k in TimbralFingerprint.__dataclass_fields__
+                    })
+                    fingerprint_diff = diff_fingerprint(before_obj, after_obj)
+                except Exception as exc:
+                    logger.debug("fingerprint diff failed: %s", exc)
+
+            # If the branch's seed was a synthesis seed with a timbral target
+            # in its producer_payload, score diff in that target's direction.
+            if branch.seed is not None and branch.seed.source == "synthesis":
+                target_hint = (branch.seed.producer_payload or {}).get("timbral_target")
+                if isinstance(target_hint, dict):
+                    timbral_target = target_hint
+
             outcome = classify_branch_outcome(
                 score=score,
                 protection_violated=protection_violated,
-                # Minimal hard-rule inputs — the heuristic doesn't compute
-                # measurable_count / goal_progress deltas. target_count=0 and
-                # measurable_count=0 lets rule 1 defer to score-only judgment.
                 measurable_count=0,
                 target_count=0,
                 goal_progress=0.0,
                 exploration_rules=exploration_rules,
+                fingerprint_diff=fingerprint_diff,
+                timbral_target=timbral_target,
            )
 
-            return {
+            result_eval = {
                 "score": outcome.score,
                 "keep_change": outcome.keep_change,
                 "status": outcome.status,
@@ -323,6 +494,13 @@ async def run_experiment(
                 "note": outcome.note,
                 "lost_tracks": lost_tracks,
             }
+            # Surface fingerprint evidence on the evaluation dict so
+            # compare_experiments can show per-branch spectral deltas.
+            if fingerprint_diff is not None:
+                result_eval["fingerprint_diff"] = fingerprint_diff
+                result_eval["fingerprint_before"] = before_fp
+                result_eval["fingerprint_after"] = after_fp
+            return result_eval
 
         engine.evaluate_branch(branch, eval_fn)
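When render-verify ran and both snapshots carried fingerprints, the per-branch evaluation dict therefore surfaces the spectral evidence alongside the existing keys, roughly like this (values invented; only keys visible in this diff are shown, other keys abridged):

    {
        "score": 0.61,
        "keep_change": True,
        ...
        "fingerprint_diff": {"brightness": 0.22, "warmth": -0.04},
        "fingerprint_before": {"brightness": 0.31, ...},
        "fingerprint_after": {"brightness": 0.53, ...},
    }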
 
@@ -458,7 +636,84 @@ async def commit_experiment(
     bridge = ctx.lifespan_context.get("m4l")
     mcp_registry = ctx.lifespan_context.get("mcp_dispatch", {})
 
-    return await engine.commit_branch_async(
+    # PR3 — composer winner escalation. When the winning branch came from
+    # the composer producer, the plan we auditioned was a lightweight
+    # scaffold (set_tempo + create_midi_track + create_scene/set_scene_name).
+    # Commit should deliver a populated session, not an empty skeleton.
+    # Re-run ComposerEngine.compose() on the intent captured in the seed's
+    # producer_payload, replace the branch's compiled_plan with the full
+    # resolved plan, then commit through the normal async router.
+    #
+    # When escalation fails (missing intent, zero resolved layers, etc.),
+    # fall back to committing the scaffold. Users get tracks + scenes
+    # they can populate manually, which is better than an error.
+    escalation_info = None
+    if (
+        target.seed is not None
+        and target.seed.source == "composer"
+        and target.seed.producer_payload
+    ):
+        try:
+            from ..composer import escalate_composer_branch
+            splice_client = ctx.lifespan_context.get("splice_client")
+            # browser_client only present on servers with live browser wiring;
+            # pass None defensively.
+            browser_client = ctx.lifespan_context.get("browser_client")
+            search_roots = ctx.lifespan_context.get("sample_search_roots") or []
+
+            escalation_info = await escalate_composer_branch(
+                producer_payload=target.seed.producer_payload,
+                search_roots=search_roots,
+                splice_client=splice_client,
+                browser_client=browser_client,
+            )
+
+            if escalation_info.get("ok"):
+                # Swap the compiled_plan for the fully resolved one before
+                # commit_branch_async runs it. Keep the old scaffold on the
+                # evaluation dict for audit.
+                old_plan = target.compiled_plan or {}
+                new_plan = {
+                    "steps": escalation_info["plan"],
+                    "step_count": escalation_info["step_count"],
+                    "summary": (
+                        f"Composer escalated: {escalation_info['layer_count']} "
+                        f"layers, {escalation_info['step_count']} steps "
+                        f"({len(escalation_info['resolved_samples'])} samples resolved)"
+                    ),
+                }
+                target.compiled_plan = new_plan
+                if target.evaluation is None:
+                    target.evaluation = {}
+                target.evaluation["composer_escalation"] = {
+                    "escalated": True,
+                    "scaffold_step_count": old_plan.get("step_count", 0),
+                    "resolved_step_count": escalation_info["step_count"],
+                    "layer_count": escalation_info["layer_count"],
+                    "resolved_samples": escalation_info["resolved_samples"],
+                    "warnings": escalation_info.get("warnings", []),
+                }
+            else:
+                # Record the fallback reason on evaluation so compare /
+                # commit responses carry explicit provenance.
+                if target.evaluation is None:
+                    target.evaluation = {}
+                target.evaluation["composer_escalation"] = {
+                    "escalated": False,
+                    "error": escalation_info.get("error", "unknown"),
+                    "warnings": escalation_info.get("warnings", []),
+                    "fallback": "scaffold_plan",
+                }
+        except Exception as exc:
+            if target.evaluation is None:
+                target.evaluation = {}
+            target.evaluation["composer_escalation"] = {
+                "escalated": False,
+                "error": f"escalation raised: {exc}",
+                "fallback": "scaffold_plan",
+            }
+
+    commit_result = await engine.commit_branch_async(
         experiment,
         branch_id,
         ableton,
@@ -467,6 +722,19 @@ async def commit_experiment(
         ctx=ctx,
     )
 
+    # Surface escalation details on the commit response so the caller
+    # sees whether a scaffold or resolved plan was applied.
+    if escalation_info is not None and isinstance(commit_result, dict):
+        commit_result["composer_escalation"] = {
+            "escalated": bool(escalation_info.get("ok")),
+            "step_count": escalation_info.get("step_count"),
+            "layer_count": escalation_info.get("layer_count"),
+            "error": escalation_info.get("error"),
+            "warnings": escalation_info.get("warnings", []),
+        }
+
+    return commit_result
+
 
 @mcp.tool()
 def discard_experiment(
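A committed composer winner now comes back with an escalation block attached to the commit response, roughly of this shape (counts invented; other response keys omitted):

    {
        ...
        "composer_escalation": {
            "escalated": True,
            "step_count": 24,
            "layer_count": 4,
            "error": None,
            "warnings": [],
        },
    }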
@@ -305,6 +305,7 @@ from .device_forge import tools as device_forge_tools  # noqa: F401, E40
 from .sample_engine import tools as sample_engine_tools  # noqa: F401, E402
 from .atlas import tools as atlas_tools  # noqa: F401, E402
 from .composer import tools as composer_tools  # noqa: F401, E402
+from .synthesis_brain import tools as synthesis_brain_tools  # noqa: F401, E402
 from .tools import diagnostics  # noqa: F401, E402
 from .tools import miditool  # noqa: F401, E402