livepilot 1.12.2 → 1.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +219 -0
- package/README.md +7 -7
- package/m4l_device/LivePilot_Analyzer.amxd +0 -0
- package/m4l_device/livepilot_bridge.js +1 -1
- package/mcp_server/__init__.py +1 -1
- package/mcp_server/branches/__init__.py +34 -0
- package/mcp_server/branches/types.py +286 -0
- package/mcp_server/composer/__init__.py +10 -1
- package/mcp_server/composer/branch_producer.py +349 -0
- package/mcp_server/composer/tools.py +58 -1
- package/mcp_server/evaluation/policy.py +227 -2
- package/mcp_server/experiment/engine.py +47 -11
- package/mcp_server/experiment/models.py +112 -8
- package/mcp_server/experiment/tools.py +502 -38
- package/mcp_server/memory/taste_graph.py +84 -11
- package/mcp_server/persistence/taste_store.py +21 -5
- package/mcp_server/runtime/session_kernel.py +46 -0
- package/mcp_server/runtime/tools.py +29 -3
- package/mcp_server/server.py +1 -0
- package/mcp_server/synthesis_brain/__init__.py +53 -0
- package/mcp_server/synthesis_brain/adapters/__init__.py +34 -0
- package/mcp_server/synthesis_brain/adapters/analog.py +273 -0
- package/mcp_server/synthesis_brain/adapters/base.py +86 -0
- package/mcp_server/synthesis_brain/adapters/drift.py +271 -0
- package/mcp_server/synthesis_brain/adapters/meld.py +261 -0
- package/mcp_server/synthesis_brain/adapters/operator.py +292 -0
- package/mcp_server/synthesis_brain/adapters/wavetable.py +364 -0
- package/mcp_server/synthesis_brain/engine.py +91 -0
- package/mcp_server/synthesis_brain/models.py +121 -0
- package/mcp_server/synthesis_brain/timbre.py +194 -0
- package/mcp_server/synthesis_brain/tools.py +231 -0
- package/mcp_server/tools/_conductor.py +144 -0
- package/mcp_server/wonder_mode/engine.py +324 -0
- package/mcp_server/wonder_mode/tools.py +153 -1
- package/package.json +2 -2
- package/remote_script/LivePilot/__init__.py +1 -1
- package/server.json +3 -3
|
@@ -16,6 +16,7 @@ from typing import Optional
|
|
|
16
16
|
from fastmcp import Context
|
|
17
17
|
|
|
18
18
|
from ..server import mcp
|
|
19
|
+
from ..branches import BranchSeed
|
|
19
20
|
from . import engine
|
|
20
21
|
from .models import BranchSnapshot
|
|
21
22
|
import logging
|
|
@@ -28,7 +29,11 @@ def _get_ableton(ctx: Context):
|
|
|
28
29
|
|
|
29
30
|
|
|
30
31
|
def _capture_snapshot(ctx: Context) -> BranchSnapshot:
|
|
31
|
-
"""Capture current session state as a BranchSnapshot.
|
|
32
|
+
"""Capture current session state as a BranchSnapshot (fast path).
|
|
33
|
+
|
|
34
|
+
Uses live meters + spectral cache. No audio rendering. Called when
|
|
35
|
+
render_verify is off (default) — adds no latency to branch trials.
|
|
36
|
+
"""
|
|
32
37
|
ableton = _get_ableton(ctx)
|
|
33
38
|
spectral = ctx.lifespan_context.get("spectral")
|
|
34
39
|
|
|
@@ -57,24 +62,187 @@ def _capture_snapshot(ctx: Context) -> BranchSnapshot:
|
|
|
57
62
|
return snapshot
|
|
58
63
|
|
|
59
64
|
|
|
65
|
+
def _capture_snapshot_with_render_verify(
    ctx: Context, duration_seconds: float = 2.0,
) -> BranchSnapshot:
    """Capture state AND render audio for fingerprint extraction (PR4).

    Runs the fast-path snapshot first, then additionally:
      1. capture_audio for ``duration_seconds`` seconds from master
      2. analyze_loudness on the captured file
      3. analyze_spectrum_offline on the captured file
      4. extract_timbre_fingerprint from spectrum + loudness

    Args:
        ctx: Request context. ``ctx.lifespan_context["m4l"]`` supplies the
            bridge used to issue ``capture_audio``.
        duration_seconds: Capture length in seconds (coerced to float
            before being sent to the bridge).

    Returns:
        The fast-path snapshot, with ``capture_path``, ``loudness``,
        ``spectral_shape`` and ``fingerprint`` assigned for every stage
        that succeeded.

    Every stage is best-effort: when one fails (bridge unavailable,
    analyzer missing, etc.) its field is simply not assigned and a debug
    log is emitted, so render-verify degrades gracefully to the fast-path
    snapshot instead of raising.

    Expected added latency per snapshot: duration_seconds (capture) plus
    roughly 1-2s of offline analysis.
    """
    snapshot = _capture_snapshot(ctx)

    # Step 1: capture_audio is a bridge-only command. Without a bridge
    # there is no other transport to try, so the fast-path snapshot is
    # returned unchanged.
    bridge = ctx.lifespan_context.get("m4l")
    if bridge is None:
        return snapshot

    capture_path = None
    try:
        maybe = bridge.send_command(
            "capture_audio", float(duration_seconds), "master", "",
        )
        # bridge.send_command may return an awaitable or a plain dict.
        import inspect
        if inspect.isawaitable(maybe):
            # This function is synchronous and cannot await the result.
            # A bare coroutine has not started running yet, so closing it
            # is side-effect free and avoids the "coroutine was never
            # awaited" RuntimeWarning. Futures/Tasks are left untouched.
            if inspect.iscoroutine(maybe):
                maybe.close()
            logger.debug("capture_audio returned awaitable in sync context; skipping render-verify for this snapshot")
            return snapshot
        if isinstance(maybe, dict):
            # The reply key is probed under several names; first truthy wins.
            capture_path = (
                maybe.get("file_path")
                or maybe.get("path")
                or maybe.get("filename")
            )
    except Exception as exc:
        logger.debug("render-verify capture_audio failed: %s", exc)

    if not capture_path:
        return snapshot  # graceful degrade — caller still gets fast-path data
    snapshot.capture_path = capture_path

    # Step 2: offline loudness analysis of the captured file.
    try:
        from ..tools.analyzer import analyze_loudness as _analyze_loudness
        loud = _analyze_loudness(capture_path)
        if isinstance(loud, dict) and "error" not in loud:
            snapshot.loudness = loud
    except Exception as exc:
        logger.debug("render-verify analyze_loudness failed: %s", exc)

    # Step 3: offline spectrum analysis of the captured file.
    try:
        from ..tools.analyzer import analyze_spectrum_offline as _analyze_spectrum
        spec = _analyze_spectrum(capture_path)
        if isinstance(spec, dict) and "error" not in spec:
            snapshot.spectral_shape = {
                "centroid": spec.get("centroid_hz"),
                "flatness": spec.get("spectral_flatness"),
                "rolloff": spec.get("rolloff_hz"),
                "bandwidth": spec.get("bandwidth_hz"),
                # Back-map the 5-band balance into the 8-band keys the
                # fingerprint extractor expects (coarse mapping).
                "bands": _map_5band_to_8band(spec.get("band_balance", {})),
            }
    except Exception as exc:
        logger.debug("render-verify analyze_spectrum_offline failed: %s", exc)

    # Step 4: build the fingerprint from whatever the earlier stages produced.
    try:
        from ..synthesis_brain import extract_timbre_fingerprint
        fp = extract_timbre_fingerprint(
            spectrum=(snapshot.spectral_shape or {}).get("bands"),
            loudness=snapshot.loudness,
            spectral_shape=snapshot.spectral_shape,
        )
        snapshot.fingerprint = fp.to_dict()
    except Exception as exc:
        logger.debug("render-verify extract_timbre_fingerprint failed: %s", exc)

    return snapshot
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def _map_5band_to_8band(b5: dict) -> dict:
|
|
154
|
+
"""Adapt analyze_spectrum_offline's 5-band balance to the 8-band shape
|
|
155
|
+
extract_timbre_fingerprint expects.
|
|
156
|
+
|
|
157
|
+
5-band: sub_60hz, low_250hz, mid_2khz, high_8khz, air_16khz
|
|
158
|
+
8-band: sub, low, low_mid, mid, high_mid, high, very_high, ultra
|
|
159
|
+
"""
|
|
160
|
+
if not isinstance(b5, dict):
|
|
161
|
+
return {}
|
|
162
|
+
# Conservative mapping — split each 5-band bucket across the 8-band shape.
|
|
163
|
+
return {
|
|
164
|
+
"sub": float(b5.get("sub_60hz", 0.0) or 0.0),
|
|
165
|
+
"low": float(b5.get("low_250hz", 0.0) or 0.0) * 0.6,
|
|
166
|
+
"low_mid": float(b5.get("low_250hz", 0.0) or 0.0) * 0.4,
|
|
167
|
+
"mid": float(b5.get("mid_2khz", 0.0) or 0.0) * 0.6,
|
|
168
|
+
"high_mid": float(b5.get("mid_2khz", 0.0) or 0.0) * 0.4,
|
|
169
|
+
"high": float(b5.get("high_8khz", 0.0) or 0.0) * 0.6,
|
|
170
|
+
"very_high": float(b5.get("high_8khz", 0.0) or 0.0) * 0.4,
|
|
171
|
+
"ultra": float(b5.get("air_16khz", 0.0) or 0.0),
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
|
|
60
175
|
@mcp.tool()
|
|
61
176
|
def create_experiment(
|
|
62
177
|
ctx: Context,
|
|
63
178
|
request_text: str,
|
|
64
179
|
move_ids: Optional[list] = None,
|
|
65
180
|
limit: int = 3,
|
|
181
|
+
seeds: Optional[list] = None,
|
|
182
|
+
compiled_plans: Optional[list] = None,
|
|
66
183
|
) -> dict:
|
|
67
184
|
"""Create an experiment set to compare multiple approaches.
|
|
68
185
|
|
|
69
|
-
|
|
70
|
-
Otherwise, uses propose_next_best_move to find candidates.
|
|
186
|
+
Three input modes (in priority order):
|
|
71
187
|
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
188
|
+
1. seeds (PR3+): a list of BranchSeed dicts. Each seed becomes one branch.
|
|
189
|
+
compiled_plans (optional parallel list) attaches pre-compiled plans
|
|
190
|
+
for freeform / synthesis / composer producers. Seed dict shape:
|
|
191
|
+
{seed_id, source, move_id, hypothesis, protected_qualities,
|
|
192
|
+
affected_scope, distinctness_reason, risk_label, novelty_label,
|
|
193
|
+
analytical_only}
|
|
194
|
+
Missing fields default per BranchSeed. This is the canonical path
|
|
195
|
+
for producers that have already done their own selection work.
|
|
196
|
+
|
|
197
|
+
2. move_ids: legacy path — one semantic_move seed per move_id.
|
|
198
|
+
Unchanged behavior; internally delegates to the seeds path.
|
|
199
|
+
|
|
200
|
+
3. Auto-proposal: neither seeds nor move_ids provided. Scans the semantic
|
|
201
|
+
move registry by keyword overlap with request_text and takes the top
|
|
202
|
+
``limit`` moves (default 3).
|
|
75
203
|
|
|
76
204
|
Returns: experiment set with branch IDs ready for run_experiment.
|
|
77
205
|
"""
|
|
206
|
+
# ── Mode 1: seeds provided ──────────────────────────────────────────
|
|
207
|
+
if seeds:
|
|
208
|
+
rehydrated: list[BranchSeed] = []
|
|
209
|
+
for i, s in enumerate(seeds):
|
|
210
|
+
if isinstance(s, BranchSeed):
|
|
211
|
+
rehydrated.append(s)
|
|
212
|
+
elif isinstance(s, dict):
|
|
213
|
+
try:
|
|
214
|
+
rehydrated.append(BranchSeed(**s))
|
|
215
|
+
except TypeError as exc:
|
|
216
|
+
return {"error": f"seeds[{i}] invalid: {exc}"}
|
|
217
|
+
else:
|
|
218
|
+
return {
|
|
219
|
+
"error": (
|
|
220
|
+
f"seeds[{i}] must be dict or BranchSeed, "
|
|
221
|
+
f"got {type(s).__name__}"
|
|
222
|
+
)
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
if compiled_plans is not None and len(compiled_plans) != len(rehydrated):
|
|
226
|
+
return {
|
|
227
|
+
"error": (
|
|
228
|
+
f"compiled_plans length ({len(compiled_plans)}) must match "
|
|
229
|
+
f"seeds length ({len(rehydrated)})"
|
|
230
|
+
)
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
ableton = _get_ableton(ctx)
|
|
234
|
+
ableton.send_command("get_session_info")
|
|
235
|
+
kernel_id = f"kern_{int(time.time())}"
|
|
236
|
+
|
|
237
|
+
experiment = engine.create_experiment_from_seeds(
|
|
238
|
+
request_text=request_text,
|
|
239
|
+
seeds=rehydrated,
|
|
240
|
+
kernel_id=kernel_id,
|
|
241
|
+
compiled_plans=compiled_plans,
|
|
242
|
+
)
|
|
243
|
+
return experiment.to_dict()
|
|
244
|
+
|
|
245
|
+
# ── Mode 2/3: legacy move_ids path ──────────────────────────────────
|
|
78
246
|
if not move_ids:
|
|
79
247
|
# Auto-propose moves from the registry
|
|
80
248
|
from ..semantic_moves import registry
|
|
@@ -119,19 +287,46 @@ def create_experiment(
|
|
|
119
287
|
async def run_experiment(
|
|
120
288
|
ctx: Context,
|
|
121
289
|
experiment_id: str,
|
|
290
|
+
exploration_rules: bool = False,
|
|
291
|
+
render_verify: bool = False,
|
|
292
|
+
render_duration_seconds: float = 2.0,
|
|
122
293
|
) -> dict:
|
|
123
294
|
"""Run all pending branches in an experiment.
|
|
124
295
|
|
|
125
296
|
For each branch:
|
|
126
297
|
1. Compile the semantic move against current session
|
|
298
|
+
(skipped when branch.compiled_plan is already set — PR3+)
|
|
127
299
|
2. Capture before state
|
|
128
|
-
3. Execute the compiled plan (through the async router
|
|
300
|
+
3. Execute the compiled plan (through the async router)
|
|
129
301
|
4. Capture after state
|
|
130
302
|
5. Undo all successful steps (revert to checkpoint)
|
|
131
|
-
6. Evaluate the branch
|
|
303
|
+
6. Evaluate the branch and classify its outcome via evaluation.policy
|
|
132
304
|
7. Record per-step results on branch.execution_log
|
|
133
305
|
|
|
134
306
|
Branches run sequentially (Ableton has linear undo).
|
|
307
|
+
|
|
308
|
+
exploration_rules: when True, branches that fail technical gates
|
|
309
|
+
(score < 0.40, non-positive measurable delta) are classified as
|
|
310
|
+
"interesting_but_failed" instead of "failed" — they stay in the
|
|
311
|
+
experiment for audit but don't appear in the ranking. Protection
|
|
312
|
+
violations STILL force undo regardless of this flag — that's a
|
|
313
|
+
safety invariant, not a taste judgment.
|
|
314
|
+
|
|
315
|
+
render_verify (PR4/v2): when True, each branch also captures audio
|
|
316
|
+
before and after execution, analyzes spectrum + loudness offline,
|
|
317
|
+
extracts a TimbralFingerprint, and attaches the before/after
|
|
318
|
+
fingerprint + diff to the branch snapshots. The diff is fed into
|
|
319
|
+
classify_branch_outcome as real measurable evidence — the
|
|
320
|
+
classifier no longer relies on meter heuristics alone. Default
|
|
321
|
+
False preserves speed; opt in when you want the classifier to
|
|
322
|
+
respond to spectral movement, not just track-meter drops.
|
|
323
|
+
|
|
324
|
+
render_duration_seconds: capture length per snapshot when
|
|
325
|
+
render_verify is on. Default 2.0 seconds. Each branch adds
|
|
326
|
+
~2 * duration_seconds of capture time plus ~1-2s of offline
|
|
327
|
+
analysis — a 3-branch experiment at 2s adds ~15-18s.
|
|
328
|
+
|
|
329
|
+
Default render_verify=False preserves pre-PR4 behavior exactly.
|
|
135
330
|
"""
|
|
136
331
|
experiment = engine.get_experiment(experiment_id)
|
|
137
332
|
if not experiment:
|
|
@@ -149,51 +344,186 @@ async def run_experiment(
|
|
|
149
344
|
if branch.status != "pending":
|
|
150
345
|
continue
|
|
151
346
|
|
|
152
|
-
#
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
347
|
+
# PR3: respect a pre-existing compiled_plan on the branch (freeform /
|
|
348
|
+
# synthesis / composer producers bring their own). Only compile from
|
|
349
|
+
# move_id when the branch arrived without a plan — which requires a
|
|
350
|
+
# semantic_move seed (or a legacy move-only branch).
|
|
351
|
+
compiled_dict = branch.compiled_plan
|
|
352
|
+
|
|
353
|
+
if compiled_dict is None:
|
|
354
|
+
# Analytical-only branches short-circuit — no plan to run.
|
|
355
|
+
# Marked with status="analytical" so ranked_branches()
|
|
356
|
+
# (which only surfaces "evaluated") excludes them, and
|
|
357
|
+
# commit_experiment refuses to re-apply them.
|
|
358
|
+
if branch.seed is not None and branch.seed.analytical_only:
|
|
359
|
+
branch.status = "analytical"
|
|
360
|
+
branch.score = 0.0
|
|
361
|
+
branch.evaluation = {
|
|
362
|
+
"score": 0.0,
|
|
363
|
+
"keep_change": False,
|
|
364
|
+
"status": "analytical",
|
|
365
|
+
"note": "analytical_only branch — no execution path",
|
|
366
|
+
}
|
|
367
|
+
results.append(branch.to_dict())
|
|
368
|
+
continue
|
|
369
|
+
|
|
370
|
+
if not branch.move_id:
|
|
371
|
+
branch.status = "failed"
|
|
372
|
+
branch.score = 0.0
|
|
373
|
+
branch.evaluation = {
|
|
374
|
+
"error": (
|
|
375
|
+
"Branch has no compiled_plan and no move_id — "
|
|
376
|
+
"freeform producers must pre-populate compiled_plan"
|
|
377
|
+
)
|
|
378
|
+
}
|
|
379
|
+
results.append(branch.to_dict())
|
|
380
|
+
continue
|
|
381
|
+
|
|
382
|
+
# Compile from semantic move
|
|
383
|
+
move = registry.get_move(branch.move_id)
|
|
384
|
+
if not move:
|
|
385
|
+
branch.status = "failed"
|
|
386
|
+
branch.score = 0.0
|
|
387
|
+
branch.evaluation = {"error": f"Move {branch.move_id} not found"}
|
|
388
|
+
results.append(branch.to_dict())
|
|
389
|
+
continue
|
|
390
|
+
|
|
391
|
+
session_info = ableton.send_command("get_session_info")
|
|
392
|
+
kernel = {"session_info": session_info, "mode": "explore"}
|
|
393
|
+
plan = compiler.compile(move, kernel)
|
|
394
|
+
compiled_dict = plan.to_dict()
|
|
395
|
+
|
|
396
|
+
# Pick the capture function — render-verify mode captures audio
|
|
397
|
+
# and extracts a TimbralFingerprint, adding latency but giving
|
|
398
|
+
# classify_branch_outcome real measurable evidence.
|
|
399
|
+
if render_verify:
|
|
400
|
+
capture_fn = lambda: _capture_snapshot_with_render_verify(
|
|
401
|
+
ctx, duration_seconds=render_duration_seconds,
|
|
402
|
+
)
|
|
403
|
+
else:
|
|
404
|
+
capture_fn = lambda: _capture_snapshot(ctx)
|
|
165
405
|
|
|
166
406
|
# Run the branch through the async router
|
|
167
407
|
await engine.run_branch_async(
|
|
168
408
|
branch=branch,
|
|
169
409
|
ableton=ableton,
|
|
170
410
|
compiled_plan=compiled_dict,
|
|
171
|
-
capture_fn=
|
|
411
|
+
capture_fn=capture_fn,
|
|
172
412
|
bridge=bridge,
|
|
173
413
|
mcp_registry=mcp_registry,
|
|
174
414
|
ctx=ctx,
|
|
175
415
|
)
|
|
176
416
|
|
|
177
|
-
# Evaluate
|
|
417
|
+
# Evaluate — score via the inline heuristic, then classify via
|
|
418
|
+
# evaluation.policy for a unified keep/undo/interesting_but_failed
|
|
419
|
+
# decision (PR7).
|
|
420
|
+
from ..evaluation.policy import classify_branch_outcome
|
|
421
|
+
|
|
178
422
|
def eval_fn(before, after):
|
|
179
|
-
# Simple heuristic evaluation when spectral data isn't available
|
|
423
|
+
# Simple heuristic evaluation when spectral data isn't available.
|
|
424
|
+
# protection_violated is rough — derived from whether any track
|
|
425
|
+
# went silent (signal lost on a track = protection violation).
|
|
180
426
|
score = 0.5 # Neutral
|
|
427
|
+
protection_violated = False
|
|
428
|
+
lost_tracks = 0
|
|
429
|
+
|
|
181
430
|
if before.get("track_meters") and after.get("track_meters"):
|
|
182
|
-
# Check all tracks still alive
|
|
183
431
|
before_alive = sum(1 for t in before["track_meters"] if t.get("level", 0) > 0)
|
|
184
432
|
after_alive = sum(1 for t in after["track_meters"] if t.get("level", 0) > 0)
|
|
185
|
-
|
|
433
|
+
lost_tracks = max(0, before_alive - after_alive)
|
|
434
|
+
if lost_tracks == 0:
|
|
186
435
|
score += 0.1
|
|
187
436
|
else:
|
|
188
|
-
score -= 0.2
|
|
437
|
+
score -= 0.2
|
|
438
|
+
# A track going silent is a protection violation — always
|
|
439
|
+
# undo regardless of exploration mode.
|
|
440
|
+
protection_violated = True
|
|
189
441
|
|
|
190
442
|
if before.get("spectrum") and after.get("spectrum"):
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
443
|
+
score += 0.1 # presence-of-data bonus
|
|
444
|
+
|
|
445
|
+
score = round(score, 3)
|
|
446
|
+
|
|
447
|
+
# PR4 — fingerprint diff to feed the classifier when render-verify
|
|
448
|
+
# is on. When both before/after have fingerprints, compute the
|
|
449
|
+
# per-dimension diff via synthesis_brain.diff_fingerprint and let
|
|
450
|
+
# classify_branch_outcome derive real measurable_count + goal_progress
|
|
451
|
+
# from it. Much stronger evidence than the meter heuristic alone.
|
|
452
|
+
fingerprint_diff = None
|
|
453
|
+
timbral_target = None
|
|
454
|
+
before_fp = before.get("fingerprint")
|
|
455
|
+
after_fp = after.get("fingerprint")
|
|
456
|
+
if before_fp and after_fp:
|
|
457
|
+
try:
|
|
458
|
+
from ..synthesis_brain import diff_fingerprint, TimbralFingerprint
|
|
459
|
+
before_obj = TimbralFingerprint(**{
|
|
460
|
+
k: v for k, v in before_fp.items()
|
|
461
|
+
if k in TimbralFingerprint.__dataclass_fields__
|
|
462
|
+
})
|
|
463
|
+
after_obj = TimbralFingerprint(**{
|
|
464
|
+
k: v for k, v in after_fp.items()
|
|
465
|
+
if k in TimbralFingerprint.__dataclass_fields__
|
|
466
|
+
})
|
|
467
|
+
fingerprint_diff = diff_fingerprint(before_obj, after_obj)
|
|
468
|
+
except Exception as exc:
|
|
469
|
+
logger.debug("fingerprint diff failed: %s", exc)
|
|
470
|
+
|
|
471
|
+
# If the branch's seed was a synthesis seed with a timbral target
|
|
472
|
+
# in its producer_payload, score diff in that target's direction.
|
|
473
|
+
if branch.seed is not None and branch.seed.source == "synthesis":
|
|
474
|
+
target_hint = (branch.seed.producer_payload or {}).get("timbral_target")
|
|
475
|
+
if isinstance(target_hint, dict):
|
|
476
|
+
timbral_target = target_hint
|
|
477
|
+
|
|
478
|
+
outcome = classify_branch_outcome(
|
|
479
|
+
score=score,
|
|
480
|
+
protection_violated=protection_violated,
|
|
481
|
+
measurable_count=0,
|
|
482
|
+
target_count=0,
|
|
483
|
+
goal_progress=0.0,
|
|
484
|
+
exploration_rules=exploration_rules,
|
|
485
|
+
fingerprint_diff=fingerprint_diff,
|
|
486
|
+
timbral_target=timbral_target,
|
|
487
|
+
)
|
|
488
|
+
|
|
489
|
+
result_eval = {
|
|
490
|
+
"score": outcome.score,
|
|
491
|
+
"keep_change": outcome.keep_change,
|
|
492
|
+
"status": outcome.status,
|
|
493
|
+
"failure_reasons": outcome.failure_reasons,
|
|
494
|
+
"note": outcome.note,
|
|
495
|
+
"lost_tracks": lost_tracks,
|
|
496
|
+
}
|
|
497
|
+
# Surface fingerprint evidence on the evaluation dict so
|
|
498
|
+
# compare_experiments can show per-branch spectral deltas.
|
|
499
|
+
if fingerprint_diff is not None:
|
|
500
|
+
result_eval["fingerprint_diff"] = fingerprint_diff
|
|
501
|
+
result_eval["fingerprint_before"] = before_fp
|
|
502
|
+
result_eval["fingerprint_after"] = after_fp
|
|
503
|
+
return result_eval
|
|
195
504
|
|
|
196
505
|
engine.evaluate_branch(branch, eval_fn)
|
|
506
|
+
|
|
507
|
+
# Promote the classified status onto the branch. ranked_branches()
|
|
508
|
+
# only surfaces status="evaluated", so branches the classifier
|
|
509
|
+
# rejected ("undo") or retained for audit ("interesting_but_failed")
|
|
510
|
+
# are both correctly excluded from winner recommendations.
|
|
511
|
+
# Without this mapping, a branch the hard-rule classifier explicitly
|
|
512
|
+
# rejected could still win a ranking and be re-applied by commit.
|
|
513
|
+
if branch.evaluation and branch.evaluation.get("status"):
|
|
514
|
+
status = branch.evaluation["status"]
|
|
515
|
+
if status == "keep":
|
|
516
|
+
branch.status = "evaluated"
|
|
517
|
+
elif status == "interesting_but_failed":
|
|
518
|
+
branch.status = "interesting_but_failed"
|
|
519
|
+
elif status == "undo":
|
|
520
|
+
# Undo-classified branches had their steps rolled back by
|
|
521
|
+
# run_branch_async's undo pass; they must NOT be eligible
|
|
522
|
+
# winners. "rejected" is a terminal branch status distinct
|
|
523
|
+
# from "failed" (execution failed) and distinct from
|
|
524
|
+
# "interesting_but_failed" (exploration-mode retention).
|
|
525
|
+
branch.status = "rejected"
|
|
526
|
+
|
|
197
527
|
results.append(branch.to_dict())
|
|
198
528
|
|
|
199
529
|
return {
|
|
@@ -221,6 +551,30 @@ def compare_experiments(
|
|
|
221
551
|
return {"error": f"Experiment {experiment_id} not found"}
|
|
222
552
|
|
|
223
553
|
ranked = experiment.ranked_branches()
|
|
554
|
+
|
|
555
|
+
# Surface non-winning branch categories separately. None of these are
|
|
556
|
+
# candidates for commit — ranked_branches() filters them out — but the
|
|
557
|
+
# user sees what was tried.
|
|
558
|
+
interesting_failed = [
|
|
559
|
+
b for b in experiment.branches if b.status == "interesting_but_failed"
|
|
560
|
+
]
|
|
561
|
+
rejected = [
|
|
562
|
+
b for b in experiment.branches if b.status == "rejected"
|
|
563
|
+
]
|
|
564
|
+
analytical = [
|
|
565
|
+
b for b in experiment.branches if b.status == "analytical"
|
|
566
|
+
]
|
|
567
|
+
|
|
568
|
+
def _audit_row(b):
|
|
569
|
+
return {
|
|
570
|
+
"branch_id": b.branch_id,
|
|
571
|
+
"name": b.name,
|
|
572
|
+
"move_id": b.move_id,
|
|
573
|
+
"score": b.score,
|
|
574
|
+
"summary": b.compiled_plan.get("summary", "") if b.compiled_plan else "",
|
|
575
|
+
"evaluation": b.evaluation,
|
|
576
|
+
}
|
|
577
|
+
|
|
224
578
|
return {
|
|
225
579
|
"experiment_id": experiment_id,
|
|
226
580
|
"request": experiment.request_text,
|
|
@@ -228,16 +582,14 @@ def compare_experiments(
|
|
|
228
582
|
"ranking": [
|
|
229
583
|
{
|
|
230
584
|
"rank": i + 1,
|
|
231
|
-
|
|
232
|
-
"name": b.name,
|
|
233
|
-
"move_id": b.move_id,
|
|
234
|
-
"score": b.score,
|
|
235
|
-
"summary": b.compiled_plan.get("summary", "") if b.compiled_plan else "",
|
|
236
|
-
"evaluation": b.evaluation,
|
|
585
|
+
**_audit_row(b),
|
|
237
586
|
}
|
|
238
587
|
for i, b in enumerate(ranked)
|
|
239
588
|
],
|
|
240
589
|
"winner": ranked[0].to_dict() if ranked else None,
|
|
590
|
+
"interesting_but_failed": [_audit_row(b) for b in interesting_failed],
|
|
591
|
+
"rejected": [_audit_row(b) for b in rejected],
|
|
592
|
+
"analytical": [_audit_row(b) for b in analytical],
|
|
241
593
|
}
|
|
242
594
|
|
|
243
595
|
|
|
@@ -258,11 +610,110 @@ async def commit_experiment(
|
|
|
258
610
|
if not experiment:
|
|
259
611
|
return {"error": f"Experiment {experiment_id} not found"}
|
|
260
612
|
|
|
613
|
+
# Refuse to commit branches the classifier rejected or that were
|
|
614
|
+
# analytical-only. Those statuses exist specifically so callers
|
|
615
|
+
# can't route them into re-application, and ranked_branches()
|
|
616
|
+
# already excludes them — so reaching commit with such a branch
|
|
617
|
+
# means the caller is bypassing the ranking layer.
|
|
618
|
+
target = experiment.get_branch(branch_id)
|
|
619
|
+
if target is None:
|
|
620
|
+
return {"error": f"Branch {branch_id} not found"}
|
|
621
|
+
if target.status in ("rejected", "analytical", "failed"):
|
|
622
|
+
return {
|
|
623
|
+
"error": (
|
|
624
|
+
f"Cannot commit branch with status '{target.status}'. "
|
|
625
|
+
f"'rejected' = hard-rule classifier rolled back; "
|
|
626
|
+
f"'analytical' = no executable plan; "
|
|
627
|
+
f"'failed' = zero steps applied successfully. "
|
|
628
|
+
f"Use compare_experiments to see eligible winners "
|
|
629
|
+
f"(only status='evaluated' branches are ranking candidates)."
|
|
630
|
+
),
|
|
631
|
+
"branch_id": branch_id,
|
|
632
|
+
"branch_status": target.status,
|
|
633
|
+
}
|
|
634
|
+
|
|
261
635
|
ableton = _get_ableton(ctx)
|
|
262
636
|
bridge = ctx.lifespan_context.get("m4l")
|
|
263
637
|
mcp_registry = ctx.lifespan_context.get("mcp_dispatch", {})
|
|
264
638
|
|
|
265
|
-
|
|
639
|
+
# PR3 — composer winner escalation. When the winning branch came from
|
|
640
|
+
# the composer producer, the plan we auditioned was a lightweight
|
|
641
|
+
# scaffold (set_tempo + create_midi_track + create_scene/set_scene_name).
|
|
642
|
+
# Commit should deliver a populated session, not an empty skeleton.
|
|
643
|
+
# Re-run ComposerEngine.compose() on the intent captured in the seed's
|
|
644
|
+
# producer_payload, replace the branch's compiled_plan with the full
|
|
645
|
+
# resolved plan, then commit through the normal async router.
|
|
646
|
+
#
|
|
647
|
+
# When escalation fails (missing intent, zero resolved layers, etc.),
|
|
648
|
+
# fall back to committing the scaffold. Users get tracks + scenes
|
|
649
|
+
# they can populate manually, which is better than an error.
|
|
650
|
+
escalation_info = None
|
|
651
|
+
if (
|
|
652
|
+
target.seed is not None
|
|
653
|
+
and target.seed.source == "composer"
|
|
654
|
+
and target.seed.producer_payload
|
|
655
|
+
):
|
|
656
|
+
try:
|
|
657
|
+
from ..composer import escalate_composer_branch
|
|
658
|
+
splice_client = ctx.lifespan_context.get("splice_client")
|
|
659
|
+
# browser_client only present on servers with live browser wiring;
|
|
660
|
+
# pass None defensively.
|
|
661
|
+
browser_client = ctx.lifespan_context.get("browser_client")
|
|
662
|
+
search_roots = ctx.lifespan_context.get("sample_search_roots") or []
|
|
663
|
+
|
|
664
|
+
escalation_info = await escalate_composer_branch(
|
|
665
|
+
producer_payload=target.seed.producer_payload,
|
|
666
|
+
search_roots=search_roots,
|
|
667
|
+
splice_client=splice_client,
|
|
668
|
+
browser_client=browser_client,
|
|
669
|
+
)
|
|
670
|
+
|
|
671
|
+
if escalation_info.get("ok"):
|
|
672
|
+
# Swap the compiled_plan for the fully resolved one before
|
|
673
|
+
# commit_branch_async runs it. Keep the old scaffold on the
|
|
674
|
+
# evaluation dict for audit.
|
|
675
|
+
old_plan = target.compiled_plan or {}
|
|
676
|
+
new_plan = {
|
|
677
|
+
"steps": escalation_info["plan"],
|
|
678
|
+
"step_count": escalation_info["step_count"],
|
|
679
|
+
"summary": (
|
|
680
|
+
f"Composer escalated: {escalation_info['layer_count']} "
|
|
681
|
+
f"layers, {escalation_info['step_count']} steps "
|
|
682
|
+
f"({len(escalation_info['resolved_samples'])} samples resolved)"
|
|
683
|
+
),
|
|
684
|
+
}
|
|
685
|
+
target.compiled_plan = new_plan
|
|
686
|
+
if target.evaluation is None:
|
|
687
|
+
target.evaluation = {}
|
|
688
|
+
target.evaluation["composer_escalation"] = {
|
|
689
|
+
"escalated": True,
|
|
690
|
+
"scaffold_step_count": old_plan.get("step_count", 0),
|
|
691
|
+
"resolved_step_count": escalation_info["step_count"],
|
|
692
|
+
"layer_count": escalation_info["layer_count"],
|
|
693
|
+
"resolved_samples": escalation_info["resolved_samples"],
|
|
694
|
+
"warnings": escalation_info.get("warnings", []),
|
|
695
|
+
}
|
|
696
|
+
else:
|
|
697
|
+
# Record the fallback reason on evaluation so compare /
|
|
698
|
+
# commit responses carry explicit provenance.
|
|
699
|
+
if target.evaluation is None:
|
|
700
|
+
target.evaluation = {}
|
|
701
|
+
target.evaluation["composer_escalation"] = {
|
|
702
|
+
"escalated": False,
|
|
703
|
+
"error": escalation_info.get("error", "unknown"),
|
|
704
|
+
"warnings": escalation_info.get("warnings", []),
|
|
705
|
+
"fallback": "scaffold_plan",
|
|
706
|
+
}
|
|
707
|
+
except Exception as exc:
|
|
708
|
+
if target.evaluation is None:
|
|
709
|
+
target.evaluation = {}
|
|
710
|
+
target.evaluation["composer_escalation"] = {
|
|
711
|
+
"escalated": False,
|
|
712
|
+
"error": f"escalation raised: {exc}",
|
|
713
|
+
"fallback": "scaffold_plan",
|
|
714
|
+
}
|
|
715
|
+
|
|
716
|
+
commit_result = await engine.commit_branch_async(
|
|
266
717
|
experiment,
|
|
267
718
|
branch_id,
|
|
268
719
|
ableton,
|
|
@@ -271,6 +722,19 @@ async def commit_experiment(
|
|
|
271
722
|
ctx=ctx,
|
|
272
723
|
)
|
|
273
724
|
|
|
725
|
+
# Surface escalation details on the commit response so the caller
|
|
726
|
+
# sees whether a scaffold or resolved plan was applied.
|
|
727
|
+
if escalation_info is not None and isinstance(commit_result, dict):
|
|
728
|
+
commit_result["composer_escalation"] = {
|
|
729
|
+
"escalated": bool(escalation_info.get("ok")),
|
|
730
|
+
"step_count": escalation_info.get("step_count"),
|
|
731
|
+
"layer_count": escalation_info.get("layer_count"),
|
|
732
|
+
"error": escalation_info.get("error"),
|
|
733
|
+
"warnings": escalation_info.get("warnings", []),
|
|
734
|
+
}
|
|
735
|
+
|
|
736
|
+
return commit_result
|
|
737
|
+
|
|
274
738
|
|
|
275
739
|
@mcp.tool()
|
|
276
740
|
def discard_experiment(
|