livepilot 1.13.0 → 1.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +137 -0
- package/README.md +6 -6
- package/m4l_device/LivePilot_Analyzer.amxd +0 -0
- package/m4l_device/livepilot_bridge.js +1 -1
- package/mcp_server/__init__.py +1 -1
- package/mcp_server/branches/__init__.py +2 -0
- package/mcp_server/branches/types.py +57 -1
- package/mcp_server/composer/__init__.py +2 -2
- package/mcp_server/composer/branch_producer.py +120 -0
- package/mcp_server/composer/tools.py +58 -1
- package/mcp_server/evaluation/policy.py +98 -0
- package/mcp_server/experiment/models.py +40 -1
- package/mcp_server/experiment/tools.py +283 -15
- package/mcp_server/server.py +1 -0
- package/mcp_server/synthesis_brain/adapters/analog.py +158 -52
- package/mcp_server/synthesis_brain/adapters/drift.py +156 -51
- package/mcp_server/synthesis_brain/adapters/meld.py +150 -40
- package/mcp_server/synthesis_brain/adapters/operator.py +137 -14
- package/mcp_server/synthesis_brain/adapters/wavetable.py +156 -20
- package/mcp_server/synthesis_brain/tools.py +231 -0
- package/package.json +2 -2
- package/remote_script/LivePilot/__init__.py +1 -1
- package/server.json +3 -3
|
@@ -120,6 +120,74 @@ class BranchOutcome:
|
|
|
120
120
|
return asdict(self)
|
|
121
121
|
|
|
122
122
|
|
|
123
|
+
def derive_goal_progress_from_fingerprint(
    fingerprint_diff: dict,
    target: Optional[dict] = None,
) -> tuple[float, int]:
    """Derive ``(goal_progress, measurable_count)`` from a fingerprint diff.

    The per-dimension diff between a branch's before/after timbral
    fingerprints is folded into the two numbers that
    ``classify_branch_outcome`` accepts as measurable evidence.

    Args:
        fingerprint_diff: mapping of dimension name -> signed delta, e.g.
            ``{"brightness": 0.12, "warmth": -0.05}``. Entries whose value
            is not a finite real number (``None``, strings, bools, NaN,
            +/-inf) are ignored, as are deltas smaller than the 0.02 noise
            floor.
        target: optional mapping of dimension name -> desired direction.
            Only the sign of each value is used. Dimensions that are
            missing from the target, whose target magnitude is below the
            noise floor, or whose target value is not a finite real number
            are treated as "target did not care" and skipped. A falsy
            target (``None`` or ``{}``) selects the untargeted scoring.

    Returns:
        ``(goal_progress, measurable_count)``. With a target,
        ``goal_progress`` is the signed sum of the deltas (each clamped to
        [-1, 1]) oriented along the target's sign: positive means the
        branch moved the way the target asked. Without a target, every
        counted dimension contributes half of its clamped magnitude, so
        the value is never negative. ``goal_progress`` is rounded to three
        decimals; ``measurable_count`` is the number of dimensions that
        contributed.
    """
    import math  # function-scope import keeps this block self-contained

    if not fingerprint_diff:
        return (0.0, 0)

    # Epsilon — diffs (and target values) this small are noise, not signal.
    eps = 0.02

    def _is_finite_number(value) -> bool:
        # bool is an int subclass and is never a real measurement here.
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            return False
        # NaN slips through an ``abs(x) < eps`` filter and is then turned
        # into +1.0 by max/min clamping, so reject non-finite floats early.
        return not isinstance(value, float) or math.isfinite(value)

    progress = 0.0
    count = 0
    for dim, delta in fingerprint_diff.items():
        if not _is_finite_number(delta) or abs(delta) < eps:
            continue
        # Cap each dimension's contribution at 1.0 in either direction.
        clamped = max(-1.0, min(1.0, delta))

        if target:
            target_val = target.get(dim, 0.0)
            if not _is_finite_number(target_val) or abs(target_val) < eps:
                continue  # target didn't care about this dimension
            count += 1
            # sign(target) * delta: movement toward the target is positive.
            progress += clamped if target_val > 0 else -clamped
        else:
            count += 1
            # Without a target we can't tell "good" from "bad" movement;
            # count it as weakly positive evidence that something changed.
            progress += abs(clamped) * 0.5

    return (round(progress, 3), count)
|
|
189
|
+
|
|
190
|
+
|
|
123
191
|
def classify_branch_outcome(
|
|
124
192
|
score: float,
|
|
125
193
|
*,
|
|
@@ -128,6 +196,8 @@ def classify_branch_outcome(
|
|
|
128
196
|
target_count: int = 0,
|
|
129
197
|
goal_progress: float = 0.0,
|
|
130
198
|
exploration_rules: bool = False,
|
|
199
|
+
fingerprint_diff: Optional[dict] = None,
|
|
200
|
+
timbral_target: Optional[dict] = None,
|
|
131
201
|
) -> BranchOutcome:
|
|
132
202
|
"""Classify a branch's terminal status from a score + optional hard-rule inputs.
|
|
133
203
|
|
|
@@ -143,9 +213,37 @@ def classify_branch_outcome(
|
|
|
143
213
|
protection violations still force undo (safety invariant);
|
|
144
214
|
all other failures downgrade to "interesting_but_failed".
|
|
145
215
|
|
|
216
|
+
PR4 additions (optional):
|
|
217
|
+
fingerprint_diff: output of synthesis_brain.diff_fingerprint
|
|
218
|
+
between before/after snapshots. When provided AND no caller-
|
|
219
|
+
supplied measurable_count/goal_progress were passed (both 0),
|
|
220
|
+
the classifier derives them from the diff — so the dimensions
|
|
221
|
+
of the TimbralFingerprint become the goal vector.
|
|
222
|
+
timbral_target: optional target fingerprint dict. Scores diff in
|
|
223
|
+
the target's direction (moving brighter counts positive when
|
|
224
|
+
target.brightness > 0). Omit when the branch had no specific
|
|
225
|
+
target; dimensions with non-trivial movement still contribute
|
|
226
|
+
measurable_count but progress is unsigned magnitude * 0.5.
|
|
227
|
+
|
|
146
228
|
Returns a BranchOutcome that callers can plug into branch.score /
|
|
147
229
|
.status / .evaluation without further interpretation.
|
|
148
230
|
"""
|
|
231
|
+
# PR4 — derive measurable evidence from fingerprint diff when the
|
|
232
|
+
# caller didn't supply their own. Keeps back-compat for existing
|
|
233
|
+
# callers that compute their own measurable inputs.
|
|
234
|
+
if (
|
|
235
|
+
fingerprint_diff
|
|
236
|
+
and measurable_count == 0
|
|
237
|
+
and abs(goal_progress) < 1e-6
|
|
238
|
+
):
|
|
239
|
+
derived_progress, derived_count = derive_goal_progress_from_fingerprint(
|
|
240
|
+
fingerprint_diff, target=timbral_target,
|
|
241
|
+
)
|
|
242
|
+
goal_progress = derived_progress
|
|
243
|
+
measurable_count = derived_count
|
|
244
|
+
# target_count should also reflect the derived dimensions so the
|
|
245
|
+
# hard-rule path treats this as a genuinely measurable outcome.
|
|
246
|
+
target_count = max(target_count, derived_count)
|
|
149
247
|
keep_change, failures = apply_hard_rules(
|
|
150
248
|
goal_progress=goal_progress,
|
|
151
249
|
collateral_damage=0.0, # not threaded here — branch lifecycle doesn't compute it yet
|
|
@@ -23,12 +23,37 @@ from ..branches import BranchSeed
|
|
|
23
23
|
|
|
24
24
|
@dataclass
|
|
25
25
|
class BranchSnapshot:
|
|
26
|
-
"""Captured state before or after a branch experiment.
|
|
26
|
+
"""Captured state before or after a branch experiment.
|
|
27
|
+
|
|
28
|
+
Pre-PR4 fields (spectrum / rms / peak / track_meters) stay the same —
|
|
29
|
+
they remain the fast-path evidence when render-verify isn't available
|
|
30
|
+
or wasn't opted in.
|
|
31
|
+
|
|
32
|
+
PR4 adds render-based fields that are populated only when the
|
|
33
|
+
experiment runs with render_verify=True:
|
|
34
|
+
|
|
35
|
+
capture_path: path to the captured audio file (useful for re-analysis
|
|
36
|
+
or user audition of the branch output).
|
|
37
|
+
loudness: {lufs, lra, rms, peak, crest} from analyze_loudness.
|
|
38
|
+
spectral_shape: {centroid, flatness, rolloff, crest} from FluCoMa or
|
|
39
|
+
the offline analyzer.
|
|
40
|
+
fingerprint: TimbralFingerprint.to_dict() extracted from the
|
|
41
|
+
captured audio.
|
|
42
|
+
|
|
43
|
+
The fingerprint is what classify_branch_outcome reads to derive a
|
|
44
|
+
real goal_progress + measurable_count instead of relying on the
|
|
45
|
+
inline meter-based heuristic alone.
|
|
46
|
+
"""
|
|
27
47
|
spectrum: Optional[dict] = None
|
|
28
48
|
rms: Optional[float] = None
|
|
29
49
|
peak: Optional[float] = None
|
|
30
50
|
track_meters: Optional[list] = None
|
|
31
51
|
timestamp_ms: int = 0
|
|
52
|
+
# PR4 — render-based evidence (opt-in via render_verify flag)
|
|
53
|
+
capture_path: Optional[str] = None
|
|
54
|
+
loudness: Optional[dict] = None
|
|
55
|
+
spectral_shape: Optional[dict] = None
|
|
56
|
+
fingerprint: Optional[dict] = None # TimbralFingerprint.to_dict()
|
|
32
57
|
|
|
33
58
|
def to_dict(self) -> dict:
|
|
34
59
|
d = {}
|
|
@@ -40,6 +65,14 @@ class BranchSnapshot:
|
|
|
40
65
|
d["peak"] = self.peak
|
|
41
66
|
if self.track_meters is not None:
|
|
42
67
|
d["track_meters"] = self.track_meters
|
|
68
|
+
if self.capture_path is not None:
|
|
69
|
+
d["capture_path"] = self.capture_path
|
|
70
|
+
if self.loudness is not None:
|
|
71
|
+
d["loudness"] = self.loudness
|
|
72
|
+
if self.spectral_shape is not None:
|
|
73
|
+
d["spectral_shape"] = self.spectral_shape
|
|
74
|
+
if self.fingerprint is not None:
|
|
75
|
+
d["fingerprint"] = self.fingerprint
|
|
43
76
|
d["timestamp_ms"] = self.timestamp_ms
|
|
44
77
|
return d
|
|
45
78
|
|
|
@@ -152,6 +185,12 @@ class ExperimentBranch:
|
|
|
152
185
|
d["analytical_only"] = (
|
|
153
186
|
self.seed.analytical_only or self.compiled_plan is None
|
|
154
187
|
)
|
|
188
|
+
# Shortcut to the seed's producer_payload so downstream callers
|
|
189
|
+
# (composer winner-commit, synthesis re-target, provenance logs)
|
|
190
|
+
# don't have to reach into d["seed"]["producer_payload"] every
|
|
191
|
+
# time. The full seed dict is still available for producers
|
|
192
|
+
# that need other fields.
|
|
193
|
+
d["producer_payload"] = dict(self.seed.producer_payload or {})
|
|
155
194
|
return d
|
|
156
195
|
|
|
157
196
|
|
|
@@ -29,7 +29,11 @@ def _get_ableton(ctx: Context):
|
|
|
29
29
|
|
|
30
30
|
|
|
31
31
|
def _capture_snapshot(ctx: Context) -> BranchSnapshot:
|
|
32
|
-
"""Capture current session state as a BranchSnapshot.
|
|
32
|
+
"""Capture current session state as a BranchSnapshot (fast path).
|
|
33
|
+
|
|
34
|
+
Uses live meters + spectral cache. No audio rendering. Called when
|
|
35
|
+
render_verify is off (default) — adds no latency to branch trials.
|
|
36
|
+
"""
|
|
33
37
|
ableton = _get_ableton(ctx)
|
|
34
38
|
spectral = ctx.lifespan_context.get("spectral")
|
|
35
39
|
|
|
@@ -58,6 +62,116 @@ def _capture_snapshot(ctx: Context) -> BranchSnapshot:
|
|
|
58
62
|
return snapshot
|
|
59
63
|
|
|
60
64
|
|
|
65
|
+
def _capture_snapshot_with_render_verify(
    ctx: Context, duration_seconds: float = 2.0,
) -> BranchSnapshot:
    """Capture the fast-path snapshot, then try to add render-based evidence.

    Stages, each best-effort:
      1. ``capture_audio`` for ``duration_seconds`` from "master", sent
         through the M4L bridge found under ``ctx.lifespan_context["m4l"]``.
      2. ``analyze_loudness`` on the captured file.
      3. ``analyze_spectrum_offline`` on the captured file.
      4. ``extract_timbre_fingerprint`` from the spectrum + loudness results.

    On success the corresponding fields (``capture_path``, ``loudness``,
    ``spectral_shape``, ``fingerprint``) are set on the snapshot. When a
    stage fails, its field is left as the fast-path snapshot had it and a
    debug log is emitted; the function never raises for render failures.

    The fast-path snapshot is returned unchanged when:
      * there is no bridge, or the capture call raises or yields no path;
      * ``bridge.send_command`` returns an awaitable — this function is
        synchronous and cannot await it.

    Args:
        ctx: request context; only ``ctx.lifespan_context`` is read here
            (the rest is consumed by ``_capture_snapshot``).
        duration_seconds: capture length forwarded to ``capture_audio``.
            The capture itself adds roughly this much latency per call,
            plus whatever the offline analysis costs.

    Returns:
        The ``BranchSnapshot`` from ``_capture_snapshot``, enriched with
        whichever render-based fields could be produced.
    """
    import inspect

    snapshot = _capture_snapshot(ctx)
    bridge = ctx.lifespan_context.get("m4l")

    # Step 1: capture_audio is a bridge command. There is no alternative
    # transport for it, so without a bridge we simply degrade to fast path.
    capture_path = None
    if bridge is not None:
        try:
            maybe = bridge.send_command(
                "capture_audio", float(duration_seconds), "master", "",
            )
            # bridge.send_command may return an awaitable or a plain dict.
            if inspect.isawaitable(maybe):
                # We cannot await from this sync function. Close a bare
                # coroutine so dropping it does not trigger a
                # "coroutine ... was never awaited" RuntimeWarning.
                if inspect.iscoroutine(maybe):
                    maybe.close()
                logger.debug("capture_audio returned awaitable in sync context; skipping render-verify for this snapshot")
                return snapshot
            if isinstance(maybe, dict):
                capture_path = (
                    maybe.get("file_path")
                    or maybe.get("path")
                    or maybe.get("filename")
                )
        except Exception as exc:
            logger.debug("render-verify capture_audio failed: %s", exc)
    if not capture_path:
        return snapshot  # graceful degrade — caller still gets fast-path data
    snapshot.capture_path = capture_path

    # Step 2: offline loudness analysis of the captured file.
    try:
        from ..tools.analyzer import analyze_loudness as _analyze_loudness
        loud = _analyze_loudness(capture_path)
        # Results carrying an "error" key are treated as a failed stage.
        if isinstance(loud, dict) and "error" not in loud:
            snapshot.loudness = loud
    except Exception as exc:
        logger.debug("render-verify analyze_loudness failed: %s", exc)

    # Step 3: offline spectrum analysis of the captured file.
    try:
        from ..tools.analyzer import analyze_spectrum_offline as _analyze_spectrum
        spec = _analyze_spectrum(capture_path)
        if isinstance(spec, dict) and "error" not in spec:
            snapshot.spectral_shape = {
                "centroid": spec.get("centroid_hz"),
                "flatness": spec.get("spectral_flatness"),
                "rolloff": spec.get("rolloff_hz"),
                "bandwidth": spec.get("bandwidth_hz"),
                # Re-key the analyzer's 5-band balance into the 8-band
                # names passed to the fingerprint extractor below.
                "bands": _map_5band_to_8band(spec.get("band_balance", {})),
            }
    except Exception as exc:
        logger.debug("render-verify analyze_spectrum_offline failed: %s", exc)

    # Step 4: build the fingerprint from whatever analysis succeeded. With
    # no analysis at all there is nothing measured to fingerprint, so leave
    # the field unset rather than attach a fingerprint derived from nothing.
    if snapshot.loudness is None and snapshot.spectral_shape is None:
        logger.debug("render-verify produced no analysis data; skipping fingerprint extraction")
        return snapshot

    try:
        from ..synthesis_brain import extract_timbre_fingerprint
        fp = extract_timbre_fingerprint(
            spectrum=(snapshot.spectral_shape or {}).get("bands"),
            loudness=snapshot.loudness,
            spectral_shape=snapshot.spectral_shape,
        )
        snapshot.fingerprint = fp.to_dict()
    except Exception as exc:
        logger.debug("render-verify extract_timbre_fingerprint failed: %s", exc)

    return snapshot
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def _map_5band_to_8band(b5: dict) -> dict:
|
|
154
|
+
"""Adapt analyze_spectrum_offline's 5-band balance to the 8-band shape
|
|
155
|
+
extract_timbre_fingerprint expects.
|
|
156
|
+
|
|
157
|
+
5-band: sub_60hz, low_250hz, mid_2khz, high_8khz, air_16khz
|
|
158
|
+
8-band: sub, low, low_mid, mid, high_mid, high, very_high, ultra
|
|
159
|
+
"""
|
|
160
|
+
if not isinstance(b5, dict):
|
|
161
|
+
return {}
|
|
162
|
+
# Conservative mapping — split each 5-band bucket across the 8-band shape.
|
|
163
|
+
return {
|
|
164
|
+
"sub": float(b5.get("sub_60hz", 0.0) or 0.0),
|
|
165
|
+
"low": float(b5.get("low_250hz", 0.0) or 0.0) * 0.6,
|
|
166
|
+
"low_mid": float(b5.get("low_250hz", 0.0) or 0.0) * 0.4,
|
|
167
|
+
"mid": float(b5.get("mid_2khz", 0.0) or 0.0) * 0.6,
|
|
168
|
+
"high_mid": float(b5.get("mid_2khz", 0.0) or 0.0) * 0.4,
|
|
169
|
+
"high": float(b5.get("high_8khz", 0.0) or 0.0) * 0.6,
|
|
170
|
+
"very_high": float(b5.get("high_8khz", 0.0) or 0.0) * 0.4,
|
|
171
|
+
"ultra": float(b5.get("air_16khz", 0.0) or 0.0),
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
|
|
61
175
|
@mcp.tool()
|
|
62
176
|
def create_experiment(
|
|
63
177
|
ctx: Context,
|
|
@@ -174,6 +288,8 @@ async def run_experiment(
|
|
|
174
288
|
ctx: Context,
|
|
175
289
|
experiment_id: str,
|
|
176
290
|
exploration_rules: bool = False,
|
|
291
|
+
render_verify: bool = False,
|
|
292
|
+
render_duration_seconds: float = 2.0,
|
|
177
293
|
) -> dict:
|
|
178
294
|
"""Run all pending branches in an experiment.
|
|
179
295
|
|
|
@@ -189,14 +305,28 @@ async def run_experiment(
|
|
|
189
305
|
|
|
190
306
|
Branches run sequentially (Ableton has linear undo).
|
|
191
307
|
|
|
192
|
-
exploration_rules
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
308
|
+
exploration_rules: when True, branches that fail technical gates
|
|
309
|
+
(score < 0.40, non-positive measurable delta) are classified as
|
|
310
|
+
"interesting_but_failed" instead of "failed" — they stay in the
|
|
311
|
+
experiment for audit but don't appear in the ranking. Protection
|
|
312
|
+
violations STILL force undo regardless of this flag — that's a
|
|
313
|
+
safety invariant, not a taste judgment.
|
|
314
|
+
|
|
315
|
+
render_verify (PR4/v2): when True, each branch also captures audio
|
|
316
|
+
before and after execution, analyzes spectrum + loudness offline,
|
|
317
|
+
extracts a TimbralFingerprint, and attaches the before/after
|
|
318
|
+
fingerprint + diff to the branch snapshots. The diff is fed into
|
|
319
|
+
classify_branch_outcome as real measurable evidence — the
|
|
320
|
+
classifier no longer relies on meter heuristics alone. Default
|
|
321
|
+
False preserves speed; opt in when you want the classifier to
|
|
322
|
+
respond to spectral movement, not just track-meter drops.
|
|
323
|
+
|
|
324
|
+
render_duration_seconds: capture length per snapshot when
|
|
325
|
+
render_verify is on. Default 2.0 seconds. Each branch adds
|
|
326
|
+
~2 * duration_seconds of capture time plus ~1-2s of offline
|
|
327
|
+
analysis — a 3-branch experiment at 2s adds ~15-18s.
|
|
328
|
+
|
|
329
|
+
Default render_verify=False preserves pre-PR4 behavior exactly.
|
|
200
330
|
"""
|
|
201
331
|
experiment = engine.get_experiment(experiment_id)
|
|
202
332
|
if not experiment:
|
|
@@ -263,12 +393,22 @@ async def run_experiment(
|
|
|
263
393
|
plan = compiler.compile(move, kernel)
|
|
264
394
|
compiled_dict = plan.to_dict()
|
|
265
395
|
|
|
396
|
+
# Pick the capture function — render-verify mode captures audio
|
|
397
|
+
# and extracts a TimbralFingerprint, adding latency but giving
|
|
398
|
+
# classify_branch_outcome real measurable evidence.
|
|
399
|
+
if render_verify:
|
|
400
|
+
capture_fn = lambda: _capture_snapshot_with_render_verify(
|
|
401
|
+
ctx, duration_seconds=render_duration_seconds,
|
|
402
|
+
)
|
|
403
|
+
else:
|
|
404
|
+
capture_fn = lambda: _capture_snapshot(ctx)
|
|
405
|
+
|
|
266
406
|
# Run the branch through the async router
|
|
267
407
|
await engine.run_branch_async(
|
|
268
408
|
branch=branch,
|
|
269
409
|
ableton=ableton,
|
|
270
410
|
compiled_plan=compiled_dict,
|
|
271
|
-
capture_fn=
|
|
411
|
+
capture_fn=capture_fn,
|
|
272
412
|
bridge=bridge,
|
|
273
413
|
mcp_registry=mcp_registry,
|
|
274
414
|
ctx=ctx,
|
|
@@ -303,19 +443,50 @@ async def run_experiment(
|
|
|
303
443
|
score += 0.1 # presence-of-data bonus
|
|
304
444
|
|
|
305
445
|
score = round(score, 3)
|
|
446
|
+
|
|
447
|
+
# PR4 — fingerprint diff to feed the classifier when render-verify
|
|
448
|
+
# is on. When both before/after have fingerprints, compute the
|
|
449
|
+
# per-dimension diff via synthesis_brain.diff_fingerprint and let
|
|
450
|
+
# classify_branch_outcome derive real measurable_count + goal_progress
|
|
451
|
+
# from it. Much stronger evidence than the meter heuristic alone.
|
|
452
|
+
fingerprint_diff = None
|
|
453
|
+
timbral_target = None
|
|
454
|
+
before_fp = before.get("fingerprint")
|
|
455
|
+
after_fp = after.get("fingerprint")
|
|
456
|
+
if before_fp and after_fp:
|
|
457
|
+
try:
|
|
458
|
+
from ..synthesis_brain import diff_fingerprint, TimbralFingerprint
|
|
459
|
+
before_obj = TimbralFingerprint(**{
|
|
460
|
+
k: v for k, v in before_fp.items()
|
|
461
|
+
if k in TimbralFingerprint.__dataclass_fields__
|
|
462
|
+
})
|
|
463
|
+
after_obj = TimbralFingerprint(**{
|
|
464
|
+
k: v for k, v in after_fp.items()
|
|
465
|
+
if k in TimbralFingerprint.__dataclass_fields__
|
|
466
|
+
})
|
|
467
|
+
fingerprint_diff = diff_fingerprint(before_obj, after_obj)
|
|
468
|
+
except Exception as exc:
|
|
469
|
+
logger.debug("fingerprint diff failed: %s", exc)
|
|
470
|
+
|
|
471
|
+
# If the branch's seed was a synthesis seed with a timbral target
|
|
472
|
+
# in its producer_payload, score diff in that target's direction.
|
|
473
|
+
if branch.seed is not None and branch.seed.source == "synthesis":
|
|
474
|
+
target_hint = (branch.seed.producer_payload or {}).get("timbral_target")
|
|
475
|
+
if isinstance(target_hint, dict):
|
|
476
|
+
timbral_target = target_hint
|
|
477
|
+
|
|
306
478
|
outcome = classify_branch_outcome(
|
|
307
479
|
score=score,
|
|
308
480
|
protection_violated=protection_violated,
|
|
309
|
-
# Minimal hard-rule inputs — the heuristic doesn't compute
|
|
310
|
-
# measurable_count / goal_progress deltas. target_count=0 and
|
|
311
|
-
# measurable_count=0 lets rule 1 defer to score-only judgment.
|
|
312
481
|
measurable_count=0,
|
|
313
482
|
target_count=0,
|
|
314
483
|
goal_progress=0.0,
|
|
315
484
|
exploration_rules=exploration_rules,
|
|
485
|
+
fingerprint_diff=fingerprint_diff,
|
|
486
|
+
timbral_target=timbral_target,
|
|
316
487
|
)
|
|
317
488
|
|
|
318
|
-
|
|
489
|
+
result_eval = {
|
|
319
490
|
"score": outcome.score,
|
|
320
491
|
"keep_change": outcome.keep_change,
|
|
321
492
|
"status": outcome.status,
|
|
@@ -323,6 +494,13 @@ async def run_experiment(
|
|
|
323
494
|
"note": outcome.note,
|
|
324
495
|
"lost_tracks": lost_tracks,
|
|
325
496
|
}
|
|
497
|
+
# Surface fingerprint evidence on the evaluation dict so
|
|
498
|
+
# compare_experiments can show per-branch spectral deltas.
|
|
499
|
+
if fingerprint_diff is not None:
|
|
500
|
+
result_eval["fingerprint_diff"] = fingerprint_diff
|
|
501
|
+
result_eval["fingerprint_before"] = before_fp
|
|
502
|
+
result_eval["fingerprint_after"] = after_fp
|
|
503
|
+
return result_eval
|
|
326
504
|
|
|
327
505
|
engine.evaluate_branch(branch, eval_fn)
|
|
328
506
|
|
|
@@ -458,7 +636,84 @@ async def commit_experiment(
|
|
|
458
636
|
bridge = ctx.lifespan_context.get("m4l")
|
|
459
637
|
mcp_registry = ctx.lifespan_context.get("mcp_dispatch", {})
|
|
460
638
|
|
|
461
|
-
|
|
639
|
+
# PR3 — composer winner escalation. When the winning branch came from
|
|
640
|
+
# the composer producer, the plan we auditioned was a lightweight
|
|
641
|
+
# scaffold (set_tempo + create_midi_track + create_scene/set_scene_name).
|
|
642
|
+
# Commit should deliver a populated session, not an empty skeleton.
|
|
643
|
+
# Re-run ComposerEngine.compose() on the intent captured in the seed's
|
|
644
|
+
# producer_payload, replace the branch's compiled_plan with the full
|
|
645
|
+
# resolved plan, then commit through the normal async router.
|
|
646
|
+
#
|
|
647
|
+
# When escalation fails (missing intent, zero resolved layers, etc.),
|
|
648
|
+
# fall back to committing the scaffold. Users get tracks + scenes
|
|
649
|
+
# they can populate manually, which is better than an error.
|
|
650
|
+
escalation_info = None
|
|
651
|
+
if (
|
|
652
|
+
target.seed is not None
|
|
653
|
+
and target.seed.source == "composer"
|
|
654
|
+
and target.seed.producer_payload
|
|
655
|
+
):
|
|
656
|
+
try:
|
|
657
|
+
from ..composer import escalate_composer_branch
|
|
658
|
+
splice_client = ctx.lifespan_context.get("splice_client")
|
|
659
|
+
# browser_client only present on servers with live browser wiring;
|
|
660
|
+
# pass None defensively.
|
|
661
|
+
browser_client = ctx.lifespan_context.get("browser_client")
|
|
662
|
+
search_roots = ctx.lifespan_context.get("sample_search_roots") or []
|
|
663
|
+
|
|
664
|
+
escalation_info = await escalate_composer_branch(
|
|
665
|
+
producer_payload=target.seed.producer_payload,
|
|
666
|
+
search_roots=search_roots,
|
|
667
|
+
splice_client=splice_client,
|
|
668
|
+
browser_client=browser_client,
|
|
669
|
+
)
|
|
670
|
+
|
|
671
|
+
if escalation_info.get("ok"):
|
|
672
|
+
# Swap the compiled_plan for the fully resolved one before
|
|
673
|
+
# commit_branch_async runs it. Keep the old scaffold on the
|
|
674
|
+
# evaluation dict for audit.
|
|
675
|
+
old_plan = target.compiled_plan or {}
|
|
676
|
+
new_plan = {
|
|
677
|
+
"steps": escalation_info["plan"],
|
|
678
|
+
"step_count": escalation_info["step_count"],
|
|
679
|
+
"summary": (
|
|
680
|
+
f"Composer escalated: {escalation_info['layer_count']} "
|
|
681
|
+
f"layers, {escalation_info['step_count']} steps "
|
|
682
|
+
f"({len(escalation_info['resolved_samples'])} samples resolved)"
|
|
683
|
+
),
|
|
684
|
+
}
|
|
685
|
+
target.compiled_plan = new_plan
|
|
686
|
+
if target.evaluation is None:
|
|
687
|
+
target.evaluation = {}
|
|
688
|
+
target.evaluation["composer_escalation"] = {
|
|
689
|
+
"escalated": True,
|
|
690
|
+
"scaffold_step_count": old_plan.get("step_count", 0),
|
|
691
|
+
"resolved_step_count": escalation_info["step_count"],
|
|
692
|
+
"layer_count": escalation_info["layer_count"],
|
|
693
|
+
"resolved_samples": escalation_info["resolved_samples"],
|
|
694
|
+
"warnings": escalation_info.get("warnings", []),
|
|
695
|
+
}
|
|
696
|
+
else:
|
|
697
|
+
# Record the fallback reason on evaluation so compare /
|
|
698
|
+
# commit responses carry explicit provenance.
|
|
699
|
+
if target.evaluation is None:
|
|
700
|
+
target.evaluation = {}
|
|
701
|
+
target.evaluation["composer_escalation"] = {
|
|
702
|
+
"escalated": False,
|
|
703
|
+
"error": escalation_info.get("error", "unknown"),
|
|
704
|
+
"warnings": escalation_info.get("warnings", []),
|
|
705
|
+
"fallback": "scaffold_plan",
|
|
706
|
+
}
|
|
707
|
+
except Exception as exc:
|
|
708
|
+
if target.evaluation is None:
|
|
709
|
+
target.evaluation = {}
|
|
710
|
+
target.evaluation["composer_escalation"] = {
|
|
711
|
+
"escalated": False,
|
|
712
|
+
"error": f"escalation raised: {exc}",
|
|
713
|
+
"fallback": "scaffold_plan",
|
|
714
|
+
}
|
|
715
|
+
|
|
716
|
+
commit_result = await engine.commit_branch_async(
|
|
462
717
|
experiment,
|
|
463
718
|
branch_id,
|
|
464
719
|
ableton,
|
|
@@ -467,6 +722,19 @@ async def commit_experiment(
|
|
|
467
722
|
ctx=ctx,
|
|
468
723
|
)
|
|
469
724
|
|
|
725
|
+
# Surface escalation details on the commit response so the caller
|
|
726
|
+
# sees whether a scaffold or resolved plan was applied.
|
|
727
|
+
if escalation_info is not None and isinstance(commit_result, dict):
|
|
728
|
+
commit_result["composer_escalation"] = {
|
|
729
|
+
"escalated": bool(escalation_info.get("ok")),
|
|
730
|
+
"step_count": escalation_info.get("step_count"),
|
|
731
|
+
"layer_count": escalation_info.get("layer_count"),
|
|
732
|
+
"error": escalation_info.get("error"),
|
|
733
|
+
"warnings": escalation_info.get("warnings", []),
|
|
734
|
+
}
|
|
735
|
+
|
|
736
|
+
return commit_result
|
|
737
|
+
|
|
470
738
|
|
|
471
739
|
@mcp.tool()
|
|
472
740
|
def discard_experiment(
|
package/mcp_server/server.py
CHANGED
|
@@ -305,6 +305,7 @@ from .device_forge import tools as device_forge_tools # noqa: F401, E40
|
|
|
305
305
|
from .sample_engine import tools as sample_engine_tools # noqa: F401, E402
|
|
306
306
|
from .atlas import tools as atlas_tools # noqa: F401, E402
|
|
307
307
|
from .composer import tools as composer_tools # noqa: F401, E402
|
|
308
|
+
from .synthesis_brain import tools as synthesis_brain_tools # noqa: F401, E402
|
|
308
309
|
from .tools import diagnostics # noqa: F401, E402
|
|
309
310
|
from .tools import miditool # noqa: F401, E402
|
|
310
311
|
|