livepilot 1.12.2 → 1.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +219 -0
- package/README.md +7 -7
- package/m4l_device/LivePilot_Analyzer.amxd +0 -0
- package/m4l_device/livepilot_bridge.js +1 -1
- package/mcp_server/__init__.py +1 -1
- package/mcp_server/branches/__init__.py +34 -0
- package/mcp_server/branches/types.py +286 -0
- package/mcp_server/composer/__init__.py +10 -1
- package/mcp_server/composer/branch_producer.py +349 -0
- package/mcp_server/composer/tools.py +58 -1
- package/mcp_server/evaluation/policy.py +227 -2
- package/mcp_server/experiment/engine.py +47 -11
- package/mcp_server/experiment/models.py +112 -8
- package/mcp_server/experiment/tools.py +502 -38
- package/mcp_server/memory/taste_graph.py +84 -11
- package/mcp_server/persistence/taste_store.py +21 -5
- package/mcp_server/runtime/session_kernel.py +46 -0
- package/mcp_server/runtime/tools.py +29 -3
- package/mcp_server/server.py +1 -0
- package/mcp_server/synthesis_brain/__init__.py +53 -0
- package/mcp_server/synthesis_brain/adapters/__init__.py +34 -0
- package/mcp_server/synthesis_brain/adapters/analog.py +273 -0
- package/mcp_server/synthesis_brain/adapters/base.py +86 -0
- package/mcp_server/synthesis_brain/adapters/drift.py +271 -0
- package/mcp_server/synthesis_brain/adapters/meld.py +261 -0
- package/mcp_server/synthesis_brain/adapters/operator.py +292 -0
- package/mcp_server/synthesis_brain/adapters/wavetable.py +364 -0
- package/mcp_server/synthesis_brain/engine.py +91 -0
- package/mcp_server/synthesis_brain/models.py +121 -0
- package/mcp_server/synthesis_brain/timbre.py +194 -0
- package/mcp_server/synthesis_brain/tools.py +231 -0
- package/mcp_server/tools/_conductor.py +144 -0
- package/mcp_server/wonder_mode/engine.py +324 -0
- package/mcp_server/wonder_mode/tools.py +153 -1
- package/package.json +2 -2
- package/remote_script/LivePilot/__init__.py +1 -1
- package/server.json +3 -3
|
@@ -0,0 +1,349 @@
|
|
|
1
|
+
"""Composer branch producer — emit section-hypothesis BranchSeeds.
|
|
2
|
+
|
|
3
|
+
PR11 adds a branch-native entry point alongside the existing compose()
|
|
4
|
+
pipeline. Instead of a single deterministic layer plan, callers can
|
|
5
|
+
request N distinct compositional hypotheses and audition them via
|
|
6
|
+
create_experiment(seeds=..., compiled_plans=...).
|
|
7
|
+
|
|
8
|
+
Design:
|
|
9
|
+
A composer branch is a CompositionIntent + variant_strategy. Three
|
|
10
|
+
canned strategies are shipped in PR11:
|
|
11
|
+
|
|
12
|
+
"canonical" — intent unchanged, layer plan uses genre defaults
|
|
13
|
+
"energy_shift" — intent.energy inverted around 0.5 (dense ⇄ sparse)
|
|
14
|
+
"layer_contrast" — one role swapped in the layer plan (e.g. bass
|
|
15
|
+
role replaced with pad-anchor, or percussion
|
|
16
|
+
stripped to emphasize melodic content)
|
|
17
|
+
|
|
18
|
+
Seeds carry source="composer". Each branch produces a pre-compiled
|
|
19
|
+
scaffold plan (built from plan_layers/plan_sections, not compose()) so
|
|
20
|
+
run_experiment respects the plans without re-compiling. Later PRs
|
|
21
|
+
can add more strategies (key-shift, section-reorder, tempo-halftime).
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
import hashlib
|
|
27
|
+
from typing import Optional
|
|
28
|
+
|
|
29
|
+
from ..branches import BranchSeed, freeform_seed
|
|
30
|
+
from .prompt_parser import parse_prompt, CompositionIntent
|
|
31
|
+
from .layer_planner import plan_layers, plan_sections
|
|
32
|
+
from .engine import ComposerEngine, CompositionResult
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# Strategy registry — each function takes an intent and returns (modified
|
|
36
|
+
# intent, distinctness_reason, novelty_label, risk_label).
|
|
37
|
+
def _strategy_canonical(intent: CompositionIntent):
|
|
38
|
+
return (
|
|
39
|
+
intent,
|
|
40
|
+
"baseline composition with genre defaults",
|
|
41
|
+
"safe",
|
|
42
|
+
"low",
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _strategy_energy_shift(intent: CompositionIntent):
    """Produce a variant whose energy is mirrored around 0.5.

    A new CompositionIntent is built with every field copied from
    ``intent`` (list fields are shallow-copied) except ``energy``, which
    becomes ``round(1.0 - intent.energy, 2)``.

    Returns a 4-tuple ``(new_intent, distinctness_reason, novelty_label,
    risk_label)``. The reason string names the direction of the shift:
    "denser" when energy rose, "sparser" when it fell, and "unchanged"
    when the input already sat at the midpoint so the mirror is a no-op.
    """
    new = CompositionIntent(
        genre=intent.genre,
        sub_genre=intent.sub_genre,
        mood=intent.mood,
        tempo=intent.tempo,
        key=intent.key,
        descriptors=list(intent.descriptors),
        explicit_elements=list(intent.explicit_elements),
        energy=round(1.0 - intent.energy, 2),
        layer_count=intent.layer_count,
        duration_bars=intent.duration_bars,
    )
    # Three-way comparison: energy == 0.5 mirrors onto itself, and
    # reporting that as "sparser" would misdescribe the branch.
    if new.energy > intent.energy:
        direction = "denser"
    elif new.energy < intent.energy:
        direction = "sparser"
    else:
        direction = "unchanged"
    return (
        new,
        f"energy shifted from {intent.energy:.1f} → {new.energy:.1f} ({direction})",
        "strong",
        "low",
    )
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _strategy_layer_contrast(intent: CompositionIntent):
    """Produce a variant that asks for a pad anchor instead of a bass line.

    Every field is copied from ``intent`` (list fields are shallow-copied);
    ``explicit_elements`` additionally receives the two marker tokens
    ``"pad_anchor"`` and ``"no_bass"``.

    Returns a 4-tuple ``(new_intent, distinctness_reason, novelty_label,
    risk_label)``.
    """
    # NOTE(review): presumably the layer planner recognises these two
    # tokens and drops the bass role — confirm against plan_layers.
    contrast_elements = list(intent.explicit_elements) + ["pad_anchor", "no_bass"]
    variant = CompositionIntent(
        genre=intent.genre,
        sub_genre=intent.sub_genre,
        mood=intent.mood,
        tempo=intent.tempo,
        key=intent.key,
        descriptors=list(intent.descriptors),
        explicit_elements=contrast_elements,
        energy=intent.energy,
        layer_count=intent.layer_count,
        duration_bars=intent.duration_bars,
    )
    reason = "layer contrast — pad anchor instead of bass line"
    return variant, reason, "unexpected", "medium"
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
# Ordered (name, strategy_fn) registry. Order matters: the freshness gate
# in propose_composer_branches takes a prefix of this list, so entries
# must run from most conservative to most novel.
_STRATEGIES = list(zip(
    ("canonical", "energy_shift", "layer_contrast"),
    (_strategy_canonical, _strategy_energy_shift, _strategy_layer_contrast),
))
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _short_id(prefix: str, key: str) -> str:
|
|
99
|
+
h = hashlib.sha256(f"{prefix}:{key}".encode()).hexdigest()[:10]
|
|
100
|
+
return f"{prefix}_{h}"
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def propose_composer_branches(
    request_text: str,
    kernel: Optional[dict] = None,
    count: int = 2,
    search_roots: Optional[list] = None,
) -> list[tuple[BranchSeed, dict]]:
    """Emit composer-source branch seeds with pre-compiled scaffold plans.

    request_text: the natural-language composition prompt.
    kernel: optional SessionKernel dict — only ``freshness`` is read. It
        gates which strategies are eligible:
            freshness < 0.4        -> canonical only
            0.4 <= freshness < 0.7 -> canonical + energy_shift
            freshness >= 0.7       -> all three strategies
        A missing value defaults to 0.5. An explicit numeric 0.0 is
        honoured as "most conservative" rather than being replaced by
        the default.
    count: desired number of branches, clamped to 1..(number of
        strategies eligible at this freshness).
    search_roots: accepted for interface compatibility; this function
        does not currently read it (scaffold plans do no sample
        resolution).

    Returns a list of (BranchSeed, compiled_plan_dict) tuples. Each plan
    is a dict with {"steps": [...], "step_count": N, "summary": "..."}
    as produced by _build_section_hypothesis_plan. A strategy that raises
    is logged and skipped, so the list may be shorter than ``count``.
    """
    import logging

    logger = logging.getLogger(__name__)

    kernel = kernel or {}
    raw_freshness = kernel.get("freshness", 0.5)
    if isinstance(raw_freshness, (int, float)) and not isinstance(raw_freshness, bool):
        # Real numbers are taken at face value. The previous ``x or 0.5``
        # coercion silently turned 0.0 into 0.5 and unlocked energy_shift
        # for callers that had asked for the safest setting.
        freshness = float(raw_freshness)
    else:
        # Non-numeric input keeps the historical fallback behaviour.
        freshness = float(raw_freshness or 0.5)

    intent = parse_prompt(request_text)

    # Gate high-novelty strategies on freshness.
    if freshness < 0.4:
        strategies = [_STRATEGIES[0]]  # canonical only
    elif freshness < 0.7:
        strategies = _STRATEGIES[:2]  # canonical + energy_shift
    else:
        strategies = _STRATEGIES  # all three

    count = max(1, min(count, len(strategies)))
    results: list[tuple[BranchSeed, dict]] = []

    for name, strategy_fn in strategies[:count]:
        try:
            variant_intent, reason, novelty, risk = strategy_fn(intent)
            plan = _build_section_hypothesis_plan(variant_intent, name)

            seed = freeform_seed(
                seed_id=_short_id(f"cmp_{name}", request_text),
                hypothesis=f"Composer branch ({name}): {reason}",
                source="composer",
                novelty_label=novelty,
                risk_label=risk,
                distinctness_reason=reason,
                # Carry the variant intent + strategy so the winner can
                # later be rehydrated and run through the full
                # ComposerEngine.compose() pipeline instead of committing
                # only the scaffold.
                producer_payload={
                    "strategy": name,
                    "intent": variant_intent.to_dict(),
                    "request_text": request_text,
                    "reason": reason,
                },
            )
            results.append((seed, plan))
        except Exception as exc:
            # Deliberate best-effort: one strategy's failure must not
            # kill the remaining strategies.
            logger.warning("composer strategy %s failed: %s", name, exc)
            continue

    return results
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
async def escalate_composer_branch(
    producer_payload: dict,
    search_roots: Optional[list] = None,
    splice_client: object = None,
    browser_client: object = None,
    max_credits: int = 10,
) -> dict:
    """Run the full ComposerEngine.compose() pipeline for a committed
    composer branch, using the CompositionIntent captured in the seed's
    producer_payload at emit time.

    producer_payload: the seed payload; its ``"intent"`` entry is
        rehydrated into a CompositionIntent. Keys CompositionIntent does
        not understand are ignored.
    search_roots / splice_client / browser_client / max_credits: passed
        through to ComposerEngine.compose() (``search_roots`` defaults to
        an empty list).

    Returns a dict. On success:
        ok: True
        plan: list of executable steps (the full resolved plan, not the
            scaffolding the branch was auditioned with)
        step_count: int
        layer_count: int
        resolved_samples: dict
        credits_estimated: value reported by the compose result
        warnings: list
        intent_used: the rehydrated intent as a dict
    On failure:
        ok: False
        error: str
        warnings / resolved_samples: present only when compose() ran but
            produced no plan or no layers.

    When ok=False, callers should fall back to committing the scaffold
    plan instead of dropping the branch. Failures are reported through
    the returned dict, and rehydrate/compose exceptions are also logged.
    """
    import logging

    logger = logging.getLogger(__name__)

    payload = producer_payload or {}
    schema_version = payload.get("schema_version")
    intent_dict = payload.get("intent")

    if not intent_dict:
        return {
            "ok": False,
            "error": (
                "Composer branch producer_payload missing 'intent'. "
                "This branch was likely emitted before PR3/v2 and cannot "
                "be escalated — commit the scaffold plan instead."
            ),
        }

    # Rehydrate CompositionIntent from the payload dict. Only the fields
    # CompositionIntent understands are pulled — older schemas may have
    # fewer fields, newer ones may have more.
    known_fields = (
        "genre", "sub_genre", "mood", "tempo", "key",
        "descriptors", "explicit_elements", "energy",
        "layer_count", "duration_bars",
    )
    try:
        intent_fields = {
            k: v for k, v in intent_dict.items() if k in known_fields
        }
        intent = CompositionIntent(**intent_fields)
    except Exception as exc:
        # Boundary handler: callers expect an error dict, not a raise.
        logger.warning(
            "composer escalation: intent rehydrate failed (schema_version=%s)",
            schema_version,
            exc_info=True,
        )
        return {
            "ok": False,
            "error": (
                f"Failed to rehydrate CompositionIntent from producer_payload "
                f"(schema_version={schema_version}): {exc}"
            ),
        }

    engine = ComposerEngine()
    try:
        result: CompositionResult = await engine.compose(
            intent=intent,
            dry_run=False,
            max_credits=max_credits,
            search_roots=search_roots or [],
            splice_client=splice_client,
            browser_client=browser_client,
        )
    except Exception as exc:
        # Boundary handler: keep the traceback in the log so the cause
        # is not reduced to the one-line error string.
        logger.warning("composer escalation: compose() raised", exc_info=True)
        return {
            "ok": False,
            "error": f"ComposerEngine.compose() raised: {exc}",
        }

    # Explicit failure when nothing resolved, so callers fall back to the
    # scaffold instead of silently shipping an empty plan.
    if not result.plan or len(result.layers) == 0:
        return {
            "ok": False,
            "error": (
                "ComposerEngine.compose() produced zero executable layers. "
                "Sample resolution likely failed — check Splice credits, "
                "filesystem roots, or browser connectivity. Falling back "
                "to scaffold commit is the correct action."
            ),
            "warnings": list(result.warnings),
            "resolved_samples": dict(result.resolved_samples),
        }

    return {
        "ok": True,
        "plan": list(result.plan),
        "step_count": len(result.plan),
        "layer_count": len(result.layers),
        "resolved_samples": dict(result.resolved_samples),
        "credits_estimated": result.credits_estimated,
        "warnings": list(result.warnings),
        "intent_used": intent.to_dict(),
    }
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def _build_section_hypothesis_plan(intent: CompositionIntent, strategy_name: str) -> dict:
    """Assemble a lightweight scaffold plan for one intent variant.

    The plan is built from plan_layers(intent) and plan_sections(intent)
    and contains, in this order:
      1. one ``set_tempo`` step, only when ``intent.tempo`` is truthy and
         positive;
      2. one ``create_midi_track`` step per planned layer, named after
         the layer's ``role`` attribute (``layer_<idx>`` when absent);
      3. per planned section, a ``create_scene`` step followed by a
         ``set_scene_name`` step bound to it through ``$from_step``.

    No sample resolution happens here.

    Returns {"steps": list, "step_count": int, "summary": str}.
    """
    layers = plan_layers(intent)
    sections = plan_sections(intent)

    # The step uses the key "tempo" (not "bpm").
    tempo_steps: list[dict] = []
    if intent.tempo and intent.tempo > 0:
        tempo_steps.append(
            {"tool": "set_tempo", "params": {"tempo": float(intent.tempo)}}
        )

    # One MIDI track per layer role — the skeleton later steps build on.
    track_steps: list[dict] = [
        {
            "tool": "create_midi_track",
            "params": {"name": str(getattr(layer, "role", f"layer_{idx}"))},
        }
        for idx, layer in enumerate(layers)
    ]

    # create_scene is given only "index"; the label is applied by a
    # follow-up set_scene_name that resolves the new scene's index from
    # the create step via step_id + $from_step.
    scene_steps: list[dict] = []
    for position, section in enumerate(sections):
        fallback_label = f"Section {position + 1}"
        if isinstance(section, dict):
            label = section.get("name", fallback_label)
        else:
            label = fallback_label
        scene_step_id = f"create_scene_{position}"
        scene_steps.append(
            {
                "tool": "create_scene",
                "step_id": scene_step_id,
                "params": {"index": -1},  # -1 ⇒ append at end
            }
        )
        scene_steps.append(
            {
                "tool": "set_scene_name",
                "params": {
                    "scene_index": {"$from_step": scene_step_id, "path": "index"},
                    "name": str(label),
                },
            }
        )

    steps = tempo_steps + track_steps + scene_steps
    genre_text = intent.genre or 'auto-genre'
    tempo_text = intent.tempo or 'auto-tempo'
    summary = (
        f"{strategy_name}: {genre_text} @ "
        f"{tempo_text} bpm, energy {intent.energy:.1f} — "
        f"{len(layers)} layers, {len(sections)} sections"
    )
    return {"steps": steps, "step_count": len(steps), "summary": summary}
|
|
@@ -1,8 +1,10 @@
|
|
|
1
|
-
"""Composer Engine MCP tools —
|
|
1
|
+
"""Composer Engine MCP tools — 4 tools for auto-composition.
|
|
2
2
|
|
|
3
3
|
compose: full multi-layer composition from text prompt
|
|
4
4
|
augment_with_samples: add layers to existing session
|
|
5
5
|
get_composition_plan: dry run preview
|
|
6
|
+
propose_composer_branches (PR5/v2): multi-strategy branch hypotheses for
|
|
7
|
+
exploratory workflows (feeds create_experiment(seeds=...))
|
|
6
8
|
"""
|
|
7
9
|
|
|
8
10
|
from __future__ import annotations
|
|
@@ -213,3 +215,58 @@ async def get_composition_plan(
|
|
|
213
215
|
"then step through each tool call in sequence."
|
|
214
216
|
)
|
|
215
217
|
return plan
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
@mcp.tool()
def propose_composer_branches(
    ctx: Context,
    request_text: str,
    count: int = 2,
    freshness: float = 0.65,
) -> dict:
    """Emit N distinct compositional hypotheses for a single prompt (PR5/v2).

    Branch-native companion to compose(): instead of one deterministic
    layer plan, produces up to ``count`` BranchSeeds with different
    strategic angles the user can audition via create_experiment +
    run_experiment. Each seed carries a pre-compiled scaffolding plan
    (set_tempo + create_midi_track per layer + create_scene per section)
    that gets escalated to a fully resolved plan by commit_experiment
    when the winning branch is chosen.

    Strategies (gated on freshness):
      canonical        — intent unchanged, genre defaults
                         (shipped at every freshness level)
      energy_shift     — intent.energy inverted around 0.5
                         (freshness >= 0.4)
      layer_contrast   — one role swapped (pad-anchor instead of bass)
                         (freshness >= 0.7)

    Returns:
      {
        "request_text": str,
        "branch_count": int,
        "seeds": [BranchSeed.to_dict(), ...],
        "compiled_plans": [plan_dict, ...]  (parallel to seeds; scaffold),
      }

    Each seed's producer_payload carries {strategy, intent,
    request_text, reason} so commit_experiment can rehydrate the
    CompositionIntent and run the full ComposerEngine.compose() for
    the winner.
    """
    # The docstring above is kept verbatim: it may be surfaced to clients
    # as the tool description.
    from .branch_producer import propose_composer_branches as _propose

    # Only freshness is forwarded, wrapped in a minimal kernel dict.
    minimal_kernel = {"freshness": float(freshness)}

    seed_dicts = []
    scaffold_plans = []
    for seed, plan in _propose(
        request_text=request_text,
        kernel=minimal_kernel,
        count=int(count),
    ):
        # The two lists stay parallel: index i of one matches index i of
        # the other.
        seed_dicts.append(seed.to_dict())
        scaffold_plans.append(plan)

    return {
        "request_text": request_text,
        "branch_count": len(seed_dicts),
        "seeds": seed_dicts,
        "compiled_plans": scaffold_plans,
    }
|
|
@@ -4,10 +4,24 @@ Consistent keep/undo semantics shared across sonic, composition,
|
|
|
4
4
|
and all future evaluators.
|
|
5
5
|
|
|
6
6
|
Design: EVALUATION_FABRIC_V1.md, section 8
|
|
7
|
+
|
|
8
|
+
PR7 adds ``classify_branch_outcome`` — a branch-lifecycle classifier that
|
|
9
|
+
maps a score (and optional hard-rule inputs) to one of three statuses:
|
|
10
|
+
"keep", "undo", "interesting_but_failed". The third status exists for
|
|
11
|
+
exploration mode: a branch that failed technical gates but surfaced a
|
|
12
|
+
novel idea is kept for audit and never re-applied. Protection violations
|
|
13
|
+
still force undo regardless of exploration mode — that's a safety
|
|
14
|
+
invariant, not a taste judgment.
|
|
7
15
|
"""
|
|
8
16
|
|
|
9
17
|
from __future__ import annotations
|
|
10
18
|
|
|
19
|
+
from dataclasses import asdict, dataclass, field
|
|
20
|
+
from typing import Literal, Optional
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
BranchOutcomeStatus = Literal["keep", "undo", "interesting_but_failed"]
|
|
24
|
+
|
|
11
25
|
|
|
12
26
|
def apply_hard_rules(
|
|
13
27
|
goal_progress: float,
|
|
@@ -16,11 +30,17 @@ def apply_hard_rules(
|
|
|
16
30
|
measurable_count: int,
|
|
17
31
|
score: float,
|
|
18
32
|
target_count: int,
|
|
33
|
+
defer_on_unmeasurable: bool = True,
|
|
19
34
|
) -> tuple[bool, list[str]]:
|
|
20
35
|
"""Enforce hard rules and return (keep_change, failure_reasons).
|
|
21
36
|
|
|
22
37
|
Rules (evaluated in order):
|
|
23
|
-
1. All targets unmeasurable + no protection violation
|
|
38
|
+
1. (optional) All targets unmeasurable + no protection violation
|
|
39
|
+
-> defer to agent. Fires only when defer_on_unmeasurable=True
|
|
40
|
+
(the default). The evaluation fabric relies on this to mark
|
|
41
|
+
decision_mode="deferred". Score-producing evaluators (branch
|
|
42
|
+
lifecycle, PR7) pass defer_on_unmeasurable=False because the
|
|
43
|
+
score IS the judgment — no deferral needed.
|
|
24
44
|
2. Protection violated -> force undo
|
|
25
45
|
3. Measurable delta <= 0 when measurable targets exist -> force undo
|
|
26
46
|
4. Score < 0.40 -> force undo
|
|
@@ -32,6 +52,10 @@ def apply_hard_rules(
|
|
|
32
52
|
measurable_count: how many target dimensions were measurable
|
|
33
53
|
score: composite quality score (0-1)
|
|
34
54
|
target_count: total number of target dimensions
|
|
55
|
+
defer_on_unmeasurable: when True (default), rule 1 returns
|
|
56
|
+
(True, [defer message]) as soon as no measurable targets
|
|
57
|
+
exist. When False, rule 1 is skipped and rules 2-4 run
|
|
58
|
+
unconditionally.
|
|
35
59
|
|
|
36
60
|
Returns:
|
|
37
61
|
(keep_change, list_of_rule_failure_reasons)
|
|
@@ -39,7 +63,11 @@ def apply_hard_rules(
|
|
|
39
63
|
failures: list[str] = []
|
|
40
64
|
|
|
41
65
|
# Rule 1: all unmeasurable + no protection violation -> defer
|
|
42
|
-
if
|
|
66
|
+
if (
|
|
67
|
+
defer_on_unmeasurable
|
|
68
|
+
and measurable_count == 0
|
|
69
|
+
and not protection_violated
|
|
70
|
+
):
|
|
43
71
|
return True, [
|
|
44
72
|
"No measurable target dimensions — deferring keep/undo "
|
|
45
73
|
"to agent musical judgment"
|
|
@@ -65,3 +93,200 @@ def apply_hard_rules(
|
|
|
65
93
|
|
|
66
94
|
keep_change = len(failures) == 0
|
|
67
95
|
return keep_change, failures
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
# ── PR7 — branch-lifecycle classifier ────────────────────────────────────
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
@dataclass
class BranchOutcome:
    """Result record of a branch evaluation.

    Attributes:
        status: terminal classification, one of "keep", "undo" or
            "interesting_but_failed".
        keep_change: True only alongside status == "keep".
        score: the composite score that informed the decision.
        failure_reasons: human-readable descriptions of the hard rules
            that failed; empty when the branch is kept.
        note: optional user-facing explanation.
    """

    status: BranchOutcomeStatus
    keep_change: bool
    score: float
    # default_factory gives every instance its own list.
    failure_reasons: list[str] = field(default_factory=list)
    note: str = ""

    def to_dict(self) -> dict:
        """Return the fields as a plain dict via dataclasses.asdict."""
        as_plain_dict = asdict(self)
        return as_plain_dict
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def derive_goal_progress_from_fingerprint(
    fingerprint_diff: dict,
    target: Optional[dict] = None,
) -> tuple[float, int]:
    """Derive (goal_progress, measurable_count) from a fingerprint diff.

    fingerprint_diff: mapping of dimension name -> numeric delta, e.g.
        {"brightness": 0.12, "warmth": -0.05}. Non-numeric deltas and
        deltas smaller than 0.02 in magnitude are ignored as noise.
    target: optional mapping of dimension name -> target value. When
        given (and non-empty), only dimensions whose target value is
        numeric with magnitude >= 0.02 are counted. A missing, None or
        non-numeric target value means "target did not care" and the
        dimension is skipped. When omitted or empty, every dimension
        with a non-trivial delta counts.

    Scoring, per counted dimension, with delta clamped to [-1, 1]:
        with target:    +delta when target value > 0, otherwise -delta
        without target: abs(delta) * 0.5 (movement is weak evidence only,
                        since its direction cannot be judged)

    Returns:
        (goal_progress, measurable_count). goal_progress is the sum of
        the per-dimension contributions rounded to 3 decimals — signed
        when a target is used. measurable_count is the number of
        dimensions that contributed.
    """
    if not fingerprint_diff:
        return (0.0, 0)

    # Epsilon — values this small are noise, not signal.
    eps = 0.02
    progress = 0.0
    count = 0

    for dim, delta in fingerprint_diff.items():
        if not isinstance(delta, (int, float)) or abs(delta) < eps:
            continue
        # Clamp so one dimension contributes at most 1.0 in magnitude.
        clamped = max(-1.0, min(1.0, delta))

        if target:
            target_val = target.get(dim, 0.0)
            # Guard the type before abs(): a None or string target value
            # used to raise TypeError here instead of being skipped.
            if not isinstance(target_val, (int, float)) or abs(target_val) < eps:
                continue  # target didn't care about this dimension
            count += 1
            # Moving in the target's direction counts positive,
            # regardless of the target's magnitude.
            progress += clamped if target_val > 0 else -clamped
        else:
            count += 1
            progress += abs(clamped) * 0.5

    return (round(progress, 3), count)
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def classify_branch_outcome(
    score: float,
    *,
    protection_violated: bool = False,
    measurable_count: int = 0,
    target_count: int = 0,
    goal_progress: float = 0.0,
    exploration_rules: bool = False,
    fingerprint_diff: Optional[dict] = None,
    timbral_target: Optional[dict] = None,
) -> BranchOutcome:
    """Classify a branch's terminal status from a score + optional hard-rule inputs.

    Delegates to apply_hard_rules with ``defer_on_unmeasurable=False`` and
    ``collateral_damage=0.0`` (collateral damage is not computed here).

    Post-processing of a hard-rule failure:
      - ``exploration_rules=False`` (default): status="undo".
      - ``exploration_rules=True``: protection violations still force
        "undo" (safety invariant); every other failure becomes
        "interesting_but_failed".

    A protection violation is recognised when the caller passed
    ``protection_violated=True`` OR when any failure message mentions
    "protected dimension". The flag check means the invariant does not
    rest on message wording alone.

    Optional fingerprint inputs:
      fingerprint_diff: per-dimension before/after diff. Used only when
        the caller supplied no measurable evidence of their own
        (``measurable_count == 0`` and ``goal_progress`` ~ 0). In that
        case goal_progress and measurable_count are derived from it and
        target_count is raised to at least the derived count.
      timbral_target: optional target fingerprint forwarded to the
        derivation, so movement is scored in the target's direction.

    Returns a BranchOutcome; ``keep_change`` is True only for
    status="keep".
    """
    # Derive measurable evidence from the fingerprint diff only when the
    # caller did not supply their own — keeps back-compat for callers
    # that compute measurable inputs themselves.
    caller_gave_no_evidence = measurable_count == 0 and abs(goal_progress) < 1e-6
    if fingerprint_diff and caller_gave_no_evidence:
        derived_progress, derived_count = derive_goal_progress_from_fingerprint(
            fingerprint_diff, target=timbral_target,
        )
        goal_progress = derived_progress
        measurable_count = derived_count
        # target_count must reflect the derived dimensions too, so the
        # hard-rule path treats this as a genuinely measurable outcome.
        target_count = max(target_count, derived_count)

    keep_change, failures = apply_hard_rules(
        goal_progress=goal_progress,
        collateral_damage=0.0,  # not threaded here — branch lifecycle doesn't compute it yet
        protection_violated=protection_violated,
        measurable_count=measurable_count,
        score=score,
        target_count=target_count,
        defer_on_unmeasurable=False,
    )

    if keep_change:
        return BranchOutcome(
            status="keep",
            keep_change=True,
            score=score,
            failure_reasons=[],
            note="",
        )

    # Failed — decide between undo and interesting_but_failed. The
    # caller's flag is authoritative; the substring match is kept as a
    # secondary signal for failures reported by the hard rules.
    protection_failure = bool(protection_violated) or any(
        "protected dimension" in f for f in failures
    )

    if exploration_rules and not protection_failure:
        return BranchOutcome(
            status="interesting_but_failed",
            keep_change=False,
            score=score,
            failure_reasons=failures,
            note=(
                "Exploration rule: branch failed technical gates but is "
                "retained for audit. Not re-applied."
            ),
        )

    return BranchOutcome(
        status="undo",
        keep_change=False,
        score=score,
        failure_reasons=failures,
        note=(
            "Protection violation — branch rolled back regardless of "
            "exploration mode."
            if protection_failure
            else "Branch rolled back per hard rules."
        ),
    )
|