livepilot 1.9.13 → 1.9.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. package/.claude-plugin/marketplace.json +3 -3
  2. package/AGENTS.md +3 -3
  3. package/CHANGELOG.md +51 -0
  4. package/CONTRIBUTING.md +1 -1
  5. package/README.md +7 -7
  6. package/bin/livepilot.js +32 -8
  7. package/installer/install.js +21 -2
  8. package/livepilot/.Codex-plugin/plugin.json +2 -2
  9. package/livepilot/.claude-plugin/plugin.json +2 -2
  10. package/livepilot/agents/livepilot-producer/AGENT.md +243 -49
  11. package/livepilot/skills/livepilot-core/SKILL.md +81 -6
  12. package/livepilot/skills/livepilot-core/references/m4l-devices.md +2 -2
  13. package/livepilot/skills/livepilot-core/references/overview.md +3 -3
  14. package/livepilot/skills/livepilot-core/references/sound-design.md +3 -2
  15. package/livepilot/skills/livepilot-release/SKILL.md +13 -13
  16. package/m4l_device/LivePilot_Analyzer.amxd +0 -0
  17. package/m4l_device/livepilot_bridge.js +6 -3
  18. package/mcp_server/__init__.py +1 -1
  19. package/mcp_server/curves.py +11 -3
  20. package/mcp_server/evaluation/__init__.py +1 -0
  21. package/mcp_server/evaluation/fabric.py +575 -0
  22. package/mcp_server/evaluation/feature_extractors.py +84 -0
  23. package/mcp_server/evaluation/policy.py +67 -0
  24. package/mcp_server/evaluation/tools.py +53 -0
  25. package/mcp_server/memory/__init__.py +11 -2
  26. package/mcp_server/memory/anti_memory.py +78 -0
  27. package/mcp_server/memory/promotion.py +94 -0
  28. package/mcp_server/memory/session_memory.py +108 -0
  29. package/mcp_server/memory/taste_memory.py +158 -0
  30. package/mcp_server/memory/technique_store.py +2 -1
  31. package/mcp_server/memory/tools.py +112 -0
  32. package/mcp_server/mix_engine/__init__.py +1 -0
  33. package/mcp_server/mix_engine/critics.py +299 -0
  34. package/mcp_server/mix_engine/models.py +152 -0
  35. package/mcp_server/mix_engine/planner.py +103 -0
  36. package/mcp_server/mix_engine/state_builder.py +316 -0
  37. package/mcp_server/mix_engine/tools.py +214 -0
  38. package/mcp_server/performance_engine/__init__.py +1 -0
  39. package/mcp_server/performance_engine/models.py +148 -0
  40. package/mcp_server/performance_engine/planner.py +267 -0
  41. package/mcp_server/performance_engine/safety.py +162 -0
  42. package/mcp_server/performance_engine/tools.py +183 -0
  43. package/mcp_server/project_brain/__init__.py +6 -0
  44. package/mcp_server/project_brain/arrangement_graph.py +64 -0
  45. package/mcp_server/project_brain/automation_graph.py +72 -0
  46. package/mcp_server/project_brain/builder.py +123 -0
  47. package/mcp_server/project_brain/capability_graph.py +64 -0
  48. package/mcp_server/project_brain/models.py +282 -0
  49. package/mcp_server/project_brain/refresh.py +80 -0
  50. package/mcp_server/project_brain/role_graph.py +103 -0
  51. package/mcp_server/project_brain/session_graph.py +51 -0
  52. package/mcp_server/project_brain/tools.py +144 -0
  53. package/mcp_server/reference_engine/__init__.py +1 -0
  54. package/mcp_server/reference_engine/gap_analyzer.py +239 -0
  55. package/mcp_server/reference_engine/models.py +105 -0
  56. package/mcp_server/reference_engine/profile_builder.py +149 -0
  57. package/mcp_server/reference_engine/tactic_router.py +117 -0
  58. package/mcp_server/reference_engine/tools.py +235 -0
  59. package/mcp_server/runtime/__init__.py +1 -0
  60. package/mcp_server/runtime/action_ledger.py +117 -0
  61. package/mcp_server/runtime/action_ledger_models.py +84 -0
  62. package/mcp_server/runtime/action_tools.py +57 -0
  63. package/mcp_server/runtime/capability_state.py +218 -0
  64. package/mcp_server/runtime/safety_kernel.py +339 -0
  65. package/mcp_server/runtime/safety_tools.py +42 -0
  66. package/mcp_server/runtime/tools.py +64 -0
  67. package/mcp_server/server.py +23 -1
  68. package/mcp_server/sound_design/__init__.py +1 -0
  69. package/mcp_server/sound_design/critics.py +297 -0
  70. package/mcp_server/sound_design/models.py +147 -0
  71. package/mcp_server/sound_design/planner.py +104 -0
  72. package/mcp_server/sound_design/tools.py +297 -0
  73. package/mcp_server/tools/_agent_os_engine.py +947 -0
  74. package/mcp_server/tools/_composition_engine.py +1530 -0
  75. package/mcp_server/tools/_conductor.py +199 -0
  76. package/mcp_server/tools/_conductor_budgets.py +222 -0
  77. package/mcp_server/tools/_evaluation_contracts.py +91 -0
  78. package/mcp_server/tools/_form_engine.py +416 -0
  79. package/mcp_server/tools/_motif_engine.py +351 -0
  80. package/mcp_server/tools/_planner_engine.py +516 -0
  81. package/mcp_server/tools/_research_engine.py +542 -0
  82. package/mcp_server/tools/_research_provider.py +185 -0
  83. package/mcp_server/tools/_snapshot_normalizer.py +49 -0
  84. package/mcp_server/tools/agent_os.py +440 -0
  85. package/mcp_server/tools/analyzer.py +18 -0
  86. package/mcp_server/tools/automation.py +25 -10
  87. package/mcp_server/tools/composition.py +563 -0
  88. package/mcp_server/tools/motif.py +104 -0
  89. package/mcp_server/tools/planner.py +144 -0
  90. package/mcp_server/tools/research.py +223 -0
  91. package/mcp_server/tools/tracks.py +18 -3
  92. package/mcp_server/tools/transport.py +10 -2
  93. package/mcp_server/transition_engine/__init__.py +6 -0
  94. package/mcp_server/transition_engine/archetypes.py +167 -0
  95. package/mcp_server/transition_engine/critics.py +340 -0
  96. package/mcp_server/transition_engine/models.py +90 -0
  97. package/mcp_server/transition_engine/tools.py +291 -0
  98. package/mcp_server/translation_engine/__init__.py +5 -0
  99. package/mcp_server/translation_engine/critics.py +297 -0
  100. package/mcp_server/translation_engine/models.py +27 -0
  101. package/mcp_server/translation_engine/tools.py +74 -0
  102. package/package.json +2 -2
  103. package/remote_script/LivePilot/__init__.py +1 -1
  104. package/remote_script/LivePilot/arrangement.py +12 -2
  105. package/requirements.txt +1 -1
@@ -0,0 +1,947 @@
1
+ """Agent OS V1 Engine — pure-computation core for goal compilation, world modeling,
2
+ critic analysis, and evaluation scoring.
3
+
4
+ Zero external dependencies beyond stdlib. All functions are pure — no I/O, no Ableton
5
+ connection, no network calls. The MCP tool wrappers in agent_os.py handle data fetching;
6
+ this module handles computation.
7
+
8
+ Design: spec at docs/AGENT_OS_V1.md, sections 6-12.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import math
14
+ import re
15
+ from dataclasses import asdict, dataclass, field
16
+ from typing import Any, Optional
17
+
18
+
19
# ── Quality Dimensions ────────────────────────────────────────────────

# Closed vocabulary of perceptual mix-quality dimensions. GoalVector target
# and protect keys must come from this set (enforced by validate_goal_vector).
QUALITY_DIMENSIONS = frozenset({
    "energy", "punch", "weight", "density", "brightness", "warmth",
    "width", "depth", "motion", "contrast", "clarity", "cohesion",
    "groove", "tension", "novelty", "polish", "emotion",
})

# Dimensions with measurable spectral proxies in Phase 1.
# Others (motion, contrast, groove, tension, novelty, polish, emotion, cohesion, depth)
# get confidence=0.0 — the LLM agent uses its own musical judgment for those.
#
# Note: "width" requires stereo width data (side/mid ratio) from compare_to_reference.
# Phase 1 does NOT have this in the sonic snapshot, so width is NOT measurable yet.
# It is intentionally excluded here until Phase 2 adds stereo analysis to the snapshot.
# Maps dimension name → human-readable description of its measurement proxy
# (the actual computation lives in _extract_dimension_value).
MEASURABLE_PROXIES: dict[str, str] = {
    "brightness": "high + presence bands (averaged)",
    "warmth": "low_mid band energy",
    "weight": "sub + low bands (averaged)",
    "clarity": "inverse of low_mid congestion",
    "density": "spectral flatness (geometric/arithmetic mean ratio)",
    "energy": "RMS level",
    "punch": "crest factor in dB (20*log10(peak/rms))",
}

# Allowed GoalVector.mode values ("improve" is the dataclass default).
VALID_MODES = frozenset({"observe", "improve", "explore", "finish", "diagnose"})
# Allowed GoalVector.research_mode values ("none" is the dataclass default).
VALID_RESEARCH_MODES = frozenset({"none", "targeted", "deep"})
46
+
47
+
48
+ # ── GoalVector ────────────────────────────────────────────────────────
49
+
50
@dataclass
class GoalVector:
    """Compiled user intent as a machine-usable goal.

    targets: dimension → weight (0-1). Weights should approximately sum to 1.0.
    protect: dimension → minimum acceptable value (0-1). If a dimension drops
    below this value after a move, the move is undone.
    """
    # Original natural-language request (stored stripped by validate_goal_vector).
    request_text: str
    # dimension name (member of QUALITY_DIMENSIONS) → relative weight
    targets: dict[str, float] = field(default_factory=dict)
    # dimension name → minimum acceptable post-move value
    protect: dict[str, float] = field(default_factory=dict)
    # one of VALID_MODES
    mode: str = "improve"
    # 0.0 (cautious) … 1.0 (bold); validated into that range
    aggression: float = 0.5
    # one of VALID_RESEARCH_MODES
    research_mode: str = "none"

    def to_dict(self) -> dict:
        # Plain-dict form for JSON serialization across the MCP boundary.
        return asdict(self)
67
+
68
+
69
def validate_goal_vector(
    request_text: str,
    targets: dict[str, float],
    protect: dict[str, float],
    mode: str,
    aggression: float,
    research_mode: str,
) -> GoalVector:
    """Validate and construct a GoalVector. Raises ValueError on invalid input."""
    cleaned = request_text.strip() if request_text else ""
    if not cleaned:
        raise ValueError("request_text cannot be empty")

    # Reject unknown dimension names first — targets before protect, matching
    # the caller-visible error precedence.
    unknown_target = next((d for d in targets if d not in QUALITY_DIMENSIONS), None)
    if unknown_target is not None:
        raise ValueError(
            f"Unknown target dimension '{unknown_target}'. "
            f"Valid: {sorted(QUALITY_DIMENSIONS)}"
        )
    unknown_protect = next((d for d in protect if d not in QUALITY_DIMENSIONS), None)
    if unknown_protect is not None:
        raise ValueError(
            f"Unknown protect dimension '{unknown_protect}'. "
            f"Valid: {sorted(QUALITY_DIMENSIONS)}"
        )

    # Target weights may be any non-negative value (renormalized below);
    # protect thresholds are absolute levels, so they must lie in 0-1.
    for name, value in targets.items():
        if value < 0.0:
            raise ValueError(f"Target weight for '{name}' must be >= 0.0, got {value}")
    for name, value in protect.items():
        if not 0.0 <= value <= 1.0:
            raise ValueError(f"Protect threshold for '{name}' must be 0.0-1.0, got {value}")

    if mode not in VALID_MODES:
        raise ValueError(f"mode must be one of {sorted(VALID_MODES)}, got '{mode}'")
    if research_mode not in VALID_RESEARCH_MODES:
        raise ValueError(
            f"research_mode must be one of {sorted(VALID_RESEARCH_MODES)}, "
            f"got '{research_mode}'"
        )
    if not 0.0 <= aggression <= 1.0:
        raise ValueError(f"aggression must be 0.0-1.0, got {aggression}")

    # Renormalize target weights to sum to 1.0, unless already within 1%.
    weight_sum = sum(targets.values())
    if targets and weight_sum > 0 and abs(weight_sum - 1.0) > 0.01:
        targets = {name: value / weight_sum for name, value in targets.items()}

    return GoalVector(
        request_text=cleaned,
        targets=targets,
        protect=protect,
        mode=mode,
        aggression=aggression,
        research_mode=research_mode,
    )
127
+
128
+
129
+ # ── WorldModel ────────────────────────────────────────────────────────
130
+
131
# Track role inference patterns — ordered by specificity; the first pattern
# that matches anywhere in the lowercased track name wins.
_ROLE_PATTERNS: list[tuple[str, str]] = [
    (r"kick|bd|bass\s*drum", "kick"),
    (r"snare|sd|snr", "snare"),
    (r"clap|cp|hand\s*clap", "clap"),
    (r"h(?:i)?[\s\-]?hat|hh|hat", "hihat"),
    (r"perc|percussion|conga|bongo|shaker|tamb", "percussion"),
    (r"sub\s*bass|sub", "sub_bass"),
    (r"bass|low", "bass"),
    (r"pad|atmosphere|atmo|ambient|drone", "pad"),
    (r"lead|melody|mel|synth\s*lead", "lead"),
    (r"chord|keys|piano|organ|rhodes", "chords"),
    (r"vocal|vox|voice", "vocal"),
    (r"fx|sfx|riser|sweep|noise|texture|tape", "texture"),
    (r"string", "strings"),
    (r"brass", "brass"),
    (r"resamp|bounce|bus|group|master", "utility"),
]


def infer_track_role(track_name: str) -> str:
    """Infer a track's musical role from its name. Returns 'unknown' if no match."""
    normalized = track_name.lower().strip()
    # First matching pattern wins; the list is ordered most-specific first.
    return next(
        (role for pattern, role in _ROLE_PATTERNS if re.search(pattern, normalized)),
        "unknown",
    )
158
+
159
+
160
@dataclass
class WorldModel:
    """Session state snapshot for critic analysis."""
    # Structural facts (tempo, time signature, track list, counts) — see
    # build_world_model_from_data for the exact keys produced.
    topology: dict = field(default_factory=dict)
    # Spectral/level snapshot from the M4L analyzer, or None when the
    # analyzer provided no band data.
    sonic: Optional[dict] = None
    # Health flags: analyzer/flucoma availability plus unhealthy-device list.
    technical: dict = field(default_factory=dict)
    # track index → inferred role string (see infer_track_role).
    track_roles: dict = field(default_factory=dict)

    def to_dict(self) -> dict:
        # Plain-dict form for JSON serialization.
        return asdict(self)
170
+
171
+
172
def build_world_model_from_data(
    session_info: dict,
    spectrum: Optional[dict] = None,
    rms: Optional[dict] = None,
    detected_key: Optional[dict] = None,
    flucoma_status: Optional[dict] = None,
    track_infos: Optional[list[dict]] = None,
) -> WorldModel:
    """Assemble a WorldModel from raw tool outputs.

    All parameters are optional — the model degrades gracefully when
    analyzer data is unavailable.

    Args (all raw tool outputs; shapes assumed from the keys read below —
    confirm against the producing tools):
        session_info: session summary with "tracks", "tempo", counts, etc.
        spectrum: {"bands": {...}} from the master spectrum analyzer.
        rms: {"rms": float, "peak": float} level snapshot.
        detected_key: {"key", "scale", "confidence"} key-detection result.
        flucoma_status: {"flucoma_available": bool} capability probe.
        track_infos: per-track device listings with "health_flags".
    """
    # Topology: structural facts about the session, independent of audio.
    tracks = session_info.get("tracks", [])
    topology = {
        "tempo": session_info.get("tempo"),
        "time_signature": f"{session_info.get('signature_numerator', 4)}/{session_info.get('signature_denominator', 4)}",
        "track_count": session_info.get("track_count", 0),
        "return_count": session_info.get("return_track_count", 0),
        "scene_count": session_info.get("scene_count", 0),
        "is_playing": session_info.get("is_playing", False),
        "tracks": [
            {
                "index": t.get("index"),
                "name": t.get("name", ""),
                "has_midi": t.get("has_midi_input", False),
                "has_audio": t.get("has_audio_input", False),
                "mute": t.get("mute", False),
                "solo": t.get("solo", False),
                "arm": t.get("arm", False),
            }
            for t in tracks
        ],
    }

    # Track roles: name-based inference, keyed by track index.
    track_roles = {}
    for t in tracks:
        idx = t.get("index", 0)
        name = t.get("name", "")
        track_roles[idx] = infer_track_role(name)

    # Sonic state (None if analyzer unavailable). Only built when the
    # spectrum actually carries band data.
    sonic = None
    if spectrum and spectrum.get("bands"):
        sonic = {
            "spectrum": spectrum.get("bands", {}),
            "rms": rms.get("rms") if rms else None,
            "peak": rms.get("peak") if rms else None,
            "key": detected_key.get("key") if detected_key else None,
            "scale": detected_key.get("scale") if detected_key else None,
            "key_confidence": detected_key.get("confidence") if detected_key else None,
        }

    # Technical state: availability flags mirror the conditions above.
    analyzer_available = spectrum is not None and bool(spectrum.get("bands"))
    flucoma_available = (
        flucoma_status is not None
        and flucoma_status.get("flucoma_available", False)
    )

    # Check plugin health from track_infos if provided; only the
    # "opaque_or_failed_plugin" flag is surfaced here.
    unhealthy_devices = []
    if track_infos:
        for ti in track_infos:
            for dev in ti.get("devices", []):
                flags = dev.get("health_flags", [])
                if "opaque_or_failed_plugin" in flags:
                    unhealthy_devices.append({
                        "track": ti.get("index"),
                        "device": dev.get("name"),
                        "flag": "opaque_or_failed_plugin",
                    })

    technical = {
        "analyzer_available": analyzer_available,
        "flucoma_available": flucoma_available,
        "unhealthy_devices": unhealthy_devices,
    }

    return WorldModel(
        topology=topology,
        sonic=sonic,
        technical=technical,
        track_roles=track_roles,
    )
259
+
260
+
261
+ # ── Critics ───────────────────────────────────────────────────────────
262
+
263
@dataclass
class Issue:
    """A diagnosed problem or opportunity."""
    # Machine-readable issue identifier, e.g. "low_mid_congestion".
    type: str
    critic: str  # "sonic" or "technical"
    severity: float  # 0.0-1.0
    confidence: float  # 0.0-1.0
    # Quality dimensions (QUALITY_DIMENSIONS members) this issue affects.
    affected_dimensions: list[str] = field(default_factory=list)
    # Human-readable measurements/observations backing the diagnosis.
    evidence: list[str] = field(default_factory=list)
    # Suggested remediation steps for the agent.
    recommended_actions: list[str] = field(default_factory=list)

    def to_dict(self) -> dict:
        # Plain-dict form for JSON serialization.
        return asdict(self)
276
+
277
+
278
def run_sonic_critic(
    sonic: Optional[dict],
    goal: GoalVector,
    track_roles: dict,
) -> list[Issue]:
    """Run sonic heuristics against spectrum data. Returns issues that overlap
    with the goal's target dimensions.

    Args:
        sonic: WorldModel.sonic snapshot ({"spectrum", "rms", "peak", ...})
            or None when the analyzer is offline.
        goal: compiled GoalVector; only issues touching goal.targets fire.
        track_roles: track index → inferred role (used for the weak-sub check).
    """
    if sonic is None:
        # No analyzer data at all — report that as its own (low-severity) issue
        # covering every dimension we would otherwise measure.
        return [Issue(
            type="analyzer_unavailable",
            critic="sonic",
            severity=0.3,
            confidence=1.0,
            affected_dimensions=list(MEASURABLE_PROXIES.keys()),
            evidence=["M4L Analyzer not connected or no audio playing"],
            recommended_actions=["Load LivePilot_Analyzer on master", "Start playback"],
        )]

    issues = []
    bands = sonic.get("spectrum", {})
    rms = sonic.get("rms")
    peak = sonic.get("peak")
    target_dims = set(goal.targets.keys())

    # 1. Mud detection: low_mid congestion.
    # Severity scales 0→1 as low_mid rises from 0.7 toward ~1.0 (factor 3.3).
    low_mid = bands.get("low_mid", 0)
    if low_mid > 0.7 and {"clarity", "weight", "warmth"} & target_dims:
        issues.append(Issue(
            type="low_mid_congestion",
            critic="sonic",
            severity=min(1.0, (low_mid - 0.7) * 3.3),
            confidence=0.85,
            affected_dimensions=["clarity", "weight"],
            evidence=[f"low_mid band energy: {low_mid:.2f} (threshold: 0.7)"],
            recommended_actions=["EQ cut 200-500Hz on muddiest track", "HPF on non-bass elements"],
        ))

    # 2. Weak sub — only fires when a bass-role track exists, so an
    # intentionally bass-free arrangement is not flagged.
    sub = bands.get("sub", 0)
    has_bass = any(r in ("kick", "bass", "sub_bass") for r in track_roles.values())
    if sub < 0.15 and has_bass and {"weight", "energy", "punch"} & target_dims:
        issues.append(Issue(
            type="weak_foundation",
            critic="sonic",
            severity=0.6,
            confidence=0.75,
            affected_dimensions=["weight", "energy"],
            evidence=[f"sub band energy: {sub:.2f} with bass tracks present"],
            recommended_actions=["Boost sub on kick/bass", "Check HPF not too aggressive"],
        ))

    # 3. Harsh top: combined high + presence energy above 0.8.
    high = bands.get("high", 0)
    presence = bands.get("presence", 0)
    if (high + presence) > 0.8 and {"brightness", "clarity", "warmth"} & target_dims:
        issues.append(Issue(
            type="harsh_highs",
            critic="sonic",
            severity=min(1.0, ((high + presence) - 0.8) * 2.5),
            confidence=0.80,
            affected_dimensions=["brightness", "clarity"],
            evidence=[f"high+presence: {high + presence:.2f} (threshold: 0.8)"],
            recommended_actions=["Reduce high shelf on brightest element", "Add subtle LP filter"],
        ))

    # 4. Low headroom: RMS very close to full scale.
    if rms is not None and rms > 0.9 and {"energy", "punch", "clarity"} & target_dims:
        issues.append(Issue(
            type="headroom_risk",
            critic="sonic",
            severity=min(1.0, (rms - 0.9) * 10),
            confidence=0.90,
            affected_dimensions=["energy", "clarity", "punch"],
            evidence=[f"RMS: {rms:.3f} (threshold: 0.9)"],
            recommended_actions=["Reduce master volume", "Lower loudest track", "Add limiter"],
        ))

    # 5. Flat dynamics (C1 fix: correct dB formula). The max(rms, 0.001)
    # floor guards the division even though rms > 0 was already checked.
    if rms is not None and peak is not None and rms > 0 and peak > 0:
        crest_db = 20.0 * math.log10(peak / max(rms, 0.001))
        if crest_db < 3.0 and {"punch", "energy", "contrast"} & target_dims:
            issues.append(Issue(
                type="dynamics_flat",
                critic="sonic",
                severity=0.5,
                confidence=0.70,
                affected_dimensions=["punch", "contrast"],
                evidence=[f"crest factor: {crest_db:.1f} dB (threshold: 3 dB)"],
                recommended_actions=["Reduce compression", "Add transient shaper", "Reduce limiter"],
            ))

    return issues
370
+
371
+
372
def run_technical_critic(technical: dict) -> list[Issue]:
    """Check technical health of the session.

    Reads WorldModel.technical ("analyzer_available", "unhealthy_devices")
    and turns each problem into an Issue.
    """
    found: list[Issue] = []

    # An offline analyzer blocks all measurable feedback, so flag it first.
    if not technical.get("analyzer_available", False):
        found.append(Issue(
            type="analyzer_offline",
            critic="technical",
            severity=0.4,
            confidence=1.0,
            evidence=["LivePilot Analyzer not receiving data"],
            recommended_actions=["Load LivePilot_Analyzer.amxd on master track"],
        ))

    # One issue per flagged device, carrying track/device identity as evidence.
    found.extend(
        Issue(
            type="unhealthy_plugin",
            critic="technical",
            severity=0.7,
            confidence=0.95,
            evidence=[f"Track {broken['track']}: {broken['device']} — {broken['flag']}"],
            recommended_actions=["Delete and replace with native Ableton device"],
        )
        for broken in technical.get("unhealthy_devices", [])
    )

    return found
397
+
398
+
399
+ # ── Evaluation Engine ─────────────────────────────────────────────────
400
+
401
+ def _clamp(value: float, lo: float = 0.0, hi: float = 1.0) -> float:
402
+ """Clamp value to [lo, hi] range."""
403
+ return max(lo, min(hi, value))
404
+
405
+
406
+ def _extract_dimension_value(
407
+ sonic: dict,
408
+ dimension: str,
409
+ ) -> Optional[float]:
410
+ """Map a quality dimension to a measurable value from sonic data.
411
+
412
+ Returns None for unmeasurable dimensions (confidence=0.0 in Phase 1).
413
+ All returned values are clamped to 0.0-1.0 for consistent scoring.
414
+ """
415
+ if not sonic:
416
+ return None
417
+ # Accept both "spectrum" and "bands" keys — get_master_spectrum returns
418
+ # {"bands": {...}} while the evaluator historically expected {"spectrum": {...}}.
419
+ # Finding 2 fix: tolerate either shape so raw analyzer output works.
420
+ bands = sonic.get("spectrum") or sonic.get("bands")
421
+ if not bands:
422
+ return None
423
+ rms = sonic.get("rms")
424
+ peak = sonic.get("peak")
425
+
426
+ if dimension == "brightness":
427
+ high = bands.get("high", 0)
428
+ presence = bands.get("presence", 0)
429
+ return _clamp((high + presence) / 2.0)
430
+ elif dimension == "warmth":
431
+ return _clamp(bands.get("low_mid", 0))
432
+ elif dimension == "weight":
433
+ sub = bands.get("sub", 0)
434
+ low = bands.get("low", 0)
435
+ return _clamp((sub + low) / 2.0)
436
+ elif dimension == "clarity":
437
+ low_mid = bands.get("low_mid", 0)
438
+ return _clamp(1.0 - low_mid)
439
+ elif dimension == "density":
440
+ # Spectral flatness: geometric mean / arithmetic mean of band values.
441
+ # Higher = more evenly distributed energy (noise-like).
442
+ # Lower = more tonal (energy concentrated in few bands).
443
+ vals = [max(v, 1e-10) for v in bands.values() if isinstance(v, (int, float))]
444
+ if not vals:
445
+ return None
446
+ geo_mean = math.exp(sum(math.log(v) for v in vals) / len(vals))
447
+ arith_mean = sum(vals) / len(vals)
448
+ return _clamp(geo_mean / max(arith_mean, 1e-10))
449
+ elif dimension == "energy":
450
+ return _clamp(rms) if rms is not None else None
451
+ elif dimension == "punch":
452
+ if rms and peak and rms > 0:
453
+ crest_db = 20.0 * math.log10(max(peak / rms, 1.0))
454
+ # Normalize: 0 dB = 0.0, 20 dB = 1.0
455
+ return _clamp(crest_db / 20.0)
456
+ return None
457
+ else:
458
+ # Unmeasurable in Phase 1 (width, depth, motion, contrast,
459
+ # groove, tension, novelty, polish, emotion, cohesion)
460
+ return None
461
+
462
+
463
def compute_evaluation_score(
    goal: GoalVector,
    before_sonic: dict,
    after_sonic: dict,
    outcome_history: Optional[list[dict]] = None,
) -> dict:
    """Compute whether a move improved the mix toward the goal.

    Args:
        goal: compiled GoalVector (targets weighted, protect thresholds).
        before_sonic / after_sonic: sonic snapshots taken before/after the move
            (shape accepted by _extract_dimension_value).
        outcome_history: prior outcome payloads; enables the taste-fit term.

    Returns:
        {
            "score": float (0-1),
            "keep_change": bool,
            "goal_progress": float (-1 to 1),
            "collateral_damage": float (0-1),
            "measurable_delta": float (-1 to 1),
            "notes": list[str],
            "dimension_changes": dict,
            "consecutive_undo_hint": bool,
        }
    """
    notes: list[str] = []
    dimension_changes: dict[str, dict] = {}

    # Compute per-dimension deltas; unmeasurable dimensions are recorded
    # in notes and excluded from the weighted progress sum.
    total_goal_progress = 0.0
    measurable_count = 0

    for dim, weight in goal.targets.items():
        before_val = _extract_dimension_value(before_sonic, dim)
        after_val = _extract_dimension_value(after_sonic, dim)

        if before_val is not None and after_val is not None:
            delta = after_val - before_val
            dimension_changes[dim] = {
                "before": round(before_val, 4),
                "after": round(after_val, 4),
                "delta": round(delta, 4),
            }
            total_goal_progress += delta * weight
            measurable_count += 1
        else:
            notes.append(f"{dim}: not measurable in Phase 1 (confidence=0.0)")

    # Check protected dimensions (C3 fix: use the actual threshold).
    # Only drops are penalized; rises never violate protection.
    collateral_damage = 0.0
    protection_violated = False

    for dim, threshold in goal.protect.items():
        before_val = _extract_dimension_value(before_sonic, dim)
        after_val = _extract_dimension_value(after_sonic, dim)

        if before_val is not None and after_val is not None:
            drop = before_val - after_val
            if drop > 0:
                collateral_damage = max(collateral_damage, drop)
                # Violation: value dropped below the user's threshold
                if after_val < threshold:
                    protection_violated = True
                    notes.append(
                        f"PROTECTED dimension '{dim}' at {after_val:.3f}, "
                        f"below threshold {threshold:.3f}"
                    )
                # Also flag large drops even if still above threshold
                elif drop > 0.15:
                    protection_violated = True
                    notes.append(
                        f"PROTECTED dimension '{dim}' dropped by {drop:.3f} "
                        f"(absolute drop > 0.15)"
                    )

    # Measurable delta (average improvement across measured dimensions);
    # max(..., 1) guards the zero-measurable case.
    measurable_delta = total_goal_progress / max(measurable_count, 1)

    # Taste fit: how well does this move align with user preferences?
    taste_fit = compute_taste_fit(goal, outcome_history) if outcome_history else 0.0

    # Compute composite score (spec section 12.2). Each component is
    # clamped to 0-1; 0.5 is the "no change" midpoint for progress terms.
    goal_fit = _clamp(0.5 + total_goal_progress)
    measurable_component = _clamp(0.5 + measurable_delta)
    preservation = _clamp(1.0 - collateral_damage * 5)
    confidence = measurable_count / max(len(goal.targets), 1)

    score = (
        0.30 * goal_fit
        + 0.25 * measurable_component
        + 0.15 * preservation
        + 0.10 * taste_fit
        + 0.10 * confidence
        + 0.10 * 1.0  # reversibility: 1.0 for undo-able moves
    )

    # Hard rules — each can force keep_change=False regardless of score.
    keep_change = True

    if measurable_count > 0 and measurable_delta <= 0:
        keep_change = False
        notes.append("HARD RULE: measurable delta <= 0 — no measurable improvement")

    if protection_violated:
        keep_change = False
        notes.append("HARD RULE: protected dimension violated")

    if score < 0.40:
        keep_change = False
        notes.append(f"HARD RULE: total score {score:.3f} < 0.40 threshold")

    if measurable_count == 0 and not protection_violated:
        # All TARGET dimensions unmeasurable AND no protection violations —
        # defer keep/undo to the agent's musical judgment.
        # IMPORTANT: protection violations still force undo even when
        # targets are unmeasurable (Finding 1 fix).
        # NOTE: this deliberately overrides the score-threshold hard rule above.
        keep_change = True
        notes.append(
            "No measurable target dimensions — deferring keep/undo to agent musical judgment"
        )

    return {
        "score": round(score, 4),
        "keep_change": keep_change,
        "goal_progress": round(total_goal_progress, 4),
        "collateral_damage": round(collateral_damage, 4),
        "measurable_delta": round(measurable_delta, 4),
        "measurable_dimensions": measurable_count,
        "total_dimensions": len(goal.targets),
        "dimension_changes": dimension_changes,
        "notes": notes,
        # I5: hint for the agent to track consecutive undos
        "consecutive_undo_hint": not keep_change,
    }
592
+
593
+
594
+ # ── Technique Cards (Round 2) ─────────────────────────────────────────
595
+
596
@dataclass
class TechniqueCard:
    """A structured, reusable production recipe — not just text."""
    # Short description of the problem this recipe solves.
    problem: str
    context: list[str] = field(default_factory=list)  # genre/style tags
    devices: list[str] = field(default_factory=list)  # what to load
    method: str = ""  # step-by-step instructions
    verification: list[str] = field(default_factory=list)  # what to check after
    evidence: dict = field(default_factory=dict)  # {sources, in_session_tested}

    def to_dict(self) -> dict:
        # Full dataclass → plain dict (for JSON serialization).
        return asdict(self)

    def to_memory_payload(self) -> dict:
        """Convert to a payload suitable for memory_learn(type='technique_card')."""
        # Explicit key-by-key mapping (rather than asdict) keeps the memory
        # schema stable even if fields are later added to this class.
        return {
            "problem": self.problem,
            "context": self.context,
            "devices": self.devices,
            "method": self.method,
            "verification": self.verification,
            "evidence": self.evidence,
        }
619
+
620
+
621
def build_technique_card_from_outcome(outcome: dict) -> Optional[TechniqueCard]:
    """Extract a technique card from a successful outcome.

    Only produces a card if the outcome was kept and had meaningful improvement.
    """
    # Only distill kept, clearly-successful outcomes into reusable cards.
    if not outcome.get("kept", False) or outcome.get("score", 0) < 0.6:
        return None

    goal_data = outcome.get("goal_vector", {})
    move_data = outcome.get("move", {})
    changes = outcome.get("dimension_changes", {})

    weights = goal_data.get("targets", {})
    if not weights:
        return None

    # The dominant target dimension names the problem this card solves.
    dominant = max(weights.items(), key=lambda kv: kv[1])[0] if weights else "general"
    problem_text = f"Improve {dominant} in production"

    # Prefer an explicit action list over the move's display name.
    method_text = move_data.get("name", "unknown technique")
    if isinstance(move_data.get("actions"), list):
        method_text = " → ".join(move_data["actions"])

    # Each improved dimension becomes a post-application check.
    checks = [
        f"{dim} should improve (was +{change['delta']:.3f})"
        for dim, change in changes.items()
        if isinstance(change, dict) and change.get("delta", 0) > 0
    ]

    tags = goal_data.get("tags")
    devices = move_data.get("devices")
    return TechniqueCard(
        problem=problem_text,
        context=list(tags) if isinstance(tags, list) else [],
        devices=devices if isinstance(devices, list) else [],
        method=method_text,
        verification=checks,
        evidence={"score": outcome.get("score", 0), "in_session_tested": True},
    )
662
+
663
+
664
+ # ── Outcome Memory Analysis (Round 1) ────────────────────────────────
665
+
666
def analyze_outcome_history(outcomes: list[dict]) -> dict:
    """Analyze accumulated outcome memories to identify user taste patterns.

    outcomes: list of outcome technique payloads from memory_list(type="outcome")
    Returns taste analysis: keep rate, dimension success, inferred preferences.
    """
    if not outcomes:
        # No data yet — return a neutral report so callers need no special case.
        return {
            "total_outcomes": 0,
            "keep_rate": 0.0,
            "dimension_success": {},
            "common_kept_moves": [],
            "common_undone_moves": [],
            "taste_vector": {},
            "notes": ["No outcome history — use the evaluation loop to build taste data"],
        }

    total = len(outcomes)
    kept = [entry for entry in outcomes if entry.get("kept", False)]
    undone = [entry for entry in outcomes if not entry.get("kept", False)]
    keep_rate = len(kept) / total

    # Dimension success: average per-dimension delta across kept outcomes only.
    deltas_by_dim: dict[str, list[float]] = {}
    for entry in kept:
        for dim, change in entry.get("dimension_changes", {}).items():
            step = change.get("delta", 0) if isinstance(change, dict) else 0
            deltas_by_dim.setdefault(dim, []).append(step)
    avg_dimension_success = {
        dim: round(sum(steps) / len(steps), 4)
        for dim, steps in deltas_by_dim.items()
        if steps
    }

    def _move_name(entry: dict) -> str:
        # Move payloads may be malformed; fall back to "unknown" defensively.
        move = entry.get("move")
        return move.get("name", "unknown") if isinstance(move, dict) else "unknown"

    # Tally move names separately for kept vs undone outcomes.
    kept_counts: dict[str, int] = {}
    for entry in kept:
        label = _move_name(entry)
        kept_counts[label] = kept_counts.get(label, 0) + 1
    undone_counts: dict[str, int] = {}
    for entry in undone:
        label = _move_name(entry)
        undone_counts[label] = undone_counts.get(label, 0) + 1

    common_kept = sorted(kept_counts.items(), key=lambda kv: -kv[1])[:5]
    common_undone = sorted(undone_counts.items(), key=lambda kv: -kv[1])[:5]

    # Taste vector: which dimensions does this user care about?
    # Accumulate target weights from kept outcomes, then normalize.
    taste_vector: dict[str, float] = {}
    for entry in kept:
        goal = entry.get("goal_vector", {})
        weights = goal.get("targets", {}) if isinstance(goal, dict) else {}
        for dim, weight in weights.items():
            taste_vector[dim] = taste_vector.get(dim, 0) + weight
    taste_total = sum(taste_vector.values())
    if taste_total > 0:
        taste_vector = {dim: round(w / taste_total, 3) for dim, w in taste_vector.items()}

    notes = []
    if keep_rate < 0.3:
        notes.append(f"Low keep rate ({keep_rate:.0%}) — agent may be too aggressive")
    if keep_rate > 0.8:
        notes.append(f"High keep rate ({keep_rate:.0%}) — agent is well-calibrated or too conservative")

    return {
        "total_outcomes": total,
        "kept": len(kept),
        "undone": len(undone),
        "keep_rate": round(keep_rate, 3),
        "dimension_success": avg_dimension_success,
        "common_kept_moves": [{"move": m, "count": c} for m, c in common_kept],
        "common_undone_moves": [{"move": m, "count": c} for m, c in common_undone],
        "taste_vector": taste_vector,
        "notes": notes,
    }
745
+
746
+
747
+ # ── Taste Model (Round 4) ────────────────────────────────────────────
748
+
749
def compute_taste_fit(
    goal: GoalVector,
    outcome_history: Optional[list[dict]] = None,
) -> float:
    """Score how well *goal* matches the taste demonstrated by past outcomes.

    Builds a normalized taste vector from the kept outcomes in
    ``outcome_history`` (which dimensions the user's accepted moves
    targeted), then takes the dot product with the normalized goal
    targets and amplifies the overlap.

    Returns 0.0-1.0 where:
      - 0.0 = no data or goal doesn't match taste
      - 1.0 = goal perfectly aligns with user's demonstrated preferences
    """
    if not outcome_history:
        return 0.0

    # Accumulate target weights across every kept outcome.
    kept_count = 0
    taste: dict[str, float] = {}
    for record in outcome_history:
        if not record.get("kept", False):
            continue
        kept_count += 1
        goal_vec = record.get("goal_vector", {})
        target_map = goal_vec.get("targets", {}) if isinstance(goal_vec, dict) else {}
        for dim, w in target_map.items():
            taste[dim] = taste.get(dim, 0) + w

    if kept_count == 0 or not taste:
        return 0.0

    # Normalize the taste weights so they sum to 1.
    taste_mass = sum(taste.values())
    if taste_mass > 0:
        taste = {dim: w / taste_mass for dim, w in taste.items()}

    current = goal.targets
    if not current:
        return 0.0

    current_mass = sum(current.values())
    if current_mass <= 0:
        return 0.0

    # Dot product of the two normalized weight distributions.
    overlap = sum(
        (w / current_mass) * taste.get(dim, 0) for dim, w in current.items()
    )

    # The product of two normalized distributions is typically small;
    # amplify so that moderate overlap yields a meaningful score.
    return _clamp(overlap * 4.0)
805
+
806
+
807
def get_taste_profile(outcome_history: list[dict]) -> dict:
    """Build a full taste profile from outcome history.

    Returns: {taste_vector, preferred_dimensions, avoided_dimensions,
              keep_rate, sample_size, notes}
    """
    summary = analyze_outcome_history(outcome_history)
    taste_vector = summary.get("taste_vector", {})

    # Preferred: the dimensions with the most weight among kept moves.
    top_kept = sorted(taste_vector.items(), key=lambda item: -item[1])[:5]

    # Avoided: aggregate target weights from undone moves only.
    rejected: dict[str, float] = {}
    for record in outcome_history:
        if record.get("kept", False):
            continue
        goal_vec = record.get("goal_vector", {})
        target_map = goal_vec.get("targets", {}) if isinstance(goal_vec, dict) else {}
        for dim, weight in target_map.items():
            rejected[dim] = rejected.get(dim, 0) + weight

    if rejected:
        reject_mass = sum(rejected.values())
        if reject_mass > 0:
            rejected = {dim: w / reject_mass for dim, w in rejected.items()}

    top_rejected = sorted(rejected.items(), key=lambda item: -item[1])[:5]

    return {
        "taste_vector": taste_vector,
        "preferred_dimensions": [
            {"dim": dim, "weight": round(w, 3)} for dim, w in top_kept
        ],
        "avoided_dimensions": [
            {"dim": dim, "weight": round(w, 3)} for dim, w in top_rejected
        ],
        "keep_rate": summary.get("keep_rate", 0),
        "sample_size": summary.get("total_outcomes", 0),
        "notes": summary.get("notes", []),
    }
842
+
843
+
844
+ # ── Background Technique Mining (Round 3) ───────────────────────────
845
+
846
def should_mine_technique(
    outcome: dict,
    existing_techniques: Optional[list[dict]] = None,
) -> bool:
    """Determine if an outcome is novel enough to auto-create a technique card.

    Args:
        outcome: outcome payload with "kept", "score", "dimension_changes",
            and "move" keys.
        existing_techniques: previously stored technique records (each with
            a "payload" dict) used to reject near-duplicates.

    Returns True only when all of the following hold:
      - the move was kept and scored above 0.7 (high quality)
      - at least one dimension improved by more than 0.15
      - no existing technique's method already mentions the move name
    """
    if not outcome.get("kept", False):
        return False
    if outcome.get("score", 0) < 0.7:
        return False

    # Require a meaningful dimension improvement. Idiom fix: any() over the
    # change values replaces the manual flag-and-break loop (the dim key
    # was never used).
    dim_changes = outcome.get("dimension_changes", {})
    if not any(
        (change.get("delta", 0) if isinstance(change, dict) else 0) > 0.15
        for change in dim_changes.values()
    ):
        return False

    # Novelty check — don't create duplicate techniques.
    if existing_techniques:
        move = outcome.get("move", {})
        move_name = move.get("name", "") if isinstance(move, dict) else ""
        if move_name:
            needle = move_name.lower()  # hoisted out of the loop
            for tech in existing_techniques:
                payload = tech.get("payload", {})
                existing_method = payload.get("method", "")
                if needle in existing_method.lower():
                    return False  # Similar technique already exists

    return True
886
+
887
+
888
def mine_technique_from_outcome(outcome: dict) -> Optional[TechniqueCard]:
    """Extract a technique card from a high-quality outcome.

    This is the "background mining" — when the agent detects a novel
    approach that worked well, it auto-creates a technique card for future use.

    Args:
        outcome: outcome payload with "kept", "score", "goal_vector",
            "move", and "dimension_changes" keys.

    Returns:
        A TechniqueCard, or None when the outcome was not kept.
    """
    if not outcome.get("kept", False):
        return None

    # Normalize untrusted payload fields. Fix: guard goal_vector and
    # dimension_changes with isinstance like the sibling analyzers do, so a
    # non-dict value can't raise AttributeError here.
    gv = outcome.get("goal_vector", {})
    if not isinstance(gv, dict):
        gv = {}
    move = outcome.get("move", {})
    dim_changes = outcome.get("dimension_changes", {})
    if not isinstance(dim_changes, dict):
        dim_changes = {}
    score = outcome.get("score", 0)

    # Build problem description from the top two targeted dimensions.
    targets = gv.get("targets", {})
    if isinstance(targets, dict) and targets:
        top_dims = sorted(targets.items(), key=lambda x: -x[1])[:2]
        problem = f"Improve {' and '.join(d for d, _ in top_dims)}"
    else:
        problem = "General production improvement"

    # Build method: move name plus its action sequence, when present.
    move_name = move.get("name", "unknown") if isinstance(move, dict) else str(move)
    actions = move.get("actions", []) if isinstance(move, dict) else []
    if isinstance(actions, list) and actions:
        method = f"{move_name}: {' → '.join(str(a) for a in actions)}"
    else:
        method = move_name

    # Build verification from what actually improved.
    verification = []
    for dim, change in dim_changes.items():
        if isinstance(change, dict) and change.get("delta", 0) > 0.05:
            verification.append(
                f"{dim} should improve (observed +{change['delta']:.3f})"
            )

    # Devices used.
    devices = move.get("devices", []) if isinstance(move, dict) else []
    if not isinstance(devices, list):
        devices = []

    tags = gv.get("tags")
    return TechniqueCard(
        problem=problem,
        context=list(tags) if isinstance(tags, list) else [],
        devices=devices,
        method=method,
        verification=verification,
        evidence={
            "score": score,
            "in_session_tested": True,
            "auto_mined": True,
            "dimension_improvements": {
                dim: change.get("delta", 0)
                for dim, change in dim_changes.items()
                if isinstance(change, dict) and change.get("delta", 0) > 0
            },
        },
    )