livepilot 1.10.4 → 1.10.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. package/.claude-plugin/marketplace.json +3 -3
  2. package/AGENTS.md +3 -3
  3. package/CHANGELOG.md +148 -0
  4. package/CONTRIBUTING.md +1 -1
  5. package/README.md +6 -6
  6. package/livepilot/.Codex-plugin/plugin.json +2 -2
  7. package/livepilot/.claude-plugin/plugin.json +2 -2
  8. package/livepilot/skills/livepilot-core/SKILL.md +4 -4
  9. package/livepilot/skills/livepilot-core/references/overview.md +3 -3
  10. package/livepilot/skills/livepilot-evaluation/references/capability-modes.md +1 -1
  11. package/livepilot/skills/livepilot-release/SKILL.md +5 -5
  12. package/m4l_device/LivePilot_Analyzer.amxd +0 -0
  13. package/m4l_device/livepilot_bridge.js +12 -1
  14. package/manifest.json +3 -3
  15. package/mcp_server/__init__.py +1 -1
  16. package/mcp_server/composer/sample_resolver.py +10 -6
  17. package/mcp_server/composer/tools.py +10 -6
  18. package/mcp_server/connection.py +6 -1
  19. package/mcp_server/creative_constraints/tools.py +9 -8
  20. package/mcp_server/experiment/engine.py +9 -5
  21. package/mcp_server/experiment/tools.py +9 -9
  22. package/mcp_server/hook_hunter/tools.py +14 -9
  23. package/mcp_server/m4l_bridge.py +11 -0
  24. package/mcp_server/memory/taste_graph.py +7 -2
  25. package/mcp_server/mix_engine/tools.py +8 -3
  26. package/mcp_server/musical_intelligence/tools.py +15 -10
  27. package/mcp_server/performance_engine/tools.py +6 -2
  28. package/mcp_server/preview_studio/tools.py +21 -15
  29. package/mcp_server/project_brain/tools.py +18 -10
  30. package/mcp_server/reference_engine/tools.py +7 -5
  31. package/mcp_server/runtime/capability_probe.py +10 -4
  32. package/mcp_server/runtime/tools.py +8 -2
  33. package/mcp_server/sample_engine/tools.py +394 -33
  34. package/mcp_server/semantic_moves/tools.py +5 -1
  35. package/mcp_server/server.py +10 -9
  36. package/mcp_server/services/motif_service.py +9 -3
  37. package/mcp_server/session_continuity/tools.py +7 -3
  38. package/mcp_server/session_continuity/tracker.py +9 -8
  39. package/mcp_server/song_brain/tools.py +17 -12
  40. package/mcp_server/splice_client/client.py +19 -6
  41. package/mcp_server/stuckness_detector/tools.py +8 -5
  42. package/mcp_server/tools/_agent_os_engine/__init__.py +52 -0
  43. package/mcp_server/tools/_agent_os_engine/critics.py +134 -0
  44. package/mcp_server/tools/_agent_os_engine/evaluation.py +206 -0
  45. package/mcp_server/tools/_agent_os_engine/models.py +132 -0
  46. package/mcp_server/tools/_agent_os_engine/taste.py +192 -0
  47. package/mcp_server/tools/_agent_os_engine/techniques.py +161 -0
  48. package/mcp_server/tools/_agent_os_engine/world_model.py +170 -0
  49. package/mcp_server/tools/_composition_engine/__init__.py +67 -0
  50. package/mcp_server/tools/_composition_engine/analysis.py +174 -0
  51. package/mcp_server/tools/_composition_engine/critics.py +522 -0
  52. package/mcp_server/tools/_composition_engine/gestures.py +230 -0
  53. package/mcp_server/tools/_composition_engine/harmony.py +70 -0
  54. package/mcp_server/tools/_composition_engine/models.py +193 -0
  55. package/mcp_server/tools/_composition_engine/sections.py +371 -0
  56. package/mcp_server/tools/_perception_engine.py +18 -11
  57. package/mcp_server/tools/agent_os.py +23 -15
  58. package/mcp_server/tools/analyzer.py +166 -7
  59. package/mcp_server/tools/automation.py +6 -1
  60. package/mcp_server/tools/composition.py +25 -16
  61. package/mcp_server/tools/devices.py +10 -6
  62. package/mcp_server/tools/motif.py +7 -2
  63. package/mcp_server/tools/planner.py +6 -2
  64. package/mcp_server/tools/research.py +13 -10
  65. package/mcp_server/transition_engine/tools.py +6 -1
  66. package/mcp_server/translation_engine/tools.py +8 -6
  67. package/mcp_server/wonder_mode/engine.py +8 -3
  68. package/mcp_server/wonder_mode/tools.py +29 -21
  69. package/package.json +2 -2
  70. package/remote_script/LivePilot/__init__.py +1 -1
  71. package/requirements.txt +6 -0
  72. package/livepilot.mcpb +0 -0
  73. package/mcp_server/tools/_agent_os_engine.py +0 -947
  74. package/mcp_server/tools/_composition_engine.py +0 -1530
@@ -7,6 +7,7 @@ Separates taste (cross-session) from identity (in-song) ranking.
7
7
  from __future__ import annotations
8
8
 
9
9
  import hashlib
10
+ import logging
10
11
  import time
11
12
  from typing import Optional
12
13
 
@@ -17,6 +18,8 @@ from .models import (
17
18
  TurnResolution,
18
19
  )
19
20
 
21
+ logger = logging.getLogger(__name__)
22
+
20
23
 
21
24
  # ── In-memory state ───────────────────────────────────────────────
22
25
 
@@ -129,9 +132,8 @@ def record_turn_resolution(
129
132
  if _project_store is not None:
130
133
  try:
131
134
  _project_store.save_turn(turn.to_dict())
132
- except Exception:
133
- pass
134
-
135
+ except Exception as exc:
136
+ logger.debug("record_turn_resolution failed: %s", exc)
135
137
  return turn
136
138
 
137
139
 
@@ -158,9 +160,8 @@ def open_thread(description: str, domain: str = "", priority: float = 0.5) -> Cr
158
160
  if _project_store is not None:
159
161
  try:
160
162
  _project_store.save_thread(thread.to_dict())
161
- except Exception:
162
- pass
163
-
163
+ except Exception as exc:
164
+ logger.debug("open_thread failed: %s", exc)
164
165
  return thread
165
166
 
166
167
 
@@ -173,8 +174,8 @@ def resolve_thread(thread_id: str) -> Optional[CreativeThread]:
173
174
  if _project_store is not None:
174
175
  try:
175
176
  _project_store.save_thread(thread.to_dict())
176
- except Exception:
177
- pass
177
+ except Exception as exc:
178
+ logger.debug("resolve_thread failed: %s", exc)
178
179
  return thread
179
180
 
180
181
 
@@ -12,7 +12,9 @@ from fastmcp import Context
12
12
  from ..server import mcp
13
13
  from . import builder
14
14
  from .models import SongBrain
15
+ import logging
15
16
 
17
+ logger = logging.getLogger(__name__)
16
18
 
17
19
  # Module-level fallback for consumers without ctx.
18
20
  # Prefer ctx.lifespan_context["current_brain"] when ctx is available.
@@ -67,7 +69,8 @@ def _fetch_session_data(ctx: Context) -> dict:
67
69
 
68
70
  try:
69
71
  data["session_info"] = ableton.send_command("get_session_info", {})
70
- except Exception:
72
+ except Exception as exc:
73
+ logger.debug("_fetch_session_data failed: %s", exc)
71
74
  data["session_info"] = {"tempo": 120.0, "track_count": 0}
72
75
 
73
76
  try:
@@ -78,22 +81,23 @@ def _fetch_session_data(ctx: Context) -> dict:
78
81
  zip(matrix.get("scenes", []), matrix.get("matrix", []))
79
82
  )
80
83
  ]
81
- except Exception:
82
- pass
84
+ except Exception as exc:
85
+ logger.debug("_fetch_session_data failed: %s", exc)
83
86
 
84
87
  try:
85
88
  info = data["session_info"]
86
89
  tracks_list = info.get("tracks", [])
87
90
  data["tracks"] = tracks_list if isinstance(tracks_list, list) else []
88
- except Exception:
89
- pass
91
+ except Exception as exc:
92
+ logger.debug("_fetch_session_data failed: %s", exc)
90
93
 
91
94
  # Motif data — via shared motif service (pure-Python, not TCP)
92
95
  try:
93
96
  from ..services.motif_service import get_motif_data, fetch_notes_from_ableton
94
97
  notes_by_track = fetch_notes_from_ableton(ableton, data.get("tracks", []))
95
98
  data["motif_data"] = get_motif_data(notes_by_track)
96
- except Exception:
99
+ except Exception as exc:
100
+ logger.debug("_fetch_session_data failed: %s", exc)
97
101
  pass # Motif graph requires notes in clips; empty is valid
98
102
 
99
103
  # Composition analysis — from musical intelligence detectors (pure computation)
@@ -106,8 +110,8 @@ def _fetch_session_data(ctx: Context) -> dict:
106
110
  "sections": [p.to_dict() for p in purposes],
107
111
  "emotional_arc": arc.to_dict(),
108
112
  }
109
- except Exception:
110
- pass
113
+ except Exception as exc:
114
+ logger.debug("_fetch_session_data failed: %s", exc)
111
115
 
112
116
  # Role graph — from semantic move resolvers (pure computation, no I/O)
113
117
  try:
@@ -118,8 +122,8 @@ def _fetch_session_data(ctx: Context) -> dict:
118
122
  role = infer_role(name)
119
123
  roles[name] = {"index": track.get("index", 0), "role": role}
120
124
  data["role_graph"] = roles
121
- except Exception:
122
- pass
125
+ except Exception as exc:
126
+ logger.debug("_fetch_session_data failed: %s", exc)
123
127
 
124
128
  # Recent moves — from session-scoped action ledger
125
129
  try:
@@ -128,8 +132,8 @@ def _fetch_session_data(ctx: Context) -> dict:
128
132
  if isinstance(ledger, SessionLedger):
129
133
  recent = ledger.get_recent_moves(limit=10)
130
134
  data["recent_moves"] = [e.to_dict() for e in recent]
131
- except Exception:
132
- pass
135
+ except Exception as exc:
136
+ logger.debug("_fetch_session_data failed: %s", exc)
133
137
 
134
138
  return data
135
139
 
@@ -152,6 +156,7 @@ def build_song_brain(ctx: Context) -> dict:
152
156
 
153
157
  # Capability reporting — what data was actually available
154
158
  from ..runtime.capability import build_capability
159
+
155
160
  cap = build_capability(
156
161
  required=["session_info", "scenes", "tracks", "motif_data", "composition_analysis", "role_graph"],
157
162
  available={
@@ -190,12 +190,25 @@ class SpliceGRPCClient:
190
190
  ) -> Optional[str]:
191
191
  """Download a sample by file_hash. Returns local path when complete.
192
192
 
193
- Costs 1 credit. Checks credit floor before downloading.
194
- Returns None on failure.
193
+ Costs 1 credit. Enforces CREDIT_HARD_FLOOR: defensively refuses the
194
+ download (returns None) if completing it would leave the user at or
195
+ below the floor, regardless of what the caller requested. Callers
196
+ should still gate on `can_afford` upstream for UX, but this guard
197
+ closes the hole if a future caller forgets.
195
198
  """
196
199
  if not self.connected:
197
200
  return None
198
201
 
202
+ # Defensive floor guard — do not rely on callers alone.
203
+ can, remaining = await self.can_afford(1, budget=1)
204
+ if not can:
205
+ logger.warning(
206
+ "Splice download blocked by credit floor guard "
207
+ "(remaining=%s, floor=%s, file_hash=%s)",
208
+ remaining, CREDIT_HARD_FLOOR, file_hash,
209
+ )
210
+ return None
211
+
199
212
  pb2 = self._pb2
200
213
  try:
201
214
  # Trigger download
@@ -221,8 +234,8 @@ class SpliceGRPCClient:
221
234
  )
222
235
  if response.Sample.LocalPath:
223
236
  return response.Sample.LocalPath
224
- except Exception:
225
- pass
237
+ except Exception as exc:
238
+ logger.debug("_wait_for_download failed: %s", exc)
226
239
  await asyncio.sleep(0.5)
227
240
  logger.warning(f"Download timed out for {file_hash}")
228
241
  return None
@@ -304,9 +317,9 @@ class SpliceGRPCClient:
304
317
  try:
305
318
  await self.stub.SyncSounds(pb2.SyncSoundsRequest())
306
319
  return True
307
- except Exception:
320
+ except Exception as exc:
321
+ logger.debug("sync_sounds failed: %s", exc)
308
322
  return False
309
-
310
323
  # ── Connection Helpers ──────────────────────────────────────────
311
324
 
312
325
  def _read_port(self) -> Optional[int]:
@@ -11,6 +11,9 @@ from fastmcp import Context
11
11
 
12
12
  from ..server import mcp
13
13
  from . import detector
14
+ import logging
15
+
16
+ logger = logging.getLogger(__name__)
14
17
 
15
18
 
16
19
  def _get_ableton(ctx: Context):
@@ -30,8 +33,8 @@ def _get_action_history(ctx: Context) -> list[dict]:
30
33
  if isinstance(ledger, SessionLedger):
31
34
  recent = ledger.get_recent_moves(limit=20)
32
35
  return [e.to_dict() for e in recent]
33
- except Exception:
34
- pass
36
+ except Exception as exc:
37
+ logger.debug("_get_action_history failed: %s", exc)
35
38
  return []
36
39
 
37
40
 
@@ -45,9 +48,8 @@ def _get_session_and_brain(ctx: Context) -> tuple[dict, dict, int]:
45
48
  try:
46
49
  session_info = ableton.send_command("get_session_info", {})
47
50
  section_count = session_info.get("scene_count", 0)
48
- except Exception:
49
- pass
50
-
51
+ except Exception as exc:
52
+ logger.debug("_get_session_and_brain failed: %s", exc)
51
53
  try:
52
54
  from ..song_brain.tools import _current_brain
53
55
  if _current_brain is not None:
@@ -165,6 +167,7 @@ def start_rescue_workflow(
165
167
 
166
168
  # Build a rescue suggestion for this specific type
167
169
  from .models import StucknessReport
170
+
168
171
  report = StucknessReport(
169
172
  confidence=0.6,
170
173
  level="stuck",
@@ -0,0 +1,52 @@
1
+ """Agent OS engine — goal compilation, world model, evaluation.
2
+
3
+ This package replaces the former single-file `_agent_os_engine.py`.
4
+ Public surface unchanged — callers import the same names.
5
+
6
+ Internal organization:
7
+ models.py — Dataclasses + module-level constants
8
+ world_model.py — Goal validation, role inference, world-model build
9
+ critics.py — Sonic + technical critics
10
+ evaluation.py — Scoring, dimension extraction, clamp helpers
11
+ techniques.py — TechniqueCard mining + building
12
+ taste.py — Outcome analysis, taste fit, taste profile
13
+ """
14
+ from __future__ import annotations
15
+
16
+ from .models import (
17
+ QUALITY_DIMENSIONS, MEASURABLE_PROXIES,
18
+ VALID_MODES, VALID_RESEARCH_MODES,
19
+ GoalVector, WorldModel, Issue, TechniqueCard,
20
+ )
21
+ from .world_model import (
22
+ validate_goal_vector,
23
+ infer_track_role,
24
+ build_world_model_from_data,
25
+ )
26
+ from .critics import run_sonic_critic, run_technical_critic
27
+ from .evaluation import compute_evaluation_score, _extract_dimension_value
28
+ from .techniques import (
29
+ build_technique_card_from_outcome,
30
+ should_mine_technique,
31
+ mine_technique_from_outcome,
32
+ )
33
+ from .taste import (
34
+ analyze_outcome_history,
35
+ compute_taste_fit,
36
+ get_taste_profile,
37
+ )
38
+
39
+ __all__ = [
40
+ "QUALITY_DIMENSIONS", "MEASURABLE_PROXIES",
41
+ "VALID_MODES", "VALID_RESEARCH_MODES",
42
+ "GoalVector", "WorldModel", "Issue", "TechniqueCard",
43
+ "validate_goal_vector", "infer_track_role", "build_world_model_from_data",
44
+ "run_sonic_critic", "run_technical_critic",
45
+ "compute_evaluation_score",
46
+ "build_technique_card_from_outcome",
47
+ "should_mine_technique",
48
+ "mine_technique_from_outcome",
49
+ "analyze_outcome_history",
50
+ "compute_taste_fit",
51
+ "get_taste_profile",
52
+ ]
@@ -0,0 +1,134 @@
1
+ """Part of the _agent_os_engine package — extracted from the single-file engine.
2
+
3
+ Pure-computation core. Callers should import from the package facade
4
+ (`from mcp_server.tools._agent_os_engine import X`), which re-exports from
5
+ these sub-modules.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import math
10
+ import re
11
+ from dataclasses import asdict, dataclass, field
12
+ from typing import Any, Optional
13
+
14
+ from .models import Issue, GoalVector, WorldModel, MEASURABLE_PROXIES
15
+
16
def run_sonic_critic(
    sonic: Optional[dict],
    goal: GoalVector,
    track_roles: dict,
) -> list[Issue]:
    """Run sonic heuristics against spectrum data.

    Only issues whose trigger dimensions overlap the goal's target
    dimensions are reported.

    Args:
        sonic: Analyzer snapshot, or None when no analyzer data is available.
            Band energies are read from ``sonic["spectrum"]`` or, when that
            is missing or empty, from ``sonic["bands"]``. Optional ``"rms"``
            and ``"peak"`` entries feed the headroom and dynamics checks.
        goal: Goal whose ``targets`` mapping keys name the dimensions the
            caller wants to improve.
        track_roles: Mapping whose values are role names; a value of
            ``"kick"``, ``"bass"`` or ``"sub_bass"`` marks the session as
            having bass content.

    Returns:
        A list of Issue objects (possibly empty). When ``sonic`` is None the
        list holds a single ``analyzer_unavailable`` issue.
    """
    if sonic is None:
        return [Issue(
            type="analyzer_unavailable",
            critic="sonic",
            severity=0.3,
            confidence=1.0,
            affected_dimensions=list(MEASURABLE_PROXIES.keys()),
            evidence=["M4L Analyzer not connected or no audio playing"],
            recommended_actions=["Load LivePilot_Analyzer on master", "Start playback"],
        )]

    issues: list[Issue] = []
    # Tolerate both payload shapes: the evaluator in this package accepts
    # {"spectrum": {...}} as well as {"bands": {...}}. Reading only
    # "spectrum" would make every band default to 0 for the other shape and
    # raise a spurious weak_foundation issue. The trailing `or {}` also
    # covers a key that is present but set to None.
    bands = sonic.get("spectrum") or sonic.get("bands") or {}
    rms = sonic.get("rms")
    peak = sonic.get("peak")
    target_dims = set(goal.targets.keys())

    # 1. Mud detection: low_mid congestion
    low_mid = bands.get("low_mid", 0)
    if low_mid > 0.7 and {"clarity", "weight", "warmth"} & target_dims:
        issues.append(Issue(
            type="low_mid_congestion",
            critic="sonic",
            severity=min(1.0, (low_mid - 0.7) * 3.3),
            confidence=0.85,
            affected_dimensions=["clarity", "weight"],
            evidence=[f"low_mid band energy: {low_mid:.2f} (threshold: 0.7)"],
            recommended_actions=["EQ cut 200-500Hz on muddiest track", "HPF on non-bass elements"],
        ))

    # 2. Weak sub: only meaningful when the session actually has bass roles
    sub = bands.get("sub", 0)
    has_bass = any(r in ("kick", "bass", "sub_bass") for r in track_roles.values())
    if sub < 0.15 and has_bass and {"weight", "energy", "punch"} & target_dims:
        issues.append(Issue(
            type="weak_foundation",
            critic="sonic",
            severity=0.6,
            confidence=0.75,
            affected_dimensions=["weight", "energy"],
            evidence=[f"sub band energy: {sub:.2f} with bass tracks present"],
            recommended_actions=["Boost sub on kick/bass", "Check HPF not too aggressive"],
        ))

    # 3. Harsh top: combined high + presence energy
    high = bands.get("high", 0)
    presence = bands.get("presence", 0)
    if (high + presence) > 0.8 and {"brightness", "clarity", "warmth"} & target_dims:
        issues.append(Issue(
            type="harsh_highs",
            critic="sonic",
            severity=min(1.0, ((high + presence) - 0.8) * 2.5),
            confidence=0.80,
            affected_dimensions=["brightness", "clarity"],
            evidence=[f"high+presence: {high + presence:.2f} (threshold: 0.8)"],
            recommended_actions=["Reduce high shelf on brightest element", "Add subtle LP filter"],
        ))

    # 4. Low headroom
    if rms is not None and rms > 0.9 and {"energy", "punch", "clarity"} & target_dims:
        issues.append(Issue(
            type="headroom_risk",
            critic="sonic",
            severity=min(1.0, (rms - 0.9) * 10),
            confidence=0.90,
            affected_dimensions=["energy", "clarity", "punch"],
            evidence=[f"RMS: {rms:.3f} (threshold: 0.9)"],
            recommended_actions=["Reduce master volume", "Lower loudest track", "Add limiter"],
        ))

    # 5. Flat dynamics (C1 fix: correct dB formula)
    if rms is not None and peak is not None and rms > 0 and peak > 0:
        # Crest factor in dB; the floor on rms keeps the ratio bounded.
        crest_db = 20.0 * math.log10(peak / max(rms, 0.001))
        if crest_db < 3.0 and {"punch", "energy", "contrast"} & target_dims:
            issues.append(Issue(
                type="dynamics_flat",
                critic="sonic",
                severity=0.5,
                confidence=0.70,
                affected_dimensions=["punch", "contrast"],
                evidence=[f"crest factor: {crest_db:.1f} dB (threshold: 3 dB)"],
                recommended_actions=["Reduce compression", "Add transient shaper", "Reduce limiter"],
            ))

    return issues
108
+
109
def run_technical_critic(technical: dict) -> list[Issue]:
    """Report technical-health problems found in the session snapshot.

    Args:
        technical: Mapping that may carry ``"analyzer_available"`` (treated
            as False when absent) and ``"unhealthy_devices"``, an iterable of
            mappings with ``"track"``, ``"device"`` and ``"flag"`` keys.

    Returns:
        One ``analyzer_offline`` issue when the analyzer is not available,
        followed by one ``unhealthy_plugin`` issue per unhealthy device.
    """
    findings = []

    analyzer_ok = technical.get("analyzer_available", False)
    if not analyzer_ok:
        findings.append(Issue(
            type="analyzer_offline",
            critic="technical",
            severity=0.4,
            confidence=1.0,
            evidence=["LivePilot Analyzer not receiving data"],
            recommended_actions=["Load LivePilot_Analyzer.amxd on master track"],
        ))

    # One issue per flagged device, in the order the snapshot lists them.
    findings.extend(
        Issue(
            type="unhealthy_plugin",
            critic="technical",
            severity=0.7,
            confidence=0.95,
            evidence=[f"Track {dev['track']}: {dev['device']} — {dev['flag']}"],
            recommended_actions=["Delete and replace with native Ableton device"],
        )
        for dev in technical.get("unhealthy_devices", [])
    )

    return findings
134
+
@@ -0,0 +1,206 @@
1
+ """Part of the _agent_os_engine package — extracted from the single-file engine.
2
+
3
+ Pure-computation core. Callers should import from the package facade
4
+ (`from mcp_server.tools._agent_os_engine import X`), which re-exports from
5
+ these sub-modules.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import math
10
+ import re
11
+ from dataclasses import asdict, dataclass, field
12
+ from typing import Any, Optional
13
+
14
+ from .models import QUALITY_DIMENSIONS, GoalVector, WorldModel, _clamp
15
+ from .taste import compute_taste_fit
16
+
17
+
18
+ # ── Evaluation Engine ─────────────────────────────────────────────────
19
+ # _clamp lives in .models — shared with taste.py to avoid circular imports.
20
+
21
+ def _extract_dimension_value(
22
+ sonic: dict,
23
+ dimension: str,
24
+ ) -> Optional[float]:
25
+ """Map a quality dimension to a measurable value from sonic data.
26
+
27
+ Returns None for unmeasurable dimensions (confidence=0.0 in Phase 1).
28
+ All returned values are clamped to 0.0-1.0 for consistent scoring.
29
+ """
30
+ if not sonic:
31
+ return None
32
+ # Accept both "spectrum" and "bands" keys — get_master_spectrum returns
33
+ # {"bands": {...}} while the evaluator historically expected {"spectrum": {...}}.
34
+ # Finding 2 fix: tolerate either shape so raw analyzer output works.
35
+ bands = sonic.get("spectrum") or sonic.get("bands")
36
+ if not bands:
37
+ return None
38
+ rms = sonic.get("rms")
39
+ peak = sonic.get("peak")
40
+
41
+ if dimension == "brightness":
42
+ high = bands.get("high", 0)
43
+ presence = bands.get("presence", 0)
44
+ return _clamp((high + presence) / 2.0)
45
+ elif dimension == "warmth":
46
+ return _clamp(bands.get("low_mid", 0))
47
+ elif dimension == "weight":
48
+ sub = bands.get("sub", 0)
49
+ low = bands.get("low", 0)
50
+ return _clamp((sub + low) / 2.0)
51
+ elif dimension == "clarity":
52
+ low_mid = bands.get("low_mid", 0)
53
+ return _clamp(1.0 - low_mid)
54
+ elif dimension == "density":
55
+ # Spectral flatness: geometric mean / arithmetic mean of band values.
56
+ # Higher = more evenly distributed energy (noise-like).
57
+ # Lower = more tonal (energy concentrated in few bands).
58
+ vals = [max(v, 1e-10) for v in bands.values() if isinstance(v, (int, float))]
59
+ if not vals:
60
+ return None
61
+ geo_mean = math.exp(sum(math.log(v) for v in vals) / len(vals))
62
+ arith_mean = sum(vals) / len(vals)
63
+ return _clamp(geo_mean / max(arith_mean, 1e-10))
64
+ elif dimension == "energy":
65
+ return _clamp(rms) if rms is not None else None
66
+ elif dimension == "punch":
67
+ if rms and peak and rms > 0:
68
+ crest_db = 20.0 * math.log10(max(peak / rms, 1.0))
69
+ # Normalize: 0 dB = 0.0, 20 dB = 1.0
70
+ return _clamp(crest_db / 20.0)
71
+ return None
72
+ else:
73
+ # Unmeasurable in Phase 1 (width, depth, motion, contrast,
74
+ # groove, tension, novelty, polish, emotion, cohesion)
75
+ return None
76
+
77
def compute_evaluation_score(
    goal: GoalVector,
    before_sonic: dict,
    after_sonic: dict,
    outcome_history: Optional[list[dict]] = None,
) -> dict:
    """Judge whether a move brought the mix closer to the goal.

    Each target dimension in ``goal.targets`` is measured before and after
    the move; dimensions that cannot be measured on both sides are noted
    and skipped. Protected dimensions in ``goal.protect`` are checked for
    drops, and a composite score plus a keep/undo verdict are derived.

    Returns:
        {
            "score": float, rounded composite score,
            "keep_change": bool,
            "goal_progress": float, weighted sum of target deltas,
            "collateral_damage": float, largest drop on a protected dimension,
            "measurable_delta": float, goal progress averaged over measured targets,
            "measurable_dimensions": int, number of targets measured,
            "total_dimensions": int, number of targets in the goal,
            "dimension_changes": dict, per-dimension before/after/delta,
            "notes": list[str],
            "consecutive_undo_hint": bool, inverse of keep_change,
        }
    """
    notes = []
    dimension_changes = {}

    # Per-dimension deltas for the goal's targets
    weighted_progress = 0.0
    measured = 0

    for dim, weight in goal.targets.items():
        pre = _extract_dimension_value(before_sonic, dim)
        post = _extract_dimension_value(after_sonic, dim)

        if pre is None or post is None:
            notes.append(f"{dim}: not measurable in Phase 1 (confidence=0.0)")
            continue

        change = post - pre
        dimension_changes[dim] = {
            "before": round(pre, 4),
            "after": round(post, 4),
            "delta": round(change, 4),
        }
        weighted_progress += change * weight
        measured += 1

    # Protected dimensions (C3 fix: use the actual threshold)
    collateral_damage = 0.0
    protection_violated = False

    for dim, threshold in goal.protect.items():
        pre = _extract_dimension_value(before_sonic, dim)
        post = _extract_dimension_value(after_sonic, dim)

        if pre is None or post is None:
            continue

        drop = pre - post
        if not drop > 0:
            continue

        collateral_damage = max(collateral_damage, drop)
        if post < threshold:
            # Violation: value dropped below the user's threshold
            protection_violated = True
            notes.append(
                f"PROTECTED dimension '{dim}' at {post:.3f}, "
                f"below threshold {threshold:.3f}"
            )
        elif drop > 0.15:
            # Large drops are flagged even while still above the threshold
            protection_violated = True
            notes.append(
                f"PROTECTED dimension '{dim}' dropped by {drop:.3f} "
                f"(absolute drop > 0.15)"
            )

    # Average improvement across the measured dimensions
    measurable_delta = weighted_progress / max(measured, 1)

    # Taste fit is only consulted when there is history to compare against
    if outcome_history:
        taste_fit = compute_taste_fit(goal, outcome_history)
    else:
        taste_fit = 0.0

    # Composite score (spec section 12.2)
    goal_fit = _clamp(0.5 + weighted_progress)
    measurable_component = _clamp(0.5 + measurable_delta)
    preservation = _clamp(1.0 - collateral_damage * 5)
    confidence = measured / max(len(goal.targets), 1)

    score = (
        0.30 * goal_fit
        + 0.25 * measurable_component
        + 0.15 * preservation
        + 0.10 * taste_fit
        + 0.10 * confidence
        + 0.10 * 1.0  # reversibility: 1.0 for undo-able moves
    )

    # Hard rules: any single veto rejects the change
    vetoes = []
    if measured > 0 and measurable_delta <= 0:
        vetoes.append("HARD RULE: measurable delta <= 0 — no measurable improvement")
    if protection_violated:
        vetoes.append("HARD RULE: protected dimension violated")
    if score < 0.40:
        vetoes.append(f"HARD RULE: total score {score:.3f} < 0.40 threshold")
    notes.extend(vetoes)
    keep_change = not vetoes

    if measured == 0 and not protection_violated:
        # All TARGET dimensions unmeasurable AND no protection violations —
        # defer keep/undo to the agent's musical judgment.
        # IMPORTANT: protection violations still force undo even when
        # targets are unmeasurable (Finding 1 fix).
        keep_change = True
        notes.append(
            "No measurable target dimensions — deferring keep/undo to agent musical judgment"
        )

    return {
        "score": round(score, 4),
        "keep_change": keep_change,
        "goal_progress": round(weighted_progress, 4),
        "collateral_damage": round(collateral_damage, 4),
        "measurable_delta": round(measurable_delta, 4),
        "measurable_dimensions": measured,
        "total_dimensions": len(goal.targets),
        "dimension_changes": dimension_changes,
        "notes": notes,
        # I5: hint for the agent to track consecutive undos
        "consecutive_undo_hint": not keep_change,
    }
206
+