livepilot 1.9.13 → 1.9.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. package/.claude-plugin/marketplace.json +3 -3
  2. package/AGENTS.md +3 -3
  3. package/CHANGELOG.md +51 -0
  4. package/CONTRIBUTING.md +1 -1
  5. package/README.md +7 -7
  6. package/bin/livepilot.js +32 -8
  7. package/installer/install.js +21 -2
  8. package/livepilot/.Codex-plugin/plugin.json +2 -2
  9. package/livepilot/.claude-plugin/plugin.json +2 -2
  10. package/livepilot/agents/livepilot-producer/AGENT.md +243 -49
  11. package/livepilot/skills/livepilot-core/SKILL.md +81 -6
  12. package/livepilot/skills/livepilot-core/references/m4l-devices.md +2 -2
  13. package/livepilot/skills/livepilot-core/references/overview.md +3 -3
  14. package/livepilot/skills/livepilot-core/references/sound-design.md +3 -2
  15. package/livepilot/skills/livepilot-release/SKILL.md +13 -13
  16. package/m4l_device/LivePilot_Analyzer.amxd +0 -0
  17. package/m4l_device/livepilot_bridge.js +6 -3
  18. package/mcp_server/__init__.py +1 -1
  19. package/mcp_server/curves.py +11 -3
  20. package/mcp_server/evaluation/__init__.py +1 -0
  21. package/mcp_server/evaluation/fabric.py +575 -0
  22. package/mcp_server/evaluation/feature_extractors.py +84 -0
  23. package/mcp_server/evaluation/policy.py +67 -0
  24. package/mcp_server/evaluation/tools.py +53 -0
  25. package/mcp_server/memory/__init__.py +11 -2
  26. package/mcp_server/memory/anti_memory.py +78 -0
  27. package/mcp_server/memory/promotion.py +94 -0
  28. package/mcp_server/memory/session_memory.py +108 -0
  29. package/mcp_server/memory/taste_memory.py +158 -0
  30. package/mcp_server/memory/technique_store.py +2 -1
  31. package/mcp_server/memory/tools.py +112 -0
  32. package/mcp_server/mix_engine/__init__.py +1 -0
  33. package/mcp_server/mix_engine/critics.py +299 -0
  34. package/mcp_server/mix_engine/models.py +152 -0
  35. package/mcp_server/mix_engine/planner.py +103 -0
  36. package/mcp_server/mix_engine/state_builder.py +316 -0
  37. package/mcp_server/mix_engine/tools.py +214 -0
  38. package/mcp_server/performance_engine/__init__.py +1 -0
  39. package/mcp_server/performance_engine/models.py +148 -0
  40. package/mcp_server/performance_engine/planner.py +267 -0
  41. package/mcp_server/performance_engine/safety.py +162 -0
  42. package/mcp_server/performance_engine/tools.py +183 -0
  43. package/mcp_server/project_brain/__init__.py +6 -0
  44. package/mcp_server/project_brain/arrangement_graph.py +64 -0
  45. package/mcp_server/project_brain/automation_graph.py +72 -0
  46. package/mcp_server/project_brain/builder.py +123 -0
  47. package/mcp_server/project_brain/capability_graph.py +64 -0
  48. package/mcp_server/project_brain/models.py +282 -0
  49. package/mcp_server/project_brain/refresh.py +80 -0
  50. package/mcp_server/project_brain/role_graph.py +103 -0
  51. package/mcp_server/project_brain/session_graph.py +51 -0
  52. package/mcp_server/project_brain/tools.py +144 -0
  53. package/mcp_server/reference_engine/__init__.py +1 -0
  54. package/mcp_server/reference_engine/gap_analyzer.py +239 -0
  55. package/mcp_server/reference_engine/models.py +105 -0
  56. package/mcp_server/reference_engine/profile_builder.py +149 -0
  57. package/mcp_server/reference_engine/tactic_router.py +117 -0
  58. package/mcp_server/reference_engine/tools.py +235 -0
  59. package/mcp_server/runtime/__init__.py +1 -0
  60. package/mcp_server/runtime/action_ledger.py +117 -0
  61. package/mcp_server/runtime/action_ledger_models.py +84 -0
  62. package/mcp_server/runtime/action_tools.py +57 -0
  63. package/mcp_server/runtime/capability_state.py +218 -0
  64. package/mcp_server/runtime/safety_kernel.py +339 -0
  65. package/mcp_server/runtime/safety_tools.py +42 -0
  66. package/mcp_server/runtime/tools.py +64 -0
  67. package/mcp_server/server.py +23 -1
  68. package/mcp_server/sound_design/__init__.py +1 -0
  69. package/mcp_server/sound_design/critics.py +297 -0
  70. package/mcp_server/sound_design/models.py +147 -0
  71. package/mcp_server/sound_design/planner.py +104 -0
  72. package/mcp_server/sound_design/tools.py +297 -0
  73. package/mcp_server/tools/_agent_os_engine.py +947 -0
  74. package/mcp_server/tools/_composition_engine.py +1530 -0
  75. package/mcp_server/tools/_conductor.py +199 -0
  76. package/mcp_server/tools/_conductor_budgets.py +222 -0
  77. package/mcp_server/tools/_evaluation_contracts.py +91 -0
  78. package/mcp_server/tools/_form_engine.py +416 -0
  79. package/mcp_server/tools/_motif_engine.py +351 -0
  80. package/mcp_server/tools/_planner_engine.py +516 -0
  81. package/mcp_server/tools/_research_engine.py +542 -0
  82. package/mcp_server/tools/_research_provider.py +185 -0
  83. package/mcp_server/tools/_snapshot_normalizer.py +49 -0
  84. package/mcp_server/tools/agent_os.py +440 -0
  85. package/mcp_server/tools/analyzer.py +18 -0
  86. package/mcp_server/tools/automation.py +25 -10
  87. package/mcp_server/tools/composition.py +563 -0
  88. package/mcp_server/tools/motif.py +104 -0
  89. package/mcp_server/tools/planner.py +144 -0
  90. package/mcp_server/tools/research.py +223 -0
  91. package/mcp_server/tools/tracks.py +18 -3
  92. package/mcp_server/tools/transport.py +10 -2
  93. package/mcp_server/transition_engine/__init__.py +6 -0
  94. package/mcp_server/transition_engine/archetypes.py +167 -0
  95. package/mcp_server/transition_engine/critics.py +340 -0
  96. package/mcp_server/transition_engine/models.py +90 -0
  97. package/mcp_server/transition_engine/tools.py +291 -0
  98. package/mcp_server/translation_engine/__init__.py +5 -0
  99. package/mcp_server/translation_engine/critics.py +297 -0
  100. package/mcp_server/translation_engine/models.py +27 -0
  101. package/mcp_server/translation_engine/tools.py +74 -0
  102. package/package.json +2 -2
  103. package/remote_script/LivePilot/__init__.py +1 -1
  104. package/remote_script/LivePilot/arrangement.py +12 -2
  105. package/requirements.txt +1 -1
@@ -0,0 +1,575 @@
1
+ """Evaluation Fabric — unified entry point for all engine evaluators.
2
+
3
+ Provides evaluate() as the single router, plus engine-specific evaluators:
4
+ - evaluate_sonic_move() — spectral before/after
5
+ - evaluate_composition_move() — issue list before/after
6
+ - evaluate_mix_move() — mix critic issues before/after
7
+ - evaluate_transition() — transition score before/after
8
+ - evaluate_translation() — translation report before/after
9
+
10
+ Uses feature_extractors for dimension extraction, policy for hard rules,
11
+ and the existing contracts from _evaluation_contracts.
12
+
13
+ Design: EVALUATION_FABRIC_V1.md
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ from typing import Optional
19
+
20
+ from ..tools._evaluation_contracts import (
21
+ EvaluationRequest,
22
+ EvaluationResult,
23
+ MEASURABLE_DIMENSIONS,
24
+ is_dimension_measurable,
25
+ )
26
+ from ..tools._snapshot_normalizer import normalize_sonic_snapshot
27
+ from .feature_extractors import extract_dimension_value, _clamp
28
+ from .policy import apply_hard_rules
29
+
30
+
31
+ # ── Sonic Evaluator ──────────────────────────────────────────────────
32
+
33
+
34
def evaluate_sonic_move(
    request: EvaluationRequest,
    outcome_history: Optional[list[dict]] = None,
) -> EvaluationResult:
    """Evaluate a sonic move using the Evaluation Fabric.

    Normalizes before/after snapshots, extracts per-dimension values,
    computes a weighted composite score and goal progress, and applies
    the hard accept/reject rules from the policy engine.

    Args:
        request: EvaluationRequest with before/after snapshots; goal
            targets live in ``request.goal["targets"]`` and protected
            dimensions in ``request.protect``.
        outcome_history: optional list of past outcomes for taste fit.
            NOTE(review): currently unused — the taste_fit component is
            a fixed 0.0 placeholder in fabric v1 (see score weights).

    Returns:
        EvaluationResult with score, keep_change, dimension_changes, etc.
    """
    # Normalize both snapshots into the canonical format.
    before_norm = normalize_sonic_snapshot(request.before, source="before")
    after_norm = normalize_sonic_snapshot(request.after, source="after")

    targets = request.goal.get("targets", {})
    protect = request.protect

    notes: list[str] = []
    dimension_changes: dict[str, dict] = {}

    # Per-dimension deltas, weighted by the goal's target weights.
    total_goal_progress = 0.0
    measurable_count = 0

    for dim, weight in targets.items():
        before_val = extract_dimension_value(before_norm, dim) if before_norm else None
        after_val = extract_dimension_value(after_norm, dim) if after_norm else None

        if before_val is not None and after_val is not None:
            delta = after_val - before_val
            dimension_changes[dim] = {
                "before": round(before_val, 4),
                "after": round(after_val, 4),
                "delta": round(delta, 4),
            }
            total_goal_progress += delta * weight
            measurable_count += 1
        else:
            notes.append(f"{dim}: not measurable in Phase 1 (confidence=0.0)")

    # Protected dimensions: any drop counts as collateral damage; ending
    # below the protect threshold, or an absolute drop > 0.15, is a violation.
    collateral_damage = 0.0
    protection_violated = False

    for dim, threshold in protect.items():
        before_val = extract_dimension_value(before_norm, dim) if before_norm else None
        after_val = extract_dimension_value(after_norm, dim) if after_norm else None

        if before_val is not None and after_val is not None:
            drop = before_val - after_val
            if drop > 0:
                collateral_damage = max(collateral_damage, drop)
                if after_val < threshold:
                    protection_violated = True
                    notes.append(
                        f"PROTECTED dimension '{dim}' at {after_val:.3f}, "
                        f"below threshold {threshold:.3f}"
                    )
                elif drop > 0.15:
                    protection_violated = True
                    notes.append(
                        f"PROTECTED dimension '{dim}' dropped by {drop:.3f} "
                        f"(absolute drop > 0.15)"
                    )

    # Composite score: weighted blend of goal fit, per-dimension delta,
    # preservation, measurement confidence, and fixed placeholders.
    measurable_delta = total_goal_progress / max(measurable_count, 1)
    goal_fit = _clamp(0.5 + total_goal_progress)
    measurable_component = _clamp(0.5 + measurable_delta)
    preservation = _clamp(1.0 - collateral_damage * 5)
    confidence = measurable_count / max(len(targets), 1)

    score = (
        0.30 * goal_fit
        + 0.25 * measurable_component
        + 0.15 * preservation
        + 0.10 * 0.0  # taste_fit: placeholder, no history in fabric v1
        + 0.10 * confidence
        + 0.10 * 1.0  # reversibility: 1.0 for undo-able moves
    )

    # Hard accept/reject rules come from the policy engine.
    keep_change, rule_failures = apply_hard_rules(
        goal_progress=total_goal_progress,
        collateral_damage=collateral_damage,
        protection_violated=protection_violated,
        measurable_count=measurable_count,
        score=score,
        target_count=len(targets),
    )
    notes.extend(rule_failures)

    # "deferred" when nothing was measurable, otherwise "measured".
    # (Simplified from an if/elif/else whose keep_change arms were
    # identical — both yielded "measured", so the branch was dead.)
    decision_mode = "deferred" if measurable_count == 0 else "measured"

    return EvaluationResult(
        engine=request.engine or "sonic",
        score=round(score, 4),
        keep_change=keep_change,
        goal_progress=round(total_goal_progress, 4),
        collateral_damage=round(collateral_damage, 4),
        hard_rule_failures=rule_failures,
        dimension_changes=dimension_changes,
        notes=notes,
        decision_mode=decision_mode,
        memory_candidate=keep_change and measurable_count > 0,
    )
153
+
154
+
155
+ # ── Composition Evaluator ────────────────────────────────────────────
156
+
157
+
158
def evaluate_composition_move(
    before_issues: list[dict],
    after_issues: list[dict],
) -> EvaluationResult:
    """Evaluate a composition move by comparing critic issue lists.

    Scores the move on total-severity reduction and rejects it when
    severity increased, too many new issues appeared, or the score
    falls below the 0.40 floor.

    Args:
        before_issues: list of dicts with at least "severity" (float).
        after_issues: list of dicts with at least "severity" (float).

    Returns:
        EvaluationResult with score, keep_change, notes.
    """
    notes: list[str] = []

    n_before = len(before_issues)
    n_after = len(after_issues)
    resolved = n_before - n_after

    sev_before = sum(issue.get("severity", 0.0) for issue in before_issues)
    sev_after = sum(issue.get("severity", 0.0) for issue in after_issues)
    sev_gain = sev_before - sev_after  # positive = severity went down

    if n_before > 0:
        ratio = sev_gain / max(sev_before, 0.01)
    elif n_after == 0:
        ratio = 0.0
    else:
        # Started clean, ended with issues — penalize.
        ratio = -0.5

    score = max(0.0, min(1.0, 0.5 + ratio * 0.5))

    keep = True

    if sev_gain < 0:
        keep = False
        notes.append(
            f"WORSE: total severity increased by {-sev_gain:.2f}"
        )

    if n_after > n_before + 1:
        keep = False
        notes.append(
            f"NEW ISSUES: {n_after - n_before} new issues introduced"
        )

    if score < 0.40:
        keep = False
        notes.append(f"SCORE: {score:.3f} below 0.40 threshold")

    if keep and sev_gain > 0:
        notes.append(
            f"IMPROVED: resolved {resolved} issue(s), "
            f"severity reduced by {sev_gain:.2f}"
        )

    # Hard-rule failures are exactly the rejection notes.
    failure_prefixes = ("WORSE:", "NEW ISSUES:", "SCORE:")
    failures = [note for note in notes if note.startswith(failure_prefixes)]

    return EvaluationResult(
        engine="composition",
        score=round(score, 4),
        keep_change=keep,
        goal_progress=round(sev_gain, 4),
        collateral_damage=0.0,
        hard_rule_failures=failures,
        dimension_changes={"issue_delta": resolved},
        notes=notes,
        decision_mode="measured",
        memory_candidate=keep and sev_gain > 0,
    )
229
+
230
+
231
+ # ── Mix Evaluator ───────────────────────────────────────────────────
232
+
233
+
234
def evaluate_mix_move(
    before_issues: list[dict],
    after_issues: list[dict],
) -> EvaluationResult:
    """Evaluate a mix move by comparing mix critic issue lists.

    Tracks per-critic severity deltas (masking, dynamics, stereo,
    balance), scores the move on total-severity reduction, and rejects
    it when severity increased, too many new issues appeared, or the
    score falls below the 0.40 floor.

    Args:
        before_issues: list of MixIssue dicts (from run_all_mix_critics).
        after_issues: list of MixIssue dicts (from run_all_mix_critics).

    Returns:
        EvaluationResult with score, keep_change, dimension_changes.
    """
    notes: list[str] = []

    def _critic_severity(issues: list[dict], critic: str) -> float:
        # Sum of severities attributed to one critic category.
        total = 0.0
        for issue in issues:
            if issue.get("critic") == critic:
                total += issue.get("severity", 0.0)
        return total

    # Positive delta = that dimension improved (severity dropped).
    deltas = {
        critic: _critic_severity(before_issues, critic)
        - _critic_severity(after_issues, critic)
        for critic in ("masking", "dynamics", "stereo", "balance")
    }

    dimension_changes = {
        "masking_reduction": round(deltas["masking"], 4),
        "headroom_change": round(deltas["dynamics"], 4),
        "stereo_stability": round(deltas["stereo"], 4),
        "balance_change": round(deltas["balance"], 4),
    }

    # Overall severity comparison across all critics.
    sev_before = sum(issue.get("severity", 0.0) for issue in before_issues)
    sev_after = sum(issue.get("severity", 0.0) for issue in after_issues)
    sev_gain = sev_before - sev_after

    n_before = len(before_issues)
    n_after = len(after_issues)

    if sev_before > 0:
        ratio = sev_gain / max(sev_before, 0.01)
    else:
        ratio = 0.0 if n_after == 0 else -0.5

    score = _clamp(0.5 + ratio * 0.5)

    keep = True

    if sev_gain < 0:
        keep = False
        notes.append(
            f"WORSE: total mix severity increased by {-sev_gain:.2f}"
        )

    if n_after > n_before + 2:
        keep = False
        notes.append(
            f"NEW ISSUES: {n_after - n_before} new mix issues introduced"
        )

    if score < 0.40:
        keep = False
        notes.append(f"SCORE: {score:.3f} below 0.40 threshold")

    if keep and sev_gain > 0:
        notes.append(
            f"IMPROVED: mix severity reduced by {sev_gain:.2f} "
            f"across {n_before - n_after} fewer issues"
        )

    failures = [
        note for note in notes
        if note.startswith(("WORSE:", "NEW ISSUES:", "SCORE:"))
    ]

    return EvaluationResult(
        engine="mix",
        score=round(score, 4),
        keep_change=keep,
        goal_progress=round(sev_gain, 4),
        collateral_damage=0.0,
        hard_rule_failures=failures,
        dimension_changes=dimension_changes,
        notes=notes,
        decision_mode="measured",
        memory_candidate=keep and sev_gain > 0,
    )
336
+
337
+
338
+ # ── Transition Evaluator ────────────────────────────────────────────
339
+
340
+
341
def evaluate_transition(
    before_score: dict,
    after_score: dict,
) -> EvaluationResult:
    """Evaluate a transition move by comparing TransitionScore dicts.

    Compares boundary_clarity, payoff_strength, energy_redirection and
    overall_quality before and after the move; rejects the move when
    measured quality decreased or the score falls below the 0.40 floor.

    Args:
        before_score: dict with transition quality metrics.
        after_score: dict with transition quality metrics.

    Returns:
        EvaluationResult with score, keep_change, dimension_changes.
    """
    notes: list[str] = []
    dimension_changes: dict[str, dict] = {}

    total_improvement = 0.0
    measured = 0

    # Only dimensions present in BOTH dicts count as measured.
    for dim in (
        "boundary_clarity",
        "payoff_strength",
        "energy_redirection",
        "overall_quality",
    ):
        prev = before_score.get(dim)
        curr = after_score.get(dim)
        if prev is None or curr is None:
            continue
        change = curr - prev
        dimension_changes[dim] = {
            "before": round(prev, 4),
            "after": round(curr, 4),
            "delta": round(change, 4),
        }
        total_improvement += change
        measured += 1

    # Score centers at 0.5 and shifts by the average per-dimension change.
    score = _clamp(0.5 + total_improvement / max(measured, 1))

    keep = True

    if measured and total_improvement < 0:
        keep = False
        notes.append(
            f"WORSE: transition quality decreased by {-total_improvement:.3f}"
        )

    if score < 0.40:
        keep = False
        notes.append(f"SCORE: {score:.3f} below 0.40 threshold")

    if keep and total_improvement > 0:
        notes.append(
            f"IMPROVED: transition quality improved by {total_improvement:.3f} "
            f"across {measured} dimensions"
        )

    failures = [note for note in notes if note.startswith(("WORSE:", "SCORE:"))]

    return EvaluationResult(
        engine="transition",
        score=round(score, 4),
        keep_change=keep,
        goal_progress=round(total_improvement, 4),
        collateral_damage=0.0,
        hard_rule_failures=failures,
        dimension_changes=dimension_changes,
        notes=notes,
        decision_mode="measured",
        memory_candidate=keep and total_improvement > 0,
    )
415
+
416
+
417
+ # ── Translation Evaluator ───────────────────────────────────────────
418
+
419
+
420
def evaluate_translation(
    before_report: dict,
    after_report: dict,
) -> EvaluationResult:
    """Evaluate a translation move by comparing TranslationReport dicts.

    Compares the robustness booleans (mono_safe, small_speaker_safe,
    low_end_stable, front_element_present), harshness_risk (lower is
    better) and the categorical overall_robustness label.

    Args:
        before_report: dict from build_translation_report().to_dict().
        after_report: dict from build_translation_report().to_dict().

    Returns:
        EvaluationResult with score, keep_change, dimension_changes.
    """
    notes: list[str] = []
    dimension_changes: dict[str, dict] = {}

    # Boolean robustness flags — True is good, False is bad.
    flags = (
        "mono_safe",
        "small_speaker_safe",
        "low_end_stable",
        "front_element_present",
    )
    improvements = 0
    regressions = 0

    for flag in flags:
        prev = before_report.get(flag)
        curr = after_report.get(flag)
        if prev is None or curr is None:
            continue
        dimension_changes[flag] = {"before": prev, "after": curr}
        if not prev and curr:
            improvements += 1
        elif prev and not curr:
            regressions += 1

    # Harshness risk — lower is better, so a positive delta is improvement.
    prev_harsh = before_report.get("harshness_risk", 0.0)
    curr_harsh = after_report.get("harshness_risk", 0.0)
    harshness_delta = prev_harsh - curr_harsh
    dimension_changes["harshness_risk"] = {
        "before": round(prev_harsh, 4),
        "after": round(curr_harsh, 4),
        "delta": round(harshness_delta, 4),
    }

    # Map the categorical robustness label onto a numeric scale;
    # unknown labels land in the middle.
    robustness_map = {"robust": 1.0, "fragile": 0.5, "critical": 0.0}
    prev_rob = robustness_map.get(before_report.get("overall_robustness", ""), 0.5)
    curr_rob = robustness_map.get(after_report.get("overall_robustness", ""), 0.5)
    robustness_delta = curr_rob - prev_rob
    dimension_changes["overall_robustness"] = {
        "before": before_report.get("overall_robustness", "unknown"),
        "after": after_report.get("overall_robustness", "unknown"),
    }

    # Composite score: flag movement, harshness movement, robustness movement.
    flag_score = (improvements - regressions) / max(len(flags), 1)
    score = _clamp(
        0.5
        + flag_score * 0.3
        + harshness_delta * 0.3
        + robustness_delta * 0.4
    )

    total_improvement = flag_score + harshness_delta + robustness_delta

    keep = True

    if regressions > improvements:
        keep = False
        notes.append(
            f"WORSE: {regressions} robustness flags regressed vs "
            f"{improvements} improved"
        )

    if curr_rob < prev_rob:
        keep = False
        notes.append(
            f"WORSE: overall robustness degraded from "
            f"{before_report.get('overall_robustness')} to "
            f"{after_report.get('overall_robustness')}"
        )

    if score < 0.40:
        keep = False
        notes.append(f"SCORE: {score:.3f} below 0.40 threshold")

    if keep and total_improvement > 0:
        notes.append(
            f"IMPROVED: {improvements} robustness flags improved, "
            f"harshness reduced by {harshness_delta:.3f}"
        )

    failures = [note for note in notes if note.startswith(("WORSE:", "SCORE:"))]

    return EvaluationResult(
        engine="translation",
        score=round(score, 4),
        keep_change=keep,
        goal_progress=round(total_improvement, 4),
        collateral_damage=0.0,
        hard_rule_failures=failures,
        dimension_changes=dimension_changes,
        notes=notes,
        decision_mode="measured",
        memory_candidate=keep and total_improvement > 0,
    )
528
+
529
+
530
+ # ── Unified Entry Point ─────────────────────────────────────────────
531
+
532
+
533
def evaluate(request: EvaluationRequest) -> EvaluationResult:
    """Unified evaluation entry point — routes to engine-specific evaluator.

    Args:
        request: EvaluationRequest whose engine field selects the route:
            - "sonic" -> evaluate_sonic_move
            - "composition" -> evaluate_composition_move
            - "mix" -> evaluate_mix_move
            - "transition" -> evaluate_transition
            - "translation" -> evaluate_translation

    Returns:
        EvaluationResult from the appropriate engine evaluator.
    """
    engine = (request.engine or "sonic").lower()

    if engine == "sonic":
        return evaluate_sonic_move(request)

    if engine in ("composition", "mix"):
        # Both engines compare issue lists extracted from the snapshots.
        evaluator = (
            evaluate_composition_move if engine == "composition" else evaluate_mix_move
        )
        return evaluator(
            request.before.get("issues", []),
            request.after.get("issues", []),
        )

    if engine == "transition":
        return evaluate_transition(request.before, request.after)

    if engine == "translation":
        return evaluate_translation(request.before, request.after)

    # Unknown engine: don't fail hard — defer the decision to the agent.
    return EvaluationResult(
        engine=engine,
        score=0.0,
        keep_change=True,
        decision_mode="deferred",
        notes=[f"Unknown engine '{engine}' — deferring to agent judgment"],
    )
@@ -0,0 +1,84 @@
1
+ """Feature Extractors — derive measurable values from normalized snapshots.
2
+
3
+ Replicates the dimension-extraction logic from _agent_os_engine but operates
4
+ on the canonical normalized snapshot format (always has "spectrum" key).
5
+
6
+ All returned values are clamped to 0.0-1.0 for consistent scoring.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import math
12
+ from typing import Optional
13
+
14
+ from ..tools._evaluation_contracts import MEASURABLE_DIMENSIONS
15
+
16
+
17
+ def _clamp(value: float, lo: float = 0.0, hi: float = 1.0) -> float:
18
+ """Clamp value to [lo, hi] range."""
19
+ return max(lo, min(hi, value))
20
+
21
+
22
def extract_dimension_value(
    snapshot: dict,
    dimension: str,
) -> Optional[float]:
    """Derive a 0.0-1.0 value for one measurable dimension of a snapshot.

    Args:
        snapshot: normalized snapshot (from normalize_sonic_snapshot);
            expected to carry a "spectrum" dict of band values, and
            optionally "rms" and "peak" levels.
        dimension: one of the measurable dimensions (brightness, warmth,
            weight, clarity, density, energy, punch).

    Returns:
        Clamped float in [0.0, 1.0], or None when the dimension cannot
        be measured from this snapshot.
    """
    # Guard: need a non-empty dict with spectrum bands to measure anything.
    if not isinstance(snapshot, dict) or not snapshot:
        return None

    bands = snapshot.get("spectrum")
    if not bands:
        return None

    rms = snapshot.get("rms")
    peak = snapshot.get("peak")

    if dimension == "brightness":
        # Average of the two top bands.
        return _clamp((bands.get("high", 0) + bands.get("presence", 0)) / 2.0)

    if dimension == "warmth":
        return _clamp(bands.get("low_mid", 0))

    if dimension == "weight":
        # Average of the two bottom bands.
        return _clamp((bands.get("sub", 0) + bands.get("low", 0)) / 2.0)

    if dimension == "clarity":
        # Clarity is the inverse of low-mid energy.
        return _clamp(1.0 - bands.get("low_mid", 0))

    if dimension == "density":
        # Spectral-flatness style ratio: geometric mean over arithmetic
        # mean of the band values (floored to avoid log(0)).
        values = [
            max(v, 1e-10)
            for v in bands.values()
            if isinstance(v, (int, float))
        ]
        if not values:
            return None
        geometric = math.exp(sum(math.log(v) for v in values) / len(values))
        arithmetic = sum(values) / len(values)
        return _clamp(geometric / max(arithmetic, 1e-10))

    if dimension == "energy":
        return None if rms is None else _clamp(rms)

    if dimension == "punch":
        # Crest factor in dB, mapped onto 0-1 over a 20 dB range.
        # Requires non-zero rms and peak; otherwise unmeasurable.
        if rms and peak and rms > 0:
            crest_db = 20.0 * math.log10(max(peak / rms, 1.0))
            return _clamp(crest_db / 20.0)
        return None

    # Any other dimension is not measurable in this phase.
    return None