livepilot 1.17.1 → 1.17.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +124 -0
- package/README.md +8 -7
- package/m4l_device/BUILD_GUIDE.md +24 -20
- package/m4l_device/LivePilot_Analyzer.amxd +0 -0
- package/m4l_device/livepilot_bridge.js +1 -1
- package/mcp_server/__init__.py +1 -1
- package/mcp_server/m4l_bridge.py +2 -1
- package/mcp_server/preview_studio/engine.py +85 -11
- package/mcp_server/preview_studio/models.py +8 -0
- package/mcp_server/preview_studio/tools.py +98 -48
- package/mcp_server/runtime/capability_state.py +18 -0
- package/mcp_server/runtime/degradation.py +62 -0
- package/mcp_server/runtime/tools.py +53 -4
- package/mcp_server/song_brain/tools.py +23 -0
- package/mcp_server/synthesis_brain/timbre.py +14 -8
- package/mcp_server/tools/_agent_os_engine/__init__.py +10 -0
- package/mcp_server/tools/_agent_os_engine/iteration.py +344 -0
- package/mcp_server/tools/agent_os.py +194 -3
- package/mcp_server/tools/analyzer.py +19 -6
- package/package.json +2 -2
- package/remote_script/LivePilot/__init__.py +1 -1
- package/requirements.txt +5 -5
- package/server.json +3 -3
|
@@ -155,6 +155,9 @@ def build_capability_state(
|
|
|
155
155
|
)
|
|
156
156
|
|
|
157
157
|
# ── web ──────────────────────────────────────────────────────────
|
|
158
|
+
# Server-side outbound HTTP capability. True when the MCP host can
|
|
159
|
+
# reach an arbitrary public URL. Does NOT imply curated research
|
|
160
|
+
# corpora are installed — see the ``research`` domain below.
|
|
158
161
|
web_reasons: list[str] = []
|
|
159
162
|
if not web_ok:
|
|
160
163
|
web_reasons.append("web_unavailable")
|
|
@@ -166,6 +169,21 @@ def build_capability_state(
|
|
|
166
169
|
reasons=web_reasons,
|
|
167
170
|
)
|
|
168
171
|
|
|
172
|
+
# ── flucoma ──────────────────────────────────────────────────────
|
|
173
|
+
# Optional dependency (the ``flucoma`` Python package). Emitted
|
|
174
|
+
# unconditionally so consumers can distinguish "probed and missing"
|
|
175
|
+
# from "probe not run yet".
|
|
176
|
+
flucoma_reasons: list[str] = []
|
|
177
|
+
if not flucoma_ok:
|
|
178
|
+
flucoma_reasons.append("flucoma_not_installed")
|
|
179
|
+
domains["flucoma"] = CapabilityDomain(
|
|
180
|
+
name="flucoma",
|
|
181
|
+
available=flucoma_ok,
|
|
182
|
+
confidence=0.9 if flucoma_ok else 0.0,
|
|
183
|
+
mode="available" if flucoma_ok else "unavailable",
|
|
184
|
+
reasons=flucoma_reasons,
|
|
185
|
+
)
|
|
186
|
+
|
|
169
187
|
# ── research (composite) ────────────────────────────────────────
|
|
170
188
|
research_reasons: list[str] = []
|
|
171
189
|
research_sources = 0
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""Explicit degradation signalling for engines that fall back to synthesized data.
|
|
2
|
+
|
|
3
|
+
Before PR-B, several engines silently substituted defaults when a data
|
|
4
|
+
source failed — ``song_brain`` injected ``tempo=120.0, track_count=0``
|
|
5
|
+
on session-fetch failure, and ``preview_studio`` compiled variants
|
|
6
|
+
against an empty-but-valid kernel when the caller didn't supply one.
|
|
7
|
+
Downstream consumers had no way to tell synthesized data from real
|
|
8
|
+
data, so polished outputs were returned as if they were real.
|
|
9
|
+
|
|
10
|
+
``DegradationInfo`` is the shared payload engines attach to their
|
|
11
|
+
responses whenever they substitute fallback values. Consumers can
|
|
12
|
+
inspect ``is_degraded``, ``reasons``, and ``substituted_fields`` to
|
|
13
|
+
decide whether to trust the response or re-try the operation.
|
|
14
|
+
|
|
15
|
+
Usage::
|
|
16
|
+
|
|
17
|
+
from mcp_server.runtime.degradation import DegradationInfo
|
|
18
|
+
|
|
19
|
+
deg = DegradationInfo()
|
|
20
|
+
try:
|
|
21
|
+
data = fetch_real_data()
|
|
22
|
+
except Exception:
|
|
23
|
+
data = FALLBACK_DATA
|
|
24
|
+
deg = DegradationInfo(
|
|
25
|
+
is_degraded=True,
|
|
26
|
+
reasons=["data_source_unreachable"],
|
|
27
|
+
substituted_fields=["tempo", "track_count"],
|
|
28
|
+
)
|
|
29
|
+
return {..., "degradation": deg.to_dict()}
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
from __future__ import annotations
|
|
33
|
+
|
|
34
|
+
from dataclasses import dataclass, field
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass
class DegradationInfo:
    """Structured marker attached by engines that fell back to synthesized data.

    Attributes:
        is_degraded: True if any part of the response was filled in with a
            default/synthesized value rather than real source data; False
            means every field is backed by a real data source.
        reasons: Open-ended machine-readable tokens explaining why the
            fallback fired (for example ``"session_fetch_failed"`` or
            ``"empty_kernel_fallback"``); new tokens appear as new
            fallback paths get flagged.
        substituted_fields: Top-level response field names whose values
            came from the fallback path instead of the real source.
    """

    is_degraded: bool = False
    reasons: list[str] = field(default_factory=list)
    substituted_fields: list[str] = field(default_factory=list)

    def to_dict(self) -> dict:
        # The list fields are copied so callers can mutate the returned
        # payload without touching this instance's state.
        payload: dict = {"is_degraded": self.is_degraded}
        payload["reasons"] = list(self.reasons)
        payload["substituted_fields"] = list(self.substituted_fields)
        return payload
|
|
@@ -7,6 +7,9 @@ Tools:
|
|
|
7
7
|
|
|
8
8
|
from __future__ import annotations
|
|
9
9
|
|
|
10
|
+
import importlib.util
|
|
11
|
+
import logging
|
|
12
|
+
import urllib.request
|
|
10
13
|
from typing import Optional
|
|
11
14
|
|
|
12
15
|
from fastmcp import Context
|
|
@@ -14,13 +17,55 @@ from fastmcp import Context
|
|
|
14
17
|
from ..server import mcp
|
|
15
18
|
from ..memory.technique_store import TechniqueStore
|
|
16
19
|
from .capability_state import build_capability_state
|
|
17
|
-
import logging
|
|
18
20
|
|
|
19
21
|
logger = logging.getLogger(__name__)
|
|
20
22
|
|
|
21
23
|
_memory_store = TechniqueStore()
|
|
22
24
|
|
|
23
25
|
|
|
26
|
+
# ── Capability probes ──────────────────────────────────────────────────
|
|
27
|
+
#
|
|
28
|
+
# These helpers are module-level so tests can monkeypatch them directly.
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _probe_web(timeout: float = 0.5) -> bool:
    """Server-side outbound HTTP probe.

    True when the MCP host can reach an arbitrary public URL. Does NOT
    imply curated research corpora are installed — see the ``research``
    domain for that.

    Implementation: a ``timeout``-second HEAD request to
    ``https://api.github.com`` using stdlib ``urllib.request``. Any
    exception (DNS failure, TLS error, socket timeout, proxy block)
    collapses to False so the probe is safe to call from any code path.
    NOTE(review): the success check accepts any 2xx/3xx status
    (``200 <= status < 400``); urllib raises ``HTTPError`` for >=400
    responses, which the broad except below also converts to False.

    Args:
        timeout: Socket timeout in seconds for the HEAD request.

    Returns:
        True only when a 2xx/3xx response arrives within ``timeout``.
    """
    req = urllib.request.Request("https://api.github.com", method="HEAD")
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            # getattr guard: tolerate response objects without ``status``.
            status = getattr(resp, "status", None)
            return status is not None and 200 <= status < 400
    except Exception as exc:  # noqa: BLE001 — swallow everything to False
        logger.debug("_probe_web failed: %s", exc)
        return False
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _probe_flucoma() -> bool:
    """Report whether the optional ``flucoma`` Python package is importable.

    Probes via ``importlib.util.find_spec`` so no import side-effects fire
    (the same pattern the codebase uses for other optional capability
    probes). Returns False when the package is absent or when the spec
    lookup itself raises.
    """
    try:
        spec = importlib.util.find_spec("flucoma")
    except Exception as exc:  # noqa: BLE001
        logger.debug("_probe_flucoma failed: %s", exc)
        return False
    return spec is not None
|
|
67
|
+
|
|
68
|
+
|
|
24
69
|
@mcp.tool()
|
|
25
70
|
def get_capability_state(ctx: Context) -> dict:
|
|
26
71
|
"""Probe the runtime and return a capability state snapshot.
|
|
@@ -59,9 +104,13 @@ def get_capability_state(ctx: Context) -> dict:
|
|
|
59
104
|
logger.debug("get_capability_state failed: %s", exc)
|
|
60
105
|
memory_ok = False
|
|
61
106
|
|
|
62
|
-
# ── Web
|
|
63
|
-
|
|
64
|
-
|
|
107
|
+
# ── Web — actually probe outbound HTTP egress ───────────────────
|
|
108
|
+
# Scoped to server-side outbound HTTP reachability; does NOT imply
|
|
109
|
+
# a curated research corpus is installed (see ``research`` domain).
|
|
110
|
+
web_ok = _probe_web()
|
|
111
|
+
|
|
112
|
+
# ── FluCoMa — optional import via find_spec (no side effects) ───
|
|
113
|
+
flucoma_ok = _probe_flucoma()
|
|
65
114
|
|
|
66
115
|
state = build_capability_state(
|
|
67
116
|
session_ok=session_ok,
|
|
@@ -9,6 +9,7 @@ from __future__ import annotations
|
|
|
9
9
|
|
|
10
10
|
from fastmcp import Context
|
|
11
11
|
|
|
12
|
+
from ..runtime.degradation import DegradationInfo
|
|
12
13
|
from ..server import mcp
|
|
13
14
|
from . import builder
|
|
14
15
|
from .models import SongBrain
|
|
@@ -55,6 +56,12 @@ def _fetch_session_data(ctx: Context) -> dict:
|
|
|
55
56
|
- composition_analysis: from musical intelligence section inference
|
|
56
57
|
- role_graph: from semantic move resolvers (track role inference)
|
|
57
58
|
- recent_moves: from session-scoped action ledger
|
|
59
|
+
|
|
60
|
+
On session-fetch failure the fallback session_info shape is injected
|
|
61
|
+
(``tempo=120.0, track_count=0``) and a ``DegradationInfo`` is attached
|
|
62
|
+
under the ``_degradation`` key so callers can tell synthesized data
|
|
63
|
+
from real data. ``_fetch_session_data`` never raises — it always
|
|
64
|
+
returns a dict with the expected keys.
|
|
58
65
|
"""
|
|
59
66
|
ableton = _get_ableton(ctx)
|
|
60
67
|
data: dict = {
|
|
@@ -66,12 +73,19 @@ def _fetch_session_data(ctx: Context) -> dict:
|
|
|
66
73
|
"role_graph": {},
|
|
67
74
|
"recent_moves": [],
|
|
68
75
|
}
|
|
76
|
+
degradation = DegradationInfo()
|
|
69
77
|
|
|
70
78
|
try:
|
|
71
79
|
data["session_info"] = ableton.send_command("get_session_info", {})
|
|
72
80
|
except Exception as exc:
|
|
73
81
|
logger.debug("_fetch_session_data failed: %s", exc)
|
|
74
82
|
data["session_info"] = {"tempo": 120.0, "track_count": 0}
|
|
83
|
+
degradation.is_degraded = True
|
|
84
|
+
if "session_fetch_failed" not in degradation.reasons:
|
|
85
|
+
degradation.reasons.append("session_fetch_failed")
|
|
86
|
+
for fld in ("tempo", "track_count"):
|
|
87
|
+
if fld not in degradation.substituted_fields:
|
|
88
|
+
degradation.substituted_fields.append(fld)
|
|
75
89
|
|
|
76
90
|
try:
|
|
77
91
|
matrix = ableton.send_command("get_scene_matrix")
|
|
@@ -135,6 +149,10 @@ def _fetch_session_data(ctx: Context) -> dict:
|
|
|
135
149
|
except Exception as exc:
|
|
136
150
|
logger.debug("_fetch_session_data failed: %s", exc)
|
|
137
151
|
|
|
152
|
+
# Attach the degradation signal so build_song_brain can surface it.
|
|
153
|
+
# Under a reserved key (leading underscore) so it never collides with
|
|
154
|
+
# a real session data field.
|
|
155
|
+
data["_degradation"] = degradation
|
|
138
156
|
return data
|
|
139
157
|
|
|
140
158
|
|
|
@@ -180,10 +198,15 @@ def build_song_brain(ctx: Context) -> dict:
|
|
|
180
198
|
)
|
|
181
199
|
_set_brain(ctx, brain)
|
|
182
200
|
|
|
201
|
+
# Surface the degradation payload so callers can distinguish a
|
|
202
|
+
# tempo=120 / track_count=0 synthesized response from a real one.
|
|
203
|
+
degradation = data.get("_degradation") or DegradationInfo()
|
|
204
|
+
|
|
183
205
|
return {
|
|
184
206
|
**brain.to_dict(),
|
|
185
207
|
"summary": brain.summary,
|
|
186
208
|
"capability": cap.to_dict(),
|
|
209
|
+
"degradation": degradation.to_dict(),
|
|
187
210
|
}
|
|
188
211
|
|
|
189
212
|
|
|
@@ -19,12 +19,16 @@ from .models import TimbralFingerprint
|
|
|
19
19
|
|
|
20
20
|
# ── Band-based brightness / warmth mapping ──────────────────────────────
|
|
21
21
|
#
|
|
22
|
-
#
|
|
23
|
-
#
|
|
24
|
-
#
|
|
25
|
-
#
|
|
22
|
+
# Two upstream producers feed this extractor with different band schemas:
|
|
23
|
+
# 1. get_master_spectrum (M4L analyzer) — v1.16+: 9 bands (sub_low,
|
|
24
|
+
# sub, low, low_mid, mid, high_mid, high, presence, air);
|
|
25
|
+
# pre-v1.16: 8 bands (no sub_low).
|
|
26
|
+
# 2. analyze_spectrum_offline — 8 bands with legacy names
|
|
27
|
+
# (sub, low, low_mid, mid, high_mid, high, very_high, ultra).
|
|
28
|
+
# We index the union of both name sets below; `_band_energy` uses dict.get
|
|
29
|
+
# so missing bands simply return 0 without complaint.
|
|
26
30
|
|
|
27
|
-
_BANDS = ("sub", "low", "low_mid", "mid", "high_mid", "high", "very_high", "ultra")
|
|
31
|
+
_BANDS = ("sub_low", "sub", "low", "low_mid", "mid", "high_mid", "high", "presence", "air", "very_high", "ultra")
|
|
28
32
|
|
|
29
33
|
|
|
30
34
|
def _band_energy(spectrum: Optional[dict], band: str) -> float:
|
|
@@ -55,9 +59,11 @@ def extract_timbre_fingerprint(
|
|
|
55
59
|
Inputs are all optional — the function degrades gracefully when only
|
|
56
60
|
some dimensions are measurable.
|
|
57
61
|
|
|
58
|
-
spectrum: either
|
|
59
|
-
|
|
60
|
-
|
|
62
|
+
spectrum: either the 9-band shape from get_master_spectrum
|
|
63
|
+
({sub_low, sub, low, low_mid, mid, high_mid, high, presence, air}),
|
|
64
|
+
the legacy 8-band shape from analyze_spectrum_offline
|
|
65
|
+
({sub, low, low_mid, mid, high_mid, high, very_high, ultra}),
|
|
66
|
+
or {"bands": {...}} wrapping either. Missing bands default to 0.
|
|
61
67
|
loudness: {"rms": float, "peak": float, "lufs": float, "lra": float} —
|
|
62
68
|
output shape from analyze_loudness.
|
|
63
69
|
spectral_shape: FluCoMa descriptors when available — {"centroid", "flatness",
|
|
@@ -35,6 +35,12 @@ from .taste import (
|
|
|
35
35
|
compute_taste_fit,
|
|
36
36
|
get_taste_profile,
|
|
37
37
|
)
|
|
38
|
+
from .iteration import (
|
|
39
|
+
iterate_toward_goal_engine,
|
|
40
|
+
iterate_toward_goal_engine_async,
|
|
41
|
+
IterationResult,
|
|
42
|
+
IterationStep,
|
|
43
|
+
)
|
|
38
44
|
|
|
39
45
|
__all__ = [
|
|
40
46
|
"QUALITY_DIMENSIONS", "MEASURABLE_PROXIES",
|
|
@@ -49,4 +55,8 @@ __all__ = [
|
|
|
49
55
|
"analyze_outcome_history",
|
|
50
56
|
"compute_taste_fit",
|
|
51
57
|
"get_taste_profile",
|
|
58
|
+
"iterate_toward_goal_engine",
|
|
59
|
+
"iterate_toward_goal_engine_async",
|
|
60
|
+
"IterationResult",
|
|
61
|
+
"IterationStep",
|
|
52
62
|
]
|
|
@@ -0,0 +1,344 @@
|
|
|
1
|
+
"""Iteration engine — closes the evaluation loop by running experiments
|
|
2
|
+
repeatedly against a compiled GoalVector until threshold or timeout.
|
|
3
|
+
|
|
4
|
+
Pure-python: takes callables for experiment create/run/commit/discard so
|
|
5
|
+
tests can substitute in-memory fakes without an Ableton connection. The
|
|
6
|
+
callables may be sync or async — the engine uses `iterate_toward_goal_engine`
|
|
7
|
+
(sync) for the former and `iterate_toward_goal_engine_async` for the latter.
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import inspect
|
|
12
|
+
from dataclasses import dataclass, field
|
|
13
|
+
from typing import Any, Awaitable, Callable, Optional, Union
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class IterationStep:
    """Record of a single outer-loop iteration — one experiment's worth of work."""

    iteration: int
    experiment_id: str
    winner_branch_id: Optional[str]
    winner_score: float
    threshold_met: bool
    note: str = ""

    def to_dict(self) -> dict:
        # Field-for-field serialization; payload keys mirror the attribute
        # names in declaration order.
        field_names = (
            "iteration",
            "experiment_id",
            "winner_branch_id",
            "winner_score",
            "threshold_met",
            "note",
        )
        return {name: getattr(self, name) for name in field_names}
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass
class IterationResult:
    """Outcome of an iterate_toward_goal run.

    status values:
        - "committed": a winner reached threshold and was committed permanently
        - "exhausted": max_iterations hit; best-so-far committed
          (on_timeout=commit_best)
        - "timeout_no_commit": max_iterations hit; nothing committed
          (on_timeout=discard_on_timeout)
        - "no_candidates": the caller supplied empty candidate_move_sets
        - "error": unrecoverable failure; details in ``reason``
    """

    status: str
    iterations_run: int
    committed_experiment_id: Optional[str]
    committed_branch_id: Optional[str]
    final_score: float
    steps: list[IterationStep] = field(default_factory=list)
    reason: str = ""

    def to_dict(self) -> dict:
        # Steps serialize through their own to_dict so the payload is
        # plain JSON-able data all the way down.
        serialized_steps = [step.to_dict() for step in self.steps]
        return dict(
            status=self.status,
            iterations_run=self.iterations_run,
            committed_experiment_id=self.committed_experiment_id,
            committed_branch_id=self.committed_branch_id,
            final_score=self.final_score,
            steps=serialized_steps,
            reason=self.reason,
        )
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def iterate_toward_goal_engine(
|
|
69
|
+
candidate_move_sets: list,
|
|
70
|
+
threshold: float,
|
|
71
|
+
max_iterations: int,
|
|
72
|
+
create_experiment_fn: Callable[[list], str],
|
|
73
|
+
run_experiment_fn: Callable[[str], Any],
|
|
74
|
+
commit_fn: Callable[[str, str], dict],
|
|
75
|
+
discard_fn: Callable[[str], dict],
|
|
76
|
+
on_timeout: str = "commit_best",
|
|
77
|
+
) -> IterationResult:
|
|
78
|
+
"""Run experiments repeatedly until winner_score >= threshold or timeout.
|
|
79
|
+
|
|
80
|
+
Pure orchestration — all I/O happens through the injected callbacks. The
|
|
81
|
+
run/commit/discard callbacks may be sync or async; coroutines will be
|
|
82
|
+
awaited when reached. This keeps the engine reusable by both the
|
|
83
|
+
sync test suite and the async MCP tool wrapper.
|
|
84
|
+
|
|
85
|
+
See module docstring for full contract. Invariant: never issues raw
|
|
86
|
+
undo calls — per-branch undo is the responsibility of run_experiment_fn.
|
|
87
|
+
This loop only chooses commit vs discard.
|
|
88
|
+
"""
|
|
89
|
+
import asyncio
|
|
90
|
+
|
|
91
|
+
async def _as_async():
|
|
92
|
+
return await _iterate_async_core(
|
|
93
|
+
candidate_move_sets=candidate_move_sets,
|
|
94
|
+
threshold=threshold,
|
|
95
|
+
max_iterations=max_iterations,
|
|
96
|
+
create_experiment_fn=create_experiment_fn,
|
|
97
|
+
run_experiment_fn=run_experiment_fn,
|
|
98
|
+
commit_fn=commit_fn,
|
|
99
|
+
discard_fn=discard_fn,
|
|
100
|
+
on_timeout=on_timeout,
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
# If any callback is a coroutine function, run via asyncio. Otherwise
|
|
104
|
+
# execute the sync path directly to avoid event-loop overhead in tests.
|
|
105
|
+
any_async = any(
|
|
106
|
+
inspect.iscoroutinefunction(fn)
|
|
107
|
+
for fn in (create_experiment_fn, run_experiment_fn, commit_fn, discard_fn)
|
|
108
|
+
)
|
|
109
|
+
if any_async:
|
|
110
|
+
return asyncio.run(_as_async())
|
|
111
|
+
|
|
112
|
+
return _iterate_sync_core(
|
|
113
|
+
candidate_move_sets=candidate_move_sets,
|
|
114
|
+
threshold=threshold,
|
|
115
|
+
max_iterations=max_iterations,
|
|
116
|
+
create_experiment_fn=create_experiment_fn,
|
|
117
|
+
run_experiment_fn=run_experiment_fn,
|
|
118
|
+
commit_fn=commit_fn,
|
|
119
|
+
discard_fn=discard_fn,
|
|
120
|
+
on_timeout=on_timeout,
|
|
121
|
+
)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
async def iterate_toward_goal_engine_async(
    candidate_move_sets: list,
    threshold: float,
    max_iterations: int,
    create_experiment_fn: Callable[[list], Any],
    run_experiment_fn: Callable[[str], Any],
    commit_fn: Callable[[str, str], Any],
    discard_fn: Callable[[str], Any],
    on_timeout: str = "commit_best",
) -> IterationResult:
    """Async twin of ``iterate_toward_goal_engine``.

    Used by the MCP tool wrapper, whose callbacks are async; delegates
    straight to the shared async core.
    """
    result = await _iterate_async_core(
        candidate_move_sets=candidate_move_sets,
        threshold=threshold,
        max_iterations=max_iterations,
        create_experiment_fn=create_experiment_fn,
        run_experiment_fn=run_experiment_fn,
        commit_fn=commit_fn,
        discard_fn=discard_fn,
        on_timeout=on_timeout,
    )
    return result
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
# ── Internal cores ─────────────────────────────────────────────────────────
|
|
148
|
+
|
|
149
|
+
def _iterate_sync_core(
    candidate_move_sets,
    threshold,
    max_iterations,
    create_experiment_fn,
    run_experiment_fn,
    commit_fn,
    discard_fn,
    on_timeout,
) -> IterationResult:
    """Synchronous loop backing ``iterate_toward_goal_engine``.

    Runs up to ``min(max_iterations, len(candidate_move_sets))`` experiments.
    Commits the first experiment whose winner score reaches ``threshold``;
    otherwise applies the ``on_timeout`` policy at the end ("commit_best"
    commits the best-so-far, any other value discards it). All side effects
    flow through the injected callbacks.
    """
    # Empty input short-circuits before any callback fires.
    if not candidate_move_sets:
        return IterationResult(
            status="no_candidates",
            iterations_run=0,
            committed_experiment_id=None,
            committed_branch_id=None,
            final_score=0.0,
            reason="candidate_move_sets is empty",
        )

    steps: list[IterationStep] = []
    # -1.0 sentinel: any real score (including 0.0) from a valid branch
    # beats it, so the first valid candidate always becomes best-so-far.
    best_score = -1.0
    best_exp_id: Optional[str] = None
    best_branch_id: Optional[str] = None
    n = min(max_iterations, len(candidate_move_sets))

    for i in range(n):
        move_ids = candidate_move_sets[i]
        exp_id = create_experiment_fn(move_ids)
        # run_experiment_fn returns a (winner_branch_id, winner_score) pair;
        # winner_branch_id may be None when no branch won.
        winner_branch_id, winner_score = run_experiment_fn(exp_id)

        # Threshold counts only when there is an actual winning branch.
        met = winner_score >= threshold and winner_branch_id is not None
        steps.append(IterationStep(
            iteration=i,
            experiment_id=exp_id,
            winner_branch_id=winner_branch_id,
            winner_score=winner_score,
            threshold_met=met,
            note=(
                f"committed on iteration {i}" if met
                else f"below threshold (need {threshold}, got {winner_score})"
            ),
        ))

        if met:
            # Discard any prior best-so-far before committing the new winner —
            # otherwise the old non-winning experiment leaks in the store.
            if best_exp_id is not None and best_exp_id != exp_id:
                discard_fn(best_exp_id)
            commit_fn(exp_id, winner_branch_id)
            return IterationResult(
                status="committed",
                iterations_run=i + 1,
                committed_experiment_id=exp_id,
                committed_branch_id=winner_branch_id,
                final_score=winner_score,
                steps=steps,
                reason=f"threshold {threshold} met on iteration {i}",
            )

        # Only candidates with a real winning branch can become best-so-far;
        # everything else is discarded immediately.
        if winner_branch_id is not None and winner_score > best_score:
            # Supersede previous best-so-far. It's now stale, free the slot.
            if best_exp_id is not None:
                discard_fn(best_exp_id)
            best_score = winner_score
            best_exp_id = exp_id
            best_branch_id = winner_branch_id
        else:
            discard_fn(exp_id)

    # Budget exhausted without meeting threshold: apply the timeout policy.
    if on_timeout == "commit_best" and best_exp_id and best_branch_id:
        commit_fn(best_exp_id, best_branch_id)
        return IterationResult(
            status="exhausted",
            iterations_run=n,
            committed_experiment_id=best_exp_id,
            committed_branch_id=best_branch_id,
            final_score=best_score,
            steps=steps,
            reason=(
                f"max_iterations={n} reached, threshold {threshold} never met; "
                f"committed best-so-far with score {best_score}"
            ),
        )

    # Non-commit policy (or nothing worth committing): drop any held
    # best-so-far so no experiment is left dangling in the store.
    if best_exp_id:
        discard_fn(best_exp_id)
    return IterationResult(
        status="timeout_no_commit",
        iterations_run=n,
        committed_experiment_id=None,
        committed_branch_id=None,
        # Clamp the -1.0 sentinel so callers never see a negative score.
        final_score=max(best_score, 0.0),
        steps=steps,
        reason=f"max_iterations={n} reached, policy={on_timeout}, no commit issued",
    )
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
async def _iterate_async_core(
    candidate_move_sets,
    threshold,
    max_iterations,
    create_experiment_fn,
    run_experiment_fn,
    commit_fn,
    discard_fn,
    on_timeout,
) -> IterationResult:
    """Async loop core — mirrors ``_iterate_sync_core`` step for step.

    Identical commit/discard ordering and result shapes; the only
    difference is that every callback result passes through
    ``_maybe_await`` so sync and async callbacks can be mixed freely.
    """
    # Empty input short-circuits before any callback fires.
    if not candidate_move_sets:
        return IterationResult(
            status="no_candidates",
            iterations_run=0,
            committed_experiment_id=None,
            committed_branch_id=None,
            final_score=0.0,
            reason="candidate_move_sets is empty",
        )

    async def _maybe_await(value):
        # Awaits only when the callback returned an awaitable; plain
        # values pass straight through, so sync callbacks work unchanged.
        if inspect.isawaitable(value):
            return await value
        return value

    steps: list[IterationStep] = []
    # -1.0 sentinel: any real score (including 0.0) from a valid branch
    # beats it, so the first valid candidate always becomes best-so-far.
    best_score = -1.0
    best_exp_id: Optional[str] = None
    best_branch_id: Optional[str] = None
    n = min(max_iterations, len(candidate_move_sets))

    for i in range(n):
        move_ids = candidate_move_sets[i]
        exp_id = await _maybe_await(create_experiment_fn(move_ids))
        # run_experiment_fn yields a (winner_branch_id, winner_score) pair;
        # winner_branch_id may be None when no branch won.
        winner_branch_id, winner_score = await _maybe_await(run_experiment_fn(exp_id))

        # Threshold counts only when there is an actual winning branch.
        met = winner_score >= threshold and winner_branch_id is not None
        steps.append(IterationStep(
            iteration=i,
            experiment_id=exp_id,
            winner_branch_id=winner_branch_id,
            winner_score=winner_score,
            threshold_met=met,
            note=(
                f"committed on iteration {i}" if met
                else f"below threshold (need {threshold}, got {winner_score})"
            ),
        ))

        if met:
            # Discard any stale best-so-far before committing the winner so
            # no non-winning experiment is left behind in the store.
            if best_exp_id is not None and best_exp_id != exp_id:
                await _maybe_await(discard_fn(best_exp_id))
            await _maybe_await(commit_fn(exp_id, winner_branch_id))
            return IterationResult(
                status="committed",
                iterations_run=i + 1,
                committed_experiment_id=exp_id,
                committed_branch_id=winner_branch_id,
                final_score=winner_score,
                steps=steps,
                reason=f"threshold {threshold} met on iteration {i}",
            )

        # Only candidates with a real winning branch can become best-so-far;
        # everything else is discarded immediately.
        if winner_branch_id is not None and winner_score > best_score:
            if best_exp_id is not None:
                await _maybe_await(discard_fn(best_exp_id))
            best_score = winner_score
            best_exp_id = exp_id
            best_branch_id = winner_branch_id
        else:
            await _maybe_await(discard_fn(exp_id))

    # Budget exhausted without meeting threshold: apply the timeout policy.
    if on_timeout == "commit_best" and best_exp_id and best_branch_id:
        await _maybe_await(commit_fn(best_exp_id, best_branch_id))
        return IterationResult(
            status="exhausted",
            iterations_run=n,
            committed_experiment_id=best_exp_id,
            committed_branch_id=best_branch_id,
            final_score=best_score,
            steps=steps,
            reason=(
                f"max_iterations={n} reached, threshold {threshold} never met; "
                f"committed best-so-far with score {best_score}"
            ),
        )

    # Non-commit policy (or nothing worth committing): drop any held
    # best-so-far so no experiment is left dangling in the store.
    if best_exp_id:
        await _maybe_await(discard_fn(best_exp_id))
    return IterationResult(
        status="timeout_no_commit",
        iterations_run=n,
        committed_experiment_id=None,
        committed_branch_id=None,
        # Clamp the -1.0 sentinel so callers never see a negative score.
        final_score=max(best_score, 0.0),
        steps=steps,
        reason=f"max_iterations={n} reached, policy={on_timeout}, no commit issued",
    )
|