livepilot 1.10.2 → 1.10.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +1 -1
- package/AGENTS.md +1 -1
- package/CHANGELOG.md +138 -0
- package/README.md +6 -4
- package/livepilot/.Codex-plugin/plugin.json +1 -1
- package/livepilot/.claude-plugin/plugin.json +1 -1
- package/livepilot/skills/livepilot-core/references/overview.md +1 -1
- package/livepilot/skills/livepilot-evaluation/references/capability-modes.md +1 -1
- package/livepilot.mcpb +0 -0
- package/m4l_device/livepilot_bridge.js +1 -1
- package/manifest.json +1 -1
- package/mcp_server/__init__.py +1 -1
- package/mcp_server/composer/engine.py +17 -22
- package/mcp_server/composer/sample_resolver.py +150 -11
- package/mcp_server/experiment/engine.py +212 -16
- package/mcp_server/experiment/models.py +10 -0
- package/mcp_server/experiment/tools.py +28 -10
- package/mcp_server/persistence/project_store.py +61 -7
- package/mcp_server/preview_studio/tools.py +73 -7
- package/package.json +1 -1
- package/remote_script/LivePilot/__init__.py +1 -1
- package/scripts/sync_metadata.py +4 -4
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
{
|
|
11
11
|
"name": "livepilot",
|
|
12
12
|
"description": "Agentic production system for Ableton Live 12 — 317 tools, 43 domains, device atlas, spectral perception, technique memory, sample intelligence, auto-composition, neo-Riemannian harmony, Euclidean rhythm, species counterpoint, MIDI I/O",
|
|
13
|
-
"version": "1.10.
|
|
13
|
+
"version": "1.10.3",
|
|
14
14
|
"author": {
|
|
15
15
|
"name": "Pilot Studio"
|
|
16
16
|
},
|
package/AGENTS.md
CHANGED
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,143 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 1.10.3 — Truth Release (April 14 2026)
|
|
4
|
+
|
|
5
|
+
A correctness pass focused on making the top-layer workflows **trustworthy
|
|
6
|
+
in real use**. No new tool families, no new domains, no new breadth. Every
|
|
7
|
+
change is a truth-release fix: execution paths are real, emitted plans are
|
|
8
|
+
valid, sample matching is musically sane, and product language matches
|
|
9
|
+
implementation.
|
|
10
|
+
|
|
11
|
+
The four flagship workflows this release optimizes for:
|
|
12
|
+
1. **Session understanding** — already strong, unchanged
|
|
13
|
+
2. **Sample-guided section building** — fixed by §2 + §3
|
|
14
|
+
3. **Wonder rescue** — fixed by §1
|
|
15
|
+
4. **Targeted improvement ("tighten the low end")** — already strong, unchanged
|
|
16
|
+
|
|
17
|
+
If a feature couldn't be made true in this cycle, it was downgraded honestly
|
|
18
|
+
rather than preserved as fake capability.
|
|
19
|
+
|
|
20
|
+
### Fixed — Execution truth (§1)
|
|
21
|
+
|
|
22
|
+
- **Experiments now route through the async execution router.**
|
|
23
|
+
`mcp_server/experiment/engine.py` had two code paths (`run_branch` and
|
|
24
|
+
`commit_branch`) that called `ableton.send_command(tool, params)` directly
|
|
25
|
+
and suppressed every failure with a silent `except Exception: pass`. They
|
|
26
|
+
now go through `execute_plan_steps_async` with per-step results recorded
|
|
27
|
+
on `branch.execution_log`. Branch status reflects reality: `evaluated`
|
|
28
|
+
when steps ran, `failed` when zero succeeded, `committed_with_errors`
|
|
29
|
+
when a commit was partial. Users can see exactly which tools succeeded
|
|
30
|
+
and which didn't.
|
|
31
|
+
- **`commit_preview_variant` actually applies the variant now.**
|
|
32
|
+
Previously this tool only marked the variant as chosen in an in-memory
|
|
33
|
+
store and updated taste memory — the comment said *"the caller should
|
|
34
|
+
then apply the variant's compiled plan"* which was a trust leak. Users
|
|
35
|
+
reasonably expected `commit` to **apply** the variant. It now runs the
|
|
36
|
+
variant's compiled plan through `execute_plan_steps_async` and returns
|
|
37
|
+
`execution_log` + `steps_ok` / `steps_failed` + explicit `status`
|
|
38
|
+
(`committed` / `committed_with_errors` / `failed`). Analytical-only
|
|
39
|
+
variants (no compiled plan) return `status="analytical_only"` and
|
|
40
|
+
`committed=False` instead of pretending to apply anything.
|
|
41
|
+
|
|
42
|
+
### Fixed — Composer truthfulness (§2)
|
|
43
|
+
|
|
44
|
+
- **`suggest_sample_technique` removed from the executable plan.**
|
|
45
|
+
The composer was emitting `{"tool": "suggest_sample_technique", "params":
|
|
46
|
+
{"technique_id": layer.technique_id}}` in both `compose()` and `augment()`.
|
|
47
|
+
The real tool's signature is `(file_path required, intent, philosophy,
|
|
48
|
+
max_suggestions)` — `technique_id` is not a parameter and `file_path` is
|
|
49
|
+
required. This step would have always failed at runtime. It's now dropped
|
|
50
|
+
from the executable plan entirely; `layer.technique_id` still surfaces
|
|
51
|
+
in the descriptive `result.layers[*].technique_id` output for user
|
|
52
|
+
inspection. The agent can call `suggest_sample_technique` separately with
|
|
53
|
+
a real file path if it wants per-sample recipe advice.
|
|
54
|
+
All 12 remaining composer tool emissions validated against real signatures
|
|
55
|
+
— they're all correct.
|
|
56
|
+
|
|
57
|
+
### Fixed — Sample resolution quality (§3)
|
|
58
|
+
|
|
59
|
+
- **Role-aware scored ranking replaces naive first-hit substring matching.**
|
|
60
|
+
The old `_filesystem_match` returned the first audio file whose name
|
|
61
|
+
contained the layer's role OR any query token. This produced obvious
|
|
62
|
+
musical mistakes: a `lead` layer asking for *"techno melody Am"* would
|
|
63
|
+
get matched to `drums_techno.wav` because of the shared "techno" token.
|
|
64
|
+
The new scorer considers:
|
|
65
|
+
* role word in filename (+3.0)
|
|
66
|
+
* filename's primary role matches layer role (+1.5 bonus)
|
|
67
|
+
* filename's primary role is a **different** role (−5.0 penalty — this
|
|
68
|
+
is what blocks the drums-for-lead failure)
|
|
69
|
+
* role-adjacent hint words (kick/snare for drums, sub/808 for bass, etc.)
|
|
70
|
+
(+2.0)
|
|
71
|
+
* query token overlap excluding the role word (+0.5 per token)
|
|
72
|
+
* tempo token overlap between filename and query (+1.0)
|
|
73
|
+
A candidate must score strictly above 0.0 to be returned — files with
|
|
74
|
+
no signal at all return `unresolved` instead of an arbitrary first pick.
|
|
75
|
+
Six new regression tests lock out specific failure patterns.
|
|
76
|
+
|
|
77
|
+
### Fixed — Project identity stability (§5)
|
|
78
|
+
|
|
79
|
+
- **`project_hash` uses much more entropy.** The old hash was
|
|
80
|
+
`tempo + track_count + sorted_track_names` — the author's own comment
|
|
81
|
+
said *"this is imperfect"*. It collided whenever two songs shared the
|
|
82
|
+
same tempo and track names, and it was invariant to track reordering,
|
|
83
|
+
scene changes, and arrangement length. The new hash includes:
|
|
84
|
+
* tempo (1 decimal)
|
|
85
|
+
* time signature
|
|
86
|
+
* song length in beats (arrangement duration — very distinguishing)
|
|
87
|
+
* **ordered** track list: `(index, name, color, has_midi_input)` per track
|
|
88
|
+
* return track count + names
|
|
89
|
+
* **ordered** scene list: `(index, name, color)` per scene
|
|
90
|
+
Six new tests lock out: track reordering collision, song-length collision,
|
|
91
|
+
scene-list collision, time-signature collision, and track-rename detection.
|
|
92
|
+
Not a true project ID (that still needs Live set file path access from
|
|
93
|
+
the Remote Script, deferred) but substantially less fragile in practice.
|
|
94
|
+
|
|
95
|
+
### Changed — Product language (§6)
|
|
96
|
+
|
|
97
|
+
- **README.md**: "Producer Agent — autonomous multi-step production"
|
|
98
|
+
rewritten as *"an orchestrated multi-step assistant for building,
|
|
99
|
+
layering and refining sessions. [...] The agent proposes plans; the user
|
|
100
|
+
confirms and listens. LivePilot is a high-trust operator, not an
|
|
101
|
+
autonomous producer."*
|
|
102
|
+
- **docs/manual/getting-started.md**: "An autonomous agent that can build
|
|
103
|
+
entire tracks from high-level descriptions" rewritten to frame output as
|
|
104
|
+
a *"playable baseline — a starting point, not a finished track. You
|
|
105
|
+
listen, decide what works, and iterate."*
|
|
106
|
+
- **docs/manual/intelligence.md**: `agentic_loop` workflow mode description
|
|
107
|
+
changed from *"Full autonomous loop with evaluation"* to *"Multi-step
|
|
108
|
+
plan-and-evaluate loop with explicit checkpoints"*.
|
|
109
|
+
|
|
110
|
+
### Tests
|
|
111
|
+
|
|
112
|
+
- **1756 passing**, 1 skipped (was 1740 in v1.10.2; +16 net new regression tests):
|
|
113
|
+
* +2 composer: `suggest_sample_technique` NOT in compose/augment plan
|
|
114
|
+
* +6 sample resolver: role-aware ranking lockouts
|
|
115
|
+
* +2 preview studio: `commit_preview_variant` executes + analytical-only honesty
|
|
116
|
+
* +6 project persistence: hash collision-resistance
|
|
117
|
+
|
|
118
|
+
### Note — what was intentionally NOT fixed in this cycle
|
|
119
|
+
|
|
120
|
+
- **`mcp_dispatch` registry expansion.** Only `load_sample_to_simpler` is
|
|
121
|
+
registered. The other 9 `MCP_TOOLS` entries are not currently emitted by
|
|
122
|
+
any compiled plan I can find. The router returns a clear "not in dispatch"
|
|
123
|
+
error if an unregistered MCP tool ever gets emitted, which is *honest
|
|
124
|
+
failure* — not silent. Adding stub entries would be preemptive scope.
|
|
125
|
+
- **Wonder Mode full SessionKernel.** Wonder passes real `session_info` from
|
|
126
|
+
Ableton to the variant compilers when connected — the kernel SHAPE is
|
|
127
|
+
minimal (`{session_info, mode}`) but the semantic-move compilers only
|
|
128
|
+
read `kernel.session_info.tracks`, so the extra fields don't change
|
|
129
|
+
behavior. Low value, deferred.
|
|
130
|
+
- **Silent `except: pass` in non-execution paths.** `commit_preview_variant`
|
|
131
|
+
has two silent excepts around taste-memory and turn-resolution updates.
|
|
132
|
+
These are bookkeeping side effects, not execution-critical, and failing
|
|
133
|
+
them shouldn't abort the commit. Left as-is.
|
|
134
|
+
- **Project identity via Live set file path.** The real fix for §5 would
|
|
135
|
+
be to pull `song.song_document_path` from Live via a new Remote Script
|
|
136
|
+
handler. Deferred — the stronger hash is a substantial improvement
|
|
137
|
+
without adding new Remote Script surface area.
|
|
138
|
+
|
|
139
|
+
---
|
|
140
|
+
|
|
3
141
|
## 1.10.2 — npm Distribution Fix + Tool-Count Audit (April 14 2026)
|
|
4
142
|
|
|
5
143
|
Patch release. The orchestration hardening shipped in 1.10.1 was correct on
|
package/README.md
CHANGED
|
@@ -514,10 +514,12 @@ claude plugin add github:dreamrec/LivePilot/plugin
|
|
|
514
514
|
| `/evaluate` | Before/after evaluation of recent changes |
|
|
515
515
|
| `/memory` | Technique library management |
|
|
516
516
|
|
|
517
|
-
**Producer Agent** —
|
|
518
|
-
Consults memory for style context, searches
|
|
519
|
-
searches samples, creates tracks, programs MIDI,
|
|
520
|
-
reads the spectrum to verify, and arranges sections.
|
|
517
|
+
**Producer Agent** — an orchestrated multi-step assistant for building,
|
|
518
|
+
layering and refining sessions. Consults memory for style context, searches
|
|
519
|
+
the atlas for instruments, searches samples, creates tracks, programs MIDI,
|
|
520
|
+
chains effects, reads the spectrum to verify, and arranges sections. The
|
|
521
|
+
agent proposes plans; the user confirms and listens. LivePilot is a
|
|
522
|
+
high-trust operator, not an autonomous producer.
|
|
521
523
|
|
|
522
524
|
**Core Skill** — operational discipline connecting all layers.
|
|
523
525
|
Consult atlas before loading. Read analyzer after mixing.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "livepilot",
|
|
3
|
-
"version": "1.10.
|
|
3
|
+
"version": "1.10.3",
|
|
4
4
|
"description": "Agentic production system for Ableton Live 12 — 317 tools, 43 domains, device atlas, sample intelligence, auto-composition, spectral perception, technique memory, neo-Riemannian harmony, Euclidean rhythm, species counterpoint, MIDI I/O",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "Pilot Studio"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "livepilot",
|
|
3
|
-
"version": "1.10.
|
|
3
|
+
"version": "1.10.3",
|
|
4
4
|
"description": "Agentic production system for Ableton Live 12 — 317 tools, 43 domains, device atlas, sample intelligence, auto-composition, spectral perception, technique memory, neo-Riemannian harmony, Euclidean rhythm, species counterpoint, MIDI I/O",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "Pilot Studio"
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# LivePilot v1.10.
|
|
1
|
+
# LivePilot v1.10.3 — Architecture & Tool Reference
|
|
2
2
|
|
|
3
3
|
Agentic production system for Ableton Live 12. 317 tools across 43 domains. Device atlas (1305 devices, 81 enriched), spectral perception (M4L analyzer), technique memory, automation intelligence (16 curve types, 15 recipes), music theory (Krumhansl-Schmuckler, species counterpoint), generative algorithms (Euclidean rhythm, tintinnabuli, phase shift, additive process), neo-Riemannian harmony (PRL transforms, Tonnetz), MIDI file I/O.
|
|
4
4
|
|
|
@@ -104,7 +104,7 @@ Call `get_capability_state` at the start of any evaluation session. The response
|
|
|
104
104
|
{
|
|
105
105
|
"mode": "normal",
|
|
106
106
|
"analyzer_connected": true,
|
|
107
|
-
"bridge_version": "1.10.
|
|
107
|
+
"bridge_version": "1.10.3",
|
|
108
108
|
"spectral_cache_age_ms": 1200,
|
|
109
109
|
"flucoma_available": false,
|
|
110
110
|
"session_connected": true
|
package/livepilot.mcpb
CHANGED
|
Binary file
|
package/manifest.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"manifest_version": "0.3",
|
|
3
3
|
"name": "livepilot",
|
|
4
4
|
"display_name": "LivePilot — AI for Ableton Live",
|
|
5
|
-
"version": "1.10.
|
|
5
|
+
"version": "1.10.3",
|
|
6
6
|
"description": "Agentic production system for Ableton Live 12. Make beats, mix tracks, design sounds, and arrange songs with 317 AI-powered tools.",
|
|
7
7
|
"long_description": "LivePilot is an agentic production system for Ableton Live 12. 317 tools across 43 domains — device atlas (1305 devices), sample intelligence (Splice + browser + filesystem), auto-composition, spectral perception, technique memory, and 12 creative engines.\n\n**What it does:**\n- Creates MIDI clips with notes, chords, and rhythms\n- Loads instruments and effects via Device Atlas (1305 devices indexed)\n- Searches samples across Splice, Ableton browser, and filesystem\n- Plans compositions from text prompts with genre-aware layering\n- Slices samples with intent-based MIDI generation\n- Mixes with volume, panning, sends, and automation\n- Analyzes your mix with real-time spectral data (M4L bridge)\n- Diagnoses stuck sessions and generates creative rescue variants\n- Remembers your production style across sessions\n\n**How it works:**\nLivePilot installs a Remote Script in Ableton that communicates with the AI over a local TCP connection. Everything runs on your machine — no audio leaves your computer.",
|
|
8
8
|
"author": {
|
package/mcp_server/__init__.py
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
"""LivePilot MCP Server — bridges MCP protocol to Ableton Live."""
|
|
2
|
-
__version__ = "1.10.
|
|
2
|
+
__version__ = "1.10.3"
|
|
@@ -126,19 +126,18 @@ def _step_load_sample_to_simpler(track_index: int, layer: LayerSpec, file_path:
|
|
|
126
126
|
}
|
|
127
127
|
|
|
128
128
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
}
|
|
129
|
+
# NOTE: there used to be a _step_suggest_technique helper here that emitted a
|
|
130
|
+
# `suggest_sample_technique` step into the executable plan with params
|
|
131
|
+
# {"technique_id": layer.technique_id}. This was broken: the real tool's
|
|
132
|
+
# signature is (file_path, intent, philosophy, max_suggestions) and takes
|
|
133
|
+
# no technique_id param. The step would have failed at runtime with a
|
|
134
|
+
# "required file_path missing" error.
|
|
135
|
+
#
|
|
136
|
+
# Removed in v1.10.3 (Truth Release). Technique suggestions for composer
|
|
137
|
+
# layers are now surfaced in the descriptive result output (result.layers[*].
|
|
138
|
+
# technique_id) — the agent can call suggest_sample_technique separately
|
|
139
|
+
# with the resolved sample path if it wants per-sample recipe advice. The
|
|
140
|
+
# executable plan emits only real, validated tool calls.
|
|
142
141
|
|
|
143
142
|
|
|
144
143
|
def _processing_steps_with_binding(
|
|
@@ -366,8 +365,9 @@ class ComposerEngine:
|
|
|
366
365
|
|
|
367
366
|
plan.append(_step_load_sample_to_simpler(track_index, layer, file_path))
|
|
368
367
|
|
|
369
|
-
|
|
370
|
-
|
|
368
|
+
# technique_id intentionally NOT emitted as an executable step —
|
|
369
|
+
# see note above _step_suggest_technique removal. layer.technique_id
|
|
370
|
+
# is still surfaced in result.layers for descriptive output.
|
|
371
371
|
|
|
372
372
|
plan.extend(_processing_steps_with_binding(track_index, layer, layer_idx))
|
|
373
373
|
plan.extend(_mix_steps(track_index, layer))
|
|
@@ -458,13 +458,8 @@ class ComposerEngine:
|
|
|
458
458
|
"role": layer.role,
|
|
459
459
|
})
|
|
460
460
|
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
"tool": "suggest_sample_technique",
|
|
464
|
-
"params": {"technique_id": layer.technique_id},
|
|
465
|
-
"description": f"Get technique recipe '{layer.technique_id}'",
|
|
466
|
-
"role": layer.role,
|
|
467
|
-
})
|
|
461
|
+
# technique_id intentionally NOT emitted (see compose() above).
|
|
462
|
+
# Surfaced in result.new_layers for descriptive output only.
|
|
468
463
|
|
|
469
464
|
for dev_idx, device in enumerate(layer.processing):
|
|
470
465
|
device_name = device.get("name", "")
|
|
@@ -16,10 +16,31 @@ Returns (local_path, source) where source is one of:
|
|
|
16
16
|
|
|
17
17
|
Preference order is fixed: filesystem > splice_local > splice_remote > browser.
|
|
18
18
|
Filesystem wins even if Splice has a faster hit — local files are free.
|
|
19
|
+
|
|
20
|
+
Role-aware filesystem ranking (v1.10.3)
|
|
21
|
+
----------------------------------------
|
|
22
|
+
Filesystem matching used to return the first file whose name contained the
|
|
23
|
+
role OR any query token. This caused obvious musical mistakes — a `lead`
|
|
24
|
+
layer would get matched to `drums_techno.wav` because both share the genre
|
|
25
|
+
token "techno". The Truth Release (v1.10.3) replaces that with a scored
|
|
26
|
+
ranker that considers:
|
|
27
|
+
|
|
28
|
+
* role word in filename (+3.0)
|
|
29
|
+
* filename's primary role == layer role (+1.5 bonus)
|
|
30
|
+
* filename's primary role == a DIFFERENT role (-5.0 penalty)
|
|
31
|
+
* role-adjacent hint words (e.g. kick/snare for drums) (+2.0)
|
|
32
|
+
* query token overlap, excluding the role word itself (+0.5 per token)
|
|
33
|
+
* tempo token (e.g. "128bpm") shared between filename and query (+1.0)
|
|
34
|
+
|
|
35
|
+
A candidate must score strictly above 0.0 to be returned. This blocks the
|
|
36
|
+
obvious failure mode where genre-only matches override role matches or
|
|
37
|
+
where unrelated files with no signal get returned just because they're
|
|
38
|
+
the first audio file found.
|
|
19
39
|
"""
|
|
20
40
|
|
|
21
41
|
from __future__ import annotations
|
|
22
42
|
|
|
43
|
+
import re
|
|
23
44
|
from pathlib import Path
|
|
24
45
|
from typing import Optional, Tuple
|
|
25
46
|
|
|
@@ -28,6 +49,32 @@ from .layer_planner import LayerSpec
|
|
|
28
49
|
|
|
29
50
|
_AUDIO_EXTENSIONS = (".wav", ".aif", ".aiff", ".flac")
|
|
30
51
|
|
|
52
|
+
# Role-adjacent hint words (NOT the role itself — that's scored separately).
|
|
53
|
+
# These are words commonly found in filenames that indicate the layer role
|
|
54
|
+
# without using the literal role name.
|
|
55
|
+
_ROLE_HINTS: dict[str, frozenset[str]] = {
|
|
56
|
+
"drums": frozenset(["kick", "snare", "hat", "clap", "perc", "break", "beat", "loop", "hihat"]),
|
|
57
|
+
"bass": frozenset(["sub", "808", "low", "deep", "bassline"]),
|
|
58
|
+
"lead": frozenset(["synth", "arp", "mel", "melody", "riff", "hook"]),
|
|
59
|
+
"pad": frozenset(["ambient", "atmos", "drone", "string", "warm"]),
|
|
60
|
+
"texture": frozenset(["atmos", "ambient", "drone", "swell", "noise"]),
|
|
61
|
+
"vocal": frozenset(["vox", "voice", "chop", "phrase", "acapella"]),
|
|
62
|
+
"percussion": frozenset(["shaker", "tamb", "bongo", "conga", "tom", "ride", "cowbell"]),
|
|
63
|
+
"fx": frozenset(["sfx", "riser", "impact", "sweep", "whoosh", "rise", "fall", "hit"]),
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
# Flat set of every known "primary role word" that might appear at the start
|
|
67
|
+
# of a filename. Used to classify a filename's dominant role.
|
|
68
|
+
_ALL_ROLE_WORDS: frozenset[str] = frozenset(
|
|
69
|
+
{role for role in _ROLE_HINTS}
|
|
70
|
+
| {"drum"} # singular form of "drums"
|
|
71
|
+
| {h for hints in _ROLE_HINTS.values() for h in hints}
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
# Tempo token pattern — matches 2-3 digit BPM values in filenames like
|
|
75
|
+
# "kick_128bpm.wav", "drums_120_loop.wav", "bass128.wav".
|
|
76
|
+
_TEMPO_RE = re.compile(r"(\d{2,3})")
|
|
77
|
+
|
|
31
78
|
|
|
32
79
|
def _query_tokens(query: str) -> list[str]:
|
|
33
80
|
"""Return lowercase query tokens meaningful for matching (len > 2)."""
|
|
@@ -42,21 +89,113 @@ def _iter_candidates(root: Path):
|
|
|
42
89
|
yield from root.rglob(f"*{ext}")
|
|
43
90
|
|
|
44
91
|
|
|
45
|
-
def
|
|
46
|
-
"""
|
|
92
|
+
def _primary_role_of(filename_stem: str) -> Optional[str]:
|
|
93
|
+
"""Identify the dominant 'role' of a filename from its first known role word.
|
|
94
|
+
|
|
95
|
+
Example: "drums_techno_128.wav" -> "drums". "bass_sub_808.aif" -> "bass".
|
|
96
|
+
Returns None if no token in the filename is a known role word.
|
|
97
|
+
"""
|
|
98
|
+
# Split on underscores, hyphens, spaces, dots
|
|
99
|
+
parts = re.split(r"[_\-\s.]+", filename_stem.lower())
|
|
100
|
+
for p in parts:
|
|
101
|
+
if p in _ALL_ROLE_WORDS:
|
|
102
|
+
return p
|
|
103
|
+
return None
|
|
47
104
|
|
|
48
|
-
|
|
105
|
+
|
|
106
|
+
def _role_matches(primary: str, role: str) -> bool:
|
|
107
|
+
"""True if the filename's primary role belongs to the same role family
|
|
108
|
+
as the layer's role (handles role == 'drums' vs primary == 'kick')."""
|
|
109
|
+
if primary == role:
|
|
110
|
+
return True
|
|
111
|
+
# "drum" is the singular of "drums"
|
|
112
|
+
if primary == "drum" and role == "drums":
|
|
113
|
+
return True
|
|
114
|
+
# primary is one of the role's hints (e.g. "kick" is a drum hint)
|
|
115
|
+
hints = _ROLE_HINTS.get(role, frozenset())
|
|
116
|
+
return primary in hints
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _score_candidate(path: Path, layer: LayerSpec, query_tempos: set[str]) -> float:
|
|
120
|
+
"""Return a ranking score for this candidate file.
|
|
121
|
+
|
|
122
|
+
Scores combine role fit, role hints, query tokens, and tempo match.
|
|
123
|
+
A negative score is possible (and disqualifying) when the filename's
|
|
124
|
+
primary role is clearly a DIFFERENT role family — that blocks the
|
|
125
|
+
"lead layer grabs drums via shared genre token" failure pattern.
|
|
49
126
|
"""
|
|
127
|
+
name = path.stem.lower()
|
|
128
|
+
role = (layer.role or "").lower()
|
|
129
|
+
score = 0.0
|
|
130
|
+
|
|
131
|
+
# 1. Role word literally in filename
|
|
132
|
+
if role and role in name:
|
|
133
|
+
score += 3.0
|
|
134
|
+
|
|
135
|
+
# 2. Primary-role classification of the filename
|
|
136
|
+
primary = _primary_role_of(name)
|
|
137
|
+
if primary:
|
|
138
|
+
if _role_matches(primary, role):
|
|
139
|
+
score += 1.5 # bonus: filename is "about" this layer's role
|
|
140
|
+
else:
|
|
141
|
+
score -= 5.0 # heavy penalty: filename is about a different role
|
|
142
|
+
|
|
143
|
+
# 3. Role-adjacent hint words in filename
|
|
144
|
+
hints = _ROLE_HINTS.get(role, frozenset())
|
|
145
|
+
for hint in hints:
|
|
146
|
+
if hint in name:
|
|
147
|
+
score += 2.0
|
|
148
|
+
break # count at most once
|
|
149
|
+
|
|
150
|
+
# 4. Query token overlap (excluding the role word — already scored above)
|
|
50
151
|
tokens = _query_tokens(layer.search_query)
|
|
51
|
-
|
|
152
|
+
for tok in tokens:
|
|
153
|
+
if tok == role:
|
|
154
|
+
continue
|
|
155
|
+
if tok in name:
|
|
156
|
+
score += 0.5
|
|
157
|
+
|
|
158
|
+
# 5. Tempo match — if query mentions e.g. "128bpm" and filename has "128"
|
|
159
|
+
if query_tempos:
|
|
160
|
+
filename_tempos = set(_TEMPO_RE.findall(name))
|
|
161
|
+
# Only count digits that are plausible BPMs (60-200)
|
|
162
|
+
filename_tempos = {t for t in filename_tempos if 60 <= int(t) <= 200}
|
|
163
|
+
if query_tempos & filename_tempos:
|
|
164
|
+
score += 1.0
|
|
165
|
+
|
|
166
|
+
return score
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def _extract_query_tempos(query: str) -> set[str]:
|
|
170
|
+
"""Pull tempo tokens (e.g. '128bpm', '120') out of a search query."""
|
|
171
|
+
tempos = set()
|
|
172
|
+
for match in _TEMPO_RE.findall(query.lower()):
|
|
173
|
+
if 60 <= int(match) <= 200:
|
|
174
|
+
tempos.add(match)
|
|
175
|
+
return tempos
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def _filesystem_match(layer: LayerSpec, search_roots: list[Path]) -> Optional[str]:
|
|
179
|
+
"""Score every audio file across the search_roots and return the best.
|
|
180
|
+
|
|
181
|
+
Returns None if no file scores above zero. "Above zero" is the
|
|
182
|
+
threshold for "has any role or token signal" — anything at or below
|
|
183
|
+
zero is considered unresolved (to avoid returning arbitrary files
|
|
184
|
+
that happen to be first in alphabetical order).
|
|
185
|
+
"""
|
|
186
|
+
query_tempos = _extract_query_tempos(layer.search_query)
|
|
187
|
+
|
|
188
|
+
best_path: Optional[Path] = None
|
|
189
|
+
best_score: float = 0.0 # must strictly exceed this to win
|
|
190
|
+
|
|
52
191
|
for root in search_roots:
|
|
53
192
|
for path in _iter_candidates(Path(root)):
|
|
54
|
-
|
|
55
|
-
if
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
return None
|
|
193
|
+
score = _score_candidate(path, layer, query_tempos)
|
|
194
|
+
if score > best_score:
|
|
195
|
+
best_score = score
|
|
196
|
+
best_path = path
|
|
197
|
+
|
|
198
|
+
return str(best_path) if best_path is not None else None
|
|
60
199
|
|
|
61
200
|
|
|
62
201
|
async def _splice_resolve(
|
|
@@ -128,7 +267,7 @@ async def resolve_sample_for_layer(
|
|
|
128
267
|
"""
|
|
129
268
|
roots = [Path(r) for r in (search_roots or []) if r]
|
|
130
269
|
|
|
131
|
-
# 1. Filesystem — always try first, no network
|
|
270
|
+
# 1. Filesystem — always try first, no network. Scored ranking since v1.10.3.
|
|
132
271
|
fs_hit = _filesystem_match(layer, roots)
|
|
133
272
|
if fs_hit:
|
|
134
273
|
return fs_hit, "filesystem"
|
|
@@ -101,41 +101,128 @@ def run_branch(
|
|
|
101
101
|
|
|
102
102
|
The branch is updated in-place with snapshots and status.
|
|
103
103
|
"""
|
|
104
|
+
# NOTE: as of v1.10.3 (Truth Release) this function is a thin synchronous
|
|
105
|
+
# wrapper that delegates to _run_branch_sync. The synchronous
|
|
106
|
+
# _run_branch_sync stays for any caller that still uses it, but it now
|
|
107
|
+
# records execution errors in branch.execution_log instead of silently swallowing them.
|
|
108
|
+
# The canonical path is run_branch_async below. Callers (tools.py) use
|
|
109
|
+
# the async variant directly.
|
|
110
|
+
return _run_branch_sync(branch, ableton, compiled_plan, capture_fn)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def _run_branch_sync(branch, ableton, compiled_plan, capture_fn):
|
|
114
|
+
"""Legacy sync run_branch body. Preserved for back-compat only.
|
|
115
|
+
|
|
116
|
+
Experiment tools now use run_branch_async which routes through the
|
|
117
|
+
unified execution substrate.
|
|
118
|
+
"""
|
|
104
119
|
branch.status = "running"
|
|
105
120
|
branch.compiled_plan = compiled_plan
|
|
106
|
-
|
|
107
|
-
# 1. Capture before
|
|
108
121
|
branch.before_snapshot = capture_fn()
|
|
109
122
|
|
|
110
|
-
# 2. Execute plan steps
|
|
111
123
|
steps_executed = 0
|
|
124
|
+
log = []
|
|
112
125
|
for step in compiled_plan.get("steps", []):
|
|
113
126
|
tool = step.get("tool", "")
|
|
114
127
|
params = step.get("params", {})
|
|
115
128
|
if not tool:
|
|
116
129
|
continue
|
|
117
|
-
# Skip read-only verification steps
|
|
118
130
|
if tool in ("get_track_meters", "get_master_spectrum", "analyze_mix"):
|
|
119
131
|
continue
|
|
120
132
|
try:
|
|
121
|
-
ableton.send_command(tool, params)
|
|
133
|
+
result = ableton.send_command(tool, params)
|
|
122
134
|
steps_executed += 1
|
|
123
|
-
|
|
124
|
-
|
|
135
|
+
log.append({"tool": tool, "backend": "remote_command", "ok": True, "result": result})
|
|
136
|
+
except Exception as exc:
|
|
137
|
+
log.append({"tool": tool, "backend": "remote_command", "ok": False, "error": str(exc)})
|
|
125
138
|
|
|
139
|
+
branch.execution_log = log
|
|
126
140
|
branch.executed_at_ms = int(time.time() * 1000)
|
|
141
|
+
branch.after_snapshot = capture_fn()
|
|
142
|
+
|
|
143
|
+
for _ in range(steps_executed):
|
|
144
|
+
try:
|
|
145
|
+
ableton.send_command("undo", {})
|
|
146
|
+
except Exception:
|
|
147
|
+
break
|
|
127
148
|
|
|
128
|
-
|
|
149
|
+
branch.status = "evaluated" if steps_executed > 0 else "failed"
|
|
150
|
+
return branch
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
async def run_branch_async(
|
|
154
|
+
branch,
|
|
155
|
+
ableton,
|
|
156
|
+
compiled_plan: dict,
|
|
157
|
+
capture_fn,
|
|
158
|
+
bridge=None,
|
|
159
|
+
mcp_registry=None,
|
|
160
|
+
ctx=None,
|
|
161
|
+
):
|
|
162
|
+
"""Run a single branch experiment through the async execution router.
|
|
163
|
+
|
|
164
|
+
Same semantics as run_branch (apply → capture → evaluate → undo) but
|
|
165
|
+
dispatches each step through execute_plan_steps_async so remote /
|
|
166
|
+
bridge / mcp backends are all routed correctly and per-step failures
|
|
167
|
+
are visible in branch.execution_log.
|
|
168
|
+
|
|
169
|
+
Read-only verification steps (get_track_meters, get_master_spectrum,
|
|
170
|
+
analyze_mix) are skipped in the apply pass — they're used for snapshot
|
|
171
|
+
capture separately.
|
|
172
|
+
"""
|
|
173
|
+
from ..runtime.execution_router import execute_plan_steps_async
|
|
174
|
+
|
|
175
|
+
branch.status = "running"
|
|
176
|
+
branch.compiled_plan = compiled_plan
|
|
177
|
+
|
|
178
|
+
branch.before_snapshot = capture_fn()
|
|
179
|
+
|
|
180
|
+
# Filter out read-only verification steps from the apply pass
|
|
181
|
+
all_steps = compiled_plan.get("steps", []) or []
|
|
182
|
+
apply_steps = [
|
|
183
|
+
s for s in all_steps
|
|
184
|
+
if s.get("tool") and s.get("tool") not in (
|
|
185
|
+
"get_track_meters", "get_master_spectrum", "analyze_mix",
|
|
186
|
+
)
|
|
187
|
+
]
|
|
188
|
+
|
|
189
|
+
exec_results = await execute_plan_steps_async(
|
|
190
|
+
apply_steps,
|
|
191
|
+
ableton=ableton,
|
|
192
|
+
bridge=bridge,
|
|
193
|
+
mcp_registry=mcp_registry or {},
|
|
194
|
+
ctx=ctx,
|
|
195
|
+
stop_on_failure=False, # best-effort, but log every failure
|
|
196
|
+
)
|
|
197
|
+
|
|
198
|
+
# Record per-step results on the branch for visibility
|
|
199
|
+
branch.execution_log = [
|
|
200
|
+
{
|
|
201
|
+
"tool": r.tool,
|
|
202
|
+
"backend": r.backend,
|
|
203
|
+
"ok": r.ok,
|
|
204
|
+
**({"result": r.result} if r.ok else {"error": r.error}),
|
|
205
|
+
}
|
|
206
|
+
for r in exec_results
|
|
207
|
+
]
|
|
208
|
+
|
|
209
|
+
steps_executed = sum(1 for r in exec_results if r.ok)
|
|
210
|
+
branch.executed_at_ms = int(time.time() * 1000)
|
|
129
211
|
branch.after_snapshot = capture_fn()
|
|
130
212
|
|
|
131
|
-
#
|
|
213
|
+
# Undo all successful steps back to checkpoint. Undo is a remote_command,
|
|
214
|
+
# route it through the normal ableton.send_command path for simplicity.
|
|
132
215
|
for _ in range(steps_executed):
|
|
133
216
|
try:
|
|
134
217
|
ableton.send_command("undo", {})
|
|
135
218
|
except Exception:
|
|
136
219
|
break
|
|
137
220
|
|
|
138
|
-
branch
|
|
221
|
+
# A branch is "evaluated" only if it actually applied at least one step.
|
|
222
|
+
# If every step failed, mark it "failed" — this is the truth-release
|
|
223
|
+
# behavior that makes the experiment honest instead of pretending
|
|
224
|
+
# a broken branch produced a neutral result.
|
|
225
|
+
branch.status = "evaluated" if steps_executed > 0 else "failed"
|
|
139
226
|
return branch
|
|
140
227
|
|
|
141
228
|
|
|
@@ -160,12 +247,102 @@ def evaluate_branch(
|
|
|
160
247
|
|
|
161
248
|
# ── Commit / discard ─────────────────────────────────────────────────────────
|
|
162
249
|
|
|
250
|
+
async def commit_branch_async(
    experiment: ExperimentSet,
    branch_id: str,
    ableton,
    bridge=None,
    mcp_registry=None,
    ctx=None,
) -> dict:
    """Permanently re-apply a branch's compiled plan via the async router.

    Unlike a trial run, nothing is undone afterwards. Read-only
    verification steps (meters / spectrum / mix analysis) and steps
    without a tool name are dropped before execution. The remaining
    steps run best-effort: a failing step does not stop the ones after it.

    Outcome, recorded on ``branch.status``:
      * ``"committed"``             — every attempted step succeeded
      * ``"committed_with_errors"`` — some succeeded, some failed
      * ``"failed"``                — no step succeeded; the experiment
        itself is left untouched and ``committed`` is ``False``

    Returns:
        ``{"error": ...}`` when the branch is unknown or has no compiled
        plan; otherwise a dict carrying ``committed``, the branch id/name
        and the per-step ``execution_log``.
    """
    from ..runtime.execution_router import execute_plan_steps_async

    branch = experiment.get_branch(branch_id)
    if not branch:
        return {"error": f"Branch {branch_id} not found"}
    if not branch.compiled_plan:
        return {"error": "Branch has no compiled plan"}

    # Tools that only observe the session; re-running them on commit is pointless.
    read_only_tools = ("get_track_meters", "get_master_spectrum", "analyze_mix")
    apply_steps = []
    for step in branch.compiled_plan.get("steps", []) or []:
        tool_name = step.get("tool")
        if tool_name and tool_name not in read_only_tools:
            apply_steps.append(step)

    outcomes = await execute_plan_steps_async(
        apply_steps,
        ableton=ableton,
        bridge=bridge,
        mcp_registry=mcp_registry or {},
        ctx=ctx,
        stop_on_failure=False,  # best-effort commit — record everything
    )

    # One log entry per step: successes carry "result", failures carry "error".
    log = []
    steps_ok = 0
    for outcome in outcomes:
        entry = {
            "tool": outcome.tool,
            "backend": outcome.backend,
            "ok": outcome.ok,
        }
        if outcome.ok:
            entry["result"] = outcome.result
            steps_ok += 1
        else:
            entry["error"] = outcome.error
        log.append(entry)

    branch.execution_log = log
    steps_failed = len(outcomes) - steps_ok

    if steps_ok == 0:
        # Zero successful steps — don't claim the commit happened
        branch.status = "failed"
        return {
            "committed": False,
            "branch_id": branch_id,
            "branch_name": branch.name,
            "error": "No steps executed successfully",
            "steps_attempted": len(apply_steps),
            "execution_log": log,
        }

    branch.status = "committed" if steps_failed == 0 else "committed_with_errors"
    experiment.winner_branch_id = branch_id
    experiment.status = "committed"

    return {
        "committed": True,
        "branch_id": branch_id,
        "branch_name": branch.name,
        "steps_executed": steps_ok,
        "steps_failed": steps_failed,
        "status": branch.status,
        "score": branch.score,
        "execution_log": log,
    }
|
|
333
|
+
|
|
334
|
+
|
|
163
335
|
def commit_branch(
|
|
164
336
|
experiment: ExperimentSet,
|
|
165
337
|
branch_id: str,
|
|
166
338
|
ableton,
|
|
167
339
|
) -> dict:
|
|
168
|
-
"""
|
|
340
|
+
"""Legacy sync wrapper kept for any direct caller. The canonical path
|
|
341
|
+
is commit_branch_async through tools.py → execute_plan_steps_async.
|
|
342
|
+
|
|
343
|
+
Still truth-honest: records per-step ok/error, marks branches as
|
|
344
|
+
'committed_with_errors' on partial failure rather than lying about it.
|
|
345
|
+
"""
|
|
169
346
|
branch = experiment.get_branch(branch_id)
|
|
170
347
|
if not branch:
|
|
171
348
|
return {"error": f"Branch {branch_id} not found"}
|
|
@@ -173,7 +350,6 @@ def commit_branch(
|
|
|
173
350
|
if not branch.compiled_plan:
|
|
174
351
|
return {"error": "Branch has no compiled plan"}
|
|
175
352
|
|
|
176
|
-
# Re-execute the plan (this time without undoing)
|
|
177
353
|
executed = []
|
|
178
354
|
for step in branch.compiled_plan.get("steps", []):
|
|
179
355
|
tool = step.get("tool", "")
|
|
@@ -182,11 +358,29 @@ def commit_branch(
|
|
|
182
358
|
continue
|
|
183
359
|
try:
|
|
184
360
|
result = ableton.send_command(tool, params)
|
|
185
|
-
executed.append({"tool": tool, "ok": True})
|
|
361
|
+
executed.append({"tool": tool, "ok": True, "backend": "remote_command"})
|
|
186
362
|
except Exception as exc:
|
|
187
|
-
executed.append({"tool": tool, "ok": False, "error": str(exc)})
|
|
363
|
+
executed.append({"tool": tool, "ok": False, "backend": "remote_command", "error": str(exc)})
|
|
364
|
+
|
|
365
|
+
branch.execution_log = executed
|
|
366
|
+
ok_count = sum(1 for e in executed if e.get("ok"))
|
|
367
|
+
failed_count = len(executed) - ok_count
|
|
368
|
+
|
|
369
|
+
if failed_count == 0 and ok_count > 0:
|
|
370
|
+
branch.status = "committed"
|
|
371
|
+
elif ok_count > 0:
|
|
372
|
+
branch.status = "committed_with_errors"
|
|
373
|
+
else:
|
|
374
|
+
branch.status = "failed"
|
|
375
|
+
return {
|
|
376
|
+
"committed": False,
|
|
377
|
+
"branch_id": branch_id,
|
|
378
|
+
"branch_name": branch.name,
|
|
379
|
+
"error": "No steps executed successfully",
|
|
380
|
+
"steps_attempted": len(executed),
|
|
381
|
+
"execution_log": executed,
|
|
382
|
+
}
|
|
188
383
|
|
|
189
|
-
branch.status = "committed"
|
|
190
384
|
experiment.winner_branch_id = branch_id
|
|
191
385
|
experiment.status = "committed"
|
|
192
386
|
|
|
@@ -194,7 +388,9 @@ def commit_branch(
|
|
|
194
388
|
"committed": True,
|
|
195
389
|
"branch_id": branch_id,
|
|
196
390
|
"branch_name": branch.name,
|
|
197
|
-
"steps_executed":
|
|
391
|
+
"steps_executed": ok_count,
|
|
392
|
+
"steps_failed": failed_count,
|
|
393
|
+
"status": branch.status,
|
|
198
394
|
"score": branch.score,
|
|
199
395
|
}
|
|
200
396
|
|
|
@@ -55,6 +55,12 @@ class ExperimentBranch:
|
|
|
55
55
|
evaluation: Optional[dict] = None
|
|
56
56
|
score: float = 0.0
|
|
57
57
|
|
|
58
|
+
# Execution log — per-step results from the async router. Non-empty when
|
|
59
|
+
# a branch has been run through run_branch or committed via commit_branch.
|
|
60
|
+
# Each entry: {tool, backend, ok, error, result}. Surfaced on to_dict()
|
|
61
|
+
# so callers can see exactly which steps succeeded or failed.
|
|
62
|
+
execution_log: list = field(default_factory=list)
|
|
63
|
+
|
|
58
64
|
# Metadata
|
|
59
65
|
created_at_ms: int = 0
|
|
60
66
|
executed_at_ms: int = 0
|
|
@@ -77,6 +83,10 @@ class ExperimentBranch:
|
|
|
77
83
|
d["after_snapshot"] = self.after_snapshot.to_dict()
|
|
78
84
|
if self.evaluation:
|
|
79
85
|
d["evaluation"] = self.evaluation
|
|
86
|
+
if self.execution_log:
|
|
87
|
+
d["execution_log"] = self.execution_log
|
|
88
|
+
d["steps_ok"] = sum(1 for e in self.execution_log if e.get("ok"))
|
|
89
|
+
d["steps_failed"] = sum(1 for e in self.execution_log if not e.get("ok"))
|
|
80
90
|
return d
|
|
81
91
|
|
|
82
92
|
|
|
@@ -116,7 +116,7 @@ def create_experiment(
|
|
|
116
116
|
|
|
117
117
|
|
|
118
118
|
@mcp.tool()
|
|
119
|
-
def run_experiment(
|
|
119
|
+
async def run_experiment(
|
|
120
120
|
ctx: Context,
|
|
121
121
|
experiment_id: str,
|
|
122
122
|
) -> dict:
|
|
@@ -125,10 +125,11 @@ def run_experiment(
|
|
|
125
125
|
For each branch:
|
|
126
126
|
1. Compile the semantic move against current session
|
|
127
127
|
2. Capture before state
|
|
128
|
-
3. Execute the compiled plan
|
|
128
|
+
3. Execute the compiled plan (through the async router — v1.10.3 truth)
|
|
129
129
|
4. Capture after state
|
|
130
|
-
5. Undo all
|
|
130
|
+
5. Undo all successful steps (revert to checkpoint)
|
|
131
131
|
6. Evaluate the branch
|
|
132
|
+
7. Record per-step results on branch.execution_log
|
|
132
133
|
|
|
133
134
|
Branches run sequentially (Ableton has linear undo).
|
|
134
135
|
"""
|
|
@@ -137,6 +138,8 @@ def run_experiment(
|
|
|
137
138
|
return {"error": f"Experiment {experiment_id} not found"}
|
|
138
139
|
|
|
139
140
|
ableton = _get_ableton(ctx)
|
|
141
|
+
bridge = ctx.lifespan_context.get("m4l")
|
|
142
|
+
mcp_registry = ctx.lifespan_context.get("mcp_dispatch", {})
|
|
140
143
|
|
|
141
144
|
# Import compiler
|
|
142
145
|
from ..semantic_moves import registry, compiler
|
|
@@ -149,7 +152,7 @@ def run_experiment(
|
|
|
149
152
|
# Compile the move
|
|
150
153
|
move = registry.get_move(branch.move_id)
|
|
151
154
|
if not move:
|
|
152
|
-
branch.status = "
|
|
155
|
+
branch.status = "failed"
|
|
153
156
|
branch.score = 0.0
|
|
154
157
|
branch.evaluation = {"error": f"Move {branch.move_id} not found"}
|
|
155
158
|
results.append(branch.to_dict())
|
|
@@ -160,12 +163,15 @@ def run_experiment(
|
|
|
160
163
|
plan = compiler.compile(move, kernel)
|
|
161
164
|
compiled_dict = plan.to_dict()
|
|
162
165
|
|
|
163
|
-
# Run the branch
|
|
164
|
-
engine.
|
|
166
|
+
# Run the branch through the async router
|
|
167
|
+
await engine.run_branch_async(
|
|
165
168
|
branch=branch,
|
|
166
169
|
ableton=ableton,
|
|
167
170
|
compiled_plan=compiled_dict,
|
|
168
171
|
capture_fn=lambda: _capture_snapshot(ctx),
|
|
172
|
+
bridge=bridge,
|
|
173
|
+
mcp_registry=mcp_registry,
|
|
174
|
+
ctx=ctx,
|
|
169
175
|
)
|
|
170
176
|
|
|
171
177
|
# Evaluate
|
|
@@ -236,22 +242,34 @@ def compare_experiments(
|
|
|
236
242
|
|
|
237
243
|
|
|
238
244
|
@mcp.tool()
async def commit_experiment(
    ctx: Context,
    experiment_id: str,
    branch_id: str,
) -> dict:
    """Commit the winning branch — re-apply its moves permanently.

    Routes the compiled plan through the async router (v1.10.3 truth).
    Returns a result dict with per-step execution_log. If any step failed,
    branch.status is set to 'committed_with_errors' and the response
    reports steps_failed > 0, so callers can tell the commit was partial.
    """
    experiment = engine.get_experiment(experiment_id)
    if not experiment:
        return {"error": f"Experiment {experiment_id} not found"}

    ableton = _get_ableton(ctx)
    # The optional backends are looked up in the lifespan context: "m4l" may be
    # absent (None), and the dispatch registry defaults to an empty dict.
    lifespan = ctx.lifespan_context

    return await engine.commit_branch_async(
        experiment,
        branch_id,
        ableton,
        bridge=lifespan.get("m4l"),
        mcp_registry=lifespan.get("mcp_dispatch", {}),
        ctx=ctx,
    )
|
|
255
273
|
|
|
256
274
|
|
|
257
275
|
@mcp.tool()
|
|
@@ -20,15 +20,69 @@ _MAX_WONDER_OUTCOMES = 10
|
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
def _format_fingerprint_number(value, spec: str, default: float) -> str:
    """Format *value* with *spec*, tolerating ``None`` and non-numeric input.

    Values that format directly produce exactly the same text as a plain
    f-string would, so fingerprints of well-formed sessions are unchanged.
    Numeric strings are parsed with ``float``; anything else uses *default*.
    """
    try:
        return format(value, spec)
    except (TypeError, ValueError):
        pass
    try:
        return format(float(value), spec)
    except (TypeError, ValueError):
        return format(default, spec)


def project_hash(session_info: dict) -> str:
    """Compute a project fingerprint from session info.

    The fingerprint is the first 12 hex characters of a SHA-256 digest over:
      * tempo (1 decimal)
      * time signature (numerator/denominator)
      * song_length (2 decimals)
      * ORDERED track list: (index, name, color_index, has_midi_input)
      * ORDERED return track list: (index, name)
      * ORDERED scene list: (index, name, color_index)
      * the track / return track / scene counts

    This is a fingerprint, not a true project ID: two sessions that agree
    on every field above produce the same hash. Reordering tracks or scenes
    changes the hash because positions are part of the seed.

    Missing keys use defaults (tempo 120.0, signature 4/4, length 0.0, empty
    lists). A ``tempo`` or ``song_length`` that is ``None`` or non-numeric
    also falls back to its default, and a ``has_midi_input`` of ``None``
    counts as 0, instead of raising. Entries in the track / return / scene
    lists that are not dicts are skipped in the signature but still counted.

    Args:
        session_info: Session description dict; every key is optional.

    Returns:
        A 12-character lowercase hexadecimal string.
    """
    tempo_text = _format_fingerprint_number(
        session_info.get("tempo", 120.0), ".1f", 120.0
    )
    length_text = _format_fingerprint_number(
        session_info.get("song_length", 0.0), ".2f", 0.0
    )
    sig_num = session_info.get("signature_numerator", 4)
    sig_denom = session_info.get("signature_denominator", 4)

    tracks = session_info.get("tracks", []) or []
    # Ordered track signature — (index, name, color, has_midi_input).
    # `or False` maps an explicit None to 0 without changing any value that
    # int() already accepted.
    track_sig = "|".join(
        f"{t.get('index', i)}:{t.get('name', '')}:{t.get('color_index', 0)}"
        f":{int(t.get('has_midi_input') or False)}"
        for i, t in enumerate(tracks)
        if isinstance(t, dict)
    )

    return_tracks = session_info.get("return_tracks", []) or []
    return_sig = "|".join(
        f"{r.get('index', i)}:{r.get('name', '')}"
        for i, r in enumerate(return_tracks)
        if isinstance(r, dict)
    )

    scenes = session_info.get("scenes", []) or []
    scene_sig = "|".join(
        f"{s.get('index', i)}:{s.get('name', '')}:{s.get('color_index', 0)}"
        for i, s in enumerate(scenes)
        if isinstance(s, dict)
    )

    seed = "||".join([
        f"t={tempo_text}",
        f"sig={sig_num}/{sig_denom}",
        f"len={length_text}",
        f"n_tracks={len(tracks)}",
        f"tracks=[{track_sig}]",
        f"n_returns={len(return_tracks)}",
        f"returns=[{return_sig}]",
        f"n_scenes={len(scenes)}",
        f"scenes=[{scene_sig}]",
    ])
    return hashlib.sha256(seed.encode()).hexdigest()[:12]
|
|
33
87
|
|
|
34
88
|
|
|
@@ -225,18 +225,33 @@ def compare_preview_variants(
|
|
|
225
225
|
|
|
226
226
|
|
|
227
227
|
@mcp.tool()
|
|
228
|
-
def commit_preview_variant(
|
|
228
|
+
async def commit_preview_variant(
|
|
229
229
|
ctx: Context,
|
|
230
230
|
set_id: str,
|
|
231
231
|
variant_id: str,
|
|
232
232
|
) -> dict:
|
|
233
|
-
"""Commit the chosen variant from a preview set.
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
233
|
+
"""Commit the chosen variant from a preview set — APPLIES the plan.
|
|
234
|
+
|
|
235
|
+
v1.10.3 Truth Release: this tool used to only mark the variant as
|
|
236
|
+
committed in the in-memory store and leave plan application to the
|
|
237
|
+
caller, which was a trust leak — users expected "commit" to actually
|
|
238
|
+
apply the chosen variant. It now actually runs the variant's compiled
|
|
239
|
+
plan through the async execution router. No undo after, the changes
|
|
240
|
+
stick.
|
|
241
|
+
|
|
242
|
+
Returns:
|
|
243
|
+
{
|
|
244
|
+
committed: bool (true if all steps applied, false if plan failed),
|
|
245
|
+
variant_id, label, intent, move_id, identity_effect, what_preserved,
|
|
246
|
+
execution_log: [{tool, backend, ok, error/result} per step],
|
|
247
|
+
steps_ok: int,
|
|
248
|
+
steps_failed: int,
|
|
249
|
+
status: "committed" | "committed_with_errors" | "failed",
|
|
250
|
+
}
|
|
237
251
|
|
|
238
|
-
|
|
239
|
-
|
|
252
|
+
If the variant is analytical-only (no compiled_plan), the tool records
|
|
253
|
+
the choice and returns status="analytical_only" WITHOUT pretending to
|
|
254
|
+
execute anything — callers get a clear signal instead of a silent no-op.
|
|
240
255
|
"""
|
|
241
256
|
ps = engine.get_preview_set(set_id)
|
|
242
257
|
if not ps:
|
|
@@ -260,6 +275,57 @@ def commit_preview_variant(
|
|
|
260
275
|
"what_preserved": chosen.what_preserved,
|
|
261
276
|
}
|
|
262
277
|
|
|
278
|
+
# ── v1.10.3: actually execute the compiled plan ──
|
|
279
|
+
# If there's no compiled plan, the variant is analytical-only — record
|
|
280
|
+
# the choice and return honestly instead of pretending it was applied.
|
|
281
|
+
if not chosen.compiled_plan:
|
|
282
|
+
result["committed"] = False
|
|
283
|
+
result["status"] = "analytical_only"
|
|
284
|
+
result["note"] = (
|
|
285
|
+
"Variant has no compiled plan (analytical-only). Preview set "
|
|
286
|
+
"marked the choice but no session changes were made. Use an "
|
|
287
|
+
"executable variant if you want the commit to apply changes."
|
|
288
|
+
)
|
|
289
|
+
else:
|
|
290
|
+
from ..runtime.execution_router import execute_plan_steps_async
|
|
291
|
+
plan = chosen.compiled_plan
|
|
292
|
+
steps = plan if isinstance(plan, list) else plan.get("steps", []) or []
|
|
293
|
+
ableton = _get_ableton(ctx)
|
|
294
|
+
bridge = ctx.lifespan_context.get("m4l")
|
|
295
|
+
mcp_registry = ctx.lifespan_context.get("mcp_dispatch", {})
|
|
296
|
+
|
|
297
|
+
exec_results = await execute_plan_steps_async(
|
|
298
|
+
steps,
|
|
299
|
+
ableton=ableton,
|
|
300
|
+
bridge=bridge,
|
|
301
|
+
mcp_registry=mcp_registry,
|
|
302
|
+
ctx=ctx,
|
|
303
|
+
stop_on_failure=False,
|
|
304
|
+
)
|
|
305
|
+
log = [
|
|
306
|
+
{
|
|
307
|
+
"tool": r.tool,
|
|
308
|
+
"backend": r.backend,
|
|
309
|
+
"ok": r.ok,
|
|
310
|
+
**({"result": r.result} if r.ok else {"error": r.error}),
|
|
311
|
+
}
|
|
312
|
+
for r in exec_results
|
|
313
|
+
]
|
|
314
|
+
steps_ok = sum(1 for r in exec_results if r.ok)
|
|
315
|
+
steps_failed = len(exec_results) - steps_ok
|
|
316
|
+
|
|
317
|
+
result["execution_log"] = log
|
|
318
|
+
result["steps_ok"] = steps_ok
|
|
319
|
+
result["steps_failed"] = steps_failed
|
|
320
|
+
|
|
321
|
+
if steps_failed == 0 and steps_ok > 0:
|
|
322
|
+
result["status"] = "committed"
|
|
323
|
+
elif steps_ok > 0:
|
|
324
|
+
result["status"] = "committed_with_errors"
|
|
325
|
+
else:
|
|
326
|
+
result["status"] = "failed"
|
|
327
|
+
result["committed"] = False
|
|
328
|
+
|
|
263
329
|
# Wonder lifecycle hooks
|
|
264
330
|
ws = _find_wonder_session_by_preview(set_id)
|
|
265
331
|
if ws:
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "livepilot",
|
|
3
|
-
"version": "1.10.
|
|
3
|
+
"version": "1.10.3",
|
|
4
4
|
"mcpName": "io.github.dreamrec/livepilot",
|
|
5
5
|
"description": "Agentic production system for Ableton Live 12 — 317 tools, 43 domains. Device atlas (1305 devices), sample engine (Splice + browser + filesystem), auto-composition, spectral perception, technique memory, creative intelligence (12 engines)",
|
|
6
6
|
"author": "Pilot Studio",
|
|
@@ -5,7 +5,7 @@ Entry point for the ControlSurface. Ableton calls create_instance(c_instance)
|
|
|
5
5
|
when this script is selected in Preferences > Link, Tempo & MIDI.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
__version__ = "1.10.
|
|
8
|
+
__version__ = "1.10.3"
|
|
9
9
|
|
|
10
10
|
from _Framework.ControlSurface import ControlSurface
|
|
11
11
|
from .server import LivePilotServer
|
package/scripts/sync_metadata.py
CHANGED
|
@@ -19,13 +19,13 @@ ROOT = Path(__file__).resolve().parents[1]
|
|
|
19
19
|
|
|
20
20
|
def get_version() -> str:
    """Return the ``version`` field of the repo's package.json (source of truth)."""
    manifest_path = ROOT / "package.json"
    # Decode explicitly as UTF-8 so the result does not depend on the locale.
    manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
    return manifest["version"]
|
|
24
24
|
|
|
25
25
|
|
|
26
26
|
def get_tool_count() -> int:
|
|
27
27
|
"""Read tool count from test_tools_contract.py assertion."""
|
|
28
|
-
src = (ROOT / "tests" / "test_tools_contract.py").read_text()
|
|
28
|
+
src = (ROOT / "tests" / "test_tools_contract.py").read_text(encoding="utf-8")
|
|
29
29
|
match = re.search(r"assert len\(tools\) == (\d+)", src)
|
|
30
30
|
if match:
|
|
31
31
|
return int(match.group(1))
|
|
@@ -73,7 +73,7 @@ def check_version(version: str) -> list[str]:
|
|
|
73
73
|
path = ROOT / rel_path
|
|
74
74
|
if not path.exists():
|
|
75
75
|
continue
|
|
76
|
-
content = path.read_text()
|
|
76
|
+
content = path.read_text(encoding="utf-8")
|
|
77
77
|
if version not in content:
|
|
78
78
|
# Find what version IS there
|
|
79
79
|
old = re.search(r"1\.\d+\.\d+", content)
|
|
@@ -91,7 +91,7 @@ def check_tool_count(count: int) -> list[str]:
|
|
|
91
91
|
path = ROOT / rel_path
|
|
92
92
|
if not path.exists():
|
|
93
93
|
continue
|
|
94
|
-
content = path.read_text()
|
|
94
|
+
content = path.read_text(encoding="utf-8")
|
|
95
95
|
# Look for "N tools" pattern
|
|
96
96
|
matches = re.findall(r"(\d+)\s+tools", content)
|
|
97
97
|
for m in matches:
|