aiden-runtime 4.1.2 → 4.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/v4/aidenCLI.js +10 -0
- package/dist/cli/v4/callbacks.js +85 -13
- package/dist/cli/v4/chatSession.js +250 -24
- package/dist/cli/v4/commands/doctor.js +23 -27
- package/dist/cli/v4/commands/model.js +30 -1
- package/dist/cli/v4/defaultSoul.js +69 -2
- package/dist/cli/v4/display/capabilityCard.js +135 -0
- package/dist/cli/v4/display/frame.js +234 -0
- package/dist/cli/v4/display/progressBar.js +137 -0
- package/dist/cli/v4/display/sessionEndCard.js +127 -0
- package/dist/cli/v4/display/toolTrail.js +172 -0
- package/dist/cli/v4/display.js +891 -153
- package/dist/cli/v4/doctor.js +377 -75
- package/dist/cli/v4/promotionPrompt.js +135 -5
- package/dist/cli/v4/replyRenderer.js +487 -26
- package/dist/cli/v4/skinEngine.js +26 -4
- package/dist/cli/v4/toolPreview.js +82 -19
- package/dist/core/tools/nowPlaying.js +7 -15
- package/dist/core/v4/aidenAgent.js +9 -0
- package/dist/core/v4/promptBuilder.js +2 -1
- package/dist/core/v4/sessionDistiller.js +48 -1
- package/dist/core/v4/toolRegistry.js +16 -1
- package/dist/core/version.js +1 -1
- package/dist/moat/plannerGuard.js +19 -0
- package/dist/providers/v4/anthropicAdapter.js +25 -2
- package/dist/providers/v4/errors.js +92 -0
- package/dist/tools/v4/index.js +24 -1
- package/dist/tools/v4/sessions/recallSession.js +14 -0
- package/dist/tools/v4/system/_psHelpers.js +70 -2
- package/dist/tools/v4/system/appInput.js +154 -0
- package/dist/tools/v4/system/appLaunch.js +136 -10
- package/dist/tools/v4/system/mediaKey.js +35 -4
- package/dist/tools/v4/system/mediaSessions.js +163 -0
- package/dist/tools/v4/system/mediaTransport.js +211 -0
- package/package.json +2 -1
- package/skills/system_control.md +56 -6
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Copyright (c) 2026 Shiva Deore (Taracod).
|
|
4
|
+
* Licensed under AGPL-3.0. See LICENSE for details.
|
|
5
|
+
*
|
|
6
|
+
* Aiden — local-first agent.
|
|
7
|
+
*/
|
|
8
|
+
/**
|
|
9
|
+
* tools/v4/system/mediaTransport.ts — `media_transport` tool. v4.1.4-media.
|
|
10
|
+
*
|
|
11
|
+
* Verified play/pause/skip against a specific GSMTC session. Replaces
|
|
12
|
+
* the blind-keystroke `media_key` behavior for the common case where
|
|
13
|
+
* the user names an app ("pause Spotify", "resume YouTube"): instead
|
|
14
|
+
* of blasting VK_MEDIA_PLAY_PAUSE at whichever app the OS most
|
|
15
|
+
* recently routed to, we enumerate sessions, match the target by
|
|
16
|
+
* AppUserModelId substring (or fall back to title contains), and call
|
|
17
|
+
* `TryPlayAsync()` / `TryPauseAsync()` / etc. directly on that session.
|
|
18
|
+
*
|
|
19
|
+
* Layer 2 of the three-layer media-control hierarchy v4.1.4 establishes:
|
|
20
|
+
* 1. Semantic API (Spotify Web API when authed) — out of this slice
|
|
21
|
+
* 2. OS media-session API (GSMTC) ← this tool writes
|
|
22
|
+
* 3. Global media keys (mediaKey tool) — blind fallback
|
|
23
|
+
*
|
|
24
|
+
* Honesty story: unlike `media_key`'s blind keystroke + degraded flag,
|
|
25
|
+
* `media_transport` reports `success: true` ONLY when GSMTC returns
|
|
26
|
+
* its `Success` result. Failures (session disappeared mid-call, app
|
|
27
|
+
* doesn't support that action, no matching target) surface as
|
|
28
|
+
* `success: false` with the specific reason. No degraded flag — we
|
|
29
|
+
* either have OS-confirmed action or we have an honest failure.
|
|
30
|
+
*/
|
|
31
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
32
|
+
exports.mediaTransportTool = void 0;
|
|
33
|
+
const _psHelpers_1 = require("./_psHelpers");
|
|
34
|
+
/**
 * GSMTC API call per action. Keys match the schema enum verbatim; each
 * value is the session method name that gets interpolated into the
 * generated PowerShell script.
 *
 * The table is built on a null prototype so that inherited names such as
 * "constructor" or "toString" do not resolve to a truthy value when the
 * table is indexed with a caller-supplied action, and it is frozen so the
 * mapping cannot be altered at runtime.
 */
const ACTION_METHOD = Object.freeze(Object.assign(Object.create(null), {
    play: 'TryPlayAsync',
    pause: 'TryPauseAsync',
    toggle: 'TryTogglePlayPauseAsync',
    next: 'TrySkipNextAsync',
    previous: 'TrySkipPreviousAsync',
    stop: 'TryStopAsync',
}));
|
|
43
|
+
/**
 * Build the PowerShell snippet. `target` is a case-insensitive substring
 * matched against each session's AppUserModelId first, then the track
 * title as a softer fallback. Empty/omitted target selects the current
 * session (matches the legacy `media_key` semantics, no surprise).
 *
 * Output: a single JSON line with `matched` (boolean — did we find a
 * session), `result` (`NoSession` when nothing was picked, otherwise
 * `Success` / `Failed` derived from the boolean the session call
 * returns) and `appUserModelId` of the picked session.
 *
 * The script assigns the `if` statement to `$status` before building the
 * hashtable; the embedded PowerShell comment records the PS 5.1 parsing
 * problem that motivated keeping the `if` out of the hashtable literal.
 *
 * @param {string} action - One of the own keys of `ACTION_METHOD`.
 * @param {string} [target] - Substring to match; non-strings are treated
 *   as empty (current session).
 * @returns {string} The trimmed PowerShell script text.
 * @throws {Error} If `action` is not an own key of `ACTION_METHOD`.
 */
function buildPs(action, target) {
    // Only own keys are trusted: the method name is spliced into the script
    // verbatim, so an inherited property or `undefined` must never reach it.
    if (typeof action !== 'string' ||
        !Object.prototype.hasOwnProperty.call(ACTION_METHOD, action)) {
        throw new Error(`Unknown action: ${String(action)}`);
    }
    const method = ACTION_METHOD[action];
    // Single-quote-escape target for PS string literal. PowerShell treats
    // the typographic quotes U+2018, U+2019, U+201A and U+201B as
    // single-quote delimiters too, so every one of them is doubled — not
    // just the ASCII apostrophe. Lowercase compare happens inside the
    // script so the model can pass "Spotify" or "spotify".
    const rawTarget = typeof target === 'string' ? target : '';
    const safeTarget = rawTarget.replace(/['\u2018\u2019\u201A\u201B]/g, '$&$&');
    return `
${(0, _psHelpers_1.winRtAwaitPreamble)()}
$mgType = [Windows.Media.Control.GlobalSystemMediaTransportControlsSessionManager,Windows.Media.Control,ContentType=WindowsRuntime]
$pType = [Windows.Media.Control.GlobalSystemMediaTransportControlsSessionMediaProperties,Windows.Media.Control,ContentType=WindowsRuntime]
$mgr = Await ($mgType::RequestAsync()) $mgType
$target = '${safeTarget}'
$picked = $null
if ($target.Length -gt 0) {
$lt = $target.ToLower()
foreach ($s in $mgr.GetSessions()) {
if ($s.SourceAppUserModelId -and $s.SourceAppUserModelId.ToLower().Contains($lt)) {
$picked = $s
break
}
}
if (-not $picked) {
# Soft fallback: title contains.
foreach ($s in $mgr.GetSessions()) {
$p = $null
try { $p = Await ($s.TryGetMediaPropertiesAsync()) $pType } catch { $p = $null }
if ($p -and $p.Title -and $p.Title.ToLower().Contains($lt)) {
$picked = $s
break
}
}
}
} else {
$picked = $mgr.GetCurrentSession()
}
if (-not $picked) {
@{ matched=$false; result='NoSession'; appUserModelId=$null } | ConvertTo-Json -Compress
exit 0
}
$res = Await ($picked.${method}()) ([bool])
# v4.1.3-essentials bugfix: PowerShell 5.1 does NOT accept a bare
# parenthesized \`if\` expression inside a hashtable literal — it
# parses \`(if ...)\` as a command invocation and fails with
# "The term 'if' is not recognized as the name of a cmdlet..." (no
# ternary operator until PS 7+). The \`$(...)\` subexpression
# operator forces statement-context evaluation in PS 5.1, which is
# what we need here.
$status = if ($res) { 'Success' } else { 'Failed' }
@{ matched=$true; result=$status; appUserModelId=$picked.SourceAppUserModelId } | ConvertTo-Json -Compress
`.trim();
}
|
|
103
|
+
exports.mediaTransportTool = {
|
|
104
|
+
schema: {
|
|
105
|
+
name: 'media_transport',
|
|
106
|
+
description: 'PREFERRED for named-app media control. Verified play/pause/skip ' +
|
|
107
|
+
'against a specific Windows GSMTC media session — returns OS-confirmed ' +
|
|
108
|
+
'success/failure, NOT a blind keystroke like `media_key`. Use this ' +
|
|
109
|
+
'whenever the user names an app ("pause Spotify", "resume YouTube"). ' +
|
|
110
|
+
'Target matches by AppUserModelId substring ("spotify" → Spotify.exe), ' +
|
|
111
|
+
'then track title as soft fallback. Omit `target` to act on the ' +
|
|
112
|
+
'current session. Pair with `media_sessions` (read) to enumerate ' +
|
|
113
|
+
'available apps. Windows-only in v4.1.4.',
|
|
114
|
+
inputSchema: {
|
|
115
|
+
type: 'object',
|
|
116
|
+
properties: {
|
|
117
|
+
action: {
|
|
118
|
+
type: 'string',
|
|
119
|
+
enum: ['play', 'pause', 'toggle', 'next', 'previous', 'stop'],
|
|
120
|
+
description: "Action to invoke on the matched session. 'toggle' flips " +
|
|
121
|
+
"play/pause. 'play' / 'pause' are explicit. 'next' / 'previous' " +
|
|
122
|
+
"skip tracks. 'stop' halts playback.",
|
|
123
|
+
},
|
|
124
|
+
target: {
|
|
125
|
+
type: 'string',
|
|
126
|
+
description: 'Optional app/track identifier. Case-insensitive substring ' +
|
|
127
|
+
'match against AppUserModelId first ("spotify" matches ' +
|
|
128
|
+
'Spotify.exe), then track title. Omit to act on the OS-routed ' +
|
|
129
|
+
'current session.',
|
|
130
|
+
},
|
|
131
|
+
},
|
|
132
|
+
required: ['action'],
|
|
133
|
+
},
|
|
134
|
+
},
|
|
135
|
+
category: 'execute',
|
|
136
|
+
mutates: true,
|
|
137
|
+
toolset: 'system',
|
|
138
|
+
async execute(args, _ctx) {
|
|
139
|
+
if (!(0, _psHelpers_1.isWindows)()) {
|
|
140
|
+
// v4.1.3-essentials: tailored capability card for non-Windows.
|
|
141
|
+
// Layer-1 (web API) and layer-3b (CDP) alternatives exist on
|
|
142
|
+
// every platform; only layer-2 (GSMTC verified transport) is
|
|
143
|
+
// Windows-bound.
|
|
144
|
+
return (0, _psHelpers_1.windowsOnlyError)('media_transport', {
|
|
145
|
+
canStill: [
|
|
146
|
+
'Use Spotify Web API via a skill that wraps OAuth + /me/player',
|
|
147
|
+
'Use Chrome DevTools Protocol (`browser_*` tools) to drive a YouTube tab',
|
|
148
|
+
'Use `shell_exec` with `playerctl` (Linux) or `osascript` (macOS) for system-wide control',
|
|
149
|
+
],
|
|
150
|
+
cannotReliably: [
|
|
151
|
+
'GSMTC-verified play/pause/skip with OS-level success confirmation',
|
|
152
|
+
'Target a specific app by AppUserModelId without OS media-session APIs',
|
|
153
|
+
],
|
|
154
|
+
fix: 'Run Aiden on Windows for GSMTC, OR install a Spotify-OAuth skill ' +
|
|
155
|
+
'for layer-1 control, OR use `shell_exec` with the platform\'s media-key utility.',
|
|
156
|
+
});
|
|
157
|
+
}
|
|
158
|
+
const action = args.action;
|
|
159
|
+
if (!ACTION_METHOD[action]) {
|
|
160
|
+
return {
|
|
161
|
+
success: false,
|
|
162
|
+
error: `Unknown action: ${String(args.action)}. ` +
|
|
163
|
+
`Valid: ${Object.keys(ACTION_METHOD).join(', ')}`,
|
|
164
|
+
};
|
|
165
|
+
}
|
|
166
|
+
const target = typeof args.target === 'string' ? args.target.trim() : '';
|
|
167
|
+
try {
|
|
168
|
+
const { stdout } = await (0, _psHelpers_1.runPowerShell)(buildPs(action, target), {
|
|
169
|
+
timeoutMs: 8000,
|
|
170
|
+
});
|
|
171
|
+
const trimmed = stdout.trim();
|
|
172
|
+
if (trimmed.length === 0) {
|
|
173
|
+
return {
|
|
174
|
+
success: false,
|
|
175
|
+
error: 'media_transport returned empty output from PowerShell',
|
|
176
|
+
};
|
|
177
|
+
}
|
|
178
|
+
const parsed = JSON.parse(trimmed);
|
|
179
|
+
if (!parsed.matched) {
|
|
180
|
+
return {
|
|
181
|
+
success: false,
|
|
182
|
+
error: target
|
|
183
|
+
? `No media session matched target "${target}". Call media_sessions to see what's available.`
|
|
184
|
+
: 'No active media session. Open a media app first (Spotify, YouTube, etc.).',
|
|
185
|
+
};
|
|
186
|
+
}
|
|
187
|
+
if (parsed.result !== 'Success') {
|
|
188
|
+
return {
|
|
189
|
+
success: false,
|
|
190
|
+
error: `GSMTC ${action} returned ${parsed.result} for ${parsed.appUserModelId}. ` +
|
|
191
|
+
`The app may not support that action in its current state.`,
|
|
192
|
+
appUserModelId: parsed.appUserModelId,
|
|
193
|
+
};
|
|
194
|
+
}
|
|
195
|
+
// OS-confirmed success. No degraded flag — unlike media_key we
|
|
196
|
+
// KNOW the action landed on a specific session and the OS
|
|
197
|
+
// accepted it.
|
|
198
|
+
return {
|
|
199
|
+
success: true,
|
|
200
|
+
action,
|
|
201
|
+
appUserModelId: parsed.appUserModelId,
|
|
202
|
+
};
|
|
203
|
+
}
|
|
204
|
+
catch (e) {
|
|
205
|
+
return {
|
|
206
|
+
success: false,
|
|
207
|
+
error: e instanceof Error ? e.message : String(e),
|
|
208
|
+
};
|
|
209
|
+
}
|
|
210
|
+
},
|
|
211
|
+
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "aiden-runtime",
|
|
3
|
-
"version": "4.1.2",
|
|
3
|
+
"version": "4.1.4",
|
|
4
4
|
"publishConfig": {
|
|
5
5
|
"access": "public"
|
|
6
6
|
},
|
|
@@ -277,6 +277,7 @@
|
|
|
277
277
|
"twilio": "^5.13.1",
|
|
278
278
|
"uuid": "^9.0.0",
|
|
279
279
|
"whatsapp-web.js": "^1.26.0",
|
|
280
|
+
"wrap-ansi": "^9.0.2",
|
|
280
281
|
"ws": "^8.20.0"
|
|
281
282
|
},
|
|
282
283
|
"optionalDependencies": {
|
package/skills/system_control.md
CHANGED
|
@@ -54,10 +54,34 @@ Replace the clipboard with new text. Handles multi-line strings safely
|
|
|
54
54
|
{ "tool": "clipboard_write", "input": { "text": "Hello, world!" } }
|
|
55
55
|
```
|
|
56
56
|
|
|
57
|
+
### media_sessions
|
|
58
|
+
Enumerate every Windows media session registered with the OS (Spotify,
|
|
59
|
+
YouTube in browser, VLC, etc.). One entry per app, with which one is
|
|
60
|
+
the OS-routed target for global media keys. Use this BEFORE
|
|
61
|
+
`media_transport` when controlling a specific app.
|
|
62
|
+
```json
|
|
63
|
+
{ "tool": "media_sessions", "input": {} }
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### media_transport
|
|
67
|
+
Verified play / pause / skip against a specific GSMTC media session.
|
|
68
|
+
Targets by `AppUserModelId` substring (case-insensitive — "spotify"
|
|
69
|
+
matches `Spotify.exe`), then by track title as a softer fallback. Omit
|
|
70
|
+
`target` to act on the OS-routed current session. Returns OS-level
|
|
71
|
+
success/failure — NOT a blind keystroke like `media_key`.
|
|
72
|
+
```json
|
|
73
|
+
{ "tool": "media_transport", "input": { "action": "pause", "target": "spotify" } }
|
|
74
|
+
{ "tool": "media_transport", "input": { "action": "play", "target": "spotify" } }
|
|
75
|
+
{ "tool": "media_transport", "input": { "action": "next", "target": "youtube" } }
|
|
76
|
+
{ "tool": "media_transport", "input": { "action": "toggle" } }
|
|
77
|
+
```
|
|
78
|
+
|
|
57
79
|
### media_key
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
80
|
+
Blind global media keypress (`VK_MEDIA_PLAY_PAUSE` and friends). Layer-3
|
|
81
|
+
fallback for the rare case where neither a semantic API nor GSMTC can
|
|
82
|
+
act. Prefer `media_transport` whenever the user names an app — this
|
|
83
|
+
tool returns `degraded:true` because Windows doesn't surface the SMTC
|
|
84
|
+
routing outcome to user-mode, so we can't verify any app received it.
|
|
61
85
|
```json
|
|
62
86
|
{ "tool": "media_key", "input": { "action": "play_pause" } }
|
|
63
87
|
{ "tool": "media_key", "input": { "action": "next" } }
|
|
@@ -65,6 +89,17 @@ inspect state first.
|
|
|
65
89
|
{ "tool": "media_key", "input": { "action": "stop" } }
|
|
66
90
|
```
|
|
67
91
|
|
|
92
|
+
### app_input
|
|
93
|
+
Focus a Windows application by process name and send a SendKeys
|
|
94
|
+
sequence to it. Escape hatch when GSMTC doesn't enumerate the surface
|
|
95
|
+
("press space in Chrome to pause this YouTube tab"). Always returns
|
|
96
|
+
`degraded:true` — SendKeys cannot verify receipt at the target window.
|
|
97
|
+
```json
|
|
98
|
+
{ "tool": "app_input", "input": { "app": "chrome", "keys": "{SPACE}" } }
|
|
99
|
+
{ "tool": "app_input", "input": { "app": "notepad", "keys": "Hello{ENTER}" } }
|
|
100
|
+
{ "tool": "app_input", "input": { "app": "Spotify", "keys": "^{RIGHT}" } }
|
|
101
|
+
```
|
|
102
|
+
|
|
68
103
|
### volume_set
|
|
69
104
|
Set Windows master volume to a percentage, or mute / unmute / toggle.
|
|
70
105
|
```json
|
|
@@ -126,9 +161,24 @@ turn into common requests.
|
|
|
126
161
|
1. `os_process_list` with `name: "<substring>"` → returns matching processes
|
|
127
162
|
2. If `count === 0` → tell the user honestly, suggest `app_launch`
|
|
128
163
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
164
|
+
## Media control — strict order
|
|
165
|
+
|
|
166
|
+
1. If the user names an app ("Spotify", "YouTube", "VLC") — ALWAYS try
|
|
167
|
+
`media_transport({action, target})` first. Verified, OS-confirmed.
|
|
168
|
+
2. If `media_transport` fails with `success: false` and a "No media session…" error, OR the user didn't name an app
|
|
169
|
+
— fall back to `media_key({action})`. Blind global keystroke, returns
|
|
170
|
+
`degraded:true` because Windows can't tell us if anything received it.
|
|
171
|
+
3. If GSMTC doesn't enumerate the surface at all (e.g. a YouTube tab the
|
|
172
|
+
browser hasn't registered with SMTC) — last resort: `app_input({app,
|
|
173
|
+
keys})` to focus the window and send a keystroke directly.
|
|
174
|
+
|
|
175
|
+
Never call `media_key` and `media_transport` in the same turn — redundant.
|
|
176
|
+
First call gives you the answer; second is noise the user has to read.
|
|
177
|
+
|
|
178
|
+
Honesty contract:
|
|
179
|
+
- `media_transport` success is OS-confirmed → trail row is silent (success).
|
|
180
|
+
- `media_key` and `app_input` always report `degraded:true` → yellow trail
|
|
181
|
+
row, because neither can verify receipt at the target app.
|
|
132
182
|
|
|
133
183
|
**Volume change with feedback:**
|
|
134
184
|
1. `volume_set` → returns the resulting volume percent in `result`
|