aiden-runtime 4.1.2 → 4.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/v4/aidenCLI.js +10 -0
- package/dist/cli/v4/callbacks.js +85 -13
- package/dist/cli/v4/chatSession.js +250 -24
- package/dist/cli/v4/commands/doctor.js +23 -27
- package/dist/cli/v4/commands/model.js +30 -1
- package/dist/cli/v4/defaultSoul.js +69 -2
- package/dist/cli/v4/display/capabilityCard.js +135 -0
- package/dist/cli/v4/display/frame.js +234 -0
- package/dist/cli/v4/display/progressBar.js +137 -0
- package/dist/cli/v4/display/sessionEndCard.js +127 -0
- package/dist/cli/v4/display/toolTrail.js +172 -0
- package/dist/cli/v4/display.js +891 -153
- package/dist/cli/v4/doctor.js +377 -75
- package/dist/cli/v4/promotionPrompt.js +135 -5
- package/dist/cli/v4/replyRenderer.js +487 -26
- package/dist/cli/v4/skinEngine.js +26 -4
- package/dist/cli/v4/toolPreview.js +82 -19
- package/dist/core/tools/nowPlaying.js +7 -15
- package/dist/core/v4/aidenAgent.js +9 -0
- package/dist/core/v4/promptBuilder.js +2 -1
- package/dist/core/v4/sessionDistiller.js +48 -1
- package/dist/core/v4/toolRegistry.js +16 -1
- package/dist/core/version.js +1 -1
- package/dist/moat/plannerGuard.js +19 -0
- package/dist/providers/v4/anthropicAdapter.js +25 -2
- package/dist/providers/v4/errors.js +92 -0
- package/dist/tools/v4/index.js +24 -1
- package/dist/tools/v4/sessions/recallSession.js +14 -0
- package/dist/tools/v4/system/_psHelpers.js +70 -2
- package/dist/tools/v4/system/appInput.js +154 -0
- package/dist/tools/v4/system/appLaunch.js +136 -10
- package/dist/tools/v4/system/mediaKey.js +35 -4
- package/dist/tools/v4/system/mediaSessions.js +163 -0
- package/dist/tools/v4/system/mediaTransport.js +211 -0
- package/package.json +2 -1
- package/skills/system_control.md +56 -6
|
@@ -19,6 +19,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
19
19
|
exports.isWindows = exports.execAsync = void 0;
|
|
20
20
|
exports.windowsOnlyError = windowsOnlyError;
|
|
21
21
|
exports.runPowerShell = runPowerShell;
|
|
22
|
+
exports.winRtAwaitPreamble = winRtAwaitPreamble;
|
|
22
23
|
const node_child_process_1 = require("node:child_process");
|
|
23
24
|
const node_util_1 = require("node:util");
|
|
24
25
|
exports.execAsync = (0, node_util_1.promisify)(node_child_process_1.exec);
|
|
@@ -26,13 +27,43 @@ exports.execAsync = (0, node_util_1.promisify)(node_child_process_1.exec);
|
|
|
26
27
|
* Standard "not supported on this platform" error payload. Surfaces a
|
|
27
28
|
* link the user can file an issue against rather than pretending the
|
|
28
29
|
* call quietly no-op'd.
|
|
30
|
+
*
|
|
31
|
+
* v4.1.3-essentials: now also returns a structured `capabilityCard`
|
|
32
|
+
* payload (per ToolCallResult.capabilityCard contract). The REPL
|
|
33
|
+
* renders the card as a bordered block above the bare-error fallback,
|
|
34
|
+
* giving non-Windows users a clear "here's what you can still do"
|
|
35
|
+
* surface instead of a one-line "platform unsupported" wall.
|
|
36
|
+
*
|
|
37
|
+
* The `canStill` / `cannotReliably` lists are passed by the caller so
|
|
38
|
+
* each tool can be specific (e.g. `app_input` mentions Chrome DevTools
|
|
39
|
+
* Protocol as a non-Windows alternative; `media_transport` points at
|
|
40
|
+
* `media_key` or a Spotify Web API skill instead). Falls back to a
|
|
41
|
+
* generic "use shell_exec for platform commands" hint when caller
|
|
42
|
+
* doesn't supply alternatives.
|
|
29
43
|
*/
|
|
30
|
-
function windowsOnlyError(toolName, alternatives) {
    // Caller-supplied overrides; any field left null/undefined falls back
    // to the generic wording below.
    const overrides = alternatives ?? {};
    const defaultFix = `Run Aiden on Windows for full \`${toolName}\` support, or file an ` +
        `issue at github.com/taracodlabs/aiden if your platform is a priority.`;
    // Structured card: what the user can still do, what this tool cannot
    // do on the current platform, and the suggested way forward.
    const capabilityCard = {
        title: `${toolName} requires Windows`,
        canStill: overrides.canStill ?? [
            'Use `shell_exec` to run platform-native commands directly',
            'Use `os_process_list` to inspect what\'s running',
        ],
        cannotReliably: overrides.cannotReliably ?? [
            `Call \`${toolName}\` until cross-platform support lands`,
        ],
        fix: overrides.fix ?? defaultFix,
    };
    // Plain-text error kept alongside the card; includes the detected
    // platform so reports are actionable.
    const error = [
        `Tool '${toolName}' is Windows-only. macOS/Linux `,
        `support tracked at github.com/taracodlabs/aiden — please file an `,
        `issue if needed. (Detected platform: ${process.platform})`,
    ].join('');
    return {
        success: false,
        error,
        requires: ['Windows'],
        capabilityCard,
    };
}
|
|
38
69
|
/**
|
|
@@ -53,3 +84,40 @@ async function runPowerShell(script, options = {}) {
|
|
|
53
84
|
}
|
|
54
85
|
const isWindows = () => process.platform === 'win32';
|
|
55
86
|
exports.isWindows = isWindows;
|
|
87
|
+
/**
 * v4.1.4-media: PowerShell 5.1 preamble that bridges WinRT
 * `IAsyncOperation<T>` into a .NET `Task<T>` via
 * `System.WindowsRuntimeSystemExtensions.AsTask`.
 *
 * Why (author's rationale): every WinRT call surface we touch —
 * `GlobalSystemMediaTransportControlsSessionManager.RequestAsync()`,
 * `Session.TryGetMediaPropertiesAsync()`, `Session.TryPlayAsync()`, etc. —
 * returns `IAsyncOperation<T>`. PS5.1 (the shell we target — it ships on
 * every stock Win10/11 install) cannot call `.GetAwaiter().GetResult()` on
 * those because WinRT awaiters aren't recognized as TPL-compatible.
 *
 * What the emitted script does:
 *   1. Loads the `System.Runtime.WindowsRuntime` assembly.
 *   2. Defines a PowerShell function `Await($WinRtTask, $ResultType)` that
 *      picks the `AsTask` overload taking exactly one parameter whose type
 *      name is ``IAsyncOperation`1``, closes it over `$ResultType` with
 *      `MakeGenericMethod`, invokes it on `$WinRtTask`, blocks on the
 *      resulting task with `Wait(-1)`, and emits the task's `Result`.
 *
 * Three callers consume this string (per the original note):
 *   - `core/tools/nowPlaying.ts` (read GSMTC properties)
 *   - `tools/v4/system/mediaSessions.ts` (enumerate GSMTC sessions)
 *   - `tools/v4/system/mediaTransport.ts` (play/pause/skip on a target)
 *
 * Pure: no side effects and no PowerShell execution happen here; the caller
 * composes the returned text into a larger PS script. The literal has no
 * leading/trailing whitespace so callers can interpolate without surprises.
 *
 * @returns {string} PowerShell source defining the `Await` helper.
 */
function winRtAwaitPreamble() {
    return `Add-Type -AssemblyName System.Runtime.WindowsRuntime
function Await($WinRtTask, $ResultType) {
$m = ([System.WindowsRuntimeSystemExtensions].GetMethods() | Where-Object {
$_.Name -eq 'AsTask' -and
$_.GetParameters().Count -eq 1 -and
$_.GetParameters()[0].ParameterType.Name -eq 'IAsyncOperation\`1'
})[0]
$m = $m.MakeGenericMethod($ResultType)
$t = $m.Invoke($null, @($WinRtTask))
$t.Wait(-1) | Out-Null
$t.Result
}`;
}
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Copyright (c) 2026 Shiva Deore (Taracod).
|
|
4
|
+
* Licensed under AGPL-3.0. See LICENSE for details.
|
|
5
|
+
*
|
|
6
|
+
* Aiden — local-first agent.
|
|
7
|
+
*/
|
|
8
|
+
/**
|
|
9
|
+
* tools/v4/system/appInput.ts — `app_input` tool. v4.1.4-media.
|
|
10
|
+
*
|
|
11
|
+
* Focus a window by process name, then send a SendKeys keystroke
|
|
12
|
+
* sequence to it. Useful escape hatch when neither the semantic API
|
|
13
|
+
* (layer 1) nor GSMTC (layer 2) surface a control — e.g. "press space
|
|
14
|
+
* in Chrome to pause this YouTube tab" when GSMTC doesn't enumerate
|
|
15
|
+
* the page as a media session.
|
|
16
|
+
*
|
|
17
|
+
* Honest about what it doesn't do: SendKeys lands keys in whatever
|
|
18
|
+
* window has focus AT THE MOMENT of the keystroke. We try
|
|
19
|
+
* AppActivate, but Windows refuses foreground activation when the
|
|
20
|
+
* calling process didn't recently receive input — the call returns
|
|
21
|
+
* a result we surface, but receipt at the target app is not
|
|
22
|
+
* guaranteed. Hence `degraded: true` on every successful invocation
|
|
23
|
+
* (mirrors the v4.1.3 honesty-degraded convention from `media_key`).
|
|
24
|
+
*
|
|
25
|
+
* Scope (v4.1.4): focus + SendKeys only. Mouse click coordinates,
|
|
26
|
+
* window-coords resolution, UI Automation deferred.
|
|
27
|
+
*/
|
|
28
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
29
|
+
exports.appInputTool = void 0;
|
|
30
|
+
const _psHelpers_1 = require("./_psHelpers");
|
|
31
|
+
/**
 * Build the PowerShell snippet for `app_input`. The script calls
 * `WScript.Shell.AppActivate` with the given name, sleeps 150 ms, sends
 * the keystroke sequence with `SendKeys.SendWait`, then prints
 * `{"activated":<bool>}` as compressed JSON so the caller can see whether
 * focus probably landed.
 *
 * Note on AppActivate: it returns $true if a window was activated, $false
 * otherwise. It does NOT confirm the window is the foreground from the
 * OS's perspective — Windows sometimes flashes the taskbar entry instead.
 * The flag is passed through as `activated` for transparency.
 *
 * Both inputs are embedded in PowerShell single-quoted literals.
 * PowerShell recognises the typographic quotes U+2018, U+2019, U+201A and
 * U+201B as single-quote delimiters in addition to ASCII `'`, so every one
 * of them is doubled; escaping only `'` would let a smart quote terminate
 * the literal and inject script.
 *
 * @param {string} processName Process name or window-title substring for AppActivate.
 * @param {string} keys        SendKeys-format keystroke sequence.
 * @returns {string} Single-line PowerShell script.
 */
function buildPs(processName, keys) {
    // Double every character PowerShell treats as a single-quote delimiter.
    const escapeForSingleQuotes = (text) => text.replace(/['\u2018\u2019\u201A\u201B]/g, '$&$&');
    const safeProc = escapeForSingleQuotes(processName);
    const safeKeys = escapeForSingleQuotes(keys);
    return [
        'Add-Type -AssemblyName Microsoft.VisualBasic;',
        'Add-Type -AssemblyName System.Windows.Forms;',
        '$shell = New-Object -ComObject WScript.Shell;',
        `$activated = $shell.AppActivate('${safeProc}');`,
        // Give the OS ~150ms to settle before keystrokes — without this
        // the keys can land in the calling shell on slower hardware.
        'Start-Sleep -Milliseconds 150;',
        `[System.Windows.Forms.SendKeys]::SendWait('${safeKeys}');`,
        "@{ activated=[bool]$activated } | ConvertTo-Json -Compress;",
    ].join(' ');
}
|
|
59
|
+
exports.appInputTool = {
|
|
60
|
+
schema: {
|
|
61
|
+
name: 'app_input',
|
|
62
|
+
description: 'Focus a Windows application window by process name and send a ' +
|
|
63
|
+
'SendKeys keystroke sequence to it. Use as a layer-3 fallback when ' +
|
|
64
|
+
'neither a semantic API (layer 1, e.g. Spotify Web API) nor GSMTC ' +
|
|
65
|
+
'(layer 2, `media_transport`) can do the job. Examples: "{SPACE}" to ' +
|
|
66
|
+
'pause a YouTube tab in Chrome, "^l" for Ctrl+L address-bar focus. ' +
|
|
67
|
+
'Receipt at the target app is best-effort — Windows can refuse ' +
|
|
68
|
+
'foreground activation; the tool reports `degraded:true` even on ' +
|
|
69
|
+
'apparent success. Windows-only in v4.1.4.',
|
|
70
|
+
inputSchema: {
|
|
71
|
+
type: 'object',
|
|
72
|
+
properties: {
|
|
73
|
+
app: {
|
|
74
|
+
type: 'string',
|
|
75
|
+
description: 'Process name (with or without .exe) or window-title substring ' +
|
|
76
|
+
'AppActivate accepts: "chrome", "Spotify", "Notepad", etc.',
|
|
77
|
+
},
|
|
78
|
+
keys: {
|
|
79
|
+
type: 'string',
|
|
80
|
+
description: 'SendKeys-format keystroke sequence. Examples: "{SPACE}" = ' +
|
|
81
|
+
'space, "^c" = Ctrl+C, "%{TAB}" = Alt+Tab, "Hello{ENTER}" = ' +
|
|
82
|
+
'literal text + Enter. See Microsoft\'s SendKeys docs for the ' +
|
|
83
|
+
'full grammar.',
|
|
84
|
+
},
|
|
85
|
+
},
|
|
86
|
+
required: ['app', 'keys'],
|
|
87
|
+
},
|
|
88
|
+
},
|
|
89
|
+
category: 'execute',
|
|
90
|
+
mutates: true,
|
|
91
|
+
toolset: 'system',
|
|
92
|
+
async execute(args, _ctx) {
|
|
93
|
+
if (!(0, _psHelpers_1.isWindows)()) {
|
|
94
|
+
return (0, _psHelpers_1.windowsOnlyError)('app_input', {
|
|
95
|
+
canStill: [
|
|
96
|
+
'`browser_*` tools for any browser-hosted UI (Playwright cross-platform)',
|
|
97
|
+
'`shell_exec` with `xdotool` (Linux X11) for arbitrary window input',
|
|
98
|
+
'`shell_exec` with `osascript` (macOS) for AppleScript-driven keystrokes',
|
|
99
|
+
],
|
|
100
|
+
cannotReliably: [
|
|
101
|
+
'AppActivate + SendKeys against a specific Windows process',
|
|
102
|
+
'VBA-style window focus by process-name substring',
|
|
103
|
+
],
|
|
104
|
+
fix: 'Run Aiden on Windows for native AppActivate, or use Playwright ' +
|
|
105
|
+
'(`browser_*`) / xdotool / osascript via `shell_exec` for your platform.',
|
|
106
|
+
});
|
|
107
|
+
}
|
|
108
|
+
const app = typeof args.app === 'string' ? args.app.trim() : '';
|
|
109
|
+
const keys = typeof args.keys === 'string' ? args.keys : '';
|
|
110
|
+
if (!app) {
|
|
111
|
+
return { success: false, error: '`app` is required and must be non-empty.' };
|
|
112
|
+
}
|
|
113
|
+
if (!keys) {
|
|
114
|
+
return { success: false, error: '`keys` is required and must be non-empty.' };
|
|
115
|
+
}
|
|
116
|
+
try {
|
|
117
|
+
const { stdout } = await (0, _psHelpers_1.runPowerShell)(buildPs(app, keys), {
|
|
118
|
+
timeoutMs: 5000,
|
|
119
|
+
});
|
|
120
|
+
const trimmed = stdout.trim();
|
|
121
|
+
let activated = false;
|
|
122
|
+
if (trimmed.length > 0) {
|
|
123
|
+
try {
|
|
124
|
+
const parsed = JSON.parse(trimmed);
|
|
125
|
+
activated = parsed.activated === true;
|
|
126
|
+
}
|
|
127
|
+
catch {
|
|
128
|
+
// Non-JSON output — degraded but not failed; the SendKeys
|
|
129
|
+
// call likely still ran. Surface in degradedReason.
|
|
130
|
+
activated = false;
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
return {
|
|
134
|
+
success: true,
|
|
135
|
+
app,
|
|
136
|
+
activated,
|
|
137
|
+
// v4.1.3-repl-polish honesty pattern: SendKeys cannot confirm
|
|
138
|
+
// receipt at the target window. AppActivate returning $true
|
|
139
|
+
// narrows the gap but doesn't close it — Windows can reject
|
|
140
|
+
// foreground activation silently. Always degraded.
|
|
141
|
+
degraded: true,
|
|
142
|
+
degradedReason: activated
|
|
143
|
+
? `keys sent to ${app}; activation reported success but cannot verify receipt`
|
|
144
|
+
: `keys sent; ${app} window activation reported failure — receipt unlikely`,
|
|
145
|
+
};
|
|
146
|
+
}
|
|
147
|
+
catch (e) {
|
|
148
|
+
return {
|
|
149
|
+
success: false,
|
|
150
|
+
error: e instanceof Error ? e.message : String(e),
|
|
151
|
+
};
|
|
152
|
+
}
|
|
153
|
+
},
|
|
154
|
+
};
|
|
@@ -19,24 +19,84 @@
|
|
|
19
19
|
*/
|
|
20
20
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
21
21
|
exports.appLaunchTool = void 0;
|
|
22
|
+
exports.processNameFromApp = processNameFromApp;
|
|
22
23
|
const _psHelpers_1 = require("./_psHelpers");
|
|
24
|
+
/**
 * Derive the bare process name we expect `Get-Process` to find after
 * launch. Trims surrounding whitespace, removes one pair of wrapping
 * double quotes, strips path components, drops a single trailing `.exe`
 * (case-insensitive) and lowercases the result.
 * Used by the v4.1.3-essentials launch-verification poll.
 *
 *   "C:\\Program Files\\Spotify\\Spotify.exe" → "spotify"
 *   "Spotify.exe"                             → "spotify"
 *   "spotify"                                 → "spotify"
 *   "notepad++.exe"                           → "notepad++"
 *   "  Spotify.exe  "                         → "spotify"
 *   "\"C:\\Apps\\VLC\\vlc.exe\""              → "vlc"
 *
 * Pure helper, exported for unit testing.
 *
 * @param {string} app App name or path as supplied to the launch tool.
 * @returns {string} Lowercased process name without path or `.exe`.
 */
function processNameFromApp(app) {
    // Edge whitespace or wrapping quotes would otherwise defeat the
    // `.exe` strip below and leak into the Get-Process probe.
    const cleaned = app.trim().replace(/^"(.*)"$/, '$1');
    // Strip path components (Windows uses \; tolerate / too). `split`
    // always yields at least one element, so `pop()` is never undefined.
    const leaf = cleaned.split(/[\\/]/).pop();
    // Drop a single trailing .exe (case-insensitive).
    return leaf.replace(/\.exe$/i, '').toLowerCase();
}
|
|
23
43
|
/**
 * Build the single-line PowerShell launch script for `app_launch`.
 *
 * The script prints exactly one outcome line on stdout:
 *   - `PID=<n>`                               a start call returned a process id
 *   - `PID=<n> (verified via Get-Process)`    no id returned, but a process with
 *                                             the expected name exists after 300 ms
 *   - `LAUNCH_FAILED=<message>`               both start attempts threw
 *   - `LAUNCH_UNVERIFIED=<name>`              no id and no matching process
 *
 * All interpolated values sit inside PowerShell single-quoted literals.
 * PowerShell treats U+2018, U+2019, U+201A and U+201B as single-quote
 * delimiters as well as ASCII `'`, so each of them is doubled.
 *
 * @param {string}   appName App name or path to launch.
 * @param {string[]} [args]  Optional arguments passed via `-ArgumentList`.
 * @returns {string} PowerShell script joined onto one line.
 */
function buildPs(appName, args) {
    // Double every character PowerShell accepts as a single-quote delimiter.
    const psQuote = (text) => text.replace(/['\u2018\u2019\u201A\u201B]/g, '$&$&');
    const safeApp = psQuote(appName);
    // The Get-Process verification probe uses the bare process name
    // (no path, no .exe). Compute it once on the JS side so the PS
    // script doesn't have to do string surgery.
    const procName = psQuote(processNameFromApp(appName));
    const argString = args && args.length > 0
        ? `-ArgumentList @(${args.map((a) => `'${psQuote(a)}'`).join(',')})`
        : '';
    // v4.1.3-essentials launch reliability fix:
    //
    // Primary path: `Start-Process -PassThru` — captures PID for any
    // traditional Win32 exe. Per the original note it fails for UWP /
    // Microsoft Store apps because those launches don't yield a
    // child-process handle for `-PassThru`.
    //
    // Fallback path: `[System.Diagnostics.Process]::Start($app)` — the
    // direct .NET call, with exceptions propagated into the
    // LAUNCH_FAILED sentinel.
    // NOTE(review): the fallback passes only the app name; `args` are
    // not forwarded on that path — confirm this is intended.
    //
    // Verification: when neither path produced a PID, sleep 300ms and
    // probe `Get-Process` for the bare process name so the tool can
    // report an unverified launch honestly instead of pretending it worked.
    return [
        `$ErrorActionPreference = 'Stop';`,
        `$pid_out = $null;`,
        `try {`,
        `  $p = Start-Process '${safeApp}' ${argString} -PassThru;`,
        `  if ($p -and $p.Id) { $pid_out = $p.Id }`,
        `} catch {`,
        `  try {`,
        `    $p = [System.Diagnostics.Process]::Start('${safeApp}');`,
        `    if ($p -and $p.Id) { $pid_out = $p.Id }`,
        `  } catch {`,
        `    Write-Output ('LAUNCH_FAILED=' + $_.Exception.Message);`,
        `    return;`,
        `  }`,
        `}`,
        // If we got a PID from either Start-Process or .NET Process.Start,
        // we're done — emit it and exit.
        `if ($pid_out) { Write-Output ('PID=' + $pid_out); return };`,
        // Otherwise verify via Get-Process after a 300ms grace period.
        `Start-Sleep -Milliseconds 300;`,
        `$found = Get-Process -Name '${procName}' -ErrorAction SilentlyContinue ` +
            `| Select-Object -First 1;`,
        `if ($found) {`,
        `  Write-Output ('PID=' + $found.Id + ' (verified via Get-Process)');`,
        `} else {`,
        `  Write-Output ('LAUNCH_UNVERIFIED=' + '${procName}');`,
        `}`,
    ].join(' ');
}
|
|
@@ -77,10 +137,76 @@ exports.appLaunchTool = {
|
|
|
77
137
|
timeoutMs: 20000,
|
|
78
138
|
});
|
|
79
139
|
const out = stdout.trim();
|
|
80
|
-
//
|
|
140
|
+
// v4.1.3-essentials: the PS script emits exactly ONE of three
|
|
141
|
+
// outcomes. Parse in order of confidence:
|
|
142
|
+
// 1. `LAUNCH_FAILED=<message>` → .NET Process.Start threw;
|
|
143
|
+
// the popup-error class is here.
|
|
144
|
+
// 2. `LAUNCH_UNVERIFIED=<name>` → ShellExecute returned but
|
|
145
|
+
// no matching process appeared
|
|
146
|
+
// within 300ms — silently broken.
|
|
147
|
+
// 3. `PID=<n>` (optional `(verified via Get-Process)` suffix) →
|
|
148
|
+
// verified launch with PID.
|
|
149
|
+
//
|
|
150
|
+
// Outcomes 1 and 2 return `success:false` so the model + user see
|
|
151
|
+
// the honest failure instead of a "launched" lie. Outcome 3 still
|
|
152
|
+
// sets `degraded:true` for the case where Start-Process succeeded
|
|
153
|
+
// but the app might still crash post-init (Spotify "boots" for 21s
|
|
154
|
+
// before stable state) — caller verifies via `os_process_list`.
|
|
155
|
+
const launchFailedMatch = out.match(/LAUNCH_FAILED=(.+)$/m);
|
|
156
|
+
if (launchFailedMatch) {
|
|
157
|
+
return {
|
|
158
|
+
success: false,
|
|
159
|
+
app,
|
|
160
|
+
raw: out,
|
|
161
|
+
error: `Could not launch '${app}': ${launchFailedMatch[1].trim()}. ` +
|
|
162
|
+
`Verify the app is installed and resolvable via App Paths or PATH.`,
|
|
163
|
+
};
|
|
164
|
+
}
|
|
165
|
+
const launchUnverifiedMatch = out.match(/LAUNCH_UNVERIFIED=(.+)$/m);
|
|
166
|
+
if (launchUnverifiedMatch) {
|
|
167
|
+
return {
|
|
168
|
+
success: false,
|
|
169
|
+
app,
|
|
170
|
+
raw: out,
|
|
171
|
+
error: `Launch attempted but no process named '${launchUnverifiedMatch[1].trim()}' ` +
|
|
172
|
+
`appeared within 300ms. Windows may have shown an error dialog, ` +
|
|
173
|
+
`or the app failed to start. Try \`os_process_list\` with a ` +
|
|
174
|
+
`name filter to confirm, or pass an absolute path.`,
|
|
175
|
+
};
|
|
176
|
+
}
|
|
177
|
+
// Extract PID — both bare `PID=12345` and the verified
|
|
178
|
+
// `PID=12345 (verified via Get-Process)` shapes parse the same.
|
|
81
179
|
const pidMatch = out.match(/PID=(\d+)/);
|
|
82
180
|
const pid = pidMatch ? Number(pidMatch[1]) : null;
|
|
83
|
-
|
|
181
|
+
const verified = /verified via Get-Process/.test(out);
|
|
182
|
+
if (pid === null) {
|
|
183
|
+
// Shouldn't happen — the PS script always emits one of the
|
|
184
|
+
// three outcome lines. Surface honestly so the model sees the
|
|
185
|
+
// unexpected stdout instead of pretending success.
|
|
186
|
+
return {
|
|
187
|
+
success: false,
|
|
188
|
+
app,
|
|
189
|
+
raw: out,
|
|
190
|
+
error: `Launch returned unexpected stdout (no PID / failure sentinel). ` +
|
|
191
|
+
`Output: ${out.slice(0, 200)}`,
|
|
192
|
+
};
|
|
193
|
+
}
|
|
194
|
+
// Verified launch — still degraded because the app may crash
|
|
195
|
+
// post-init or split into a different process tree (Chrome's
|
|
196
|
+
// multi-process model, Spotify's spawn-and-detach). The honest
|
|
197
|
+
// signal is "we have a PID we can hand off; verify via
|
|
198
|
+
// os_process_list before relying on it".
|
|
199
|
+
return {
|
|
200
|
+
success: true,
|
|
201
|
+
app,
|
|
202
|
+
pid,
|
|
203
|
+
verified,
|
|
204
|
+
raw: out,
|
|
205
|
+
degraded: true,
|
|
206
|
+
degradedReason: verified
|
|
207
|
+
? `launched (PID ${pid}, verified via Get-Process); call os_process_list to confirm it's still alive`
|
|
208
|
+
: `launched (PID ${pid}); call os_process_list to confirm it's still alive`,
|
|
209
|
+
};
|
|
84
210
|
}
|
|
85
211
|
catch (e) {
|
|
86
212
|
return {
|
|
@@ -30,7 +30,13 @@ const ACTION_KEYS = {
|
|
|
30
30
|
exports.mediaKeyTool = {
|
|
31
31
|
schema: {
|
|
32
32
|
name: 'media_key',
|
|
33
|
-
description: '
|
|
33
|
+
description: 'FALLBACK ONLY — prefer `media_transport(action, target)` for verified ' +
|
|
34
|
+
'control of named apps (Spotify, YouTube, etc.). Use `media_key` only ' +
|
|
35
|
+
'when (1) the target app is unknown / not registered with the OS media ' +
|
|
36
|
+
'bus, or (2) `media_transport` returned `NoSession`. Blind global ' +
|
|
37
|
+
'keystroke (VK_MEDIA_PLAY_PAUSE and friends) — Windows doesn\'t surface ' +
|
|
38
|
+
'routing outcome, so this tool always reports `degraded:true`. Pair ' +
|
|
39
|
+
'with `now_playing` to inspect state first. Windows-only.',
|
|
34
40
|
inputSchema: {
|
|
35
41
|
type: 'object',
|
|
36
42
|
properties: {
|
|
@@ -48,8 +54,20 @@ exports.mediaKeyTool = {
|
|
|
48
54
|
mutates: true,
|
|
49
55
|
toolset: 'system',
|
|
50
56
|
async execute(args, _ctx) {
|
|
51
|
-
if (!(0, _psHelpers_1.isWindows)())
|
|
52
|
-
return (0, _psHelpers_1.windowsOnlyError)('media_key'
|
|
57
|
+
if (!(0, _psHelpers_1.isWindows)()) {
|
|
58
|
+
return (0, _psHelpers_1.windowsOnlyError)('media_key', {
|
|
59
|
+
canStill: [
|
|
60
|
+
'`shell_exec` with `xdotool key XF86AudioPlay` on Linux X11',
|
|
61
|
+
'`shell_exec` with `osascript -e \'tell application "Spotify" to playpause\'` on macOS',
|
|
62
|
+
'Use `media_transport` if a layer-1 skill (Spotify Web API) is installed',
|
|
63
|
+
],
|
|
64
|
+
cannotReliably: [
|
|
65
|
+
'Blind global VK_MEDIA_PLAY_PAUSE keystroke via SendKeys',
|
|
66
|
+
],
|
|
67
|
+
fix: 'Run Aiden on Windows for direct media-key emission, or use the ' +
|
|
68
|
+
'platform-native helpers above via `shell_exec`.',
|
|
69
|
+
});
|
|
70
|
+
}
|
|
53
71
|
const action = args.action;
|
|
54
72
|
if (!ACTION_KEYS[action]) {
|
|
55
73
|
return {
|
|
@@ -66,7 +84,20 @@ exports.mediaKeyTool = {
|
|
|
66
84
|
].join(' ');
|
|
67
85
|
try {
|
|
68
86
|
await (0, _psHelpers_1.runPowerShell)(script, { timeoutMs: 5000 });
|
|
69
|
-
|
|
87
|
+
// v4.1.3-repl-polish: SendKeys returns 0 whether or not any
|
|
88
|
+
// media-aware app received the keystroke — Windows doesn't
|
|
89
|
+
// surface the SMTC routing outcome to user-mode. We could
|
|
90
|
+
// scan `osProcessListImpl` for known media apps, but that's
|
|
91
|
+
// a cross-tool dep that distorts mediaKey's surface area. The
|
|
92
|
+
// honest answer is "we don't know if it landed"; the trail
|
|
93
|
+
// row renders yellow to signal that to the user without
|
|
94
|
+
// affecting the model's read of the result.
|
|
95
|
+
return {
|
|
96
|
+
success: true,
|
|
97
|
+
action,
|
|
98
|
+
degraded: true,
|
|
99
|
+
degradedReason: 'media key sent; cannot verify any app received it',
|
|
100
|
+
};
|
|
70
101
|
}
|
|
71
102
|
catch (e) {
|
|
72
103
|
return {
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Copyright (c) 2026 Shiva Deore (Taracod).
|
|
4
|
+
* Licensed under AGPL-3.0. See LICENSE for details.
|
|
5
|
+
*
|
|
6
|
+
* Aiden — local-first agent.
|
|
7
|
+
*/
|
|
8
|
+
/**
|
|
9
|
+
* tools/v4/system/mediaSessions.ts — `media_sessions` tool. v4.1.4-media.
|
|
10
|
+
*
|
|
11
|
+
* Enumerate every Windows GSMTC (GlobalSystemMediaTransportControls) media
|
|
12
|
+
* session — one entry per app that has registered with the OS media bus
|
|
13
|
+
* (Spotify, YouTube in browser, Windows Media Player, Apple Music for
|
|
14
|
+
* Windows, VLC with the SMTC plugin, etc.).
|
|
15
|
+
*
|
|
16
|
+
* Layer 2 of the three-layer media-control hierarchy v4.1.4 establishes:
|
|
17
|
+
* 1. Semantic API (Spotify Web API when authed) — out of this slice
|
|
18
|
+
* 2. OS media-session API (GSMTC) ← this tool reads, mediaTransport writes
|
|
19
|
+
* 3. Global media keys (mediaKey tool) — blind fallback
|
|
20
|
+
*
|
|
21
|
+
* Pairs with `media_transport` (write tool) — the model calls
|
|
22
|
+
* `media_sessions` to see what's available, then `media_transport`
|
|
23
|
+
* with a target string ("spotify", "chrome", etc.) to act. Distinct
|
|
24
|
+
* from `now_playing` which only returns the SINGLE active session.
|
|
25
|
+
*
|
|
26
|
+
* Read-only. Windows-only in v4.1.4 (consistent with the rest of the
|
|
27
|
+
* computer-control family).
|
|
28
|
+
*/
|
|
29
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
30
|
+
exports.__friendlyAppName = exports.mediaSessionsTool = void 0;
|
|
31
|
+
const _psHelpers_1 = require("./_psHelpers");
|
|
32
|
+
/**
 * Map a Windows AppUserModelId to a friendly display name. Mirror of
 * the normalization in core/tools/nowPlaying.ts; kept in sync so the
 * two tools talk about the same app the same way.
 *
 * Matching is a case-insensitive substring test, evaluated in the order
 * listed below; the first hit wins. Falsy input yields 'unknown'. An id
 * that matches no rule is returned unchanged (as a string).
 *
 * Non-string ids (e.g. a number or object decoded from JSON) are coerced
 * with `String()` instead of throwing on `.toLowerCase()`.
 *
 * @param {*} aumid AppUserModelId as reported by GSMTC.
 * @returns {string} Friendly app name, the id itself, or 'unknown'.
 */
function friendlyAppName(aumid) {
    if (!aumid)
        return 'unknown';
    const raw = typeof aumid === 'string' ? aumid : String(aumid);
    const id = raw.toLowerCase();
    // Ordered [needle, display name] pairs — order is part of the contract.
    const rules = [
        ['spotify', 'Spotify'],
        ['msedge', 'Microsoft Edge'],
        ['chrome', 'Google Chrome'],
        ['firefox', 'Firefox'],
        ['vlc', 'VLC'],
        ['groove', 'Groove Music'],
        ['mediaplay', 'Windows Media Player'],
        ['apple', 'Apple Music'],
    ];
    for (const [needle, label] of rules) {
        if (id.includes(needle))
            return label;
    }
    return raw;
}
|
|
57
|
+
/**
 * Build the PowerShell script that enumerates GSMTC media sessions.
 *
 * Script outline:
 *   1. Prepend the shared WinRT `Await` preamble from `_psHelpers`.
 *   2. Resolve the session-manager and media-properties WinRT types, then
 *      await `RequestAsync()` to obtain the manager.
 *   3. Read `GetCurrentSession()` and remember its `SourceAppUserModelId`
 *      (empty string when there is no current session).
 *   4. For every entry of `GetSessions()`, await
 *      `TryGetMediaPropertiesAsync()` inside a try/catch — a failure leaves
 *      the properties `$null`, which surfaces as null title/artist/album
 *      rather than failing the whole enumeration — and read
 *      `GetPlaybackInfo()` for the playback status.
 *   5. Print `[]` when nothing was collected, otherwise the rows piped
 *      through `ConvertTo-Json -Compress -Depth 3`.
 *
 * `isCurrent` is decided by comparing each session's `SourceAppUserModelId`
 * with the current session's id.
 *
 * @returns {string} PowerShell source, trimmed of leading/trailing whitespace.
 */
function buildPs() {
    return `
${(0, _psHelpers_1.winRtAwaitPreamble)()}
$mgType = [Windows.Media.Control.GlobalSystemMediaTransportControlsSessionManager,Windows.Media.Control,ContentType=WindowsRuntime]
$pType = [Windows.Media.Control.GlobalSystemMediaTransportControlsSessionMediaProperties,Windows.Media.Control,ContentType=WindowsRuntime]
$mgr = Await ($mgType::RequestAsync()) $mgType
$current = $mgr.GetCurrentSession()
$currentId = if ($current) { $current.SourceAppUserModelId } else { '' }
$sessions = $mgr.GetSessions()
$out = @()
foreach ($s in $sessions) {
$p = $null
try { $p = Await ($s.TryGetMediaPropertiesAsync()) $pType } catch { $p = $null }
$pb = $s.GetPlaybackInfo()
$row = @{
appUserModelId = $s.SourceAppUserModelId
isCurrent = ($s.SourceAppUserModelId -eq $currentId)
playbackStatus = $pb.PlaybackStatus.ToString()
title = if ($p) { $p.Title } else { $null }
artist = if ($p) { $p.Artist } else { $null }
album = if ($p) { $p.AlbumTitle } else { $null }
}
$out += $row
}
if ($out.Count -eq 0) {
'[]'
} else {
$out | ConvertTo-Json -Compress -Depth 3
}
`.trim();
}
|
|
96
|
+
exports.mediaSessionsTool = {
|
|
97
|
+
schema: {
|
|
98
|
+
name: 'media_sessions',
|
|
99
|
+
description: 'List active Windows MEDIA PLAYBACK sessions (audio/video apps — ' +
|
|
100
|
+
'Spotify, YouTube in browser, VLC, etc.). NOT for past conversation ' +
|
|
101
|
+
'history — call `session_search` for chat-message search or ' +
|
|
102
|
+
'`recall_session` for past-session topic recall. One entry per app, ' +
|
|
103
|
+
'including which one is the OS-routed target for global media keys. ' +
|
|
104
|
+
'Use this BEFORE `media_transport` when you need to pick a specific ' +
|
|
105
|
+
'app rather than blindly toggling the current session. Distinct from ' +
|
|
106
|
+
'`now_playing` which returns only the single current session. ' +
|
|
107
|
+
'Windows-only in v4.1.4.',
|
|
108
|
+
inputSchema: {
|
|
109
|
+
type: 'object',
|
|
110
|
+
properties: {},
|
|
111
|
+
},
|
|
112
|
+
},
|
|
113
|
+
category: 'read',
|
|
114
|
+
mutates: false,
|
|
115
|
+
toolset: 'system',
|
|
116
|
+
async execute(_args, _ctx) {
|
|
117
|
+
if (!(0, _psHelpers_1.isWindows)()) {
|
|
118
|
+
return (0, _psHelpers_1.windowsOnlyError)('media_sessions', {
|
|
119
|
+
canStill: [
|
|
120
|
+
'Call `now_playing` if a Spotify Web API skill exposes that surface',
|
|
121
|
+
'Use `os_process_list` with a media-app filter (spotify, vlc, chrome) for coarse presence detection',
|
|
122
|
+
'`shell_exec` with `playerctl --list-all` on Linux to enumerate MPRIS clients',
|
|
123
|
+
],
|
|
124
|
+
cannotReliably: [
|
|
125
|
+
'OS-level enumeration of every media-bus-registered app',
|
|
126
|
+
'Distinguishing the OS-routed "current" session from inactive ones',
|
|
127
|
+
],
|
|
128
|
+
fix: 'Run Aiden on Windows for GSMTC enumeration, or wrap your platform\'s ' +
|
|
129
|
+
'native media-control bus (MPRIS / NowPlaying) in a skill.',
|
|
130
|
+
});
|
|
131
|
+
}
|
|
132
|
+
try {
|
|
133
|
+
const { stdout } = await (0, _psHelpers_1.runPowerShell)(buildPs(), { timeoutMs: 8000 });
|
|
134
|
+
const trimmed = stdout.trim();
|
|
135
|
+
if (trimmed.length === 0 || trimmed === '[]') {
|
|
136
|
+
return { success: true, sessions: [], count: 0 };
|
|
137
|
+
}
|
|
138
|
+
const parsed = JSON.parse(trimmed);
|
|
139
|
+
// ConvertTo-Json emits an object (single result) or array (multiple).
|
|
140
|
+
// Normalise to array, then attach friendlyApp.
|
|
141
|
+
const rows = Array.isArray(parsed) ? parsed : [parsed];
|
|
142
|
+
const sessions = rows.map((row) => ({
|
|
143
|
+
appUserModelId: String(row.appUserModelId ?? ''),
|
|
144
|
+
friendlyApp: friendlyAppName(row.appUserModelId),
|
|
145
|
+
isCurrent: row.isCurrent === true,
|
|
146
|
+
playbackStatus: String(row.playbackStatus ?? 'Unknown'),
|
|
147
|
+
title: typeof row.title === 'string' ? row.title : undefined,
|
|
148
|
+
artist: typeof row.artist === 'string' ? row.artist : undefined,
|
|
149
|
+
album: typeof row.album === 'string' ? row.album : undefined,
|
|
150
|
+
}));
|
|
151
|
+
return { success: true, sessions, count: sessions.length };
|
|
152
|
+
}
|
|
153
|
+
catch (e) {
|
|
154
|
+
return {
|
|
155
|
+
success: false,
|
|
156
|
+
error: e instanceof Error ? e.message : String(e),
|
|
157
|
+
};
|
|
158
|
+
}
|
|
159
|
+
},
|
|
160
|
+
};
|
|
161
|
+
// Re-export the friendly-app mapper so mediaTransport can use the same
|
|
162
|
+
// normalization for target-string matching.
|
|
163
|
+
exports.__friendlyAppName = friendlyAppName;
|