@kernel.chat/kbot 4.3.0 → 4.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/peekaboo/commands.d.ts +42 -0
- package/dist/adapters/peekaboo/commands.js +209 -0
- package/dist/adapters/peekaboo/index.d.ts +4 -0
- package/dist/adapters/peekaboo/index.js +9 -0
- package/dist/adapters/peekaboo/runner.d.ts +25 -0
- package/dist/adapters/peekaboo/runner.js +75 -0
- package/dist/adapters/peekaboo/types.d.ts +69 -0
- package/dist/adapters/peekaboo/types.js +8 -0
- package/dist/tools/computer.js +64 -0
- package/dist/tools/peekaboo.d.ts +4 -0
- package/dist/tools/peekaboo.js +371 -0
- package/dist/tools/swarm-2026-04.js +2 -0
- package/package.json +1 -1
- package/skills/native-automation/peekaboo-snapshot-act/SKILL.md +91 -0
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import type { PeekabooAgentResult, PeekabooClickResult, PeekabooOutcome, PeekabooPerformActionResult, PeekabooSeeResult, PeekabooSetValueResult, PeekabooTypeResult } from './types.js';
|
|
2
|
+
export interface SeeOptions {
|
|
3
|
+
app?: string;
|
|
4
|
+
mode?: 'screen' | 'window';
|
|
5
|
+
retina?: boolean;
|
|
6
|
+
}
|
|
7
|
+
export declare function see(opts?: SeeOptions): Promise<PeekabooOutcome<PeekabooSeeResult>>;
|
|
8
|
+
export interface ClickOptions {
|
|
9
|
+
snapshot: string;
|
|
10
|
+
on?: string;
|
|
11
|
+
coords?: [number, number];
|
|
12
|
+
wait?: number;
|
|
13
|
+
}
|
|
14
|
+
export declare function click(opts: ClickOptions): Promise<PeekabooOutcome<PeekabooClickResult>>;
|
|
15
|
+
export interface TypeOptions {
|
|
16
|
+
text: string;
|
|
17
|
+
clear?: boolean;
|
|
18
|
+
delayMs?: number;
|
|
19
|
+
}
|
|
20
|
+
export declare function type_(opts: TypeOptions): Promise<PeekabooOutcome<PeekabooTypeResult>>;
|
|
21
|
+
export interface SetValueOptions {
|
|
22
|
+
snapshot: string;
|
|
23
|
+
on: string;
|
|
24
|
+
value: string;
|
|
25
|
+
}
|
|
26
|
+
export declare function setValue(opts: SetValueOptions): Promise<PeekabooOutcome<PeekabooSetValueResult>>;
|
|
27
|
+
export interface PerformActionOptions {
|
|
28
|
+
snapshot: string;
|
|
29
|
+
on: string;
|
|
30
|
+
action: string;
|
|
31
|
+
}
|
|
32
|
+
export declare function performAction(opts: PerformActionOptions): Promise<PeekabooOutcome<PeekabooPerformActionResult>>;
|
|
33
|
+
export interface AgentOptions {
|
|
34
|
+
prompt: string;
|
|
35
|
+
}
|
|
36
|
+
/**
|
|
37
|
+
* Runs `peekaboo agent "$prompt"` and returns the final stdout. Unlike the
|
|
38
|
+
* structured commands the agent subcommand may emit free-form text, so we
|
|
39
|
+
* surface stdout verbatim under `output`.
|
|
40
|
+
*/
|
|
41
|
+
export declare function agent(opts: AgentOptions): Promise<PeekabooOutcome<PeekabooAgentResult>>;
|
|
42
|
+
//# sourceMappingURL=commands.d.ts.map
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
// Peekaboo high-level helpers — typed wrappers around the JSON CLI.
|
|
2
|
+
//
|
|
3
|
+
// Each helper assembles argv for `peekaboo <subcommand> --json`, runs it
|
|
4
|
+
// through `runPeekaboo`, and parses stdout into the appropriate result
|
|
5
|
+
// type. Non-zero exits and malformed JSON are returned as `PeekabooError`
|
|
6
|
+
// rather than thrown — callers fan out via discriminated unions.
|
|
7
|
+
import { runPeekaboo } from './runner.js';
|
|
8
|
+
/**
 * Build the failure outcome for a peekaboo run that ended with a non-zero
 * exit status.
 *
 * @param code   Exit status reported for the process.
 * @param stdout Captured standard output, passed through untouched.
 * @param stderr Captured standard error, passed through untouched.
 * @returns `{ ok: false, error }` where `error.code` is `'non-zero-exit'`.
 */
function failNonZero(code, stdout, stderr) {
    const error = {
        code: 'non-zero-exit',
        message: `peekaboo exited ${code}`,
        stdout,
        stderr,
        exitCode: code,
    };
    return { ok: false, error };
}
|
|
20
|
+
/**
 * Build the failure outcome used when peekaboo's stdout could not be
 * interpreted as the expected JSON payload.
 *
 * @param message Human-readable reason for the failure.
 * @param stdout  The raw stdout that failed to parse, kept for diagnostics.
 * @returns `{ ok: false, error }` where `error.code` is `'malformed-json'`.
 */
function failParse(message, stdout) {
    const error = { code: 'malformed-json', message, stdout };
    return { ok: false, error };
}
|
|
30
|
+
/**
 * Parse stdout as JSON without throwing.
 *
 * @param stdout Raw text emitted by the CLI.
 * @returns `{ ok: true, value }` on success, otherwise `{ ok: false, err }`
 *          where `err` is the `failParse` outcome carrying the parser message.
 */
function parseJson(stdout) {
    let value;
    try {
        value = JSON.parse(stdout);
    }
    catch (cause) {
        return { ok: false, err: failParse(cause.message, stdout) };
    }
    return { ok: true, value };
}
|
|
38
|
+
/**
 * True for non-null, non-array values whose `typeof` is `'object'`.
 */
function isRecord(v) {
    if (v === null || Array.isArray(v)) {
        return false;
    }
    return typeof v === 'object';
}
|
|
41
|
+
/**
 * Return `v` when it is a string; otherwise return `fallback` (default `''`).
 */
function asString(v, fallback = '') {
    if (typeof v === 'string') {
        return v;
    }
    return fallback;
}
|
|
44
|
+
/**
 * Return `v` when it is a boolean; otherwise return `fallback` (default `false`).
 * No truthiness coercion is applied.
 */
function asBool(v, fallback = false) {
    if (typeof v === 'boolean') {
        return v;
    }
    return fallback;
}
|
|
47
|
+
/**
 * Return `v` when it is a finite number; otherwise return `fallback`
 * (default `0`). NaN, ±Infinity and non-number values all take the fallback.
 */
function asNumber(v, fallback = 0) {
    if (typeof v !== 'number' || !Number.isFinite(v)) {
        return fallback;
    }
    return v;
}
|
|
50
|
+
/**
 * Run `peekaboo see --json` and normalise its output.
 *
 * Optional flags: `--app <name>`, `--mode <mode>`, `--retina`.
 * A non-zero exit or a non-object JSON root is returned as a failure outcome
 * rather than thrown. Entries of `elements` that are not objects are dropped;
 * missing or mistyped fields fall back to `''` / `0` (required fields) or
 * `undefined` (optional fields).
 */
export async function see(opts = {}) {
    const argv = ['see', '--json'];
    if (opts.app) {
        argv.push('--app', opts.app);
    }
    if (opts.mode) {
        argv.push('--mode', opts.mode);
    }
    if (opts.retina) {
        argv.push('--retina');
    }
    const run = await runPeekaboo(argv);
    if (run.code !== 0) {
        return failNonZero(run.code, run.stdout, run.stderr);
    }
    const parsed = parseJson(run.stdout);
    if (!parsed.ok) {
        return parsed.err;
    }
    const root = parsed.value;
    if (!isRecord(root)) {
        return failParse('see: expected object at root', run.stdout);
    }
    // Optional fields are only kept when they carry the expected primitive type.
    const optionalString = (x) => (typeof x === 'string' ? x : undefined);
    const optionalBool = (x) => (typeof x === 'boolean' ? x : undefined);
    const elements = [];
    for (const raw of Array.isArray(root.elements) ? root.elements : []) {
        if (!isRecord(raw)) {
            continue;
        }
        const box = isRecord(raw.frame) ? raw.frame : {};
        elements.push({
            id: asString(raw.id),
            role: asString(raw.role),
            label: optionalString(raw.label),
            frame: {
                x: asNumber(box.x),
                y: asNumber(box.y),
                width: asNumber(box.width),
                height: asNumber(box.height),
            },
            settable: optionalBool(raw.settable),
            named_actions: Array.isArray(raw.named_actions)
                ? raw.named_actions.filter((name) => typeof name === 'string')
                : undefined,
        });
    }
    return {
        ok: true,
        snapshot: asString(root.snapshot),
        app: optionalString(root.app),
        window: optionalString(root.window),
        elements,
        screenshot_path: optionalString(root.screenshot_path),
    };
}
|
|
99
|
+
/**
 * Run `peekaboo click --json --snapshot <id>` with the optional `--on`,
 * `--coords x,y` and `--wait` flags, then normalise the JSON reply.
 *
 * Failures (non-zero exit, unparsable or non-object JSON) are returned as
 * outcomes rather than thrown. `coords` in the result is only populated when
 * the CLI reported a two-element array.
 */
export async function click(opts) {
    const argv = ['click', '--json', '--snapshot', opts.snapshot];
    if (opts.on) {
        argv.push('--on', opts.on);
    }
    if (opts.coords) {
        const pair = opts.coords;
        argv.push('--coords', `${pair[0]},${pair[1]}`);
    }
    if (typeof opts.wait === 'number') {
        argv.push('--wait', String(opts.wait));
    }
    const run = await runPeekaboo(argv);
    if (run.code !== 0) {
        return failNonZero(run.code, run.stdout, run.stderr);
    }
    const parsed = parseJson(run.stdout);
    if (!parsed.ok) {
        return parsed.err;
    }
    const reply = parsed.value;
    if (!isRecord(reply)) {
        return failParse('click: expected object at root', run.stdout);
    }
    const hasPair = Array.isArray(reply.coords) && reply.coords.length === 2;
    return {
        ok: true,
        target: typeof reply.target === 'string' ? reply.target : undefined,
        coords: hasPair
            ? [asNumber(reply.coords[0]), asNumber(reply.coords[1])]
            : undefined,
    };
}
|
|
124
|
+
// `type` is a contextual keyword in TS (type aliases, `import type`); export the helper as `type_` to avoid ambiguity.
|
|
125
|
+
/**
 * Run `peekaboo type --json --text <text>` with the optional `--clear` and
 * `--delay <ms>` flags, then normalise the JSON reply.
 *
 * `typed` falls back to the requested text when the CLI does not echo it.
 * Failures are returned as outcomes rather than thrown.
 */
export async function type_(opts) {
    const argv = ['type', '--json', '--text', opts.text];
    if (opts.clear) {
        argv.push('--clear');
    }
    if (typeof opts.delayMs === 'number') {
        argv.push('--delay', String(opts.delayMs));
    }
    const run = await runPeekaboo(argv);
    if (run.code !== 0) {
        return failNonZero(run.code, run.stdout, run.stderr);
    }
    const parsed = parseJson(run.stdout);
    if (!parsed.ok) {
        return parsed.err;
    }
    const reply = parsed.value;
    if (!isRecord(reply)) {
        return failParse('type: expected object at root', run.stdout);
    }
    return {
        ok: true,
        typed: asString(reply.typed, opts.text),
        cleared: typeof reply.cleared === 'boolean' ? reply.cleared : undefined,
    };
}
|
|
146
|
+
/**
 * Run `peekaboo set-value --json --snapshot <id> --on <element> --value <v>`
 * and normalise the JSON reply.
 *
 * `target` and `value` fall back to the requested ones when the CLI does not
 * echo them. Failures are returned as outcomes rather than thrown.
 */
export async function setValue(opts) {
    const { snapshot, on, value } = opts;
    const argv = ['set-value', '--json', '--snapshot', snapshot, '--on', on, '--value', value];
    const run = await runPeekaboo(argv);
    if (run.code !== 0) {
        return failNonZero(run.code, run.stdout, run.stderr);
    }
    const parsed = parseJson(run.stdout);
    if (!parsed.ok) {
        return parsed.err;
    }
    const reply = parsed.value;
    if (!isRecord(reply)) {
        return failParse('set-value: expected object at root', run.stdout);
    }
    return {
        ok: true,
        target: asString(reply.target, on),
        value: asString(reply.value, value),
    };
}
|
|
172
|
+
/**
 * Run `peekaboo perform-action --json --snapshot <id> --on <element>
 * --action <name>` and normalise the JSON reply.
 *
 * `target` and `action` fall back to the requested ones when the CLI does not
 * echo them. Failures are returned as outcomes rather than thrown.
 */
export async function performAction(opts) {
    const { snapshot, on, action } = opts;
    const argv = ['perform-action', '--json', '--snapshot', snapshot, '--on', on, '--action', action];
    const run = await runPeekaboo(argv);
    if (run.code !== 0) {
        return failNonZero(run.code, run.stdout, run.stderr);
    }
    const parsed = parseJson(run.stdout);
    if (!parsed.ok) {
        return parsed.err;
    }
    const reply = parsed.value;
    if (!isRecord(reply)) {
        return failParse('perform-action: expected object at root', run.stdout);
    }
    return {
        ok: true,
        target: asString(reply.target, on),
        action: asString(reply.action, action),
    };
}
|
|
198
|
+
/**
 * Invoke `peekaboo agent <prompt>`.
 *
 * No `--json` flag is passed and stdout is not parsed: on a zero exit the raw
 * stdout is returned under `output`; any other exit status becomes a
 * `non-zero-exit` failure outcome.
 */
export async function agent(opts) {
    const run = await runPeekaboo(['agent', opts.prompt]);
    return run.code === 0
        ? { ok: true, output: run.stdout }
        : failNonZero(run.code, run.stdout, run.stderr);
}
|
|
209
|
+
//# sourceMappingURL=commands.js.map
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
export type { PeekabooFrame, PeekabooElement, PeekabooSeeResult, PeekabooClickResult, PeekabooTypeResult, PeekabooSetValueResult, PeekabooPerformActionResult, PeekabooAgentResult, PeekabooError, PeekabooOutcome, } from './types.js';
|
|
2
|
+
export { runPeekaboo, peekabooAvailable, type RunOptions, type RunResult } from './runner.js';
|
|
3
|
+
export { see, click, type_, setValue, performAction, agent, type SeeOptions, type ClickOptions, type TypeOptions, type SetValueOptions, type PerformActionOptions, type AgentOptions, } from './commands.js';
|
|
4
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
// Peekaboo adapter — public surface.
|
|
2
|
+
//
|
|
3
|
+
// Wraps the `peekaboo` macOS CLI behind a typed interface so kbot tools and
|
|
4
|
+
// agents can drive the screen-capture + GUI-automation features without
|
|
5
|
+
// taking a runtime dependency on the binary or the @steipete/peekaboo
|
|
6
|
+
// distribution. All execution flows through `runPeekaboo`.
|
|
7
|
+
export { runPeekaboo, peekabooAvailable } from './runner.js';
|
|
8
|
+
export { see, click, type_, setValue, performAction, agent, } from './commands.js';
|
|
9
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
export interface RunOptions {
|
|
2
|
+
/** Optional stdin payload. */
|
|
3
|
+
input?: string;
|
|
4
|
+
/** Working directory for the child. */
|
|
5
|
+
cwd?: string;
|
|
6
|
+
/** Hard timeout in milliseconds. */
|
|
7
|
+
timeoutMs?: number;
|
|
8
|
+
}
|
|
9
|
+
export interface RunResult {
|
|
10
|
+
stdout: string;
|
|
11
|
+
stderr: string;
|
|
12
|
+
code: number;
|
|
13
|
+
}
|
|
14
|
+
/**
|
|
15
|
+
* Run the peekaboo CLI with the given args. Resolves with stdout/stderr/code
|
|
16
|
+
* regardless of exit status; never rejects on non-zero exit. Rejects only
|
|
17
|
+
* when the binary cannot be spawned at all (ENOENT, permissions, etc.).
|
|
18
|
+
*/
|
|
19
|
+
export declare function runPeekaboo(args: string[], opts?: RunOptions): Promise<RunResult>;
|
|
20
|
+
/**
|
|
21
|
+
* Returns true when the peekaboo binary responds to `--version`. Used by
|
|
22
|
+
* higher-level tools to gate Peekaboo features without crashing the host.
|
|
23
|
+
*/
|
|
24
|
+
export declare function peekabooAvailable(): Promise<boolean>;
|
|
25
|
+
//# sourceMappingURL=runner.d.ts.map
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
// Peekaboo binary runner — thin child_process wrapper.
|
|
2
|
+
//
|
|
3
|
+
// All execution of the `peekaboo` CLI flows through `runPeekaboo`. The
|
|
4
|
+
// binary path is resolved from the PEEKABOO_BIN env var (default: 'peekaboo')
|
|
5
|
+
// so tests can stub it and hosts can pin a specific install. No shell is
|
|
6
|
+
// involved — args go through execFile to keep arg quoting predictable.
|
|
7
|
+
import { execFile } from 'node:child_process';
|
|
8
|
+
/**
 * Resolve the peekaboo executable to launch.
 *
 * Reads the PEEKABOO_BIN environment variable and falls back to the bare
 * command name `'peekaboo'` when it is unset or blank. The override is
 * returned trimmed: the emptiness check already operates on the trimmed
 * value, and returning the padded original (e.g. a trailing space or newline
 * from an env file) would hand execFile a path that cannot exist.
 *
 * @returns The executable path or command name to spawn.
 */
function resolveBinary() {
    const raw = process.env.PEEKABOO_BIN;
    const configured = typeof raw === 'string' ? raw.trim() : '';
    return configured.length > 0 ? configured : 'peekaboo';
}
|
|
13
|
+
/**
 * Run the peekaboo CLI with the given args. Resolves with stdout/stderr/code
 * regardless of exit status; never rejects on non-zero exit. Rejects only
 * when the binary cannot be spawned at all (ENOENT, permissions, etc.).
 *
 * @param args Arguments passed verbatim to the binary (no shell is involved).
 * @param opts Optional `input` (written to stdin), `cwd`, and `timeoutMs`.
 * @returns A promise for `{ stdout, stderr, code }`; `code` is 0 on success,
 *          the numeric exit status on a non-zero exit, and 1 when execFile
 *          reported a failure without a numeric status.
 */
export function runPeekaboo(args, opts = {}) {
    const bin = resolveBinary();
    // execFile hands back strings by default but may hand back Buffers;
    // anything else is normalised to an empty string.
    const toText = (chunk) => {
        if (typeof chunk === 'string') {
            return chunk;
        }
        return chunk instanceof Buffer ? chunk.toString('utf8') : '';
    };
    return new Promise((resolve, reject) => {
        const child = execFile(bin, args, {
            cwd: opts.cwd,
            timeout: opts.timeoutMs,
            maxBuffer: 64 * 1024 * 1024,
        }, (err, stdout, stderr) => {
            const stdoutStr = toText(stdout);
            const stderrStr = toText(stderr);
            if (!err) {
                resolve({ stdout: stdoutStr, stderr: stderrStr, code: 0 });
                return;
            }
            const errno = err.code;
            // A string errno (ENOENT, EACCES, EPERM, ENOTDIR, ENOEXEC, EMFILE, ...)
            // means the process never started, so reject instead of reporting it
            // as "exit code 1". Node-internal `ERR_*` codes (e.g. the maxBuffer
            // overflow) are raised after the process ran and stay on the resolve
            // path.
            if (typeof errno === 'string' && !errno.startsWith('ERR_')) {
                reject(err);
                return;
            }
            // Otherwise the process ran: surface its exit status, or 1 when no
            // numeric status is available.
            resolve({
                stdout: stdoutStr,
                stderr: stderrStr,
                code: typeof errno === 'number' ? errno : 1,
            });
        });
        if (opts.input !== undefined && child.stdin) {
            child.stdin.write(opts.input);
            child.stdin.end();
        }
    });
}
|
|
62
|
+
/**
 * Probe for the peekaboo binary by running `--version` with a 5000 ms
 * timeout. Resolves to true only when that run exits with status 0; any
 * rejection from the runner is reported as false instead of propagating.
 */
export async function peekabooAvailable() {
    let exitCode;
    try {
        ({ code: exitCode } = await runPeekaboo(['--version'], { timeoutMs: 5000 }));
    }
    catch {
        return false;
    }
    return exitCode === 0;
}
|
|
75
|
+
//# sourceMappingURL=runner.js.map
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
export interface PeekabooFrame {
|
|
2
|
+
x: number;
|
|
3
|
+
y: number;
|
|
4
|
+
width: number;
|
|
5
|
+
height: number;
|
|
6
|
+
}
|
|
7
|
+
export interface PeekabooElement {
|
|
8
|
+
/** Element handle, e.g. "B1" (button), "T1" (text field). */
|
|
9
|
+
id: string;
|
|
10
|
+
role: string;
|
|
11
|
+
label?: string;
|
|
12
|
+
frame: PeekabooFrame;
|
|
13
|
+
/** Whether the element accepts a value (text fields, sliders, etc.). */
|
|
14
|
+
settable?: boolean;
|
|
15
|
+
/** Action names the element advertises via the AX API. */
|
|
16
|
+
named_actions?: string[];
|
|
17
|
+
}
|
|
18
|
+
export interface PeekabooSeeResult {
|
|
19
|
+
/** Snapshot id used by subsequent `--snapshot $id` arguments. */
|
|
20
|
+
snapshot: string;
|
|
21
|
+
app?: string;
|
|
22
|
+
window?: string;
|
|
23
|
+
elements: PeekabooElement[];
|
|
24
|
+
/** Optional path on disk where the screenshot was written. */
|
|
25
|
+
screenshot_path?: string;
|
|
26
|
+
}
|
|
27
|
+
export interface PeekabooClickResult {
|
|
28
|
+
ok: boolean;
|
|
29
|
+
target?: string;
|
|
30
|
+
coords?: [number, number];
|
|
31
|
+
}
|
|
32
|
+
export interface PeekabooTypeResult {
|
|
33
|
+
ok: boolean;
|
|
34
|
+
typed: string;
|
|
35
|
+
cleared?: boolean;
|
|
36
|
+
}
|
|
37
|
+
export interface PeekabooSetValueResult {
|
|
38
|
+
ok: boolean;
|
|
39
|
+
target: string;
|
|
40
|
+
value: string;
|
|
41
|
+
}
|
|
42
|
+
export interface PeekabooPerformActionResult {
|
|
43
|
+
ok: boolean;
|
|
44
|
+
target: string;
|
|
45
|
+
action: string;
|
|
46
|
+
}
|
|
47
|
+
export interface PeekabooAgentResult {
|
|
48
|
+
ok: boolean;
|
|
49
|
+
output: string;
|
|
50
|
+
}
|
|
51
|
+
/**
|
|
52
|
+
* Structured error returned by all command helpers when the binary exits
|
|
53
|
+
* non-zero or emits malformed JSON. Helpers return `{ ok: false, error }`
|
|
54
|
+
* rather than throw so callers can route via discriminated unions.
|
|
55
|
+
*/
|
|
56
|
+
export interface PeekabooError {
|
|
57
|
+
ok: false;
|
|
58
|
+
error: {
|
|
59
|
+
code: 'non-zero-exit' | 'malformed-json' | 'binary-missing' | 'unknown';
|
|
60
|
+
message: string;
|
|
61
|
+
stderr?: string;
|
|
62
|
+
stdout?: string;
|
|
63
|
+
exitCode?: number;
|
|
64
|
+
};
|
|
65
|
+
}
|
|
66
|
+
export type PeekabooOutcome<T> = ({
|
|
67
|
+
ok: true;
|
|
68
|
+
} & T) | PeekabooError;
|
|
69
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
// Peekaboo CLI surface — minimal type model.
|
|
2
|
+
//
|
|
3
|
+
// Mirrors the JSON shape emitted by the `peekaboo` macOS CLI
|
|
4
|
+
// (https://github.com/openclaw/Peekaboo) without taking a runtime
|
|
5
|
+
// dependency. kbot stays binary-agnostic; this adapter only ever
|
|
6
|
+
// speaks JSON across the process boundary.
|
|
7
|
+
export {};
|
|
8
|
+
//# sourceMappingURL=types.js.map
|
package/dist/tools/computer.js
CHANGED
|
@@ -17,7 +17,19 @@ import { join } from 'node:path';
|
|
|
17
17
|
import { readFileSync, unlinkSync, existsSync, mkdirSync, rmSync } from 'node:fs';
|
|
18
18
|
import { registerTool } from './index.js';
|
|
19
19
|
import { Coordinator } from '../computer-use-coordinator.js';
|
|
20
|
+
import { peekabooAvailable, see as peekabooSee, click as peekabooClick, type_ as peekabooType, } from '../adapters/peekaboo/index.js';
|
|
20
21
|
const platform = process.platform;
|
|
22
|
+
// ── Peekaboo AX-first fallback ───────────────────────────────────
|
|
23
|
+
// When the peekaboo CLI is installed, try AX-aware automation before
|
|
24
|
+
// synthetic input. Probed lazily on first use and cached for the process lifetime; KBOT_DISABLE_PEEKABOO=1 bypasses the cache.
|
|
25
|
+
// Memoised availability probe; null until the first call that needs it.
let _peekabooReady = null;
/**
 * Resolve to whether peekaboo may be used for AX-first automation.
 *
 * KBOT_DISABLE_PEEKABOO=1 is checked on every call and short-circuits to
 * false without touching the cache. Otherwise the first call starts a single
 * `peekabooAvailable()` probe whose promise is reused by later calls; a
 * rejected probe is mapped to false.
 */
function peekabooReady() {
    const disabled = process.env.KBOT_DISABLE_PEEKABOO === '1';
    if (disabled) {
        return Promise.resolve(false);
    }
    if (_peekabooReady) {
        return _peekabooReady;
    }
    _peekabooReady = peekabooAvailable().catch(() => false);
    return _peekabooReady;
}
|
|
21
33
|
const LOCK_DIR = join(homedir(), '.kbot');
|
|
22
34
|
// Legacy single-session lock path. Retained as a constant for back-compat
|
|
23
35
|
// with any callers that referenced it; the actual locking is now performed
|
|
@@ -485,6 +497,27 @@ export function registerComputerTools() {
|
|
|
485
497
|
return 'Error: x and y must be numbers';
|
|
486
498
|
}
|
|
487
499
|
try {
|
|
500
|
+
// AX-first fallback: when peekaboo is on PATH, try AX-aware click
|
|
501
|
+
// before synthetic input. Falls through to AppleScript+cliclick on
|
|
502
|
+
// any failure so existing behavior is preserved.
|
|
503
|
+
if (platform === 'darwin' && app && await peekabooReady()) {
|
|
504
|
+
try {
|
|
505
|
+
const snap = await peekabooSee({ app });
|
|
506
|
+
if (snap.ok) {
|
|
507
|
+
const target = (args.element_id ?? args.label ?? null);
|
|
508
|
+
if (target) {
|
|
509
|
+
const result = await peekabooClick({
|
|
510
|
+
snapshot: snap.snapshot,
|
|
511
|
+
on: String(target),
|
|
512
|
+
});
|
|
513
|
+
if (result.ok)
|
|
514
|
+
return `clicked via AX: ${target}`;
|
|
515
|
+
// fall through to synthetic on AX failure
|
|
516
|
+
}
|
|
517
|
+
}
|
|
518
|
+
}
|
|
519
|
+
catch { /* fall through */ }
|
|
520
|
+
}
|
|
488
521
|
if (platform === 'darwin') {
|
|
489
522
|
try {
|
|
490
523
|
if (button === 'double') {
|
|
@@ -712,6 +745,21 @@ export function registerComputerTools() {
|
|
|
712
745
|
const text = String(args.text);
|
|
713
746
|
if (!text)
|
|
714
747
|
return 'Error: text is required';
|
|
748
|
+
// AX-first fallback: when peekaboo is on PATH, try AX-aware typing
|
|
749
|
+
// before synthetic keystrokes. Falls through to AppleScript on any
|
|
750
|
+
// failure so existing behavior is preserved.
|
|
751
|
+
if (platform === 'darwin' && await peekabooReady()) {
|
|
752
|
+
try {
|
|
753
|
+
const clear = args.clear === true;
|
|
754
|
+
const delayMs = typeof args.delayMs === 'number' ? args.delayMs : undefined;
|
|
755
|
+
const result = await peekabooType({ text, clear, delayMs });
|
|
756
|
+
if (result.ok) {
|
|
757
|
+
return `Typed via AX: ${text.slice(0, 80)}${text.length > 80 ? '...' : ''}`;
|
|
758
|
+
}
|
|
759
|
+
// fall through to synthetic on AX failure
|
|
760
|
+
}
|
|
761
|
+
catch { /* fall through */ }
|
|
762
|
+
}
|
|
715
763
|
if (platform === 'darwin') {
|
|
716
764
|
const escaped = escapeAppleScript(text);
|
|
717
765
|
try {
|
|
@@ -1104,5 +1152,21 @@ export function registerComputerTools() {
|
|
|
1104
1152
|
return `Computer use session ended.${releasedNote}`;
|
|
1105
1153
|
},
|
|
1106
1154
|
});
|
|
1155
|
+
// ── Peekaboo status (AX-first diagnostics) ──
|
|
1156
|
+
registerTool({
|
|
1157
|
+
name: 'peekaboo_status',
|
|
1158
|
+
description: 'Report whether the AX-first peekaboo CLI is available on PATH for accessibility-aware automation.',
|
|
1159
|
+
parameters: {},
|
|
1160
|
+
tier: 'free',
|
|
1161
|
+
async execute() {
|
|
1162
|
+
if (process.env.KBOT_DISABLE_PEEKABOO === '1') {
|
|
1163
|
+
return 'AX-first unavailable (disabled via KBOT_DISABLE_PEEKABOO=1)';
|
|
1164
|
+
}
|
|
1165
|
+
const ready = await peekabooReady();
|
|
1166
|
+
return ready
|
|
1167
|
+
? 'AX-first available via peekaboo'
|
|
1168
|
+
: 'AX-first unavailable (peekaboo CLI not on PATH)';
|
|
1169
|
+
},
|
|
1170
|
+
});
|
|
1107
1171
|
}
|
|
1108
1172
|
//# sourceMappingURL=computer.js.map
|
|
@@ -0,0 +1,371 @@
|
|
|
1
|
+
// Peekaboo tools — kbot tool-registry surface for the macOS Peekaboo CLI.
|
|
2
|
+
//
|
|
3
|
+
// Wraps src/adapters/peekaboo so the LLM tool layer can drive AX-aware
|
|
4
|
+
// snapshots, clicks, typing, value-set, named-action invocation, and the
|
|
5
|
+
// peekaboo agent subcommand. App-bound calls are gated by the same per-app
|
|
6
|
+
// approval contract computer.ts uses; because computer.ts does not export
|
|
7
|
+
// its in-process `approvedApps` set, this module falls back to checking
|
|
8
|
+
// the on-disk Coordinator lock file at ~/.kbot/computer-use/<app>.lock as
|
|
9
|
+
// a best-effort cross-process signal — see requireApproval() below.
|
|
10
|
+
import { existsSync } from 'node:fs';
|
|
11
|
+
import { homedir } from 'node:os';
|
|
12
|
+
import { join } from 'node:path';
|
|
13
|
+
import { registerTool } from './index.js';
|
|
14
|
+
import { see, click, type_, setValue, performAction, agent, peekabooAvailable, } from '../adapters/peekaboo/index.js';
|
|
15
|
+
// ── Approval gate ──────────────────────────────────────────────────────
|
|
16
|
+
//
|
|
17
|
+
// computer.ts holds an in-process `approvedApps` Set keyed by lowercase app
|
|
18
|
+
// name, plus a Coordinator that writes a per-app lock file at
|
|
19
|
+
// ~/.kbot/computer-use/<app>.lock when an *active* claim is held.
|
|
20
|
+
//
|
|
21
|
+
// Limitation: the `approvedApps` set is not exported and the Coordinator
|
|
22
|
+
// lock file only exists during an active claim, not after a bare
|
|
23
|
+
// app_approve. So from a sibling module like this one we cannot directly
|
|
24
|
+
// observe approval state. We treat the lock-file presence as the strongest
|
|
25
|
+
// available cross-process approval signal — if the user has driven the app
|
|
26
|
+
// through computer.ts at all this session, the lock file will exist
|
|
27
|
+
// (computer.ts re-acquires + retains during the call). When we can't see
|
|
28
|
+
// it, we fail closed with a clear pointer to `app_approve`.
|
|
29
|
+
const LOCK_ROOT = process.env.KBOT_COMPUTER_USE_ROOT || join(homedir(), '.kbot', 'computer-use');
|
|
30
|
+
/**
 * Mirror of computer-use-coordinator.ts#sanitizeApp — keep in sync.
 *
 * Replaces every `/`, `\`, space and `-` in the app name with `_`.
 */
function sanitizeApp(app) {
    const unsafe = /[/\\ -]/g;
    return app.replace(unsafe, '_');
}
|
|
34
|
+
/**
 * Path of the lock file for an app: the lower-cased, sanitised app name with
 * a `.lock` suffix, under LOCK_ROOT.
 */
function lockPath(app) {
    const fileName = `${sanitizeApp(app.toLowerCase())}.lock`;
    return join(LOCK_ROOT, fileName);
}
|
|
37
|
+
/**
 * Approval gate. Returns null when the gate passes, or an `Error: ...` string
 * on denial.
 *
 * The gate passes when `app` is empty or when the app's lock file exists on
 * disk.
 */
function requireApproval(app) {
    const approved = !app || existsSync(lockPath(app));
    return approved
        ? null
        : `Error: ${app} is not approved for computer use. Run app_approve first (or drive a computer.ts tool against ${app} so the Coordinator lock is created).`;
}
|
|
45
|
+
// ── Outcome → string helpers ───────────────────────────────────────────
|
|
46
|
+
/**
 * Render a peekaboo outcome as the string handed back to the tool caller.
 *
 * Failures become `Error: peekaboo <code>: <detail>`, where detail is the
 * trimmed stderr when non-empty and the error message otherwise. Successes
 * are pretty-printed as JSON (2-space indent) with the `ok` discriminant
 * removed.
 */
function outcomeToString(out) {
    if (out.ok) {
        // Drop the discriminant; everything else is the success payload.
        const payload = Object.fromEntries(Object.entries(out).filter(([key]) => key !== 'ok'));
        return JSON.stringify(payload, null, 2);
    }
    const failure = out.error;
    const trimmedStderr = failure.stderr?.trim();
    const detail = trimmedStderr || failure.message;
    return `Error: peekaboo ${failure.code}: ${detail}`;
}
|
|
57
|
+
/**
 * Check that the peekaboo CLI is usable.
 *
 * @returns null when `peekabooAvailable()` reports true, otherwise an
 *          `Error: ...` string containing install instructions.
 */
async function ensureBinary() {
    const available = await peekabooAvailable();
    return available
        ? null
        : "Error: peekaboo CLI not found on PATH. Install via 'brew install steipete/tap/peekaboo' or 'npm i -g @steipete/peekaboo'.";
}
|
|
63
|
+
// ── Tool definitions ───────────────────────────────────────────────────
|
|
64
|
+
// Tool: peekaboo_see — capture a snapshot via the adapter's `see` helper.
// An `app` argument is optional; when given it must pass the approval gate.
const peekabooSee = {
    name: 'peekaboo_see',
    description: 'Capture an AX snapshot of an app or the screen via the Peekaboo CLI. Returns a snapshot id plus a list of labeled element ids (B1, T1, …) usable by peekaboo_click / peekaboo_set_value / peekaboo_perform_action.',
    parameters: {
        app: {
            type: 'string',
            description: 'Target app name (optional). When supplied, the app must be approved via app_approve.',
        },
        mode: {
            type: 'string',
            description: 'Capture mode: "screen" (default) or "window".',
        },
        retina: {
            type: 'boolean',
            description: 'Capture at retina resolution (optional).',
        },
    },
    tier: 'free',
    /**
     * Validates the arguments, then returns the stringified outcome of `see`.
     * Every failure is reported as an `Error: ...` string rather than thrown.
     */
    async execute(args) {
        const unavailable = await ensureBinary();
        if (unavailable) {
            return unavailable;
        }
        // Only a non-empty string counts as an app; anything else means "no app".
        let app;
        if (typeof args.app === 'string' && args.app.length > 0) {
            app = args.app;
            const denied = requireApproval(app);
            if (denied) {
                return denied;
            }
        }
        // Unrecognised modes are dropped so the CLI default applies.
        const mode = ['window', 'screen'].includes(args.mode) ? args.mode : undefined;
        const retina = args.retina === true;
        try {
            const outcome = await see({
                ...(app ? { app } : {}),
                ...(mode ? { mode } : {}),
                ...(retina ? { retina: true } : {}),
            });
            return outcomeToString(outcome);
        }
        catch (failure) {
            return `Error: ${failure.message}`;
        }
    },
};
|
|
107
|
+
// Tool: peekaboo_click — click an element or a coordinate pair against a
// snapshot previously returned by peekaboo_see. `app` is required because the
// call is subject to the approval gate.
const peekabooClick = {
    name: 'peekaboo_click',
    description: 'Click against a Peekaboo snapshot. Provide either `on` (element id or query) or `coords` ("x,y"). Requires a prior peekaboo_see.',
    parameters: {
        app: {
            type: 'string',
            description: 'App being targeted. Required for the approval gate.',
            required: true,
        },
        snapshot: {
            type: 'string',
            description: 'Snapshot id from peekaboo_see.',
            required: true,
        },
        on: {
            type: 'string',
            description: 'Element id (e.g. "B1") or query string. Mutually exclusive with coords.',
        },
        coords: {
            type: 'string',
            description: 'Click coordinates as "x,y" (numbers). Mutually exclusive with on.',
        },
        wait: {
            type: 'number',
            description: 'Optional pre-click wait in milliseconds.',
        },
    },
    tier: 'free',
    /**
     * Validates the arguments, then returns the stringified outcome of
     * `click`. Every failure (missing binary, unapproved app, bad arguments,
     * CLI error) is reported as an `Error: ...` string rather than thrown.
     */
    async execute(args) {
        const binErr = await ensureBinary();
        if (binErr)
            return binErr;
        const app = String(args.app ?? '');
        if (!app)
            return 'Error: app is required.';
        const gate = requireApproval(app);
        if (gate)
            return gate;
        const snapshot = String(args.snapshot ?? '');
        if (!snapshot)
            return 'Error: snapshot is required.';
        const on = typeof args.on === 'string' && args.on.length > 0 ? args.on : undefined;
        let coords;
        if (typeof args.coords === 'string' && args.coords.length > 0) {
            const parts = args.coords.split(',').map((p) => p.trim());
            // Number('') is 0, so blank components must be rejected explicitly;
            // otherwise inputs like "10," or "," would silently click at
            // (10, 0) or (0, 0).
            const valid = parts.length === 2 &&
                parts.every((p) => p !== '' && Number.isFinite(Number(p)));
            if (!valid) {
                return 'Error: coords must be "x,y" (numbers).';
            }
            coords = [Number(parts[0]), Number(parts[1])];
        }
        if (!on && !coords)
            return 'Error: provide either `on` or `coords`.';
        const wait = typeof args.wait === 'number' && Number.isFinite(args.wait) ? args.wait : undefined;
        try {
            const out = await click({
                snapshot,
                ...(on ? { on } : {}),
                ...(coords ? { coords } : {}),
                ...(wait !== undefined ? { wait } : {}),
            });
            return outcomeToString(out);
        }
        catch (e) {
            return `Error: ${e.message}`;
        }
    },
};
|
|
174
|
+
/**
 * Tool definition for `peekaboo_type`: types text into whatever field is
 * currently focused, via the `type_` adapter command.
 *
 * Parameters (all read from `args`):
 *   - app      (string, required)  App name passed to `requireApproval`.
 *   - text     (string, required)  Text to type; empty or non-string is rejected.
 *   - clear    (boolean, optional) Only the literal `true` enables clearing.
 *   - delay_ms (number, optional)  Forwarded as `delayMs` when it is a finite number.
 *
 * `execute` always resolves to a string: either the formatted outcome from
 * `outcomeToString`, or an `Error: ...` message. It never rejects.
 */
const peekabooType = {
    name: 'peekaboo_type',
    description: 'Type text into the focused field via the Peekaboo CLI. Use peekaboo_click to focus first.',
    parameters: {
        app: {
            type: 'string',
            description: 'App being targeted. Required for the approval gate.',
            required: true,
        },
        text: {
            type: 'string',
            description: 'Text to type.',
            required: true,
        },
        clear: {
            type: 'boolean',
            description: 'Clear the field before typing (optional).',
        },
        delay_ms: {
            type: 'number',
            description: 'Per-character delay in milliseconds (optional).',
        },
    },
    tier: 'free',
    async execute(args) {
        // A truthy result from ensureBinary() is returned to the caller as-is
        // (presumably a "binary not found" message — defined outside this block).
        const binErr = await ensureBinary();
        if (binErr)
            return binErr;
        const app = String(args.app ?? '');
        if (!app)
            return 'Error: app is required.';
        // A truthy result from requireApproval() blocks the call and is returned verbatim.
        const gate = requireApproval(app);
        if (gate)
            return gate;
        const text = typeof args.text === 'string' ? args.text : '';
        if (!text)
            return 'Error: text is required.';
        const clear = args.clear === true;
        const delayMs = typeof args.delay_ms === 'number' && Number.isFinite(args.delay_ms) ? args.delay_ms : undefined;
        try {
            // Optional keys are only included when set, so the adapter sees
            // absent keys rather than explicit `undefined`/`false`.
            const out = await type_({
                text,
                ...(clear ? { clear: true } : {}),
                ...(delayMs !== undefined ? { delayMs } : {}),
            });
            return outcomeToString(out);
        }
        catch (e) {
            // Anything can be thrown in JS; avoid "Error: undefined" for non-Error values.
            const reason = e instanceof Error ? e.message : String(e);
            return `Error: ${reason}`;
        }
    },
};
|
|
226
|
+
/**
 * Tool definition for `peekaboo_set_value`: assigns a value directly to an
 * element of a previously captured snapshot via the `setValue` adapter command.
 *
 * Parameters (all read from `args`):
 *   - app      (string, required) App name passed to `requireApproval`.
 *   - snapshot (string, required) Snapshot id.
 *   - on       (string, required) Target element id or query.
 *   - value    (required)         Value to assign. Strings are passed through
 *                                 unchanged (an explicit empty string is allowed);
 *                                 numbers and booleans are converted with `String()`.
 *
 * `execute` always resolves to a string: either the formatted outcome from
 * `outcomeToString`, or an `Error: ...` message. It never rejects.
 */
const peekabooSetValue = {
    name: 'peekaboo_set_value',
    description: 'Set a settable AX value directly on an element (skips clicking). Faster than click+type for text fields, sliders, etc.',
    parameters: {
        app: {
            type: 'string',
            description: 'App being targeted. Required for the approval gate.',
            required: true,
        },
        snapshot: {
            type: 'string',
            description: 'Snapshot id from peekaboo_see.',
            required: true,
        },
        on: {
            type: 'string',
            description: 'Target element id or query (must be settable).',
            required: true,
        },
        value: {
            type: 'string',
            description: 'Value to assign.',
            required: true,
        },
    },
    tier: 'free',
    async execute(args) {
        // A truthy result from ensureBinary() is returned to the caller as-is.
        const binErr = await ensureBinary();
        if (binErr)
            return binErr;
        const app = String(args.app ?? '');
        if (!app)
            return 'Error: app is required.';
        // A truthy result from requireApproval() blocks the call and is returned verbatim.
        const gate = requireApproval(app);
        if (gate)
            return gate;
        const snapshot = String(args.snapshot ?? '');
        const on = String(args.on ?? '');
        if (!snapshot)
            return 'Error: snapshot is required.';
        if (!on)
            return 'Error: on is required.';
        // Previously any non-string value (e.g. a numeric slider position) was
        // silently replaced by '' and written to the element. Coerce scalars
        // and reject everything else instead of blanking the target.
        const rawValue = args.value;
        if (rawValue === undefined || rawValue === null)
            return 'Error: value is required.';
        let value;
        if (typeof rawValue === 'string') {
            value = rawValue;
        }
        else if (typeof rawValue === 'boolean' || (typeof rawValue === 'number' && Number.isFinite(rawValue))) {
            value = String(rawValue);
        }
        else {
            return 'Error: value must be a string, number, or boolean.';
        }
        try {
            const out = await setValue({ snapshot, on, value });
            return outcomeToString(out);
        }
        catch (e) {
            // Anything can be thrown in JS; avoid "Error: undefined" for non-Error values.
            const reason = e instanceof Error ? e.message : String(e);
            return `Error: ${reason}`;
        }
    },
};
|
|
278
|
+
/**
 * Tool definition for `peekaboo_perform_action`: invokes a named action on an
 * element of a previously captured snapshot via the `performAction` adapter
 * command.
 *
 * Parameters (all read from `args`):
 *   - app      (string, required) App name passed to `requireApproval`.
 *   - snapshot (string, required) Snapshot id.
 *   - on       (string, required) Target element id or query.
 *   - action   (string, required) Action name, forwarded unchanged.
 *
 * `execute` always resolves to a string: either the formatted outcome from
 * `outcomeToString`, or an `Error: ...` message. It never rejects.
 */
const peekabooPerformAction = {
    name: 'peekaboo_perform_action',
    description: 'Invoke a named AX action (e.g. AXPress, AXShowMenu) on an element from a Peekaboo snapshot.',
    parameters: {
        app: {
            type: 'string',
            description: 'App being targeted. Required for the approval gate.',
            required: true,
        },
        snapshot: {
            type: 'string',
            description: 'Snapshot id from peekaboo_see.',
            required: true,
        },
        on: {
            type: 'string',
            description: 'Target element id or query.',
            required: true,
        },
        action: {
            type: 'string',
            description: 'Named AX action (e.g. "AXPress", "AXShowMenu").',
            required: true,
        },
    },
    tier: 'free',
    async execute(args) {
        // A truthy result from ensureBinary() is returned to the caller as-is.
        const binErr = await ensureBinary();
        if (binErr)
            return binErr;
        const app = String(args.app ?? '');
        if (!app)
            return 'Error: app is required.';
        // A truthy result from requireApproval() blocks the call and is returned verbatim.
        const gate = requireApproval(app);
        if (gate)
            return gate;
        const snapshot = String(args.snapshot ?? '');
        const on = String(args.on ?? '');
        const action = String(args.action ?? '');
        // Required arguments are checked in a fixed order so the first missing
        // one is the one reported.
        if (!snapshot)
            return 'Error: snapshot is required.';
        if (!on)
            return 'Error: on is required.';
        if (!action)
            return 'Error: action is required.';
        try {
            const out = await performAction({ snapshot, on, action });
            return outcomeToString(out);
        }
        catch (e) {
            // Anything can be thrown in JS; avoid "Error: undefined" for non-Error values.
            const reason = e instanceof Error ? e.message : String(e);
            return `Error: ${reason}`;
        }
    },
};
|
|
332
|
+
/**
 * Tool definition for `peekaboo_agent`: forwards a free-form prompt to the
 * `agent` adapter command.
 *
 * Unlike the other tool definitions in this file, `execute` does not call
 * `requireApproval`; the only pre-flight check is `ensureBinary()`.
 *
 * Parameters (read from `args`):
 *   - prompt (string, required) Instruction text; empty or non-string is rejected.
 *
 * `execute` always resolves to a string: either the formatted outcome from
 * `outcomeToString`, or an `Error: ...` message. It never rejects.
 */
const peekabooAgent = {
    name: 'peekaboo_agent',
    description: "Hand a natural-language automation prompt to peekaboo's own agent subcommand. Returns the agent's stdout verbatim. Not gated by app approval — peekaboo's agent decides which apps to drive.",
    parameters: {
        prompt: {
            type: 'string',
            description: 'Free-form instruction for the peekaboo agent.',
            required: true,
        },
    },
    tier: 'free',
    async execute(args) {
        // A truthy result from ensureBinary() is returned to the caller as-is.
        const binErr = await ensureBinary();
        if (binErr)
            return binErr;
        const prompt = typeof args.prompt === 'string' ? args.prompt : '';
        if (!prompt)
            return 'Error: prompt is required.';
        try {
            const out = await agent({ prompt });
            return outcomeToString(out);
        }
        catch (e) {
            // Anything can be thrown in JS; avoid "Error: undefined" for non-Error values.
            const reason = e instanceof Error ? e.message : String(e);
            return `Error: ${reason}`;
        }
    },
};
|
|
359
|
+
/**
 * All Peekaboo tool definitions exported by this module.
 * `registerPeekabooTools` registers them in exactly this order.
 */
export const peekabooTools = [
    peekabooSee,
    // Element-level actions.
    peekabooClick, peekabooType, peekabooSetValue, peekabooPerformAction,
    // Free-form prompt hand-off.
    peekabooAgent,
];
|
|
367
|
+
/**
 * Passes each entry of `peekabooTools` to `registerTool`, in array order.
 */
export function registerPeekabooTools() {
    peekabooTools.forEach((tool) => {
        registerTool(tool);
    });
}
|
|
371
|
+
//# sourceMappingURL=peekaboo.js.map
|
|
@@ -12,6 +12,7 @@ import { SECURITY_AGENT_TOOLS } from './security-agent-tools.js';
|
|
|
12
12
|
import { anthropicManagedAgentTools } from './anthropic-managed-agents-tools.js';
|
|
13
13
|
import { forecastSummaryTool } from './forecast-summary.js';
|
|
14
14
|
import { securityAuditLocalTool } from './security-audit-local.js';
|
|
15
|
+
import { registerPeekabooTools } from './peekaboo.js';
|
|
15
16
|
function adaptCoordinatorTool(t) {
|
|
16
17
|
const props = t.inputSchema.properties ?? {};
|
|
17
18
|
const required = new Set(t.inputSchema.required ?? []);
|
|
@@ -84,6 +85,7 @@ export function registerSwarm2026Tools() {
|
|
|
84
85
|
registerTool(fileLibraryGetTool);
|
|
85
86
|
registerTool(forecastSummaryTool);
|
|
86
87
|
registerTool(securityAuditLocalTool);
|
|
88
|
+
registerPeekabooTools();
|
|
87
89
|
for (const t of workspaceAgentTools)
|
|
88
90
|
registerTool(t);
|
|
89
91
|
for (const t of anthropicManagedAgentTools)
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@kernel.chat/kbot",
|
|
3
|
-
"version": "4.3.0",
|
|
3
|
+
"version": "4.4.0",
|
|
4
4
|
"description": "Open-source terminal AI agent. 100+ specialist skills, 35 specialist agents, 20 providers. Dreams, learns, watches your system. Controls your phone. Fully local, fully sovereign. MIT. v4.0 — evidence-based curation.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"repository": {
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: peekaboo-snapshot-act
|
|
3
|
+
description: Use when an agent needs to drive a macOS native app reliably — snapshot-then-act via Peekaboo's accessibility-aware element IDs beats screenshot-per-click for any app with proper AX support.
|
|
4
|
+
version: 1.0.0
|
|
5
|
+
author: kbot
|
|
6
|
+
license: MIT
|
|
7
|
+
metadata:
|
|
8
|
+
kbot:
|
|
9
|
+
tags: [computer-use, macos, accessibility, native-apps, automation]
|
|
10
|
+
related_skills: [computer-use-coordination, ableton-osc-control]
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
# Peekaboo Snapshot-Act
|
|
14
|
+
|
|
15
|
+
Every input surface has a right shape. Web is DOM — address elements by ref through Chrome MCP or Playwright. Audio is OSC — address tracks and clips through AbletonOSC's Live Object Model. Native macOS is AX — address controls through the Accessibility tree. Snapshot-and-act replaces screenshot-and-guess: capture the AX tree once, reference labeled element IDs many times, and let the OS resolve coordinates.
|
|
16
|
+
|
|
17
|
+
## When to Use
|
|
18
|
+
|
|
19
|
+
- Native macOS app automation where the target exposes Accessibility (Notes, Mail, System Settings, Music, Finder, Xcode, Numbers, Keynote, Pages).
|
|
20
|
+
- Forms with multiple fields where one snapshot fills them all.
|
|
21
|
+
- Repeated interactions inside the same app state where stable element IDs survive across calls.
|
|
22
|
+
- Any task where pixel coordinates would drift between window resizes, theme changes, or display scales.
|
|
23
|
+
|
|
24
|
+
Do **not** use this skill for:
|
|
25
|
+
|
|
26
|
+
- Browser tabs — use Chrome MCP (`mcp__claude-in-chrome__*`); the DOM is the right surface.
|
|
27
|
+
- Ableton Live or other audio software with OSC bridges — use `ableton_*` tools; OSC is the right surface.
|
|
28
|
+
- Apps with no Accessibility support — fall through to synthetic input via `mcp__computer-use__*` and accept the cost.
|
|
29
|
+
|
|
30
|
+
## Iron Laws
|
|
31
|
+
|
|
32
|
+
```
|
|
33
|
+
ONE SNAPSHOT, MANY ACTIONS.
|
|
34
|
+
ELEMENT ID OVER COORDINATES.
|
|
35
|
+
PERFORM-ACTION OVER CLICK.
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
A snapshot is a contract: while the UI does not change, the IDs are stable. Capture once, act many times, re-snapshot only on visible state change. An element ID survives where coordinates do not — themes shift, windows resize, scroll positions move; `B7` does not. And when an AX action is named (`AXPress`, `AXShowMenu`, `AXIncrement`), perform it directly; clicking the rendered pixel is a worse approximation of the user's intent.
|
|
39
|
+
|
|
40
|
+
## Five Phases
|
|
41
|
+
|
|
42
|
+
### Phase 1 — Approve & focus
|
|
43
|
+
|
|
44
|
+
Bring the target app forward and clear the per-app session lock before any Peekaboo call.
|
|
45
|
+
|
|
46
|
+
- `app_approve` — gate the sensitive-app warning; respect the user's per-app trust state.
|
|
47
|
+
- `app_launch` — bring the app to the front so AX queries hit the right process.
|
|
48
|
+
- One-time at the OS level: grant Peekaboo **Screen Recording** and **Accessibility** in System Settings → Privacy & Security. Without both, `see` returns an empty tree.
|
|
49
|
+
|
|
50
|
+
### Phase 2 — Capture surface
|
|
51
|
+
|
|
52
|
+
Pull the AX snapshot once.
|
|
53
|
+
|
|
54
|
+
```
|
|
55
|
+
peekaboo see --app <Name> --json
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
The response contains a snapshot ID and labeled element IDs by role: `B1` for the first button, `T1` for the first text field, `L1` for a link, `M1` for a menu. Read the labels, not the pixels. The snapshot ID is the handle every subsequent call references.
|
|
59
|
+
|
|
60
|
+
### Phase 3 — Choose the right verb
|
|
61
|
+
|
|
62
|
+
Three verbs cover almost every native interaction. Pick the most specific one that fits.
|
|
63
|
+
|
|
64
|
+
- `set-value` — for any settable field (text inputs, sliders, steppers). Faster and more reliable than `click + type`. Sets the AX value directly.
|
|
65
|
+
- `perform-action` — for any named AX action (`AXPress`, `AXShowMenu`, `AXConfirm`, `AXIncrement`, `AXDecrement`). Names the intent the OS already understands.
|
|
66
|
+
- `click` — only when neither of the above applies (custom non-AX views, web content embedded in a native shell).
|
|
67
|
+
|
|
68
|
+
### Phase 4 — Reuse the snapshot
|
|
69
|
+
|
|
70
|
+
Successive actions reference the same `--snapshot $ID` until the UI changes. Filling a five-field form is one snapshot and five `set-value` calls, not five snapshots and five clicks. Re-snapshot only when the visible state actually changes — a panel opens, a sheet appears, a navigation transitions. Re-snapshotting before every action defeats the entire pattern and is slower than synthetic input.
|
|
71
|
+
|
|
72
|
+
### Phase 5 — Fall back gracefully
|
|
73
|
+
|
|
74
|
+
If the AX path fails — element ID stale, app exposes no AX tree, action returns an error — fall through to synthetic input via `kbot_click` or `mcp__computer-use__*` and log the fallback. Record which app and which action degraded so the next session knows. Graceful degradation beats a hard failure, and a hard failure beats an opaque retry loop.
|
|
75
|
+
|
|
76
|
+
## Anti-Patterns
|
|
77
|
+
|
|
78
|
+
- Re-snapshotting before every click. The whole point is reuse — one snapshot, many actions.
|
|
79
|
+
- Using coordinates when an element ID exists. IDs survive resize, theme, and scale changes; coordinates do not.
|
|
80
|
+
- Ignoring `set-value` and falling through to `click + type` for text fields. Slower, less reliable, and breaks on focus drift.
|
|
81
|
+
- Driving Chrome with Peekaboo. Chrome MCP exists for a reason; the DOM is the right surface for the web.
|
|
82
|
+
- Skipping `app_approve`. The per-app session lock and sensitive-app warnings still apply — Peekaboo does not bypass kbot's trust model.
|
|
83
|
+
|
|
84
|
+
## How kbot Helps
|
|
85
|
+
|
|
86
|
+
- `peekaboo_see` — wraps `peekaboo see --json` with kbot's lock + approval flow; returns snapshot ID and element IDs.
|
|
87
|
+
- `peekaboo_click` / `peekaboo_type` / `peekaboo_set_value` / `peekaboo_perform_action` — the four verbs, each lock-aware and approval-gated.
|
|
88
|
+
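- `peekaboo_agent` — hands a natural-language prompt to Peekaboo's own `agent` subcommand for multi-step flows and returns its output. Unlike the four verbs above, it is **not** gated by kbot's per-app approval — Peekaboo's agent decides which apps to drive.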
|
|
89
|
+
- `kbot_click` — falls through to the AX-first path automatically when the `peekaboo` binary is on `PATH`; no caller change required.
|
|
90
|
+
- `app_approve` — gates per-app sensitive-app warnings before any Peekaboo call lands.
|
|
91
|
+
- Coordinator — the same per-app sub-locks apply across Peekaboo and the existing `computer.ts` synthetic-input path; both share one lock file, so AX and pixel routes never race against each other.
|