@kernel.chat/kbot 4.4.0 → 4.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -23,13 +23,13 @@ export interface SetValueOptions {
23
23
  on: string;
24
24
  value: string;
25
25
  }
26
- export declare function setValue(opts: SetValueOptions): Promise<PeekabooOutcome<PeekabooSetValueResult>>;
26
+ export declare function setValue(_opts: SetValueOptions): Promise<PeekabooOutcome<PeekabooSetValueResult>>;
27
27
  export interface PerformActionOptions {
28
28
  snapshot: string;
29
29
  on: string;
30
30
  action: string;
31
31
  }
32
- export declare function performAction(opts: PerformActionOptions): Promise<PeekabooOutcome<PeekabooPerformActionResult>>;
32
+ export declare function performAction(_opts: PerformActionOptions): Promise<PeekabooOutcome<PeekabooPerformActionResult>>;
33
33
  export interface AgentOptions {
34
34
  prompt: string;
35
35
  }
@@ -4,6 +4,10 @@
4
4
  // through `runPeekaboo`, and parses stdout into the appropriate result
5
5
  // type. Non-zero exits and malformed JSON are returned as `PeekabooError`
6
6
  // rather than thrown — callers fan out via discriminated unions.
7
+ //
8
+ // Every peekaboo 3.0.0-beta4 JSON command wraps its payload in
9
+ // `{ success, data, error? }`. Helpers unwrap that envelope and treat
10
+ // `success: false` as a structured failure rather than a parse error.
7
11
  import { runPeekaboo } from './runner.js';
8
12
  function failNonZero(code, stdout, stderr) {
9
13
  return {
@@ -41,12 +45,42 @@ function isRecord(v) {
41
45
  function asString(v, fallback = '') {
42
46
  return typeof v === 'string' ? v : fallback;
43
47
  }
44
- function asBool(v, fallback = false) {
45
- return typeof v === 'boolean' ? v : fallback;
48
+ function asOptString(v) {
49
+ return typeof v === 'string' ? v : undefined;
50
+ }
51
+ function asOptBool(v) {
52
+ return typeof v === 'boolean' ? v : undefined;
53
+ }
54
+ function asOptNumber(v) {
55
+ return typeof v === 'number' && Number.isFinite(v) ? v : undefined;
46
56
  }
47
57
  function asNumber(v, fallback = 0) {
48
58
  return typeof v === 'number' && Number.isFinite(v) ? v : fallback;
49
59
  }
60
+ /**
61
+ * Unwrap the `{ success, data, error? }` envelope every peekaboo 3.0.0-beta4
62
+ * JSON command emits. Unless `success` is `false`, returns the inner `data`
63
+ * record; on `success: false`, a non-object root, or a non-object `data` field,
64
+ * returns a structured PeekabooError so callers can propagate it through their discriminated union.
65
+ */
66
+ function unwrapEnvelope(command, parsed, stdout) {
67
+ if (!isRecord(parsed)) {
68
+ return { ok: false, err: failParse(`${command}: expected object at root`, stdout) };
69
+ }
70
+ if (parsed.success === false) {
71
+ const errMsg = isRecord(parsed.error)
72
+ ? asString(parsed.error.message, asString(parsed.error.code, 'unknown error'))
73
+ : asString(parsed.error, 'unknown error');
74
+ return {
75
+ ok: false,
76
+ err: failParse(`${command}: success=false: ${errMsg}`, stdout),
77
+ };
78
+ }
79
+ if (!isRecord(parsed.data)) {
80
+ return { ok: false, err: failParse(`${command}: missing data field`, stdout) };
81
+ }
82
+ return { ok: true, data: parsed.data };
83
+ }
50
84
  export async function see(opts = {}) {
51
85
  const args = ['see', '--json'];
52
86
  if (opts.app)
@@ -61,39 +95,39 @@ export async function see(opts = {}) {
61
95
  const parsed = parseJson(stdout);
62
96
  if (!parsed.ok)
63
97
  return parsed.err;
64
- const v = parsed.value;
65
- if (!isRecord(v))
66
- return failParse('see: expected object at root', stdout);
67
- const rawElements = Array.isArray(v.elements) ? v.elements : [];
98
+ const env = unwrapEnvelope('see', parsed.value, stdout);
99
+ if (!env.ok)
100
+ return env.err;
101
+ const data = env.data;
102
+ const rawElements = Array.isArray(data.ui_elements) ? data.ui_elements : [];
68
103
  const elements = rawElements.flatMap((el) => {
69
104
  if (!isRecord(el))
70
105
  return [];
71
- const frameRaw = isRecord(el.frame) ? el.frame : {};
72
106
  return [
73
107
  {
74
108
  id: asString(el.id),
75
109
  role: asString(el.role),
76
- label: typeof el.label === 'string' ? el.label : undefined,
77
- frame: {
78
- x: asNumber(frameRaw.x),
79
- y: asNumber(frameRaw.y),
80
- width: asNumber(frameRaw.width),
81
- height: asNumber(frameRaw.height),
82
- },
83
- settable: typeof el.settable === 'boolean' ? el.settable : undefined,
84
- named_actions: Array.isArray(el.named_actions)
85
- ? el.named_actions.filter((a) => typeof a === 'string')
86
- : undefined,
110
+ roleDescription: asOptString(el.role_description),
111
+ label: asOptString(el.label),
112
+ description: asOptString(el.description),
113
+ help: asOptString(el.help),
114
+ identifier: asOptString(el.identifier),
115
+ title: asOptString(el.title),
116
+ isActionable: asOptBool(el.is_actionable),
87
117
  },
88
118
  ];
89
119
  });
90
120
  return {
91
121
  ok: true,
92
- snapshot: asString(v.snapshot),
93
- app: typeof v.app === 'string' ? v.app : undefined,
94
- window: typeof v.window === 'string' ? v.window : undefined,
122
+ snapshot: asString(data.snapshot_id),
95
123
  elements,
96
- screenshot_path: typeof v.screenshot_path === 'string' ? v.screenshot_path : undefined,
124
+ applicationName: asOptString(data.application_name),
125
+ windowTitle: asOptString(data.window_title),
126
+ elementCount: asOptNumber(data.element_count),
127
+ interactableCount: asOptNumber(data.interactable_count),
128
+ captureMode: asOptString(data.capture_mode),
129
+ uiMap: asOptString(data.ui_map),
130
+ screenshotPath: asOptString(data.screenshot_path),
97
131
  };
98
132
  }
99
133
  export async function click(opts) {
@@ -103,27 +137,29 @@ export async function click(opts) {
103
137
  if (opts.coords)
104
138
  args.push('--coords', `${opts.coords[0]},${opts.coords[1]}`);
105
139
  if (typeof opts.wait === 'number')
106
- args.push('--wait', String(opts.wait));
140
+ args.push('--wait-for', String(opts.wait));
107
141
  const { stdout, stderr, code } = await runPeekaboo(args);
108
142
  if (code !== 0)
109
143
  return failNonZero(code, stdout, stderr);
110
144
  const parsed = parseJson(stdout);
111
145
  if (!parsed.ok)
112
146
  return parsed.err;
113
- if (!isRecord(parsed.value))
114
- return failParse('click: expected object at root', stdout);
115
- const v = parsed.value;
147
+ const env = unwrapEnvelope('click', parsed.value, stdout);
148
+ if (!env.ok)
149
+ return env.err;
150
+ const v = env.data;
116
151
  return {
117
152
  ok: true,
118
- target: typeof v.target === 'string' ? v.target : undefined,
153
+ target: asOptString(v.target),
119
154
  coords: Array.isArray(v.coords) && v.coords.length === 2
120
155
  ? [asNumber(v.coords[0]), asNumber(v.coords[1])]
121
156
  : undefined,
122
157
  };
123
158
  }
124
159
  // `type` is reserved in TS; export the helper as `type_`.
160
+ // peekaboo 3.0.0-beta4 takes the text as a positional argument, not a --text flag.
125
161
  export async function type_(opts) {
126
- const args = ['type', '--json', '--text', opts.text];
162
+ const args = ['type', '--json', opts.text];
127
163
  if (opts.clear)
128
164
  args.push('--clear');
129
165
  if (typeof opts.delayMs === 'number')
@@ -134,65 +170,34 @@ export async function type_(opts) {
134
170
  const parsed = parseJson(stdout);
135
171
  if (!parsed.ok)
136
172
  return parsed.err;
137
- if (!isRecord(parsed.value))
138
- return failParse('type: expected object at root', stdout);
139
- const v = parsed.value;
173
+ const env = unwrapEnvelope('type', parsed.value, stdout);
174
+ if (!env.ok)
175
+ return env.err;
176
+ const v = env.data;
140
177
  return {
141
178
  ok: true,
142
179
  typed: asString(v.typed, opts.text),
143
- cleared: typeof v.cleared === 'boolean' ? v.cleared : undefined,
180
+ cleared: asOptBool(v.cleared),
144
181
  };
145
182
  }
146
- export async function setValue(opts) {
147
- const args = [
148
- 'set-value',
149
- '--json',
150
- '--snapshot',
151
- opts.snapshot,
152
- '--on',
153
- opts.on,
154
- '--value',
155
- opts.value,
156
- ];
157
- const { stdout, stderr, code } = await runPeekaboo(args);
158
- if (code !== 0)
159
- return failNonZero(code, stdout, stderr);
160
- const parsed = parseJson(stdout);
161
- if (!parsed.ok)
162
- return parsed.err;
163
- if (!isRecord(parsed.value))
164
- return failParse('set-value: expected object at root', stdout);
165
- const v = parsed.value;
183
+ export async function setValue(_opts) {
166
184
  return {
167
- ok: true,
168
- target: asString(v.target, opts.on),
169
- value: asString(v.value, opts.value),
185
+ ok: false,
186
+ error: {
187
+ code: 'unknown',
188
+ message: "peekaboo 3.0.0-beta4 does not expose a 'set-value' top-level command. " +
189
+ 'Track upstream: https://github.com/openclaw/Peekaboo',
190
+ },
170
191
  };
171
192
  }
172
- export async function performAction(opts) {
173
- const args = [
174
- 'perform-action',
175
- '--json',
176
- '--snapshot',
177
- opts.snapshot,
178
- '--on',
179
- opts.on,
180
- '--action',
181
- opts.action,
182
- ];
183
- const { stdout, stderr, code } = await runPeekaboo(args);
184
- if (code !== 0)
185
- return failNonZero(code, stdout, stderr);
186
- const parsed = parseJson(stdout);
187
- if (!parsed.ok)
188
- return parsed.err;
189
- if (!isRecord(parsed.value))
190
- return failParse('perform-action: expected object at root', stdout);
191
- const v = parsed.value;
193
+ export async function performAction(_opts) {
192
194
  return {
193
- ok: true,
194
- target: asString(v.target, opts.on),
195
- action: asString(v.action, opts.action),
195
+ ok: false,
196
+ error: {
197
+ code: 'unknown',
198
+ message: "peekaboo 3.0.0-beta4 does not expose a 'perform-action' top-level command. " +
199
+ 'Track upstream: https://github.com/openclaw/Peekaboo',
200
+ },
196
201
  };
197
202
  }
198
203
  /**
@@ -1,4 +1,4 @@
1
- export type { PeekabooFrame, PeekabooElement, PeekabooSeeResult, PeekabooClickResult, PeekabooTypeResult, PeekabooSetValueResult, PeekabooPerformActionResult, PeekabooAgentResult, PeekabooError, PeekabooOutcome, } from './types.js';
1
+ export type { PeekabooElement, PeekabooSeeResult, PeekabooClickResult, PeekabooTypeResult, PeekabooSetValueResult, PeekabooPerformActionResult, PeekabooAgentResult, PeekabooError, PeekabooOutcome, } from './types.js';
2
2
  export { runPeekaboo, peekabooAvailable, type RunOptions, type RunResult } from './runner.js';
3
3
  export { see, click, type_, setValue, performAction, agent, type SeeOptions, type ClickOptions, type TypeOptions, type SetValueOptions, type PerformActionOptions, type AgentOptions, } from './commands.js';
4
4
  //# sourceMappingURL=index.d.ts.map
@@ -1,28 +1,34 @@
1
- export interface PeekabooFrame {
2
- x: number;
3
- y: number;
4
- width: number;
5
- height: number;
6
- }
7
1
  export interface PeekabooElement {
8
- /** Element handle, e.g. "B1" (button), "T1" (text field). */
2
+ /** Element handle, e.g. "elem_19" or "elem_169" (integer-suffixed in 3.0.0-beta4). */
9
3
  id: string;
10
4
  role: string;
5
+ /** Human-readable role description, e.g. "increment page button". */
6
+ roleDescription?: string;
11
7
  label?: string;
12
- frame: PeekabooFrame;
13
- /** Whether the element accepts a value (text fields, sliders, etc.). */
14
- settable?: boolean;
15
- /** Action names the element advertises via the AX API. */
16
- named_actions?: string[];
8
+ description?: string;
9
+ /** AX help text, e.g. "Share the selected items". */
10
+ help?: string;
11
+ /** Stable AX identifier when the app sets one (e.g. "QuickActionMoreButton"). */
12
+ identifier?: string;
13
+ /** Window/element title; often empty for buttons. */
14
+ title?: string;
15
+ /** Whether the element is interactable (clickable / focusable). */
16
+ isActionable?: boolean;
17
17
  }
18
18
  export interface PeekabooSeeResult {
19
19
  /** Snapshot id used by subsequent `--snapshot $id` arguments. */
20
20
  snapshot: string;
21
- app?: string;
22
- window?: string;
23
21
  elements: PeekabooElement[];
22
+ /** Application name as reported by Peekaboo. */
23
+ applicationName?: string;
24
+ windowTitle?: string;
25
+ elementCount?: number;
26
+ interactableCount?: number;
27
+ captureMode?: string;
28
+ /** Path to the JSON UI map written by Peekaboo. */
29
+ uiMap?: string;
24
30
  /** Optional path on disk where the screenshot was written. */
25
- screenshot_path?: string;
31
+ screenshotPath?: string;
26
32
  }
27
33
  export interface PeekabooClickResult {
28
34
  ok: boolean;
@@ -4,5 +4,10 @@
4
4
  // (https://github.com/openclaw/Peekaboo) without taking a runtime
5
5
  // dependency. kbot stays binary-agnostic; this adapter only ever
6
6
  // speaks JSON across the process boundary.
7
+ //
8
+ // Schema is calibrated to peekaboo 3.0.0-beta4: every command wraps
9
+ // its payload in `{ success, data, error? }`, element ids look like
10
+ // `elem_19` / `elem_169`, and elements expose AX role/label/help
11
+ // fields rather than frame rectangles or named-actions arrays.
7
12
  export {};
8
13
  //# sourceMappingURL=types.js.map
@@ -63,7 +63,7 @@ async function ensureBinary() {
63
63
  // ── Tool definitions ───────────────────────────────────────────────────
64
64
  const peekabooSee = {
65
65
  name: 'peekaboo_see',
66
- description: 'Capture an AX snapshot of an app or the screen via the Peekaboo CLI. Returns a snapshot id plus a list of labeled element ids (B1, T1, …) usable by peekaboo_click / peekaboo_set_value / peekaboo_perform_action.',
66
+ description: 'Capture an AX snapshot of an app or the screen via the Peekaboo CLI. Returns a snapshot id plus a list of element ids (e.g. elem_19, elem_169) usable by peekaboo_click / peekaboo_type.',
67
67
  parameters: {
68
68
  app: {
69
69
  type: 'string',
@@ -120,7 +120,7 @@ const peekabooClick = {
120
120
  },
121
121
  on: {
122
122
  type: 'string',
123
- description: 'Element id (e.g. "B1") or query string. Mutually exclusive with coords.',
123
+ description: 'Element id (e.g. "elem_169") or query string. Mutually exclusive with coords.',
124
124
  },
125
125
  coords: {
126
126
  type: 'string',
@@ -225,7 +225,7 @@ const peekabooType = {
225
225
  };
226
226
  const peekabooSetValue = {
227
227
  name: 'peekaboo_set_value',
228
- description: 'Set a settable AX value directly on an element (skips clicking). Faster than click+type for text fields, sliders, etc.',
228
+ description: "Set a settable AX value directly on an element (skips clicking). NOTE: requires Peekaboo CLI with the 'set-value' top-level command, which is absent in 3.0.0-beta4. Use peekaboo_click + peekaboo_type as a workaround.",
229
229
  parameters: {
230
230
  app: {
231
231
  type: 'string',
@@ -249,35 +249,16 @@ const peekabooSetValue = {
249
249
  },
250
250
  },
251
251
  tier: 'free',
252
- async execute(args) {
253
- const binErr = await ensureBinary();
254
- if (binErr)
255
- return binErr;
256
- const app = String(args.app ?? '');
257
- if (!app)
258
- return 'Error: app is required.';
259
- const gate = requireApproval(app);
260
- if (gate)
261
- return gate;
262
- const snapshot = String(args.snapshot ?? '');
263
- const on = String(args.on ?? '');
264
- const value = typeof args.value === 'string' ? args.value : '';
265
- if (!snapshot)
266
- return 'Error: snapshot is required.';
267
- if (!on)
268
- return 'Error: on is required.';
269
- try {
270
- const out = await setValue({ snapshot, on, value });
271
- return outcomeToString(out);
272
- }
273
- catch (e) {
274
- return `Error: ${e.message}`;
275
- }
252
+ async execute(_args) {
253
+ void setValue;
254
+ return ("Error: peekaboo_set_value requires Peekaboo CLI with the 'set-value' top-level command, " +
255
+ 'which is not present in 3.0.0-beta4. Workaround: peekaboo_click then peekaboo_type. ' +
256
+ 'Track upstream: https://github.com/openclaw/Peekaboo');
276
257
  },
277
258
  };
278
259
  const peekabooPerformAction = {
279
260
  name: 'peekaboo_perform_action',
280
- description: 'Invoke a named AX action (e.g. AXPress, AXShowMenu) on an element from a Peekaboo snapshot.',
261
+ description: "Invoke a named AX action (e.g. AXPress, AXShowMenu) on an element from a Peekaboo snapshot. NOTE: requires Peekaboo CLI with the 'perform-action' top-level command, which is absent in 3.0.0-beta4. Use peekaboo_click as a workaround.",
281
262
  parameters: {
282
263
  app: {
283
264
  type: 'string',
@@ -301,32 +282,11 @@ const peekabooPerformAction = {
301
282
  },
302
283
  },
303
284
  tier: 'free',
304
- async execute(args) {
305
- const binErr = await ensureBinary();
306
- if (binErr)
307
- return binErr;
308
- const app = String(args.app ?? '');
309
- if (!app)
310
- return 'Error: app is required.';
311
- const gate = requireApproval(app);
312
- if (gate)
313
- return gate;
314
- const snapshot = String(args.snapshot ?? '');
315
- const on = String(args.on ?? '');
316
- const action = String(args.action ?? '');
317
- if (!snapshot)
318
- return 'Error: snapshot is required.';
319
- if (!on)
320
- return 'Error: on is required.';
321
- if (!action)
322
- return 'Error: action is required.';
323
- try {
324
- const out = await performAction({ snapshot, on, action });
325
- return outcomeToString(out);
326
- }
327
- catch (e) {
328
- return `Error: ${e.message}`;
329
- }
285
+ async execute(_args) {
286
+ void performAction;
287
+ return ("Error: peekaboo_perform_action requires Peekaboo CLI with the 'perform-action' top-level command, " +
288
+ 'which is not present in 3.0.0-beta4. Workaround: peekaboo_click on the element. ' +
289
+ 'Track upstream: https://github.com/openclaw/Peekaboo');
330
290
  },
331
291
  };
332
292
  const peekabooAgent = {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@kernel.chat/kbot",
3
- "version": "4.4.0",
3
+ "version": "4.4.1",
4
4
  "description": "Open-source terminal AI agent. 100+ specialist skills, 35 specialist agents, 20 providers. Dreams, learns, watches your system. Controls your phone. Fully local, fully sovereign. MIT. v4.0 — evidence-based curation.",
5
5
  "type": "module",
6
6
  "repository": {
@@ -35,7 +35,7 @@ ELEMENT ID OVER COORDINATES.
35
35
  PERFORM-ACTION OVER CLICK.
36
36
  ```
37
37
 
38
- A snapshot is a contract: while the UI does not change, the IDs are stable. Capture once, act many times, re-snapshot only on visible state change. An element ID survives where coordinates do not — themes shift, windows resize, scroll positions move; `B7` does not. And when an AX action is named (`AXPress`, `AXShowMenu`, `AXIncrement`), perform it directly; clicking the rendered pixel is a worse approximation of the user's intent.
38
+ A snapshot is a contract: while the UI does not change, the IDs are stable. Capture once, act many times, re-snapshot only on visible state change. An element ID survives where coordinates do not — themes shift, windows resize, scroll positions move; `elem_169` does not. And when an AX action is named (`AXPress`, `AXShowMenu`, `AXIncrement`), perform it directly; clicking the rendered pixel is a worse approximation of the user's intent.
39
39
 
40
40
  ## Five Phases
41
41
 
@@ -55,19 +55,21 @@ Pull the AX snapshot once.
55
55
  peekaboo see --app <Name> --json
56
56
  ```
57
57
 
58
- The response contains a snapshot ID and labeled element IDs by role: `B1` for the first button, `T1` for the first text field, `L1` for a link, `M1` for a menu. Read the labels, not the pixels. The snapshot ID is the handle every subsequent call references.
58
+ The response contains a snapshot ID (`data.snapshot_id`) and a list of UI elements (`data.ui_elements`), each with an integer-suffixed id like `elem_19`, `elem_85`, `elem_169`. Read the labels and roles, not the pixels. The snapshot ID is the handle every subsequent call references.
59
+
60
+ > Element IDs in 3.0.0-beta4 take the form `elem_NN` (a string with an integer suffix); the `B1`/`T2` examples in the upstream Peekaboo README target a future schema.
59
61
 
60
62
  ### Phase 3 — Choose the right verb
61
63
 
62
64
  Three verbs cover almost every native interaction. Pick the most specific one that fits.
63
65
 
64
- - `set-value` — for any settable field (text inputs, sliders, steppers). Faster and more reliable than `click + type`. Sets the AX value directly.
65
- - `perform-action` — for any named AX action (`AXPress`, `AXShowMenu`, `AXConfirm`, `AXIncrement`, `AXDecrement`). Names the intent the OS already understands.
66
- - `click` — only when neither of the above applies (custom non-AX views, web content embedded in a native shell).
66
+ - `click` — the universal verb in 3.0.0-beta4. Targets an element by id (`--on elem_169`), by query string, or by raw `--coords x,y`.
67
+ - `type` — text input. Assumes a focused field; pair with a prior `click` to focus.
68
+ - `set-value` / `perform-action` — reserved for future Peekaboo releases. The 3.0.0-beta4 binary does not expose them as top-level commands; the kbot tools surface a clear error pointing at the workaround (`click` + `type`). Track upstream at https://github.com/openclaw/Peekaboo.
67
69
 
68
70
  ### Phase 4 — Reuse the snapshot
69
71
 
70
- Successive actions reference the same `--snapshot $ID` until the UI changes. Filling a five-field form is one snapshot and five `set-value` calls, not five snapshots and five clicks. Re-snapshot only when the visible state actually changes — a panel opens, a sheet appears, a navigation transitions. Re-snapshotting before every action defeats the entire pattern and is slower than synthetic input.
72
+ Successive actions reference the same `--snapshot $ID` until the UI changes. Filling a five-field form is one snapshot and five `click`/`type` pairs, not five snapshots and five blind clicks. Re-snapshot only when the visible state actually changes — a panel opens, a sheet appears, a navigation transitions. Re-snapshotting before every action defeats the entire pattern and is slower than synthetic input.
71
73
 
72
74
  ### Phase 5 — Fall back gracefully
73
75
 
@@ -77,7 +79,7 @@ If the AX path fails — element ID stale, app exposes no AX tree, action return
77
79
 
78
80
  - Re-snapshotting before every click. The whole point is reuse — one snapshot, many actions.
79
81
  - Using coordinates when an element ID exists. IDs survive resize, theme, and scale changes; coordinates do not.
80
- - Ignoring `set-value` and falling through to `click + type` for text fields. Slower, less reliable, and breaks on focus drift.
82
+ - Assuming the README's `set-value` / `perform-action` commands exist in the installed binary. They are absent in 3.0.0-beta4; until a later Peekaboo release ships them, `click + type` is the path.
81
83
  - Driving Chrome with Peekaboo. Chrome MCP exists for a reason; the DOM is the right surface for the web.
82
84
  - Skipping `app_approve`. The per-app session lock and sensitive-app warnings still apply — Peekaboo does not bypass kbot's trust model.
83
85