@onkernel/cua-agent 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,5 +1,752 @@
1
+ import { Agent, AgentHarness } from "@earendil-works/pi-agent-core";
2
+ import { NodeExecutionEnv } from "@earendil-works/pi-agent-core/node";
3
+ import { CUA_NAVIGATION_TOOL_NAME, createCuaNavigationToolDefinition, getCuaEnvApiKey, normalizeGotoUrl, resolveCuaRuntimeSpec, streamSimple } from "@onkernel/cua-ai";
4
+ import sharp from "sharp";
1
5
  export * from "@earendil-works/pi-agent-core";
2
- export { createCuaComputerTools } from "./tools";
3
- export { SUPPORTED_CUA_EXECUTOR_TOOL_NAMES } from "./tools";
4
- export { CuaAgent, CuaHarness } from "./agent";
5
- //# sourceMappingURL=index.js.map
6
+ //#region src/translator/keys.ts
7
/** Keysym names that are treated as modifiers (sent as `hold_keys` by `keypress`). */
const KERNEL_MODIFIER_KEYSYMS = [
  "Control_L",
  "Alt_L",
  "Shift_L",
  "Super_L"
];
/**
 * Named-key aliases. Keys are lower-case with `-`/whitespace replaced by `_`
 * (the form `normalizeKernelKey` looks up); values are the keysym names that
 * are forwarded to the Kernel API.
 */
const KEY_ALIASES = {
  alt: "Alt_L",
  alt_l: "Alt_L",
  altleft: "Alt_L",
  backspace: "BackSpace",
  backquote: "grave",
  backslash: "backslash",
  bracketleft: "bracketleft",
  bracketright: "bracketright",
  capslock: "Caps_Lock",
  cmd: "Super_L",
  comma: "comma",
  command: "Super_L",
  control: "Control_L",
  control_l: "Control_L",
  controlleft: "Control_L",
  ctrl: "Control_L",
  delete: "Delete",
  down: "Down",
  end: "End",
  enter: "Return",
  equal: "equal",
  esc: "Escape",
  escape: "Escape",
  home: "Home",
  insert: "Insert",
  kp_enter: "Return",
  left: "Left",
  meta: "Super_L",
  minus: "minus",
  numlock: "Num_Lock",
  option: "Alt_L",
  pagedown: "Next",
  page_down: "Next",
  pageup: "Prior",
  page_up: "Prior",
  pause: "Pause",
  period: "period",
  plus: "plus",
  print: "Print",
  printscreen: "Print",
  quote: "apostrophe",
  return: "Return",
  right: "Right",
  scrolllock: "Scroll_Lock",
  semicolon: "semicolon",
  shift: "Shift_L",
  shift_l: "Shift_L",
  shiftleft: "Shift_L",
  slash: "slash",
  space: "space",
  super: "Super_L",
  super_l: "Super_L",
  tab: "Tab",
  up: "Up",
  // f1..f12 -> F1..F12
  ...Object.fromEntries(Array.from({ length: 12 }, (_, i) => [`f${i + 1}`, `F${i + 1}`])),
  // numpad0..numpad9 -> "0".."9"
  ...Object.fromEntries(Array.from({ length: 10 }, (_, i) => [`numpad${i}`, String(i)])),
  numpadadd: "plus",
  numpaddecimal: "period",
  numpaddivide: "slash",
  numpadmultiply: "asterisk",
  numpadsubtract: "minus"
};
/** Literal punctuation characters mapped to their keysym names. */
const PRINTABLE_KEY_ALIASES = {
  "*": "asterisk",
  "+": "plus",
  ",": "comma",
  "-": "minus",
  ".": "period",
  "/": "slash",
  ";": "semicolon",
  "=": "equal",
  "[": "bracketleft",
  "\\": "backslash",
  "]": "bracketright",
  "`": "grave",
  "'": "apostrophe"
};
const KERNEL_MODIFIER_KEYSYM_SET = new Set(KERNEL_MODIFIER_KEYSYMS);
/**
 * Look up `key` among the table's OWN properties only.
 *
 * A bare `table[key]` also resolves inherited members such as `constructor`
 * or `toString`, which would make the normalizer return a function instead of
 * a key name.
 */
function lookupOwnAlias(table, key) {
  return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : void 0;
}
/**
 * Normalize a single key name to the keysym name used for Kernel key events.
 *
 * Resolution order: literal punctuation, named aliases (case-insensitive,
 * `-`/whitespace treated as `_`), `Arrow*` prefixes, single upper-case ASCII
 * letters (lower-cased). Anything else is returned trimmed and unchanged.
 *
 * @param value Raw key name, e.g. `"ctrl"`, `"ArrowLeft"`, `"Page Down"`, `"/"`.
 * @returns The normalized key name; an empty string when `value` is blank.
 */
function normalizeKernelKey(value) {
  const trimmed = value.trim();
  const printable = lookupOwnAlias(PRINTABLE_KEY_ALIASES, trimmed);
  if (printable) return printable;
  const alias = lookupOwnAlias(KEY_ALIASES, trimmed.replace(/[-\s]/g, "_").toLowerCase());
  if (alias) return alias;
  // "ArrowLeft", "Arrow-Left" and "arrow_left" all resolve through the bare direction name.
  if (/^arrow/i.test(trimmed)) return normalizeKernelKey(trimmed.slice(5).replace(/^[-_\s]+/, ""));
  if (trimmed.length === 1 && trimmed >= "A" && trimmed <= "Z") return trimmed.toLowerCase();
  return trimmed;
}
/**
 * Split a `+`-separated combo (e.g. `"ctrl+shift+a"`) into normalized keys.
 *
 * A literal plus key is recognised where splitting on `+` leaves two
 * consecutive empty segments: `"+"` yields `["plus"]` and `"ctrl++"` yields
 * `["Control_L", "plus"]`. Other empty segments are dropped.
 *
 * @param value Combo string.
 * @returns Normalized key names in their original order.
 */
function normalizeKernelKeyCombo(value) {
  const rawParts = value.split("+");
  const parts = [];
  for (let i = 0; i < rawParts.length; i++) {
    const isBlank = rawParts[i].trim() === "";
    const nextIsBlank = i + 1 < rawParts.length && rawParts[i + 1].trim() === "";
    if (isBlank && nextIsBlank) {
      // Two blanks in a row are the two sides of a literal "+" character.
      parts.push("+");
      i++;
    } else parts.push(rawParts[i]);
  }
  return parts.map((part) => normalizeKernelKey(part)).filter(Boolean);
}
/** @returns `true` when `key` is one of `KERNEL_MODIFIER_KEYSYMS`. */
function isKernelModifierKey(key) {
  return KERNEL_MODIFIER_KEYSYM_SET.has(key);
}
107
+ //#endregion
108
+ //#region src/translator/translator.ts
109
/**
 * Runs computer-use actions against a single Kernel browser session.
 *
 * Read-style actions (screenshot, url, cursor position) are answered through
 * dedicated client calls; every other action is converted to a Kernel batch
 * action and sent via `client.browsers.computer.batch`.
 */
var InternalComputerTranslator = class {
  sessionId;
  client;
  coordinateSystem;
  screenshotSpec;
  viewport;
  /**
   * @param opts `browser` (provides `session_id` and an optional `viewport`),
   *   `client`, plus optional `coordinateSystem` and `screenshot` settings.
   */
  constructor(opts) {
    const { browser } = opts;
    this.sessionId = browser.session_id;
    this.client = opts.client;
    this.coordinateSystem = opts.coordinateSystem ?? { type: "pixel" };
    this.screenshotSpec = opts.screenshot;
    // Fallback viewport used when the browser object does not report one.
    this.viewport = browser.viewport ?? {
      width: 1920,
      height: 1080
    };
  }
  /** @returns Only the image bytes of `screenshot()`. */
  async screenshotRaw() {
    const { data } = await this.screenshot();
    return data;
  }
  /**
   * Capture the screen and, when a screenshot transform is configured, resize
   * and re-encode it with sharp.
   *
   * @returns `{ data, mimeType }` where `data` is a Buffer.
   */
  async screenshot() {
    const response = await this.client.browsers.computer.captureScreenshot(this.sessionId, {});
    const captured = Buffer.from(await response.arrayBuffer());
    const transform = this.screenshotSpec?.transform;
    if (!transform) return {
      data: captured,
      mimeType: "image/png"
    };
    // `fit: "fill"` stretches to exactly width x height.
    const resized = sharp(captured).resize(transform.width, transform.height, { fit: "fill" });
    switch (transform.format) {
      case "webp": return {
        data: await resized.webp({ quality: transform.quality }).toBuffer(),
        mimeType: "image/webp"
      };
      case "jpeg": return {
        data: await resized.jpeg({ quality: transform.quality }).toBuffer(),
        mimeType: "image/jpeg"
      };
      default: return {
        data: await resized.png().toBuffer(),
        mimeType: "image/png"
      };
    }
  }
  /**
   * Read the current URL by pressing Control+L then Control+C and reading the
   * session clipboard afterwards.
   *
   * @returns The trimmed clipboard text ("" when the clipboard has no text).
   */
  async currentUrl() {
    const focusThenCopy = [keypress(["Control", "l"]), keypress(["Control", "c"])];
    await this.runKernelBatch(focusThenCopy);
    const clipboard = await this.client.browsers.computer.readClipboard(this.sessionId);
    return (clipboard.text ?? "").trim();
  }
  /** @returns The mouse position reported by the client, as integers. */
  async currentMousePosition() {
    const { x, y } = await this.client.browsers.computer.getMousePosition(this.sessionId);
    return {
      x: toInt(x),
      y: toInt(y)
    };
  }
  /**
   * Execute actions in order. Consecutive write actions are queued and sent as
   * one Kernel batch; the queue is sent before each read action so reads
   * observe the effects of everything that preceded them.
   *
   * @param actions Action objects with a string `type`.
   * @returns `{ readResults }` with one entry per read action, in order.
   */
  async executeBatch(actions) {
    const readResults = [];
    let queued = [];
    const sendQueued = async () => {
      if (queued.length === 0) return;
      const batch = queued;
      queued = [];
      await this.runKernelBatch(batch);
    };
    for (const action of actions) {
      const type = typeof action.type === "string" ? action.type : "";
      switch (type) {
        case "screenshot": {
          await sendQueued();
          const shot = await this.screenshot();
          readResults.push({
            type: "screenshot",
            ...shot
          });
          break;
        }
        case "url": {
          await sendQueued();
          const url = await this.currentUrl();
          readResults.push({
            type: "url",
            url
          });
          break;
        }
        case "cursor_position": {
          await sendQueued();
          const position = await this.currentMousePosition();
          readResults.push({
            type: "cursor_position",
            ...position
          });
          break;
        }
        case "goto": {
          // Navigation is driven through the keyboard: Control+L, type the URL, Enter.
          const url = normalizeGotoUrl(action.url) ?? "";
          queued.push(keypress(["Control", "l"]), {
            type: "type_text",
            type_text: { text: url }
          }, keypress(["Enter"]));
          break;
        }
        case "back":
          queued.push(keypress(["Alt", "Left"]));
          break;
        case "forward":
          queued.push(keypress(["Alt", "Right"]));
          break;
        default:
          queued.push(toSdkAction(type, action, this.coordinateSystem, this.viewport));
      }
    }
    await sendQueued();
    return { readResults };
  }
  /** Send already-translated actions to the session in a single batch call. */
  async runKernelBatch(actions) {
    await this.client.browsers.computer.batch(this.sessionId, { actions });
  }
};
223
/**
 * Translate one computer action into the Kernel batch-action shape.
 *
 * @param type Action type (`click`, `double_click`, `mouse_down`, `mouse_up`,
 *   `type`, `keypress`, `scroll`, `move`, `drag`, `wait`).
 * @param action Raw action object; only the fields relevant to `type` are read.
 * @param coordinateSystem Coordinate system the action's x/y are expressed in.
 * @param viewport Viewport size used when converting non-pixel coordinates.
 * @returns An object of the form `{ type, [type]: {...} }`.
 * @throws Error when `type` is not one of the supported action types.
 */
function toSdkAction(type, action, coordinateSystem, viewport) {
  // `hold_keys` is only attached when at least one key was supplied.
  const holdKeysField = () => {
    const holdKeys = readHoldKeys(action.hold_keys);
    return holdKeys.length > 0 ? { hold_keys: holdKeys } : {};
  };
  const targetPoint = () => toViewportPoint(action, coordinateSystem, viewport);
  switch (type) {
    case "click": {
      const { x, y } = targetPoint();
      return {
        type: "click_mouse",
        click_mouse: {
          x,
          y,
          button: clickMouseButtonOr(action.button, "left"),
          ...holdKeysField()
        }
      };
    }
    case "double_click": {
      const { x, y } = targetPoint();
      return {
        type: "click_mouse",
        click_mouse: {
          x,
          y,
          num_clicks: 2,
          ...holdKeysField()
        }
      };
    }
    case "mouse_down":
    case "mouse_up": {
      const { x, y } = targetPoint();
      return {
        type: "click_mouse",
        click_mouse: {
          x,
          y,
          button: clickMouseButtonOr(action.button, "left"),
          click_type: type === "mouse_down" ? "down" : "up",
          ...holdKeysField()
        }
      };
    }
    case "type": return {
      type: "type_text",
      type_text: { text: typeof action.text === "string" ? action.text : "" }
    };
    case "keypress": return keypress(toStringArray(action.keys), action.duration);
    case "scroll": {
      const { x, y } = targetPoint();
      return {
        type: "scroll",
        scroll: {
          x,
          y,
          delta_x: toInt(action.scroll_x),
          delta_y: toInt(action.scroll_y),
          ...holdKeysField()
        }
      };
    }
    case "move": {
      const { x, y } = targetPoint();
      return {
        type: "move_mouse",
        move_mouse: {
          x,
          y,
          ...holdKeysField()
        }
      };
    }
    case "drag": return {
      type: "drag_mouse",
      drag_mouse: {
        path: toPath(action.path, coordinateSystem, viewport),
        button: dragMouseButtonOr(action.button, "left"),
        ...holdKeysField()
      }
    };
    case "wait": {
      // Missing or non-finite durations (NaN, Infinity) fall back to 1000 ms;
      // negative durations are clamped to 0 rather than forwarded.
      const ms = action.ms;
      const durationMs = typeof ms === "number" && Number.isFinite(ms) ? Math.max(0, Math.trunc(ms)) : 1e3;
      return {
        type: "sleep",
        sleep: { duration_ms: durationMs }
      };
    }
    default: throw new Error(`unknown computer action type: ${type}`);
  }
}
315
/**
 * Coerce a value to an integer by truncation.
 *
 * Accepts finite numbers and non-blank numeric strings; everything else
 * (including NaN, infinities and unparsable strings) becomes 0.
 *
 * @param value Any value.
 * @returns The truncated integer, or 0.
 */
function toInt(value) {
  let numeric = NaN;
  if (typeof value === "number") numeric = value;
  else if (typeof value === "string" && value.trim()) numeric = Number(value);
  return Number.isFinite(numeric) ? Math.trunc(numeric) : 0;
}
323
/** @returns `value` when it is a non-empty string, otherwise `fallback`. */
function stringOr(value, fallback) {
  if (typeof value !== "string" || value.length === 0) return fallback;
  return value;
}
/**
 * Pick a mouse button for click-style actions.
 *
 * @returns `value` when it is one of left/right/middle/back/forward,
 *   otherwise `fallback`.
 */
function clickMouseButtonOr(value, fallback) {
  const requested = stringOr(value, fallback);
  switch (requested) {
    case "left":
    case "right":
    case "middle":
    case "back":
    case "forward": return requested;
    default: return fallback;
  }
}
/**
 * Pick a mouse button for drag actions.
 *
 * @returns `value` when it is one of left/right/middle, otherwise `fallback`.
 */
function dragMouseButtonOr(value, fallback) {
  const requested = stringOr(value, fallback);
  switch (requested) {
    case "left":
    case "right":
    case "middle": return requested;
    default: return fallback;
  }
}
336
/**
 * Keep only the string items of an array.
 *
 * @param value Any value.
 * @returns A new array of the string items; `[]` when `value` is not an array.
 */
function toStringArray(value) {
  if (!Array.isArray(value)) return [];
  const strings = [];
  for (const item of value) if (typeof item === "string") strings.push(item);
  return strings;
}
339
/**
 * Read a `hold_keys` value: non-string entries are dropped and each remaining
 * key name is normalized with `normalizeKernelKey`.
 *
 * @param value Raw `hold_keys` field of an action (any value).
 * @returns Normalized key names; `[]` when `value` is not an array.
 */
function readHoldKeys(value) {
  const heldNames = toStringArray(value);
  return heldNames.map((name) => normalizeKernelKey(name));
}
342
/**
 * Build a `press_key` batch action from key names and/or `+` combos.
 *
 * Modifier keys become `hold_keys` and the remaining keys are pressed. When
 * every key is a modifier, the last one is pressed and the others are held.
 *
 * @param keys Key names or combos such as `"Control"`, `"l"`, `"ctrl+a"`.
 * @param duration Optional duration; only included (truncated) when it is a
 *   finite number greater than 0.
 * @returns `{ type: "press_key", press_key: { keys, hold_keys?, duration? } }`.
 */
function keypress(keys, duration = void 0) {
  const translated = keys.flatMap((combo) => normalizeKernelKeyCombo(combo));
  const modifiers = [];
  const regular = [];
  for (const key of translated) (isKernelModifierKey(key) ? modifiers : regular).push(key);
  const onlyModifiers = regular.length === 0;
  const press_key = { keys: onlyModifiers ? translated.slice(-1) : regular };
  const held = onlyModifiers ? translated.slice(0, -1) : modifiers;
  if (held.length > 0) press_key.hold_keys = held;
  if (typeof duration === "number" && Number.isFinite(duration) && duration > 0) press_key.duration = Math.trunc(duration);
  return {
    type: "press_key",
    press_key
  };
}
355
/**
 * Convert a drag path into an array of `[x, y]` viewport points.
 *
 * @param value Raw path; anything that is not an array yields `[]`.
 * @param coordinateSystem Defaults to pixel coordinates.
 * @param viewport Defaults to 1920x1080.
 * @returns One `[x, y]` pair per input point.
 */
function toPath(value, coordinateSystem = { type: "pixel" }, viewport = {
  width: 1920,
  height: 1080
}) {
  return Array.isArray(value) ? value.map((rawPoint) => toPathPoint(rawPoint, coordinateSystem, viewport)) : [];
}
362
/**
 * Convert one path point to an `[x, y]` viewport pair.
 *
 * Accepts `[x, y]` arrays and `{ x, y }` objects; any other value maps to
 * `[0, 0]`.
 */
function toPathPoint(value, coordinateSystem, viewport) {
  let rawX;
  let rawY;
  if (Array.isArray(value)) {
    rawX = value[0];
    rawY = value[1];
  } else if (value && typeof value === "object") {
    rawX = value.x;
    rawY = value.y;
  } else return [0, 0];
  const { x, y } = transformPoint(toInt(rawX), toInt(rawY), coordinateSystem, viewport);
  return [x, y];
}
374
/**
 * Read `action.x` / `action.y` as integers and map them into viewport
 * coordinates.
 *
 * @returns `{ x, y }` as produced by `transformPoint`.
 */
function toViewportPoint(action, coordinateSystem, viewport) {
  const rawX = toInt(action.x);
  const rawY = toInt(action.y);
  return transformPoint(rawX, rawY, coordinateSystem, viewport);
}
377
/**
 * Map a point from the given coordinate system into viewport pixels.
 *
 * Pixel coordinates pass through untouched. Otherwise the point is scaled
 * from `coordinateSystem.range` (`[min, max]`) onto the viewport, rounded and
 * clamped to `[0, size - 1]`. A range whose span is not positive also passes
 * the point through unchanged.
 *
 * @returns `{ x, y }`.
 */
function transformPoint(x, y, coordinateSystem, viewport) {
  if (coordinateSystem.type === "pixel") return {
    x,
    y
  };
  const [lower, upper] = coordinateSystem.range;
  const span = upper - lower;
  if (span <= 0) return {
    x,
    y
  };
  const project = (coordinate, extent) => clamp(Math.round((coordinate - lower) / span * extent), 0, extent - 1);
  return {
    x: project(x, viewport.width),
    y: project(y, viewport.height)
  };
}
/** Limit `value` to at most `max`, then to at least `min`. */
function clamp(value, min, max) {
  const upperBounded = Math.min(max, value);
  return Math.max(min, upperBounded);
}
396
+ //#endregion
397
+ //#region src/tools.ts
398
/**
 * Build the agent tools for a browser session.
 *
 * One translator is created from `args` and shared by every tool; the
 * executor list is `args.toolExecutors` plus, when applicable, the navigation
 * tool added by `withNavigationTool`.
 *
 * @param args Translator options together with `toolExecutors` and the
 *   optional `computerUseExtra` flag.
 * @returns One tool object per executor.
 */
function createCuaComputerTools(args) {
  const translator = new InternalComputerTranslator(args);
  const tools = [];
  for (const executor of withNavigationTool(args)) tools.push(createExecutorTool(executor, translator));
  return tools;
}
402
/**
 * Copy `args.toolExecutors` and append the navigation executor when
 * `args.computerUseExtra` is truthy and no executor already uses the
 * navigation tool name.
 *
 * @returns A new executor array; the input array is not modified.
 */
function withNavigationTool(args) {
  const executors = [...args.toolExecutors];
  const takenNames = executors.map((executor) => executor.definition.name);
  const needsNavigation = Boolean(args.computerUseExtra) && !takenNames.includes(CUA_NAVIGATION_TOOL_NAME);
  if (needsNavigation) executors.push({
    kind: "navigation",
    definition: createCuaNavigationToolDefinition()
  });
  return executors;
}
414
/**
 * Wrap an executor as a tool object.
 *
 * Navigation executors run through `executeNavigationTool`. All other
 * executors convert their params with `executor.toActions` and run through
 * `executeBatchTool`, and are marked with `executionMode: "sequential"`.
 *
 * @param executor Executor with a `definition` (name, description, parameters).
 * @param translator Translator that performs the actions.
 * @returns Tool object with `name`, `label`, `description`, `parameters` and `execute`.
 */
function createExecutorTool(executor, translator) {
  const { name, description, parameters } = executor.definition;
  const shared = {
    name,
    label: name,
    description,
    parameters
  };
  if (isNavigationExecutor(executor)) return {
    ...shared,
    async execute(_toolCallId, params) {
      return executeNavigationTool(translator, asNavigationInput(params));
    }
  };
  return {
    ...shared,
    executionMode: "sequential",
    async execute(_toolCallId, params) {
      const actions = executor.toActions(params);
      return executeBatchTool(translator, { actions });
    }
  };
}
436
/** @returns `true` when the executor has a `kind` property equal to `"navigation"`. */
function isNavigationExecutor(executor) {
  if (!("kind" in executor)) return false;
  return executor.kind === "navigation";
}
439
/**
 * Run a batch of actions and build the tool result.
 *
 * Every read result becomes a content item (text for url and cursor position,
 * base64 image for screenshots). When the batch produced no content at all, a
 * fresh screenshot is taken so the result always carries something.
 *
 * @param translator Object exposing `executeBatch` and `screenshot`.
 * @param params `{ actions }`.
 * @returns `{ content, details: { statusText, readResults } }`.
 * @throws Error prefixed with "Actions failed: " (original error as `cause`).
 */
async function executeBatchTool(translator, params) {
  const content = [];
  const readResults = [];
  // Screenshots are summarised by byte length in details and attached as an image in content.
  const recordScreenshot = (shot) => {
    readResults.push({
      type: "screenshot",
      bytes: shot.data.length
    });
    content.push({
      type: "image",
      data: shot.data.toString("base64"),
      mimeType: shot.mimeType
    });
  };
  try {
    const outcome = await translator.executeBatch(params.actions);
    for (const read of outcome.readResults) {
      switch (read.type) {
        case "url":
          readResults.push({
            type: "url",
            url: read.url
          });
          content.push({
            type: "text",
            text: `url(): ${read.url}`
          });
          break;
        case "cursor_position":
          readResults.push({
            type: "cursor_position",
            x: read.x,
            y: read.y
          });
          content.push({
            type: "text",
            text: `cursor_position(): ${read.x},${read.y}`
          });
          break;
        default:
          recordScreenshot(read);
      }
    }
    if (content.length === 0) recordScreenshot(await translator.screenshot());
  } catch (err) {
    throw new Error(`Actions failed: ${errorMessage(err)}`, { cause: err });
  }
  return {
    content,
    details: {
      statusText: "Actions executed successfully.",
      readResults
    }
  };
}
497
/**
 * Run one navigation action and answer with a status line plus a screenshot.
 *
 * The `"url"` action reads the current URL; any other action is forwarded to
 * `translator.executeBatch` as `{ type: action, url: params.url }`.
 *
 * @param translator Object exposing `currentUrl`, `executeBatch` and `screenshot`.
 * @param params `{ action, url? }`.
 * @returns `{ content: [text, image], details: { action, statusText, url? } }`.
 * @throws Error prefixed with "<action> failed: " (original error as `cause`).
 */
async function executeNavigationTool(translator, params) {
  const { action } = params;
  try {
    const isUrlQuery = action === "url";
    let url;
    if (isUrlQuery) url = await translator.currentUrl();
    else await translator.executeBatch([{
      type: action,
      url: params.url
    }]);
    const statusText = isUrlQuery ? `Current URL: ${url}` : `${action} executed successfully.`;
    const shot = await translator.screenshot();
    const details = {
      action,
      statusText
    };
    // Only report a url when one was actually read.
    if (url) details.url = url;
    return {
      content: [{
        type: "text",
        text: statusText
      }, {
        type: "image",
        data: shot.data.toString("base64"),
        mimeType: shot.mimeType
      }],
      details
    };
  } catch (err) {
    throw new Error(`${action} failed: ${errorMessage(err)}`, { cause: err });
  }
}
529
/** @returns `err.message` for Error instances, otherwise `String(err)`. */
function errorMessage(err) {
  if (err instanceof Error) return err.message;
  return String(err);
}
532
/**
 * Check that tool params look like navigation input.
 *
 * @param value Raw params passed to the tool.
 * @returns `value` itself when it is an object with a string `action`.
 * @throws Error("invalid computer_use_extra parameters") otherwise.
 */
function asNavigationInput(value) {
  const looksValid = Boolean(value) && typeof value === "object" && typeof value.action === "string";
  if (!looksValid) throw new Error("invalid computer_use_extra parameters");
  return value;
}
536
+ //#endregion
537
+ //#region src/agent.ts
538
/**
 * Owns everything CUA-specific that must be rebuilt when the model changes:
 * the resolved runtime spec, the browser translator created for that spec,
 * and the tools / system prompt / payload hook derived from it. Caller-owned
 * `extraTools` always come after the CUA defaults.
 */
var CuaRuntimeController = class {
  options;
  runtimeSpec;
  translator;
  /**
   * @param options `browser`, `client`, `model`, and optional `extraTools`,
   *   `computerUseExtra` and `onPayload`.
   */
  constructor(options) {
    this.options = options;
    this.runtimeSpec = resolveCuaRuntimeSpec(options.model);
    this.translator = this.createTranslator();
  }
  /** Model taken from the current runtime spec. */
  get model() {
    return this.runtimeSpec.model;
  }
  /** Default system prompt of the current runtime spec. */
  get systemPrompt() {
    return this.runtimeSpec.defaultSystemPrompt;
  }
  /** Re-resolve the runtime spec for `model` and rebuild the translator. */
  setModel(model) {
    this.runtimeSpec = resolveCuaRuntimeSpec(model);
    this.translator = this.createTranslator();
  }
  /** @returns A fresh tool list: CUA tools first, then `options.extraTools`. */
  tools() {
    const { browser, client, computerUseExtra, extraTools } = this.options;
    const { toolExecutors, coordinateSystem, screenshot } = this.runtimeSpec;
    const cuaTools = createCuaComputerTools({
      browser,
      client,
      toolExecutors,
      coordinateSystem,
      screenshot,
      computerUseExtra
    });
    return [...cuaTools, ...extraTools ?? []];
  }
  /**
   * @returns The spec's payload hook (given `keepToolNames` and a screenshot
   *   getter) composed with the caller's `onPayload`; either may be absent.
   */
  onPayload() {
    const spec = this.runtimeSpec;
    let specHook = void 0;
    if (spec.onPayload) specHook = async (payload, model) => spec.onPayload?.(payload, model, {
      keepToolNames: this.keepToolNames(),
      getScreenshot: () => this.translator.screenshot()
    });
    return composeOnPayload(specHook, this.options.onPayload);
  }
  /** @returns Names of the extra tools, plus the navigation tool name when `computerUseExtra` is set. */
  keepToolNames() {
    const names = (this.options.extraTools ?? []).map((tool) => tool.name);
    if (this.options.computerUseExtra) names.push(CUA_NAVIGATION_TOOL_NAME);
    return names;
  }
  /** Build a translator for the current runtime spec. */
  createTranslator() {
    const { browser, client } = this.options;
    const { coordinateSystem, screenshot } = this.runtimeSpec;
    return new InternalComputerTranslator({
      browser,
      client,
      coordinateSystem,
      screenshot
    });
  }
};
592
/**
 * Default harness auth resolver: looks up the API key for `model.provider`
 * via `getCuaEnvApiKey`.
 *
 * @returns `{ apiKey }` when a key is found, otherwise `undefined`.
 */
async function getCuaEnvApiKeyAndHeaders(model) {
  const apiKey = getCuaEnvApiKey(model.provider);
  if (!apiKey) return void 0;
  return { apiKey };
}
597
/**
 * Pi `Agent` preconfigured for Kernel browser computer use.
 *
 * Choose this class for direct access to the lower-level pi agent state,
 * queues, event stream and `state.model` mutation model. It resolves CUA
 * model refs, installs the provider-appropriate CUA tools by default, and
 * keeps those defaults aligned whenever `agent.state.model` changes.
 */
var CuaAgent = class extends Agent {
  runtime;
  ownsSystemPrompt;
  stateProxy;
  constructor(options) {
    const { browser, client, initialState, onPayload, streamFn, prepareNextTurn, extraTools, computerUseExtra, ...agentOptions } = options;
    const runtime = new CuaRuntimeController({
      browser,
      client,
      model: initialState.model,
      extraTools,
      computerUseExtra,
      onPayload
    });
    const baseStreamFn = streamFn ?? streamSimple;
    super({
      ...agentOptions,
      getApiKey: agentOptions.getApiKey ?? getCuaEnvApiKey,
      // Every stream call picks up the payload hook and kept tool names of the current runtime.
      streamFn: (model, context, streamOptions) => baseStreamFn(model, context, {
        ...streamOptions,
        onPayload: runtime.onPayload(),
        keepToolNames: runtime.keepToolNames()
      }),
      initialState: {
        ...initialState,
        model: runtime.model,
        tools: runtime.tools(),
        systemPrompt: initialState.systemPrompt ?? runtime.systemPrompt
      }
    });
    this.runtime = runtime;
    // The agent only manages the system prompt when the caller did not supply one.
    this.ownsSystemPrompt = initialState.systemPrompt === void 0;
    /**
     * pi invokes `prepareNextTurn` between provider requests. The wrapper runs
     * the caller's hook first, then refreshes the provider-specific defaults
     * when that hook switched the model.
     */
    this.prepareNextTurn = async (signal) => {
      const update = await prepareNextTurn?.(signal);
      if (update?.model) this.applyRuntime(update.model);
      const state = super.state;
      const baseContext = update?.context ?? {
        systemPrompt: state.systemPrompt,
        messages: state.messages.slice(),
        tools: state.tools.slice()
      };
      const systemPrompt = this.ownsSystemPrompt ? state.systemPrompt : baseContext.systemPrompt;
      return {
        ...update,
        model: state.model,
        context: {
          ...baseContext,
          systemPrompt,
          tools: state.tools.slice()
        }
      };
    };
  }
  /**
   * State proxy that lets `agent.state.model = "provider:model"` work like
   * pi's ordinary mutable state while also re-resolving the CUA tools, prompt
   * and payload hooks for the chosen provider.
   */
  get state() {
    if (this.stateProxy) return this.stateProxy;
    const handler = { set: (target, prop, value, receiver) => {
      if (prop !== "model") return Reflect.set(target, prop, value, receiver);
      this.applyRuntime(value);
      return true;
    } };
    this.stateProxy = new Proxy(super.state, handler);
    return this.stateProxy;
  }
  /** Switch the runtime to `model` and copy model, tools and (if owned) system prompt into the state. */
  applyRuntime(model) {
    const { runtime } = this;
    runtime.setModel(model);
    const state = super.state;
    state.model = runtime.model;
    state.tools = runtime.tools();
    if (this.ownsSystemPrompt) state.systemPrompt = runtime.systemPrompt;
  }
};
688
/**
 * Pi `AgentHarness` preconfigured for Kernel browser computer use.
 *
 * Choose this class for pi's higher-level harness APIs: sessions, resources,
 * prompt templates, queue events, compaction and model selection. It installs
 * the provider CUA tools by default and keeps the CUA-owned runtime defaults
 * aligned through `setModel()`.
 */
var CuaAgentHarness = class extends AgentHarness {
  runtime;
  requestedActiveToolNames;
  constructor(options) {
    const { browser, client, model, extraTools, computerUseExtra, systemPrompt, getApiKeyAndHeaders, onPayload, activeToolNames, ...harnessOptions } = options;
    const runtime = new CuaRuntimeController({
      browser,
      client,
      model,
      extraTools,
      computerUseExtra,
      onPayload
    });
    const resolvedTools = runtime.tools();
    const defaultActiveNames = () => resolvedTools.map((tool) => tool.name);
    super({
      ...harnessOptions,
      model: runtime.model,
      tools: resolvedTools,
      // A function, so the prompt is read from the runtime each time it is requested.
      systemPrompt: systemPrompt ?? (() => runtime.systemPrompt),
      getApiKeyAndHeaders: getApiKeyAndHeaders ?? getCuaEnvApiKeyAndHeaders,
      activeToolNames: activeToolNames ?? defaultActiveNames()
    });
    this.runtime = runtime;
    this.requestedActiveToolNames = activeToolNames;
    this.on("before_provider_payload", async ({ model, payload }) => {
      const hook = this.runtime.onPayload();
      if (!hook) return { payload };
      const rewritten = await hook(payload, model);
      return { payload: rewritten ?? payload };
    });
  }
  /**
   * Mirrors pi `AgentHarness.setModel()` while accepting CUA model refs.
   *
   * CUA-owned tools are refreshed before delegating to pi, so the harness
   * snapshot and the session model-change entry are written with the concrete
   * model selected by `@onkernel/cua-ai`.
   */
  async setModel(model) {
    this.runtime.setModel(model);
    const tools = this.runtime.tools();
    const activeNames = this.requestedActiveToolNames ?? tools.map((tool) => tool.name);
    await super.setTools(tools, activeNames);
    await super.setModel(this.runtime.model);
  }
  /** Delegate to pi, then remember a copy of the names for later `setModel()` calls. */
  async setActiveTools(toolNames) {
    await super.setActiveTools(toolNames);
    this.requestedActiveToolNames = [...toolNames];
  }
};
744
+ function composeOnPayload(first, second) {
745
+ if (!first) return second;
746
+ if (!second) return first;
747
+ return async (payload, modelRef) => {
748
+ return second(await first(payload, modelRef) ?? payload, modelRef);
749
+ };
750
+ }
751
+ //#endregion
752
+ export { CuaAgent, CuaAgentHarness, NodeExecutionEnv, createCuaComputerTools };