@onkernel/cua-agent 0.3.3 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +8 -0
- package/README.md +15 -0
- package/dist/index.d.ts +31 -4
- package/dist/index.js +92 -14
- package/package.json +2 -2
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,13 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.3.4 - 2026-06-23
|
|
4
|
+
|
|
5
|
+
- Add an opt-in `playwright` option to `CuaAgent` and `CuaAgentHarness` that
|
|
6
|
+
exposes a `playwright_execute` tool, running Playwright/TypeScript against
|
|
7
|
+
the live browser session via the Kernel SDK. Results, stdout, and stderr
|
|
8
|
+
come back as tool content; SDK-reported failures surface as content rather
|
|
9
|
+
than throwing. Adds the `PlaywrightDetails` export.
|
|
10
|
+
|
|
3
11
|
## 0.3.3 - 2026-06-12
|
|
4
12
|
|
|
5
13
|
- The action translator now consumes the canonical `CuaAction` union with an
|
package/README.md
CHANGED
|
@@ -98,6 +98,8 @@ Both classes mirror pi constructor shapes and behavior, with minimal additions:
|
|
|
98
98
|
- CUA model refs (`"provider:model"`) accepted where pi expects a concrete model
|
|
99
99
|
- `extraTools` to add your own pi tools alongside the built-in browser tools
|
|
100
100
|
- `computerUseExtra: true` to let the model use a small navigation helper
|
|
101
|
+
- `playwright: true` to let the model run Playwright/TypeScript against the
|
|
102
|
+
live browser session
|
|
101
103
|
|
|
102
104
|
If auth callbacks are omitted, both classes default to CUA env var conventions:
|
|
103
105
|
- OpenAI: `OPENAI_API_KEY`
|
|
@@ -124,6 +126,19 @@ URL or go back. `computerUseExtra: true` adds `computer_use_extra`, a
|
|
|
124
126
|
provider-neutral escape hatch exposing `goto`, `back`, `forward`, and `url`
|
|
125
127
|
so navigation works uniformly regardless of which model is driving.
|
|
126
128
|
|
|
129
|
+
Some steps are awkward as raw pointer/keyboard actions: precise DOM reads,
|
|
130
|
+
form fills, data extraction, or waiting on a specific selector.
|
|
131
|
+
`playwright: true` adds `playwright_execute`, which runs Playwright/TypeScript
|
|
132
|
+
directly against the live browser session. `page`, `context`, and `browser`
|
|
133
|
+
are in scope and the code may `return` a JSON-serializable value. Each call
|
|
134
|
+
runs in a fresh JS context (locals don't persist across calls) but the
|
|
135
|
+
browser session does carry over. No screenshot is returned automatically;
|
|
136
|
+
request one on a follow-up turn when the model needs to see the page.
|
|
137
|
+
Playwright-level failures come back as tool content (so the model can adapt)
|
|
138
|
+
rather than thrown errors. Verified e2e
|
|
139
|
+
against Anthropic, Tzafon, and Yutori CUA models; OpenAI and Google are
|
|
140
|
+
unit-tested.
|
|
141
|
+
|
|
127
142
|
### Model Switching
|
|
128
143
|
|
|
129
144
|
`CuaAgent` follows pi `Agent` semantics: assign `agent.state.model` to a
|
package/dist/index.d.ts
CHANGED
|
@@ -16,6 +16,7 @@ interface ComputerToolOptions {
|
|
|
16
16
|
coordinateSystem?: ComputerToolCoordinateSystem;
|
|
17
17
|
screenshot?: CuaScreenshotSpec;
|
|
18
18
|
computerUseExtra?: boolean;
|
|
19
|
+
playwright?: boolean;
|
|
19
20
|
}
|
|
20
21
|
interface BatchDetails {
|
|
21
22
|
statusText: string;
|
|
@@ -36,10 +37,34 @@ interface NavigationDetails {
|
|
|
36
37
|
statusText: string;
|
|
37
38
|
url?: string;
|
|
38
39
|
}
|
|
40
|
+
/**
|
|
41
|
+
* Structured details for a `playwright_execute` tool result. Library
|
|
42
|
+
* consumers can read these directly instead of re-parsing the model-facing
|
|
43
|
+
* tool content blocks.
|
|
44
|
+
*
|
|
45
|
+
* - `success` — whether the Playwright code itself completed without error.
|
|
46
|
+
* A `false` value means the code threw or the SDK reported failure; in
|
|
47
|
+
* that case the failure is also surfaced as tool content for the model.
|
|
48
|
+
* - `statusText` — short human-readable status (success or failure summary).
|
|
49
|
+
* - `result` — present only when the code returned a JSON-serializable value.
|
|
50
|
+
* - `stdout`/`stderr` — raw daemon output, present whenever the daemon
|
|
51
|
+
* reported a non-empty value on that stream (may be whitespace-only).
|
|
52
|
+
* - `error` — present only when `success` is `false`; the error message from
|
|
53
|
+
* the daemon.
|
|
54
|
+
*/
|
|
55
|
+
interface PlaywrightDetails {
|
|
56
|
+
success: boolean;
|
|
57
|
+
statusText: string;
|
|
58
|
+
result?: unknown;
|
|
59
|
+
stdout?: string;
|
|
60
|
+
stderr?: string;
|
|
61
|
+
error?: string;
|
|
62
|
+
}
|
|
39
63
|
type BatchTool = AgentTool<TSchema, BatchDetails>;
|
|
40
64
|
type NavigationTool = AgentTool<TSchema, NavigationDetails>;
|
|
65
|
+
type PlaywrightTool = AgentTool<TSchema, PlaywrightDetails>;
|
|
41
66
|
type ActionTool = AgentTool<TSchema, BatchDetails>;
|
|
42
|
-
type CuaExecutorTool = BatchTool | NavigationTool | ActionTool;
|
|
67
|
+
type CuaExecutorTool = BatchTool | NavigationTool | PlaywrightTool | ActionTool;
|
|
43
68
|
declare function createCuaComputerTools(args: ComputerToolOptions): CuaExecutorTool[];
|
|
44
69
|
//#endregion
|
|
45
70
|
//#region src/agent.d.ts
|
|
@@ -74,7 +99,8 @@ type CuaAgentOptions = Omit<AgentOptions, "initialState"> & {
|
|
|
74
99
|
client: Kernel; /** Initial pi state plus a CUA-aware model value. */
|
|
75
100
|
initialState: CuaAgentInitialState; /** Add your own pi tools alongside the built-in browser tools. */
|
|
76
101
|
extraTools?: AgentTool[]; /** Expose a helper for browser navigation and URL reads. */
|
|
77
|
-
computerUseExtra?: boolean;
|
|
102
|
+
computerUseExtra?: boolean; /** Expose a tool that runs Playwright code against the browser session. */
|
|
103
|
+
playwright?: boolean;
|
|
78
104
|
};
|
|
79
105
|
/**
|
|
80
106
|
* Constructor options for {@link CuaAgentHarness}.
|
|
@@ -88,7 +114,8 @@ type CuaAgentHarnessOptions<TSkill extends Skill = Skill, TPromptTemplate extend
|
|
|
88
114
|
client: Kernel; /** Model used by the harness. CUA refs are resolved before pi sees the model. */
|
|
89
115
|
model: CuaRuntimeInput; /** Add your own pi tools alongside the built-in browser tools. */
|
|
90
116
|
extraTools?: AgentTool[]; /** Expose a helper for browser navigation and URL reads. */
|
|
91
|
-
computerUseExtra?: boolean; /**
|
|
117
|
+
computerUseExtra?: boolean; /** Expose a tool that runs Playwright code against the browser session. */
|
|
118
|
+
playwright?: boolean; /** Optional payload hook composed after the provider-specific CUA payload hook. */
|
|
92
119
|
onPayload?: SimpleStreamOptions["onPayload"];
|
|
93
120
|
};
|
|
94
121
|
/**
|
|
@@ -137,4 +164,4 @@ declare class CuaAgentHarness<TSkill extends Skill = Skill, TPromptTemplate exte
|
|
|
137
164
|
setActiveTools(toolNames: string[]): Promise<void>;
|
|
138
165
|
}
|
|
139
166
|
//#endregion
|
|
140
|
-
export { type BatchDetails, type ComputerToolOptions, CuaAgent, CuaAgentHarness, type CuaAgentHarnessOptions, type CuaAgentOptions, type CuaAgentState, type CuaExecutorTool, type KernelBrowser, type NavigationDetails, NodeExecutionEnv, createCuaComputerTools };
|
|
167
|
+
export { type BatchDetails, type ComputerToolOptions, CuaAgent, CuaAgentHarness, type CuaAgentHarnessOptions, type CuaAgentOptions, type CuaAgentState, type CuaExecutorTool, type KernelBrowser, type NavigationDetails, NodeExecutionEnv, type PlaywrightDetails, createCuaComputerTools };
|
package/dist/index.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { Agent, AgentHarness } from "@earendil-works/pi-agent-core";
|
|
2
2
|
import { NodeExecutionEnv } from "@earendil-works/pi-agent-core/node";
|
|
3
|
-
import { CUA_NAVIGATION_TOOL_NAME, createCuaNavigationToolDefinition, getCuaEnvApiKey, normalizeGotoUrl, resolveCuaRuntimeSpec, streamSimple } from "@onkernel/cua-ai";
|
|
3
|
+
import { CUA_NAVIGATION_TOOL_NAME, CUA_PLAYWRIGHT_TOOL_NAME, createCuaNavigationToolDefinition, createCuaPlaywrightToolDefinition, getCuaEnvApiKey, normalizeGotoUrl, resolveCuaRuntimeSpec, streamSimple } from "@onkernel/cua-ai";
|
|
4
4
|
import sharp from "sharp";
|
|
5
5
|
export * from "@earendil-works/pi-agent-core";
|
|
6
6
|
//#region src/translator/keys.ts
|
|
@@ -160,6 +160,14 @@ var InternalComputerTranslator = class {
|
|
|
160
160
|
y: Math.trunc(pos.y)
|
|
161
161
|
};
|
|
162
162
|
}
|
|
163
|
+
async executePlaywright(code, timeoutSec) {
|
|
164
|
+
const truncated = timeoutSec !== void 0 ? Math.trunc(timeoutSec) : void 0;
|
|
165
|
+
const timeout = truncated !== void 0 && truncated >= 1 ? Math.min(truncated, PLAYWRIGHT_MAX_TIMEOUT_SEC) : void 0;
|
|
166
|
+
return this.client.browsers.playwright.execute(this.sessionId, {
|
|
167
|
+
code,
|
|
168
|
+
...timeout !== void 0 ? { timeout_sec: timeout } : {}
|
|
169
|
+
});
|
|
170
|
+
}
|
|
163
171
|
async executeBatch(actions) {
|
|
164
172
|
const result = { readResults: [] };
|
|
165
173
|
const pending = [];
|
|
@@ -297,6 +305,7 @@ var InternalComputerTranslator = class {
|
|
|
297
305
|
await this.client.browsers.computer.batch(this.sessionId, { actions });
|
|
298
306
|
}
|
|
299
307
|
};
|
|
308
|
+
const PLAYWRIGHT_MAX_TIMEOUT_SEC = 300;
|
|
300
309
|
const CLICK_BUTTONS = new Set([
|
|
301
310
|
"left",
|
|
302
311
|
"right",
|
|
@@ -357,18 +366,19 @@ function createCuaComputerTools(args) {
|
|
|
357
366
|
}
|
|
358
367
|
/** Build executor tools against an existing translator (internal; not part of the package surface). */
|
|
359
368
|
function buildCuaComputerTools(args, translator) {
|
|
360
|
-
return
|
|
369
|
+
return withExtraTools(args).map((executor) => createExecutorTool(executor, translator));
|
|
361
370
|
}
|
|
362
|
-
function
|
|
371
|
+
function withExtraTools(args) {
|
|
363
372
|
const executors = [...args.toolExecutors];
|
|
364
373
|
const existing = new Set(executors.map((executor) => executor.definition.name));
|
|
365
|
-
if (args.computerUseExtra && !existing.has(CUA_NAVIGATION_TOOL_NAME)) {
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
374
|
+
if (args.computerUseExtra && !existing.has(CUA_NAVIGATION_TOOL_NAME)) executors.push({
|
|
375
|
+
kind: "navigation",
|
|
376
|
+
definition: createCuaNavigationToolDefinition()
|
|
377
|
+
});
|
|
378
|
+
if (args.playwright && !existing.has(CUA_PLAYWRIGHT_TOOL_NAME)) executors.push({
|
|
379
|
+
kind: "playwright",
|
|
380
|
+
definition: createCuaPlaywrightToolDefinition()
|
|
381
|
+
});
|
|
372
382
|
return executors;
|
|
373
383
|
}
|
|
374
384
|
function createExecutorTool(executor, translator) {
|
|
@@ -382,6 +392,16 @@ function createExecutorTool(executor, translator) {
|
|
|
382
392
|
return executeNavigationTool(translator, asNavigationInput(params));
|
|
383
393
|
}
|
|
384
394
|
};
|
|
395
|
+
if (isPlaywrightExecutor(executor)) return {
|
|
396
|
+
name: definition.name,
|
|
397
|
+
label: definition.name,
|
|
398
|
+
description: definition.description,
|
|
399
|
+
parameters: definition.parameters,
|
|
400
|
+
executionMode: "sequential",
|
|
401
|
+
async execute(_toolCallId, params) {
|
|
402
|
+
return executePlaywrightTool(translator, asPlaywrightInput(params));
|
|
403
|
+
}
|
|
404
|
+
};
|
|
385
405
|
return {
|
|
386
406
|
name: definition.name,
|
|
387
407
|
label: definition.name,
|
|
@@ -396,6 +416,9 @@ function createExecutorTool(executor, translator) {
|
|
|
396
416
|
function isNavigationExecutor(executor) {
|
|
397
417
|
return "kind" in executor && executor.kind === "navigation";
|
|
398
418
|
}
|
|
419
|
+
function isPlaywrightExecutor(executor) {
|
|
420
|
+
return "kind" in executor && executor.kind === "playwright";
|
|
421
|
+
}
|
|
399
422
|
async function executeBatchTool(translator, params) {
|
|
400
423
|
const content = [];
|
|
401
424
|
const readResults = [];
|
|
@@ -487,6 +510,50 @@ async function executeNavigationTool(translator, params) {
|
|
|
487
510
|
throw new Error(`${action} failed: ${errorMessage(err)}`, { cause: err });
|
|
488
511
|
}
|
|
489
512
|
}
|
|
513
|
+
async function executePlaywrightTool(translator, params) {
|
|
514
|
+
try {
|
|
515
|
+
const execution = await translator.executePlaywright(params.code, params.timeout_sec);
|
|
516
|
+
const content = [];
|
|
517
|
+
if (execution.result !== void 0) content.push({
|
|
518
|
+
type: "text",
|
|
519
|
+
text: `result: ${formatPlaywrightResult(execution.result)}`
|
|
520
|
+
});
|
|
521
|
+
if (execution.stdout?.trim()) content.push({
|
|
522
|
+
type: "text",
|
|
523
|
+
text: `stdout:\n${execution.stdout.trimEnd()}`
|
|
524
|
+
});
|
|
525
|
+
if (execution.stderr?.trim()) content.push({
|
|
526
|
+
type: "text",
|
|
527
|
+
text: `stderr:\n${execution.stderr.trimEnd()}`
|
|
528
|
+
});
|
|
529
|
+
if (!execution.success) content.push({
|
|
530
|
+
type: "text",
|
|
531
|
+
text: `error: ${execution.error ?? "playwright execution reported failure"}`
|
|
532
|
+
});
|
|
533
|
+
const statusText = execution.success ? "Playwright executed successfully." : `Playwright execution failed: ${execution.error ?? "unknown error"}`;
|
|
534
|
+
if (content.length === 0) content.push({
|
|
535
|
+
type: "text",
|
|
536
|
+
text: statusText
|
|
537
|
+
});
|
|
538
|
+
const details = {
|
|
539
|
+
success: execution.success,
|
|
540
|
+
statusText
|
|
541
|
+
};
|
|
542
|
+
if (execution.result !== void 0) details.result = execution.result;
|
|
543
|
+
if (execution.stdout) details.stdout = execution.stdout;
|
|
544
|
+
if (execution.stderr) details.stderr = execution.stderr;
|
|
545
|
+
if (execution.error) details.error = execution.error;
|
|
546
|
+
return {
|
|
547
|
+
content,
|
|
548
|
+
details
|
|
549
|
+
};
|
|
550
|
+
} catch (err) {
|
|
551
|
+
throw new Error(`playwright_execute failed: ${errorMessage(err)}`, { cause: err });
|
|
552
|
+
}
|
|
553
|
+
}
|
|
554
|
+
function formatPlaywrightResult(result) {
|
|
555
|
+
return typeof result === "string" ? result : JSON.stringify(result);
|
|
556
|
+
}
|
|
490
557
|
function errorMessage(err) {
|
|
491
558
|
return err instanceof Error ? err.message : String(err);
|
|
492
559
|
}
|
|
@@ -494,6 +561,10 @@ function asNavigationInput(value) {
|
|
|
494
561
|
if (value && typeof value === "object" && typeof value.action === "string") return value;
|
|
495
562
|
throw new Error("invalid computer_use_extra parameters");
|
|
496
563
|
}
|
|
564
|
+
function asPlaywrightInput(value) {
|
|
565
|
+
if (value && typeof value === "object" && typeof value.code === "string") return value;
|
|
566
|
+
throw new Error("invalid playwright_execute parameters");
|
|
567
|
+
}
|
|
497
568
|
//#endregion
|
|
498
569
|
//#region src/agent.ts
|
|
499
570
|
/**
|
|
@@ -524,7 +595,8 @@ var CuaRuntimeController = class {
|
|
|
524
595
|
tools() {
|
|
525
596
|
return [...buildCuaComputerTools({
|
|
526
597
|
toolExecutors: this.runtimeSpec.toolExecutors,
|
|
527
|
-
computerUseExtra: this.options.computerUseExtra
|
|
598
|
+
computerUseExtra: this.options.computerUseExtra,
|
|
599
|
+
playwright: this.options.playwright
|
|
528
600
|
}, this.translator), ...this.options.extraTools ?? []];
|
|
529
601
|
}
|
|
530
602
|
onPayload() {
|
|
@@ -535,7 +607,11 @@ var CuaRuntimeController = class {
|
|
|
535
607
|
}) : void 0, this.options.onPayload);
|
|
536
608
|
}
|
|
537
609
|
keepToolNames() {
|
|
538
|
-
return [
|
|
610
|
+
return [
|
|
611
|
+
...(this.options.extraTools ?? []).map((tool) => tool.name),
|
|
612
|
+
...this.options.computerUseExtra ? [CUA_NAVIGATION_TOOL_NAME] : [],
|
|
613
|
+
...this.options.playwright ? [CUA_PLAYWRIGHT_TOOL_NAME] : []
|
|
614
|
+
];
|
|
539
615
|
}
|
|
540
616
|
createTranslator() {
|
|
541
617
|
return new InternalComputerTranslator({
|
|
@@ -566,13 +642,14 @@ var CuaAgent = class extends Agent {
|
|
|
566
642
|
stateProxy;
|
|
567
643
|
stateProxyTarget;
|
|
568
644
|
constructor(options) {
|
|
569
|
-
const { browser, client, initialState, onPayload, streamFn, prepareNextTurn, extraTools, computerUseExtra, ...agentOptions } = options;
|
|
645
|
+
const { browser, client, initialState, onPayload, streamFn, prepareNextTurn, extraTools, computerUseExtra, playwright, ...agentOptions } = options;
|
|
570
646
|
const runtime = new CuaRuntimeController({
|
|
571
647
|
browser,
|
|
572
648
|
client,
|
|
573
649
|
model: initialState.model,
|
|
574
650
|
extraTools,
|
|
575
651
|
computerUseExtra,
|
|
652
|
+
playwright,
|
|
576
653
|
onPayload
|
|
577
654
|
});
|
|
578
655
|
const wrappedStreamFn = (model, context, streamOptions) => {
|
|
@@ -665,13 +742,14 @@ var CuaAgentHarness = class extends AgentHarness {
|
|
|
665
742
|
runtime;
|
|
666
743
|
requestedActiveToolNames;
|
|
667
744
|
constructor(options) {
|
|
668
|
-
const { browser, client, model, extraTools, computerUseExtra, systemPrompt, getApiKeyAndHeaders, onPayload, activeToolNames, ...harnessOptions } = options;
|
|
745
|
+
const { browser, client, model, extraTools, computerUseExtra, playwright, systemPrompt, getApiKeyAndHeaders, onPayload, activeToolNames, ...harnessOptions } = options;
|
|
669
746
|
const runtime = new CuaRuntimeController({
|
|
670
747
|
browser,
|
|
671
748
|
client,
|
|
672
749
|
model,
|
|
673
750
|
extraTools,
|
|
674
751
|
computerUseExtra,
|
|
752
|
+
playwright,
|
|
675
753
|
onPayload
|
|
676
754
|
});
|
|
677
755
|
const resolvedTools = runtime.tools();
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@onkernel/cua-agent",
|
|
3
|
-
"version": "0.3.3",
|
|
3
|
+
"version": "0.3.4",
|
|
4
4
|
"description": "Kernel browser computer-use Agent and AgentHarness classes built on pi-agent-core",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"type": "module",
|
|
@@ -42,7 +42,7 @@
|
|
|
42
42
|
"dependencies": {
|
|
43
43
|
"@earendil-works/pi-agent-core": "0.79.1",
|
|
44
44
|
"@earendil-works/pi-ai": "0.79.1",
|
|
45
|
-
"@onkernel/cua-ai": "0.3.
|
|
45
|
+
"@onkernel/cua-ai": "0.3.1",
|
|
46
46
|
"@onkernel/sdk": "0.49.0",
|
|
47
47
|
"sharp": "^0.34.5"
|
|
48
48
|
},
|