@onkernel/cua-agent 0.3.3 → 0.3.4

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,13 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.3.4 - 2026-06-23
4
+
5
+ - Add an opt-in `playwright` option to `CuaAgent` and `CuaAgentHarness` that
6
+ exposes a `playwright_execute` tool, running Playwright/TypeScript against
7
+ the live browser session via the Kernel SDK. Results, stdout, and stderr
8
+ come back as tool content; SDK-reported failures surface as content rather
9
+ than throwing. Adds the `PlaywrightDetails` export.
10
+
3
11
  ## 0.3.3 - 2026-06-12
4
12
 
5
13
  - The action translator now consumes the canonical `CuaAction` union with an
package/README.md CHANGED
@@ -98,6 +98,8 @@ Both classes mirror pi constructor shapes and behavior, with minimal additions:
98
98
  - CUA model refs (`"provider:model"`) accepted where pi expects a concrete model
99
99
  - `extraTools` to add your own pi tools alongside the built-in browser tools
100
100
  - `computerUseExtra: true` to let the model use a small navigation helper
101
+ - `playwright: true` to let the model run Playwright/TypeScript against the
102
+ live browser session
101
103
 
102
104
  If auth callbacks are omitted, both classes default to CUA env var conventions:
103
105
  - OpenAI: `OPENAI_API_KEY`
@@ -124,6 +126,19 @@ URL or go back. `computerUseExtra: true` adds `computer_use_extra`, a
124
126
  provider-neutral escape hatch exposing `goto`, `back`, `forward`, and `url`
125
127
  so navigation works uniformly regardless of which model is driving.
126
128
 
129
+ Some steps are awkward as raw pointer/keyboard actions: precise DOM reads,
130
+ form fills, data extraction, or waiting on a specific selector.
131
+ `playwright: true` adds `playwright_execute`, which runs Playwright/TypeScript
132
+ directly against the live browser session. `page`, `context`, and `browser`
133
+ are in scope and the code may `return` a JSON-serializable value. Each call
134
+ runs in a fresh JS context (locals don't persist across calls) but the
135
+ browser session does carry over. No screenshot is returned automatically;
136
+ request one on a follow-up turn when the model needs to see the page.
137
+ Playwright-level failures come back as tool content (so the model can adapt)
138
+ rather than thrown errors. Verified end-to-end
139
+ against Anthropic, Tzafon, and Yutori CUA models; OpenAI and Google are
140
+ unit-tested.
141
+
127
142
  ### Model Switching
128
143
 
129
144
  `CuaAgent` follows pi `Agent` semantics: assign `agent.state.model` to a
package/dist/index.d.ts CHANGED
@@ -16,6 +16,7 @@ interface ComputerToolOptions {
16
16
  coordinateSystem?: ComputerToolCoordinateSystem;
17
17
  screenshot?: CuaScreenshotSpec;
18
18
  computerUseExtra?: boolean;
19
+ playwright?: boolean;
19
20
  }
20
21
  interface BatchDetails {
21
22
  statusText: string;
@@ -36,10 +37,34 @@ interface NavigationDetails {
36
37
  statusText: string;
37
38
  url?: string;
38
39
  }
40
+ /**
41
+ * Structured details for a `playwright_execute` tool result. Library
42
+ * consumers can read these directly instead of re-parsing the model-facing
43
+ * tool content blocks.
44
+ *
45
+ * - `success` — whether the Playwright code itself completed without error.
46
+ * A `false` value means the code threw or the SDK reported failure; in
47
+ * that case the failure is also surfaced as tool content for the model.
48
+ * - `statusText` — short human-readable status (success or failure summary).
49
+ * - `result` — present only when the code returned a JSON-serializable value.
50
+ * - `stdout`/`stderr` — raw daemon output, present whenever the daemon
51
+ * reported a non-empty value on that stream (may be whitespace-only).
52
+ * - `error` — the daemon's error message, present whenever the daemon
53
+ * reported one (normally only when `success` is `false`).
54
+ */
55
+ interface PlaywrightDetails {
56
+ success: boolean;
57
+ statusText: string;
58
+ result?: unknown;
59
+ stdout?: string;
60
+ stderr?: string;
61
+ error?: string;
62
+ }
39
63
  type BatchTool = AgentTool<TSchema, BatchDetails>;
40
64
  type NavigationTool = AgentTool<TSchema, NavigationDetails>;
65
+ type PlaywrightTool = AgentTool<TSchema, PlaywrightDetails>;
41
66
  type ActionTool = AgentTool<TSchema, BatchDetails>;
42
- type CuaExecutorTool = BatchTool | NavigationTool | ActionTool;
67
+ type CuaExecutorTool = BatchTool | NavigationTool | PlaywrightTool | ActionTool;
43
68
  declare function createCuaComputerTools(args: ComputerToolOptions): CuaExecutorTool[];
44
69
  //#endregion
45
70
  //#region src/agent.d.ts
@@ -74,7 +99,8 @@ type CuaAgentOptions = Omit<AgentOptions, "initialState"> & {
74
99
  client: Kernel; /** Initial pi state plus a CUA-aware model value. */
75
100
  initialState: CuaAgentInitialState; /** Add your own pi tools alongside the built-in browser tools. */
76
101
  extraTools?: AgentTool[]; /** Expose a helper for browser navigation and URL reads. */
77
- computerUseExtra?: boolean;
102
+ computerUseExtra?: boolean; /** Expose a tool that runs Playwright code against the browser session. */
103
+ playwright?: boolean;
78
104
  };
79
105
  /**
80
106
  * Constructor options for {@link CuaAgentHarness}.
@@ -88,7 +114,8 @@ type CuaAgentHarnessOptions<TSkill extends Skill = Skill, TPromptTemplate extend
88
114
  client: Kernel; /** Model used by the harness. CUA refs are resolved before pi sees the model. */
89
115
  model: CuaRuntimeInput; /** Add your own pi tools alongside the built-in browser tools. */
90
116
  extraTools?: AgentTool[]; /** Expose a helper for browser navigation and URL reads. */
91
- computerUseExtra?: boolean; /** Optional payload hook composed after the provider-specific CUA payload hook. */
117
+ computerUseExtra?: boolean; /** Expose a tool that runs Playwright code against the browser session. */
118
+ playwright?: boolean; /** Optional payload hook composed after the provider-specific CUA payload hook. */
92
119
  onPayload?: SimpleStreamOptions["onPayload"];
93
120
  };
94
121
  /**
@@ -137,4 +164,4 @@ declare class CuaAgentHarness<TSkill extends Skill = Skill, TPromptTemplate exte
137
164
  setActiveTools(toolNames: string[]): Promise<void>;
138
165
  }
139
166
  //#endregion
140
- export { type BatchDetails, type ComputerToolOptions, CuaAgent, CuaAgentHarness, type CuaAgentHarnessOptions, type CuaAgentOptions, type CuaAgentState, type CuaExecutorTool, type KernelBrowser, type NavigationDetails, NodeExecutionEnv, createCuaComputerTools };
167
+ export { type BatchDetails, type ComputerToolOptions, CuaAgent, CuaAgentHarness, type CuaAgentHarnessOptions, type CuaAgentOptions, type CuaAgentState, type CuaExecutorTool, type KernelBrowser, type NavigationDetails, NodeExecutionEnv, type PlaywrightDetails, createCuaComputerTools };
package/dist/index.js CHANGED
@@ -1,6 +1,6 @@
1
1
  import { Agent, AgentHarness } from "@earendil-works/pi-agent-core";
2
2
  import { NodeExecutionEnv } from "@earendil-works/pi-agent-core/node";
3
- import { CUA_NAVIGATION_TOOL_NAME, createCuaNavigationToolDefinition, getCuaEnvApiKey, normalizeGotoUrl, resolveCuaRuntimeSpec, streamSimple } from "@onkernel/cua-ai";
3
+ import { CUA_NAVIGATION_TOOL_NAME, CUA_PLAYWRIGHT_TOOL_NAME, createCuaNavigationToolDefinition, createCuaPlaywrightToolDefinition, getCuaEnvApiKey, normalizeGotoUrl, resolveCuaRuntimeSpec, streamSimple } from "@onkernel/cua-ai";
4
4
  import sharp from "sharp";
5
5
  export * from "@earendil-works/pi-agent-core";
6
6
  //#region src/translator/keys.ts
@@ -160,6 +160,14 @@ var InternalComputerTranslator = class {
160
160
  y: Math.trunc(pos.y)
161
161
  };
162
162
  }
163
+ async executePlaywright(code, timeoutSec) {
164
+ const truncated = timeoutSec !== void 0 ? Math.trunc(timeoutSec) : void 0;
165
+ const timeout = truncated !== void 0 && truncated >= 1 ? Math.min(truncated, PLAYWRIGHT_MAX_TIMEOUT_SEC) : void 0;
166
+ return this.client.browsers.playwright.execute(this.sessionId, {
167
+ code,
168
+ ...timeout !== void 0 ? { timeout_sec: timeout } : {}
169
+ });
170
+ }
163
171
  async executeBatch(actions) {
164
172
  const result = { readResults: [] };
165
173
  const pending = [];
@@ -297,6 +305,7 @@ var InternalComputerTranslator = class {
297
305
  await this.client.browsers.computer.batch(this.sessionId, { actions });
298
306
  }
299
307
  };
308
+ const PLAYWRIGHT_MAX_TIMEOUT_SEC = 300;
300
309
  const CLICK_BUTTONS = new Set([
301
310
  "left",
302
311
  "right",
@@ -357,18 +366,19 @@ function createCuaComputerTools(args) {
357
366
  }
358
367
  /** Build executor tools against an existing translator (internal; not part of the package surface). */
359
368
  function buildCuaComputerTools(args, translator) {
360
- return withNavigationTool(args).map((executor) => createExecutorTool(executor, translator));
369
+ return withExtraTools(args).map((executor) => createExecutorTool(executor, translator));
361
370
  }
362
- function withNavigationTool(args) {
371
+ function withExtraTools(args) {
363
372
  const executors = [...args.toolExecutors];
364
373
  const existing = new Set(executors.map((executor) => executor.definition.name));
365
- if (args.computerUseExtra && !existing.has(CUA_NAVIGATION_TOOL_NAME)) {
366
- const definition = createCuaNavigationToolDefinition();
367
- executors.push({
368
- kind: "navigation",
369
- definition
370
- });
371
- }
374
+ if (args.computerUseExtra && !existing.has(CUA_NAVIGATION_TOOL_NAME)) executors.push({
375
+ kind: "navigation",
376
+ definition: createCuaNavigationToolDefinition()
377
+ });
378
+ if (args.playwright && !existing.has(CUA_PLAYWRIGHT_TOOL_NAME)) executors.push({
379
+ kind: "playwright",
380
+ definition: createCuaPlaywrightToolDefinition()
381
+ });
372
382
  return executors;
373
383
  }
374
384
  function createExecutorTool(executor, translator) {
@@ -382,6 +392,16 @@ function createExecutorTool(executor, translator) {
382
392
  return executeNavigationTool(translator, asNavigationInput(params));
383
393
  }
384
394
  };
395
+ if (isPlaywrightExecutor(executor)) return {
396
+ name: definition.name,
397
+ label: definition.name,
398
+ description: definition.description,
399
+ parameters: definition.parameters,
400
+ executionMode: "sequential",
401
+ async execute(_toolCallId, params) {
402
+ return executePlaywrightTool(translator, asPlaywrightInput(params));
403
+ }
404
+ };
385
405
  return {
386
406
  name: definition.name,
387
407
  label: definition.name,
@@ -396,6 +416,9 @@ function createExecutorTool(executor, translator) {
396
416
  function isNavigationExecutor(executor) {
397
417
  return "kind" in executor && executor.kind === "navigation";
398
418
  }
419
+ function isPlaywrightExecutor(executor) {
420
+ return "kind" in executor && executor.kind === "playwright";
421
+ }
399
422
  async function executeBatchTool(translator, params) {
400
423
  const content = [];
401
424
  const readResults = [];
@@ -487,6 +510,50 @@ async function executeNavigationTool(translator, params) {
487
510
  throw new Error(`${action} failed: ${errorMessage(err)}`, { cause: err });
488
511
  }
489
512
  }
513
+ async function executePlaywrightTool(translator, params) {
514
+ try {
515
+ const execution = await translator.executePlaywright(params.code, params.timeout_sec);
516
+ const content = [];
517
+ if (execution.result !== void 0) content.push({
518
+ type: "text",
519
+ text: `result: ${formatPlaywrightResult(execution.result)}`
520
+ });
521
+ if (execution.stdout?.trim()) content.push({
522
+ type: "text",
523
+ text: `stdout:\n${execution.stdout.trimEnd()}`
524
+ });
525
+ if (execution.stderr?.trim()) content.push({
526
+ type: "text",
527
+ text: `stderr:\n${execution.stderr.trimEnd()}`
528
+ });
529
+ if (!execution.success) content.push({
530
+ type: "text",
531
+ text: `error: ${execution.error ?? "playwright execution reported failure"}`
532
+ });
533
+ const statusText = execution.success ? "Playwright executed successfully." : `Playwright execution failed: ${execution.error ?? "unknown error"}`;
534
+ if (content.length === 0) content.push({
535
+ type: "text",
536
+ text: statusText
537
+ });
538
+ const details = {
539
+ success: execution.success,
540
+ statusText
541
+ };
542
+ if (execution.result !== void 0) details.result = execution.result;
543
+ if (execution.stdout) details.stdout = execution.stdout;
544
+ if (execution.stderr) details.stderr = execution.stderr;
545
+ if (execution.error) details.error = execution.error;
546
+ return {
547
+ content,
548
+ details
549
+ };
550
+ } catch (err) {
551
+ throw new Error(`playwright_execute failed: ${errorMessage(err)}`, { cause: err });
552
+ }
553
+ }
554
+ function formatPlaywrightResult(result) {
555
+ return typeof result === "string" ? result : JSON.stringify(result);
556
+ }
490
557
  function errorMessage(err) {
491
558
  return err instanceof Error ? err.message : String(err);
492
559
  }
@@ -494,6 +561,10 @@ function asNavigationInput(value) {
494
561
  if (value && typeof value === "object" && typeof value.action === "string") return value;
495
562
  throw new Error("invalid computer_use_extra parameters");
496
563
  }
564
+ function asPlaywrightInput(value) {
565
+ if (value && typeof value === "object" && typeof value.code === "string") return value;
566
+ throw new Error("invalid playwright_execute parameters");
567
+ }
497
568
  //#endregion
498
569
  //#region src/agent.ts
499
570
  /**
@@ -524,7 +595,8 @@ var CuaRuntimeController = class {
524
595
  tools() {
525
596
  return [...buildCuaComputerTools({
526
597
  toolExecutors: this.runtimeSpec.toolExecutors,
527
- computerUseExtra: this.options.computerUseExtra
598
+ computerUseExtra: this.options.computerUseExtra,
599
+ playwright: this.options.playwright
528
600
  }, this.translator), ...this.options.extraTools ?? []];
529
601
  }
530
602
  onPayload() {
@@ -535,7 +607,11 @@ var CuaRuntimeController = class {
535
607
  }) : void 0, this.options.onPayload);
536
608
  }
537
609
  keepToolNames() {
538
- return [...(this.options.extraTools ?? []).map((tool) => tool.name), ...this.options.computerUseExtra ? [CUA_NAVIGATION_TOOL_NAME] : []];
610
+ return [
611
+ ...(this.options.extraTools ?? []).map((tool) => tool.name),
612
+ ...this.options.computerUseExtra ? [CUA_NAVIGATION_TOOL_NAME] : [],
613
+ ...this.options.playwright ? [CUA_PLAYWRIGHT_TOOL_NAME] : []
614
+ ];
539
615
  }
540
616
  createTranslator() {
541
617
  return new InternalComputerTranslator({
@@ -566,13 +642,14 @@ var CuaAgent = class extends Agent {
566
642
  stateProxy;
567
643
  stateProxyTarget;
568
644
  constructor(options) {
569
- const { browser, client, initialState, onPayload, streamFn, prepareNextTurn, extraTools, computerUseExtra, ...agentOptions } = options;
645
+ const { browser, client, initialState, onPayload, streamFn, prepareNextTurn, extraTools, computerUseExtra, playwright, ...agentOptions } = options;
570
646
  const runtime = new CuaRuntimeController({
571
647
  browser,
572
648
  client,
573
649
  model: initialState.model,
574
650
  extraTools,
575
651
  computerUseExtra,
652
+ playwright,
576
653
  onPayload
577
654
  });
578
655
  const wrappedStreamFn = (model, context, streamOptions) => {
@@ -665,13 +742,14 @@ var CuaAgentHarness = class extends AgentHarness {
665
742
  runtime;
666
743
  requestedActiveToolNames;
667
744
  constructor(options) {
668
- const { browser, client, model, extraTools, computerUseExtra, systemPrompt, getApiKeyAndHeaders, onPayload, activeToolNames, ...harnessOptions } = options;
745
+ const { browser, client, model, extraTools, computerUseExtra, playwright, systemPrompt, getApiKeyAndHeaders, onPayload, activeToolNames, ...harnessOptions } = options;
669
746
  const runtime = new CuaRuntimeController({
670
747
  browser,
671
748
  client,
672
749
  model,
673
750
  extraTools,
674
751
  computerUseExtra,
752
+ playwright,
675
753
  onPayload
676
754
  });
677
755
  const resolvedTools = runtime.tools();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@onkernel/cua-agent",
3
- "version": "0.3.3",
3
+ "version": "0.3.4",
4
4
  "description": "Kernel browser computer-use Agent and AgentHarness classes built on pi-agent-core",
5
5
  "license": "MIT",
6
6
  "type": "module",
@@ -42,7 +42,7 @@
42
42
  "dependencies": {
43
43
  "@earendil-works/pi-agent-core": "0.79.1",
44
44
  "@earendil-works/pi-ai": "0.79.1",
45
- "@onkernel/cua-ai": "0.3.0",
45
+ "@onkernel/cua-ai": "0.3.1",
46
46
  "@onkernel/sdk": "0.49.0",
47
47
  "sharp": "^0.34.5"
48
48
  },