npm - @onkernel/cua-agent - Versions diffs - 0.3.3 → 0.3.4 - Mend

@onkernel/cua-agent 0.3.3 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,13 @@
 # Changelog
+## 0.3.4 - 2026-06-23
+- Add an opt-in `playwright` option to `CuaAgent` and `CuaAgentHarness` that
+  exposes a `playwright_execute` tool, running Playwright/TypeScript against
+  the live browser session via the Kernel SDK. Results, stdout, and stderr
+  come back as tool content; SDK-reported failures surface as content rather
+  than throwing. Adds the `PlaywrightDetails` export.
 ## 0.3.3 - 2026-06-12
 - The action translator now consumes the canonical `CuaAction` union with an

package/README.md CHANGED Viewed

@@ -98,6 +98,8 @@ Both classes mirror pi constructor shapes and behavior, with minimal additions:
 - CUA model refs (`"provider:model"`) accepted where pi expects a concrete model
 - `extraTools` to add your own pi tools alongside the built-in browser tools
 - `computerUseExtra: true` to let the model use a small navigation helper
+- `playwright: true` to let the model run Playwright/TypeScript against the
+  live browser session
 If auth callbacks are omitted, both classes default to CUA env var conventions:
 - OpenAI: `OPENAI_API_KEY`
@@ -124,6 +126,19 @@ URL or go back. `computerUseExtra: true` adds `computer_use_extra`, a
 provider-neutral escape hatch exposing `goto`, `back`, `forward`, and `url`
 so navigation works uniformly regardless of which model is driving.
+Some steps are awkward as raw pointer/keyboard actions: precise DOM reads,
+form fills, data extraction, or waiting on a specific selector.
+`playwright: true` adds `playwright_execute`, which runs Playwright/TypeScript
+directly against the live browser session. `page`, `context`, and `browser`
+are in scope and the code may `return` a JSON-serializable value. Each call
+runs in a fresh JS context (locals don't persist across calls) but the
+browser session does carry over. No screenshot is returned automatically;
+request one on a follow-up turn when the model needs to see the page.
+Playwright-level failures come back as tool content (so the model can adapt)
+rather than thrown errors. Verified e2e
+against Anthropic, Tzafon, and Yutori CUA models; OpenAI and Google are
+unit-tested.
 ### Model Switching
 `CuaAgent` follows pi `Agent` semantics: assign `agent.state.model` to a

package/dist/index.d.ts CHANGED Viewed

@@ -16,6 +16,7 @@ interface ComputerToolOptions {
   coordinateSystem?: ComputerToolCoordinateSystem;
   screenshot?: CuaScreenshotSpec;
   computerUseExtra?: boolean;
+  playwright?: boolean;
 }
 interface BatchDetails {
   statusText: string;
@@ -36,10 +37,34 @@ interface NavigationDetails {
   statusText: string;
   url?: string;
 }
+/**
+ * Structured details for a `playwright_execute` tool result. Library
+ * consumers can read these directly instead of re-parsing the model-facing
+ * tool content blocks.
+ *
+ * - `success` — whether the Playwright code itself completed without error.
+ *   A `false` value means the code threw or the SDK reported failure; in
+ *   that case the failure is also surfaced as tool content for the model.
+ * - `statusText` — short human-readable status (success or failure summary).
+ * - `result` — present only when the code returned a JSON-serializable value.
+ * - `stdout`/`stderr` — raw daemon output, present whenever the daemon
+ *   reported a non-empty value on that stream (may be whitespace-only).
+ * - `error` — present only when `success` is `false`; the error message from
+ *   the daemon.
+ */
+interface PlaywrightDetails {
+  success: boolean;
+  statusText: string;
+  result?: unknown;
+  stdout?: string;
+  stderr?: string;
+  error?: string;
+}
 type BatchTool = AgentTool<TSchema, BatchDetails>;
 type NavigationTool = AgentTool<TSchema, NavigationDetails>;
+type PlaywrightTool = AgentTool<TSchema, PlaywrightDetails>;
 type ActionTool = AgentTool<TSchema, BatchDetails>;
-type CuaExecutorTool = BatchTool | NavigationTool | ActionTool;
+type CuaExecutorTool = BatchTool | NavigationTool | PlaywrightTool | ActionTool;
 declare function createCuaComputerTools(args: ComputerToolOptions): CuaExecutorTool[];
 //#endregion
 //#region src/agent.d.ts
@@ -74,7 +99,8 @@ type CuaAgentOptions = Omit<AgentOptions, "initialState"> & {
   client: Kernel; /** Initial pi state plus a CUA-aware model value. */
   initialState: CuaAgentInitialState; /** Add your own pi tools alongside the built-in browser tools. */
   extraTools?: AgentTool[]; /** Expose a helper for browser navigation and URL reads. */
-  computerUseExtra?: boolean;
+  computerUseExtra?: boolean; /** Expose a tool that runs Playwright code against the browser session. */
+  playwright?: boolean;
 };
 /**
  * Constructor options for {@link CuaAgentHarness}.
@@ -88,7 +114,8 @@ type CuaAgentHarnessOptions<TSkill extends Skill = Skill, TPromptTemplate extend
   client: Kernel; /** Model used by the harness. CUA refs are resolved before pi sees the model. */
   model: CuaRuntimeInput; /** Add your own pi tools alongside the built-in browser tools. */
   extraTools?: AgentTool[]; /** Expose a helper for browser navigation and URL reads. */
-  computerUseExtra?: boolean; /** Optional payload hook composed after the provider-specific CUA payload hook. */
+  computerUseExtra?: boolean; /** Expose a tool that runs Playwright code against the browser session. */
+  playwright?: boolean; /** Optional payload hook composed after the provider-specific CUA payload hook. */
   onPayload?: SimpleStreamOptions["onPayload"];
 };
 /**
@@ -137,4 +164,4 @@ declare class CuaAgentHarness<TSkill extends Skill = Skill, TPromptTemplate exte
   setActiveTools(toolNames: string[]): Promise<void>;
 }
 //#endregion
-export { type BatchDetails, type ComputerToolOptions, CuaAgent, CuaAgentHarness, type CuaAgentHarnessOptions, type CuaAgentOptions, type CuaAgentState, type CuaExecutorTool, type KernelBrowser, type NavigationDetails, NodeExecutionEnv, createCuaComputerTools };
+export { type BatchDetails, type ComputerToolOptions, CuaAgent, CuaAgentHarness, type CuaAgentHarnessOptions, type CuaAgentOptions, type CuaAgentState, type CuaExecutorTool, type KernelBrowser, type NavigationDetails, NodeExecutionEnv, type PlaywrightDetails, createCuaComputerTools };

package/dist/index.js CHANGED Viewed

@@ -1,6 +1,6 @@
 import { Agent, AgentHarness } from "@earendil-works/pi-agent-core";
 import { NodeExecutionEnv } from "@earendil-works/pi-agent-core/node";
-import { CUA_NAVIGATION_TOOL_NAME, createCuaNavigationToolDefinition, getCuaEnvApiKey, normalizeGotoUrl, resolveCuaRuntimeSpec, streamSimple } from "@onkernel/cua-ai";
+import { CUA_NAVIGATION_TOOL_NAME, CUA_PLAYWRIGHT_TOOL_NAME, createCuaNavigationToolDefinition, createCuaPlaywrightToolDefinition, getCuaEnvApiKey, normalizeGotoUrl, resolveCuaRuntimeSpec, streamSimple } from "@onkernel/cua-ai";
 import sharp from "sharp";
 export * from "@earendil-works/pi-agent-core";
 //#region src/translator/keys.ts
@@ -160,6 +160,14 @@ var InternalComputerTranslator = class {
 			y: Math.trunc(pos.y)
 		};
 	}
+	async executePlaywright(code, timeoutSec) {
+		const truncated = timeoutSec !== void 0 ? Math.trunc(timeoutSec) : void 0;
+		const timeout = truncated !== void 0 && truncated >= 1 ? Math.min(truncated, PLAYWRIGHT_MAX_TIMEOUT_SEC) : void 0;
+		return this.client.browsers.playwright.execute(this.sessionId, {
+			code,
+			...timeout !== void 0 ? { timeout_sec: timeout } : {}
+		});
+	}
 	async executeBatch(actions) {
 		const result = { readResults: [] };
 		const pending = [];
@@ -297,6 +305,7 @@ var InternalComputerTranslator = class {
 		await this.client.browsers.computer.batch(this.sessionId, { actions });
 	}
 };
+const PLAYWRIGHT_MAX_TIMEOUT_SEC = 300;
 const CLICK_BUTTONS = new Set([
 	"left",
 	"right",
@@ -357,18 +366,19 @@ function createCuaComputerTools(args) {
 }
 /** Build executor tools against an existing translator (internal; not part of the package surface). */
 function buildCuaComputerTools(args, translator) {
-	return withNavigationTool(args).map((executor) => createExecutorTool(executor, translator));
+	return withExtraTools(args).map((executor) => createExecutorTool(executor, translator));
 }
-function withNavigationTool(args) {
+function withExtraTools(args) {
 	const executors = [...args.toolExecutors];
 	const existing = new Set(executors.map((executor) => executor.definition.name));
-	if (args.computerUseExtra && !existing.has(CUA_NAVIGATION_TOOL_NAME)) {
-		const definition = createCuaNavigationToolDefinition();
-		executors.push({
-			kind: "navigation",
-			definition
-		});
-	}
+	if (args.computerUseExtra && !existing.has(CUA_NAVIGATION_TOOL_NAME)) executors.push({
+		kind: "navigation",
+		definition: createCuaNavigationToolDefinition()
+	});
+	if (args.playwright && !existing.has(CUA_PLAYWRIGHT_TOOL_NAME)) executors.push({
+		kind: "playwright",
+		definition: createCuaPlaywrightToolDefinition()
+	});
 	return executors;
 }
 function createExecutorTool(executor, translator) {
@@ -382,6 +392,16 @@ function createExecutorTool(executor, translator) {
 			return executeNavigationTool(translator, asNavigationInput(params));
 		}
 	};
+	if (isPlaywrightExecutor(executor)) return {
+		name: definition.name,
+		label: definition.name,
+		description: definition.description,
+		parameters: definition.parameters,
+		executionMode: "sequential",
+		async execute(_toolCallId, params) {
+			return executePlaywrightTool(translator, asPlaywrightInput(params));
+		}
+	};
 	return {
 		name: definition.name,
 		label: definition.name,
@@ -396,6 +416,9 @@ function createExecutorTool(executor, translator) {
 function isNavigationExecutor(executor) {
 	return "kind" in executor && executor.kind === "navigation";
 }
+function isPlaywrightExecutor(executor) {
+	return "kind" in executor && executor.kind === "playwright";
+}
 async function executeBatchTool(translator, params) {
 	const content = [];
 	const readResults = [];
@@ -487,6 +510,50 @@ async function executeNavigationTool(translator, params) {
 		throw new Error(`${action} failed: ${errorMessage(err)}`, { cause: err });
 	}
 }
+async function executePlaywrightTool(translator, params) {
+	try {
+		const execution = await translator.executePlaywright(params.code, params.timeout_sec);
+		const content = [];
+		if (execution.result !== void 0) content.push({
+			type: "text",
+			text: `result: ${formatPlaywrightResult(execution.result)}`
+		});
+		if (execution.stdout?.trim()) content.push({
+			type: "text",
+			text: `stdout:\n${execution.stdout.trimEnd()}`
+		});
+		if (execution.stderr?.trim()) content.push({
+			type: "text",
+			text: `stderr:\n${execution.stderr.trimEnd()}`
+		});
+		if (!execution.success) content.push({
+			type: "text",
+			text: `error: ${execution.error ?? "playwright execution reported failure"}`
+		});
+		const statusText = execution.success ? "Playwright executed successfully." : `Playwright execution failed: ${execution.error ?? "unknown error"}`;
+		if (content.length === 0) content.push({
+			type: "text",
+			text: statusText
+		});
+		const details = {
+			success: execution.success,
+			statusText
+		};
+		if (execution.result !== void 0) details.result = execution.result;
+		if (execution.stdout) details.stdout = execution.stdout;
+		if (execution.stderr) details.stderr = execution.stderr;
+		if (execution.error) details.error = execution.error;
+		return {
+			content,
+			details
+		};
+	} catch (err) {
+		throw new Error(`playwright_execute failed: ${errorMessage(err)}`, { cause: err });
+	}
+}
+function formatPlaywrightResult(result) {
+	return typeof result === "string" ? result : JSON.stringify(result);
+}
 function errorMessage(err) {
 	return err instanceof Error ? err.message : String(err);
 }
@@ -494,6 +561,10 @@ function asNavigationInput(value) {
 	if (value && typeof value === "object" && typeof value.action === "string") return value;
 	throw new Error("invalid computer_use_extra parameters");
 }
+function asPlaywrightInput(value) {
+	if (value && typeof value === "object" && typeof value.code === "string") return value;
+	throw new Error("invalid playwright_execute parameters");
+}
 //#endregion
 //#region src/agent.ts
 /**
@@ -524,7 +595,8 @@ var CuaRuntimeController = class {
 	tools() {
 		return [...buildCuaComputerTools({
 			toolExecutors: this.runtimeSpec.toolExecutors,
-			computerUseExtra: this.options.computerUseExtra
+			computerUseExtra: this.options.computerUseExtra,
+			playwright: this.options.playwright
 		}, this.translator), ...this.options.extraTools ?? []];
 	}
 	onPayload() {
@@ -535,7 +607,11 @@ var CuaRuntimeController = class {
 		}) : void 0, this.options.onPayload);
 	}
 	keepToolNames() {
-		return [...(this.options.extraTools ?? []).map((tool) => tool.name), ...this.options.computerUseExtra ? [CUA_NAVIGATION_TOOL_NAME] : []];
+		return [
+			...(this.options.extraTools ?? []).map((tool) => tool.name),
+			...this.options.computerUseExtra ? [CUA_NAVIGATION_TOOL_NAME] : [],
+			...this.options.playwright ? [CUA_PLAYWRIGHT_TOOL_NAME] : []
+		];
 	}
 	createTranslator() {
 		return new InternalComputerTranslator({
@@ -566,13 +642,14 @@ var CuaAgent = class extends Agent {
 	stateProxy;
 	stateProxyTarget;
 	constructor(options) {
-		const { browser, client, initialState, onPayload, streamFn, prepareNextTurn, extraTools, computerUseExtra, ...agentOptions } = options;
+		const { browser, client, initialState, onPayload, streamFn, prepareNextTurn, extraTools, computerUseExtra, playwright, ...agentOptions } = options;
 		const runtime = new CuaRuntimeController({
 			browser,
 			client,
 			model: initialState.model,
 			extraTools,
 			computerUseExtra,
+			playwright,
 			onPayload
 		});
 		const wrappedStreamFn = (model, context, streamOptions) => {
@@ -665,13 +742,14 @@ var CuaAgentHarness = class extends AgentHarness {
 	runtime;
 	requestedActiveToolNames;
 	constructor(options) {
-		const { browser, client, model, extraTools, computerUseExtra, systemPrompt, getApiKeyAndHeaders, onPayload, activeToolNames, ...harnessOptions } = options;
+		const { browser, client, model, extraTools, computerUseExtra, playwright, systemPrompt, getApiKeyAndHeaders, onPayload, activeToolNames, ...harnessOptions } = options;
 		const runtime = new CuaRuntimeController({
 			browser,
 			client,
 			model,
 			extraTools,
 			computerUseExtra,
+			playwright,
 			onPayload
 		});
 		const resolvedTools = runtime.tools();

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@onkernel/cua-agent",
-  "version": "0.3.3",
+  "version": "0.3.4",
   "description": "Kernel browser computer-use Agent and AgentHarness classes built on pi-agent-core",
   "license": "MIT",
   "type": "module",
@@ -42,7 +42,7 @@
   "dependencies": {
     "@earendil-works/pi-agent-core": "0.79.1",
     "@earendil-works/pi-ai": "0.79.1",
-    "@onkernel/cua-ai": "0.3.0",
+    "@onkernel/cua-ai": "0.3.1",
     "@onkernel/sdk": "0.49.0",
     "sharp": "^0.34.5"
   },