@onkernel/cua-agent 0.1.0 → 0.3.0

This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,24 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.3.0 - 2026-06-10
4
+
5
+ - Replaces the vendored pi-agent-core snapshot with the released `@earendil-works/pi-agent-core@0.79.1` dependency. The full pi surface is still re-exported, but it now tracks the published package instead of a frozen fork.
6
+ - BREAKING: `harness.agent` is removed. It only existed in the vendored pre-release snapshot and never shipped in any pi-agent-core release; use `getModel()`, `getTools()`, and `getActiveTools()` instead.
7
+ - BREAKING: `steer()`, `followUp()`, `nextTurn()`, and `setStreamOptions()` on the harness now return promises and must be awaited.
8
+ - BREAKING: the harness `model_select` and `thinking_level_select` events are renamed `model_update` and `thinking_level_update`, and the `steeringMode`/`followUpMode` property accessors became `getSteeringMode()`/`setSteeringMode()`/`getFollowUpMode()`/`setFollowUpMode()` methods.
9
+ - BREAKING: `ExecutionEnv` is now `Result`-based. Custom env implementations must return `Result` values instead of throwing.
10
+ - BREAKING: requires Node.js >= 22.19.0.
11
+ - `NodeExecutionEnv` now comes from `@earendil-works/pi-agent-core`'s `/node` subpath; importing it from `@onkernel/cua-agent` keeps working.
12
+ - Tool execution follows pi's throw-on-failure contract: failed browser actions throw an error labeled with the action instead of also encoding the failure into tool result content and details.
13
+ - Moves the step that appends the yutori screenshot payload into `@onkernel/cua-ai`'s payload middleware.
14
+ - Built ESM output uses explicit `.js` relative import specifiers so `dist` resolves under plain Node.js.
15
+
16
+ ## 0.2.0 - 2026-05-13
17
+
18
+ - Adds `CuaAgentHarness`, a provider-aware harness API with session-backed turns, resource and prompt helpers, active tool selection, and model switching.
19
+ - Keeps CUA runtime defaults in sync when changing models so provider-specific tools, prompts, and payload middleware update together.
20
+ - Improves browser keyboard shortcut translation for Kernel computer actions.
21
+
3
22
  ## 0.1.0
4
23
 
5
24
  - Class-first CUA runtime: `CuaAgent` and `CuaHarness` on top of pi-agent-core.
package/README.md CHANGED
@@ -1,9 +1,11 @@
1
1
  # `@onkernel/cua-agent`
2
2
 
3
- Kernel browser computer-use classes built on
4
- [`@earendil-works/pi-agent-core`](https://github.com/earendil-works/pi/tree/main/packages/agent).
3
+ Kernel browser computer-use classes built on the `Agent` and `AgentHarness`
4
+ classes from [`@earendil-works/pi-agent-core`](https://www.npmjs.com/package/@earendil-works/pi-agent-core).
5
+ The full pi-agent-core surface is re-exported from this package, including
6
+ `NodeExecutionEnv` from its `/node` subpath.
5
7
 
6
- This package keeps pi-agent-core semantics intact and adds browser execution
8
+ This package keeps pi agent semantics intact and adds browser execution
7
9
  plumbing for canonical CUA tools.
8
10
 
9
11
  ## Installation
@@ -33,45 +35,61 @@ const agent = new CuaAgent({
33
35
  await agent.prompt("Open news.ycombinator.com and summarize the top story.");
34
36
  ```
35
37
 
36
- ## Quick Start (`CuaHarness`)
38
+ ## Quick Start (`CuaAgentHarness`)
37
39
 
38
40
  ```ts
39
- import { CuaHarness } from "@onkernel/cua-agent";
41
+ import { CuaAgentHarness, InMemorySessionRepo, NodeExecutionEnv } from "@onkernel/cua-agent";
40
42
 
41
- const harness = new CuaHarness({
43
+ const sessionRepo = new InMemorySessionRepo();
44
+ const session = await sessionRepo.create({ id: "example" });
45
+
46
+ const harness = new CuaAgentHarness({
42
47
  browser,
43
48
  client,
49
+ env: new NodeExecutionEnv({ cwd: process.cwd() }),
44
50
  model: "openai:gpt-5.5",
51
+ session,
45
52
  });
46
53
 
47
- await harness.prompt("Open example.com and tell me the current URL.");
48
- const transcript = harness.getTranscript();
49
- console.log("messages in transcript:", transcript.length);
54
+ const response = await harness.prompt("Open example.com and tell me the current URL.");
55
+ const branch = await session.getBranch();
56
+ const lastAssistant = [...branch]
57
+ .reverse()
58
+ .flatMap((entry) =>
59
+ entry.type === "message" && entry.message.role === "assistant" ? [entry.message] : [],
60
+ )[0];
61
+ const assistant = lastAssistant ?? response;
62
+ const assistantText = assistant.content
63
+ .flatMap((block) => (block.type === "text" ? [block.text] : []))
64
+ .join("")
65
+ .trim();
66
+ console.log("assistant stopReason:", assistant.stopReason);
67
+ console.log("assistant text:", assistantText || "(no text)");
50
68
  ```
51
69
 
52
- Use `CuaAgent` when you want direct pi `Agent` control: raw transcript state,
70
+ Use `CuaAgent` when you want direct pi `Agent` control: raw message state,
53
71
  lifecycle events, custom streaming, and explicit prompt/continue/queue control.
54
72
  Reach for the harness shape when you want an app layer around the loop:
55
- session/transcript helpers, resource and prompt entry points, provider/auth
56
- hooks, active tool selection, compaction/tree workflows, and higher-level queue
57
- events. `CuaHarness` is the thin CUA version of that shape today: it installs
58
- CUA defaults, delegates runtime methods to the wrapped `Agent`, and adds
59
- `getTranscript()`.
73
+ session-backed turns, resource and prompt entry points, provider/auth hooks,
74
+ active tool selection, compaction/tree workflows, and higher-level queue events.
75
+ `CuaAgentHarness` extends pi `AgentHarness`, installs CUA defaults, and refreshes
76
+ provider-specific runtime state when `setModel()` changes models.
60
77
 
61
78
  ## Core Concepts
62
79
 
63
80
  ### Class-First API
64
81
 
65
82
  - `CuaAgent extends Agent`
66
- - `CuaHarness` wraps a pi `Agent` with a harness-style constructor and
67
- delegated runtime methods.
83
+ - `CuaAgentHarness extends AgentHarness`
68
84
 
69
85
  Both classes mirror pi constructor shapes and behavior, with minimal additions:
70
86
  - `browser` (Kernel browser response)
71
87
  - `client` (Kernel SDK client)
72
88
  - CUA model refs (`"provider:model"`) accepted where pi expects a concrete model
89
+ - `extraTools` to add your own pi tools alongside the built-in browser tools
90
+ - `computerUseExtra: true` to let the model use a small navigation helper
73
91
 
74
- If `getApiKey` is omitted, both classes default to CUA env var conventions:
92
+ If auth callbacks are omitted, both classes default to CUA env var conventions:
75
93
  - OpenAI: `OPENAI_API_KEY`
76
94
  - Anthropic: `ANTHROPIC_OAUTH_TOKEN` or `ANTHROPIC_API_KEY`
77
95
  - Gemini: `GOOGLE_API_KEY` or `GEMINI_API_KEY`
@@ -80,14 +98,35 @@ If `getApiKey` is omitted, both classes default to CUA env var conventions:
80
98
 
81
99
  ### Tool Defaults
82
100
 
83
- If tools are omitted, the classes install canonical CUA computer tool executors
84
- using runtime specs from `@onkernel/cua-ai`. If tools are provided, they are
85
- used exactly.
101
+ By default, the classes install provider-selected CUA computer tool executors
102
+ from `@onkernel/cua-ai`. Each provider decides which tool names the model sees;
103
+ the matching executor adapter translates returned tool calls into canonical CUA
104
+ actions that run against the Kernel browser.
105
+
106
+ Use `extraTools` to add your own pi tools alongside the provider's
107
+ computer-use tools. This is useful when the model needs to call
108
+ application-specific code, such as looking up a record, writing a database row,
109
+ or handing off to another service while it also controls the browser.
110
+
111
+ `computerUseExtra: true` adds the `computer_use_extra` tool. Use it when you
112
+ want one compact helper for common browser navigation/read operations:
113
+ `goto`, `back`, `forward`, and `url`.
114
+
115
+ ### Model Switching
116
+
117
+ `CuaAgent` follows pi `Agent` semantics: assign a concrete model or CUA model
118
+ ref to `agent.state.model`. CUA-owned tools and the default system prompt
119
+ refresh with the new provider runtime.
120
+
121
+ `CuaAgentHarness` follows pi `AgentHarness` semantics: call
122
+ `await harness.setModel(model)`. The harness updates its model through pi's
123
+ snapshot machinery and refreshes CUA-owned tools and default prompt state for
124
+ the next provider request.
86
125
 
87
126
  ### Tool Composition
88
127
 
89
- Use `createCuaComputerTools()` to compose your own tool list from canonical
90
- tool definitions:
128
+ Use `createCuaComputerTools()` to compose your own tool list from provider
129
+ execution adapters:
91
130
 
92
131
  ```ts
93
132
  import { resolveCuaRuntimeSpec } from "@onkernel/cua-ai";
@@ -98,11 +137,12 @@ const tools = [
98
137
  ...createCuaComputerTools({
99
138
  browser,
100
139
  client,
101
- toolDefinitions: runtime.toolDefinitions,
140
+ toolExecutors: runtime.toolExecutors,
102
141
  }),
103
142
  myCustomTool,
104
143
  ];
105
144
  ```
106
145
 
107
146
  For full event semantics, steering, follow-up queues, and tool execution
108
- details, see the pi-agent-core README.
147
+ details, see the [`@earendil-works/pi-agent-core`](https://www.npmjs.com/package/@earendil-works/pi-agent-core)
148
+ package.
package/dist/index.d.ts CHANGED
@@ -1,8 +1,138 @@
1
+ import { Agent, AgentHarness, AgentHarnessOptions, AgentOptions, AgentState, AgentTool, PromptTemplate, Skill } from "@earendil-works/pi-agent-core";
2
+ import { NodeExecutionEnv } from "@earendil-works/pi-agent-core/node";
3
+ import { Api, ComputerToolCoordinateSystem, CuaModelRef, CuaScreenshotSpec, CuaToolExecutorSpec, Model, SimpleStreamOptions, TSchema } from "@onkernel/cua-ai";
4
+ import Kernel from "@onkernel/sdk";
5
+ import { BrowserCreateResponse, BrowserRetrieveResponse } from "@onkernel/sdk/resources/browsers";
1
6
  export * from "@earendil-works/pi-agent-core";
2
- export type { KernelBrowser } from "./translator/translator";
3
- export { createCuaComputerTools } from "./tools";
4
- export type { BatchDetails, ComputerToolOptions, CuaExecutorTool, NavigationDetails, SupportedCuaExecutorToolName, } from "./tools";
5
- export { SUPPORTED_CUA_EXECUTOR_TOOL_NAMES } from "./tools";
6
- export { CuaAgent, CuaHarness } from "./agent";
7
- export type { CuaAgentOptions, CuaHarnessOptions } from "./agent";
8
- //# sourceMappingURL=index.d.ts.map
7
+
8
+ //#region src/translator/translator.d.ts
9
+ type KernelBrowser = BrowserCreateResponse | BrowserRetrieveResponse; // Either Kernel SDK browser response shape (create or retrieve) is accepted.
10
+ //#endregion
11
+ //#region src/tools.d.ts
12
+ interface ComputerToolOptions { // Argument object accepted by createCuaComputerTools().
13
+ browser: KernelBrowser; // Kernel browser the tools run against.
14
+ client: Kernel; // Kernel SDK client.
15
+ toolExecutors: CuaToolExecutorSpec[]; // Required executor specs, e.g. `runtime.toolExecutors` from resolveCuaRuntimeSpec().
16
+ coordinateSystem?: ComputerToolCoordinateSystem; // NOTE(review): semantics are defined in @onkernel/cua-ai and not visible here.
17
+ screenshot?: CuaScreenshotSpec; // NOTE(review): semantics are defined in @onkernel/cua-ai and not visible here.
18
+ computerUseExtra?: boolean; // When true, adds the `computer_use_extra` helper tool (goto, back, forward, url).
19
+ }
20
+ interface BatchDetails { // `details` payload type of BatchTool and ActionTool.
21
+ statusText: string;
22
+ readResults: Array<{ // Each element is a union member discriminated by its `type` field.
23
+ type: "url";
24
+ url: string;
25
+ } | {
26
+ type: "screenshot";
27
+ bytes: number; // NOTE(review): presumably the screenshot size in bytes, not the image data — confirm.
28
+ } | {
29
+ type: "cursor_position";
30
+ x: number;
31
+ y: number;
32
+ }>;
33
+ }
34
+ interface NavigationDetails { // `details` payload type of NavigationTool.
35
+ action: string; // NOTE(review): presumably one of the computer_use_extra actions (goto, back, forward, url) — confirm.
36
+ statusText: string;
37
+ url?: string; // Optional; not every result carries a URL.
38
+ }
39
+ type BatchTool = AgentTool<TSchema, BatchDetails>; // Pi tool whose details are BatchDetails.
40
+ type NavigationTool = AgentTool<TSchema, NavigationDetails>; // Pi tool whose details are NavigationDetails.
41
+ type ActionTool = AgentTool<TSchema, BatchDetails>; // Same type as BatchTool; only the alias name differs.
42
+ type CuaExecutorTool = BatchTool | NavigationTool | ActionTool; // Element type returned by createCuaComputerTools(); two distinct shapes, since BatchTool and ActionTool coincide.
43
+ declare function createCuaComputerTools(args: ComputerToolOptions): CuaExecutorTool[]; // Builds the CUA executor tools for the given browser/client/executor specs; spread the result next to your own tools.
44
+ //#endregion
45
+ //#region src/agent.d.ts
46
+ /** A CUA model reference string (for example `"openai:gpt-5.5"`) or a concrete pi model object. */
47
+ type CuaRuntimeInput = CuaModelRef | Model<Api>;
48
+ /**
49
+ * Agent state exposed by {@link CuaAgent}.
50
+ *
51
+ * It is the regular pi `AgentState`, except assigning `state.model` may use a
52
+ * CUA model ref such as `"openai:gpt-5.5"`. CUA-owned tools and the default
53
+ * system prompt are refreshed to match the new provider runtime.
54
+ */
55
+ interface CuaAgentState extends Omit<AgentState, "model"> {
56
+ /** The concrete pi model currently used by the underlying agent loop. Reads are typed as `Model<Api>`, never as a CUA ref string. */
57
+ get model(): Model<Api>;
58
+ /** Assign a concrete pi model or CUA model ref and refresh CUA runtime defaults. The setter accepts a wider type than the getter returns. */
59
+ set model(model: CuaRuntimeInput);
60
+ }
61
+ /** Initial state for {@link CuaAgent}: pi's initial state with `tools` omitted and `model` replaced by a required CUA-aware value. */
62
+ type CuaAgentInitialState = Omit<NonNullable<AgentOptions["initialState"]>, "model" | "tools"> & {
63
+ /** Model to use for the first turn. CUA refs are resolved before pi sees the state. */model: CuaRuntimeInput;
64
+ };
65
+ /**
66
+ * Constructor options for {@link CuaAgent}.
67
+ *
68
+ * `browser` and `client` are used to build the default computer-use tools; `extraTools` and `computerUseExtra` are CUA additions.
69
+ * Everything else follows pi `AgentOptions`, except that `initialState` is required here, with `initialState.model`
70
+ * widened to accept CUA model refs and `initialState.tools` omitted. In this generated output a trailing doc comment describes the property on the next line.
71
+ */
72
+ type CuaAgentOptions = Omit<AgentOptions, "initialState"> & {
73
+ /** Kernel browser session used by default CUA tools. */browser: KernelBrowser; /** Kernel SDK client used by default CUA tools. */
74
+ client: Kernel; /** Initial pi state plus a CUA-aware model value. */
75
+ initialState: CuaAgentInitialState; /** Add your own pi tools alongside the built-in browser tools. */
76
+ extraTools?: AgentTool[]; /** Expose a helper for browser navigation and URL reads. */
77
+ computerUseExtra?: boolean;
78
+ };
79
+ /**
80
+ * Constructor options for {@link CuaAgentHarness}.
81
+ *
82
+ * The harness keeps pi `AgentHarnessOptions` except that `model` accepts CUA refs,
83
+ * pi's `tools` option is omitted in favor of `extraTools`, and `browser`/`client` are required to build default
84
+ * computer-use tools. Callers provide pi's `env` and `session` directly. In this generated output a trailing doc comment describes the property on the next line.
85
+ */
86
+ type CuaAgentHarnessOptions<TSkill extends Skill = Skill, TPromptTemplate extends PromptTemplate = PromptTemplate> = Omit<AgentHarnessOptions<TSkill, TPromptTemplate, AgentTool>, "model" | "tools"> & {
87
+ /** Kernel browser session used by default CUA tools. */browser: KernelBrowser; /** Kernel SDK client used by default CUA tools. */
88
+ client: Kernel; /** Model used by the harness. CUA refs are resolved before pi sees the model. */
89
+ model: CuaRuntimeInput; /** Add your own pi tools alongside the built-in browser tools. */
90
+ extraTools?: AgentTool[]; /** Expose a helper for browser navigation and URL reads. */
91
+ computerUseExtra?: boolean; /** Optional payload hook composed after the provider-specific CUA payload hook. */
92
+ onPayload?: SimpleStreamOptions["onPayload"];
93
+ };
94
+ /**
95
+ * Pi `Agent` configured for Kernel browser computer use.
96
+ *
97
+ * Use this class when you want direct access to the lower-level pi agent state,
98
+ * queues, event stream, and `state.model` assignment semantics. It resolves CUA model
99
+ * refs, installs provider-appropriate CUA tools by default, and keeps those
100
+ * defaults in sync when `agent.state.model` changes.
101
+ */
102
+ declare class CuaAgent extends Agent {
103
+ private readonly runtime; // Private member; its type is erased in the declaration output.
104
+ private readonly ownsSystemPrompt; // NOTE(review): presumably records whether the system prompt is the CUA default, so it can be refreshed — confirm.
105
+ private stateProxy?; // NOTE(review): presumably caches the proxy returned by `state` — confirm.
106
+ constructor(options: CuaAgentOptions);
107
+ /**
108
+ * Return a state proxy so `agent.state.model = "provider:model"` can behave
109
+ * like pi's normal mutable state while also re-resolving CUA tools, prompt,
110
+ * and payload hooks for the selected provider.
111
+ */
112
+ get state(): CuaAgentState;
113
+ private applyRuntime; // NOTE(review): presumably re-applies provider runtime defaults after a model change — confirm.
114
+ }
115
+ /**
116
+ * Pi `AgentHarness` configured for Kernel browser computer use.
117
+ *
118
+ * Use this class when you want pi's higher-level harness APIs for sessions,
119
+ * resources, prompt templates, queue events, compaction, and model selection.
120
+ * It installs provider CUA tools by default and keeps CUA-owned runtime
121
+ * defaults in sync through `setModel()`.
122
+ */
123
+ declare class CuaAgentHarness<TSkill extends Skill = Skill, TPromptTemplate extends PromptTemplate = PromptTemplate> extends AgentHarness<TSkill, TPromptTemplate, AgentTool> {
124
+ private readonly runtime; // Private member; its type is erased in the declaration output.
125
+ private requestedActiveToolNames?; // NOTE(review): presumably remembers the names last passed to setActiveTools() — confirm.
126
+ constructor(options: CuaAgentHarnessOptions<TSkill, TPromptTemplate>);
127
+ /**
128
+ * Mirror pi `AgentHarness.setModel()` while accepting CUA model refs.
129
+ *
130
+ * The override refreshes CUA-owned tools before delegating to pi so the
131
+ * harness snapshot and session model-change entry are written with the
132
+ * concrete model selected by `@onkernel/cua-ai`.
133
+ */
134
+ setModel(model: CuaRuntimeInput): Promise<void>; // Returns a promise; await it before the next provider request.
135
+ setActiveTools(toolNames: string[]): Promise<void>; // NOTE(review): undocumented in the source; presumably overrides pi's setActiveTools so the selection can be re-applied after a model switch — confirm.
136
+ }
137
+ //#endregion
138
+ export { type BatchDetails, type ComputerToolOptions, CuaAgent, CuaAgentHarness, type CuaAgentHarnessOptions, type CuaAgentOptions, type CuaAgentState, type CuaExecutorTool, type KernelBrowser, type NavigationDetails, NodeExecutionEnv, createCuaComputerTools };