@onkernel/cua-ai 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/CHANGELOG.md +100 -0
  2. package/README.md +341 -65
  3. package/dist/chunk-D7D4PA-g.js +13 -0
  4. package/dist/index.d.ts +576 -10
  5. package/dist/index.js +1999 -11
  6. package/docs/supported-models.md +77 -0
  7. package/examples/quickstart.ts +28 -22
  8. package/package.json +10 -6
  9. package/dist/api-keys.d.ts +0 -8
  10. package/dist/api-keys.d.ts.map +0 -1
  11. package/dist/api-keys.js +0 -48
  12. package/dist/api-keys.js.map +0 -1
  13. package/dist/index.d.ts.map +0 -1
  14. package/dist/index.js.map +0 -1
  15. package/dist/models.d.ts +0 -33
  16. package/dist/models.d.ts.map +0 -1
  17. package/dist/models.js +0 -159
  18. package/dist/models.js.map +0 -1
  19. package/dist/providers/anthropic/index.d.ts +0 -10
  20. package/dist/providers/anthropic/index.d.ts.map +0 -1
  21. package/dist/providers/anthropic/index.js +0 -16
  22. package/dist/providers/anthropic/index.js.map +0 -1
  23. package/dist/providers/common.d.ts +0 -111
  24. package/dist/providers/common.d.ts.map +0 -1
  25. package/dist/providers/common.js +0 -138
  26. package/dist/providers/common.js.map +0 -1
  27. package/dist/providers/gemini/index.d.ts +0 -11
  28. package/dist/providers/gemini/index.d.ts.map +0 -1
  29. package/dist/providers/gemini/index.js +0 -14
  30. package/dist/providers/gemini/index.js.map +0 -1
  31. package/dist/providers/openai/index.d.ts +0 -8
  32. package/dist/providers/openai/index.d.ts.map +0 -1
  33. package/dist/providers/openai/index.js +0 -22
  34. package/dist/providers/openai/index.js.map +0 -1
  35. package/dist/providers/tzafon/index.d.ts +0 -12
  36. package/dist/providers/tzafon/index.d.ts.map +0 -1
  37. package/dist/providers/tzafon/index.js +0 -18
  38. package/dist/providers/tzafon/index.js.map +0 -1
  39. package/dist/providers/tzafon/provider.d.ts +0 -8
  40. package/dist/providers/tzafon/provider.d.ts.map +0 -1
  41. package/dist/providers/tzafon/provider.js +0 -234
  42. package/dist/providers/tzafon/provider.js.map +0 -1
  43. package/dist/providers/yutori/index.d.ts +0 -12
  44. package/dist/providers/yutori/index.d.ts.map +0 -1
  45. package/dist/providers/yutori/index.js +0 -23
  46. package/dist/providers/yutori/index.js.map +0 -1
  47. package/dist/providers/yutori/provider.d.ts +0 -9
  48. package/dist/providers/yutori/provider.d.ts.map +0 -1
  49. package/dist/providers/yutori/provider.js +0 -307
  50. package/dist/providers/yutori/provider.js.map +0 -1
  51. package/dist/providers.d.ts +0 -6
  52. package/dist/providers.d.ts.map +0 -1
  53. package/dist/providers.js +0 -26
  54. package/dist/providers.js.map +0 -1
  55. package/dist/runtime-spec.d.ts +0 -29
  56. package/dist/runtime-spec.d.ts.map +0 -1
  57. package/dist/runtime-spec.js +0 -58
  58. package/dist/runtime-spec.js.map +0 -1
package/dist/index.js CHANGED
@@ -1,13 +1,2001 @@
1
- import { registerCuaProviders } from "./providers";
1
+ import { t as __exportAll } from "./chunk-D7D4PA-g.js";
2
+ import { Type, createAssistantMessageEventStream, getModel, getModels, registerApiProvider } from "@earendil-works/pi-ai";
3
+ import Lightcone from "@tzafon/lightcone";
4
+ import OpenAI from "openai";
2
5
  export * from "@earendil-works/pi-ai";
3
- export * from "./models";
4
- export * from "./api-keys";
5
- export * from "./runtime-spec";
6
- export * from "./providers/common";
7
- export * as anthropic from "./providers/anthropic/index";
8
- export * as gemini from "./providers/gemini/index";
9
- export * as openai from "./providers/openai/index";
10
- export * as tzafon from "./providers/tzafon/index";
11
- export * as yutori from "./providers/yutori/index";
6
+ //#region src/providers/common.ts
7
+ const CUA_ACTION_TYPES = [
8
+ "click",
9
+ "double_click",
10
+ "mouse_down",
11
+ "mouse_up",
12
+ "type",
13
+ "keypress",
14
+ "scroll",
15
+ "move",
16
+ "drag",
17
+ "wait",
18
+ "screenshot",
19
+ "goto",
20
+ "back",
21
+ "forward",
22
+ "url",
23
+ "cursor_position"
24
+ ];
25
+ const PointSchema = Type.Object({
26
+ x: Type.Number(),
27
+ y: Type.Number()
28
+ }, { additionalProperties: false });
29
+ const CUA_ACTION_SCHEMA_BY_TYPE = {
30
+ click: Type.Object({
31
+ type: Type.Literal("click"),
32
+ x: Type.Number(),
33
+ y: Type.Number(),
34
+ button: Type.Optional(Type.String()),
35
+ hold_keys: Type.Optional(Type.Array(Type.String()))
36
+ }, { additionalProperties: false }),
37
+ double_click: Type.Object({
38
+ type: Type.Literal("double_click"),
39
+ x: Type.Number(),
40
+ y: Type.Number(),
41
+ hold_keys: Type.Optional(Type.Array(Type.String()))
42
+ }, { additionalProperties: false }),
43
+ mouse_down: Type.Object({
44
+ type: Type.Literal("mouse_down"),
45
+ x: Type.Number(),
46
+ y: Type.Number(),
47
+ button: Type.Optional(Type.String()),
48
+ hold_keys: Type.Optional(Type.Array(Type.String()))
49
+ }, { additionalProperties: false }),
50
+ mouse_up: Type.Object({
51
+ type: Type.Literal("mouse_up"),
52
+ x: Type.Number(),
53
+ y: Type.Number(),
54
+ button: Type.Optional(Type.String()),
55
+ hold_keys: Type.Optional(Type.Array(Type.String()))
56
+ }, { additionalProperties: false }),
57
+ type: Type.Object({
58
+ type: Type.Literal("type"),
59
+ text: Type.String()
60
+ }, { additionalProperties: false }),
61
+ keypress: Type.Object({
62
+ type: Type.Literal("keypress"),
63
+ keys: Type.Array(Type.String()),
64
+ duration: Type.Optional(Type.Number())
65
+ }, { additionalProperties: false }),
66
+ scroll: Type.Object({
67
+ type: Type.Literal("scroll"),
68
+ x: Type.Optional(Type.Number()),
69
+ y: Type.Optional(Type.Number()),
70
+ scroll_x: Type.Optional(Type.Number()),
71
+ scroll_y: Type.Optional(Type.Number()),
72
+ hold_keys: Type.Optional(Type.Array(Type.String()))
73
+ }, { additionalProperties: false }),
74
+ move: Type.Object({
75
+ type: Type.Literal("move"),
76
+ x: Type.Number(),
77
+ y: Type.Number()
78
+ }, { additionalProperties: false }),
79
+ drag: Type.Object({
80
+ type: Type.Literal("drag"),
81
+ path: Type.Array(PointSchema, { minItems: 2 }),
82
+ button: Type.Optional(Type.String()),
83
+ hold_keys: Type.Optional(Type.Array(Type.String()))
84
+ }, { additionalProperties: false }),
85
+ wait: Type.Object({
86
+ type: Type.Literal("wait"),
87
+ ms: Type.Optional(Type.Number())
88
+ }, { additionalProperties: false }),
89
+ screenshot: Type.Object({ type: Type.Literal("screenshot") }, { additionalProperties: false }),
90
+ goto: Type.Object({
91
+ type: Type.Literal("goto"),
92
+ url: Type.String()
93
+ }, { additionalProperties: false }),
94
+ back: Type.Object({ type: Type.Literal("back") }, { additionalProperties: false }),
95
+ forward: Type.Object({ type: Type.Literal("forward") }, { additionalProperties: false }),
96
+ url: Type.Object({ type: Type.Literal("url") }, { additionalProperties: false }),
97
+ cursor_position: Type.Object({ type: Type.Literal("cursor_position") }, { additionalProperties: false })
98
+ };
99
/**
 * Build the tool-argument schema for a single canonical CUA action.
 *
 * The result is the action's entry in `CUA_ACTION_SCHEMA_BY_TYPE` with the
 * `type` discriminator removed, re-wrapped as an object schema that rejects
 * additional properties.
 *
 * @param {string} action - A key of `CUA_ACTION_SCHEMA_BY_TYPE`.
 * @returns The object schema describing the action's arguments without `type`.
 * @throws {TypeError} If `action` has no own entry in `CUA_ACTION_SCHEMA_BY_TYPE`.
 */
function createCuaActionArgumentSchema(action) {
  // Own-property lookup: inherited keys such as "constructor" must not be
  // mistaken for an action schema.
  const schema = Object.prototype.hasOwnProperty.call(CUA_ACTION_SCHEMA_BY_TYPE, action)
    ? CUA_ACTION_SCHEMA_BY_TYPE[action]
    : void 0;
  if (!schema) throw new TypeError(`unsupported CUA action type: ${String(action)}`);
  const { type: _type, ...properties } = schema.properties;
  return Type.Object(properties, { additionalProperties: false });
}
103
/**
 * Build the schema that accepts any of the given canonical CUA actions.
 *
 * @param {string[]} [actions=CUA_ACTION_TYPES] - Action type names to allow.
 * @returns The single action schema when exactly one action is given,
 *   otherwise a union of the selected action schemas.
 * @throws {Error} If `actions` is empty, or if any name has no own entry in
 *   `CUA_ACTION_SCHEMA_BY_TYPE`.
 */
function createCuaActionSchema(actions = CUA_ACTION_TYPES) {
  if (actions.length === 0) throw new Error("actions must include at least one CUA action type");
  // Reject unknown names up front; otherwise the lookup below would return
  // `undefined` (or place `undefined` members inside the union).
  const unsupported = actions.filter(
    (action) => !Object.prototype.hasOwnProperty.call(CUA_ACTION_SCHEMA_BY_TYPE, action)
  );
  if (unsupported.length > 0) {
    throw new Error(`unsupported CUA action type(s): ${unsupported.map(String).join(", ")}`);
  }
  if (actions.length === 1) return CUA_ACTION_SCHEMA_BY_TYPE[actions[0]];
  return Type.Union(actions.map((action) => CUA_ACTION_SCHEMA_BY_TYPE[action]));
}
108
+ function createCuaActionToolDefinitions(actions = CUA_ACTION_TYPES) {
109
+ return actions.map((action) => ({
110
+ name: action,
111
+ description: `Execute one ${action} computer action.`,
112
+ parameters: createCuaActionArgumentSchema(action)
113
+ }));
114
+ }
115
+ const CuaActionSchema = createCuaActionSchema();
116
+ function createCuaBatchSchema(actions) {
117
+ return Type.Object({ actions: Type.Array(createCuaActionSchema(actions), { description: "Ordered computer actions to execute." }) });
118
+ }
119
+ const CuaBatchSchema = createCuaBatchSchema();
120
+ const CuaNavigationSchema = Type.Object({
121
+ action: Type.Union([
122
+ Type.Literal("goto"),
123
+ Type.Literal("back"),
124
+ Type.Literal("forward"),
125
+ Type.Literal("url")
126
+ ]),
127
+ url: Type.Optional(Type.String())
128
+ }, { additionalProperties: false });
129
+ /**
130
+ * Default name for batch computer-action tools created by
131
+ * {@link createCuaBatchToolDefinition} and the name Anthropic's batch tool
132
+ * ships under (the only provider that includes one by default).
133
+ */
134
+ const CUA_BATCH_TOOL_NAME = "computer_batch";
135
+ const CUA_NAVIGATION_TOOL_NAME = "computer_use_extra";
136
+ const CUA_BATCH_TOOL_DESCRIPTION = [
137
+ "Execute multiple computer actions in sequence, including ordered read steps like url(), cursor_position(), and screenshot().",
138
+ "Prefer this tool for predictable browser interaction sequences such as click-then-type, typing a URL, keyboard navigation, drag paths, and mixed write/read batches.",
139
+ "If no explicit read step is included, the tool returns one fresh screenshot after execution."
140
+ ].join("\n");
141
+ const CUA_NAVIGATION_TOOL_DESCRIPTION = "High-level browser navigation helpers for goto, back, forward, and url.";
142
+ /**
143
+ * Build the provider's CUA computer-use tools.
144
+ *
145
+ * Use this when calling `complete()` or `stream()` directly and you need an
146
+ * array of `Tool` objects for browser actions. Pass `actions` to expose only a
147
+ * smaller set, such as `["click"]`.
148
+ */
149
+ function computerTools(options = {}) {
150
+ return createCuaActionToolDefinitions(options.actions);
151
+ }
152
+ /** Build execution adapters for individual canonical CUA action tools. */
153
+ function createCuaActionToolExecutors(actions = CUA_ACTION_TYPES) {
154
+ return createCuaActionToolDefinitions(actions).map((definition) => {
155
+ const actionType = definition.name;
156
+ return {
157
+ definition,
158
+ toActions(args) {
159
+ return [{
160
+ ...args && typeof args === "object" ? args : {},
161
+ type: actionType
162
+ }];
163
+ }
164
+ };
165
+ });
166
+ }
167
+ /** Return the canonical tool name that should execute a normalized CUA action. */
168
+ function canonicalToolCallName(action) {
169
+ return action.type;
170
+ }
171
+ /** Convert a normalized CUA action into tool-call arguments by removing its `type` tag. */
172
+ function canonicalToolCallArguments(action) {
173
+ const { type: _type, ...args } = action;
174
+ return args;
175
+ }
176
/**
 * Prefix bare hostnames/paths with `https://` before browser navigation.
 *
 * Input that is already a complete URL is returned trimmed but otherwise
 * unchanged:
 * - anything starting with `scheme://`;
 * - `about:`, `blob:` and `data:` URLs, which have no `//` after the scheme.
 *
 * Scheme-relative input (`//host/path`) is completed with `https:`. All other
 * non-empty strings, including `host:port`, get an `https://` prefix.
 *
 * @param {unknown} value - Candidate URL, typically model-provided.
 * @returns {string | undefined} The normalized URL, or `undefined` when
 *   `value` is not a string or is blank.
 */
function normalizeGotoUrl(value) {
  if (typeof value !== "string") return void 0;
  const url = value.trim();
  if (!url) return void 0;
  if (/^[a-z][a-z0-9+.-]*:\/\//i.test(url)) return url;
  // Non-hierarchical browser URLs. The lookahead keeps `data:8080`-style
  // host:port input on the bare-hostname path below.
  if (/^(?:about|blob|data):(?!\d+(?:[/?#]|$))/i.test(url)) return url;
  // Scheme-relative URL: supply only the missing scheme.
  if (url.startsWith("//")) return `https:${url}`;
  return `https://${url}`;
}
183
/**
 * Build the tool definition for a batch computer-action tool whose input is
 * `{ actions }`.
 *
 * @param {string[]} [actions] - Action types allowed inside the batch; passed
 *   through to `createCuaBatchSchema`.
 * @param {{ name?: string, description?: string }} [options] - Overrides for
 *   the tool name and description. Nullish values fall back to
 *   `CUA_BATCH_TOOL_NAME` and `CUA_BATCH_TOOL_DESCRIPTION`.
 * @returns {{ name: string, description: string, parameters: object }} The
 *   tool definition.
 */
function createCuaBatchToolDefinition(actions, options = {}) {
  return {
    // Use the shared constant rather than repeating the literal so the default
    // name is defined in exactly one place.
    name: options.name ?? CUA_BATCH_TOOL_NAME,
    description: options.description ?? CUA_BATCH_TOOL_DESCRIPTION,
    parameters: createCuaBatchSchema(actions)
  };
}
190
+ /** Build an execution adapter for a batch tool whose input is `{ actions }`. */
191
+ function createCuaBatchToolExecutor(actions, options = {}) {
192
+ return {
193
+ definition: createCuaBatchToolDefinition(actions, options),
194
+ toActions(args) {
195
+ if (!isBatchInput(args)) throw new Error("invalid batch tool parameters");
196
+ return args.actions;
197
+ }
198
+ };
199
+ }
200
+ /** Build the provider's default CUA tool execution adapters. */
201
+ function computerToolExecutors(options = {}) {
202
+ return createCuaActionToolExecutors(options.actions);
203
+ }
204
+ function isBatchInput(value) {
205
+ return Boolean(value && typeof value === "object" && Array.isArray(value.actions));
206
+ }
207
+ function createCuaNavigationToolDefinition() {
208
+ return {
209
+ name: CUA_NAVIGATION_TOOL_NAME,
210
+ description: CUA_NAVIGATION_TOOL_DESCRIPTION,
211
+ parameters: CuaNavigationSchema
212
+ };
213
+ }
214
+ //#endregion
215
+ //#region src/providers/tzafon/provider.ts
216
+ const TZAFON_RESPONSES_API = "tzafon-responses";
217
+ const TZAFON_COMPUTER_USE_TOOL = {
218
+ type: "computer_use",
219
+ display_width: 1920,
220
+ display_height: 1080,
221
+ environment: "browser"
222
+ };
223
+ const TZAFON_LOCAL_ACTION_TOOL_NAMES = new Set(CUA_ACTION_TYPES);
224
+ const streamSimpleTzafonResponses = (model, context, options) => {
225
+ return streamTzafonResponses(model, context, options);
226
+ };
227
+ const streamTzafonResponses = (model, context, options) => {
228
+ const stream = createAssistantMessageEventStream();
229
+ const output = initialAssistantMessage$1(model);
230
+ (async () => {
231
+ try {
232
+ const apiKey = options?.apiKey || process.env.TZAFON_API_KEY;
233
+ if (!apiKey) throw new Error(`No API key for provider: ${model.provider}`);
234
+ const client = new Lightcone({ apiKey });
235
+ const payload = {
236
+ model: model.id,
237
+ input: convertContextMessages(context),
238
+ tools: convertTools$1(context.tools ?? []),
239
+ instructions: context.systemPrompt,
240
+ temperature: options?.temperature ?? 0,
241
+ max_output_tokens: options?.maxTokens ?? model.maxTokens
242
+ };
243
+ const tzafonPayload = tzafonComputerUseOnPayload(payload, model, { keepToolNames: [...keepToolNamesFromContext$1(context), ...options?.keepToolNames ?? []] });
244
+ const nextPayload = await options?.onPayload?.(tzafonPayload ?? payload, model);
245
+ if (options?.signal?.aborted) throw new Error("Request was aborted");
246
+ const response = await client.responses.create(nextPayload ?? tzafonPayload ?? payload, { signal: options?.signal });
247
+ if (options?.signal?.aborted) throw new Error("Request was aborted");
248
+ stream.push({
249
+ type: "start",
250
+ partial: output
251
+ });
252
+ output.responseId = getString(response, "id") || void 0;
253
+ output.usage = usageFromTzafon(getValue(response, "usage"));
254
+ for (const item of getArray(response, "output")) {
255
+ const type = getString(item, "type");
256
+ if (type === "message") {
257
+ const text = extractMessageText(item);
258
+ if (text) emitText$1(stream, output, text);
259
+ continue;
260
+ }
261
+ if (type === "function_call") {
262
+ emitToolCall(stream, output, {
263
+ type: "toolCall",
264
+ id: getString(item, "call_id"),
265
+ name: getString(item, "name"),
266
+ arguments: parseArguments$1(getValue(item, "arguments"))
267
+ });
268
+ continue;
269
+ }
270
+ if (type === "computer_call") {
271
+ const callId = getString(item, "call_id") || getString(item, "id") || `computer_call_${output.content.length}`;
272
+ let actionIndex = 0;
273
+ for (const action of toCanonicalActions$1(getValue(item, "action"))) {
274
+ if (action.type === "answer") {
275
+ emitText$1(stream, output, action.text);
276
+ continue;
277
+ }
278
+ emitToolCall(stream, output, {
279
+ type: "toolCall",
280
+ id: tzafonToolCallId(callId, actionIndex),
281
+ name: canonicalToolCallName(action),
282
+ arguments: canonicalToolCallArguments(action)
283
+ });
284
+ actionIndex += 1;
285
+ }
286
+ }
287
+ }
288
+ output.stopReason = output.content.some((part) => part.type === "toolCall") ? "toolUse" : "stop";
289
+ stream.push({
290
+ type: "done",
291
+ reason: output.stopReason,
292
+ message: output
293
+ });
294
+ stream.end();
295
+ } catch (err) {
296
+ output.stopReason = options?.signal?.aborted ? "aborted" : "error";
297
+ output.errorMessage = err instanceof Error ? err.message : String(err);
298
+ stream.push({
299
+ type: "error",
300
+ reason: output.stopReason,
301
+ error: output
302
+ });
303
+ stream.end();
304
+ }
305
+ })();
306
+ return stream;
307
+ };
308
+ function tzafonComputerUseOnPayload(payload, _model, context) {
309
+ if (!payload || typeof payload !== "object") return void 0;
310
+ const current = payload;
311
+ const keepToolNames = new Set(context?.keepToolNames ?? []);
312
+ const existingTools = Array.isArray(current.tools) ? current.tools : [];
313
+ const shouldAddComputerUse = existingTools.some((tool) => {
314
+ const name = readToolName$1(tool);
315
+ return Boolean(name && TZAFON_LOCAL_ACTION_TOOL_NAMES.has(name) && !keepToolNames.has(name));
316
+ });
317
+ const tools = existingTools.filter((tool) => {
318
+ const name = readToolName$1(tool);
319
+ return !name || keepToolNames.has(name) || !TZAFON_LOCAL_ACTION_TOOL_NAMES.has(name);
320
+ });
321
+ return {
322
+ ...payload,
323
+ tools: shouldAddComputerUse ? [TZAFON_COMPUTER_USE_TOOL, ...tools] : tools
324
+ };
325
+ }
326
+ /** Derive a unique canonical tool-call id for a Tzafon computer action. */
327
+ function tzafonToolCallId(callId, actionIndex) {
328
+ return actionIndex === 0 ? callId : `${callId}:${actionIndex}`;
329
+ }
330
+ function initialAssistantMessage$1(model) {
331
+ return {
332
+ role: "assistant",
333
+ content: [],
334
+ api: model.api,
335
+ provider: model.provider,
336
+ model: model.id,
337
+ usage: {
338
+ input: 0,
339
+ output: 0,
340
+ cacheRead: 0,
341
+ cacheWrite: 0,
342
+ totalTokens: 0,
343
+ cost: {
344
+ input: 0,
345
+ output: 0,
346
+ cacheRead: 0,
347
+ cacheWrite: 0,
348
+ total: 0
349
+ }
350
+ },
351
+ stopReason: "stop",
352
+ timestamp: Date.now()
353
+ };
354
+ }
355
+ function emitText$1(stream, output, text) {
356
+ const contentIndex = output.content.length;
357
+ const content = {
358
+ type: "text",
359
+ text
360
+ };
361
+ output.content.push(content);
362
+ stream.push({
363
+ type: "text_start",
364
+ contentIndex,
365
+ partial: output
366
+ });
367
+ stream.push({
368
+ type: "text_delta",
369
+ contentIndex,
370
+ delta: text,
371
+ partial: output
372
+ });
373
+ stream.push({
374
+ type: "text_end",
375
+ contentIndex,
376
+ content: text,
377
+ partial: output
378
+ });
379
+ }
380
+ function emitToolCall(stream, output, toolCall) {
381
+ const contentIndex = output.content.length;
382
+ output.content.push(toolCall);
383
+ stream.push({
384
+ type: "toolcall_start",
385
+ contentIndex,
386
+ partial: output
387
+ });
388
+ stream.push({
389
+ type: "toolcall_end",
390
+ contentIndex,
391
+ toolCall,
392
+ partial: output
393
+ });
394
+ }
395
+ /** Normalize one Tzafon `computer_call.action` payload into canonical CUA actions. */
396
+ function toCanonicalActions$1(action) {
397
+ if (!action || typeof action !== "object") return [];
398
+ const current = action;
399
+ const type = getString(current, "type");
400
+ const x = readOptionalNumber(current, "x");
401
+ const y = readOptionalNumber(current, "y");
402
+ switch (type) {
403
+ case "click":
404
+ case "left_click": return x !== void 0 && y !== void 0 ? [{
405
+ type: "click",
406
+ x,
407
+ y
408
+ }] : [];
409
+ case "right_click": return x !== void 0 && y !== void 0 ? [{
410
+ type: "click",
411
+ x,
412
+ y,
413
+ button: "right"
414
+ }] : [];
415
+ case "double_click": return x !== void 0 && y !== void 0 ? [{
416
+ type: "double_click",
417
+ x,
418
+ y
419
+ }] : [];
420
+ case "triple_click": return x !== void 0 && y !== void 0 ? [{
421
+ type: "double_click",
422
+ x,
423
+ y
424
+ }, {
425
+ type: "click",
426
+ x,
427
+ y
428
+ }] : [];
429
+ case "move":
430
+ case "hover": return x !== void 0 && y !== void 0 ? [{
431
+ type: "move",
432
+ x,
433
+ y
434
+ }] : [];
435
+ case "drag": return toDragAction(current);
436
+ case "type": return [{
437
+ type: "type",
438
+ text: getString(current, "text")
439
+ }];
440
+ case "keypress":
441
+ case "key": return toKeypressAction$1(current);
442
+ case "scroll": return [toScrollAction$1(current)];
443
+ case "hscroll": return [{
444
+ type: "scroll",
445
+ scroll_x: readOptionalNumber(current, "scroll_x") ?? readOptionalNumber(current, "amount") ?? 0
446
+ }];
447
+ case "navigate": return [{
448
+ type: "goto",
449
+ url: getString(current, "url")
450
+ }];
451
+ case "wait": return [{
452
+ type: "wait",
453
+ ms: readOptionalNumber(current, "ms") ?? secondsToMs$1(readOptionalNumber(current, "seconds"))
454
+ }];
455
+ case "screenshot": return [{ type: "screenshot" }];
456
+ case "answer":
457
+ case "done":
458
+ case "terminate": return [{
459
+ type: "answer",
460
+ text: getString(current, "result") || getString(current, "text") || getString(current, "status")
461
+ }];
462
+ default: return [];
463
+ }
464
+ }
465
+ function toDragAction(action) {
466
+ const path = getArray(action, "path").map((point) => {
467
+ if (!point || typeof point !== "object") return void 0;
468
+ const x = readOptionalNumber(point, "x");
469
+ const y = readOptionalNumber(point, "y");
470
+ return x !== void 0 && y !== void 0 ? {
471
+ x,
472
+ y
473
+ } : void 0;
474
+ }).filter((point) => Boolean(point));
475
+ if (path.length >= 2) return [{
476
+ type: "drag",
477
+ path
478
+ }];
479
+ const x = readOptionalNumber(action, "x");
480
+ const y = readOptionalNumber(action, "y");
481
+ const endX = readOptionalNumber(action, "end_x") ?? readOptionalNumber(action, "x2");
482
+ const endY = readOptionalNumber(action, "end_y") ?? readOptionalNumber(action, "y2");
483
+ if (x === void 0 || y === void 0 || endX === void 0 || endY === void 0) return [];
484
+ return [{
485
+ type: "drag",
486
+ path: [{
487
+ x,
488
+ y
489
+ }, {
490
+ x: endX,
491
+ y: endY
492
+ }]
493
+ }];
494
+ }
495
+ function toKeypressAction$1(action) {
496
+ const keys = getArray(action, "keys").map((key) => typeof key === "string" ? key : void 0).filter((key) => Boolean(key));
497
+ const key = getString(action, "key");
498
+ const text = getString(action, "text");
499
+ const value = keys.length > 0 ? keys : key ? [key] : text ? [text] : [];
500
+ return value.length > 0 ? [{
501
+ type: "keypress",
502
+ keys: value
503
+ }] : [];
504
+ }
505
+ function toScrollAction$1(action) {
506
+ return {
507
+ type: "scroll",
508
+ x: readOptionalNumber(action, "x"),
509
+ y: readOptionalNumber(action, "y"),
510
+ scroll_x: readOptionalNumber(action, "scroll_x"),
511
+ scroll_y: readOptionalNumber(action, "scroll_y") ?? readOptionalNumber(action, "amount")
512
+ };
513
+ }
514
+ function secondsToMs$1(seconds) {
515
+ return seconds === void 0 ? void 0 : seconds * 1e3;
516
+ }
517
+ function convertTools$1(tools) {
518
+ return tools.map((tool) => ({
519
+ type: "function",
520
+ name: tool.name,
521
+ description: tool.description,
522
+ parameters: tool.parameters
523
+ }));
524
+ }
525
+ function keepToolNamesFromContext$1(context) {
526
+ return (context.tools ?? []).map((tool) => tool.name).filter((name) => !TZAFON_LOCAL_ACTION_TOOL_NAMES.has(name));
527
+ }
528
+ function readToolName$1(tool) {
529
+ if (!tool || typeof tool !== "object") return void 0;
530
+ const direct = getString(tool, "name");
531
+ if (direct) return direct;
532
+ return getString(getValue(tool, "function"), "name");
533
+ }
534
+ function convertContextMessages(context) {
535
+ const items = [];
536
+ for (const message of context.messages) {
537
+ if (message.role === "user") {
538
+ items.push({
539
+ role: "user",
540
+ content: convertUserContent(message.content)
541
+ });
542
+ continue;
543
+ }
544
+ if (message.role === "assistant") {
545
+ const text = message.content.filter((part) => part.type === "text").map((part) => part.text).join("\n").trim();
546
+ if (text) items.push({
547
+ role: "assistant",
548
+ content: text
549
+ });
550
+ for (const part of message.content) {
551
+ if (part.type !== "toolCall") continue;
552
+ items.push({
553
+ type: "function_call",
554
+ call_id: part.id,
555
+ name: part.name,
556
+ arguments: JSON.stringify(part.arguments ?? {})
557
+ });
558
+ }
559
+ continue;
560
+ }
561
+ if (message.role === "toolResult") {
562
+ const text = message.content.filter((part) => part.type === "text").map((part) => part.text).join("\n").trim();
563
+ items.push({
564
+ type: "function_call_output",
565
+ call_id: message.toolCallId,
566
+ output: message.isError ? `Error: ${text || "tool execution failed"}` : text || "ok"
567
+ });
568
+ const image = [...message.content].reverse().find((part) => part.type === "image");
569
+ if (image) items.push({
570
+ role: "user",
571
+ content: [{
572
+ type: "input_text",
573
+ text: "screenshot"
574
+ }, {
575
+ type: "input_image",
576
+ image_url: `data:${image.mimeType};base64,${image.data}`,
577
+ detail: "auto"
578
+ }]
579
+ });
580
+ }
581
+ }
582
+ return items;
583
+ }
584
+ function convertUserContent(content) {
585
+ if (typeof content === "string") return [{
586
+ type: "input_text",
587
+ text: content
588
+ }];
589
+ return content.map((part) => {
590
+ if (part.type === "text") return {
591
+ type: "input_text",
592
+ text: part.text
593
+ };
594
+ return {
595
+ type: "input_image",
596
+ image_url: `data:${part.mimeType};base64,${part.data}`,
597
+ detail: "auto"
598
+ };
599
+ });
600
+ }
601
+ function extractMessageText(item) {
602
+ return getArray(item, "content").map((block) => getString(block, "text")).filter(Boolean).join("\n").trim();
603
+ }
604
/**
 * Normalize raw tool-call arguments into a plain argument object.
 *
 * Accepts either a JSON string or an already-parsed object. Each top-level
 * entry is passed through `normalizeArgumentValue`.
 *
 * @param {unknown} value - Raw `arguments` value of a function call item.
 * @returns {Record<string, unknown>} The normalized arguments; `{}` when the
 *   input is blank, unparsable, or not a non-array object.
 */
function parseArguments$1(value) {
  let top;
  if (typeof value === "string" && value.trim()) top = safeJsonParse(value);
  else if (value && typeof value === "object") top = value;
  else top = {};
  // Arrays satisfy `typeof === "object"`; without this guard a JSON array
  // would become an index-keyed argument object ({ "0": ..., "1": ... }).
  if (!top || typeof top !== "object" || Array.isArray(top)) return {};
  const out = {};
  for (const [key, val] of Object.entries(top)) out[key] = normalizeArgumentValue(key, val);
  return out;
}
611
+ const NUMERIC_ARGUMENT_KEYS = new Set([
612
+ "x",
613
+ "y",
614
+ "scroll_x",
615
+ "scroll_y",
616
+ "ms",
617
+ "duration"
618
+ ]);
619
+ function normalizeArgumentValue(key, value) {
620
+ const parsed = typeof value === "string" && looksLikeJson(value) ? safeJsonParse(value) ?? value : value;
621
+ if (typeof parsed === "string" && NUMERIC_ARGUMENT_KEYS.has(key)) {
622
+ const number = Number.parseFloat(parsed);
623
+ return Number.isFinite(number) ? number : parsed;
624
+ }
625
+ if (Array.isArray(parsed)) return parsed.map((item) => normalizeArgumentValue(key, item));
626
+ if (parsed && typeof parsed === "object") return Object.fromEntries(Object.entries(parsed).map(([childKey, childValue]) => [childKey, normalizeArgumentValue(childKey, childValue)]));
627
+ return parsed;
628
+ }
629
+ function safeJsonParse(value) {
630
+ try {
631
+ const parsed = JSON.parse(value);
632
+ return parsed && typeof parsed === "object" ? parsed : null;
633
+ } catch {
634
+ return null;
635
+ }
636
+ }
637
+ function looksLikeJson(value) {
638
+ const trimmed = value.trim();
639
+ return trimmed.startsWith("[") || trimmed.startsWith("{");
640
+ }
641
+ function usageFromTzafon(usage) {
642
+ const input = readUsageNumber(usage, "input_tokens");
643
+ const output = readUsageNumber(usage, "output_tokens");
644
+ return {
645
+ input,
646
+ output,
647
+ cacheRead: readUsageNumber(getValue(usage, "input_tokens_details"), "cached_tokens"),
648
+ cacheWrite: 0,
649
+ totalTokens: readUsageNumber(usage, "total_tokens") || input + output,
650
+ cost: {
651
+ input: 0,
652
+ output: 0,
653
+ cacheRead: 0,
654
+ cacheWrite: 0,
655
+ total: 0
656
+ }
657
+ };
658
+ }
659
+ function readUsageNumber(obj, key) {
660
+ return readOptionalNumber(obj, key) ?? 0;
661
+ }
662
+ function readOptionalNumber(obj, key) {
663
+ if (!obj || typeof obj !== "object") return void 0;
664
+ const value = obj[key];
665
+ if (typeof value === "number" && Number.isFinite(value)) return value;
666
+ if (typeof value === "string" && value.trim()) {
667
+ const number = Number(value);
668
+ return Number.isFinite(number) ? number : void 0;
669
+ }
670
+ }
671
+ function getArray(obj, key) {
672
+ const value = getValue(obj, key);
673
+ return Array.isArray(value) ? value : [];
674
+ }
675
+ function getString(obj, key) {
676
+ const value = getValue(obj, key);
677
+ return typeof value === "string" ? value : "";
678
+ }
679
+ function getValue(obj, key) {
680
+ if (!obj || typeof obj !== "object") return void 0;
681
+ return obj[key];
682
+ }
683
+ //#endregion
684
+ //#region src/providers/yutori/actions.ts
685
+ /**
686
+ * Native Yutori Navigator n1.5 tool-set ids.
687
+ *
688
+ * Source of truth:
689
+ * - https://docs.yutori.com/reference/n1-5
690
+ * - https://docs.yutori.com/llm-quickstart.md
691
+ */
692
+ const YUTORI_N15_CORE_TOOL_SET = "browser_tools_core-20260403";
693
+ const YUTORI_N15_EXPANDED_TOOL_SET = "browser_tools_expanded-20260403";
694
+ /**
695
+ * DOM/ref-backed Navigator n1.5 actions. We intentionally disable these until
696
+ * CuaAgent has the ref/DOM execution path that Yutori documents for the
697
+ * expanded tool set.
698
+ */
699
+ const YUTORI_N15_EXPANDED_ACTION_TYPES = [
700
+ "extract_elements",
701
+ "find",
702
+ "set_element_value",
703
+ "execute_js"
704
+ ];
705
+ /**
706
+ * Navigator n1's fixed legacy browser action space.
707
+ *
708
+ * Source of truth: https://docs.yutori.com/reference/n1
709
+ */
710
+ const YUTORI_N1_ACTION_TYPES = [
711
+ "left_click",
712
+ "double_click",
713
+ "right_click",
714
+ "triple_click",
715
+ "type",
716
+ "key_press",
717
+ "scroll",
718
+ "hover",
719
+ "drag",
720
+ "goto_url",
721
+ "go_back",
722
+ "refresh",
723
+ "wait"
724
+ ];
725
+ /**
726
+ * Navigator n1.5 core visual action space. These are the actions available
727
+ * when `tool_set` is `browser_tools_core-20260403`, which keeps CuaAgent in the
728
+ * pure screenshot/coordinate path and avoids DOM refs.
729
+ *
730
+ * Source of truth: https://docs.yutori.com/reference/n1-5
731
+ */
732
+ const YUTORI_N15_CORE_ACTION_TYPES = [
733
+ "left_click",
734
+ "double_click",
735
+ "triple_click",
736
+ "middle_click",
737
+ "right_click",
738
+ "mouse_move",
739
+ "mouse_down",
740
+ "mouse_up",
741
+ "drag",
742
+ "scroll",
743
+ "type",
744
+ "key_press",
745
+ "hold_key",
746
+ "goto_url",
747
+ "go_back",
748
+ "go_forward",
749
+ "refresh",
750
+ "wait"
751
+ ];
752
+ const YUTORI_N15_ACTION_TYPES = [...YUTORI_N15_CORE_ACTION_TYPES, ...YUTORI_N15_EXPANDED_ACTION_TYPES];
753
+ /**
754
+ * Canonical CUA action types Yutori's native actions normalize into. These are
755
+ * the tool-call names {@link streamYutori} emits and the local executors
756
+ * CuaAgent installs for Yutori models.
757
+ */
758
+ const YUTORI_CUA_ACTION_TYPES = [
759
+ "click",
760
+ "double_click",
761
+ "mouse_down",
762
+ "mouse_up",
763
+ "type",
764
+ "keypress",
765
+ "scroll",
766
+ "move",
767
+ "drag",
768
+ "wait",
769
+ "goto",
770
+ "back",
771
+ "forward"
772
+ ];
773
+ const DEFAULT_SCROLL_AMOUNT = 3;
774
+ const SCROLL_AMOUNT_PER_NOTCH = 120;
775
+ const DEFAULT_WAIT_MS = 2e3;
776
+ const NAVIGATION_WAIT_MS = 1500;
777
+ const GOTO_WAIT_MS = 2e3;
778
+ function resolveYutoriActions(actions) {
779
+ const resolved = actions ?? YUTORI_CUA_ACTION_TYPES;
780
+ const supported = [];
781
+ const unsupported = [];
782
+ for (const action of resolved) if (isYutoriCanonicalAction(action)) supported.push(action);
783
+ else unsupported.push(action);
784
+ if (unsupported.length > 0) throw new Error(`unsupported Yutori canonical action(s): ${unsupported.join(", ")}`);
785
+ return supported;
786
+ }
787
+ function isYutoriCanonicalAction(action) {
788
+ return YUTORI_CUA_ACTION_TYPES.includes(action);
789
+ }
790
+ /** Build the TypeBox schema for Yutori-supported canonical browser actions. */
791
+ function createActionSchema$1(actions) {
792
+ return createCuaActionSchema(resolveYutoriActions(actions));
793
+ }
794
/**
 * Build local mirrors of the canonical action tools that Yutori models call.
 *
 * The definitions never reach the API: `streamYutori` removes them from the
 * outbound payload, selects Yutori's native `tool_set`, and then normalizes
 * the model's native tool calls back into these canonical names. They are
 * installed locally so those normalized calls have matching executors —
 * `providerModule.toolDefinitions()` is intentionally `[]`. Pass `actions` to
 * mirror only a supported subset, such as `["click"]`.
 */
function computerTools$2(options = {}) {
  const mirrored = resolveYutoriActions(options.actions);
  return createCuaActionToolDefinitions(mirrored);
}
807
/**
 * Build the local execution adapters used by CuaAgent and CuaAgentHarness.
 * `options.actions` optionally narrows the set; unsupported names throw.
 */
function computerToolExecutors$2(options = {}) {
  const mirrored = resolveYutoriActions(options.actions);
  return createCuaActionToolExecutors(mirrored);
}
811
/** Return the native tool set for `n1.5*` model ids, or `undefined` for any other id. */
function yutoriToolSetForModel(modelId) {
  if (modelId.startsWith("n1.5")) return YUTORI_N15_CORE_TOOL_SET;
  return void 0;
}
814
/** Return the n1.5 core action names for `n1.5*` model ids and the n1 action names otherwise. */
function yutoriNativeActionsForModel(modelId) {
  if (modelId.startsWith("n1.5")) return YUTORI_N15_CORE_ACTION_TYPES;
  return YUTORI_N1_ACTION_TYPES;
}
817
/** Report whether a tool name is one of the local canonical action tools. */
function isYutoriLocalActionToolName(name) {
  return YUTORI_CUA_ACTION_TYPES.some((known) => known === name);
}
820
/**
 * Translate one native Yutori tool call into the canonical CUA actions that
 * reproduce it.
 *
 * @param {string} name - Native Yutori action name, e.g. `"left_click"`.
 * @param {object} args - Parsed tool-call arguments.
 * @returns {object[] | undefined} Canonical actions in execution order, or
 *   `undefined` when the name is not handled here or a required argument
 *   (coordinates, text, keys, url) is unusable.
 */
function toCanonicalActions(name, args) {
  const point = readPoint(args.coordinates);

  // Button-style pointer actions share one layout: type, position, optional
  // extras, then the held modifier. Each recipe lists the actions to emit.
  const pointerRecipes = new Map([
    ["left_click", [["click"]]],
    ["right_click", [["click", { button: "right" }]]],
    ["middle_click", [["click", { button: "middle" }]]],
    ["double_click", [["double_click"]]],
    // A triple click is emitted as a double click followed by a single click.
    ["triple_click", [["double_click"], ["click"]]],
    ["mouse_down", [["mouse_down"]]],
    ["mouse_up", [["mouse_up"]]]
  ]);
  const recipe = pointerRecipes.get(name);
  if (recipe) {
    if (!point) return void 0;
    return recipe.map(([type, extra]) => ({
      type,
      x: point.x,
      y: point.y,
      ...extra,
      ...holdKeys(args.modifier)
    }));
  }

  switch (name) {
    case "mouse_move":
    case "hover":
      if (!point) return void 0;
      return [{
        type: "move",
        x: point.x,
        y: point.y
      }];
    case "drag": {
      const from = readPoint(args.start_coordinates);
      if (!from || !point) return void 0;
      return [{
        type: "drag",
        path: [from, point],
        button: "left"
      }];
    }
    case "scroll":
      return toScrollAction(args, point);
    case "type":
      return toTypeActions(args);
    case "key_press":
      return toKeypressAction(args);
    case "hold_key":
      return toHoldKeyAction(args);
    case "goto_url": {
      const url = normalizeGotoUrl(args.url);
      if (!url) return void 0;
      return [
        { type: "goto", url },
        { type: "wait", ms: GOTO_WAIT_MS }
      ];
    }
    case "go_back":
      return [{ type: "back" }, { type: "wait", ms: NAVIGATION_WAIT_MS }];
    case "go_forward":
      return [{ type: "forward" }, { type: "wait", ms: NAVIGATION_WAIT_MS }];
    case "refresh":
      // Refresh is expressed as an F5 keypress followed by a wait.
      return [
        { type: "keypress", keys: ["f5"] },
        { type: "wait", ms: DEFAULT_WAIT_MS }
      ];
    case "wait":
      return [{ type: "wait", ms: secondsToMs(args.duration, DEFAULT_WAIT_MS) }];
    default:
      return void 0;
  }
}
922
/**
 * Read an `[x, y]` coordinate pair from an untrusted tool-call argument.
 *
 * Only finite numbers and non-blank numeric strings are accepted. Blind
 * `Number()` coercion would turn `null`, `""`, `false` and `[]` into `0`,
 * silently redirecting a malformed call to the origin.
 *
 * @param {unknown} value - Candidate pair; entries past the second are ignored.
 * @returns {{ x: number, y: number } | undefined} The point, or `undefined`
 *   when `value` is not an array of at least two usable coordinates.
 */
function readPoint(value) {
  if (!Array.isArray(value) || value.length < 2) return void 0;
  const toCoordinate = (raw) => {
    if (typeof raw === "number") return raw;
    if (typeof raw === "string" && raw.trim() !== "") return Number(raw);
    return NaN;
  };
  const x = toCoordinate(value[0]);
  const y = toCoordinate(value[1]);
  if (!Number.isFinite(x) || !Number.isFinite(y)) return void 0;
  return { x, y };
}
932
/**
 * Convert a native scroll call into a canonical `scroll` action.
 *
 * @param {object} args - Parsed arguments; reads `direction`, `amount` and `modifier`.
 * @param {{ x: number, y: number } | undefined} coords - Where to scroll.
 * @returns {object[] | undefined} A single-element action list, or `undefined`
 *   when no coordinates are available.
 */
function toScrollAction(args, coords) {
  if (!coords) return void 0;
  const direction = typeof args.direction === "string" ? args.direction : "down";
  // Number.isFinite rejects non-numbers as well as NaN / Infinity, which would
  // otherwise flow through Math.max and produce non-finite scroll deltas.
  const amount = Number.isFinite(args.amount) ? args.amount : DEFAULT_SCROLL_AMOUNT;
  // Always scroll by at least one whole unit.
  const ticks = Math.max(1, Math.trunc(amount)) * SCROLL_AMOUNT_PER_NOTCH;
  let scroll_x = 0;
  let scroll_y = 0;
  if (direction === "left") scroll_x = -ticks;
  else if (direction === "right") scroll_x = ticks;
  else if (direction === "up") scroll_y = -ticks;
  else if (direction === "down") scroll_y = ticks;
  return [{
    type: "scroll",
    x: coords.x,
    y: coords.y,
    scroll_x,
    scroll_y,
    ...holdKeys(args.modifier)
  }];
}
948
/**
 * Convert a native `type` call into canonical actions.
 *
 * Emits, in order: an optional select-all plus backspace (when
 * `clear_before_typing` is exactly `true`), the `type` action itself, and an
 * optional Enter keypress (when `press_enter_after` is exactly `true`).
 *
 * @param {object} args - Parsed arguments; `text` must be a string.
 * @returns {object[] | undefined} The action list, or `undefined` when `text` is not a string.
 */
function toTypeActions(args) {
  if (typeof args.text !== "string") return void 0;
  const keypress = (...keys) => ({ type: "keypress", keys });
  const prelude = args.clear_before_typing === true ? [keypress("ctrl", "a"), keypress("backspace")] : [];
  const postlude = args.press_enter_after === true ? [keypress("enter")] : [];
  return [...prelude, { type: "type", text: args.text }, ...postlude];
}
969
/**
 * Convert a native `key_press` call into one canonical `keypress` per combo.
 * Reads `key_comb`, falling back to `key`; returns `undefined` when no combo parses.
 */
function toKeypressAction(args) {
  const combos = readKeySequence(args.key_comb ?? args.key);
  if (combos.length === 0) return void 0;
  return combos.map((keys) => ({
    type: "keypress",
    keys
  }));
}
976
/**
 * Convert a native `hold_key` call into a single `keypress` with a duration.
 * Reads `key_comb`, falling back to `key`. The duration is taken from
 * `args.duration` in seconds and defaults to 1000 ms.
 */
function toHoldKeyAction(args) {
  const keys = readKeyCombo(args.key_comb ?? args.key);
  if (keys.length === 0) return void 0;
  return [{
    type: "keypress",
    keys,
    duration: secondsToMs(args.duration, 1000)
  }];
}
984
/**
 * Split a `+`-joined key combo such as `"ctrl+a"` into trimmed key names.
 * Non-string input and empty segments yield no keys.
 */
function readKeyCombo(value) {
  if (typeof value !== "string") return [];
  const keys = [];
  for (const piece of value.split("+")) {
    const key = piece.trim();
    if (key) keys.push(key);
  }
  return keys;
}
988
/**
 * Split a whitespace-separated sequence of key combos into a list of combos,
 * dropping any token that yields no keys. Non-string input yields `[]`.
 */
function readKeySequence(value) {
  if (typeof value !== "string") return [];
  const combos = [];
  for (const token of value.trim().split(/\s+/)) {
    const combo = readKeyCombo(token);
    if (combo.length > 0) combos.push(combo);
  }
  return combos;
}
992
/**
 * Build the optional `hold_keys` fragment spread into pointer actions.
 * Returns `{}` unless `value` is a string with non-whitespace content.
 */
function holdKeys(value) {
  const key = typeof value === "string" ? value.trim() : "";
  if (key === "") return {};
  return { hold_keys: [key] };
}
997
/**
 * Convert a duration in seconds to whole milliseconds.
 *
 * @param {unknown} value - Seconds; must be a finite number greater than zero.
 * @param {number} fallback - Returned unchanged when `value` is unusable.
 * @returns {number} Rounded milliseconds, or `fallback`.
 */
function secondsToMs(value, fallback) {
  const usable = typeof value === "number" && Number.isFinite(value) && value > 0;
  return usable ? Math.round(value * 1000) : fallback;
}
1001
+ //#endregion
1002
+ //#region src/providers/yutori/provider.ts
1003
/** API identifier under which the Yutori stream provider is registered. */
const YUTORI_CHAT_COMPLETIONS_API = "yutori-chat-completions";

/**
 * Start a Yutori request and return its event stream immediately.
 * The request runs in the background and reports through the returned stream.
 */
const streamYutori = (model, context, options) => {
  const events = createAssistantMessageEventStream();
  // Deliberately not awaited: the caller consumes `events` while this runs.
  void runYutoriStream(events, model, context, options);
  return events;
};

/** Simple-stream entry point; forwards unchanged to {@link streamYutori}. */
const streamSimpleYutori = (model, context, options) => {
  return streamYutori(model, context, options);
};
1010
/**
 * Rewrite an outbound payload so Yutori uses its native tool set instead of
 * the local canonical action tools.
 *
 * - Tools with no readable name, tools named in `context.keepToolNames`, and
 *   tools that are not local action tools are kept; the rest are dropped.
 * - When the model has a native tool set, `tool_set` is added and the expanded
 *   n1.5 actions are listed in `disable_tools`.
 * - `tools` is set to `undefined` when nothing remains.
 *
 * @returns {object | undefined} A new payload object, or `undefined` when
 *   `payload` is not an object.
 */
function yutoriNativeToolSetOnPayload(payload, model, context) {
  if (!payload || typeof payload !== "object") return void 0;
  const keep = new Set(context?.keepToolNames ?? []);
  let forwarded;
  if (Array.isArray(payload.tools)) {
    forwarded = payload.tools.filter((tool) => {
      const name = readToolName(tool);
      if (!name) return true;
      return keep.has(name) || !isYutoriLocalActionToolName(name);
    });
  }
  const toolSet = model ? yutoriToolSetForModel(model.id) : void 0;
  const next = { ...payload };
  if (toolSet) {
    next.tool_set = toolSet;
    next.disable_tools = [...YUTORI_N15_EXPANDED_ACTION_TYPES];
  }
  next.tools = forwarded && forwarded.length > 0 ? forwarded : void 0;
  return next;
}
1028
/**
 * CUA payload middleware for yutori. First maps the local executor tools onto
 * the native tool set, then appends a fresh screenshot to the latest user/tool
 * message per yutori's screenshot policy. Each step falls back to its input
 * when it has nothing to change.
 */
async function yutoriCuaOnPayload(payload, model, context) {
  const withToolSet = yutoriNativeToolSetOnPayload(payload, model, context) ?? payload;
  const withScreenshot = await appendScreenshotToLatestMessage(withToolSet, context?.getScreenshot);
  return withScreenshot ?? withToolSet;
}
1037
/**
 * Append a screenshot to the last message of a payload.
 *
 * A screenshot is only taken when the last message has role `user` or `tool`
 * and does not already contain an `image_url` part. The payload and its
 * messages array are copied rather than mutated.
 *
 * @param {object} payload - Outbound request payload with a `messages` array.
 * @param {() => Promise<{ mimeType: string, data: { toString(enc: string): string } }>} [getScreenshot]
 * @returns {Promise<object | undefined>} The updated payload, or `undefined`
 *   when nothing was appended.
 */
async function appendScreenshotToLatestMessage(payload, getScreenshot) {
  if (!getScreenshot) return void 0;
  if (!payload || typeof payload !== "object") return void 0;
  const { messages } = payload;
  if (!Array.isArray(messages) || messages.length === 0) return void 0;
  const tailIndex = messages.length - 1;
  const tail = messages[tailIndex];
  if (!tail || typeof tail !== "object") return void 0;
  const acceptsScreenshot = tail.role === "user" || tail.role === "tool";
  if (!acceptsScreenshot) return void 0;
  if (contentHasImage(tail.content)) return void 0;

  const shot = await getScreenshot();
  const existingParts = normalizePayloadContent(tail.content);
  const updatedMessages = messages.slice();
  updatedMessages[tailIndex] = {
    ...tail,
    content: [
      ...existingParts,
      // Blank-line text part placed between the existing content and the image.
      { type: "text", text: "\n\n" },
      {
        type: "image_url",
        image_url: {
          url: `data:${shot.mimeType};base64,${shot.data.toString("base64")}`,
          detail: "high"
        }
      }
    ]
  };
  return { ...payload, messages: updatedMessages };
}
1072
/**
 * Normalize message content into an array of content parts: a string becomes
 * one text part, an array keeps only its object entries, anything else is `[]`.
 */
function normalizePayloadContent(content) {
  if (Array.isArray(content)) {
    return content.filter((part) => part !== null && typeof part === "object");
  }
  return typeof content === "string" ? [{ type: "text", text: content }] : [];
}
1080
/** Report whether array content already includes an `image_url` part. */
function contentHasImage(content) {
  if (!Array.isArray(content)) return false;
  for (const part of content) {
    if (part !== null && typeof part === "object" && part.type === "image_url") return true;
  }
  return false;
}
1085
/**
 * Execute one non-streaming Yutori chat completion and replay the result onto
 * `stream` as assistant-message events.
 *
 * Native tool calls that map to canonical actions are emitted as one tool call
 * per canonical action; a call that expands to several actions gets ids
 * suffixed with `_<index>`. Other function calls are passed through unchanged.
 * Anything thrown inside the request/translation steps is reported as an
 * `error` event (reason `aborted` when the signal is aborted) and the stream
 * is ended.
 *
 * @param {object} stream - Event stream exposing `push` and `end`.
 * @param {object} model - Target model (`id`, `baseUrl`, `headers`, `maxTokens`, ...).
 * @param {object} context - Conversation context (system prompt, messages, tools).
 * @param {object} [options] - `apiKey`, `maxTokens`, `temperature`, `signal`,
 *   `keepToolNames`, and the `onPayload` / `onResponse` hooks.
 */
async function runYutoriStream(stream, model, context, options) {
  const output = initialAssistantMessage(model);

  // Record one tool call on the message and replay it as start/delta/end.
  // When `rawDelta` is omitted the delta is the serialized call arguments.
  const emitToolCall = (toolCall, rawDelta) => {
    const contentIndex = output.content.length;
    output.content.push(toolCall);
    output.stopReason = "toolUse";
    stream.push({
      type: "toolcall_start",
      contentIndex,
      partial: output
    });
    stream.push({
      type: "toolcall_delta",
      contentIndex,
      delta: rawDelta === void 0 ? JSON.stringify(toolCall.arguments) : rawDelta,
      partial: output
    });
    stream.push({
      type: "toolcall_end",
      contentIndex,
      toolCall,
      partial: output
    });
  };

  try {
    const apiKey = options?.apiKey || process.env.YUTORI_API_KEY;
    if (!apiKey) throw new Error("missing Yutori API key");
    const client = new OpenAI({
      apiKey,
      baseURL: model.baseUrl || "https://api.yutori.com/v1",
      defaultHeaders: model.headers
    });

    let payload = {
      model: model.id,
      messages: convertMessages(context),
      max_completion_tokens: options?.maxTokens ?? model.maxTokens,
      temperature: options?.temperature ?? 0.3
    };
    const declaredTools = convertTools(context);
    if (declaredTools.length > 0) payload.tools = declaredTools;
    const keepToolNames = [...keepToolNamesFromContext(context), ...(options?.keepToolNames ?? [])];
    payload = yutoriNativeToolSetOnPayload(payload, model, { keepToolNames });
    // The caller's hook may replace the payload; `undefined` keeps ours.
    const replacement = await options?.onPayload?.(payload, model);
    if (replacement !== void 0) payload = replacement;

    const { data: completion, response: httpResponse } = await client.chat.completions
      .create(payload, { signal: options?.signal })
      .withResponse();
    await options?.onResponse?.({
      status: httpResponse.status,
      headers: headersToRecord(httpResponse.headers)
    }, model);

    stream.push({
      type: "start",
      partial: output
    });
    const choice = completion.choices?.[0];
    const message = choice?.message;
    output.responseId = completion.id;
    output.usage = usageFromYutori(completion.usage);
    const finishReason = choice?.finish_reason;
    if (finishReason === "tool_calls") output.stopReason = "toolUse";
    else if (finishReason === "length") output.stopReason = "length";

    if (typeof message?.content === "string" && message.content) emitText(stream, output, message.content);

    for (const call of message?.tool_calls ?? []) {
      if (call.type !== "function") continue;
      const args = parseArguments(call.function.arguments);
      const canonical = toCanonicalActions(call.function.name, args);
      if (!(canonical && canonical.length > 0)) {
        // Not a native action we translate: forward the call as-is.
        emitToolCall({
          type: "toolCall",
          id: call.id,
          name: call.function.name,
          arguments: args
        }, call.function.arguments ?? "");
        continue;
      }
      for (const [i, action] of canonical.entries()) {
        emitToolCall({
          type: "toolCall",
          id: canonical.length === 1 ? call.id : `${call.id}_${i}`,
          name: canonicalToolCallName(action),
          arguments: canonicalToolCallArguments(action)
        });
      }
    }

    stream.push({
      type: "done",
      reason: output.stopReason,
      message: output
    });
    stream.end();
  } catch (err) {
    output.stopReason = options?.signal?.aborted ? "aborted" : "error";
    output.errorMessage = err instanceof Error ? err.message : String(err);
    stream.push({
      type: "error",
      reason: output.stopReason,
      error: output
    });
    stream.end();
  }
}
1204
/** List the names of context tools that are not local canonical action tools. */
function keepToolNamesFromContext(context) {
  const names = [];
  for (const tool of context.tools ?? []) {
    if (!isYutoriLocalActionToolName(tool.name)) names.push(tool.name);
  }
  return names;
}
1207
/**
 * Create the empty assistant message that a Yutori run fills in: no content,
 * zeroed usage and cost, stop reason `"stop"`, and the current timestamp.
 */
function initialAssistantMessage(model) {
  const zeroCost = {
    input: 0,
    output: 0,
    cacheRead: 0,
    cacheWrite: 0,
    total: 0
  };
  const zeroUsage = {
    input: 0,
    output: 0,
    cacheRead: 0,
    cacheWrite: 0,
    totalTokens: 0,
    cost: zeroCost
  };
  const { api, provider, id } = model;
  return {
    role: "assistant",
    content: [],
    api,
    provider,
    model: id,
    usage: zeroUsage,
    stopReason: "stop",
    timestamp: Date.now()
  };
}
1232
/**
 * Convert the conversation context into chat-completions messages.
 *
 * The system prompt (when truthy) comes first. User messages keep string
 * content or map their parts; assistant messages join their text parts and
 * carry tool calls as `function` calls with JSON-encoded arguments; tool
 * results become `tool` messages. Messages with any other role are skipped.
 */
function convertMessages(context) {
  const converted = context.systemPrompt ? [{ role: "system", content: context.systemPrompt }] : [];
  for (const message of context.messages) {
    switch (message.role) {
      case "user":
        converted.push({
          role: "user",
          content: typeof message.content === "string"
            ? message.content
            : message.content.map((part) => toOpenAIContentPart(part))
        });
        break;
      case "assistant": {
        const textPieces = [];
        const toolCalls = [];
        for (const part of message.content) {
          if (part.type === "text") textPieces.push(part.text);
          else if (part.type === "toolCall") toolCalls.push({
            id: part.id,
            type: "function",
            function: {
              name: part.name,
              arguments: JSON.stringify(part.arguments ?? {})
            }
          });
        }
        const reply = {
          role: "assistant",
          // An assistant turn without text sends `null` content.
          content: textPieces.join("") || null
        };
        if (toolCalls.length > 0) reply.tool_calls = toolCalls;
        converted.push(reply);
        break;
      }
      case "toolResult":
        converted.push({
          role: "tool",
          tool_call_id: message.toolCallId,
          content: message.content.map((part) => toOpenAIContentPart(part))
        });
        break;
      default:
        break;
    }
  }
  return converted;
}
1264
/** Convert context tools into chat-completions `function` tool entries. */
function convertTools(context) {
  const declared = context.tools ?? [];
  return declared.map(({ name, description, parameters }) => ({
    type: "function",
    function: { name, description, parameters }
  }));
}
1274
/**
 * Append a text part to `output.content` and replay it on `stream` as
 * `text_start`, `text_delta` and `text_end`, all at the new part's index.
 */
function emitText(stream, output, text) {
  const contentIndex = output.content.length;
  output.content.push({ type: "text", text });
  const event = (type, extra) => ({ type, contentIndex, ...extra, partial: output });
  stream.push(event("text_start"));
  stream.push(event("text_delta", { delta: text }));
  stream.push(event("text_end", { content: text }));
}
1299
/**
 * Convert one content part: text parts stay text, every other part is sent as
 * an `image_url` data URL built from its `mimeType` and `data`.
 */
function toOpenAIContentPart(part) {
  if (part.type !== "text") {
    const url = `data:${part.mimeType};base64,${part.data}`;
    return { type: "image_url", image_url: { url } };
  }
  return { type: "text", text: part.text };
}
1309
/**
 * Parse a tool call's JSON argument string.
 * Blank input, invalid JSON, and any non-object result (arrays, primitives,
 * `null`) all yield `{}`.
 */
function parseArguments(value) {
  const trimmed = value?.trim();
  if (!trimmed) return {};
  let parsed;
  try {
    parsed = JSON.parse(value);
  } catch {
    return {};
  }
  const isRecord = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
  return isRecord ? parsed : {};
}
1318
/**
 * Map a completion `usage` object onto the assistant-message usage shape.
 * Cache counters and all costs are reported as zero. `totalTokens` falls back
 * to input + output when `total_tokens` is missing or zero.
 */
function usageFromYutori(usage) {
  const input = readNumber(usage, "prompt_tokens");
  const output = readNumber(usage, "completion_tokens");
  const reportedTotal = readNumber(usage, "total_tokens");
  const cost = {
    input: 0,
    output: 0,
    cacheRead: 0,
    cacheWrite: 0,
    total: 0
  };
  return {
    input,
    output,
    cacheRead: 0,
    cacheWrite: 0,
    totalTokens: reportedTotal || input + output,
    cost
  };
}
1336
/**
 * Read a tool's name from either the nested `function.name` or the top-level
 * `name`, preferring the nested form. Returns `undefined` when neither is a string.
 */
function readToolName(tool) {
  if (!tool || typeof tool !== "object") return void 0;
  const candidates = [tool.function?.name, tool.name];
  return candidates.find((candidate) => typeof candidate === "string");
}
1342
/** Read a finite numeric property from an object, returning 0 for anything else. */
function readNumber(value, key) {
  if (!value || typeof value !== "object") return 0;
  const candidate = value[key];
  if (typeof candidate !== "number") return 0;
  return Number.isFinite(candidate) ? candidate : 0;
}
1347
/**
 * Copy a headers collection into a plain object via its
 * `forEach((value, key) => ...)` method. Later entries overwrite earlier ones.
 */
function headersToRecord(headers) {
  const record = {};
  const copyEntry = (value, key) => {
    record[key] = value;
  };
  headers.forEach(copyEntry);
  return record;
}
1354
+ //#endregion
1355
+ //#region src/providers.ts
1356
/**
 * Register the Yutori and Tzafon stream providers with pi-ai's global API
 * registry. Importing `@onkernel/cua-ai` calls this automatically.
 *
 * The pi-ai registry mutators re-exported by this package
 * (`clearApiProviders`, `resetApiProviders`, `unregisterApiProviders`) remove
 * these providers, and Yutori/Tzafon streaming then fails until they are
 * registered again. Call this to restore them; it is idempotent and safe to
 * call repeatedly.
 */
function registerCuaProviders() {
  const providers = [
    {
      api: YUTORI_CHAT_COMPLETIONS_API,
      stream: streamYutori,
      streamSimple: streamSimpleYutori
    },
    {
      api: TZAFON_RESPONSES_API,
      stream: streamTzafonResponses,
      streamSimple: streamSimpleTzafonResponses
    }
  ];
  for (const provider of providers) registerApiProvider(provider);
}
1377
+ //#endregion
1378
+ //#region src/models.ts
1379
/**
 * All providers this package curates computer-use models for.
 * The order is also the provider sort order used by `compareCuaModels`.
 */
const CUA_PROVIDERS = ["openai", "anthropic", "google", "tzafon", "yutori"];
1387
/**
 * Per-provider computer-use support annotations.
 *
 * pi-ai's model registry is generated from models.dev (see
 * node_modules/@earendil-works/pi-ai/scripts/generate-models.ts) and lists
 * every model a provider offers. Only some of those support computer-use, so
 * this table layers CUA-support annotations on top of the registry. Each entry
 * names either a model family or an exact model id and cites the official
 * source documenting CUA support.
 *
 * To verify support and add new entries, follow the `update-models` skill at
 * .agents/skills/update-models/SKILL.md.
 */
const CUA_MODEL_ANNOTATIONS = (() => {
  // One annotation per family name, all citing the same source.
  const families = (source, ...names) => names.map((family) => ({
    match: { kind: "family", family },
    source
  }));
  // One annotation per exact model id, all citing the same source.
  const exactIds = (source, ...ids) => ids.map((id) => ({
    match: { kind: "exact", id },
    source
  }));
  return {
    openai: [
      ...families("https://developers.openai.com/api/docs/models/gpt-5.4", "gpt-5.4"),
      ...families("https://developers.openai.com/api/docs/models/gpt-5.5", "gpt-5.5")
    ],
    anthropic: families(
      "https://docs.anthropic.com/en/docs/build-with-claude/computer-use",
      "claude-3-7-sonnet",
      "claude-opus-4",
      "claude-sonnet-4",
      "claude-haiku-4",
      "claude-fable-5"
    ),
    google: exactIds(
      "https://ai.google.dev/gemini-api/docs/computer-use",
      "gemini-3-flash-preview",
      "gemini-3-pro-preview"
    ),
    tzafon: exactIds("https://huggingface.co/Tzafon/Northstar-CUA-Fast", "tzafon.northstar-cua-fast"),
    yutori: exactIds(
      "https://docs.yutori.com/reference/navigator",
      "n1-latest",
      "n1-20260203",
      "n1.5-latest",
      "n1.5-20260428"
    )
  };
})();
1501
/**
 * Locally defined models per provider, built with `cuaModel`. `listCuaModels`
 * lists them ahead of registry models, and `getCuaModel` falls back to them
 * when the registry has no entry.
 */
const CUA_MODEL_OVERRIDES = (() => {
  const build = (provider, entries) => entries.map(([id, name]) => cuaModel(provider, id, name));
  return {
    openai: build("openai", [
      ["gpt-5.5", "GPT-5.5"],
      ["gpt-5.5-2026-04-23", "GPT-5.5 (2026-04-23)"]
    ]),
    anthropic: [],
    google: [],
    tzafon: build("tzafon", [["tzafon.northstar-cua-fast", "Tzafon Northstar CUA Fast"]]),
    yutori: build("yutori", [
      ["n1.5-latest", "Yutori Navigator n1.5"],
      ["n1.5-20260428", "Yutori Navigator n1.5 (2026-04-28)"],
      ["n1-latest", "Yutori Navigator n1"],
      ["n1-20260203", "Yutori Navigator n1 (2026-02-03)"]
    ])
  };
})();
1513
/**
 * Split a provider-qualified ref like `"openai:gpt-5.5"` into its parts.
 *
 * The ref is split at the first `:`. `"gemini:"` is accepted as an alias for
 * the canonical `"google:"` prefix and normalizes to provider `"google"`.
 *
 * @param {string} ref - `"<provider>:<model>"`.
 * @returns {{ provider: string, model: string }}
 * @throws {Error} When the ref is unqualified (no provider or no model part)
 *   or names an unsupported provider.
 */
function parseCuaModelRef(ref) {
  const separator = ref.indexOf(":");
  const hasProvider = separator > 0;
  const hasModel = separator !== ref.length - 1;
  if (!(hasProvider && hasModel)) throw new Error(`CUA model ref must be provider-qualified as "<provider>:<model>"; got "${ref}"`);
  const prefix = ref.slice(0, separator);
  const model = ref.slice(separator + 1);
  const provider = prefix === "gemini" ? "google" : prefix;
  if (!isCuaProvider(provider)) throw new Error(`unsupported CUA provider "${prefix}" (expected one of: ${CUA_PROVIDERS.join(", ")})`);
  return { provider, model };
}
1532
/** Join a provider and a model id into a {@link CuaModelRef} of the form `provider:model`. */
function formatCuaModelRef(provider, model) {
  const separator = ":";
  return `${provider}${separator}${model}`;
}
1536
/**
 * List the computer-use-capable models this package curates, optionally
 * limited to one provider.
 *
 * Local overrides are listed first. Registry models are then added when they
 * carry an annotation in {@link CUA_MODEL_ANNOTATIONS} and their ref is not
 * already present. The result is sorted with `compareCuaModels`.
 */
function listCuaModels(provider) {
  const scope = provider ? [provider] : [...CUA_PROVIDERS];
  const catalog = /* @__PURE__ */ new Map();
  const describe = (owner, { id, name }) => ({
    ref: formatCuaModelRef(owner, id),
    provider: owner,
    model: id,
    name
  });
  for (const owner of scope) {
    for (const override of CUA_MODEL_OVERRIDES[owner]) {
      const entry = describe(owner, override);
      catalog.set(entry.ref, entry);
    }
    for (const candidate of getModels(owner)) {
      if (!supportsCuaProvider(owner, candidate.id)) continue;
      const entry = describe(owner, candidate);
      // An override with the same ref wins over the registry entry.
      if (!catalog.has(entry.ref)) catalog.set(entry.ref, entry);
    }
  }
  return Array.from(catalog.values()).sort(compareCuaModels);
}
1568
/**
 * Resolve a {@link CuaModelRef} to a concrete pi-ai model, preferring the
 * registry and falling back to the local overrides.
 *
 * `"gemini:"` refs are accepted as an alias for `"google:"` (see
 * {@link parseCuaModelRef}).
 *
 * @throws {Error} When the ref is unqualified, names an unsupported provider,
 *   names a model without a CUA-support annotation, or names an annotated
 *   model that is neither in the registry nor in the overrides.
 */
function getCuaModel(ref) {
  const parsed = parseCuaModelRef(ref);
  const { provider } = parsed;
  const modelId = parsed.model;
  if (!supportsCuaProvider(provider, modelId)) throw new Error(`unsupported CUA model "${ref}"`);
  const resolved = getModel(provider, modelId) || CUA_MODEL_OVERRIDES[provider].find((candidate) => candidate.id === modelId);
  if (resolved) return resolved;
  throw new Error(`CUA model "${ref}" is supported but not registered. Add it to pi-ai (models.dev) or CUA_MODEL_OVERRIDES.`);
}
1584
/**
 * Return the {@link CuaProvider} of a concrete model.
 *
 * @throws {Error} When `model.provider` is not one of CUA_PROVIDERS.
 */
function providerForModel(model) {
  const { provider } = model;
  if (isCuaProvider(provider)) return provider;
  throw new Error(`unsupported CUA model provider "${model.provider}" (expected one of: ${CUA_PROVIDERS.join(", ")})`);
}
1589
/** Narrow an arbitrary string to {@link CuaProvider} by membership in CUA_PROVIDERS. */
function isCuaProvider(value) {
  return CUA_PROVIDERS.some((known) => known === value);
}
1593
/** Report whether a CUA-support annotation covers the given provider/model pair. */
function supportsCuaProvider(provider, modelId) {
  const annotation = findCuaAnnotation(provider, modelId);
  return typeof annotation !== "undefined";
}
1596
/**
 * Find the first CUA-support annotation covering a model id, if any.
 * Matching is case-insensitive: `exact` annotations compare ids, and `family`
 * annotations defer to `isCuaFamilyMatch`.
 */
function findCuaAnnotation(provider, modelId) {
  const wanted = modelId.toLowerCase();
  return CUA_MODEL_ANNOTATIONS[provider].find(({ match }) => {
    if (match.kind === "exact") return wanted === match.id.toLowerCase();
    return isCuaFamilyMatch(wanted, match.family.toLowerCase());
  });
}
1603
/**
 * Report whether a model id belongs to a family: either the family name
 * itself, or the family name followed by `-` and one or more all-digit
 * segments joined by `-` (for example a version or date suffix).
 */
function isCuaFamilyMatch(id, family) {
  if (id === family) return true;
  const prefix = `${family}-`;
  if (!id.startsWith(prefix)) return false;
  const suffixSegments = id.slice(prefix.length).split("-");
  // A named variant such as "-mini" is not part of the family.
  return suffixSegments.every((segment) => /^\d+$/.test(segment));
}
1608
/**
 * Build a locally defined model entry for a provider.
 *
 * Every entry accepts text and image input and has zero costs. `reasoning` is
 * true for openai, anthropic and google. The API name, base URL, context
 * window and max tokens come from the per-provider table below.
 *
 * @returns {object | undefined} The model, or `undefined` for a provider that
 *   is not in the table.
 */
function cuaModel(provider, id, name) {
  // provider -> [api, baseUrl, contextWindow, maxTokens]
  const transports = new Map([
    ["openai", ["openai-responses", "https://api.openai.com/v1", 400000, 32768]],
    ["anthropic", ["anthropic-messages", "https://api.anthropic.com", 200000, 64000]],
    ["google", ["google-generative-ai", "https://generativelanguage.googleapis.com/v1beta", 1048576, 65536]],
    ["tzafon", ["tzafon-responses", "https://api.lightcone.ai", 128000, 4096]],
    ["yutori", ["yutori-chat-completions", "https://api.yutori.com/v1", 128000, 4096]]
  ]);
  const transport = transports.get(provider);
  if (!transport) return void 0;
  const [api, baseUrl, contextWindow, maxTokens] = transport;
  return {
    id,
    name,
    provider,
    reasoning: ["openai", "anthropic", "google"].includes(provider),
    input: ["text", "image"],
    cost: {
      input: 0,
      output: 0,
      cacheRead: 0,
      cacheWrite: 0
    },
    api,
    baseUrl,
    contextWindow,
    maxTokens
  };
}
1660
/**
 * Sort comparator for listed models: by provider position in CUA_PROVIDERS,
 * then by model id using `localeCompare`.
 */
function compareCuaModels(a, b) {
  const sameProvider = a.provider === b.provider;
  if (sameProvider) return a.model.localeCompare(b.model);
  return CUA_PROVIDERS.indexOf(a.provider) - CUA_PROVIDERS.indexOf(b.provider);
}
1664
+ //#endregion
1665
+ //#region src/api-keys.ts
1666
/**
 * Environment variables accepted for each CUA provider, in precedence order.
 *
 * This mirrors pi-ai's approach: model lookup is pure, while auth is resolved
 * when streaming. These helpers let callers share one readable convention for
 * explicit `getApiKey` wiring (especially useful for `google` vs `gemini`).
 */
const CUA_PROVIDER_API_KEY_ENV_VARS = {
  openai: ["OPENAI_API_KEY"],
  anthropic: ["ANTHROPIC_OAUTH_TOKEN", "ANTHROPIC_API_KEY"],
  google: ["GOOGLE_API_KEY", "GEMINI_API_KEY"],
  tzafon: ["TZAFON_API_KEY"],
  yutori: ["YUTORI_API_KEY"]
};
/**
 * List the environment variables checked for a provider's API key, in
 * precedence order. Accepts `"gemini"` as an alias for `"google"`.
 *
 * @param {string} provider - Provider name.
 * @returns {string[]} The variable names, or an empty list for unknown
 *   providers.
 */
function cuaApiKeyEnvVarsForProvider(provider) {
  const key = provider === "gemini" ? "google" : provider;
  // Own-property check: a plain lookup would resolve names such as "toString"
  // or "constructor" through Object.prototype and return a function.
  const known = Object.prototype.hasOwnProperty.call(CUA_PROVIDER_API_KEY_ENV_VARS, key);
  return known ? CUA_PROVIDER_API_KEY_ENV_VARS[key] : [];
}
1689
/**
 * Read a provider's API key from the environment.
 * Returns the first variable, in precedence order, whose value is not blank,
 * or `undefined` when none is set. The value is returned as stored.
 */
function getCuaEnvApiKey(provider) {
  return cuaApiKeyEnvVarsForProvider(provider)
    .map((envVar) => process.env[envVar])
    .find((value) => value?.trim());
}
1696
/**
 * Read a provider's API key from the environment.
 *
 * @throws {Error} When no key is set. The message names the variables to set,
 *   or says that none are known for the provider.
 */
function requireCuaEnvApiKey(provider) {
  const found = getCuaEnvApiKey(provider);
  if (found) return found;
  const candidates = cuaApiKeyEnvVarsForProvider(provider);
  const reason = candidates.length === 0
    ? `No known API key environment variables for provider "${provider}"`
    : `Missing API key for "${provider}". Set one of: ${candidates.join(", ")}`;
  throw new Error(reason);
}
1704
/**
 * {@link getCuaEnvApiKey} keyed by a model ref or concrete model instead of a
 * provider name. A string input is parsed as a ref; any other input is treated
 * as a model.
 */
function getCuaEnvApiKeyForModel(input) {
  const provider = typeof input === "string" ? parseCuaModelRef(input).provider : providerForModel(input);
  return getCuaEnvApiKey(provider);
}
1708
/**
 * {@link requireCuaEnvApiKey} keyed by a model ref or concrete model instead
 * of a provider name. A string input is parsed as a ref; any other input is
 * treated as a model.
 */
function requireCuaEnvApiKeyForModel(input) {
  const provider = typeof input === "string" ? parseCuaModelRef(input).provider : providerForModel(input);
  return requireCuaEnvApiKey(provider);
}
1712
+ //#endregion
1713
+ //#region src/providers/anthropic/actions.ts
1714
/**
 * Canonical CUA action types that Anthropic browser computer-use tools support.
 *
 * Source of truth: the computer/browser tool action enums in Anthropic's
 * computer-use best-practices quickstart. These are the browser actions
 * Anthropic currently accepts under CUA's canonical individual tool names.
 * https://github.com/anthropics/claude-quickstarts/blob/main/computer-use-best-practices/computer_use/tools/computer.py
 * https://github.com/anthropics/claude-quickstarts/blob/main/computer-use-best-practices/computer_use/tools/browser.py
 */
const ANTHROPIC_CUA_ACTION_TYPES = [
  // Pointer buttons
  "click", "double_click", "mouse_down", "mouse_up",
  // Keyboard
  "type", "keypress",
  // Pointer movement
  "scroll", "move", "drag",
  // Timing, capture, navigation, cursor query
  "wait", "screenshot", "goto", "cursor_position"
];
1738
+ const ANTHROPIC_CANONICAL_ACTION_TYPE_SET = new Set(ANTHROPIC_CUA_ACTION_TYPES);
1739
+ /** Name of the batch tool included by default in Anthropic computer-use tools. */
1740
+ const ANTHROPIC_BATCH_TOOL_NAME = CUA_BATCH_TOOL_NAME;
1741
+ const ANTHROPIC_BATCH_TOOL_DESCRIPTION = [CUA_BATCH_TOOL_DESCRIPTION, "Coordinates in a batch refer to the screenshot taken before the batch call."].join("\n");
1742
/**
 * Validate a requested action list against the Anthropic-supported set.
 *
 * A nullish `actions` falls back to every Anthropic action. Returns a fresh
 * array in the requested order, or throws one error naming all unsupported
 * entries.
 */
function resolveAnthropicActions(actions) {
  const requested = [...(actions ?? ANTHROPIC_CUA_ACTION_TYPES)];
  const rejected = requested.filter((action) => !isAnthropicCanonicalAction(action));
  if (rejected.length > 0) {
    throw new Error(`unsupported Anthropic canonical action(s): ${rejected.join(", ")}`);
  }
  return requested;
}
1751
/** Report whether `action` is one of the Anthropic-supported canonical action names. */
function isAnthropicCanonicalAction(action) {
  const supportedActions = ANTHROPIC_CANONICAL_ACTION_TYPE_SET;
  return supportedActions.has(action);
}
1754
/**
 * Build the TypeBox schema for Anthropic-supported canonical browser actions.
 *
 * `actions` is validated first, so an unsupported action throws before any
 * schema is built.
 */
function createActionSchema(actions) {
  const validated = resolveAnthropicActions(actions);
  return createCuaActionSchema(validated);
}
1758
/**
 * Build Anthropic CUA computer-use tools.
 *
 * Use this when calling `complete()` or `stream()` directly and you need an
 * array of `Tool` objects for Anthropic browser actions. Pass `actions` to
 * expose only a supported subset, such as `["click"]`. The batch tool is
 * appended after the per-action tools unless `excludeBatch: true` is passed.
 */
function computerTools$1(options = {}) {
  const actions = resolveAnthropicActions(options.actions);
  const tools = createCuaActionToolDefinitions(actions);
  if (options.excludeBatch) return tools;

  const batchTool = createCuaBatchToolDefinition(actions, {
    name: ANTHROPIC_BATCH_TOOL_NAME,
    description: ANTHROPIC_BATCH_TOOL_DESCRIPTION,
  });
  tools.push(batchTool);
  return tools;
}
1775
/**
 * Build the local execution adapters used by CuaAgent and CuaAgentHarness.
 *
 * Mirrors the tool-definition builder: one executor per validated action,
 * followed by the batch executor unless `excludeBatch` is truthy.
 */
function computerToolExecutors$1(options = {}) {
  const actions = resolveAnthropicActions(options.actions);
  const executors = createCuaActionToolExecutors(actions);
  if (options.excludeBatch) return executors;

  const batchExecutor = createCuaBatchToolExecutor(actions, {
    name: ANTHROPIC_BATCH_TOOL_NAME,
    description: ANTHROPIC_BATCH_TOOL_DESCRIPTION,
  });
  executors.push(batchExecutor);
  return executors;
}
1785
+ //#endregion
1786
+ //#region src/providers/anthropic/index.ts
1787
/**
 * Namespace object for the Anthropic provider, re-exported from the package
 * as `anthropic`.
 *
 * Each entry is a thunk returning the current binding. `__exportAll` is a
 * bundler helper defined outside this chunk (presumably it turns the thunks
 * into lazy getters; confirm against the helper).
 */
var anthropic_exports = /* @__PURE__ */ __exportAll({
  ANTHROPIC_BATCH_TOOL_NAME: () => ANTHROPIC_BATCH_TOOL_NAME,
  ANTHROPIC_COMPUTER_INSTRUCTIONS: () => ANTHROPIC_COMPUTER_INSTRUCTIONS,
  ANTHROPIC_CUA_ACTION_TYPES: () => ANTHROPIC_CUA_ACTION_TYPES,
  buildAnthropicSystemPrompt: () => buildAnthropicSystemPrompt,
  computerToolExecutors: () => computerToolExecutors$1,
  computerTools: () => computerTools$1,
  coordinateSystem: () => coordinateSystem$4,
  createActionSchema: () => createActionSchema,
  providerModule: () => providerModule$4
});
1798
/** Describe the coordinate system for Anthropic tool calls: pixel coordinates. */
function coordinateSystem$4() {
  const system = { type: "pixel" };
  return system;
}
1801
/** Base system instructions used with Anthropic computer-use models. */
const ANTHROPIC_COMPUTER_INSTRUCTIONS = `You control a Kernel cloud browser through individual browser tools. Use keyboard navigation where possible, and request screenshots when you need to inspect state.`;

/**
 * Build the Anthropic system prompt.
 *
 * Returns the base instructions, followed by a blank line and `opts.suffix`
 * when a truthy suffix is supplied.
 */
function buildAnthropicSystemPrompt(opts = {}) {
  const { suffix } = opts;
  if (!suffix) return ANTHROPIC_COMPUTER_INSTRUCTIONS;
  return `${ANTHROPIC_COMPUTER_INSTRUCTIONS}\n\n${suffix}`;
}
1805
/**
 * Anthropic provider hooks, registered under `anthropic` in `PROVIDERS` and
 * read by `resolveCuaRuntimeSpec`.
 *
 * Defines no `onPayload` or `screenshot` entry, so those spec fields resolve
 * to `undefined` for this provider.
 */
const providerModule$4 = {
  toolDefinitions: computerTools$1,
  toolExecutors: computerToolExecutors$1,
  coordinateSystem: coordinateSystem$4,
  buildSystemPrompt: buildAnthropicSystemPrompt
};
1811
+ //#endregion
1812
+ //#region src/providers/gemini/index.ts
1813
/**
 * Namespace object for the Gemini provider, re-exported from the package as
 * `gemini`.
 *
 * Several entries alias the shared, provider-agnostic bindings
 * (`CUA_ACTION_TYPES`, `computerTools`, `computerToolExecutors`,
 * `createCuaActionSchema`). `__exportAll` is a bundler helper defined outside
 * this chunk.
 */
var gemini_exports = /* @__PURE__ */ __exportAll({
  GEMINI_COMPUTER_INSTRUCTIONS: () => GEMINI_COMPUTER_INSTRUCTIONS,
  GEMINI_CUA_ACTION_TYPES: () => CUA_ACTION_TYPES,
  buildGeminiSystemPrompt: () => buildGeminiSystemPrompt,
  computerToolExecutors: () => computerToolExecutors,
  computerTools: () => computerTools,
  coordinateSystem: () => coordinateSystem$3,
  createActionSchema: () => createCuaActionSchema,
  providerModule: () => providerModule$3
});
1823
/** Describe the coordinate system for Gemini tool calls: normalized, range [0, 999]. */
function coordinateSystem$3() {
  const range = [0, 999];
  return { type: "normalized", range };
}
1829
/** Base system instructions used with Gemini computer-use models. */
const GEMINI_COMPUTER_INSTRUCTIONS = `You control a Kernel cloud browser through individual browser tools. Use the provider coordinate system for tool calls, and request screenshots or URL reads when state changes.`;

/**
 * Build the Gemini system prompt.
 *
 * Returns the base instructions, followed by a blank line and `opts.suffix`
 * when a truthy suffix is supplied.
 */
function buildGeminiSystemPrompt(opts = {}) {
  const { suffix } = opts;
  if (!suffix) return GEMINI_COMPUTER_INSTRUCTIONS;
  return `${GEMINI_COMPUTER_INSTRUCTIONS}\n\n${suffix}`;
}
1833
/**
 * Gemini provider hooks, registered under the `google` key in `PROVIDERS` and
 * read by `resolveCuaRuntimeSpec`.
 *
 * Tool definitions and executors come from the shared `computerTools` and
 * `computerToolExecutors` builders.
 */
const providerModule$3 = {
  toolDefinitions: computerTools,
  toolExecutors: computerToolExecutors,
  coordinateSystem: coordinateSystem$3,
  buildSystemPrompt: buildGeminiSystemPrompt
};
1839
+ //#endregion
1840
+ //#region src/providers/openai/index.ts
1841
/**
 * Namespace object for the OpenAI provider, re-exported from the package as
 * `openai`.
 *
 * The `OPENAI_EXTRA_*` and `OpenAIExtraSchema` entries alias the shared
 * navigation-tool bindings (`CUA_NAVIGATION_TOOL_*`, `CuaNavigationSchema`).
 * `__exportAll` is a bundler helper defined outside this chunk.
 */
var openai_exports = /* @__PURE__ */ __exportAll({
  OPENAI_COMPUTER_INSTRUCTIONS: () => OPENAI_COMPUTER_INSTRUCTIONS,
  OPENAI_CUA_ACTION_TYPES: () => CUA_ACTION_TYPES,
  OPENAI_EXTRA_TOOL_DESCRIPTION: () => CUA_NAVIGATION_TOOL_DESCRIPTION,
  OPENAI_EXTRA_TOOL_NAME: () => CUA_NAVIGATION_TOOL_NAME,
  OpenAIExtraSchema: () => CuaNavigationSchema,
  buildOpenAISystemPrompt: () => buildOpenAISystemPrompt,
  computerToolExecutors: () => computerToolExecutors,
  computerTools: () => computerTools,
  coordinateSystem: () => coordinateSystem$2,
  createActionSchema: () => createCuaActionSchema,
  openaiResponsesStoreOnPayload: () => openaiResponsesStoreOnPayload,
  providerModule: () => providerModule$2
});
1855
/** Describe the coordinate system for OpenAI tool calls: pixel coordinates. */
function coordinateSystem$2() {
  const system = { type: "pixel" };
  return system;
}
1858
/** Base system instructions used with OpenAI computer-use models. */
const OPENAI_COMPUTER_INSTRUCTIONS = `You control a Kernel cloud browser through individual browser tools. Use the available tools for browser interaction and request explicit url, cursor_position, or screenshot reads when you need updated state.`;

/**
 * Build the OpenAI system prompt.
 *
 * Returns the base instructions, followed by a blank line and `opts.suffix`
 * when a truthy suffix is supplied.
 */
function buildOpenAISystemPrompt(opts = {}) {
  const { suffix } = opts;
  if (!suffix) return OPENAI_COMPUTER_INSTRUCTIONS;
  return `${OPENAI_COMPUTER_INSTRUCTIONS}\n\n${suffix}`;
}
1862
/**
 * Payload hook that forces `store: true` on an outgoing request payload.
 *
 * Returns `undefined` when `payload` is not an object or already has
 * `store === true`. Otherwise returns a shallow copy with `store` set to
 * `true`; the input object is never mutated.
 */
function openaiResponsesStoreOnPayload(payload) {
  const isObject = payload !== null && typeof payload === "object";
  if (!isObject) return undefined;
  return payload.store === true ? undefined : { ...payload, store: true };
}
1871
/**
 * OpenAI provider hooks, registered under `openai` in `PROVIDERS` and read by
 * `resolveCuaRuntimeSpec`.
 *
 * `onPayload` is the hook that forces `store: true` on request payloads.
 */
const providerModule$2 = {
  toolDefinitions: computerTools,
  toolExecutors: computerToolExecutors,
  coordinateSystem: coordinateSystem$2,
  buildSystemPrompt: buildOpenAISystemPrompt,
  onPayload: openaiResponsesStoreOnPayload
};
1878
+ //#endregion
1879
+ //#region src/providers/tzafon/index.ts
1880
/**
 * Namespace object for the Tzafon provider, re-exported from the package as
 * `tzafon`.
 *
 * Mixes shared bindings (`CUA_ACTION_TYPES`, `computerTools`,
 * `computerToolExecutors`, `createCuaActionSchema`) with Tzafon-specific
 * streaming and payload helpers defined elsewhere in the bundle.
 * `__exportAll` is a bundler helper defined outside this chunk.
 */
var tzafon_exports = /* @__PURE__ */ __exportAll({
  TZAFON_COMPUTER_INSTRUCTIONS: () => TZAFON_COMPUTER_INSTRUCTIONS,
  TZAFON_CUA_ACTION_TYPES: () => CUA_ACTION_TYPES,
  TZAFON_RESPONSES_API: () => TZAFON_RESPONSES_API,
  buildTzafonSystemPrompt: () => buildTzafonSystemPrompt,
  computerToolExecutors: () => computerToolExecutors,
  computerTools: () => computerTools,
  coordinateSystem: () => coordinateSystem$1,
  createActionSchema: () => createCuaActionSchema,
  providerModule: () => providerModule$1,
  streamSimpleTzafonResponses: () => streamSimpleTzafonResponses,
  streamTzafonResponses: () => streamTzafonResponses,
  toCanonicalActions: () => toCanonicalActions$1,
  tzafonComputerUseOnPayload: () => tzafonComputerUseOnPayload,
  tzafonToolCallId: () => tzafonToolCallId
});
1896
/** Describe the coordinate system for Tzafon tool calls: normalized, range [0, 999]. */
function coordinateSystem$1() {
  const range = [0, 999];
  return { type: "normalized", range };
}
1902
/** Base system instructions used with Tzafon CUA models. */
const TZAFON_COMPUTER_INSTRUCTIONS = `You control a Kernel cloud browser through individual browser tools. Include screenshot or URL reads when you need updated state.`;

/**
 * Build the default system prompt used with Tzafon CUA models.
 *
 * Returns the base instructions, followed by a blank line and `opts.suffix`
 * when a truthy suffix is supplied.
 */
function buildTzafonSystemPrompt(opts = {}) {
  const { suffix } = opts;
  if (!suffix) return TZAFON_COMPUTER_INSTRUCTIONS;
  return `${TZAFON_COMPUTER_INSTRUCTIONS}\n\n${suffix}`;
}
1907
/**
 * Tzafon provider hooks, registered under `tzafon` in `PROVIDERS` and read by
 * `resolveCuaRuntimeSpec`.
 *
 * `onPayload` points at `tzafonComputerUseOnPayload`, which is defined
 * elsewhere in the bundle.
 */
const providerModule$1 = {
  toolDefinitions: computerTools,
  toolExecutors: computerToolExecutors,
  coordinateSystem: coordinateSystem$1,
  buildSystemPrompt: buildTzafonSystemPrompt,
  onPayload: tzafonComputerUseOnPayload
};
1914
+ //#endregion
1915
+ //#region src/providers/yutori/index.ts
1916
/**
 * Namespace object for the Yutori provider, re-exported from the package as
 * `yutori`.
 *
 * `YUTORI_COMPUTER_INSTRUCTIONS` resolves to a literal empty string here
 * rather than a named constant. `__exportAll` is a bundler helper defined
 * outside this chunk.
 */
var yutori_exports = /* @__PURE__ */ __exportAll({
  YUTORI_CHAT_COMPLETIONS_API: () => YUTORI_CHAT_COMPLETIONS_API,
  YUTORI_COMPUTER_INSTRUCTIONS: () => "",
  YUTORI_CUA_ACTION_TYPES: () => YUTORI_CUA_ACTION_TYPES,
  YUTORI_N15_ACTION_TYPES: () => YUTORI_N15_ACTION_TYPES,
  YUTORI_N15_CORE_ACTION_TYPES: () => YUTORI_N15_CORE_ACTION_TYPES,
  YUTORI_N15_CORE_TOOL_SET: () => YUTORI_N15_CORE_TOOL_SET,
  YUTORI_N15_EXPANDED_ACTION_TYPES: () => YUTORI_N15_EXPANDED_ACTION_TYPES,
  YUTORI_N15_EXPANDED_TOOL_SET: () => YUTORI_N15_EXPANDED_TOOL_SET,
  YUTORI_N1_ACTION_TYPES: () => YUTORI_N1_ACTION_TYPES,
  buildYutoriSystemPrompt: () => buildYutoriSystemPrompt,
  computerToolExecutors: () => computerToolExecutors$2,
  computerTools: () => computerTools$2,
  coordinateSystem: () => coordinateSystem,
  createActionSchema: () => createActionSchema$1,
  providerModule: () => providerModule,
  streamSimpleYutori: () => streamSimpleYutori,
  streamYutori: () => streamYutori,
  toCanonicalActions: () => toCanonicalActions,
  yutoriCuaOnPayload: () => yutoriCuaOnPayload,
  yutoriNativeActionsForModel: () => yutoriNativeActionsForModel,
  yutoriNativeToolSetOnPayload: () => yutoriNativeToolSetOnPayload,
  yutoriToolSetForModel: () => yutoriToolSetForModel
});
1940
/**
 * Describe the coordinate system for Yutori tool calls: normalized, with
 * range [0, 1000] (the other normalized providers in this file use 999).
 */
function coordinateSystem() {
  const range = [0, 1e3];
  return { type: "normalized", range };
}
1946
/**
 * Build the Yutori system prompt.
 *
 * Yutori has no base instructions, so the result is `opts.suffix` rendered
 * as a string when truthy, and an empty string otherwise.
 */
function buildYutoriSystemPrompt(opts = {}) {
  const { suffix } = opts;
  return suffix ? `${suffix}` : "";
}
1949
/**
 * Yutori provider hooks, registered under `yutori` in `PROVIDERS` and read by
 * `resolveCuaRuntimeSpec`.
 *
 * Unlike the other provider modules in this file, `toolDefinitions` always
 * returns an empty array and ignores its options, and this is the only module
 * with a `screenshot` configuration.
 */
const providerModule = {
  toolDefinitions: () => [],
  toolExecutors: computerToolExecutors$2,
  coordinateSystem,
  buildSystemPrompt: buildYutoriSystemPrompt,
  onPayload: yutoriCuaOnPayload,
  // NOTE(review): presumably tells the harness to attach each screenshot to
  // the latest message and convert it to 1280x800 WebP at quality 90; confirm
  // against the code that consumes `spec.screenshot`.
  screenshot: {
    appendToLatestMessage: true,
    transform: {
      width: 1280,
      height: 800,
      format: "webp",
      quality: 90
    }
  }
};
1965
+ //#endregion
1966
+ //#region src/runtime-spec.ts
1967
/**
 * Provider name to provider module lookup used by `resolveCuaRuntimeSpec`.
 *
 * The Gemini module is registered under the `google` key.
 */
const PROVIDERS = {
  openai: providerModule$2,
  anthropic: providerModule$4,
  google: providerModule$3,
  tzafon: providerModule$1,
  yutori: providerModule
};
1974
/**
 * Resolve provider defaults from either a CUA model ref or a concrete model.
 *
 * Use the returned spec to build computer-use requests without hard-coding
 * model-provider rules in your application. Pass `options` (e.g.
 * `{ actions: ["click"] }`) to narrow the resolved tool definitions and
 * executors to a supported subset.
 *
 * @param input A model ref string (resolved via `getCuaModel`) or a concrete model.
 * @param options Forwarded unchanged to the provider's tool-definition and
 *   tool-executor builders.
 * @returns The model, its provider name, tool definitions and executors, the
 *   default system prompt, the coordinate system, and the provider's optional
 *   `screenshot` and `onPayload` entries (`undefined` when not defined).
 * @throws {Error} When the model's provider has no registered provider module.
 */
function resolveCuaRuntimeSpec(input, options) {
  const model = typeof input === "string" ? getCuaModel(input) : input;
  const provider = providerForModel(model);
  // Own-property check: an unregistered provider (or an inherited key such as
  // "constructor") would otherwise surface as an opaque "cannot read
  // properties of undefined" TypeError on the first hook call below.
  if (!Object.prototype.hasOwnProperty.call(PROVIDERS, provider)) {
    throw new Error(`Unsupported CUA provider "${provider}". Supported providers: ${Object.keys(PROVIDERS).join(", ")}`);
  }
  const mod = PROVIDERS[provider];
  return {
    model,
    provider,
    toolDefinitions: mod.toolDefinitions(options),
    toolExecutors: mod.toolExecutors(options),
    defaultSystemPrompt: mod.buildSystemPrompt(),
    coordinateSystem: mod.coordinateSystem(),
    screenshot: mod.screenshot,
    onPayload: mod.onPayload
  };
}
1997
+ //#endregion
1998
+ //#region src/index.ts
12
1999
  registerCuaProviders();
13
- //# sourceMappingURL=index.js.map
2000
+ //#endregion
2001
+ export { CUA_ACTION_TYPES, CUA_BATCH_TOOL_DESCRIPTION, CUA_BATCH_TOOL_NAME, CUA_MODEL_ANNOTATIONS, CUA_NAVIGATION_TOOL_DESCRIPTION, CUA_NAVIGATION_TOOL_NAME, CUA_PROVIDERS, CuaActionSchema, CuaBatchSchema, CuaNavigationSchema, anthropic_exports as anthropic, canonicalToolCallArguments, canonicalToolCallName, computerToolExecutors, computerTools, createCuaActionSchema, createCuaActionToolDefinitions, createCuaActionToolExecutors, createCuaBatchSchema, createCuaBatchToolDefinition, createCuaBatchToolExecutor, createCuaNavigationToolDefinition, cuaApiKeyEnvVarsForProvider, findCuaAnnotation, formatCuaModelRef, gemini_exports as gemini, getCuaEnvApiKey, getCuaEnvApiKeyForModel, getCuaModel, isCuaProvider, listCuaModels, normalizeGotoUrl, openai_exports as openai, parseCuaModelRef, providerForModel, registerCuaProviders, requireCuaEnvApiKey, requireCuaEnvApiKeyForModel, resolveCuaRuntimeSpec, tzafon_exports as tzafon, yutori_exports as yutori };