@onkernel/cua-ai 0.1.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +100 -0
- package/README.md +341 -65
- package/dist/chunk-D7D4PA-g.js +13 -0
- package/dist/index.d.ts +576 -10
- package/dist/index.js +1999 -11
- package/docs/supported-models.md +77 -0
- package/examples/quickstart.ts +28 -22
- package/package.json +10 -6
- package/dist/api-keys.d.ts +0 -8
- package/dist/api-keys.d.ts.map +0 -1
- package/dist/api-keys.js +0 -48
- package/dist/api-keys.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/models.d.ts +0 -33
- package/dist/models.d.ts.map +0 -1
- package/dist/models.js +0 -159
- package/dist/models.js.map +0 -1
- package/dist/providers/anthropic/index.d.ts +0 -10
- package/dist/providers/anthropic/index.d.ts.map +0 -1
- package/dist/providers/anthropic/index.js +0 -16
- package/dist/providers/anthropic/index.js.map +0 -1
- package/dist/providers/common.d.ts +0 -111
- package/dist/providers/common.d.ts.map +0 -1
- package/dist/providers/common.js +0 -138
- package/dist/providers/common.js.map +0 -1
- package/dist/providers/gemini/index.d.ts +0 -11
- package/dist/providers/gemini/index.d.ts.map +0 -1
- package/dist/providers/gemini/index.js +0 -14
- package/dist/providers/gemini/index.js.map +0 -1
- package/dist/providers/openai/index.d.ts +0 -8
- package/dist/providers/openai/index.d.ts.map +0 -1
- package/dist/providers/openai/index.js +0 -22
- package/dist/providers/openai/index.js.map +0 -1
- package/dist/providers/tzafon/index.d.ts +0 -12
- package/dist/providers/tzafon/index.d.ts.map +0 -1
- package/dist/providers/tzafon/index.js +0 -18
- package/dist/providers/tzafon/index.js.map +0 -1
- package/dist/providers/tzafon/provider.d.ts +0 -8
- package/dist/providers/tzafon/provider.d.ts.map +0 -1
- package/dist/providers/tzafon/provider.js +0 -234
- package/dist/providers/tzafon/provider.js.map +0 -1
- package/dist/providers/yutori/index.d.ts +0 -12
- package/dist/providers/yutori/index.d.ts.map +0 -1
- package/dist/providers/yutori/index.js +0 -23
- package/dist/providers/yutori/index.js.map +0 -1
- package/dist/providers/yutori/provider.d.ts +0 -9
- package/dist/providers/yutori/provider.d.ts.map +0 -1
- package/dist/providers/yutori/provider.js +0 -307
- package/dist/providers/yutori/provider.js.map +0 -1
- package/dist/providers.d.ts +0 -6
- package/dist/providers.d.ts.map +0 -1
- package/dist/providers.js +0 -26
- package/dist/providers.js.map +0 -1
- package/dist/runtime-spec.d.ts +0 -29
- package/dist/runtime-spec.d.ts.map +0 -1
- package/dist/runtime-spec.js +0 -58
- package/dist/runtime-spec.js.map +0 -1
package/dist/index.js
CHANGED
|
@@ -1,13 +1,2001 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { t as __exportAll } from "./chunk-D7D4PA-g.js";
|
|
2
|
+
import { Type, createAssistantMessageEventStream, getModel, getModels, registerApiProvider } from "@earendil-works/pi-ai";
|
|
3
|
+
import Lightcone from "@tzafon/lightcone";
|
|
4
|
+
import OpenAI from "openai";
|
|
2
5
|
export * from "@earendil-works/pi-ai";
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
6
|
+
//#region src/providers/common.ts
|
|
7
|
+
/**
 * Canonical CUA action type names. This list is the default `actions`
 * argument of the schema, tool-definition and tool-executor builders below.
 */
const CUA_ACTION_TYPES = [
  "click", "double_click", "mouse_down", "mouse_up",
  "type", "keypress",
  "scroll", "move", "drag",
  "wait", "screenshot",
  "goto", "back", "forward", "url",
  "cursor_position"
];
|
|
25
|
+
/** Schema for one `{ x, y }` coordinate pair (used for `drag` paths); no extra keys allowed. */
const PointSchema = Type.Object(
  { x: Type.Number(), y: Type.Number() },
  { additionalProperties: false }
);
|
|
29
|
+
/**
 * Object schema for each canonical CUA action, keyed by action type.
 * Every schema carries a literal `type` tag as its first property and sets
 * `additionalProperties: false`.
 */
const CUA_ACTION_SCHEMA_BY_TYPE = (() => {
  // Local builders keep each action on one line; every call creates a fresh schema node.
  const num = () => Type.Number();
  const str = () => Type.String();
  const optNum = () => Type.Optional(Type.Number());
  const optStr = () => Type.Optional(Type.String());
  const holdKeys = () => Type.Optional(Type.Array(Type.String()));
  const action = (name, fields = {}) => Type.Object({
    type: Type.Literal(name),
    ...fields
  }, { additionalProperties: false });

  return {
    click: action("click", { x: num(), y: num(), button: optStr(), hold_keys: holdKeys() }),
    double_click: action("double_click", { x: num(), y: num(), hold_keys: holdKeys() }),
    mouse_down: action("mouse_down", { x: num(), y: num(), button: optStr(), hold_keys: holdKeys() }),
    mouse_up: action("mouse_up", { x: num(), y: num(), button: optStr(), hold_keys: holdKeys() }),
    type: action("type", { text: str() }),
    keypress: action("keypress", { keys: Type.Array(Type.String()), duration: optNum() }),
    scroll: action("scroll", {
      x: optNum(),
      y: optNum(),
      scroll_x: optNum(),
      scroll_y: optNum(),
      hold_keys: holdKeys()
    }),
    move: action("move", { x: num(), y: num() }),
    drag: action("drag", {
      path: Type.Array(PointSchema, { minItems: 2 }),
      button: optStr(),
      hold_keys: holdKeys()
    }),
    wait: action("wait", { ms: optNum() }),
    screenshot: action("screenshot"),
    goto: action("goto", { url: str() }),
    back: action("back"),
    forward: action("forward"),
    url: action("url"),
    cursor_position: action("cursor_position")
  };
})();
|
|
99
|
+
/**
 * Build the tool-argument schema for one action: the action's object schema
 * with the `type` discriminator removed.
 *
 * @param action Key of `CUA_ACTION_SCHEMA_BY_TYPE`.
 * @returns A new object schema with `additionalProperties: false`.
 */
function createCuaActionArgumentSchema(action) {
  const actionSchema = CUA_ACTION_SCHEMA_BY_TYPE[action];
  const argumentEntries = Object.entries(actionSchema.properties).filter(([key]) => key !== "type");
  return Type.Object(Object.fromEntries(argumentEntries), { additionalProperties: false });
}
|
|
103
|
+
/**
 * Build a schema that accepts any of the given action types.
 *
 * @param actions Action type names; defaults to every canonical type.
 * @returns The single action schema when one type is given, otherwise a union.
 * @throws {Error} When `actions` is empty.
 */
function createCuaActionSchema(actions = CUA_ACTION_TYPES) {
  switch (actions.length) {
    case 0:
      throw new Error("actions must include at least one CUA action type");
    case 1:
      // A one-member union is pointless; hand back the schema itself.
      return CUA_ACTION_SCHEMA_BY_TYPE[actions[0]];
    default: {
      const members = actions.map((action) => CUA_ACTION_SCHEMA_BY_TYPE[action]);
      return Type.Union(members);
    }
  }
}
|
|
108
|
+
/**
 * Create one tool definition per action type. The tool is named after the
 * action and takes the action's fields (without `type`) as parameters.
 *
 * @param actions Action type names; defaults to every canonical type.
 * @returns Array of `{ name, description, parameters }` in input order.
 */
function createCuaActionToolDefinitions(actions = CUA_ACTION_TYPES) {
  const toDefinition = (action) => {
    const parameters = createCuaActionArgumentSchema(action);
    return {
      name: action,
      description: `Execute one ${action} computer action.`,
      parameters
    };
  };
  return actions.map((action) => toDefinition(action));
}
|
|
115
|
+
/** Schema accepting any canonical CUA action. */
const CuaActionSchema = createCuaActionSchema();

/**
 * Build the `{ actions: [...] }` object schema for a batch tool.
 *
 * @param actions Optional subset of action types allowed inside the batch.
 */
function createCuaBatchSchema(actions) {
  const orderedActions = Type.Array(createCuaActionSchema(actions), {
    description: "Ordered computer actions to execute."
  });
  return Type.Object({ actions: orderedActions });
}

/** Batch schema over every canonical CUA action. */
const CuaBatchSchema = createCuaBatchSchema();
|
|
120
|
+
/** Parameters of the navigation helper tool: an `action` name plus an optional `url`. */
const CuaNavigationSchema = Type.Object({
  action: Type.Union(["goto", "back", "forward", "url"].map((name) => Type.Literal(name))),
  url: Type.Optional(Type.String())
}, { additionalProperties: false });
|
|
129
|
+
/**
 * Default name given to batch computer-action tools built by
 * {@link createCuaBatchToolDefinition}. It is also the name Anthropic's batch
 * tool ships under (the only provider that includes one by default).
 */
const CUA_BATCH_TOOL_NAME = "computer_batch";

/** Name of the navigation helper tool. */
const CUA_NAVIGATION_TOOL_NAME = "computer_use_extra";

/** Default batch tool description: three sentences separated by newlines. */
const CUA_BATCH_TOOL_DESCRIPTION =
  "Execute multiple computer actions in sequence, including ordered read steps like url(), cursor_position(), and screenshot()." +
  "\n" +
  "Prefer this tool for predictable browser interaction sequences such as click-then-type, typing a URL, keyboard navigation, drag paths, and mixed write/read batches." +
  "\n" +
  "If no explicit read step is included, the tool returns one fresh screenshot after execution.";

/** Description of the navigation helper tool. */
const CUA_NAVIGATION_TOOL_DESCRIPTION = "High-level browser navigation helpers for goto, back, forward, and url.";
|
|
142
|
+
/**
 * Build the provider's CUA computer-use tools.
 *
 * Intended for direct `complete()` / `stream()` callers that need an array of
 * `Tool` objects for browser actions. Set `options.actions` to expose only a
 * subset, for example `["click"]`; omit it to get every canonical action.
 */
function computerTools(options = {}) {
  const { actions } = options;
  return createCuaActionToolDefinitions(actions);
}
|
|
152
|
+
/**
 * Build execution adapters for individual canonical CUA action tools.
 *
 * Each adapter pairs a tool definition with `toActions(args)`, which turns the
 * tool-call arguments back into a single-element action list tagged with the
 * tool's action type.
 */
function createCuaActionToolExecutors(actions = CUA_ACTION_TYPES) {
  const executors = [];
  for (const definition of createCuaActionToolDefinitions(actions)) {
    const { name: actionType } = definition;
    executors.push({
      definition,
      toActions(args) {
        // Non-object arguments contribute no fields; `type` always wins over args.
        const fields = args && typeof args === "object" ? args : {};
        return [{ ...fields, type: actionType }];
      }
    });
  }
  return executors;
}
|
|
167
|
+
/** Name of the canonical tool that executes a normalized CUA action: its `type`. */
function canonicalToolCallName(action) {
  const { type } = action;
  return type;
}
|
|
171
|
+
/**
 * Turn a normalized CUA action into tool-call arguments: a shallow copy of the
 * action without its `type` tag. The input object is not modified.
 */
function canonicalToolCallArguments(action) {
  const withoutType = Object.entries(action).filter(([key]) => key !== "type");
  return Object.fromEntries(withoutType);
}
|
|
176
|
+
/**
 * Prefix bare hostnames/paths with `https://` before browser navigation.
 *
 * - Non-string or blank input yields `undefined`.
 * - Input that already starts with `scheme://` is returned trimmed, unchanged.
 * - `about:`, `data:` and `blob:` URLs have no `//` after the scheme; they are
 *   returned unchanged instead of being turned into an invalid
 *   `https://about:blank`-style URL.
 * - Everything else (including `host:port`) gets an `https://` prefix.
 *
 * @param value Candidate URL, typically supplied by the model.
 * @returns The normalized URL, or `undefined` when there is nothing to navigate to.
 */
function normalizeGotoUrl(value) {
  if (typeof value !== "string") return void 0;
  const url = value.trim();
  if (!url) return void 0;
  if (/^[a-z][a-z0-9+.-]*:\/\//i.test(url)) return url;
  // A port is always numeric, so requiring a non-digit after the colon keeps
  // inputs such as `data:8080/x` on the host:port path below.
  if (/^(?:about|blob|data):[^0-9]/i.test(url)) return url;
  return `https://${url}`;
}
|
|
183
|
+
/**
 * Build the definition of a batch tool whose input is `{ actions }`.
 *
 * @param actions Optional subset of action types allowed inside the batch.
 * @param options Optional `name` / `description` overrides; nullish values
 *   fall back to the module defaults.
 */
function createCuaBatchToolDefinition(actions, options = {}) {
  const { name, description } = options;
  return {
    name: name ?? CUA_BATCH_TOOL_NAME,
    description: description ?? CUA_BATCH_TOOL_DESCRIPTION,
    parameters: createCuaBatchSchema(actions)
  };
}
|
|
190
|
+
/**
 * Build an execution adapter for a batch tool whose input is `{ actions }`.
 *
 * The adapter's `toActions(args)` returns `args.actions` as-is and throws
 * `Error("invalid batch tool parameters")` when `args` is not an object with
 * an `actions` array.
 */
function createCuaBatchToolExecutor(actions, options = {}) {
  const definition = createCuaBatchToolDefinition(actions, options);
  return {
    definition,
    toActions(args) {
      if (isBatchInput(args)) return args.actions;
      throw new Error("invalid batch tool parameters");
    }
  };
}
|
|
200
|
+
/**
 * Build the provider's default CUA tool execution adapters: one per action
 * type in `options.actions`, or per canonical action when it is omitted.
 */
function computerToolExecutors(options = {}) {
  const { actions } = options;
  return createCuaActionToolExecutors(actions);
}
|
|
204
|
+
/** True when `value` is a non-null object whose `actions` property is an array. */
function isBatchInput(value) {
  if (!value || typeof value !== "object") return false;
  return Array.isArray(value.actions);
}
|
|
207
|
+
/** Build the definition of the navigation helper tool from the module-level name, description and schema. */
function createCuaNavigationToolDefinition() {
  const definition = {
    name: CUA_NAVIGATION_TOOL_NAME,
    description: CUA_NAVIGATION_TOOL_DESCRIPTION,
    parameters: CuaNavigationSchema
  };
  return definition;
}
|
|
214
|
+
//#endregion
|
|
215
|
+
//#region src/providers/tzafon/provider.ts
|
|
216
|
+
/** Identifier string for the Tzafon responses API. */
const TZAFON_RESPONSES_API = "tzafon-responses";

/**
 * Tool entry that `tzafonComputerUseOnPayload` puts in front of the payload's
 * tools when it strips local action tools.
 */
const TZAFON_COMPUTER_USE_TOOL = {
  type: "computer_use",
  display_width: 1920,
  display_height: 1080,
  environment: "browser"
};

/** Names of the canonical action tools, for fast membership checks. */
const TZAFON_LOCAL_ACTION_TOOL_NAMES = new Set(CUA_ACTION_TYPES);
|
|
224
|
+
/** Thin alias that forwards all three arguments to `streamTzafonResponses`. */
const streamSimpleTzafonResponses = (model, context, options) => streamTzafonResponses(model, context, options);
|
|
227
|
+
/**
 * Send one request to the Tzafon responses endpoint and replay the complete
 * response as an assistant-message event stream.
 *
 * The stream is returned synchronously; the request runs in a detached async
 * task that always finishes by pushing either a `done` or an `error` event and
 * then ending the stream, so this function itself never throws.
 *
 * @param model Model descriptor; `id`, `provider`, `api` and `maxTokens` are read.
 * @param context Conversation context (`messages`, optional `tools`, `systemPrompt`).
 * @param options Optional `apiKey`, `temperature`, `maxTokens`, `signal`,
 *   `keepToolNames` and an `onPayload(payload, model)` hook whose non-nullish
 *   result replaces the request payload.
 * @returns The event stream created by `createAssistantMessageEventStream()`.
 */
const streamTzafonResponses = (model, context, options) => {
  const stream = createAssistantMessageEventStream();
  const output = initialAssistantMessage$1(model);
  (async () => {
    try {
      const apiKey = options?.apiKey || process.env.TZAFON_API_KEY;
      if (!apiKey) throw new Error(`No API key for provider: ${model.provider}`);
      const client = new Lightcone({ apiKey });
      const payload = {
        model: model.id,
        input: convertContextMessages(context),
        tools: convertTools$1(context.tools ?? []),
        instructions: context.systemPrompt,
        temperature: options?.temperature ?? 0,
        max_output_tokens: options?.maxTokens ?? model.maxTokens
      };
      // Swap local action tools for the computer_use tool; tools that are not
      // canonical actions, plus any explicitly kept names, stay as functions.
      const tzafonPayload = tzafonComputerUseOnPayload(payload, model, {
        keepToolNames: [...keepToolNamesFromContext$1(context), ...options?.keepToolNames ?? []]
      });
      const nextPayload = await options?.onPayload?.(tzafonPayload ?? payload, model);
      if (options?.signal?.aborted) throw new Error("Request was aborted");
      const response = await client.responses.create(nextPayload ?? tzafonPayload ?? payload, { signal: options?.signal });
      if (options?.signal?.aborted) throw new Error("Request was aborted");
      // The whole response is already available here, so every event below is
      // emitted back-to-back.
      stream.push({
        type: "start",
        partial: output
      });
      output.responseId = getString(response, "id") || void 0;
      output.usage = usageFromTzafon(getValue(response, "usage"));
      for (const item of getArray(response, "output")) {
        const type = getString(item, "type");
        if (type === "message") {
          const text = extractMessageText(item);
          if (text) emitText$1(stream, output, text);
          continue;
        }
        if (type === "function_call") {
          emitToolCall(stream, output, {
            type: "toolCall",
            id: getString(item, "call_id"),
            name: getString(item, "name"),
            arguments: parseArguments$1(getValue(item, "arguments"))
          });
          continue;
        }
        if (type === "computer_call") {
          const callId = getString(item, "call_id") || getString(item, "id") || `computer_call_${output.content.length}`;
          // One computer_call can expand to several canonical actions; each
          // emitted tool call gets its own id derived from callId.
          let actionIndex = 0;
          for (const action of toCanonicalActions$1(getValue(item, "action"))) {
            if (action.type === "answer") {
              // Skip empty answers so no empty text block is emitted, matching
              // the `message` branch above.
              if (action.text) emitText$1(stream, output, action.text);
              continue;
            }
            emitToolCall(stream, output, {
              type: "toolCall",
              id: tzafonToolCallId(callId, actionIndex),
              name: canonicalToolCallName(action),
              arguments: canonicalToolCallArguments(action)
            });
            actionIndex += 1;
          }
        }
      }
      output.stopReason = output.content.some((part) => part.type === "toolCall") ? "toolUse" : "stop";
      stream.push({
        type: "done",
        reason: output.stopReason,
        message: output
      });
      stream.end();
    } catch (err) {
      // Report failures through the stream; an aborted signal is reported as
      // "aborted", anything else as "error".
      output.stopReason = options?.signal?.aborted ? "aborted" : "error";
      output.errorMessage = err instanceof Error ? err.message : String(err);
      stream.push({
        type: "error",
        reason: output.stopReason,
        error: output
      });
      stream.end();
    }
  })();
  return stream;
};
|
|
308
|
+
/**
 * Rewrite a request payload so local action tools are replaced by the
 * `computer_use` tool entry.
 *
 * Tools named after a canonical action are dropped unless listed in
 * `context.keepToolNames`. If at least one tool was dropped,
 * `TZAFON_COMPUTER_USE_TOOL` is placed first in the resulting list. Unnamed
 * tools and all other tools are kept in order. The input payload is not mutated.
 *
 * @param payload Request payload; non-objects yield `undefined`.
 * @param _model Unused.
 * @param context Optional `{ keepToolNames }`.
 * @returns A shallow copy of `payload` with a new `tools` array, or `undefined`.
 */
function tzafonComputerUseOnPayload(payload, _model, context) {
  if (!payload || typeof payload !== "object") return void 0;
  const keepToolNames = new Set(context?.keepToolNames ?? []);
  const existingTools = Array.isArray(payload.tools) ? payload.tools : [];
  const keptTools = [];
  let droppedLocalAction = false;
  for (const tool of existingTools) {
    const name = readToolName$1(tool);
    const isLocalAction = Boolean(name) && TZAFON_LOCAL_ACTION_TOOL_NAMES.has(name) && !keepToolNames.has(name);
    if (isLocalAction) {
      droppedLocalAction = true;
    } else {
      keptTools.push(tool);
    }
  }
  return {
    ...payload,
    tools: droppedLocalAction ? [TZAFON_COMPUTER_USE_TOOL, ...keptTools] : keptTools
  };
}
|
|
326
|
+
/**
 * Derive a unique canonical tool-call id for a Tzafon computer action: the
 * first action reuses `callId`, later ones get a `:<index>` suffix.
 */
function tzafonToolCallId(callId, actionIndex) {
  if (actionIndex === 0) return callId;
  return `${callId}:${actionIndex}`;
}
|
|
330
|
+
/**
 * Create the empty assistant message that the stream fills in: no content,
 * all usage and cost counters at zero, `stopReason` "stop", and the current
 * time as `timestamp`.
 */
function initialAssistantMessage$1(model) {
  const { api, provider, id } = model;
  const cost = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 };
  const usage = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost };
  return {
    role: "assistant",
    content: [],
    api,
    provider,
    model: id,
    usage,
    stopReason: "stop",
    timestamp: Date.now()
  };
}
|
|
355
|
+
/**
 * Append a text part to `output.content` and push the `text_start`,
 * `text_delta` and `text_end` events for it, in that order. The whole text is
 * delivered as a single delta.
 */
function emitText$1(stream, output, text) {
  // push() returns the new length, so the new part's index is one less.
  const contentIndex = output.content.push({ type: "text", text }) - 1;
  const events = [
    { type: "text_start", contentIndex, partial: output },
    { type: "text_delta", contentIndex, delta: text, partial: output },
    { type: "text_end", contentIndex, content: text, partial: output }
  ];
  for (const event of events) stream.push(event);
}
|
|
380
|
+
/**
 * Append a tool call to `output.content` and push its `toolcall_start` and
 * `toolcall_end` events; the end event carries the tool call itself.
 */
function emitToolCall(stream, output, toolCall) {
  // push() returns the new length, so the new part's index is one less.
  const contentIndex = output.content.push(toolCall) - 1;
  stream.push({ type: "toolcall_start", contentIndex, partial: output });
  stream.push({ type: "toolcall_end", contentIndex, toolCall, partial: output });
}
|
|
395
|
+
/**
 * Normalize one Tzafon `computer_call.action` payload into canonical CUA actions.
 *
 * Returns an array because one provider action may map to zero, one or two
 * canonical actions: `triple_click` becomes a `double_click` followed by a
 * `click`; unknown types and pointer actions without numeric `x`/`y` become `[]`.
 * `answer` / `done` / `terminate` yield a pseudo-action `{ type: "answer", text }`.
 */
function toCanonicalActions$1(action) {
  if (!action || typeof action !== "object") return [];
  const kind = getString(action, "type");
  const x = readOptionalNumber(action, "x");
  const y = readOptionalNumber(action, "y");
  const hasPoint = x !== void 0 && y !== void 0;
  // Pointer actions are only produced when both coordinates are present.
  const atPoint = (...actions) => hasPoint ? actions : [];

  switch (kind) {
    case "click":
    case "left_click":
      return atPoint({ type: "click", x, y });
    case "right_click":
      return atPoint({ type: "click", x, y, button: "right" });
    case "double_click":
      return atPoint({ type: "double_click", x, y });
    case "triple_click":
      return atPoint({ type: "double_click", x, y }, { type: "click", x, y });
    case "move":
    case "hover":
      return atPoint({ type: "move", x, y });
    case "drag":
      return toDragAction(action);
    case "type":
      return [{ type: "type", text: getString(action, "text") }];
    case "keypress":
    case "key":
      return toKeypressAction$1(action);
    case "scroll":
      return [toScrollAction$1(action)];
    case "hscroll": {
      const scrollX = readOptionalNumber(action, "scroll_x") ?? readOptionalNumber(action, "amount") ?? 0;
      return [{ type: "scroll", scroll_x: scrollX }];
    }
    case "navigate":
      return [{ type: "goto", url: getString(action, "url") }];
    case "wait": {
      const ms = readOptionalNumber(action, "ms") ?? secondsToMs$1(readOptionalNumber(action, "seconds"));
      return [{ type: "wait", ms }];
    }
    case "screenshot":
      return [{ type: "screenshot" }];
    case "answer":
    case "done":
    case "terminate": {
      // First non-empty of result, text, status; may be "".
      const text = getString(action, "result") || getString(action, "text") || getString(action, "status");
      return [{ type: "answer", text }];
    }
    default:
      return [];
  }
}
|
|
465
|
+
/**
 * Convert a provider drag payload into a canonical `drag` action.
 *
 * Prefers `path` when it contains at least two points with numeric `x`/`y`
 * (invalid points are dropped). Otherwise falls back to a two-point path from
 * `x`/`y` to `end_x`/`end_y` (or `x2`/`y2`). Returns `[]` when neither form is
 * complete.
 */
function toDragAction(action) {
  const path = [];
  for (const point of getArray(action, "path")) {
    if (!point || typeof point !== "object") continue;
    const pointX = readOptionalNumber(point, "x");
    const pointY = readOptionalNumber(point, "y");
    if (pointX !== void 0 && pointY !== void 0) path.push({ x: pointX, y: pointY });
  }
  if (path.length >= 2) return [{ type: "drag", path }];

  const startX = readOptionalNumber(action, "x");
  const startY = readOptionalNumber(action, "y");
  const endX = readOptionalNumber(action, "end_x") ?? readOptionalNumber(action, "x2");
  const endY = readOptionalNumber(action, "end_y") ?? readOptionalNumber(action, "y2");
  const incomplete = [startX, startY, endX, endY].some((coordinate) => coordinate === void 0);
  if (incomplete) return [];
  return [{
    type: "drag",
    path: [{ x: startX, y: startY }, { x: endX, y: endY }]
  }];
}
|
|
495
|
+
/**
 * Convert a provider key payload into a canonical `keypress` action.
 *
 * Key source priority: the non-empty strings in `keys`, then `key`, then
 * `text`. Returns `[]` when none of them provides a key.
 */
function toKeypressAction$1(action) {
  const listedKeys = getArray(action, "keys").filter((key) => typeof key === "string" && key !== "");
  if (listedKeys.length > 0) return [{ type: "keypress", keys: listedKeys }];
  const singleKey = getString(action, "key") || getString(action, "text");
  return singleKey ? [{ type: "keypress", keys: [singleKey] }] : [];
}
|
|
505
|
+
/**
 * Convert a provider scroll payload into a canonical `scroll` action.
 * `scroll_y` falls back to `amount`; fields that are absent stay `undefined`.
 */
function toScrollAction$1(action) {
  const read = (key) => readOptionalNumber(action, key);
  return {
    type: "scroll",
    x: read("x"),
    y: read("y"),
    scroll_x: read("scroll_x"),
    scroll_y: read("scroll_y") ?? read("amount")
  };
}
|
|
514
|
+
/** Convert seconds to milliseconds, passing `undefined` through unchanged. */
function secondsToMs$1(seconds) {
  if (seconds === void 0) return void 0;
  return seconds * 1e3;
}
|
|
517
|
+
/** Map tool definitions to `{ type: "function", name, description, parameters }` payload entries. */
function convertTools$1(tools) {
  return tools.map(({ name, description, parameters }) => ({
    type: "function",
    name,
    description,
    parameters
  }));
}
|
|
525
|
+
/** Names of the context tools that are not canonical action tools, in context order. */
function keepToolNamesFromContext$1(context) {
  const names = [];
  for (const tool of context.tools ?? []) {
    if (!TZAFON_LOCAL_ACTION_TOOL_NAMES.has(tool.name)) names.push(tool.name);
  }
  return names;
}
|
|
528
|
+
/**
 * Read a tool's name from `tool.name`, falling back to `tool.function.name`.
 * Returns `undefined` for non-objects and `""` when neither name is a
 * non-empty string.
 */
function readToolName$1(tool) {
  if (!tool || typeof tool !== "object") return void 0;
  return getString(tool, "name") || getString(getValue(tool, "function"), "name");
}
|
|
534
|
+
/**
 * Convert the context's messages into the request's `input` items.
 *
 * - `user` messages become a user item with converted content.
 * - `assistant` messages become an optional assistant text item (all text
 *   parts joined by newlines) followed by one `function_call` item per tool call.
 * - `toolResult` messages become a `function_call_output` item. When the
 *   result contains images, the last one is attached as an extra user item
 *   labelled "screenshot".
 * - Messages with any other role are skipped.
 */
function convertContextMessages(context) {
  const joinText = (parts) => parts
    .filter((part) => part.type === "text")
    .map((part) => part.text)
    .join("\n")
    .trim();
  const items = [];
  for (const message of context.messages) {
    switch (message.role) {
      case "user":
        items.push({
          role: "user",
          content: convertUserContent(message.content)
        });
        break;
      case "assistant": {
        const text = joinText(message.content);
        if (text) items.push({ role: "assistant", content: text });
        const toolCalls = message.content.filter((part) => part.type === "toolCall");
        for (const call of toolCalls) {
          items.push({
            type: "function_call",
            call_id: call.id,
            name: call.name,
            arguments: JSON.stringify(call.arguments ?? {})
          });
        }
        break;
      }
      case "toolResult": {
        const text = joinText(message.content);
        let resultText;
        if (message.isError) {
          resultText = `Error: ${text || "tool execution failed"}`;
        } else {
          resultText = text || "ok";
        }
        items.push({
          type: "function_call_output",
          call_id: message.toolCallId,
          output: resultText
        });
        // Only the last image of the result is forwarded.
        const parts = [...message.content];
        let image;
        for (let index = parts.length - 1; index >= 0; index -= 1) {
          if (parts[index].type === "image") {
            image = parts[index];
            break;
          }
        }
        if (image) {
          items.push({
            role: "user",
            content: [{
              type: "input_text",
              text: "screenshot"
            }, {
              type: "input_image",
              image_url: `data:${image.mimeType};base64,${image.data}`,
              detail: "auto"
            }]
          });
        }
        break;
      }
      default:
        break;
    }
  }
  return items;
}
|
|
584
|
+
/**
 * Convert user message content into request content parts.
 * A plain string becomes a single `input_text` part. In an array, `text` parts
 * become `input_text` and every other part is encoded as an `input_image`
 * data URL built from its `mimeType` and `data`.
 */
function convertUserContent(content) {
  const textPart = (text) => ({ type: "input_text", text });
  if (typeof content === "string") return [textPart(content)];
  return content.map((part) => {
    if (part.type === "text") return textPart(part.text);
    const imageUrl = `data:${part.mimeType};base64,${part.data}`;
    return { type: "input_image", image_url: imageUrl, detail: "auto" };
  });
}
|
|
601
|
+
/** Join the non-empty `text` strings of an item's `content` blocks with newlines and trim the result. */
function extractMessageText(item) {
  const texts = [];
  for (const block of getArray(item, "content")) {
    const text = getString(block, "text");
    if (text) texts.push(text);
  }
  return texts.join("\n").trim();
}
|
|
604
|
+
/**
 * Parse a function call's `arguments` into a plain object.
 * Accepts a JSON string or an already-parsed object; anything else, and any
 * string that does not parse to an object or array, yields `{}`. Each
 * top-level value is passed through `normalizeArgumentValue`.
 */
function parseArguments$1(value) {
  let top;
  if (typeof value === "string" && value.trim()) {
    top = safeJsonParse(value);
  } else if (value && typeof value === "object") {
    top = value;
  } else {
    top = {};
  }
  if (!top || typeof top !== "object") return {};
  const out = {};
  Object.entries(top).forEach(([key, val]) => {
    out[key] = normalizeArgumentValue(key, val);
  });
  return out;
}
|
|
611
|
+
/** Argument keys whose string values `normalizeArgumentValue` tries to convert to numbers. */
const NUMERIC_ARGUMENT_KEYS = new Set(["x", "y", "scroll_x", "scroll_y", "ms", "duration"]);
|
|
619
|
+
/**
 * Recursively tidy one tool-call argument value.
 *
 * - Strings that start with `[` or `{` are JSON-parsed when possible.
 * - Strings under a numeric key (`NUMERIC_ARGUMENT_KEYS`) become numbers when
 *   `Number.parseFloat` yields a finite value; otherwise they stay strings.
 * - Array items are normalized under the same key; object entries under their
 *   own key.
 * - Everything else is returned unchanged.
 */
function normalizeArgumentValue(key, value) {
  let parsed = value;
  if (typeof value === "string" && looksLikeJson(value)) {
    parsed = safeJsonParse(value) ?? value;
  }
  if (typeof parsed === "string") {
    if (!NUMERIC_ARGUMENT_KEYS.has(key)) return parsed;
    const number = Number.parseFloat(parsed);
    return Number.isFinite(number) ? number : parsed;
  }
  if (Array.isArray(parsed)) {
    return parsed.map((item) => normalizeArgumentValue(key, item));
  }
  if (!parsed || typeof parsed !== "object") return parsed;
  const normalizedEntries = Object.entries(parsed).map(([childKey, childValue]) => [
    childKey,
    normalizeArgumentValue(childKey, childValue)
  ]);
  return Object.fromEntries(normalizedEntries);
}
|
|
629
|
+
/**
 * Parse JSON without throwing. Returns the parsed value only when it is an
 * object or array; invalid JSON, primitives and `null` all yield `null`.
 */
function safeJsonParse(value) {
  let parsed;
  try {
    parsed = JSON.parse(value);
  } catch {
    return null;
  }
  return parsed !== null && typeof parsed === "object" ? parsed : null;
}
|
|
637
|
+
/** True when the string's first non-whitespace character is `[` or `{`. */
function looksLikeJson(value) {
  const first = value.trim().charAt(0);
  return first === "[" || first === "{";
}
|
|
641
|
+
/**
 * Map the response's `usage` object onto the assistant message usage shape.
 * Missing counters read as 0. `totalTokens` uses `total_tokens` when it is
 * non-zero and otherwise the sum of input and output. Costs are always 0.
 */
function usageFromTzafon(usage) {
  const input = readUsageNumber(usage, "input_tokens");
  const output = readUsageNumber(usage, "output_tokens");
  const cacheRead = readUsageNumber(getValue(usage, "input_tokens_details"), "cached_tokens");
  const reportedTotal = readUsageNumber(usage, "total_tokens");
  const cost = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 };
  return {
    input,
    output,
    cacheRead,
    cacheWrite: 0,
    totalTokens: reportedTotal || input + output,
    cost
  };
}
|
|
659
|
+
/** Like `readOptionalNumber`, but a missing or unusable value reads as 0. */
function readUsageNumber(obj, key) {
  const parsed = readOptionalNumber(obj, key);
  return parsed == null ? 0 : parsed;
}
|
|
662
|
+
/**
 * Read `obj[key]` as a finite number.
 * Accepts finite numbers and non-blank strings that `Number()` converts to a
 * finite value. Returns `undefined` for everything else, including when `obj`
 * is not an object.
 */
function readOptionalNumber(obj, key) {
  if (!obj || typeof obj !== "object") return void 0;
  const raw = obj[key];
  switch (typeof raw) {
    case "number":
      return Number.isFinite(raw) ? raw : void 0;
    case "string": {
      if (!raw.trim()) return void 0;
      const converted = Number(raw);
      return Number.isFinite(converted) ? converted : void 0;
    }
    default:
      return void 0;
  }
}
|
|
671
|
+
/** Read `obj[key]` when it is an array; otherwise return a new empty array. */
function getArray(obj, key) {
  const candidate = getValue(obj, key);
  if (Array.isArray(candidate)) return candidate;
  return [];
}
|
|
675
|
+
/** Read `obj[key]` when it is a string; otherwise return `""`. */
function getString(obj, key) {
  const candidate = getValue(obj, key);
  if (typeof candidate === "string") return candidate;
  return "";
}
|
|
679
|
+
/** Read `obj[key]`, returning `undefined` when `obj` is not a non-null object. */
function getValue(obj, key) {
  return obj && typeof obj === "object" ? obj[key] : void 0;
}
|
|
683
|
+
//#endregion
|
|
684
|
+
//#region src/providers/yutori/actions.ts
|
|
685
|
+
/**
 * Native Yutori Navigator n1.5 tool-set ids.
 *
 * Source of truth:
 * - https://docs.yutori.com/reference/n1-5
 * - https://docs.yutori.com/llm-quickstart.md
 */
const YUTORI_N15_CORE_TOOL_SET = "browser_tools_core-20260403";
const YUTORI_N15_EXPANDED_TOOL_SET = "browser_tools_expanded-20260403";

/**
 * DOM/ref-backed Navigator n1.5 actions. These stay disabled on purpose until
 * CuaAgent has the ref/DOM execution path that Yutori documents for the
 * expanded tool set.
 */
const YUTORI_N15_EXPANDED_ACTION_TYPES = ["extract_elements", "find", "set_element_value", "execute_js"];

/**
 * Navigator n1's fixed legacy browser action space.
 *
 * Source of truth: https://docs.yutori.com/reference/n1
 */
const YUTORI_N1_ACTION_TYPES = [
  "left_click", "double_click", "right_click", "triple_click",
  "type", "key_press",
  "scroll", "hover", "drag",
  "goto_url", "go_back", "refresh",
  "wait"
];

/**
 * Navigator n1.5 core visual action space: the actions available when
 * `tool_set` is `browser_tools_core-20260403`. This keeps CuaAgent on the pure
 * screenshot/coordinate path and avoids DOM refs.
 *
 * Source of truth: https://docs.yutori.com/reference/n1-5
 */
const YUTORI_N15_CORE_ACTION_TYPES = [
  "left_click", "double_click", "triple_click", "middle_click", "right_click",
  "mouse_move", "mouse_down", "mouse_up",
  "drag", "scroll",
  "type", "key_press", "hold_key",
  "goto_url", "go_back", "go_forward", "refresh",
  "wait"
];

/** Every Navigator n1.5 action type: the core actions followed by the expanded ones. */
const YUTORI_N15_ACTION_TYPES = YUTORI_N15_CORE_ACTION_TYPES.concat(YUTORI_N15_EXPANDED_ACTION_TYPES);
|
|
753
|
+
/**
|
|
754
|
+
* Canonical CUA action types Yutori's native actions normalize into. These are
|
|
755
|
+
* the tool-call names {@link streamYutori} emits and the local executors
|
|
756
|
+
* CuaAgent installs for Yutori models.
|
|
757
|
+
*/
|
|
758
|
+
const YUTORI_CUA_ACTION_TYPES = [
|
|
759
|
+
"click",
|
|
760
|
+
"double_click",
|
|
761
|
+
"mouse_down",
|
|
762
|
+
"mouse_up",
|
|
763
|
+
"type",
|
|
764
|
+
"keypress",
|
|
765
|
+
"scroll",
|
|
766
|
+
"move",
|
|
767
|
+
"drag",
|
|
768
|
+
"wait",
|
|
769
|
+
"goto",
|
|
770
|
+
"back",
|
|
771
|
+
"forward"
|
|
772
|
+
];
|
|
773
|
+
const DEFAULT_SCROLL_AMOUNT = 3;
|
|
774
|
+
const SCROLL_AMOUNT_PER_NOTCH = 120;
|
|
775
|
+
const DEFAULT_WAIT_MS = 2e3;
|
|
776
|
+
const NAVIGATION_WAIT_MS = 1500;
|
|
777
|
+
const GOTO_WAIT_MS = 2e3;
|
|
778
|
+
/**
 * Validate a requested list of canonical action names.
 *
 * @param {Iterable<string> | null | undefined} actions - Requested names;
 *   when nullish, every entry of `YUTORI_CUA_ACTION_TYPES` is used.
 * @returns {string[]} The requested names, in request order.
 * @throws {Error} When any requested name is not a Yutori canonical action.
 */
function resolveYutoriActions(actions) {
	const requested = actions ?? YUTORI_CUA_ACTION_TYPES;
	const accepted = [];
	const rejected = [];
	for (const candidate of requested) {
		const bucket = isYutoriCanonicalAction(candidate) ? accepted : rejected;
		bucket.push(candidate);
	}
	if (rejected.length > 0) throw new Error(`unsupported Yutori canonical action(s): ${rejected.join(", ")}`);
	return accepted;
}
|
|
787
|
+
/**
 * Report whether `action` is one of the names in `YUTORI_CUA_ACTION_TYPES`.
 *
 * @param {unknown} action - Candidate action name.
 * @returns {boolean}
 */
function isYutoriCanonicalAction(action) {
	return YUTORI_CUA_ACTION_TYPES.some((known) => known === action);
}
|
|
790
|
+
/**
 * Build the action schema for the Yutori-supported canonical browser actions.
 *
 * @param {string[] | undefined} actions - Optional subset of canonical action names.
 * @returns The value produced by `createCuaActionSchema` for the validated names.
 * @throws {Error} When `actions` contains an unsupported name.
 */
function createActionSchema$1(actions) {
	const validated = resolveYutoriActions(actions);
	return createCuaActionSchema(validated);
}
|
|
794
|
+
/**
 * Create local mirrors of the canonical action tools that Yutori models call.
 *
 * These definitions never reach the API. `streamYutori` removes them from the
 * outbound payload, selects Yutori's native `tool_set`, and then normalizes the
 * model's native tool calls back into these canonical names. They are installed
 * locally so the normalized calls have matching executors —
 * `providerModule.toolDefinitions()` is intentionally `[]`.
 *
 * @param {{ actions?: string[] }} [options] - Pass `actions` to mirror only a
 *   supported subset, such as `["click"]`.
 * @returns The value produced by `createCuaActionToolDefinitions`.
 */
function computerTools$2(options = {}) {
	const { actions } = options;
	const validated = resolveYutoriActions(actions);
	return createCuaActionToolDefinitions(validated);
}
|
|
807
|
+
/**
 * Create the local execution adapters used by CuaAgent and CuaAgentHarness.
 *
 * @param {{ actions?: string[] }} [options] - Optional subset of canonical action names.
 * @returns The value produced by `createCuaActionToolExecutors`.
 */
function computerToolExecutors$2(options = {}) {
	const { actions } = options;
	const validated = resolveYutoriActions(actions);
	return createCuaActionToolExecutors(validated);
}
|
|
811
|
+
/**
 * Pick the native `tool_set` id for a model.
 *
 * @param {string} modelId - Model id.
 * @returns {string | undefined} The n1.5 core tool-set id for ids starting
 *   with "n1.5"; `undefined` for every other id.
 */
function yutoriToolSetForModel(modelId) {
	if (modelId.startsWith("n1.5")) return YUTORI_N15_CORE_TOOL_SET;
	return void 0;
}
|
|
814
|
+
/**
 * Pick the native action list for a model.
 *
 * @param {string} modelId - Model id.
 * @returns {string[]} The n1.5 core action list for ids starting with "n1.5",
 *   otherwise the n1 action list.
 */
function yutoriNativeActionsForModel(modelId) {
	if (modelId.startsWith("n1.5")) return YUTORI_N15_CORE_ACTION_TYPES;
	return YUTORI_N1_ACTION_TYPES;
}
|
|
817
|
+
/**
 * Report whether a tool name is one of the canonical action names in
 * `YUTORI_CUA_ACTION_TYPES`.
 *
 * @param {unknown} name - Tool name to check.
 * @returns {boolean}
 */
function isYutoriLocalActionToolName(name) {
	return YUTORI_CUA_ACTION_TYPES.some((known) => known === name);
}
|
|
820
|
+
/**
 * Translate one native Yutori tool call into canonical CUA actions.
 *
 * @param {string} name - Native action name, e.g. "left_click" or "goto_url".
 * @param {object} args - Parsed tool-call arguments.
 * @returns {object[] | undefined} One or more canonical actions, or
 *   `undefined` when the name is unknown or the arguments it needs
 *   (coordinates, URL, keys, text) are missing or unusable.
 */
function toCanonicalActions(name, args) {
	const target = readPoint(args.coordinates);
	// Builders are only called once `target` is known to exist.
	const pointerAt = (type, extra) => ({
		type,
		x: target.x,
		y: target.y,
		...extra,
		...holdKeys(args.modifier)
	});
	const pause = (ms) => ({
		type: "wait",
		ms
	});
	switch (name) {
		case "left_click": return target ? [pointerAt("click")] : void 0;
		case "right_click": return target ? [pointerAt("click", { button: "right" })] : void 0;
		case "middle_click": return target ? [pointerAt("click", { button: "middle" })] : void 0;
		case "double_click": return target ? [pointerAt("double_click")] : void 0;
		// A triple click is emitted as a double click followed by a single click.
		case "triple_click": return target ? [pointerAt("double_click"), pointerAt("click")] : void 0;
		case "mouse_move":
		case "hover":
			if (!target) return void 0;
			return [{
				type: "move",
				x: target.x,
				y: target.y
			}];
		case "mouse_down": return target ? [pointerAt("mouse_down")] : void 0;
		case "mouse_up": return target ? [pointerAt("mouse_up")] : void 0;
		case "drag": {
			const origin = readPoint(args.start_coordinates);
			if (!origin || !target) return void 0;
			return [{
				type: "drag",
				path: [origin, target],
				button: "left"
			}];
		}
		case "scroll": return toScrollAction(args, target);
		case "type": return toTypeActions(args);
		case "key_press": return toKeypressAction(args);
		case "hold_key": return toHoldKeyAction(args);
		case "goto_url": {
			const url = normalizeGotoUrl(args.url);
			if (!url) return void 0;
			return [{
				type: "goto",
				url
			}, pause(GOTO_WAIT_MS)];
		}
		case "go_back": return [{ type: "back" }, pause(NAVIGATION_WAIT_MS)];
		case "go_forward": return [{ type: "forward" }, pause(NAVIGATION_WAIT_MS)];
		// Refresh is expressed as an F5 keypress followed by a wait.
		case "refresh": return [{
			type: "keypress",
			keys: ["f5"]
		}, pause(DEFAULT_WAIT_MS)];
		case "wait": return [pause(secondsToMs(args.duration, DEFAULT_WAIT_MS))];
		default: return void 0;
	}
}
|
|
922
|
+
/**
 * Parse a `[x, y]` coordinate pair.
 *
 * Each component must be a finite number or a non-blank numeric string.
 * Other values (`null`, booleans, empty strings, nested arrays) are rejected
 * rather than coerced, because `Number(null)`, `Number("")` and `Number([])`
 * all evaluate to 0 and would silently become a click at the origin.
 *
 * @param {unknown} value - Candidate coordinate array; extra elements are ignored.
 * @returns {{ x: number, y: number } | undefined} The point, or `undefined`
 *   when `value` is not an array of at least two usable components.
 */
function readPoint(value) {
	if (!Array.isArray(value) || value.length < 2) return void 0;
	const toCoordinate = (raw) => {
		if (typeof raw === "number") return Number.isFinite(raw) ? raw : void 0;
		if (typeof raw === "string" && raw.trim()) {
			const converted = Number(raw);
			return Number.isFinite(converted) ? converted : void 0;
		}
		return void 0;
	};
	const x = toCoordinate(value[0]);
	const y = toCoordinate(value[1]);
	if (x === void 0 || y === void 0) return void 0;
	return {
		x,
		y
	};
}
|
|
932
|
+
/**
 * Build the canonical scroll action for a native `scroll` call.
 *
 * The amount is truncated to whole notches (minimum 1) and multiplied by
 * `SCROLL_AMOUNT_PER_NOTCH`. A missing, non-numeric, `NaN` or infinite amount
 * falls back to `DEFAULT_SCROLL_AMOUNT`, so a malformed amount can never
 * produce `NaN`/`Infinity` scroll deltas.
 *
 * @param {object} args - Native arguments (`direction`, `amount`, `modifier`).
 * @param {{ x: number, y: number } | undefined} coords - Scroll anchor point.
 * @returns {object[] | undefined} A one-element action list, or `undefined`
 *   when no anchor point is available.
 */
function toScrollAction(args, coords) {
	if (!coords) return void 0;
	// A non-string direction defaults to "down". A string outside
	// left/right/up/down leaves both deltas at 0.
	const direction = typeof args.direction === "string" ? args.direction : "down";
	const amount = typeof args.amount === "number" && Number.isFinite(args.amount) ? args.amount : DEFAULT_SCROLL_AMOUNT;
	const ticks = Math.max(1, Math.trunc(amount)) * SCROLL_AMOUNT_PER_NOTCH;
	const scroll_x = direction === "left" ? -ticks : direction === "right" ? ticks : 0;
	const scroll_y = direction === "up" ? -ticks : direction === "down" ? ticks : 0;
	return [{
		type: "scroll",
		x: coords.x,
		y: coords.y,
		scroll_x,
		scroll_y,
		...holdKeys(args.modifier)
	}];
}
|
|
948
|
+
/**
 * Build the canonical actions for a native `type` call.
 *
 * @param {object} args - Native arguments: `text`, plus the optional flags
 *   `clear_before_typing` and `press_enter_after` (honoured only when exactly `true`).
 * @returns {object[] | undefined} Optional select-all + backspace keypresses,
 *   the `type` action, then an optional Enter keypress; `undefined` when
 *   `text` is not a string.
 */
function toTypeActions(args) {
	if (typeof args.text !== "string") return void 0;
	const clearField = args.clear_before_typing === true ? [{
		type: "keypress",
		keys: ["ctrl", "a"]
	}, {
		type: "keypress",
		keys: ["backspace"]
	}] : [];
	const typeText = {
		type: "type",
		text: args.text
	};
	const submit = args.press_enter_after === true ? [{
		type: "keypress",
		keys: ["enter"]
	}] : [];
	return [
		...clearField,
		typeText,
		...submit
	];
}
|
|
969
|
+
/**
 * Build canonical keypress actions for a native `key_press` call.
 *
 * @param {object} args - Native arguments; `key_comb` is preferred, with `key`
 *   used when `key_comb` is nullish.
 * @returns {object[] | undefined} One keypress action per combo in the parsed
 *   sequence, or `undefined` when no key could be read.
 */
function toKeypressAction(args) {
	const combos = readKeySequence(args.key_comb ?? args.key);
	if (combos.length === 0) return void 0;
	return combos.map((keys) => ({
		type: "keypress",
		keys
	}));
}
|
|
976
|
+
/**
 * Build the canonical keypress-with-duration action for a native `hold_key` call.
 *
 * @param {object} args - Native arguments; `key_comb` is preferred over `key`,
 *   and `duration` is in seconds (1000 ms is used when it is unusable).
 * @returns {object[] | undefined} A one-element action list, or `undefined`
 *   when no key could be read.
 */
function toHoldKeyAction(args) {
	const keys = readKeyCombo(args.key_comb ?? args.key);
	if (keys.length === 0) return void 0;
	const held = {
		type: "keypress",
		keys,
		duration: secondsToMs(args.duration, 1e3)
	};
	return [held];
}
|
|
984
|
+
/**
 * Split a "+"-joined key combination such as "ctrl+shift+a" into key names.
 *
 * @param {unknown} value - Combo string; non-strings yield an empty list.
 * @returns {string[]} Trimmed, non-empty key names in their original order.
 */
function readKeyCombo(value) {
	if (typeof value !== "string") return [];
	const keys = [];
	for (const piece of value.split("+")) {
		const key = piece.trim();
		if (key) keys.push(key);
	}
	return keys;
}
|
|
988
|
+
/**
 * Parse a whitespace-separated sequence of key combos, e.g. "ctrl+a backspace".
 *
 * @param {unknown} value - Sequence string; non-strings yield an empty list.
 * @returns {string[][]} One key list per combo, with empty combos dropped.
 */
function readKeySequence(value) {
	if (typeof value !== "string") return [];
	const sequence = [];
	for (const token of value.trim().split(/\s+/)) {
		const combo = readKeyCombo(token);
		if (combo.length > 0) sequence.push(combo);
	}
	return sequence;
}
|
|
992
|
+
/**
 * Turn a native `modifier` argument into a spreadable `hold_keys` fragment.
 *
 * @param {unknown} value - Modifier name.
 * @returns {{ hold_keys?: string[] }} `{ hold_keys: [modifier] }` for a
 *   non-blank string (trimmed), otherwise an empty object.
 */
function holdKeys(value) {
	const modifier = typeof value === "string" ? value.trim() : "";
	if (modifier === "") return {};
	return { hold_keys: [modifier] };
}
|
|
997
|
+
/**
 * Convert a duration in seconds to whole milliseconds.
 *
 * @param {unknown} value - Duration in seconds.
 * @param {number} fallback - Returned as-is when `value` is not a finite
 *   number greater than zero.
 * @returns {number} `value * 1000` rounded to the nearest integer, or `fallback`.
 */
function secondsToMs(value, fallback) {
	const usable = typeof value === "number" && Number.isFinite(value) && value > 0;
	return usable ? Math.round(value * 1e3) : fallback;
}
|
|
1001
|
+
//#endregion
|
|
1002
|
+
//#region src/providers/yutori/provider.ts
|
|
1003
|
+
const YUTORI_CHAT_COMPLETIONS_API = "yutori-chat-completions";
|
|
1004
|
+
/**
 * Start a Yutori request and return the event stream it reports into.
 *
 * The stream is returned immediately; `runYutoriStream` is started without
 * being awaited and pushes its events into the stream as it runs.
 */
const streamYutori = (model, context, options) => {
	const events = createAssistantMessageEventStream();
	void runYutoriStream(events, model, context, options);
	return events;
};
/** Simple-stream entry point; forwards its arguments to {@link streamYutori} unchanged. */
const streamSimpleYutori = (model, context, options) => {
	return streamYutori(model, context, options);
};
|
|
1010
|
+
/**
 * Rewrite an outbound payload to use Yutori's native tool set.
 *
 * Local canonical action tools are removed from `tools` unless their name is
 * listed in `context.keepToolNames`; tools with no readable name are kept.
 * When the model maps to a native tool set, `tool_set` is added together with
 * a `disable_tools` list of the expanded actions. If no tools remain, `tools`
 * is set to `undefined`.
 *
 * @param {unknown} payload - Outbound request payload.
 * @param {{ id: string } | undefined} model - Target model.
 * @param {{ keepToolNames?: string[] } | undefined} context - Optional keep-list.
 * @returns {object | undefined} A new payload object, or `undefined` when
 *   `payload` is not an object.
 */
function yutoriNativeToolSetOnPayload(payload, model, context) {
	if (payload === null || typeof payload !== "object") return void 0;
	const retained = new Set(context?.keepToolNames ?? []);
	let remoteTools;
	if (Array.isArray(payload.tools)) remoteTools = payload.tools.filter((tool) => {
		const toolName = readToolName(tool);
		if (!toolName) return true;
		return retained.has(toolName) || !isYutoriLocalActionToolName(toolName);
	});
	const toolSet = model ? yutoriToolSetForModel(model.id) : void 0;
	const next = { ...payload };
	if (toolSet) {
		next.tool_set = toolSet;
		next.disable_tools = [...YUTORI_N15_EXPANDED_ACTION_TYPES];
	}
	next.tools = remoteTools && remoteTools.length > 0 ? remoteTools : void 0;
	return next;
}
|
|
1028
|
+
/**
 * CUA payload middleware for yutori. First maps local executor tools onto the
 * native tool set, then appends a fresh screenshot to the latest user/tool
 * message per yutori's screenshot policy.
 *
 * Each step may decline by returning `undefined`, in which case the payload
 * from the previous step is carried forward.
 */
async function yutoriCuaOnPayload(payload, model, context) {
	const withToolSet = yutoriNativeToolSetOnPayload(payload, model, context) ?? payload;
	const withScreenshot = await appendScreenshotToLatestMessage(withToolSet, context?.getScreenshot);
	return withScreenshot ?? withToolSet;
}
|
|
1037
|
+
/**
 * Attach a fresh screenshot to the final message of a payload.
 *
 * Applies only when the last message has role "user" or "tool" and does not
 * already contain an `image_url` part. The message content is normalized to a
 * parts array, then a "\n\n" text part and the screenshot (as a base64 data
 * URL with `detail: "high"`) are appended. Neither the input payload nor its
 * messages are mutated.
 *
 * @param {unknown} payload - Outbound request payload.
 * @param {(() => Promise<{ mimeType: string, data: { toString(encoding: string): string } }>) | undefined} getScreenshot
 *   Screenshot provider; it is only invoked once all preconditions hold.
 * @returns {Promise<object | undefined>} The updated payload copy, or
 *   `undefined` when nothing was appended.
 */
async function appendScreenshotToLatestMessage(payload, getScreenshot) {
	if (!getScreenshot) return void 0;
	if (!payload || typeof payload !== "object") return void 0;
	const { messages } = payload;
	if (!Array.isArray(messages) || messages.length === 0) return void 0;
	const latest = messages[messages.length - 1];
	if (!latest || typeof latest !== "object") return void 0;
	const acceptsScreenshot = latest.role === "user" || latest.role === "tool";
	if (!acceptsScreenshot) return void 0;
	if (contentHasImage(latest.content)) return void 0;
	const shot = await getScreenshot();
	const existingParts = normalizePayloadContent(latest.content);
	const separatorPart = {
		type: "text",
		text: "\n\n"
	};
	const imagePart = {
		type: "image_url",
		image_url: {
			url: `data:${shot.mimeType};base64,${shot.data.toString("base64")}`,
			detail: "high"
		}
	};
	const updatedLatest = {
		...latest,
		content: [
			...existingParts,
			separatorPart,
			imagePart
		]
	};
	return {
		...payload,
		messages: [...messages.slice(0, -1), updatedLatest]
	};
}
|
|
1072
|
+
/**
 * Coerce message content into an array of content parts.
 *
 * @param {unknown} content - A string, an array of parts, or anything else.
 * @returns {object[]} A single text part for a string; the object entries of
 *   an array (falsy and primitive entries dropped); otherwise an empty array.
 */
function normalizePayloadContent(content) {
	if (Array.isArray(content)) return content.filter((part) => Boolean(part) && typeof part === "object");
	if (typeof content !== "string") return [];
	return [{
		type: "text",
		text: content
	}];
}
|
|
1080
|
+
/**
 * Report whether message content already contains an `image_url` part.
 *
 * @param {unknown} content - Message content; only arrays are inspected.
 * @returns {boolean}
 */
function contentHasImage(content) {
	if (!Array.isArray(content)) return false;
	for (const part of content) {
		const isObjectPart = Boolean(part) && typeof part === "object";
		if (isObjectPart && part.type === "image_url") return true;
	}
	return false;
}
|
|
1085
|
+
/**
 * Perform one Yutori chat-completions request and replay the (non-streamed)
 * response into `stream` as assistant-message events.
 *
 * Event order on success: `start`, optional text events, then
 * `toolcall_start`/`toolcall_delta`/`toolcall_end` per emitted tool call, then
 * `done`. A native tool call that maps to canonical actions is emitted as one
 * tool call per action (ids suffixed `_0`, `_1`, … when there are several);
 * any other function call is passed through under its original name. Any
 * thrown error is reported as a single `error` event, with stop reason
 * "aborted" when the abort signal fired. The stream is always ended.
 *
 * @param stream - Event stream to push into.
 * @param model - Target model (`id`, `baseUrl`, `headers`, `maxTokens`).
 * @param context - Conversation context (system prompt, messages, tools).
 * @param [options] - Optional `apiKey`, `maxTokens`, `temperature`, `signal`,
 *   `keepToolNames`, and the `onPayload`/`onResponse` hooks.
 */
async function runYutoriStream(stream, model, context, options) {
	const output = initialAssistantMessage(model);
	// Records a tool call on the message and emits its three events. When
	// `rawDelta` is omitted, the delta is the JSON form of the call arguments.
	const emitToolCall = (toolCall, rawDelta) => {
		const contentIndex = output.content.length;
		output.content.push(toolCall);
		output.stopReason = "toolUse";
		stream.push({
			type: "toolcall_start",
			contentIndex,
			partial: output
		});
		stream.push({
			type: "toolcall_delta",
			contentIndex,
			delta: rawDelta === void 0 ? JSON.stringify(toolCall.arguments) : rawDelta,
			partial: output
		});
		stream.push({
			type: "toolcall_end",
			contentIndex,
			toolCall,
			partial: output
		});
	};
	try {
		const apiKey = options?.apiKey || process.env.YUTORI_API_KEY;
		if (!apiKey) throw new Error("missing Yutori API key");
		const client = new OpenAI({
			apiKey,
			baseURL: model.baseUrl || "https://api.yutori.com/v1",
			defaultHeaders: model.headers
		});
		const basePayload = {
			model: model.id,
			messages: convertMessages(context),
			max_completion_tokens: options?.maxTokens ?? model.maxTokens,
			temperature: options?.temperature ?? .3
		};
		const tools = convertTools(context);
		if (tools.length > 0) basePayload.tools = tools;
		const keepToolNames = [...keepToolNamesFromContext(context), ...options?.keepToolNames ?? []];
		let payload = yutoriNativeToolSetOnPayload(basePayload, model, { keepToolNames });
		// The caller's hook may replace the payload; `undefined` keeps ours.
		const replacement = await options?.onPayload?.(payload, model);
		if (replacement !== void 0) payload = replacement;
		const { data: completion, response: rawResponse } = await client.chat.completions.create(payload, { signal: options?.signal }).withResponse();
		await options?.onResponse?.({
			status: rawResponse.status,
			headers: headersToRecord(rawResponse.headers)
		}, model);
		stream.push({
			type: "start",
			partial: output
		});
		const choice = completion.choices?.[0];
		const message = choice?.message;
		output.responseId = completion.id;
		output.usage = usageFromYutori(completion.usage);
		switch (choice?.finish_reason) {
			case "tool_calls":
				output.stopReason = "toolUse";
				break;
			case "length":
				output.stopReason = "length";
				break;
		}
		const text = typeof message?.content === "string" ? message.content : "";
		if (text) emitText(stream, output, text);
		for (const call of message?.tool_calls ?? []) {
			if (call.type !== "function") continue;
			const args = parseArguments(call.function.arguments);
			const canonical = toCanonicalActions(call.function.name, args);
			if (!canonical || canonical.length === 0) {
				// No canonical mapping: forward the call under its native name.
				emitToolCall({
					type: "toolCall",
					id: call.id,
					name: call.function.name,
					arguments: args
				}, call.function.arguments ?? "");
				continue;
			}
			for (const [position, action] of canonical.entries()) emitToolCall({
				type: "toolCall",
				id: canonical.length === 1 ? call.id : `${call.id}_${position}`,
				name: canonicalToolCallName(action),
				arguments: canonicalToolCallArguments(action)
			});
		}
		stream.push({
			type: "done",
			reason: output.stopReason,
			message: output
		});
		stream.end();
	} catch (err) {
		output.stopReason = options?.signal?.aborted ? "aborted" : "error";
		output.errorMessage = err instanceof Error ? err.message : String(err);
		stream.push({
			type: "error",
			reason: output.stopReason,
			error: output
		});
		stream.end();
	}
}
|
|
1204
|
+
/**
 * List the context's tool names that are not local canonical action tools.
 *
 * @param {{ tools?: { name: string }[] }} context - Conversation context.
 * @returns {string[]} Names in declaration order; empty when there are no tools.
 */
function keepToolNamesFromContext(context) {
	const kept = [];
	for (const tool of context.tools ?? []) {
		const toolName = tool.name;
		if (!isYutoriLocalActionToolName(toolName)) kept.push(toolName);
	}
	return kept;
}
|
|
1207
|
+
/**
 * Create the empty assistant message that a Yutori run fills in.
 *
 * @param {{ api: string, provider: string, id: string }} model - Target model.
 * @returns {object} Assistant message with no content, zeroed usage and cost,
 *   stop reason "stop", and the current time as `timestamp`.
 */
function initialAssistantMessage(model) {
	const cost = {
		input: 0,
		output: 0,
		cacheRead: 0,
		cacheWrite: 0,
		total: 0
	};
	const usage = {
		input: 0,
		output: 0,
		cacheRead: 0,
		cacheWrite: 0,
		totalTokens: 0,
		cost
	};
	const { api, provider, id } = model;
	return {
		role: "assistant",
		content: [],
		api,
		provider,
		model: id,
		usage,
		stopReason: "stop",
		timestamp: Date.now()
	};
}
|
|
1232
|
+
/**
 * Convert the conversation context into chat-completions messages.
 *
 * - A non-empty `systemPrompt` becomes a leading "system" message.
 * - "user" messages keep string content as-is; part arrays are mapped through
 *   `toOpenAIContentPart`.
 * - "assistant" messages join their text parts into `content` (`null` when
 *   empty) and turn `toolCall` parts into `tool_calls` with JSON-encoded arguments.
 * - "toolResult" messages become "tool" messages keyed by `tool_call_id`.
 * - Messages with any other role are skipped.
 *
 * @param {{ systemPrompt?: string, messages: object[] }} context
 * @returns {object[]} Messages in their original order.
 */
function convertMessages(context) {
	const converted = [];
	if (context.systemPrompt) converted.push({
		role: "system",
		content: context.systemPrompt
	});
	for (const message of context.messages) switch (message.role) {
		case "user":
			converted.push({
				role: "user",
				content: typeof message.content === "string" ? message.content : message.content.map(toOpenAIContentPart)
			});
			break;
		case "assistant": {
			let text = "";
			const toolCalls = [];
			for (const part of message.content) if (part.type === "text") text += part.text;
			else if (part.type === "toolCall") toolCalls.push({
				id: part.id,
				type: "function",
				function: {
					name: part.name,
					arguments: JSON.stringify(part.arguments ?? {})
				}
			});
			const assistant = {
				role: "assistant",
				content: text || null
			};
			if (toolCalls.length > 0) assistant.tool_calls = toolCalls;
			converted.push(assistant);
			break;
		}
		case "toolResult":
			converted.push({
				role: "tool",
				tool_call_id: message.toolCallId,
				content: message.content.map(toOpenAIContentPart)
			});
			break;
	}
	return converted;
}
|
|
1264
|
+
/**
 * Convert the context's tool definitions into chat-completions function tools.
 *
 * @param {{ tools?: { name: string, description?: string, parameters?: object }[] }} context
 * @returns {object[]} One `{ type: "function", function }` entry per tool;
 *   empty when the context has no tools.
 */
function convertTools(context) {
	const declared = context.tools ?? [];
	return declared.map(({ name, description, parameters }) => {
		return {
			type: "function",
			function: {
				name,
				description,
				parameters
			}
		};
	});
}
|
|
1274
|
+
/**
 * Append a text part to the message and emit its start/delta/end events.
 *
 * @param stream - Event stream to push into.
 * @param output - Assistant message under construction (its `content` is mutated).
 * @param {string} text - Full text, delivered as a single delta.
 */
function emitText(stream, output, text) {
	const contentIndex = output.content.length;
	output.content.push({
		type: "text",
		text
	});
	const events = [
		{
			type: "text_start",
			contentIndex,
			partial: output
		},
		{
			type: "text_delta",
			contentIndex,
			delta: text,
			partial: output
		},
		{
			type: "text_end",
			contentIndex,
			content: text,
			partial: output
		}
	];
	for (const event of events) stream.push(event);
}
|
|
1299
|
+
/**
 * Convert one content part into its chat-completions form.
 *
 * @param {{ type: string, text?: string, mimeType?: string, data?: string }} part
 * @returns {object} A text part for `type: "text"`; every other part is
 *   rendered as an `image_url` part carrying a base64 data URL.
 */
function toOpenAIContentPart(part) {
	if (part.type !== "text") {
		const url = `data:${part.mimeType};base64,${part.data}`;
		return {
			type: "image_url",
			image_url: { url }
		};
	}
	return {
		type: "text",
		text: part.text
	};
}
|
|
1309
|
+
/**
 * Decode a tool call's `arguments` into a plain object.
 *
 * The value is normally a JSON string. A value that is already a decoded
 * object is returned as-is, and any other non-string value yields `{}`, so an
 * unexpected `arguments` type never throws. Blank strings, invalid JSON, and
 * JSON that is not an object (arrays, numbers, `null`, …) also yield `{}`.
 *
 * @param {unknown} value - Raw `function.arguments` value.
 * @returns {object} The decoded arguments object, or `{}`.
 */
function parseArguments(value) {
	if (value !== null && typeof value === "object") return Array.isArray(value) ? {} : value;
	if (typeof value !== "string" || !value.trim()) return {};
	try {
		const parsed = JSON.parse(value);
		return parsed && typeof parsed === "object" && !Array.isArray(parsed) ? parsed : {};
	} catch {
		// Malformed JSON from the model is treated as "no arguments".
		return {};
	}
}
|
|
1318
|
+
/**
 * Convert a chat-completions usage object into the normalized usage record.
 *
 * Cache counters and every cost field are reported as 0. When `total_tokens`
 * is absent or zero, the total falls back to prompt + completion tokens.
 *
 * @param {unknown} usage - Raw `usage` value from the completion.
 * @returns {object} Normalized usage.
 */
function usageFromYutori(usage) {
	const promptTokens = readNumber(usage, "prompt_tokens");
	const completionTokens = readNumber(usage, "completion_tokens");
	const reportedTotal = readNumber(usage, "total_tokens");
	const cost = {
		input: 0,
		output: 0,
		cacheRead: 0,
		cacheWrite: 0,
		total: 0
	};
	return {
		input: promptTokens,
		output: completionTokens,
		cacheRead: 0,
		cacheWrite: 0,
		totalTokens: reportedTotal || promptTokens + completionTokens,
		cost
	};
}
|
|
1336
|
+
/**
 * Read a tool's name from either supported shape.
 *
 * @param {unknown} tool - Tool definition.
 * @returns {string | undefined} `tool.function.name` when it is a string,
 *   else `tool.name` when it is a string, else `undefined`.
 */
function readToolName(tool) {
	if (tool === null || typeof tool !== "object") return void 0;
	const nestedName = tool.function?.name;
	if (typeof nestedName === "string") return nestedName;
	const flatName = tool.name;
	return typeof flatName === "string" ? flatName : void 0;
}
|
|
1342
|
+
/**
 * Read `value[key]` as a finite number.
 *
 * @param {unknown} value - Container to read from.
 * @param {string} key - Field name.
 * @returns {number} The stored finite number, or 0 for anything else
 *   (strings are not converted).
 */
function readNumber(value, key) {
	if (value === null || typeof value !== "object") return 0;
	const candidate = value[key];
	if (typeof candidate !== "number") return 0;
	return Number.isFinite(candidate) ? candidate : 0;
}
|
|
1347
|
+
/**
 * Copy response headers into a plain object.
 *
 * @param {{ forEach(callback: (value: string, key: string) => void): void }} headers
 *   Any collection whose `forEach` passes `(value, key)` to its callback.
 * @returns {Record<string, string>} Header names mapped to their values.
 */
function headersToRecord(headers) {
	const record = {};
	const copyEntry = (headerValue, headerName) => {
		record[headerName] = headerValue;
	};
	headers.forEach(copyEntry);
	return record;
}
|
|
1354
|
+
//#endregion
|
|
1355
|
+
//#region src/providers.ts
|
|
1356
|
+
/**
 * Register the Yutori and Tzafon stream providers with pi-ai's global API
 * registry. Importing `@onkernel/cua-ai` calls this automatically.
 *
 * The pi-ai registry mutators re-exported by this package
 * (`clearApiProviders`, `resetApiProviders`, `unregisterApiProviders`) remove
 * these providers, after which Yutori/Tzafon streaming fails until they are
 * registered again. Call this function to restore them; it is idempotent and
 * safe to call repeatedly.
 */
function registerCuaProviders() {
	const providers = [{
		api: YUTORI_CHAT_COMPLETIONS_API,
		stream: streamYutori,
		streamSimple: streamSimpleYutori
	}, {
		api: TZAFON_RESPONSES_API,
		stream: streamTzafonResponses,
		streamSimple: streamSimpleTzafonResponses
	}];
	for (const provider of providers) registerApiProvider(provider);
}
|
|
1377
|
+
//#endregion
|
|
1378
|
+
//#region src/models.ts
|
|
1379
|
+
/** All providers this package curates computer-use models for. */
|
|
1380
|
+
const CUA_PROVIDERS = [
|
|
1381
|
+
"openai",
|
|
1382
|
+
"anthropic",
|
|
1383
|
+
"google",
|
|
1384
|
+
"tzafon",
|
|
1385
|
+
"yutori"
|
|
1386
|
+
];
|
|
1387
|
+
/**
|
|
1388
|
+
* Per-provider computer-use support annotations.
|
|
1389
|
+
*
|
|
1390
|
+
* pi-ai's model registry is generated from models.dev (see
|
|
1391
|
+
* node_modules/@earendil-works/pi-ai/scripts/generate-models.ts) and lists every
|
|
1392
|
+
* model a provider offers. Only some of those models support computer-use, so
|
|
1393
|
+
* this table layers per-provider CUA-support annotations on top of the
|
|
1394
|
+
* registry. Each entry cites the official source documenting CUA support.
|
|
1395
|
+
*
|
|
1396
|
+
* To verify support and add new entries, follow the `update-models` skill at
|
|
1397
|
+
* .agents/skills/update-models/SKILL.md.
|
|
1398
|
+
*/
|
|
1399
|
+
const CUA_MODEL_ANNOTATIONS = {
|
|
1400
|
+
openai: [{
|
|
1401
|
+
match: {
|
|
1402
|
+
kind: "family",
|
|
1403
|
+
family: "gpt-5.4"
|
|
1404
|
+
},
|
|
1405
|
+
source: "https://developers.openai.com/api/docs/models/gpt-5.4"
|
|
1406
|
+
}, {
|
|
1407
|
+
match: {
|
|
1408
|
+
kind: "family",
|
|
1409
|
+
family: "gpt-5.5"
|
|
1410
|
+
},
|
|
1411
|
+
source: "https://developers.openai.com/api/docs/models/gpt-5.5"
|
|
1412
|
+
}],
|
|
1413
|
+
anthropic: [
|
|
1414
|
+
{
|
|
1415
|
+
match: {
|
|
1416
|
+
kind: "family",
|
|
1417
|
+
family: "claude-3-7-sonnet"
|
|
1418
|
+
},
|
|
1419
|
+
source: "https://docs.anthropic.com/en/docs/build-with-claude/computer-use"
|
|
1420
|
+
},
|
|
1421
|
+
{
|
|
1422
|
+
match: {
|
|
1423
|
+
kind: "family",
|
|
1424
|
+
family: "claude-opus-4"
|
|
1425
|
+
},
|
|
1426
|
+
source: "https://docs.anthropic.com/en/docs/build-with-claude/computer-use"
|
|
1427
|
+
},
|
|
1428
|
+
{
|
|
1429
|
+
match: {
|
|
1430
|
+
kind: "family",
|
|
1431
|
+
family: "claude-sonnet-4"
|
|
1432
|
+
},
|
|
1433
|
+
source: "https://docs.anthropic.com/en/docs/build-with-claude/computer-use"
|
|
1434
|
+
},
|
|
1435
|
+
{
|
|
1436
|
+
match: {
|
|
1437
|
+
kind: "family",
|
|
1438
|
+
family: "claude-haiku-4"
|
|
1439
|
+
},
|
|
1440
|
+
source: "https://docs.anthropic.com/en/docs/build-with-claude/computer-use"
|
|
1441
|
+
},
|
|
1442
|
+
{
|
|
1443
|
+
match: {
|
|
1444
|
+
kind: "family",
|
|
1445
|
+
family: "claude-fable-5"
|
|
1446
|
+
},
|
|
1447
|
+
source: "https://docs.anthropic.com/en/docs/build-with-claude/computer-use"
|
|
1448
|
+
}
|
|
1449
|
+
],
|
|
1450
|
+
google: [{
|
|
1451
|
+
match: {
|
|
1452
|
+
kind: "exact",
|
|
1453
|
+
id: "gemini-3-flash-preview"
|
|
1454
|
+
},
|
|
1455
|
+
source: "https://ai.google.dev/gemini-api/docs/computer-use"
|
|
1456
|
+
}, {
|
|
1457
|
+
match: {
|
|
1458
|
+
kind: "exact",
|
|
1459
|
+
id: "gemini-3-pro-preview"
|
|
1460
|
+
},
|
|
1461
|
+
source: "https://ai.google.dev/gemini-api/docs/computer-use"
|
|
1462
|
+
}],
|
|
1463
|
+
tzafon: [{
|
|
1464
|
+
match: {
|
|
1465
|
+
kind: "exact",
|
|
1466
|
+
id: "tzafon.northstar-cua-fast"
|
|
1467
|
+
},
|
|
1468
|
+
source: "https://huggingface.co/Tzafon/Northstar-CUA-Fast"
|
|
1469
|
+
}],
|
|
1470
|
+
yutori: [
|
|
1471
|
+
{
|
|
1472
|
+
match: {
|
|
1473
|
+
kind: "exact",
|
|
1474
|
+
id: "n1-latest"
|
|
1475
|
+
},
|
|
1476
|
+
source: "https://docs.yutori.com/reference/navigator"
|
|
1477
|
+
},
|
|
1478
|
+
{
|
|
1479
|
+
match: {
|
|
1480
|
+
kind: "exact",
|
|
1481
|
+
id: "n1-20260203"
|
|
1482
|
+
},
|
|
1483
|
+
source: "https://docs.yutori.com/reference/navigator"
|
|
1484
|
+
},
|
|
1485
|
+
{
|
|
1486
|
+
match: {
|
|
1487
|
+
kind: "exact",
|
|
1488
|
+
id: "n1.5-latest"
|
|
1489
|
+
},
|
|
1490
|
+
source: "https://docs.yutori.com/reference/navigator"
|
|
1491
|
+
},
|
|
1492
|
+
{
|
|
1493
|
+
match: {
|
|
1494
|
+
kind: "exact",
|
|
1495
|
+
id: "n1.5-20260428"
|
|
1496
|
+
},
|
|
1497
|
+
source: "https://docs.yutori.com/reference/navigator"
|
|
1498
|
+
}
|
|
1499
|
+
]
|
|
1500
|
+
};
|
|
1501
|
+
const CUA_MODEL_OVERRIDES = {
|
|
1502
|
+
openai: [cuaModel("openai", "gpt-5.5", "GPT-5.5"), cuaModel("openai", "gpt-5.5-2026-04-23", "GPT-5.5 (2026-04-23)")],
|
|
1503
|
+
anthropic: [],
|
|
1504
|
+
google: [],
|
|
1505
|
+
tzafon: [cuaModel("tzafon", "tzafon.northstar-cua-fast", "Tzafon Northstar CUA Fast")],
|
|
1506
|
+
yutori: [
|
|
1507
|
+
cuaModel("yutori", "n1.5-latest", "Yutori Navigator n1.5"),
|
|
1508
|
+
cuaModel("yutori", "n1.5-20260428", "Yutori Navigator n1.5 (2026-04-28)"),
|
|
1509
|
+
cuaModel("yutori", "n1-latest", "Yutori Navigator n1"),
|
|
1510
|
+
cuaModel("yutori", "n1-20260203", "Yutori Navigator n1 (2026-02-03)")
|
|
1511
|
+
]
|
|
1512
|
+
};
|
|
1513
|
+
/**
 * Split a provider-qualified ref such as `"openai:gpt-5.5"` at its first colon.
 *
 * The `"gemini:"` prefix is accepted as an alias for the canonical `"google:"`
 * prefix and normalizes to provider `"google"`.
 *
 * @param {string} ref - Reference in `"<provider>:<model>"` form.
 * @returns {{ provider: string, model: string }}
 * @throws {Error} When the ref has no provider or no model part, or when the
 *   provider is not a supported CUA provider.
 */
function parseCuaModelRef(ref) {
	const separatorAt = ref.indexOf(":");
	const hasProvider = separatorAt > 0;
	const hasModel = separatorAt !== ref.length - 1;
	if (!hasProvider || !hasModel) throw new Error(`CUA model ref must be provider-qualified as "<provider>:<model>"; got "${ref}"`);
	const prefix = ref.slice(0, separatorAt);
	const model = ref.slice(separatorAt + 1);
	const provider = prefix === "gemini" ? "google" : prefix;
	// The error reports the prefix as the caller wrote it, not the alias target.
	if (!isCuaProvider(provider)) throw new Error(`unsupported CUA provider "${prefix}" (expected one of: ${CUA_PROVIDERS.join(", ")})`);
	return {
		provider,
		model
	};
}
|
|
1532
|
+
/**
 * Build a {@link CuaModelRef} by joining a provider and a model id with `":"`.
 * No validation is performed on either part.
 */
function formatCuaModelRef(provider, model) {
	return "".concat(provider, ":", model);
}
|
|
1536
|
+
/**
 * List the computer-use-capable models this package curates.
 *
 * For each selected provider the local overrides are recorded first, then
 * registry models from `getModels` that carry a CUA annotation and whose ref
 * is not already present. The result is ordered by `compareCuaModels`.
 *
 * @param provider - Optional provider to restrict the listing to; all
 * providers in `CUA_PROVIDERS` are used when omitted.
 * @returns Array of `{ ref, provider, model, name }` entries.
 */
function listCuaModels(provider) {
	const collected = new Map();
	const record = (owner, entry) => {
		const ref = formatCuaModelRef(owner, entry.id);
		collected.set(ref, {
			ref,
			provider: owner,
			model: entry.id,
			name: entry.name
		});
	};
	const selected = provider ? [provider] : Array.from(CUA_PROVIDERS);
	for (const owner of selected) {
		for (const entry of CUA_MODEL_OVERRIDES[owner]) record(owner, entry);
		for (const entry of getModels(owner)) {
			if (!supportsCuaProvider(owner, entry.id)) continue;
			// Overrides win: a registry entry never replaces an existing ref.
			if (collected.has(formatCuaModelRef(owner, entry.id))) continue;
			record(owner, entry);
		}
	}
	return Array.from(collected.values()).sort(compareCuaModels);
}
|
|
1568
|
+
/**
 * Resolve a {@link CuaModelRef} to a concrete model record.
 *
 * The registry (`getModel`) is consulted first; `CUA_MODEL_OVERRIDES` is the
 * fallback. `"gemini:"` refs are handled by {@link parseCuaModelRef}.
 *
 * @throws {Error} When the ref cannot be parsed, the model has no CUA
 * annotation, or the model is annotated but found in neither source.
 */
function getCuaModel(ref) {
	const parsed = parseCuaModelRef(ref);
	if (!supportsCuaProvider(parsed.provider, parsed.model)) {
		throw new Error(`unsupported CUA model "${ref}"`);
	}
	const resolved = getModel(parsed.provider, parsed.model)
		|| CUA_MODEL_OVERRIDES[parsed.provider].find((candidate) => candidate.id === parsed.model);
	if (resolved) return resolved;
	throw new Error(`CUA model "${ref}" is supported but not registered. Add it to pi-ai (models.dev) or CUA_MODEL_OVERRIDES.`);
}
|
|
1584
|
+
/**
 * Read `model.provider` and return it as a {@link CuaProvider}.
 *
 * @throws {Error} When the model's provider is not one of `CUA_PROVIDERS`.
 */
function providerForModel(model) {
	const { provider } = model;
	if (isCuaProvider(provider)) return provider;
	throw new Error(`unsupported CUA model provider "${provider}" (expected one of: ${CUA_PROVIDERS.join(", ")})`);
}
|
|
1589
|
+
/** Report whether `value` is one of the entries of `CUA_PROVIDERS`. */
function isCuaProvider(value) {
	return CUA_PROVIDERS.some((known) => known === value);
}
|
|
1593
|
+
/** Report whether a CUA annotation exists for `modelId` under `provider`. */
function supportsCuaProvider(provider, modelId) {
	const annotation = findCuaAnnotation(provider, modelId);
	return typeof annotation !== "undefined";
}
|
|
1596
|
+
/**
 * Find the CUA-support annotation covering a model id, if any.
 *
 * Matching is case-insensitive. An `"exact"` annotation matches only its own
 * id; any other annotation is treated as a family and matched with
 * `isCuaFamilyMatch`. The first matching annotation wins.
 *
 * @param provider - Key into `CUA_MODEL_ANNOTATIONS`.
 * @param modelId - Model id to look up.
 * @returns The matching annotation, or `undefined` when none matches or the
 * provider has no annotation list.
 */
function findCuaAnnotation(provider, modelId) {
	// This helper is exported, so guard against providers that are not own keys
	// (unknown names, the "gemini" alias, inherited names like "constructor")
	// instead of failing with "undefined is not iterable".
	if (!Object.prototype.hasOwnProperty.call(CUA_MODEL_ANNOTATIONS, provider)) return void 0;
	const id = modelId.toLowerCase();
	for (const annotation of CUA_MODEL_ANNOTATIONS[provider]) {
		const { match } = annotation;
		const covered = match.kind === "exact"
			? id === match.id.toLowerCase()
			: isCuaFamilyMatch(id, match.family.toLowerCase());
		if (covered) return annotation;
	}
	return void 0;
}
|
|
1603
|
+
/**
 * Report whether `id` belongs to `family`: either the family name itself, or
 * the family name followed by one or more `-`-separated segments that consist
 * solely of digits (for example a dated snapshot suffix).
 */
function isCuaFamilyMatch(id, family) {
	if (id === family) return true;
	const prefix = `${family}-`;
	if (!id.startsWith(prefix)) return false;
	const segments = id.slice(prefix.length).split("-");
	for (const segment of segments) {
		if (!/^\d+$/.test(segment)) return false;
	}
	return true;
}
|
|
1608
|
+
/**
 * Build a model record for a locally declared CUA model.
 *
 * Every record shares the same base fields (text + image input, all cost
 * fields set to 0) and then receives the provider-specific `api`, `baseUrl`,
 * `contextWindow` and `maxTokens` values.
 *
 * @param provider - One of "openai", "anthropic", "google", "tzafon", "yutori".
 * @param id - Model id.
 * @param name - Display name.
 * @returns The assembled model record.
 * @throws {Error} When `provider` is not one of the providers listed above.
 */
function cuaModel(provider, id, name) {
	const base = {
		id,
		name,
		provider,
		reasoning: provider === "openai" || provider === "anthropic" || provider === "google",
		input: ["text", "image"],
		cost: {
			input: 0,
			output: 0,
			cacheRead: 0,
			cacheWrite: 0
		}
	};
	switch (provider) {
		case "openai": return {
			...base,
			api: "openai-responses",
			baseUrl: "https://api.openai.com/v1",
			contextWindow: 4e5,
			maxTokens: 32768
		};
		case "anthropic": return {
			...base,
			api: "anthropic-messages",
			baseUrl: "https://api.anthropic.com",
			contextWindow: 2e5,
			maxTokens: 64e3
		};
		case "google": return {
			...base,
			api: "google-generative-ai",
			baseUrl: "https://generativelanguage.googleapis.com/v1beta",
			contextWindow: 1048576,
			maxTokens: 65536
		};
		case "tzafon": return {
			...base,
			api: "tzafon-responses",
			baseUrl: "https://api.lightcone.ai",
			contextWindow: 128e3,
			maxTokens: 4096
		};
		case "yutori": return {
			...base,
			api: "yutori-chat-completions",
			baseUrl: "https://api.yutori.com/v1",
			contextWindow: 128e3,
			maxTokens: 4096
		};
		// Fail loudly rather than returning undefined, which would only surface
		// later as a property access on an undefined model.
		default: throw new Error(`unsupported CUA provider "${provider}"`);
	}
}
|
|
1660
|
+
/**
 * Sort comparator for listed models: entries are ordered by their provider's
 * position in `CUA_PROVIDERS`, then by model id using `localeCompare`.
 */
function compareCuaModels(a, b) {
	if (a.provider === b.provider) return a.model.localeCompare(b.model);
	const rank = (entry) => CUA_PROVIDERS.indexOf(entry.provider);
	return rank(a) - rank(b);
}
|
|
1664
|
+
//#endregion
|
|
1665
|
+
//#region src/api-keys.ts
|
|
1666
|
+
/**
 * Environment variables accepted for each CUA provider.
 *
 * This mirrors pi-ai's approach: model lookup is pure, while auth is resolved
 * when streaming. These helpers let callers share one readable convention for
 * explicit `getApiKey` wiring (especially useful for `google` vs `gemini`).
 */
const CUA_PROVIDER_API_KEY_ENV_VARS = {
	openai: ["OPENAI_API_KEY"],
	anthropic: ["ANTHROPIC_OAUTH_TOKEN", "ANTHROPIC_API_KEY"],
	google: ["GOOGLE_API_KEY", "GEMINI_API_KEY"],
	tzafon: ["TZAFON_API_KEY"],
	yutori: ["YUTORI_API_KEY"]
};
/**
 * List the environment variables checked for a provider's API key, in
 * precedence order. Accepts `"gemini"` as an alias for `"google"`; returns an
 * empty list for unknown providers.
 *
 * @param provider - Provider name (or the `"gemini"` alias).
 * @returns Array of environment variable names; empty when the provider is
 * not a key of the table above.
 */
function cuaApiKeyEnvVarsForProvider(provider) {
	const key = provider === "gemini" ? "google" : provider;
	// Own-property check: a bare lookup would resolve names such as
	// "constructor", "toString" or "__proto__" through Object.prototype and
	// hand back a non-array instead of the documented empty list.
	if (!Object.prototype.hasOwnProperty.call(CUA_PROVIDER_API_KEY_ENV_VARS, key)) return [];
	return CUA_PROVIDER_API_KEY_ENV_VARS[key];
}
|
|
1689
|
+
/**
 * Read a provider's API key from `process.env`.
 *
 * The provider's variables are checked in precedence order; the first one
 * whose value is non-blank after trimming is returned as stored (untrimmed).
 *
 * @returns The key, or `undefined` when no variable holds a non-blank value.
 */
function getCuaEnvApiKey(provider) {
	return cuaApiKeyEnvVarsForProvider(provider)
		.map((envVar) => process.env[envVar])
		.find((value) => value?.trim());
}
|
|
1696
|
+
/**
 * Read a provider's API key from the environment, failing when it is absent.
 *
 * @returns The key found by {@link getCuaEnvApiKey}.
 * @throws {Error} Naming the variables to set, or stating that no variables
 * are known for the provider.
 */
function requireCuaEnvApiKey(provider) {
	const found = getCuaEnvApiKey(provider);
	if (!found) {
		const candidates = cuaApiKeyEnvVarsForProvider(provider);
		const message = candidates.length === 0
			? `No known API key environment variables for provider "${provider}"`
			: `Missing API key for "${provider}". Set one of: ${candidates.join(", ")}`;
		throw new Error(message);
	}
	return found;
}
|
|
1704
|
+
/**
 * {@link getCuaEnvApiKey} for a model instead of a provider name. A string
 * input is parsed as a model ref; any other input is treated as a concrete
 * model and its provider is validated with `providerForModel`.
 */
function getCuaEnvApiKeyForModel(input) {
	let provider;
	if (typeof input === "string") provider = parseCuaModelRef(input).provider;
	else provider = providerForModel(input);
	return getCuaEnvApiKey(provider);
}
|
|
1708
|
+
/**
 * {@link requireCuaEnvApiKey} for a model instead of a provider name. A string
 * input is parsed as a model ref; any other input is treated as a concrete
 * model and its provider is validated with `providerForModel`.
 */
function requireCuaEnvApiKeyForModel(input) {
	let provider;
	if (typeof input === "string") provider = parseCuaModelRef(input).provider;
	else provider = providerForModel(input);
	return requireCuaEnvApiKey(provider);
}
|
|
1712
|
+
//#endregion
|
|
1713
|
+
//#region src/providers/anthropic/actions.ts
|
|
1714
|
+
/**
 * Canonical CUA action types Anthropic browser computer-use tools support.
 *
 * Source of truth: the computer/browser tool action enums in Anthropic's
 * computer-use best-practices quickstart. These are the browser actions
 * Anthropic currently accepts under CUA's canonical individual tool names.
 * https://github.com/anthropics/claude-quickstarts/blob/main/computer-use-best-practices/computer_use/tools/computer.py
 * https://github.com/anthropics/claude-quickstarts/blob/main/computer-use-best-practices/computer_use/tools/browser.py
 */
const ANTHROPIC_CUA_ACTION_TYPES = [
	"click",
	"double_click",
	"mouse_down",
	"mouse_up",
	"type",
	"keypress",
	"scroll",
	"move",
	"drag",
	"wait",
	"screenshot",
	"goto",
	"cursor_position"
];
/** Membership set built once from {@link ANTHROPIC_CUA_ACTION_TYPES}. */
const ANTHROPIC_CANONICAL_ACTION_TYPE_SET = new Set(ANTHROPIC_CUA_ACTION_TYPES);
/** Name of the batch tool included by default in Anthropic computer-use tools. */
const ANTHROPIC_BATCH_TOOL_NAME = CUA_BATCH_TOOL_NAME;
/** Shared batch tool description with an Anthropic-specific line appended. */
const ANTHROPIC_BATCH_TOOL_DESCRIPTION = [
	CUA_BATCH_TOOL_DESCRIPTION,
	"Coordinates in a batch refer to the screenshot taken before the batch call."
].join("\n");
|
|
1742
|
+
/**
 * Validate a requested action list against the Anthropic canonical set.
 *
 * @param actions - Iterable of action names; defaults to every entry of
 * `ANTHROPIC_CUA_ACTION_TYPES` when `null` or `undefined`.
 * @returns A new array containing the requested actions in their original order.
 * @throws {Error} Listing every requested action that is not supported.
 */
function resolveAnthropicActions(actions) {
	const requested = [...(actions ?? ANTHROPIC_CUA_ACTION_TYPES)];
	const rejected = requested.filter((action) => !isAnthropicCanonicalAction(action));
	if (rejected.length > 0) {
		throw new Error(`unsupported Anthropic canonical action(s): ${rejected.join(", ")}`);
	}
	return requested;
}
|
|
1751
|
+
/** Report whether `action` is a member of the Anthropic canonical action set. */
function isAnthropicCanonicalAction(action) {
	const known = ANTHROPIC_CANONICAL_ACTION_TYPE_SET;
	return known.has(action);
}
|
|
1754
|
+
/**
 * Build the TypeBox schema for Anthropic-supported canonical browser actions.
 * The requested actions are validated by `resolveAnthropicActions` first, so
 * an unsupported action throws before any schema is built.
 */
function createActionSchema(actions) {
	const allowed = resolveAnthropicActions(actions);
	return createCuaActionSchema(allowed);
}
|
|
1758
|
+
/**
 * Build Anthropic CUA computer-use tools.
 *
 * Use this when calling `complete()` or `stream()` directly and you need an
 * array of `Tool` objects for Anthropic browser actions. Pass `actions` to
 * expose only a supported subset, such as `["click"]`. A batch tool is
 * appended by default; pass `excludeBatch: true` to omit it.
 *
 * @throws {Error} When `options.actions` contains an unsupported action.
 */
function computerTools$1(options = {}) {
	const { actions: requested, excludeBatch } = options;
	const actions = resolveAnthropicActions(requested);
	const tools = createCuaActionToolDefinitions(actions);
	if (excludeBatch) return tools;
	const batchTool = createCuaBatchToolDefinition(actions, {
		name: ANTHROPIC_BATCH_TOOL_NAME,
		description: ANTHROPIC_BATCH_TOOL_DESCRIPTION
	});
	tools.push(batchTool);
	return tools;
}
|
|
1775
|
+
/**
 * Build the local execution adapters used by CuaAgent and CuaAgentHarness.
 *
 * Mirrors the tool-definition builder: one executor per resolved action, plus
 * a batch executor unless `options.excludeBatch` is truthy.
 *
 * @throws {Error} When `options.actions` contains an unsupported action.
 */
function computerToolExecutors$1(options = {}) {
	const { actions: requested, excludeBatch } = options;
	const actions = resolveAnthropicActions(requested);
	const executors = createCuaActionToolExecutors(actions);
	if (excludeBatch) return executors;
	const batchExecutor = createCuaBatchToolExecutor(actions, {
		name: ANTHROPIC_BATCH_TOOL_NAME,
		description: ANTHROPIC_BATCH_TOOL_DESCRIPTION
	});
	executors.push(batchExecutor);
	return executors;
}
|
|
1785
|
+
//#endregion
|
|
1786
|
+
//#region src/providers/anthropic/index.ts
|
|
1787
|
+
/** Namespace object exposing the Anthropic provider module's public names. */
var anthropic_exports = /* @__PURE__ */ __exportAll({
	ANTHROPIC_BATCH_TOOL_NAME: () => ANTHROPIC_BATCH_TOOL_NAME,
	ANTHROPIC_COMPUTER_INSTRUCTIONS: () => ANTHROPIC_COMPUTER_INSTRUCTIONS,
	ANTHROPIC_CUA_ACTION_TYPES: () => ANTHROPIC_CUA_ACTION_TYPES,
	buildAnthropicSystemPrompt: () => buildAnthropicSystemPrompt,
	computerToolExecutors: () => computerToolExecutors$1,
	computerTools: () => computerTools$1,
	coordinateSystem: () => coordinateSystem$4,
	createActionSchema: () => createActionSchema,
	providerModule: () => providerModule$4
});
/** Describe the coordinate system used for Anthropic tool calls: pixels. */
function coordinateSystem$4() {
	const system = { type: "pixel" };
	return system;
}
/** Base system-prompt text for Anthropic CUA models. */
const ANTHROPIC_COMPUTER_INSTRUCTIONS = `You control a Kernel cloud browser through individual browser tools. Use keyboard navigation where possible, and request screenshots when you need to inspect state.`;
/**
 * Build the Anthropic system prompt: the base instructions, followed by
 * `opts.suffix` after a blank line when a truthy suffix is supplied.
 */
function buildAnthropicSystemPrompt(opts = {}) {
	const { suffix } = opts;
	if (!suffix) return ANTHROPIC_COMPUTER_INSTRUCTIONS;
	return `${ANTHROPIC_COMPUTER_INSTRUCTIONS}\n\n${suffix}`;
}
/** Provider hooks looked up by the runtime-spec resolver for Anthropic. */
const providerModule$4 = {
	toolDefinitions: computerTools$1,
	toolExecutors: computerToolExecutors$1,
	coordinateSystem: coordinateSystem$4,
	buildSystemPrompt: buildAnthropicSystemPrompt
};
|
|
1811
|
+
//#endregion
|
|
1812
|
+
//#region src/providers/gemini/index.ts
|
|
1813
|
+
/** Namespace object exposing the Gemini provider module's public names. */
var gemini_exports = /* @__PURE__ */ __exportAll({
	GEMINI_COMPUTER_INSTRUCTIONS: () => GEMINI_COMPUTER_INSTRUCTIONS,
	GEMINI_CUA_ACTION_TYPES: () => CUA_ACTION_TYPES,
	buildGeminiSystemPrompt: () => buildGeminiSystemPrompt,
	computerToolExecutors: () => computerToolExecutors,
	computerTools: () => computerTools,
	coordinateSystem: () => coordinateSystem$3,
	createActionSchema: () => createCuaActionSchema,
	providerModule: () => providerModule$3
});
/** Describe the coordinate system used for Gemini tool calls: normalized, range 0 to 999. */
function coordinateSystem$3() {
	const range = [0, 999];
	return { type: "normalized", range };
}
/** Base system-prompt text for Gemini CUA models. */
const GEMINI_COMPUTER_INSTRUCTIONS = `You control a Kernel cloud browser through individual browser tools. Use the provider coordinate system for tool calls, and request screenshots or URL reads when state changes.`;
/**
 * Build the Gemini system prompt: the base instructions, followed by
 * `opts.suffix` after a blank line when a truthy suffix is supplied.
 */
function buildGeminiSystemPrompt(opts = {}) {
	const { suffix } = opts;
	if (!suffix) return GEMINI_COMPUTER_INSTRUCTIONS;
	return `${GEMINI_COMPUTER_INSTRUCTIONS}\n\n${suffix}`;
}
/** Provider hooks looked up by the runtime-spec resolver for Google/Gemini. */
const providerModule$3 = {
	toolDefinitions: computerTools,
	toolExecutors: computerToolExecutors,
	coordinateSystem: coordinateSystem$3,
	buildSystemPrompt: buildGeminiSystemPrompt
};
|
|
1839
|
+
//#endregion
|
|
1840
|
+
//#region src/providers/openai/index.ts
|
|
1841
|
+
/** Namespace object exposing the OpenAI provider module's public names. */
var openai_exports = /* @__PURE__ */ __exportAll({
	OPENAI_COMPUTER_INSTRUCTIONS: () => OPENAI_COMPUTER_INSTRUCTIONS,
	OPENAI_CUA_ACTION_TYPES: () => CUA_ACTION_TYPES,
	OPENAI_EXTRA_TOOL_DESCRIPTION: () => CUA_NAVIGATION_TOOL_DESCRIPTION,
	OPENAI_EXTRA_TOOL_NAME: () => CUA_NAVIGATION_TOOL_NAME,
	OpenAIExtraSchema: () => CuaNavigationSchema,
	buildOpenAISystemPrompt: () => buildOpenAISystemPrompt,
	computerToolExecutors: () => computerToolExecutors,
	computerTools: () => computerTools,
	coordinateSystem: () => coordinateSystem$2,
	createActionSchema: () => createCuaActionSchema,
	openaiResponsesStoreOnPayload: () => openaiResponsesStoreOnPayload,
	providerModule: () => providerModule$2
});
/** Describe the coordinate system used for OpenAI tool calls: pixels. */
function coordinateSystem$2() {
	const system = { type: "pixel" };
	return system;
}
/** Base system-prompt text for OpenAI CUA models. */
const OPENAI_COMPUTER_INSTRUCTIONS = `You control a Kernel cloud browser through individual browser tools. Use the available tools for browser interaction and request explicit url, cursor_position, or screenshot reads when you need updated state.`;
/**
 * Build the OpenAI system prompt: the base instructions, followed by
 * `opts.suffix` after a blank line when a truthy suffix is supplied.
 */
function buildOpenAISystemPrompt(opts = {}) {
	const { suffix } = opts;
	if (!suffix) return OPENAI_COMPUTER_INSTRUCTIONS;
	return `${OPENAI_COMPUTER_INSTRUCTIONS}\n\n${suffix}`;
}
/**
 * Payload hook that forces `store: true` on an outgoing request payload.
 *
 * @returns A shallow copy of `payload` with `store: true`, or `undefined`
 * when `payload` is not an object or already has `store === true`.
 */
function openaiResponsesStoreOnPayload(payload) {
	const isObject = Boolean(payload) && typeof payload === "object";
	if (!isObject || payload.store === true) return void 0;
	return { ...payload, store: true };
}
/** Provider hooks looked up by the runtime-spec resolver for OpenAI. */
const providerModule$2 = {
	toolDefinitions: computerTools,
	toolExecutors: computerToolExecutors,
	coordinateSystem: coordinateSystem$2,
	buildSystemPrompt: buildOpenAISystemPrompt,
	onPayload: openaiResponsesStoreOnPayload
};
|
|
1878
|
+
//#endregion
|
|
1879
|
+
//#region src/providers/tzafon/index.ts
|
|
1880
|
+
/** Namespace object exposing the Tzafon provider module's public names. */
var tzafon_exports = /* @__PURE__ */ __exportAll({
	TZAFON_COMPUTER_INSTRUCTIONS: () => TZAFON_COMPUTER_INSTRUCTIONS,
	TZAFON_CUA_ACTION_TYPES: () => CUA_ACTION_TYPES,
	TZAFON_RESPONSES_API: () => TZAFON_RESPONSES_API,
	buildTzafonSystemPrompt: () => buildTzafonSystemPrompt,
	computerToolExecutors: () => computerToolExecutors,
	computerTools: () => computerTools,
	coordinateSystem: () => coordinateSystem$1,
	createActionSchema: () => createCuaActionSchema,
	providerModule: () => providerModule$1,
	streamSimpleTzafonResponses: () => streamSimpleTzafonResponses,
	streamTzafonResponses: () => streamTzafonResponses,
	toCanonicalActions: () => toCanonicalActions$1,
	tzafonComputerUseOnPayload: () => tzafonComputerUseOnPayload,
	tzafonToolCallId: () => tzafonToolCallId
});
/** Describe the coordinate system used for Tzafon tool calls: normalized, range 0 to 999. */
function coordinateSystem$1() {
	const range = [0, 999];
	return { type: "normalized", range };
}
/** Base system-prompt text for Tzafon CUA models. */
const TZAFON_COMPUTER_INSTRUCTIONS = `You control a Kernel cloud browser through individual browser tools. Include screenshot or URL reads when you need updated state.`;
/**
 * Build the default system prompt used with Tzafon CUA models: the base
 * instructions, followed by `opts.suffix` after a blank line when a truthy
 * suffix is supplied.
 */
function buildTzafonSystemPrompt(opts = {}) {
	const { suffix } = opts;
	if (!suffix) return TZAFON_COMPUTER_INSTRUCTIONS;
	return `${TZAFON_COMPUTER_INSTRUCTIONS}\n\n${suffix}`;
}
/** Provider hooks looked up by the runtime-spec resolver for Tzafon. */
const providerModule$1 = {
	toolDefinitions: computerTools,
	toolExecutors: computerToolExecutors,
	coordinateSystem: coordinateSystem$1,
	buildSystemPrompt: buildTzafonSystemPrompt,
	onPayload: tzafonComputerUseOnPayload
};
|
|
1914
|
+
//#endregion
|
|
1915
|
+
//#region src/providers/yutori/index.ts
|
|
1916
|
+
/** Namespace object exposing the Yutori provider module's public names. */
var yutori_exports = /* @__PURE__ */ __exportAll({
	YUTORI_CHAT_COMPLETIONS_API: () => YUTORI_CHAT_COMPLETIONS_API,
	YUTORI_COMPUTER_INSTRUCTIONS: () => "",
	YUTORI_CUA_ACTION_TYPES: () => YUTORI_CUA_ACTION_TYPES,
	YUTORI_N15_ACTION_TYPES: () => YUTORI_N15_ACTION_TYPES,
	YUTORI_N15_CORE_ACTION_TYPES: () => YUTORI_N15_CORE_ACTION_TYPES,
	YUTORI_N15_CORE_TOOL_SET: () => YUTORI_N15_CORE_TOOL_SET,
	YUTORI_N15_EXPANDED_ACTION_TYPES: () => YUTORI_N15_EXPANDED_ACTION_TYPES,
	YUTORI_N15_EXPANDED_TOOL_SET: () => YUTORI_N15_EXPANDED_TOOL_SET,
	YUTORI_N1_ACTION_TYPES: () => YUTORI_N1_ACTION_TYPES,
	buildYutoriSystemPrompt: () => buildYutoriSystemPrompt,
	computerToolExecutors: () => computerToolExecutors$2,
	computerTools: () => computerTools$2,
	coordinateSystem: () => coordinateSystem,
	createActionSchema: () => createActionSchema$1,
	providerModule: () => providerModule,
	streamSimpleYutori: () => streamSimpleYutori,
	streamYutori: () => streamYutori,
	toCanonicalActions: () => toCanonicalActions,
	yutoriCuaOnPayload: () => yutoriCuaOnPayload,
	yutoriNativeActionsForModel: () => yutoriNativeActionsForModel,
	yutoriNativeToolSetOnPayload: () => yutoriNativeToolSetOnPayload,
	yutoriToolSetForModel: () => yutoriToolSetForModel
});
/** Describe the coordinate system used for Yutori tool calls: normalized, range 0 to 1000. */
function coordinateSystem() {
	const range = [0, 1000];
	return { type: "normalized", range };
}
/**
 * Build the Yutori system prompt. The base instructions are the empty string,
 * so the result is just `opts.suffix` when it is truthy and `""` otherwise.
 */
function buildYutoriSystemPrompt(opts = {}) {
	const { suffix } = opts;
	return suffix ? `${suffix}` : "";
}
/**
 * Provider hooks looked up by the runtime-spec resolver for Yutori. No tool
 * definitions are emitted here (`toolDefinitions` always returns an empty
 * array), and screenshot handling is configured through the `screenshot`
 * settings below.
 */
const providerModule = {
	toolDefinitions: () => [],
	toolExecutors: computerToolExecutors$2,
	coordinateSystem,
	buildSystemPrompt: buildYutoriSystemPrompt,
	onPayload: yutoriCuaOnPayload,
	screenshot: {
		appendToLatestMessage: true,
		transform: {
			width: 1280,
			height: 800,
			format: "webp",
			quality: 90
		}
	}
};
|
|
1965
|
+
//#endregion
|
|
1966
|
+
//#region src/runtime-spec.ts
|
|
1967
|
+
/** Provider hook modules keyed by CUA provider name. */
const PROVIDERS = {
	openai: providerModule$2,
	anthropic: providerModule$4,
	google: providerModule$3,
	tzafon: providerModule$1,
	yutori: providerModule
};
/**
 * Resolve provider defaults from either a CUA model ref or a concrete model.
 *
 * Use the returned spec to build computer-use requests without hard-coding
 * model-provider rules in your application. Pass `options` (e.g.
 * `{ actions: ["click"] }`) to narrow the resolved tool definitions and
 * executors to a supported subset.
 *
 * @param input - A model ref string (resolved with `getCuaModel`) or a
 * concrete model object, which is used as-is.
 * @param options - Forwarded to the provider's tool-definition and
 * tool-executor builders.
 * @returns The model, its provider, and that provider's tool definitions,
 * executors, default system prompt, coordinate system, screenshot settings
 * and payload hook (the last two may be `undefined`).
 */
function resolveCuaRuntimeSpec(input, options) {
	let model = input;
	if (typeof input === "string") model = getCuaModel(input);
	const provider = providerForModel(model);
	const hooks = PROVIDERS[provider];
	const toolDefinitions = hooks.toolDefinitions(options);
	const toolExecutors = hooks.toolExecutors(options);
	const defaultSystemPrompt = hooks.buildSystemPrompt();
	const coordinates = hooks.coordinateSystem();
	return {
		model,
		provider,
		toolDefinitions,
		toolExecutors,
		defaultSystemPrompt,
		coordinateSystem: coordinates,
		screenshot: hooks.screenshot,
		onPayload: hooks.onPayload
	};
}
|
|
1997
|
+
//#endregion
|
|
1998
|
+
//#region src/index.ts
|
|
12
1999
|
registerCuaProviders();
|
|
13
|
-
//#
|
|
2000
|
+
//#endregion
|
|
2001
|
+
export { CUA_ACTION_TYPES, CUA_BATCH_TOOL_DESCRIPTION, CUA_BATCH_TOOL_NAME, CUA_MODEL_ANNOTATIONS, CUA_NAVIGATION_TOOL_DESCRIPTION, CUA_NAVIGATION_TOOL_NAME, CUA_PROVIDERS, CuaActionSchema, CuaBatchSchema, CuaNavigationSchema, anthropic_exports as anthropic, canonicalToolCallArguments, canonicalToolCallName, computerToolExecutors, computerTools, createCuaActionSchema, createCuaActionToolDefinitions, createCuaActionToolExecutors, createCuaBatchSchema, createCuaBatchToolDefinition, createCuaBatchToolExecutor, createCuaNavigationToolDefinition, cuaApiKeyEnvVarsForProvider, findCuaAnnotation, formatCuaModelRef, gemini_exports as gemini, getCuaEnvApiKey, getCuaEnvApiKeyForModel, getCuaModel, isCuaProvider, listCuaModels, normalizeGotoUrl, openai_exports as openai, parseCuaModelRef, providerForModel, registerCuaProviders, requireCuaEnvApiKey, requireCuaEnvApiKeyForModel, resolveCuaRuntimeSpec, tzafon_exports as tzafon, yutori_exports as yutori };
|