@oagi/oagi 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +154 -0
- package/dist/chunk-SDBYP57G.js +1536 -0
- package/dist/chunk-SDBYP57G.js.map +1 -0
- package/dist/cli.cjs +1927 -0
- package/dist/cli.cjs.map +1 -0
- package/dist/cli.d.cts +2 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +414 -0
- package/dist/cli.js.map +1 -0
- package/dist/index.cjs +1173 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +567 -0
- package/dist/index.d.ts +567 -0
- package/dist/index.js +36 -0
- package/dist/index.js.map +1 -0
- package/package.json +53 -0
|
@@ -0,0 +1,1536 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
/**
 * Bundler helper that applies a list of decorators to a class or class member.
 *
 * @param decorators Decorator functions; falsy entries are skipped. They run
 *   from the last entry to the first.
 * @param target The class (kind 0) or the object owning the member.
 * @param key Member name; unused when decorating the class itself.
 * @param kind 0 = decorate `target` itself, 1 = decorate the member's existing
 *   property descriptor, greater than 1 = start from `undefined`.
 * @returns The final decorated value (class, descriptor, or undefined).
 */
var __decorateClass = (decorators, target, key, kind) => {
  let current;
  if (kind > 1) {
    current = void 0;
  } else if (kind) {
    current = __getOwnPropDesc(target, key);
  } else {
    current = target;
  }
  for (let index = decorators.length - 1; index >= 0; index--) {
    const decorate = decorators[index];
    if (!decorate) continue;
    const produced = kind ? decorate(target, key, current) : decorate(current);
    // A decorator that returns nothing keeps the previous value.
    current = produced || current;
  }
  if (kind && current) {
    __defProp(target, key, current);
  }
  return current;
};
|
|
12
|
+
|
|
13
|
+
// src/errors.ts
|
|
14
|
+
/**
 * Base class for every error raised by the SDK.
 *
 * Sets `name` to the concrete subclass name so that `${err}` and stack traces
 * identify the error type instead of printing a generic "Error".
 */
var OAGIError = class OAGIError extends Error {
  /**
   * @param message Human-readable description.
   * @param options Standard `Error` options (e.g. `{ cause }`).
   */
  constructor(message, options) {
    super(message, options);
    this.name = new.target.name;
  }
};
/** Error carrying the HTTP response that triggered it. */
var APIError = class APIError extends OAGIError {
  /**
   * @param response Response object; `statusText` is the fallback message and
   *   `status` is used by `toString()`.
   * @param message Optional message overriding `response.statusText`.
   */
  constructor(response, message) {
    super(message ?? response.statusText);
    this.response = response;
  }
  /** @returns `API Error [<status>]: <message>` */
  toString() {
    return `API Error [${this.response.status}]: ${this.message}`;
  }
};
var AuthenticationError = class AuthenticationError extends APIError {
};
var RateLimitError = class RateLimitError extends APIError {
};
var ValidationError = class ValidationError extends APIError {
};
var NotFoundError = class NotFoundError extends APIError {
};
var ServerError = class ServerError extends APIError {
};
/** Raised when the SDK is configured incorrectly. */
var ConfigurationError = class ConfigurationError extends OAGIError {
};
/** Transport-level failure; keeps the underlying error for inspection. */
var NetworkError = class NetworkError extends OAGIError {
  /**
   * @param message Human-readable description.
   * @param originalError Underlying error; exposed as both `originalError`
   *   and the standard `cause`.
   */
  constructor(message, originalError) {
    super(message, originalError !== void 0 ? { cause: originalError } : void 0);
    this.originalError = originalError;
  }
};
var RequestTimeoutError = class RequestTimeoutError extends NetworkError {
};
/** Raised for invalid argument values or invalid call order. */
var ValueError = class ValueError extends OAGIError {
};
|
|
47
|
+
|
|
48
|
+
// src/client.ts
|
|
49
|
+
import OpenAI from "openai";
|
|
50
|
+
|
|
51
|
+
// src/consts.ts
|
|
52
|
+
// Service locations.
const DEFAULT_BASE_URL = "https://api.agiopen.org";
const API_KEY_HELP_URL = "https://developer.agiopen.org/api-keys";

// REST endpoints, resolved against the base URL.
const API_V1_FILE_UPLOAD_ENDPOINT = "/v1/file/upload";
const API_V1_GENERATE_ENDPOINT = "/v1/generate";

// Model and mode identifiers.
const MODEL_ACTOR = "lux-actor-1";
const MODEL_THINKER = "lux-thinker-1";
const MODE_ACTOR = "actor";

// Step budgets: defaults, and the per-model caps enforced when a task starts.
const DEFAULT_MAX_STEPS = 20;
const DEFAULT_MAX_STEPS_THINKER = 100;
const MAX_STEPS_ACTOR = 30;
const MAX_STEPS_THINKER = 120;

// Pause between agent steps, in seconds.
const DEFAULT_STEP_DELAY = 0.3;

// Sampling temperatures.
const DEFAULT_TEMPERATURE = 0.5;
const DEFAULT_TEMPERATURE_LOW = 0.1;

// HTTP behaviour: request timeout in seconds, and retry count.
const HTTP_CLIENT_TIMEOUT = 60;
const DEFAULT_MAX_RETRIES = 2;
|
|
68
|
+
|
|
69
|
+
// src/logger.ts
|
|
70
|
+
import pino from "pino";
|
|
71
|
+
// Level names accepted from the OAGI_LOG environment variable.
var allowedLevels = ["debug", "info", "warn", "error", "fatal"];
// Requested level, lower-cased; "info" when OAGI_LOG is not set.
var levelEnv = process.env.OAGI_LOG?.toLowerCase() ?? "info";
/**
 * Root logger. Any OAGI_LOG value outside `allowedLevels` falls back to
 * "info". Output is routed through the "pino-pretty" transport.
 */
var logger = pino({
  level: allowedLevels.indexOf(levelEnv) !== -1 ? levelEnv : "info",
  base: null,
  // Emits the raw JSON fragment carrying an ISO-8601 "time" field.
  timestamp: () => {
    const now = /* @__PURE__ */ new Date();
    return `,"time":"${now.toISOString()}"`;
  },
  messageKey: "msg",
  transport: {
    target: "pino-pretty",
    options: {
      colorize: false,
      translateTime: "SYS:yyyy-mm-dd HH:MM:ss",
      messageFormat: "{msg}",
      ignore: "pid,hostname"
    }
  }
});
/**
 * Creates a child logger whose `name` binding is `oagi.<name>`.
 *
 * @param name Suffix identifying the component.
 */
var getLogger = (name) => {
  const bindings = { name: `oagi.${name}` };
  return logger.child(bindings);
};
var logger_default = getLogger;
|
|
90
|
+
/**
 * Method decorator: wraps the method so that, when it fails with an
 * `APIError`, the `x-request-id` and `x-trace-id` response headers are logged
 * before the error is rethrown. Other errors are rethrown without logging.
 *
 * @param _ Unused decorator target.
 * @param __ Unused member name.
 * @param descriptor Property descriptor whose `value` is replaced in place.
 * @returns The same descriptor, now holding the wrapper.
 */
var logTraceOnFailure = (_, __, descriptor) => {
  const wrapped = descriptor.value;
  descriptor.value = async function(...args) {
    try {
      return await wrapped.apply(this, args);
    } catch (failure) {
      if (failure instanceof APIError) {
        const [requestId, traceId] = ["x-request-id", "x-trace-id"].map(
          (header) => failure.response.headers.get(header) ?? ""
        );
        logger.error(`Request Id: ${requestId}`);
        logger.error(`Trace Id: ${traceId}`);
      }
      throw failure;
    }
  };
  return descriptor;
};
|
|
107
|
+
|
|
108
|
+
// src/types/models/action.ts
|
|
109
|
+
import * as z from "zod";
|
|
110
|
+
/** Names of all actions accepted by the action parser. */
var ActionTypeSchema = z.enum([
  "click", "left_double", "left_triple", "right_single", "drag",
  "hotkey", "type",
  "scroll",
  "finish", "wait", "call_user"
]);
/** A single parsed action. */
var ActionSchema = z.object({
  /** Type of action to perform. */
  type: ActionTypeSchema,
  /** Action argument, kept as the string taken from the model output. */
  argument: z.string(),
  /** How many times the action is repeated; defaults to 1. */
  count: z.int().default(1)
});
|
|
137
|
+
/**
 * Extracts the first "x, y" pair of non-negative integers from a string.
 *
 * @param args Text such as "120, 340".
 * @returns `[x, y]` as numbers, or `null` when no pair is present.
 */
var parseCoords = (args) => {
  const found = /(\d+),\s*(\d+)/.exec(args);
  if (found === null) return null;
  const [, x, y] = found;
  return [Number(x), Number(y)];
};
|
|
144
|
+
/**
 * Extracts the first "x1, y1, x2, y2" group of non-negative integers.
 *
 * @param args Text such as "10, 20, 300, 400".
 * @returns `[x1, y1, x2, y2]` as numbers, or `null` when no group is present.
 */
var parseDragCoords = (args) => {
  const found = /(\d+),\s*(\d+),\s*(\d+),\s*(\d+)/.exec(args);
  if (found === null) return null;
  // Capture groups 1..4 hold the four coordinates.
  return found.slice(1, 5).map((digits) => Number(digits));
};
|
|
156
|
+
/**
 * Extracts "x, y, direction" from a scroll argument.
 *
 * @param args Text such as "500, 500, down".
 * @returns `[x, y, direction]` with numeric coordinates and the direction
 *   lower-cased, or `null` when the pattern is absent.
 */
var parseScroll = (args) => {
  const found = /(\d+),\s*(\d+),\s*(\w+)/.exec(args);
  if (found === null) return null;
  const [, x, y, direction] = found;
  return [Number(x), Number(y), direction.toLowerCase()];
};
|
|
163
|
+
|
|
164
|
+
// src/types/models/client.ts
|
|
165
|
+
import * as z2 from "zod";
|
|
166
|
+
/** Token accounting with integer prompt, completion and total counts. */
var UsageSchema = z2.object({
  prompt_tokens: z2.int(),
  completion_tokens: z2.int(),
  total_tokens: z2.int()
});
/** One error entry: a code and a message, both strings. */
var ErrorDetailSchema = z2.object({
  code: z2.string(),
  message: z2.string()
});
/** Error envelope; `error` may be null or missing. */
var ErrorResponseSchema = z2.object({
  error: ErrorDetailSchema.nullish()
});
/** Shape validated for the /v1/file/upload response. */
var UploadFileResponseSchema = z2.object({
  url: z2.string(),
  uuid: z2.string(),
  expires_at: z2.int(),
  file_expires_at: z2.int(),
  download_url: z2.string()
});
/** Shape validated for the /v1/generate response. */
var GenerateResponseSchema = z2.object({
  response: z2.string(),
  prompt_tokens: z2.int(),
  completion_tokens: z2.int(),
  /**
   * @deprecated This field is deprecated
   */
  cost: z2.float64().nullish(),
  request_id: z2.string().nullish()
});
|
|
195
|
+
|
|
196
|
+
// src/types/models/image-config.ts
|
|
197
|
+
import * as z3 from "zod";
|
|
198
|
+
/**
 * Image encoding options with defaults: JPEG at quality 85, 1260x700,
 * LANCZOS resampling, no optimisation. When the format is PNG, `quality` is
 * always reset to 85 regardless of the supplied value.
 */
var ImageConfigSchema = z3.object({
  format: z3.enum(["PNG", "JPEG"]).default("JPEG"),
  quality: z3.int().min(1).max(100).default(85),
  width: z3.int().positive().nullish().default(1260),
  height: z3.int().positive().nullish().default(700),
  optimize: z3.boolean().default(false),
  resample: z3.enum(["NEAREST", "BILINEAR", "BICUBIC", "LANCZOS"]).default("LANCZOS")
}).transform(
  (config) => config.format === "PNG" ? { ...config, quality: 85 } : config
);
|
|
211
|
+
|
|
212
|
+
// src/types/step_observer.ts
|
|
213
|
+
import * as z4 from "zod";
|
|
214
|
+
/** Fields shared by every observer event; `timestamp` defaults to "now". */
var BaseEventSchema = z4.object({
  timestamp: z4.date().default(() => /* @__PURE__ */ new Date())
});
/** "image" event: an image string for a given step number. */
var ImageEventSchema = BaseEventSchema.extend({
  type: z4.literal("image"),
  step_num: z4.number(),
  image: z4.string()
});
/** "step" event: the image and step result for a step, plus optional task id. */
var StepEventSchema = BaseEventSchema.extend({
  type: z4.literal("step"),
  step_num: z4.number(),
  image: z4.custom(),
  step: z4.custom(),
  task_id: z4.string().optional()
});
/** "action" event: the actions of a step and an optional error string. */
var ActionEventSchema = BaseEventSchema.extend({
  type: z4.literal("action"),
  step_num: z4.number(),
  actions: z4.array(z4.custom()),
  error: z4.string().optional()
});
/** "log" event: a free-form message. */
var LogEventSchema = BaseEventSchema.extend({
  type: z4.literal("log"),
  message: z4.string()
});
/** "split" event: a marker with an optional label. */
var SplitEventSchema = BaseEventSchema.extend({
  type: z4.literal("split"),
  label: z4.string().optional()
});
/** "plan" event: reasoning for one of the initial/reflection/summary phases. */
var PlanEventSchema = BaseEventSchema.extend({
  type: z4.literal("plan"),
  phase: z4.enum(["initial", "reflection", "summary"]),
  image: z4.string().or(z4.custom()).optional(),
  reasoning: z4.string(),
  result: z4.string().optional(),
  request_id: z4.string().optional()
});
|
|
251
|
+
/**
 * Base class for step observers. Subclasses are expected to provide an
 * `onEvent(event)` method; this class only contributes `chain`.
 */
var StepObserver = class {
  /**
   * Combines this observer with another one.
   *
   * @param observer Observer to run after this one; may be omitted.
   * @returns A ChainedStepObserver holding `[this, observer ?? null]`.
   */
  chain(observer) {
    const next = observer ?? null;
    return new ChainedStepObserver([this, next]);
  }
};
/** Observer that forwards each event to a list of observers, one at a time. */
var ChainedStepObserver = class extends StepObserver {
  observers;
  /**
   * @param observers Observers in delivery order; falsy entries are skipped.
   */
  constructor(observers) {
    super();
    this.observers = observers;
  }
  /**
   * Delivers `event` to every observer in order, awaiting each one before
   * calling the next. A rejection stops delivery and propagates.
   */
  async onEvent(event) {
    // Iterate over a snapshot so the delivery list is fixed when the call starts.
    for (const observer of [...this.observers]) {
      if (observer) {
        await observer.onEvent(event);
      }
    }
  }
};
|
|
269
|
+
|
|
270
|
+
// src/utils/output-parser.ts
|
|
271
|
+
/**
 * Splits an action block into individual action strings on top-level `&`.
 *
 * An `&` inside parentheses (for example inside typed text) does not split.
 * The separator itself is never part of a returned action, and empty
 * segments are dropped.
 *
 * @param actionBlock Raw text such as "click(1, 2) & type(a & b)".
 * @returns Trimmed action strings in their original order.
 */
var splitActions = (actionBlock) => {
  const actions = [];
  let current = "";
  let depth = 0;
  const flush = () => {
    const trimmed = current.trim();
    if (trimmed) actions.push(trimmed);
    current = "";
  };
  for (const char of actionBlock) {
    if (char === "&" && depth === 0) {
      flush();
      continue;
    }
    if (char === "(") {
      depth++;
    } else if (char === ")" && depth > 0) {
      // Never go below zero: a stray ")" must not disable later splitting.
      depth--;
    }
    current += char;
  }
  flush();
  return actions;
};
|
|
297
|
+
/**
 * Parses one action string of the form `name(arguments)`.
 *
 * The pattern uses the `s` flag so that arguments spanning several lines
 * (for example multi-line text for `type`) still match instead of the action
 * being dropped.
 *
 * @param action Action text such as "scroll(500, 500, down, 3)".
 * @returns `{ type, argument, count }`, or `null` when the text does not look
 *   like a call or the name is not a known action type. `count` is 1 unless
 *   a positive integer count was supplied for `hotkey` or `scroll`.
 */
var parseAction = (action) => {
  const match = /(\w+)\((.*)\)/s.exec(action);
  if (!match) return null;
  const parsedType = ActionTypeSchema.safeParse(match[1]);
  if (!parsedType.success) return null;
  const actionType = parsedType.data;
  let argument = match[2].trim();
  const args = argument.split(",");
  let count = 1;
  if (actionType === "hotkey") {
    // hotkey(key, c) - press key c times
    if (args.length >= 2 && args[1].trim()) {
      argument = args[0].trim();
      count = Number(args[1].trim());
    }
  } else if (actionType === "scroll") {
    // scroll(x, y, direction, c) - the fourth argument is the repeat count
    if (args.length >= 4) {
      const [x, y, direction] = args.slice(0, 3).map((part) => part.trim());
      argument = `${x},${y},${direction}`;
      count = Number(args[3].trim());
    }
  }
  // Anything that is not a positive integer falls back to a single repeat.
  if (!Number.isInteger(count) || count <= 0) {
    count = 1;
  }
  return { type: actionType, argument, count };
};
|
|
329
|
+
/**
 * Turns raw model output into a step.
 *
 * @param rawOutput Text containing `<|think_start|>…<|think_end|>` and
 *   `<|action_start|>…<|action_end|>` sections.
 * @returns `{ reason, actions, stop }`: `reason` is the think section (empty
 *   string when absent), `actions` are the actions that parsed successfully,
 *   and `stop` is true when any of them is a "finish" action.
 */
var parseRawOutput = (rawOutput) => {
  const thinkMatch = /<\|think_start\|>(.*?)<\|think_end\|>/s.exec(rawOutput);
  const reason = thinkMatch?.[1] ?? "";
  const actionMatch = /<\|action_start\|>(.*?)<\|action_end\|>/s.exec(rawOutput);
  const actionBlock = actionMatch?.[1] ?? "";
  const actions = [];
  for (const rawAction of splitActions(actionBlock)) {
    const parsed = parseAction(rawAction);
    if (parsed) actions.push(parsed);
  }
  const stop = actions.some((parsed) => parsed.type === "finish");
  return { reason, actions, stop };
};
|
|
339
|
+
|
|
340
|
+
// src/utils/prompt-builder.ts
|
|
341
|
+
/**
 * Builds the prompt sent with the first screenshot of a task.
 *
 * @param taskDescription User instruction appended at the end of the prompt.
 * @returns The prompt text, terminated by a newline.
 */
var buildPrompt = (taskDescription) => {
  const lines = [
    "You are a Desktop Agent completing computer use tasks from a user instruction.",
    "",
    "Every step, you will look at the screenshot and output the desired actions in a format as:",
    "",
    "<|think_start|> brief description of your intent and reasoning <|think_end|>",
    "<|action_start|> one of the allowed actions as below <|action_end|>",
    "",
    "In the action field, you have the following action formats:",
    "1. click(x, y) # left-click at the position (x, y), where x and y are integers normalized between 0 and 1000",
    "2. left_double(x, y) # left-double-click at the position (x, y), where x and y are integers normalized between 0 and 1000",
    "3. left_triple(x, y) # left-triple-click at the position (x, y), where x and y are integers normalized between 0 and 1000",
    "4. right_single(x, y) # right-click at the position (x, y), where x and y are integers normalized between 0 and 1000",
    "5. drag(x1, y1, x2, y2) # drag the mouse from (x1, y1) to (x2, y2) to select or move contents, where x1, y1, x2, y2 are integers normalized between 0 and 1000",
    "6. hotkey(key, c) # press the key for c times",
    "7. type(text) # type a text string on the keyboard",
    "8. scroll(x, y, direction, c) # scroll the mouse at position (x, y) in the direction of up or down for c times, where x and y are integers normalized between 0 and 1000",
    "9. wait() # wait for a while",
    "10. finish() # indicate the task is finished",
    "",
    "Directly output the text beginning with <|think_start|>, no additional text is needed for this scenario.",
    "",
    "The user instruction is:",
    `${taskDescription}`,
    ""
  ];
  return lines.join("\n");
};
|
|
365
|
+
|
|
366
|
+
// src/client.ts
|
|
367
|
+
var logger2 = logger_default("client");
|
|
368
|
+
/**
 * HTTP client for the OAGI API: chat completions through the OpenAI SDK,
 * plus direct calls to the file-upload and generate endpoints.
 */
var _Client = class _Client {
  /** Per-request timeout in seconds, applied by `fetch`. */
  timeout = HTTP_CLIENT_TIMEOUT;
  /** OpenAI SDK instance pointed at the `v1` path of the base URL. */
  client;
  /**
   * @param baseUrl API base URL; defaults to OAGI_BASE_URL or the built-in URL.
   * @param apiKey API key; defaults to OAGI_API_KEY.
   * @param maxRetries Retry count handed to the OpenAI SDK.
   * @throws {ConfigurationError} When no API key is available.
   */
  constructor(baseUrl = process.env.OAGI_BASE_URL ?? DEFAULT_BASE_URL, apiKey = process.env.OAGI_API_KEY ?? null, maxRetries = DEFAULT_MAX_RETRIES) {
    this.baseUrl = baseUrl;
    this.apiKey = apiKey;
    if (!apiKey) {
      throw new ConfigurationError(
        `OAGI API key must be provided either as 'api_key' parameter or OAGI_API_KEY environment variable. Get your API key at ${API_KEY_HELP_URL}`
      );
    }
    this.client = new OpenAI({
      baseURL: new URL("./v1", baseUrl).href,
      apiKey,
      maxRetries
    });
    logger2.info(`Client initialized with base_url: ${baseUrl}`);
  }
  /**
   * `fetch` wrapper that resolves the target against `baseUrl` and aborts
   * after `timeout` seconds. A caller-supplied `init.signal` is combined with
   * the timeout signal. The caller's `init` object is not modified.
   *
   * @param input URL string, URL, or Request (only its `url` is used).
   * @param init Standard fetch options.
   */
  fetch(input, init) {
    const target = typeof input === "string" || input instanceof URL ? input : input.url;
    const url = new URL(target, this.baseUrl);
    const timeoutSignal = AbortSignal.timeout(this.timeout * 1e3);
    const signal = init?.signal ? AbortSignal.any([timeoutSignal, init.signal]) : timeoutSignal;
    return fetch(url, { ...init, signal });
  }
  /**
   * Builds request headers: `x-api-version` when a version is given and
   * `x-api-key` when a key is configured.
   */
  buildHeaders(apiVersion) {
    const headers = {};
    if (apiVersion) {
      headers["x-api-version"] = apiVersion;
    }
    if (this.apiKey) {
      headers["x-api-key"] = this.apiKey;
    }
    return headers;
  }
  /**
   * Converts a non-OK response into the matching APIError subclass and throws
   * it. The message comes from `error.message` in the JSON body when present;
   * a body that is not valid JSON falls back to the response status text
   * instead of surfacing a parse error.
   *
   * @throws {APIError} Always.
   */
  async handleResponseError(response) {
    let detail;
    try {
      const data = await response.json();
      detail = data?.error?.message;
    } catch {
      // Body was not JSON; APIError falls back to response.statusText.
    }
    const ErrorClass = _Client.getErrorClass(response.status);
    const err = new ErrorClass(response, detail);
    logger2.error(err.toString());
    throw err;
  }
  /**
   * Rethrows a caught error, translating timeouts into RequestTimeoutError
   * and TypeError (as raised by fetch for network failures) into
   * NetworkError. Everything else is rethrown unchanged.
   *
   * @throws Always.
   */
  handleHttpErrors(err) {
    if (err instanceof DOMException) {
      if (err.name === "TimeoutError") {
        const message = `Request timed out after ${this.timeout} seconds`;
        logger2.error(message);
        throw new RequestTimeoutError(message, err);
      }
    } else if (err instanceof TypeError) {
      const message = `Network error: ${err}`;
      logger2.error(message);
      throw new NetworkError(message, err);
    }
    throw err;
  }
  /**
   * Maps an HTTP status code to an error class: 5xx to ServerError, 401, 404,
   * 422 and 429 to their dedicated classes, anything else to APIError.
   */
  static getErrorClass(statusCode) {
    if (statusCode >= 500) return ServerError;
    const byStatus = {
      401: AuthenticationError,
      404: NotFoundError,
      422: ValidationError,
      429: RateLimitError
    };
    return byStatus[statusCode] ?? APIError;
  }
  /**
   * Call OpenAI-compatible /v1/chat/completions endpoint.
   *
   * @param model Model to use for inference
   * @param messages Full message history (OpenAI-compatible format)
   * @param temperature Sampling temperature
   * @param taskId Optional task ID for multi-turn conversations
   * @returns Tuple `[step, rawOutput, usage]`
   *  - step: parsed actions and reasoning
   *  - rawOutput: raw model output string (empty when the response carries no
   *    choice or no content)
   *  - usage: token usage from the response, if any
   */
  async chatCompletions(model, messages, temperature, taskId) {
    logger2.info(`Making async chat completion request with model: ${model}`);
    const response = await this.client.chat.completions.create({
      model,
      messages,
      temperature,
      // @ts-expect-error extra body
      task_id: taskId
    });
    // Optional chaining: an empty `choices` array yields an empty step.
    const rawOutput = response.choices?.[0]?.message?.content ?? "";
    const step = parseRawOutput(rawOutput);
    const responseTaskId = response.task_id;
    const task = responseTaskId ? `task_id: ${responseTaskId}, ` : "";
    const usage = response.usage ? `, tokens: ${response.usage.prompt_tokens}+${response.usage.completion_tokens}` : "";
    logger2.info(
      `Chat completion successful - ${task}actions: ${step.actions.length}, stop: ${step.stop}${usage}`
    );
    return [step, rawOutput, response.usage];
  }
  /**
   * Call the /v1/file/upload endpoint to get a S3 presigned URL
   *
   * @param apiVersion API version header
   * @returns {Promise<UploadFileResponse>} The response from /v1/file/upload with uuid and presigned S3 URL
   * @throws {APIError} On a non-OK status or a body that fails validation.
   */
  async getS3PresignedUrl(apiVersion) {
    logger2.debug(`Making async API request to ${API_V1_FILE_UPLOAD_ENDPOINT}`);
    try {
      const headers = this.buildHeaders(apiVersion);
      const response = await this.fetch(API_V1_FILE_UPLOAD_ENDPOINT, {
        headers
      });
      if (!response.ok) {
        await this.handleResponseError(response);
      }
      try {
        const uploadFileResponse = UploadFileResponseSchema.parse(
          await response.json()
        );
        logger2.debug("Calling /v1/file/upload successful");
        return uploadFileResponse;
      } catch (err) {
        logger2.error(`Invalid upload response: ${response.status}`);
        throw new APIError(
          response,
          `Invalid presigned S3 URL response: ${err}`
        );
      }
    } catch (err) {
      this.handleHttpErrors(err);
    }
  }
  /**
   * Upload image bytes to S3 using presigned URL
   *
   * @param url S3 presigned URL
   * @param content Image bytes to upload
   * @throws {APIError} If upload fails; failures that happen before any
   *   response exists are reported with a synthetic 500 response.
   */
  async uploadToS3(url, content) {
    logger2.debug("Uploading image to S3");
    let response = null;
    try {
      response = await this.fetch(url, {
        body: content,
        method: "PUT"
      });
      if (!response.ok) {
        await this.handleResponseError(response);
      }
    } catch (err) {
      logger2.error(`S3 upload failed ${err}`);
      if (err instanceof APIError) {
        throw err;
      }
      throw new APIError(
        response ?? new Response(null, { status: 500 }),
        `${err}`
      );
    }
  }
  /**
   * Get S3 presigned URL and upload image (convenience method)
   *
   * @param screenshot Screenshot image bytes
   * @param apiVersion API version header
   * @returns {UploadFileResponse} The response from /v1/file/upload with uuid and presigned S3 URL
   */
  async putS3PresignedUrl(screenshot, apiVersion) {
    const uploadFileResponse = await this.getS3PresignedUrl(apiVersion);
    await this.uploadToS3(uploadFileResponse.url, screenshot);
    return uploadFileResponse;
  }
  /**
   * Calls /v1/generate for one of the supported workers.
   *
   * Camel-case options are forwarded to the API under snake_case keys. The
   * request declares a JSON content type because the body is serialized JSON.
   *
   * @returns The validated generate response, with `request_id` taken from
   *   the `X-Request-ID` response header.
   * @throws {ValueError} When `workerId` is not a supported worker.
   */
  async callWorker({
    workerId,
    overallTodo,
    taskDescription,
    todos,
    history = [],
    currentTodoIndex,
    taskExecutionSummary,
    currentScreenshot,
    currentSubtaskInstruction,
    windowSteps,
    windowScreenshots,
    resultScreenshot,
    priorNotes,
    latestTodoSummary,
    apiVersion
  }) {
    const validWorkers = ["oagi_first", "oagi_follow", "oagi_task_summary"];
    if (!validWorkers.includes(workerId)) {
      throw new ValueError(
        `Invalid worker_id '${workerId}'. Must be one of: ${validWorkers}`
      );
    }
    logger2.info(`Calling /v1/generate with worker_id: ${workerId}`);
    const payload = {
      external_worker_id: workerId,
      overall_todo: overallTodo,
      task_description: taskDescription,
      todos,
      history,
      // Add optional memory fields
      current_todo_index: currentTodoIndex,
      task_execution_summary: taskExecutionSummary,
      // Add optional screenshot/worker-specific fields
      current_screenshot: currentScreenshot,
      current_subtask_instruction: currentSubtaskInstruction,
      window_steps: windowSteps,
      window_screenshots: windowScreenshots,
      result_screenshot: resultScreenshot,
      prior_notes: priorNotes,
      latest_todo_summary: latestTodoSummary
    };
    const headers = {
      "Content-Type": "application/json",
      ...this.buildHeaders(apiVersion)
    };
    try {
      const response = await this.fetch(API_V1_GENERATE_ENDPOINT, {
        body: JSON.stringify(payload),
        headers,
        method: "POST"
      });
      if (!response.ok) {
        await this.handleResponseError(response);
      }
      const result = GenerateResponseSchema.parse(await response.json());
      result.request_id = response.headers.get("X-Request-ID");
      logger2.info(
        `Generate request successful - tokens: ${result.prompt_tokens}+${result.completion_tokens}, request_id: ${result.request_id}`
      );
      return result;
    } catch (err) {
      this.handleHttpErrors(err);
    }
  }
};
__decorateClass([
  logTraceOnFailure
], _Client.prototype, "callWorker", 1);
var Client = _Client;
|
|
609
|
+
|
|
610
|
+
// src/actor.ts
|
|
611
|
+
import { randomUUID } from "crypto";
|
|
612
|
+
var logger3 = logger_default("task");
|
|
613
|
+
/**
 * Drives one task step by step: keeps the OpenAI-compatible message history,
 * enforces the step budget, and asks the model for the next actions.
 */
var Actor = class {
  /**
   * Client-side generated UUID
   */
  taskId = randomUUID();
  taskDescription = null;
  /**
   * OpenAI-compatible message history
   */
  messageHistory = [];
  maxSteps = DEFAULT_MAX_STEPS;
  /**
   * Current step counter
   */
  currentStep = 0;
  client;
  /**
   * @param apiKey API key forwarded to the Client.
   * @param baseUrl API base URL forwarded to the Client.
   * @param model Model used for inference.
   * @param temperature Default sampling temperature for steps.
   */
  constructor(apiKey, baseUrl, model = MODEL_ACTOR, temperature) {
    this.model = model;
    this.temperature = temperature;
    this.client = new Client(baseUrl, apiKey);
  }
  /**
   * Checks that a task is active and the step budget is not exhausted, then
   * counts the step.
   *
   * @throws {ValueError} When no task was initialised or the limit is reached.
   */
  validateAndIncrementStep() {
    if (!this.taskDescription) {
      throw new ValueError(
        "Task description must be set. Call initTask() first."
      );
    }
    if (this.currentStep >= this.maxSteps) {
      throw new ValueError(
        `Max steps limit (${this.maxSteps}) reached. Call initTask() to start a new task.`
      );
    }
    this.currentStep++;
  }
  /**
   * Get screenshot URL, uploading to S3 if needed (async version).
   * @param screenshot Screenshot as URL string, or raw bytes
   * @returns Screenshot URL (either direct or from S3 upload)
   */
  async ensureScreenshotUrl(screenshot) {
    if (typeof screenshot === "string") return screenshot;
    const uploadResponse = await this.client.putS3PresignedUrl(screenshot);
    return uploadResponse.download_url;
  }
  /**
   * Add user message with screenshot to message history.
   *
   * @param screenshot URL of the screenshot
   * @param prompt Optional prompt text (for first message only)
   */
  addUserMessageToHistory(screenshot, prompt) {
    const content = [];
    if (prompt) {
      content.push({
        type: "text",
        text: prompt
      });
    }
    content.push({
      type: "image_url",
      image_url: {
        url: screenshot
      }
    });
    this.messageHistory.push({ role: "user", content });
  }
  /**
   * Build prompt for first message only.
   *
   * @returns The prompt when the history is empty, otherwise undefined.
   */
  buildStepPrompt() {
    if (this.messageHistory.length === 0) {
      return buildPrompt(this.taskDescription);
    }
  }
  /**
   * Initialize a new task with the given description.
   *
   * Resets the task id, history and step counter. `maxSteps` is capped to
   * the limit of the configured model.
   *
   * @param taskDescription Task description
   * @param maxSteps Maximum number of steps allowed
   */
  initTask(taskDescription, maxSteps = DEFAULT_MAX_STEPS) {
    this.taskId = randomUUID();
    this.taskDescription = taskDescription;
    this.messageHistory = [];
    const limit = this.model === MODEL_THINKER ? MAX_STEPS_THINKER : MAX_STEPS_ACTOR;
    if (maxSteps > limit) {
      logger3.warn(
        `max_steps (${maxSteps}) exceeds limit for model '${this.model}'. Capping to ${limit}.`
      );
      maxSteps = limit;
    }
    this.maxSteps = maxSteps;
    this.currentStep = 0;
    logger3.info(
      `Task initialized: '${taskDescription}' (max_steps: ${maxSteps})`
    );
  }
  /**
   * Send screenshot to the server and get the next actions.
   *
   * If the step fails, messages appended during the failed attempt are
   * removed so a retry does not send two consecutive user turns.
   *
   * @param screenshot Screenshot as URL string, or raw bytes
   * @param _instruction Optional additional instruction for this step (currently unused)
   * @param temperature Sampling temperature for this step (overrides task default if provided)
   * @returns The parsed step (reason, actions, stop flag).
   */
  async step(screenshot, _instruction, temperature) {
    this.validateAndIncrementStep();
    logger3.debug(`Executing step for task: '${this.taskDescription}'`);
    const historyLengthBefore = this.messageHistory.length;
    try {
      const screenshotUrl = await this.ensureScreenshotUrl(screenshot);
      this.addUserMessageToHistory(screenshotUrl, this.buildStepPrompt());
      const [step, rawOutput] = await this.client.chatCompletions(
        this.model,
        this.messageHistory,
        temperature ?? this.temperature,
        this.taskId
      );
      if (rawOutput) {
        this.messageHistory.push({
          role: "assistant",
          content: [
            {
              type: "text",
              text: rawOutput
            }
          ]
        });
      }
      if (step.stop) {
        logger3.info("Task completed.");
      } else {
        logger3.debug(`Step completed with ${step.actions.length} actions`);
      }
      return step;
    } catch (err) {
      // Drop whatever this attempt appended so the history stays well-formed.
      this.messageHistory.length = historyLengthBefore;
      logger3.error(`Error during step execution: ${err}`);
      throw err;
    }
  }
};
|
|
752
|
+
|
|
753
|
+
// src/agent/default.ts
|
|
754
|
+
var logger4 = logger_default("agent.default");
|
|
755
|
+
/**
 * Calls `handler.reset()` when the handler exposes a `reset` function;
 * does nothing otherwise.
 *
 * @param handler Action handler object.
 */
var resetHandler = (handler) => {
  const canReset = typeof handler.reset === "function";
  if (!canReset) return;
  handler.reset();
};
|
|
760
|
+
/**
 * Resolves after the given delay.
 *
 * @param seconds Delay in seconds (converted to milliseconds for the timer).
 * @returns Promise that resolves with no value once the timer fires.
 */
var sleep = (seconds) => {
  const delayMs = seconds * 1e3;
  return new Promise((resolve) => {
    setTimeout(resolve, delayMs);
  });
};
|
|
761
|
+
/** Default asynchronous agent implementation using OAGI client. */
var DefaultAgent = class {
  api_key;
  base_url;
  model;
  max_steps;
  temperature;
  step_observer;
  step_delay;
  /**
   * @param api_key API key forwarded to the Actor.
   * @param base_url API base URL forwarded to the Actor.
   * @param model Model used for inference.
   * @param max_steps Requested step budget (the Actor may cap it per model).
   * @param temperature Sampling temperature passed to every step.
   * @param step_observer Optional observer notified of step and action events.
   * @param step_delay Pause after each step, in seconds.
   */
  constructor(api_key, base_url, model = MODEL_ACTOR, max_steps = DEFAULT_MAX_STEPS, temperature = DEFAULT_TEMPERATURE, step_observer, step_delay = DEFAULT_STEP_DELAY) {
    this.api_key = api_key;
    this.base_url = base_url;
    this.model = model;
    this.max_steps = max_steps;
    this.temperature = temperature;
    this.step_observer = step_observer;
    this.step_delay = step_delay;
  }
  /**
   * Runs a task until the model finishes or the step budget is used up.
   *
   * Each iteration takes a screenshot, asks the actor for the next step,
   * notifies the observer, executes the actions, and waits `step_delay`.
   *
   * @param instruction Task description.
   * @param action_handler Object whose `handle(actions)` executes actions;
   *   its optional `reset()` is called once before the first step.
   * @param image_provider Object whose `provide()` returns a screenshot.
   * @returns `true` when a step reported stop, `false` when the budget ran out.
   * @throws Whatever `action_handler.handle` throws, after the observer has
   *   been notified of the error.
   */
  async execute(instruction, action_handler, image_provider) {
    const actor = new Actor(this.api_key, this.base_url, this.model);
    logger4.info(`Starting async task execution: ${instruction}`);
    await actor.initTask(instruction, this.max_steps);
    // The actor may have capped the budget for this model. Looping to the
    // effective limit avoids a ValueError from a step beyond the cap.
    const stepLimit = actor.maxSteps;
    resetHandler(action_handler);
    for (let i = 0; i < stepLimit; i++) {
      const step_num = i + 1;
      logger4.debug(`Executing step ${step_num}/${stepLimit}`);
      const image = await image_provider.provide();
      const step = await actor.step(image, void 0, this.temperature);
      if (step.reason) {
        logger4.info(`Step ${step_num}: ${step.reason}`);
      }
      if (this.step_observer) {
        const event = {
          type: "step",
          timestamp: /* @__PURE__ */ new Date(),
          step_num,
          image,
          step,
          task_id: actor.taskId
        };
        await this.step_observer.onEvent(event);
      }
      if (step.actions?.length) {
        logger4.info(`Actions (${step.actions.length}):`);
        for (const action of step.actions) {
          const count_suffix = action.count && action.count > 1 ? ` x${action.count}` : "";
          logger4.info(` [${action.type}] ${action.argument}${count_suffix}`);
        }
        let error = null;
        try {
          await action_handler.handle(step.actions);
        } catch (e) {
          error = String(e);
          throw e;
        } finally {
          // Report the outcome whether or not the handler failed.
          if (this.step_observer) {
            const event = {
              type: "action",
              timestamp: /* @__PURE__ */ new Date(),
              step_num,
              actions: step.actions,
              error: error ?? void 0
            };
            await this.step_observer.onEvent(event);
          }
        }
      }
      if (this.step_delay > 0) {
        await sleep(this.step_delay);
      }
      if (step.stop) {
        logger4.info(`Task completed successfully after ${step_num} steps`);
        return true;
      }
    }
    logger4.warn(
      `Task reached max steps (${stepLimit}) without completion`
    );
    return false;
  }
};
|
|
842
|
+
|
|
843
|
+
// src/agent/registry.ts
|
|
844
|
+
/**
 * Registry of agent factories keyed by mode name.
 *
 * Created without a prototype so that inherited `Object.prototype` members
 * ("toString", "constructor", ...) are never mistaken for registered modes
 * by the `in` checks below.
 */
var agentRegistry = Object.create(null);

/**
 * Creates a registrar for the given mode.
 *
 * @param {string} mode - Mode name to register.
 * @returns {(func: Function) => Function} Function that stores `func` as the
 *   factory for `mode` and returns it unchanged.
 * @throws {Error} (from the returned registrar) when `mode` is already registered.
 */
var asyncAgentRegister = (mode) => {
  return (func) => {
    if (mode in agentRegistry) {
      throw new Error(
        `Agent mode '${mode}' is already registered. Cannot register the same mode twice.`
      );
    }
    agentRegistry[mode] = func;
    return func;
  };
};

/**
 * Looks up the factory registered for `mode`.
 *
 * @param {string} mode - Mode name.
 * @returns {Function} The registered factory.
 * @throws {Error} When no factory is registered under `mode`.
 */
var getAgentFactory = (mode) => {
  if (!(mode in agentRegistry)) {
    const availableModes = Object.keys(agentRegistry);
    throw new Error(
      `Unknown agent mode: '${mode}'. Available modes: ${availableModes}`
    );
  }
  return agentRegistry[mode];
};

/**
 * Lists the names of all registered modes.
 *
 * @returns {string[]}
 */
var listAgentModes = () => {
  return Object.keys(agentRegistry);
};

/**
 * Builds an agent for `mode` by invoking its registered factory.
 *
 * @param {string} mode - Registered mode name.
 * @param {object} [options={}] - Options passed through to the factory.
 * @returns {object} The agent produced by the factory.
 * @throws {Error} When `mode` is not registered.
 * @throws {TypeError} When the factory result has no `execute` method.
 */
var createAgent = (mode, options = {}) => {
  const factory = getAgentFactory(mode);
  const agent = factory(options);
  if (!agent || typeof agent.execute !== "function") {
    throw new TypeError(
      `Factory for mode '${mode}' returned an object that doesn't implement Agent. Expected an object with an 'execute' method.`
    );
  }
  return agent;
};
|
|
878
|
+
|
|
879
|
+
// src/agent/factories.ts
|
|
880
|
+
// Registers the "actor" mode: a DefaultAgent whose unset options fall back to
// MODEL_ACTOR, DEFAULT_MAX_STEPS, DEFAULT_TEMPERATURE_LOW and DEFAULT_STEP_DELAY.
asyncAgentRegister("actor")((options = {}) => {
  // Same rule as a destructuring default: only `undefined` triggers the fallback.
  const withDefault = (value, fallback) => (value === void 0 ? fallback : value);
  const apiKey = options.apiKey;
  const baseUrl = options.baseUrl;
  const model = withDefault(options.model, MODEL_ACTOR);
  const maxSteps = withDefault(options.maxSteps, DEFAULT_MAX_STEPS);
  const temperature = withDefault(options.temperature, DEFAULT_TEMPERATURE_LOW);
  const stepObserver = options.stepObserver;
  const stepDelay = withDefault(options.stepDelay, DEFAULT_STEP_DELAY);
  return new DefaultAgent(
    apiKey,
    baseUrl,
    model,
    maxSteps,
    temperature,
    stepObserver ?? void 0,
    stepDelay
  );
});
|
|
900
|
+
// Registers the "thinker" mode: a DefaultAgent whose unset options fall back to
// MODEL_THINKER, DEFAULT_MAX_STEPS_THINKER, DEFAULT_TEMPERATURE_LOW and
// DEFAULT_STEP_DELAY.
asyncAgentRegister("thinker")((options = {}) => {
  // Same rule as a destructuring default: only `undefined` triggers the fallback.
  const withDefault = (value, fallback) => (value === void 0 ? fallback : value);
  const apiKey = options.apiKey;
  const baseUrl = options.baseUrl;
  const model = withDefault(options.model, MODEL_THINKER);
  const maxSteps = withDefault(options.maxSteps, DEFAULT_MAX_STEPS_THINKER);
  const temperature = withDefault(options.temperature, DEFAULT_TEMPERATURE_LOW);
  const stepObserver = options.stepObserver;
  const stepDelay = withDefault(options.stepDelay, DEFAULT_STEP_DELAY);
  return new DefaultAgent(
    apiKey,
    baseUrl,
    model,
    maxSteps,
    temperature,
    stepObserver ?? void 0,
    stepDelay
  );
});
|
|
920
|
+
|
|
921
|
+
// src/agent/observer/exporters.ts
|
|
922
|
+
import fs from "fs";
|
|
923
|
+
import path from "path";
|
|
924
|
+
import { fileURLToPath, pathToFileURL } from "url";
|
|
925
|
+
/**
 * Creates `dirPath` together with any missing parent directories.
 * Directories that already exist are left as they are.
 *
 * @param {string} dirPath - Directory to create.
 * @returns {void}
 */
var ensureDir = (dirPath) => {
  const options = { recursive: true };
  fs.mkdirSync(dirPath, options);
};
|
|
928
|
+
/**
 * Extracts the coordinates carried by a pointer action.
 *
 * A leading "(" and a trailing ")" are stripped from `action.argument`
 * before it is handed to the matching parser.
 *
 * @param {{type: string, argument: string}} action
 * @returns {object|null} `{type: "click", x, y}` for click-like actions,
 *   `{type: "drag", x1, y1, x2, y2}` for drags,
 *   `{type: "scroll", x, y, direction}` for scrolls, and `null` when the
 *   parser rejects the argument or the action type carries no coordinates.
 */
var parseActionCoords = (action) => {
  const arg = action.argument.replace(/^\(|\)$/g, "");
  const kind = action.type;

  if (kind === "click" || kind === "left_double" || kind === "left_triple" || kind === "right_single") {
    const point = parseCoords(arg);
    return point ? { type: "click", x: point[0], y: point[1] } : null;
  }

  if (kind === "drag") {
    const ends = parseDragCoords(arg);
    if (!ends) {
      return null;
    }
    return { type: "drag", x1: ends[0], y1: ends[1], x2: ends[2], y2: ends[3] };
  }

  if (kind === "scroll") {
    const parsed = parseScroll(arg);
    if (!parsed) {
      return null;
    }
    return { type: "scroll", x: parsed[0], y: parsed[1], direction: parsed[2] };
  }

  return null;
};
|
|
970
|
+
var exportToMarkdown = (events, filePath, imagesDir) => {
|
|
971
|
+
const outputDir = path.dirname(filePath);
|
|
972
|
+
ensureDir(outputDir);
|
|
973
|
+
if (imagesDir) {
|
|
974
|
+
ensureDir(imagesDir);
|
|
975
|
+
}
|
|
976
|
+
const lines = ["# Agent Execution Report\n"];
|
|
977
|
+
for (const event of events) {
|
|
978
|
+
const d = event.timestamp instanceof Date ? event.timestamp : new Date(event.timestamp);
|
|
979
|
+
const timestamp = d.toTimeString().slice(0, 8);
|
|
980
|
+
switch (event.type) {
|
|
981
|
+
case "step":
|
|
982
|
+
lines.push(`
|
|
983
|
+
## Step ${event.step_num}
|
|
984
|
+
`);
|
|
985
|
+
lines.push(`**Time:** ${timestamp}
|
|
986
|
+
`);
|
|
987
|
+
if (event.task_id) {
|
|
988
|
+
lines.push(`**Task ID:** \`${event.task_id}\`
|
|
989
|
+
`);
|
|
990
|
+
}
|
|
991
|
+
if (typeof event.image !== "string") {
|
|
992
|
+
if (imagesDir) {
|
|
993
|
+
const imageFilename = `step_${event.step_num}.png`;
|
|
994
|
+
const imagePath = path.join(imagesDir, imageFilename);
|
|
995
|
+
fs.writeFileSync(imagePath, Buffer.from(event.image));
|
|
996
|
+
const relPath = path.join(path.basename(imagesDir), imageFilename);
|
|
997
|
+
lines.push(`
|
|
998
|
+

|
|
999
|
+
`);
|
|
1000
|
+
} else {
|
|
1001
|
+
lines.push(
|
|
1002
|
+
`
|
|
1003
|
+
*[Screenshot captured - ${event.image.byteLength} bytes]*
|
|
1004
|
+
`
|
|
1005
|
+
);
|
|
1006
|
+
}
|
|
1007
|
+
} else {
|
|
1008
|
+
lines.push(`
|
|
1009
|
+
**Screenshot URL:** ${event.image}
|
|
1010
|
+
`);
|
|
1011
|
+
}
|
|
1012
|
+
if (event.step.reason) {
|
|
1013
|
+
lines.push(`
|
|
1014
|
+
**Reasoning:**
|
|
1015
|
+
> ${event.step.reason}
|
|
1016
|
+
`);
|
|
1017
|
+
}
|
|
1018
|
+
if (event.step.actions?.length) {
|
|
1019
|
+
lines.push("\n**Planned Actions:**\n");
|
|
1020
|
+
for (const action of event.step.actions) {
|
|
1021
|
+
const countStr = action.count && action.count > 1 ? ` (x${action.count})` : "";
|
|
1022
|
+
lines.push(`- \`${action.type}\`: ${action.argument}${countStr}
|
|
1023
|
+
`);
|
|
1024
|
+
}
|
|
1025
|
+
}
|
|
1026
|
+
if (event.step.stop) {
|
|
1027
|
+
lines.push("\n**Status:** Task Complete\n");
|
|
1028
|
+
}
|
|
1029
|
+
break;
|
|
1030
|
+
case "action":
|
|
1031
|
+
lines.push(`
|
|
1032
|
+
### Actions Executed (${timestamp})
|
|
1033
|
+
`);
|
|
1034
|
+
if (event.error) {
|
|
1035
|
+
lines.push(`
|
|
1036
|
+
**Error:** ${event.error}
|
|
1037
|
+
`);
|
|
1038
|
+
} else {
|
|
1039
|
+
lines.push("\n**Result:** Success\n");
|
|
1040
|
+
}
|
|
1041
|
+
break;
|
|
1042
|
+
case "log":
|
|
1043
|
+
lines.push(`
|
|
1044
|
+
> **Log (${timestamp}):** ${event.message}
|
|
1045
|
+
`);
|
|
1046
|
+
break;
|
|
1047
|
+
case "split":
|
|
1048
|
+
if (event.label) {
|
|
1049
|
+
lines.push(`
|
|
1050
|
+
---
|
|
1051
|
+
|
|
1052
|
+
### ${event.label}
|
|
1053
|
+
`);
|
|
1054
|
+
} else {
|
|
1055
|
+
lines.push("\n---\n");
|
|
1056
|
+
}
|
|
1057
|
+
break;
|
|
1058
|
+
case "image":
|
|
1059
|
+
break;
|
|
1060
|
+
case "plan": {
|
|
1061
|
+
const phaseTitles = {
|
|
1062
|
+
initial: "Initial Planning",
|
|
1063
|
+
reflection: "Reflection",
|
|
1064
|
+
summary: "Summary"
|
|
1065
|
+
};
|
|
1066
|
+
const phaseTitle = phaseTitles[event.phase] ?? event.phase;
|
|
1067
|
+
lines.push(`
|
|
1068
|
+
### ${phaseTitle} (${timestamp})
|
|
1069
|
+
`);
|
|
1070
|
+
if (event.request_id) {
|
|
1071
|
+
lines.push(`**Request ID:** \`${event.request_id}\`
|
|
1072
|
+
`);
|
|
1073
|
+
}
|
|
1074
|
+
if (event.image) {
|
|
1075
|
+
if (typeof event.image !== "string") {
|
|
1076
|
+
if (imagesDir) {
|
|
1077
|
+
const imageFilename = `plan_${event.phase}_${Date.now()}.png`;
|
|
1078
|
+
const imagePath = path.join(imagesDir, imageFilename);
|
|
1079
|
+
fs.writeFileSync(imagePath, Buffer.from(event.image));
|
|
1080
|
+
const relPath = path.join(
|
|
1081
|
+
path.basename(imagesDir),
|
|
1082
|
+
imageFilename
|
|
1083
|
+
);
|
|
1084
|
+
lines.push(`
|
|
1085
|
+

|
|
1086
|
+
`);
|
|
1087
|
+
} else {
|
|
1088
|
+
lines.push(
|
|
1089
|
+
`
|
|
1090
|
+
*[Screenshot captured - ${event.image.byteLength} bytes]*
|
|
1091
|
+
`
|
|
1092
|
+
);
|
|
1093
|
+
}
|
|
1094
|
+
} else {
|
|
1095
|
+
lines.push(`
|
|
1096
|
+
**Screenshot URL:** ${event.image}
|
|
1097
|
+
`);
|
|
1098
|
+
}
|
|
1099
|
+
}
|
|
1100
|
+
if (event.reasoning) {
|
|
1101
|
+
lines.push(`
|
|
1102
|
+
**Reasoning:**
|
|
1103
|
+
> ${event.reasoning}
|
|
1104
|
+
`);
|
|
1105
|
+
}
|
|
1106
|
+
if (event.result) {
|
|
1107
|
+
lines.push(`
|
|
1108
|
+
**Result:** ${event.result}
|
|
1109
|
+
`);
|
|
1110
|
+
}
|
|
1111
|
+
break;
|
|
1112
|
+
}
|
|
1113
|
+
}
|
|
1114
|
+
}
|
|
1115
|
+
fs.writeFileSync(filePath, lines.join(""), "utf-8");
|
|
1116
|
+
};
|
|
1117
|
+
/**
 * Converts recorded events into the plain records embedded in the HTML report.
 *
 * Timestamps become local "HH:MM:SS" strings, binary images become base64
 * strings (string images pass through unchanged), and "image" events as well
 * as unknown event types are dropped.
 *
 * @param {Array<object>} events - Recorded events.
 * @returns {Array<object>} One record per "step", "action", "log", "split"
 *   or "plan" event, in input order.
 */
var convertEventsForHtml = (events) => {
  const encodeImage = (image) => {
    if (typeof image !== "string") {
      return Buffer.from(image).toString("base64");
    }
    return image;
  };

  const records = [];
  for (const event of events) {
    const when = event.timestamp instanceof Date ? event.timestamp : new Date(event.timestamp);
    const timestamp = when.toTimeString().slice(0, 8);
    const kind = event.type;

    if (kind === "step") {
      const action_coords = [];
      const actions = [];
      if (event.step.actions?.length) {
        for (const action of event.step.actions) {
          const located = parseActionCoords(action);
          if (located) {
            action_coords.push(located);
          }
          actions.push({
            type: action.type,
            argument: action.argument,
            count: action.count ?? 1
          });
        }
      }
      const image = encodeImage(event.image);
      records.push({
        event_type: "step",
        timestamp,
        step_num: event.step_num,
        image,
        action_coords,
        reason: event.step.reason,
        actions,
        stop: event.step.stop,
        task_id: event.task_id
      });
    } else if (kind === "action") {
      records.push({
        event_type: "action",
        timestamp,
        error: event.error ?? null
      });
    } else if (kind === "log") {
      records.push({ event_type: "log", timestamp, message: event.message });
    } else if (kind === "split") {
      records.push({ event_type: "split", timestamp, label: event.label });
    } else if (kind === "plan") {
      // Plan events may come without a screenshot.
      const image = event.image ? encodeImage(event.image) : null;
      records.push({
        event_type: "plan",
        timestamp,
        phase: event.phase,
        image,
        reasoning: event.reasoning,
        result: event.result ?? null,
        request_id: event.request_id ?? null
      });
    }
    // "image" events and unknown types produce no record.
  }
  return records;
};
|
|
1197
|
+
/**
 * Writes an HTML report of the recorded events to `filePath`.
 *
 * The report is produced from `report_template.html`, read from the directory
 * of this module, by substituting the first `{EVENTS_DATA}` placeholder with
 * the JSON-serialised events.
 *
 * @param {Array<object>} events - Recorded events.
 * @param {string} filePath - Destination of the HTML file; parent directories
 *   are created as needed.
 * @returns {void}
 * @throws When the template file cannot be read or the output cannot be written.
 */
var exportToHtml = (events, filePath) => {
  const outputDir = path.dirname(filePath);
  ensureDir(outputDir);
  // Resolve the module location in both ESM (import.meta.url) and CJS (__filename) builds.
  const moduleUrl = import.meta?.url ? import.meta.url : pathToFileURL(__filename).href;
  const moduleDir = path.dirname(fileURLToPath(moduleUrl));
  const templatePath = path.join(moduleDir, "report_template.html");
  const template = fs.readFileSync(templatePath, "utf-8");
  const eventsData = convertEventsForHtml(events);
  const eventsJson = JSON.stringify(eventsData);
  // Use a replacer function: with a plain string replacement, sequences such
  // as "$&", "$'" or "$$" occurring in the event data would be expanded as
  // replacement patterns and corrupt the embedded JSON.
  const htmlContent = template.replace("{EVENTS_DATA}", () => eventsJson);
  fs.writeFileSync(filePath, htmlContent, "utf-8");
};
|
|
1209
|
+
/**
 * Writes the recorded events to `filePath` as pretty-printed JSON.
 *
 * Every event is copied with its timestamp converted to an ISO-8601 string.
 * Binary images (an `ArrayBuffer`, or any typed array / Buffer / DataView)
 * are replaced by their base64 encoding and flagged with
 * `image_encoding: "base64"`; string images are kept as they are.
 *
 * @param {Array<object>} events - Recorded events.
 * @param {string} filePath - Destination of the JSON file; parent directories
 *   are created as needed.
 * @returns {void}
 */
var exportToJson = (events, filePath) => {
  fs.mkdirSync(path.dirname(filePath), { recursive: true });

  const isBinary = (value) => value instanceof ArrayBuffer || ArrayBuffer.isView(value);
  // Encode exactly the bytes the value covers (a view may span only part of its buffer).
  const toBase64 = (binary) => {
    const bytes = ArrayBuffer.isView(binary)
      ? Buffer.from(binary.buffer, binary.byteOffset, binary.byteLength)
      : Buffer.from(binary);
    return bytes.toString("base64");
  };

  const jsonEvents = events.map((event) => {
    const timestamp = event.timestamp instanceof Date ? event.timestamp.toISOString() : new Date(event.timestamp).toISOString();
    if ("image" in event && isBinary(event.image)) {
      return {
        ...event,
        timestamp,
        image: toBase64(event.image),
        image_encoding: "base64"
      };
    }
    return {
      ...event,
      timestamp
    };
  });

  fs.writeFileSync(filePath, JSON.stringify(jsonEvents, null, 2), "utf-8");
};
|
|
1229
|
+
|
|
1230
|
+
// src/agent/observer/agent_observer.ts
|
|
1231
|
+
var AsyncAgentObserver = class extends StepObserver {
  /**
   * Step observer that keeps every received event in memory.
   *
   * Besides collecting the events passed to `onEvent`, it lets callers add
   * log and split markers, query events by step number, and export the
   * recording as Markdown, HTML or JSON.
   */
  events = [];

  /** Appends `event` to the recording. */
  async onEvent(event) {
    this.events.push(event);
  }

  /** Records a "log" event carrying `message`, stamped with the current time. */
  addLog(message) {
    this.events.push({
      type: "log",
      timestamp: new Date(),
      message
    });
  }

  /** Records a "split" event (a separator) with an optional `label`. */
  addSplit(label = "") {
    this.events.push({
      type: "split",
      timestamp: new Date(),
      label
    });
  }

  /** Discards all recorded events. */
  clear() {
    this.events = [];
  }

  /**
   * Returns the recorded events whose `step_num` is defined and strictly
   * equal to `step_num`.
   */
  getEventsByStep(step_num) {
    const matches = [];
    for (const event of this.events) {
      if (event.step_num !== void 0 && event.step_num === step_num) {
        matches.push(event);
      }
    }
    return matches;
  }

  /**
   * Exports the recording.
   *
   * @param format - "markdown", "html" or "json" (strings are matched case-insensitively).
   * @param path2 - Destination file path.
   * @param images_dir - Image directory, used by the Markdown export only.
   * @throws {Error} When `format` is not one of the supported formats.
   */
  export(format, path2, images_dir) {
    const normalized = typeof format === "string" ? format.toLowerCase() : format;
    if (normalized === "markdown") {
      exportToMarkdown(this.events, path2, images_dir ?? void 0);
      return;
    }
    if (normalized === "html") {
      exportToHtml(this.events, path2);
      return;
    }
    if (normalized === "json") {
      exportToJson(this.events, path2);
      return;
    }
    throw new Error(`Unknown export format: ${String(format)}`);
  }
};
|
|
1284
|
+
|
|
1285
|
+
// src/handler.ts
|
|
1286
|
+
import robot from "robotjs";
|
|
1287
|
+
import sharp from "sharp";
|
|
1288
|
+
/**
 * Returns a promise that resolves after `ms` milliseconds.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
var sleep2 = (ms) => {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
};
|
|
1289
|
+
/**
 * Maps a resample mode name to the kernel name passed to sharp's `resize`.
 *
 * "NEAREST" -> "nearest", "BICUBIC" -> "cubic", "BILINEAR" -> "mitchell";
 * "LANCZOS" and every other value fall back to "lanczos3".
 *
 * @param {string} resample - Resample mode name (matched case-sensitively).
 * @returns {string} Kernel name.
 */
var toSharpKernel = (resample) => {
  if (resample === "NEAREST") {
    return "nearest";
  }
  if (resample === "BICUBIC") {
    return "cubic";
  }
  if (resample === "BILINEAR") {
    return "mitchell";
  }
  return "lanczos3";
};
|
|
1302
|
+
/**
 * Normalises a key name: trims it, lower-cases it and resolves aliases.
 *
 * Aliases: "caps_lock"/"caps" -> "capslock", "page_up" -> "pageup",
 * "page_down" -> "pagedown", "cmd" -> "command". "ctrl" becomes "command"
 * when `opts.macosCtrlToCmd` is truthy and the process runs on macOS, and
 * "control" otherwise. Any other name is returned trimmed and lower-cased.
 *
 * @param {string} raw - Key name as received.
 * @param {{macosCtrlToCmd?: boolean}} opts - Read for every key that is not
 *   one of the fixed aliases above.
 * @returns {string} Normalised key name.
 */
var normalizeKey = (raw, opts) => {
  const key = raw.trim().toLowerCase();
  switch (key) {
    case "caps_lock":
    case "caps":
      return "capslock";
    case "page_up":
    case "pageup":
      return "pageup";
    case "page_down":
    case "pagedown":
      return "pagedown";
    case "cmd":
      return "command";
    default:
      break;
  }
  const ctrlMeansCommand = opts.macosCtrlToCmd && process.platform === "darwin";
  if (key !== "ctrl") {
    return key;
  }
  return ctrlMeansCommand ? "command" : "control";
};
|
|
1314
|
+
/**
 * Splits a hotkey expression such as "(ctrl+shift+t)" into normalised key names.
 *
 * One leading "(" and one trailing ")" are removed after trimming; the
 * remainder is split on "+", each part goes through `normalizeKey`, and
 * empty results are dropped.
 *
 * @param {string} arg - Hotkey expression.
 * @param {object} opts - Options forwarded to `normalizeKey`.
 * @returns {string[]} Normalised key names in their original order.
 */
var parseHotkey = (arg, opts) => {
  const inner = arg.trim().replace(/^\(/, "").replace(/\)$/, "");
  const keys = [];
  for (const part of inner.split("+")) {
    const key = normalizeKey(part, opts);
    if (key) {
      keys.push(key);
    }
  }
  return keys;
};
|
|
1318
|
+
/**
 * Trims `s` and removes at most one leading "(" and one trailing ")".
 *
 * @param {string} s - Raw argument text.
 * @returns {string} The text without its outer parentheses.
 */
var stripOuterParens = (s) => {
  let text = s.trim();
  if (text.startsWith("(")) {
    text = text.slice(1);
  }
  if (text.endsWith(")")) {
    text = text.slice(0, -1);
  }
  return text;
};
|
|
1319
|
+
/**
 * Upper-cases the ASCII letters of `text` when session caps lock is enabled.
 *
 * @param {string} text - Text about to be typed.
 * @param {boolean} enabled - Whether session caps lock is active.
 * @returns {string} `text` unchanged when disabled; otherwise `text` with
 *   a-z converted to A-Z and every other character left as it is.
 */
var applySessionCaps = (text, enabled) => {
  if (enabled) {
    return text.replace(/[a-z]/g, (letter) => letter.toUpperCase());
  }
  return text;
};
|
|
1323
|
+
/**
 * Builds a fresh default configuration for desktop automation.
 *
 * `scrollAmount` is 2 on macOS and 100 on every other platform; all other
 * values are fixed.
 *
 * @returns {{dragDurationMs: number, scrollAmount: number, waitDurationMs: number,
 *   hotkeyDelayMs: number, macosCtrlToCmd: boolean, capslockMode: string}}
 */
var defaultDesktopAutomationConfig = () => {
  const onMac = process.platform === "darwin";
  return {
    dragDurationMs: 500,
    scrollAmount: onMac ? 2 : 100,
    waitDurationMs: 1e3,
    hotkeyDelayMs: 100,
    macosCtrlToCmd: true,
    capslockMode: "session"
  };
};
|
|
1331
|
+
/**
 * Image provider that captures the screen through robotjs and encodes the
 * capture with sharp according to the image configuration.
 */
var ScreenshotMaker = class _ScreenshotMaker {
  #cfg;

  /**
   * @param cfg - Partial image configuration; it overrides the defaults
   *   produced by `ImageConfigSchema.parse({})`.
   */
  constructor(cfg) {
    const defaults = ImageConfigSchema.parse({});
    this.#cfg = { ...defaults, ...cfg };
  }

  /**
   * Copies the bytes of `buffer` into a newly allocated `ArrayBuffer`.
   *
   * @param buffer - Byte container exposing `length` and indexed access.
   * @returns {ArrayBuffer} Independent copy of the bytes.
   */
  static toArrayBuffer(buffer) {
    const bytes = new Uint8Array(new ArrayBuffer(buffer.length));
    let index = 0;
    while (index < buffer.length) {
      bytes[index] = buffer[index];
      index += 1;
    }
    return bytes.buffer;
  }

  /**
   * Captures the screen and returns the encoded image.
   *
   * The capture is repacked into 3 bytes per pixel, reading source bytes
   * 2, 1, 0 of each pixel in that order, optionally resized with "fill" fit,
   * and encoded as PNG when the configured format is "PNG", otherwise as JPEG.
   *
   * @returns {Promise<ArrayBuffer>} Encoded image bytes.
   */
  async provide() {
    const { width, height } = robot.getScreenSize();
    const shot = robot.screen.capture(0, 0, width, height);
    const channels = 3;
    const pixels = new Uint8Array(shot.width * shot.height * channels);

    for (let row = 0; row < shot.height; row += 1) {
      for (let col = 0; col < shot.width; col += 1) {
        const dst = (row * shot.width + col) * channels;
        const src = shot.byteWidth * row + col * shot.bytesPerPixel;
        // Reverse the first three bytes of every source pixel.
        pixels[dst] = shot.image.readUInt8(src + 2);
        pixels[dst + 1] = shot.image.readUInt8(src + 1);
        pixels[dst + 2] = shot.image.readUInt8(src + 0);
      }
    }

    let pipeline = sharp(Buffer.from(pixels), {
      raw: {
        width: shot.width,
        height: shot.height,
        channels
      }
    });

    const wantsResize = this.#cfg.width || this.#cfg.height;
    if (wantsResize) {
      pipeline = pipeline.resize(this.#cfg.width ?? width, this.#cfg.height ?? height, {
        fit: "fill",
        kernel: toSharpKernel(this.#cfg.resample)
      });
    }

    let encoded;
    if (this.#cfg.format === "PNG") {
      const compressionLevel = this.#cfg.optimize ? 9 : 6;
      encoded = await pipeline.png({ compressionLevel }).toBuffer();
    } else {
      encoded = await pipeline.jpeg({ quality: this.#cfg.quality }).toBuffer();
    }
    return _ScreenshotMaker.toArrayBuffer(encoded);
  }
};
|
|
1378
|
+
/**
 * Action handler that executes agent actions on the local desktop through
 * robotjs (mouse movement/clicks, drags, hotkeys, typing, scrolling).
 */
var DefaultActionHandler = class {
  #cfg;
  // Caps-lock state emulated by the handler when `capslockMode` is not "system".
  #sessionCapsEnabled = false;

  /**
   * @param cfg - Partial configuration; it overrides the values from
   *   `defaultDesktopAutomationConfig()`.
   */
  constructor(cfg) {
    this.#cfg = { ...defaultDesktopAutomationConfig(), ...cfg };
  }

  /** Clears the emulated caps-lock state. */
  reset() {
    this.#sessionCapsEnabled = false;
  }

  /**
   * Executes the actions in order; an action with `count` N is executed N
   * times (once when `count` is absent).
   *
   * @param actions - Iterable of `{type, argument, count?}` actions.
   * @throws {Error} On unparsable coordinates/scroll arguments or an unknown
   *   action type.
   */
  async handle(actions) {
    for (const action of actions) {
      const count = action.count ?? 1;
      for (let i = 0; i < count; i++) {
        await this.#handleOne(action);
      }
    }
  }

  /**
   * Converts coordinates on a 0-1000 scale into screen pixels, clamped to
   * the range [1, size - 1] on each axis.
   */
  #denormalize(x, y) {
    const { width, height } = robot.getScreenSize();
    const px = Math.min(Math.max(Math.floor(x * width / 1e3), 1), width - 1);
    const py = Math.min(Math.max(Math.floor(y * height / 1e3), 1), height - 1);
    return { x: px, y: py };
  }

  /** Parses a coordinate pair from `arg` and moves the pointer there. */
  #moveToCoords(arg) {
    const coords = parseCoords(arg);
    if (!coords) throw new Error(`Invalid coords: ${arg}`);
    const p = this.#denormalize(coords[0], coords[1]);
    robot.moveMouse(p.x, p.y);
  }

  /** Executes a single action once. */
  async #handleOne(action) {
    const arg = stripOuterParens(action.argument);
    switch (action.type) {
      case "click": {
        this.#moveToCoords(arg);
        robot.mouseClick("left", false);
        return;
      }
      case "left_double": {
        this.#moveToCoords(arg);
        robot.mouseClick("left", true);
        return;
      }
      case "left_triple": {
        this.#moveToCoords(arg);
        // A double click followed by a single click.
        robot.mouseClick("left", true);
        robot.mouseClick("left", false);
        return;
      }
      case "right_single": {
        this.#moveToCoords(arg);
        robot.mouseClick("right", false);
        return;
      }
      case "drag": {
        const coords = parseDragCoords(arg);
        if (!coords) throw new Error(`Invalid drag coords: ${arg}`);
        const p1 = this.#denormalize(coords[0], coords[1]);
        const p2 = this.#denormalize(coords[2], coords[3]);
        robot.moveMouse(p1.x, p1.y);
        robot.mouseToggle("down", "left");
        try {
          robot.dragMouse(p2.x, p2.y);
          await sleep2(this.#cfg.dragDurationMs);
        } finally {
          // Always release the button, even if the drag itself fails, so the
          // desktop is not left with the left button held down.
          robot.mouseToggle("up", "left");
        }
        return;
      }
      case "hotkey": {
        const keys = parseHotkey(arg, {
          macosCtrlToCmd: this.#cfg.macosCtrlToCmd
        });
        if (keys.length === 1 && keys[0] === "capslock") {
          if (this.#cfg.capslockMode === "system") {
            robot.keyTap("capslock");
          } else {
            // Emulated caps lock: only affects text typed by this handler.
            this.#sessionCapsEnabled = !this.#sessionCapsEnabled;
          }
          return;
        }
        const last = keys.at(-1);
        if (!last) return;
        // The last key is tapped; all preceding keys act as modifiers.
        const modifiers = keys.slice(0, -1);
        robot.keyTap(last, modifiers);
        await sleep2(this.#cfg.hotkeyDelayMs);
        return;
      }
      case "type": {
        // Drop one surrounding quote character on each side, if present.
        const raw = arg.replace(/^['"]/, "").replace(/['"]$/, "");
        const text = applySessionCaps(raw, this.#sessionCapsEnabled);
        robot.typeString(text);
        return;
      }
      case "scroll": {
        const parsed = parseScroll(arg);
        if (!parsed) throw new Error(`Invalid scroll: ${arg}`);
        const p = this.#denormalize(parsed[0], parsed[1]);
        const direction = parsed[2];
        robot.moveMouse(p.x, p.y);
        // "up" scrolls by +scrollAmount, every other direction by -scrollAmount.
        const amount = direction === "up" ? this.#cfg.scrollAmount : -this.#cfg.scrollAmount;
        robot.scrollMouse(0, amount);
        return;
      }
      case "wait": {
        await sleep2(this.#cfg.waitDurationMs);
        return;
      }
      case "finish": {
        this.reset();
        return;
      }
      case "call_user": {
        // Nothing to do on the desktop for this action.
        return;
      }
      default: {
        throw new Error(`Unknown action type: ${String(action.type)}`);
      }
    }
  }
};
|
|
1506
|
+
|
|
1507
|
+
export {
|
|
1508
|
+
DEFAULT_BASE_URL,
|
|
1509
|
+
API_KEY_HELP_URL,
|
|
1510
|
+
MODEL_ACTOR,
|
|
1511
|
+
MODEL_THINKER,
|
|
1512
|
+
MODE_ACTOR,
|
|
1513
|
+
DEFAULT_MAX_STEPS_THINKER,
|
|
1514
|
+
DEFAULT_STEP_DELAY,
|
|
1515
|
+
OAGIError,
|
|
1516
|
+
APIError,
|
|
1517
|
+
AuthenticationError,
|
|
1518
|
+
RateLimitError,
|
|
1519
|
+
ValidationError,
|
|
1520
|
+
NotFoundError,
|
|
1521
|
+
ServerError,
|
|
1522
|
+
ConfigurationError,
|
|
1523
|
+
NetworkError,
|
|
1524
|
+
RequestTimeoutError,
|
|
1525
|
+
logger_default,
|
|
1526
|
+
StepObserver,
|
|
1527
|
+
Client,
|
|
1528
|
+
Actor,
|
|
1529
|
+
DefaultAgent,
|
|
1530
|
+
listAgentModes,
|
|
1531
|
+
createAgent,
|
|
1532
|
+
AsyncAgentObserver,
|
|
1533
|
+
ScreenshotMaker,
|
|
1534
|
+
DefaultActionHandler
|
|
1535
|
+
};
|
|
1536
|
+
//# sourceMappingURL=chunk-SDBYP57G.js.map
|