@oagi/oagi 0.1.4 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -41,11 +41,13 @@ var src_exports = {};
41
41
  __export(src_exports, {
42
42
  APIError: () => APIError,
43
43
  Actor: () => Actor,
44
+ AsyncAgentObserver: () => AsyncAgentObserver,
44
45
  AuthenticationError: () => AuthenticationError,
45
46
  Client: () => Client,
46
47
  ConfigurationError: () => ConfigurationError,
47
48
  DefaultActionHandler: () => DefaultActionHandler,
48
49
  DefaultAgent: () => DefaultAgent,
50
+ ExportFormat: () => ExportFormat,
49
51
  NetworkError: () => NetworkError,
50
52
  NotFoundError: () => NotFoundError,
51
53
  OAGIError: () => OAGIError,
@@ -53,6 +55,7 @@ __export(src_exports, {
53
55
  RequestTimeoutError: () => RequestTimeoutError,
54
56
  ScreenshotMaker: () => ScreenshotMaker,
55
57
  ServerError: () => ServerError,
58
+ TaskerAgent: () => TaskerAgent,
56
59
  ValidationError: () => ValidationError
57
60
  });
58
61
  module.exports = __toCommonJS(src_exports);
@@ -72,9 +75,12 @@ var MODEL_ACTOR = "lux-actor-1";
72
75
  var MODEL_THINKER = "lux-thinker-1";
73
76
  var DEFAULT_MAX_STEPS = 20;
74
77
  var DEFAULT_MAX_STEPS_THINKER = 100;
75
- var MAX_STEPS_ACTOR = 30;
76
- var MAX_STEPS_THINKER = 120;
77
- var DEFAULT_STEP_DELAY = 0.3;
78
+ var DEFAULT_MAX_STEPS_TASKER = 60;
79
+ var MAX_STEPS_ACTOR = 100;
80
+ var MAX_STEPS_THINKER = 300;
81
+ var DEFAULT_REFLECTION_INTERVAL = 4;
82
+ var DEFAULT_REFLECTION_INTERVAL_TASKER = 20;
83
+ var DEFAULT_STEP_DELAY = 1;
78
84
  var DEFAULT_TEMPERATURE = 0.5;
79
85
  var DEFAULT_TEMPERATURE_LOW = 0.1;
80
86
  var HTTP_CLIENT_TIMEOUT = 60;
@@ -154,6 +160,39 @@ var logTraceOnFailure = (_, __, descriptor) => {
154
160
  return descriptor;
155
161
  };
156
162
 
163
+ // src/platform-info.ts
164
+ var import_module = require("module");
165
+ var import_meta = {};
166
+ var SDK_NAME = "oagi-typescript";
167
/**
 * Resolve the SDK version from the package manifest that ships with the bundle.
 *
 * The manifest is looked up relative to this module at `../package.json` and
 * then `../../package.json`; the first one with a real version (anything but
 * the "0.0.0" placeholder) wins.
 *
 * @returns {string} The package version, or "unknown" when no manifest with a
 *   usable version can be loaded.
 */
function getSdkVersion() {
  try {
    // In the CommonJS bundle `import_meta` is an empty shim object, so
    // `import_meta.url` is undefined and createRequire() would always throw.
    // Fall back to the CommonJS `__filename` so the lookup also works there.
    const base = import_meta.url ?? (typeof __filename === "string" ? __filename : void 0);
    if (!base) return "unknown";
    const require2 = (0, import_module.createRequire)(base);
    for (const p of ["../package.json", "../../package.json"]) {
      try {
        const pkg = require2(p);
        if (pkg.version && pkg.version !== "0.0.0") return pkg.version;
      } catch {
        // Best effort: this candidate is missing or unreadable, try the next one.
      }
    }
  } catch {
    // Best effort: version discovery must never break SDK start-up.
  }
  return "unknown";
}
181
/**
 * Build the User-Agent string sent with SDK requests.
 *
 * @returns {string} `<sdk name>/<sdk version> (node <version>; <platform>; <arch>)`.
 */
function getUserAgent() {
  const runtime = [`node ${process.version}`, process.platform, process.arch].join("; ");
  const product = `${SDK_NAME}/${getSdkVersion()}`;
  return `${product} (${runtime})`;
}
184
/**
 * Build the identification headers attached to every SDK request.
 *
 * @returns {Record<string, string>} A fresh header object describing the SDK
 *   (name, version, language) and the Node.js runtime (version, OS, CPU arch).
 */
function getSdkHeaders() {
  const headers = {};
  headers["User-Agent"] = getUserAgent();
  headers["x-sdk-name"] = SDK_NAME;
  headers["x-sdk-version"] = getSdkVersion();
  headers["x-sdk-language"] = "typescript";
  headers["x-sdk-language-version"] = process.version;
  headers["x-sdk-os"] = process.platform;
  headers["x-sdk-platform"] = process.arch;
  return headers;
}
195
+
157
196
  // src/types/models/action.ts
158
197
  var z = __toESM(require("zod"), 1);
159
198
  var ActionTypeSchema = z.enum([
@@ -166,6 +205,7 @@ var ActionTypeSchema = z.enum([
166
205
  "type",
167
206
  "scroll",
168
207
  "finish",
208
+ "fail",
169
209
  "wait",
170
210
  "call_user"
171
211
  ]);
@@ -297,6 +337,24 @@ var PlanEventSchema = BaseEventSchema.extend({
297
337
  result: z4.string().optional(),
298
338
  request_id: z4.string().optional()
299
339
  });
340
/**
 * Base class for observers of agent events. Subclasses supply `onEvent`;
 * this class only contributes the ability to chain observers together.
 */
var StepObserver = class {
  /**
   * Combine this observer with another one.
   *
   * @param {object | null | undefined} observer - Observer to notify after this one.
   * @returns {ChainedStepObserver} Observer that forwards each event to both, in order.
   */
  chain(observer) {
    const next = observer ?? null;
    return new ChainedStepObserver([this, next]);
  }
};
/**
 * Observer that fans an event out to a list of observers, one after another.
 * Empty (null/undefined) slots in the list are skipped.
 */
var ChainedStepObserver = class extends StepObserver {
  observers;
  constructor(observers) {
    super();
    this.observers = observers;
  }
  /**
   * Deliver `event` to every observer sequentially; each observer finishes
   * before the next one starts, and a rejection stops the remaining deliveries.
   *
   * @param {object} event - Event to forward.
   * @returns {Promise<void>}
   */
  async onEvent(event) {
    // Snapshot the list up front so later mutations do not affect this delivery.
    const targets = [...this.observers];
    for (const target of targets) {
      if (!target) continue;
      await target.onEvent(event);
    }
  }
};
300
358
 
301
359
  // src/utils/output-parser.ts
302
360
  var splitActions = (actionBlock) => {
@@ -364,7 +422,9 @@ var parseRawOutput = (rawOutput) => {
364
422
  return {
365
423
  reason,
366
424
  actions,
367
- stop: actions.some((action2) => action2.type === "finish")
425
+ stop: actions.some(
426
+ (action2) => action2.type === "finish" || action2.type === "fail"
427
+ )
368
428
  };
369
429
  };
370
430
 
@@ -415,10 +475,12 @@ var _Client = class _Client {
415
475
  `OAGI API key must be provided either as 'api_key' parameter or OAGI_API_KEY environment variable. Get your API key at ${API_KEY_HELP_URL}`
416
476
  );
417
477
  }
478
+ const sdkHeaders = getSdkHeaders();
418
479
  this.client = new import_openai.default({
419
480
  baseURL: new URL("./v1", baseURL).href,
420
481
  apiKey,
421
- maxRetries
482
+ maxRetries,
483
+ defaultHeaders: sdkHeaders
422
484
  });
423
485
  logger2.info(`Client initialized with base_url: ${baseURL}`);
424
486
  }
@@ -434,7 +496,7 @@ var _Client = class _Client {
434
496
  return fetch(input, init);
435
497
  }
436
498
  buildHeaders(apiVersion) {
437
- const headers = {};
499
+ const headers = getSdkHeaders();
438
500
  if (apiVersion) {
439
501
  headers["x-api-version"] = apiVersion;
440
502
  }
@@ -881,6 +943,783 @@ var DefaultAgent = class {
881
943
  }
882
944
  };
883
945
 
946
+ // src/agent/tasker.ts
947
+ var logger5 = logger_default("agent.tasker");
948
/**
 * Invoke `handler.reset()` when the handler exposes a callable `reset`;
 * handlers without one are left untouched.
 *
 * @param {object} handler - Action handler that may optionally implement `reset`.
 */
var resetHandler2 = (handler) => {
  const supportsReset = typeof handler.reset === "function";
  if (!supportsReset) return;
  handler.reset();
};
953
/**
 * Pause for the given duration.
 *
 * @param {number} seconds - Delay in seconds (converted to milliseconds for the timer).
 * @returns {Promise<void>} Promise that resolves once the timer fires.
 */
var sleep2 = (seconds) => {
  const delayMs = seconds * 1e3;
  return new Promise((resolve) => {
    setTimeout(resolve, delayMs);
  });
};
954
/**
 * Pull a UUID out of a URL whose final path segment is a UUID, optionally
 * followed by a file extension and/or a query string.
 *
 * @param {string} url - URL to inspect.
 * @returns {string | null} The UUID as written in the URL, or null when absent.
 */
var extractUuidFromUrl = (url) => {
  const uuidSegment = /\/([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})(?:\.[a-z]+)?(?:\?|$)/i;
  const found = uuidSegment.exec(url);
  if (found === null) {
    return null;
  }
  return found[1];
};
959
/**
 * In-memory record of a multi-todo task: the task description, the todo list
 * with per-todo status, the execution history and the collected summaries.
 */
var PlannerMemory = class {
  taskDescription = "";
  todos = [];
  history = [];
  taskExecutionSummary = "";
  todoExecutionSummaries = {};
  /**
   * Replace the task description and the todo list.
   *
   * @param {string} taskDescription - Overall task description.
   * @param {Array<string | object>} todos - Todo descriptions (stored as new
   *   pending todos) or ready-made todo objects (stored as given).
   */
  setTask(taskDescription, todos) {
    const toTodo = (entry) => {
      if (typeof entry === "string") {
        return { description: entry, status: "pending" };
      }
      return entry;
    };
    this.taskDescription = taskDescription;
    this.todos = todos.map(toTodo);
  }
  /**
   * Find the first todo that still needs work.
   *
   * @returns {{ todo: object, index: number } | null} First todo whose status
   *   is "pending" or "in_progress", or null when there is none.
   */
  getCurrentTodo() {
    const isOpen = (todo) => todo.status === "pending" || todo.status === "in_progress";
    const index = this.todos.findIndex(isOpen);
    if (index === -1) {
      return null;
    }
    return { todo: this.todos[index], index };
  }
  /**
   * Set the status of a todo and, when a non-empty summary is given, store it.
   * Out-of-range indexes are ignored.
   */
  updateTodo(index, status, summary) {
    const outOfRange = index < 0 || index >= this.todos.length;
    if (outOfRange) return;
    const target = this.todos[index];
    target.status = status;
    if (summary) {
      this.todoExecutionSummaries[index] = summary;
    }
  }
  /**
   * Append an execution record for a todo. Out-of-range indexes are ignored.
   */
  addHistory(todoIndex, actions, summary, completed = false) {
    const outOfRange = todoIndex < 0 || todoIndex >= this.todos.length;
    if (outOfRange) return;
    const entry = {
      todo_index: todoIndex,
      todo: this.todos[todoIndex].description,
      actions,
      summary,
      completed
    };
    this.history.push(entry);
  }
  /**
   * Produce a snake_case snapshot of the memory. History entries carry an
   * action count instead of the full action list.
   */
  getContext() {
    const todoViews = this.todos.map((item, position) => ({
      index: position,
      description: item.description,
      status: item.status
    }));
    const historyViews = this.history.map((record) => ({
      todo_index: record.todo_index,
      todo: record.todo,
      action_count: record.actions.length,
      summary: record.summary,
      completed: record.completed
    }));
    return {
      task_description: this.taskDescription,
      todos: todoViews,
      history: historyViews,
      task_execution_summary: this.taskExecutionSummary,
      todo_execution_summaries: this.todoExecutionSummaries
    };
  }
  /**
   * Count todos per status. The four known statuses are always present;
   * any other status value gets its own counter.
   */
  getTodoStatusSummary() {
    const counts = {
      pending: 0,
      in_progress: 0,
      completed: 0,
      skipped: 0
    };
    this.todos.forEach((item) => {
      const seen = counts[item.status] ?? 0;
      counts[item.status] = seen + 1;
    });
    return counts;
  }
  /** Add a new pending todo at the end of the list. */
  appendTodo(description) {
    const created = { description, status: "pending" };
    this.todos.push(created);
  }
};
1032
/**
 * Thin wrapper around the worker endpoints used for planning: producing the
 * first instruction for a todo, reflecting on recent actions, and summarizing
 * the task. The API client is either injected or created lazily on first use.
 */
var Planner = class {
  client;
  // True only when this planner created `client` itself and may therefore close it.
  ownsClient = false;
  /**
   * @param {object | undefined} client - Optional pre-built API client to reuse.
   * @param {string | undefined} apiKey - API key used when a client must be created.
   * @param {string | undefined} baseUrl - Base URL used when a client must be created.
   */
  constructor(client, apiKey, baseUrl) {
    this.apiKey = apiKey;
    this.baseUrl = baseUrl;
    this.client = client;
  }
  /** Return the client, creating (and taking ownership of) one if none exists yet. */
  ensureClient() {
    if (!this.client) {
      this.client = new Client(this.baseUrl, this.apiKey);
      this.ownsClient = true;
    }
    return this.client;
  }
  /** Public accessor for the (lazily created) client. */
  getClient() {
    return this.ensureClient();
  }
  /**
   * Close the client, but only if this planner created it; an injected client
   * is left alone. After closing, the reference is dropped so that a later
   * `ensureClient()` builds a fresh client instead of handing out a closed one.
   */
  async close() {
    if (!this.ownsClient || !this.client) return;
    const closable = this.client;
    this.client = void 0;
    this.ownsClient = false;
    if (typeof closable.close === "function") {
      await closable.close();
    }
  }
  /**
   * Collect the task data sent to the workers. When a memory object and a todo
   * index are both supplied, the data is read from memory; otherwise it is
   * taken from the plain `context` object.
   */
  extractMemoryData(memory, context, todoIndex) {
    if (memory && todoIndex !== void 0) {
      const taskDescription = memory.taskDescription;
      const todos = memory.todos.map((todo, index) => ({
        index,
        description: todo.description,
        status: todo.status,
        execution_summary: memory.todoExecutionSummaries[index] ?? void 0
      }));
      const history = memory.history.map((history2) => ({
        todo_index: history2.todo_index,
        todo_description: history2.todo,
        action_count: history2.actions.length,
        summary: history2.summary ?? void 0,
        completed: history2.completed
      }));
      // An empty summary is sent as "absent" rather than as an empty string.
      const taskExecutionSummary = memory.taskExecutionSummary || void 0;
      const overallTodo = memory.todos[todoIndex] ? memory.todos[todoIndex].description : "";
      return {
        taskDescription,
        todos,
        history,
        taskExecutionSummary,
        overallTodo
      };
    }
    const rawTodos = context.todos;
    const rawHistory = context.history;
    return {
      taskDescription: context.task_description ?? "",
      todos: Array.isArray(rawTodos) ? rawTodos : [],
      history: Array.isArray(rawHistory) ? rawHistory : [],
      taskExecutionSummary: void 0,
      overallTodo: context.current_todo ?? ""
    };
  }
  /**
   * Cut the span from the first "{" to the last "}" out of `text`.
   *
   * @returns {string} The candidate JSON text, or "" when no such span exists.
   */
  extractJsonString(text) {
    const start = text.indexOf("{");
    const end = text.lastIndexOf("}") + 1;
    if (start < 0 || end <= start) return "";
    return text.slice(start, end);
  }
  /**
   * Parse the planning worker's reply. Unparseable replies yield an empty
   * instruction with an explanatory `reasoning` instead of throwing.
   */
  parsePlannerOutput(response) {
    try {
      const jsonResponse = this.extractJsonString(response);
      const data = JSON.parse(jsonResponse);
      return {
        instruction: data.subtask ?? data.instruction ?? "",
        reasoning: data.reasoning ?? "",
        subtodos: data.subtodos ?? []
      };
    } catch {
      return {
        instruction: "",
        reasoning: "Failed to parse structured response",
        subtodos: []
      };
    }
  }
  /**
   * Parse the reflection worker's reply. The current approach continues only
   * when the reply reports neither success nor a replacement instruction;
   * unparseable replies also mean "continue".
   */
  parseReflectionOutput(response) {
    try {
      const jsonResponse = this.extractJsonString(response);
      const data = JSON.parse(jsonResponse);
      const success = data.success === "yes";
      const newSubtask = (data.subtask_instruction ?? "").trim();
      const continueCurrent = !success && !newSubtask;
      return {
        continue_current: continueCurrent,
        new_instruction: newSubtask || null,
        reasoning: data.reflection ?? data.reasoning ?? "",
        success_assessment: success
      };
    } catch {
      return {
        continue_current: true,
        new_instruction: null,
        reasoning: "Failed to parse reflection response, continuing current approach",
        success_assessment: false
      };
    }
  }
  /** Render `context.history` as plain-text notes; "" when there is no history. */
  formatExecutionNotes(context) {
    const history = context.history;
    if (!history?.length) return "";
    const parts = [];
    for (const item of history) {
      parts.push(
        `Todo ${item.todo_index}: ${item.action_count} actions, completed: ${item.completed}`
      );
      if (item.summary) {
        parts.push(`Summary: ${item.summary}`);
      }
    }
    return parts.join("\n");
  }
  /**
   * Turn a screenshot into `{ uuid, url }`. String screenshots are treated as
   * URLs (the UUID is extracted from the URL when present); anything else is
   * uploaded through the client first.
   */
  async ensureScreenshotUuid(screenshot) {
    if (!screenshot) return { uuid: void 0, url: void 0 };
    if (typeof screenshot === "string") {
      const uuid = extractUuidFromUrl(screenshot);
      return { uuid: uuid ?? void 0, url: screenshot };
    }
    const client = this.ensureClient();
    const upload = await client.putS3PresignedUrl(screenshot);
    return { uuid: upload.uuid, url: upload.download_url };
  }
  /**
   * Ask the "oagi_first" worker for the first instruction of a todo.
   *
   * @returns {Promise<{ output: object, requestId: * }>} Parsed plan and the request id.
   */
  async initialPlan(todo, context, screenshot, memory, todoIndex) {
    const client = this.ensureClient();
    const { uuid } = await this.ensureScreenshotUuid(screenshot);
    const { taskDescription, todos, history, taskExecutionSummary } = this.extractMemoryData(memory, context, todoIndex);
    const response = await client.callWorker({
      workerId: "oagi_first",
      overallTodo: todo,
      taskDescription,
      todos,
      history,
      currentTodoIndex: todoIndex,
      taskExecutionSummary,
      currentScreenshot: uuid
    });
    return {
      output: this.parsePlannerOutput(response.response),
      requestId: response.request_id
    };
  }
  /**
   * Ask the "oagi_follow" worker to assess the most recent actions.
   * Only the last `reflectionInterval` entries of `actions` are sent.
   *
   * @returns {Promise<{ output: object, requestId: * }>} Parsed reflection and the request id.
   */
  async reflect(actions, context, screenshot, memory, todoIndex, currentInstruction, reflectionInterval = DEFAULT_REFLECTION_INTERVAL) {
    const client = this.ensureClient();
    const { uuid } = await this.ensureScreenshotUuid(screenshot);
    const {
      taskDescription,
      todos,
      history,
      taskExecutionSummary,
      overallTodo
    } = this.extractMemoryData(memory, context, todoIndex);
    const windowActions = actions.slice(-reflectionInterval);
    const windowSteps = windowActions.map((action, index) => ({
      step_number: index + 1,
      action_type: action.action_type,
      target: action.target ?? "",
      reasoning: action.reasoning ?? ""
    }));
    // Actions recorded without a screenshot are simply left out of the list.
    const windowScreenshots = windowActions.map((action) => action.screenshot_uuid).filter(Boolean);
    const priorNotes = this.formatExecutionNotes(context);
    const response = await client.callWorker({
      workerId: "oagi_follow",
      overallTodo,
      taskDescription,
      todos,
      history,
      currentTodoIndex: todoIndex,
      taskExecutionSummary,
      currentSubtaskInstruction: currentInstruction ?? "",
      windowSteps,
      windowScreenshots,
      resultScreenshot: uuid,
      priorNotes
    });
    return {
      output: this.parseReflectionOutput(response.response),
      requestId: response.request_id
    };
  }
  /**
   * Ask the "oagi_task_summary" worker for a summary. When the reply is JSON
   * with a `task_summary` field that field is returned; otherwise the raw
   * reply text is used as the summary.
   *
   * @returns {Promise<{ summary: *, requestId: * }>}
   */
  async summarize(_executionHistory, context, memory, todoIndex) {
    const client = this.ensureClient();
    const {
      taskDescription,
      todos,
      history,
      taskExecutionSummary,
      overallTodo
    } = this.extractMemoryData(memory, context, todoIndex);
    const latestTodoSummary = memory && todoIndex !== void 0 ? memory.todoExecutionSummaries[todoIndex] : "";
    const response = await client.callWorker({
      workerId: "oagi_task_summary",
      overallTodo,
      taskDescription,
      todos,
      history,
      currentTodoIndex: todoIndex,
      taskExecutionSummary,
      latestTodoSummary
    });
    try {
      const parsed = JSON.parse(response.response);
      return {
        summary: parsed.task_summary ?? response.response,
        requestId: response.request_id
      };
    } catch {
      return { summary: response.response, requestId: response.request_id };
    }
  }
};
1250
/**
 * Agent that executes a single todo: it asks the planner for an initial
 * instruction, drives an Actor step by step, periodically asks the planner to
 * reflect on progress, and finally records a summary.
 */
var TaskeeAgent = class {
  apiKey;
  baseUrl;
  model;
  maxSteps;
  reflectionInterval;
  temperature;
  planner;
  externalMemory;
  todoIndex;
  stepObserver;
  stepDelay;
  actor;
  currentTodo = "";
  currentInstruction = "";
  // Log of everything recorded during the run (plans, actions, reflections, errors, summary).
  actions = [];
  // Number of handled actor actions in total and since the last reflection.
  totalActions = 0;
  sinceReflection = 0;
  success = false;
  /**
   * @param {string} apiKey - API key passed to the Actor (and to a default Planner).
   * @param {string} baseUrl - Base URL passed to the Actor (and to a default Planner).
   * @param {string} model - Model name passed to the Actor.
   * @param {number} maxSteps - Step budget for one `execute` call.
   * @param {number} reflectionInterval - Number of handled actions after which a subtask pauses for reflection.
   * @param {number} temperature - Temperature passed to the Actor.
   * @param {object} [planner] - Planner to use; a new one is created when omitted.
   * @param {object} [externalMemory] - Shared memory supplying planning context.
   * @param {number} [todoIndex] - Index of this todo inside the shared memory.
   * @param {object} [stepObserver] - Receives plan/step/action events.
   * @param {number} stepDelay - Pause between steps, in seconds.
   */
  constructor(apiKey, baseUrl, model = MODEL_ACTOR, maxSteps = DEFAULT_MAX_STEPS, reflectionInterval = DEFAULT_REFLECTION_INTERVAL, temperature = DEFAULT_TEMPERATURE, planner, externalMemory, todoIndex, stepObserver, stepDelay = DEFAULT_STEP_DELAY) {
    this.apiKey = apiKey;
    this.baseUrl = baseUrl;
    this.model = model;
    this.maxSteps = maxSteps;
    this.reflectionInterval = reflectionInterval;
    this.temperature = temperature;
    this.planner = planner ?? new Planner(void 0, apiKey, baseUrl);
    this.externalMemory = externalMemory;
    this.todoIndex = todoIndex;
    this.stepObserver = stepObserver;
    this.stepDelay = stepDelay;
  }
  /**
   * Run the todo described by `instruction`.
   *
   * @param {string} instruction - Todo description.
   * @param {object} actionHandler - Receives the actor's actions via `handle`.
   * @param {object} imageProvider - Supplies screenshots via `provide`.
   * @returns {Promise<boolean>} True when reflection judged the todo successful;
   *   false otherwise, including when an error was thrown (it is logged and recorded).
   */
  async execute(instruction, actionHandler, imageProvider) {
    resetHandler2(actionHandler);
    this.currentTodo = instruction;
    this.actions = [];
    this.totalActions = 0;
    this.sinceReflection = 0;
    this.success = false;
    try {
      this.actor = new Actor(
        this.apiKey,
        this.baseUrl,
        this.model,
        this.temperature
      );
      await this.initialPlan(imageProvider);
      // Awaited for consistency with reflectAndDecide; harmless if initTask is synchronous.
      await this.actor.initTask(this.currentInstruction, this.maxSteps);
      let remainingSteps = this.maxSteps;
      while (remainingSteps > 0 && !this.success) {
        const stepsTaken = await this.executeSubtask(
          Math.min(this.maxSteps, remainingSteps),
          actionHandler,
          imageProvider
        );
        // A subtask that fails before finishing a step reports 0 steps. Charge
        // at least one step per round so a persistent failure cannot keep the
        // execute/reflect loop running forever.
        remainingSteps -= Math.max(stepsTaken, 1);
        if (!this.success && remainingSteps > 0) {
          const shouldContinue = await this.reflectAndDecide(imageProvider);
          if (!shouldContinue) {
            break;
          }
        }
      }
      await this.generateSummary();
      return this.success;
    } catch (err) {
      logger5.error(`Error executing todo: ${err}`);
      this.recordAction("error", null, String(err));
      return false;
    } finally {
      this.actor = void 0;
    }
  }
  /** Planning context from the shared memory, or an empty object without one. */
  getContext() {
    return this.externalMemory ? this.externalMemory.getContext() : {};
  }
  /** Append a timestamped entry to the action log. */
  recordAction(actionType, target, reasoning, result, screenshotUuid) {
    this.actions.push({
      timestamp: (/* @__PURE__ */ new Date()).toISOString(),
      action_type: actionType,
      target,
      reasoning,
      result,
      details: {},
      screenshot_uuid: screenshotUuid ?? void 0
    });
  }
  /** Ask the planner for the first instruction and store it as the current instruction. */
  async initialPlan(imageProvider) {
    logger5.info("Generating initial plan for todo");
    const screenshot = await imageProvider.provide();
    const context = this.getContext();
    const { output, requestId } = await this.planner.initialPlan(
      this.currentTodo,
      context,
      screenshot,
      this.externalMemory,
      this.todoIndex
    );
    this.recordAction("plan", "initial", output.reasoning, output.instruction);
    if (this.stepObserver) {
      const event = {
        type: "plan",
        timestamp: /* @__PURE__ */ new Date(),
        phase: "initial",
        image: screenshot,
        reasoning: output.reasoning,
        result: output.instruction,
        request_id: requestId ?? void 0
      };
      await this.stepObserver.onEvent(event);
    }
    this.currentInstruction = output.instruction;
    logger5.info(`Initial instruction: ${this.currentInstruction}`);
  }
  /**
   * Run actor steps until the budget is used up, the actor signals stop, the
   * reflection interval is reached, or a screenshot/step error occurs.
   *
   * @param {number} maxSteps - Step budget for this subtask.
   * @returns {Promise<number>} Number of steps that ran to completion.
   * @throws Re-throws any error raised by `actionHandler.handle`.
   */
  async executeSubtask(maxSteps, actionHandler, imageProvider) {
    logger5.info(`Executing subtask with max ${maxSteps} steps`);
    let stepsTaken = 0;
    const client = this.planner.getClient();
    for (let stepNum = 0; stepNum < maxSteps; stepNum++) {
      const screenshot = await imageProvider.provide();
      let screenshotUuid;
      let screenshotUrl;
      try {
        if (typeof screenshot === "string") {
          // String screenshots are already URLs; no upload needed.
          screenshotUuid = extractUuidFromUrl(screenshot) ?? void 0;
          screenshotUrl = screenshot;
        } else {
          const upload = await client.putS3PresignedUrl(screenshot);
          screenshotUuid = upload.uuid;
          screenshotUrl = upload.download_url;
        }
      } catch (err) {
        logger5.error(`Error uploading screenshot: ${err}`);
        this.recordAction("error", "screenshot_upload", String(err));
        break;
      }
      let step;
      try {
        step = await this.actor.step(screenshotUrl ?? screenshot, void 0);
      } catch (err) {
        logger5.error(`Error getting step from OAGI: ${err}`);
        this.recordAction(
          "error",
          "oagi_step",
          String(err),
          null,
          screenshotUuid
        );
        break;
      }
      if (step.reason) {
        logger5.info(`Step ${this.totalActions + 1}: ${step.reason}`);
      }
      if (this.stepObserver) {
        const event = {
          type: "step",
          timestamp: /* @__PURE__ */ new Date(),
          step_num: this.totalActions + 1,
          image: screenshot,
          step,
          task_id: this.actor.taskId
        };
        await this.stepObserver.onEvent(event);
      }
      if (step.actions?.length) {
        logger5.info(`Actions (${step.actions.length}):`);
        for (const action of step.actions) {
          const countSuffix = action.count && action.count > 1 ? ` x${action.count}` : "";
          logger5.info(`  [${action.type}] ${action.argument}${countSuffix}`);
        }
        for (const action of step.actions) {
          this.recordAction(
            action.type,
            action.argument,
            step.reason ?? null,
            null,
            screenshotUuid
          );
        }
        let error = null;
        try {
          await actionHandler.handle(step.actions);
        } catch (err) {
          error = String(err);
          throw err;
        } finally {
          // The action event is emitted on success and on failure alike.
          if (this.stepObserver) {
            const event = {
              type: "action",
              timestamp: /* @__PURE__ */ new Date(),
              step_num: this.totalActions + 1,
              actions: step.actions,
              error: error ?? void 0
            };
            await this.stepObserver.onEvent(event);
          }
        }
        this.totalActions += step.actions.length;
        this.sinceReflection += step.actions.length;
      }
      if (this.stepDelay > 0) {
        await sleep2(this.stepDelay);
      }
      stepsTaken += 1;
      if (step.stop) {
        logger5.info("OAGI signaled task completion");
        break;
      }
      if (this.sinceReflection >= this.reflectionInterval) {
        logger5.info("Reflection interval reached");
        break;
      }
    }
    return stepsTaken;
  }
  /**
   * Ask the planner to assess the actions since the last reflection.
   *
   * @returns {Promise<boolean>} True to keep executing (same or new
   *   instruction), false to stop (success is flagged on `this.success`).
   */
  async reflectAndDecide(imageProvider) {
    logger5.info("Reflecting on progress");
    const screenshot = await imageProvider.provide();
    const context = this.getContext();
    context.current_todo = this.currentTodo;
    // slice(-0) would return the whole log, so an empty window is handled explicitly.
    const recentActions = this.sinceReflection > 0 ? this.actions.slice(-this.sinceReflection) : [];
    const { output, requestId } = await this.planner.reflect(
      recentActions,
      context,
      screenshot,
      this.externalMemory,
      this.todoIndex,
      this.currentInstruction,
      this.reflectionInterval
    );
    this.recordAction(
      "reflect",
      null,
      output.reasoning,
      output.continue_current ? "continue" : "pivot"
    );
    if (this.stepObserver) {
      const decision = output.success_assessment ? "success" : output.continue_current ? "continue" : "pivot";
      const event = {
        type: "plan",
        timestamp: /* @__PURE__ */ new Date(),
        phase: "reflection",
        image: screenshot,
        reasoning: output.reasoning,
        result: decision,
        request_id: requestId ?? void 0
      };
      await this.stepObserver.onEvent(event);
    }
    if (output.success_assessment) {
      this.success = true;
      logger5.info("Reflection indicates task is successful");
      return false;
    }
    this.sinceReflection = 0;
    if (!output.continue_current && output.new_instruction) {
      logger5.info(`Pivoting to new instruction: ${output.new_instruction}`);
      this.currentInstruction = output.new_instruction;
      await this.actor.initTask(this.currentInstruction, this.maxSteps);
      return true;
    }
    return output.continue_current;
  }
  /** Ask the planner for a summary of the run and record it in the action log. */
  async generateSummary() {
    logger5.info("Generating execution summary");
    const context = this.getContext();
    context.current_todo = this.currentTodo;
    const { summary, requestId } = await this.planner.summarize(
      this.actions,
      context,
      this.externalMemory,
      this.todoIndex
    );
    this.recordAction("summary", null, summary);
    if (this.stepObserver) {
      const event = {
        type: "plan",
        timestamp: /* @__PURE__ */ new Date(),
        phase: "summary",
        image: void 0,
        reasoning: summary,
        result: void 0,
        request_id: requestId ?? void 0
      };
      await this.stepObserver.onEvent(event);
    }
    logger5.info(`Execution summary: ${summary}`);
  }
  /**
   * Report the outcome of the last run.
   *
   * @returns {{ success: boolean, actions: object[], summary: string, total_steps: number }}
   *   `summary` is taken from the most recent "summary" log entry ("" if none);
   *   `total_steps` is the number of handled actor actions.
   */
  returnExecutionResults() {
    let summary = "";
    for (let i = this.actions.length - 1; i >= 0; i--) {
      if (this.actions[i].action_type === "summary") {
        summary = this.actions[i].reasoning ?? "";
        break;
      }
    }
    return {
      success: this.success,
      actions: this.actions,
      summary,
      total_steps: this.totalActions
    };
  }
};
1554
/**
 * Hierarchical agent that manages multi-todo workflows: it keeps the todo list
 * in a PlannerMemory and runs a fresh TaskeeAgent for each open todo in turn.
 */
var TaskerAgent = class {
  apiKey;
  baseUrl;
  model;
  maxSteps;
  temperature;
  reflectionInterval;
  planner;
  stepObserver;
  stepDelay;
  memory = new PlannerMemory();
  currentTaskeeAgent;
  /**
   * @param {string} apiKey - API key forwarded to taskee agents (and a default Planner).
   * @param {string} baseUrl - Base URL forwarded to taskee agents (and a default Planner).
   * @param {string} model - Model name forwarded to taskee agents.
   * @param {number} maxSteps - Step budget given to each taskee agent.
   * @param {number} temperature - Temperature forwarded to taskee agents.
   * @param {number} reflectionInterval - Reflection interval forwarded to taskee agents.
   * @param {object} [planner] - Shared planner; a new one is created when omitted.
   * @param {object} [stepObserver] - Receives "split" events and is passed to taskee agents.
   * @param {number} stepDelay - Step delay forwarded to taskee agents.
   */
  constructor(apiKey, baseUrl, model = MODEL_ACTOR, maxSteps = DEFAULT_MAX_STEPS_TASKER, temperature = DEFAULT_TEMPERATURE, reflectionInterval = DEFAULT_REFLECTION_INTERVAL, planner, stepObserver, stepDelay = DEFAULT_STEP_DELAY) {
    this.apiKey = apiKey;
    this.baseUrl = baseUrl;
    this.model = model;
    this.maxSteps = maxSteps;
    this.temperature = temperature;
    this.reflectionInterval = reflectionInterval;
    this.planner = planner ?? new Planner(void 0, apiKey, baseUrl);
    this.stepObserver = stepObserver;
    this.stepDelay = stepDelay;
  }
  /** Store the task description and its todos in memory. */
  setTask(task, todos) {
    this.memory.setTask(task, todos);
    logger5.info(`Task set with ${todos.length} todos`);
  }
  /** snake_case alias of `setTask`. */
  set_task(task, todos) {
    this.setTask(task, todos);
  }
  /**
   * Work through the open todos in order. Execution stops at the first todo
   * that fails and is left "in_progress".
   *
   * @param {string} _instruction - Unused; the todos come from `setTask`.
   * @param {object} actionHandler - Forwarded to each taskee agent.
   * @param {object} imageProvider - Forwarded to each taskee agent.
   * @returns {Promise<boolean>} True when no todo failed.
   */
  async execute(_instruction, actionHandler, imageProvider) {
    const splitEvent = (label) => ({
      type: "split",
      timestamp: /* @__PURE__ */ new Date(),
      label
    });
    resetHandler2(actionHandler);
    let overallSuccess = true;
    for (; ; ) {
      const prepared = this.prepare();
      if (!prepared) {
        logger5.info("No more todos to execute");
        break;
      }
      const { todo, index } = prepared;
      logger5.info(`Executing todo ${index}: ${todo.description}`);
      if (this.stepObserver) {
        await this.stepObserver.onEvent(
          splitEvent(`Start of todo ${index + 1}: ${todo.description}`)
        );
      }
      const succeeded = await this.executeTodo(index, actionHandler, imageProvider);
      if (this.stepObserver) {
        await this.stepObserver.onEvent(
          splitEvent(`End of todo ${index + 1}: ${todo.description}`)
        );
      }
      if (!succeeded) {
        logger5.warn(`Todo ${index} failed`);
        overallSuccess = false;
        // A failed todo that is still "in_progress" would be picked again by
        // prepare(), so the workflow stops here.
        const statusNow = this.memory.todos[index]?.status;
        if (statusNow === "in_progress") {
          logger5.error("Todo failed with exception, stopping execution");
          break;
        }
      }
      this.updateTaskSummary();
    }
    const finalCounts = this.memory.getTodoStatusSummary();
    logger5.info(
      `Workflow complete. Status summary: ${JSON.stringify(finalCounts)}`
    );
    return overallSuccess;
  }
  /**
   * Create the taskee agent for the next open todo and mark a pending todo as
   * "in_progress".
   *
   * @returns {{ todo: object, index: number } | null} The todo to run, or null when none is open.
   */
  prepare() {
    const next = this.memory.getCurrentTodo();
    if (!next) return null;
    const { todo, index } = next;
    this.currentTaskeeAgent = new TaskeeAgent(
      this.apiKey,
      this.baseUrl,
      this.model,
      this.maxSteps,
      this.reflectionInterval,
      this.temperature,
      this.planner,
      this.memory,
      index,
      this.stepObserver,
      this.stepDelay
    );
    if (todo.status === "pending") {
      this.memory.updateTodo(index, "in_progress");
    }
    logger5.info(`Prepared taskee agent for todo ${index}`);
    return next;
  }
  /**
   * Run the prepared taskee agent on one todo and fold its results into memory.
   *
   * @returns {Promise<boolean>} The taskee agent's result; false when nothing
   *   was prepared or an error was thrown.
   */
  async executeTodo(todoIndex, actionHandler, imageProvider) {
    const agent = this.currentTaskeeAgent;
    if (!agent || todoIndex < 0) {
      logger5.error("No taskee agent prepared");
      return false;
    }
    const todo = this.memory.todos[todoIndex];
    try {
      const succeeded = await this.currentTaskeeAgent.execute(
        todo.description,
        actionHandler,
        imageProvider
      );
      const outcome = this.currentTaskeeAgent.returnExecutionResults();
      this.updateMemoryFromExecution(todoIndex, outcome, succeeded);
      return succeeded;
    } catch (err) {
      logger5.error(`Error executing todo ${todoIndex}: ${err}`);
      const note = `Execution failed: ${String(err)}`;
      this.memory.updateTodo(todoIndex, "in_progress", note);
      return false;
    }
  }
  /** Record a finished run: todo status, history entry and, on success, a summary line. */
  updateMemoryFromExecution(todoIndex, results, success) {
    let status = "in_progress";
    if (success) {
      status = "completed";
    }
    this.memory.updateTodo(todoIndex, status, results.summary);
    this.memory.addHistory(todoIndex, results.actions, results.summary, success);
    if (success) {
      const summaryLine = `- Completed todo ${todoIndex}: ${results.summary}`;
      const previous = this.memory.taskExecutionSummary;
      this.memory.taskExecutionSummary = previous ? [previous, summaryLine].join("\n") : summaryLine;
    }
    logger5.info(
      `Updated memory for todo ${todoIndex}: status=${status}, actions=${results.actions.length}`
    );
  }
  /**
   * Rebuild the task summary: a progress line plus the first 100 characters of
   * the summary of each completed entry among the last three history records.
   */
  updateTaskSummary() {
    const counts = this.memory.getTodoStatusSummary();
    const done = counts.completed ?? 0;
    const total = this.memory.todos.length;
    const highlights = this.memory.history
      .slice(-3)
      .filter((record) => record.completed && record.summary)
      .map((record) => `- Todo ${record.todo_index}: ${record.summary.slice(0, 100)}`);
    const lines = [`Progress: ${done}/${total} todos completed`, ...highlights];
    this.memory.taskExecutionSummary = lines.join("\n");
  }
  /** The PlannerMemory backing this agent. */
  getMemory() {
    return this.memory;
  }
  /** Add a new pending todo to the end of the list. */
  appendTodo(description) {
    this.memory.appendTodo(description);
    logger5.info(`Appended new todo: ${description}`);
  }
};
1722
+
884
1723
  // src/agent/registry.ts
885
1724
  var agentRegistry = {};
886
1725
  var asyncAgentRegister = (mode) => {
@@ -936,16 +1775,504 @@ asyncAgentRegister("thinker")((options = {}) => {
936
1775
  stepDelay
937
1776
  );
938
1777
  });
1778
// Registers the plain "tasker" mode: a TaskerAgent built from the caller's
// options, with the module-level tasker defaults filling any gaps.
asyncAgentRegister("tasker")(
  ({
    apiKey,
    baseURL,
    model = MODEL_ACTOR,
    maxSteps = DEFAULT_MAX_STEPS_TASKER,
    temperature = DEFAULT_TEMPERATURE,
    reflectionInterval = DEFAULT_REFLECTION_INTERVAL_TASKER,
    stepObserver,
    stepDelay = DEFAULT_STEP_DELAY
  } = {}) =>
    new TaskerAgent(
      apiKey,
      baseURL,
      model,
      maxSteps,
      temperature,
      reflectionInterval,
      // Seventh constructor argument is deliberately left undefined.
      undefined,
      // A null observer is normalised to undefined.
      stepObserver ?? undefined,
      stepDelay
    )
);
1801
// Registers the "tasker:cvs_appointment" preset: a TaskerAgent pre-loaded with
// a CVS flu-shot scheduling instruction and its five todos. The patient data
// below are fixed placeholder values.
asyncAgentRegister("tasker:cvs_appointment")(
  ({
    apiKey,
    baseURL,
    model = MODEL_ACTOR,
    maxSteps = DEFAULT_MAX_STEPS_TASKER,
    temperature = DEFAULT_TEMPERATURE,
    reflectionInterval = DEFAULT_REFLECTION_INTERVAL_TASKER,
    stepObserver,
    stepDelay = DEFAULT_STEP_DELAY
  } = {}) => {
    const agent = new TaskerAgent(
      apiKey,
      baseURL,
      model,
      maxSteps,
      temperature,
      reflectionInterval,
      undefined,
      stepObserver ?? undefined,
      stepDelay
    );

    const firstName = "First";
    const lastName = "Last";
    const email = "user@example.com";
    const birthday = "01-01-1990";
    const zipCode = "00000";
    // The birthday placeholder is written as MM-DD-YYYY.
    const [month, day, year] = birthday.split("-");

    const instruction = `Schedule an appointment at CVS for ${firstName} ${lastName} with email ${email} and birthday ${birthday}`;
    const todos = [
      "Open a new tab, go to www.cvs.com, type 'flu shot' in the search bar and press enter, wait for the page to load, then click on the button of Schedule vaccinations on the top of the page",
      `Enter the first name '${firstName}', last name '${lastName}', and email '${email}' in the form. Do not use any suggested autofills. Make sure the mobile phone number is empty.`,
      `Slightly scroll down to see the date of birth, enter Month '${month}', Day '${day}', and Year '${year}' in the form`,
      "Click on 'Continue as guest' button, wait for the page to load with wait, click on 'Add vaccines' button, select 'Flu' and click on 'Add vaccines'",
      `Click on 'next' to enter the page with recommendation vaccines, then click on 'next' again, until on the page of entering zip code, enter '${zipCode}', select the first option from the dropdown menu, and click on 'Search'`
    ];

    agent.setTask(instruction, todos);
    return agent;
  }
);
1842
// Registers the "tasker:software_qa" preset: a TaskerAgent pre-loaded with one
// "click this sidebar entry" todo per Nuclear Player sidebar label.
asyncAgentRegister("tasker:software_qa")(
  ({
    apiKey,
    baseURL,
    model = MODEL_ACTOR,
    maxSteps = DEFAULT_MAX_STEPS_TASKER,
    temperature = DEFAULT_TEMPERATURE,
    reflectionInterval = DEFAULT_REFLECTION_INTERVAL_TASKER,
    stepObserver,
    stepDelay = DEFAULT_STEP_DELAY
  } = {}) => {
    const agent = new TaskerAgent(
      apiKey,
      baseURL,
      model,
      maxSteps,
      temperature,
      reflectionInterval,
      undefined,
      stepObserver ?? undefined,
      stepDelay
    );

    // Sidebar entries, in the order they should be clicked.
    const sidebarLabels = [
      "Dashboard",
      "Downloads",
      "Lyrics",
      "Plugins",
      "Search Results",
      "Settings",
      "Equalizer",
      "Visualizer",
      "Listening History",
      "Favorite Albums",
      "Favorite Tracks",
      "Favorite Artists",
      "Local Library",
      "Playlists"
    ];
    const instruction = "QA: click through every sidebar button in the Nuclear Player UI";
    const todos = sidebarLabels.map(
      (label) => `Click on '${label}' in the left sidebar`
    );

    agent.setTask(instruction, todos);
    return agent;
  }
);
939
1886
 
940
1887
  // src/agent/observer/exporters.ts
941
1888
  var import_node_fs = __toESM(require("fs"), 1);
942
1889
  var import_node_path = __toESM(require("path"), 1);
943
1890
  var import_node_url = require("url");
1891
+ var import_meta2 = {};
1892
/**
 * Create `dirPath` (and any missing parents) if it does not exist yet.
 * Returns nothing.
 */
var ensureDir = function(dirPath) {
  const options = { recursive: true };
  import_node_fs.default.mkdirSync(dirPath, options);
};
1895
/**
 * Turn an action into a coordinate record, or null when the action type has
 * no coordinates or its argument cannot be parsed.
 *
 * - click / left_double / left_triple / right_single -> { type: "click", x, y }
 * - drag   -> { type: "drag", x1, y1, x2, y2 }
 * - scroll -> { type: "scroll", x, y, direction }
 *
 * One leading "(" and one trailing ")" are stripped from `action.argument`
 * before it is handed to the parser.
 */
var parseActionCoords = (action) => {
  const bareArgument = action.argument.replace(/^\(|\)$/g, "");
  const kind = action.type;

  if (
    kind === "click" ||
    kind === "left_double" ||
    kind === "left_triple" ||
    kind === "right_single"
  ) {
    const point = parseCoords(bareArgument);
    return point ? { type: "click", x: point[0], y: point[1] } : null;
  }

  if (kind === "drag") {
    const span = parseDragCoords(bareArgument);
    if (!span) {
      return null;
    }
    return { type: "drag", x1: span[0], y1: span[1], x2: span[2], y2: span[3] };
  }

  if (kind === "scroll") {
    const scroll = parseScroll(bareArgument);
    if (!scroll) {
      return null;
    }
    return { type: "scroll", x: scroll[0], y: scroll[1], direction: scroll[2] };
  }

  return null;
};
1937
/**
 * Write recorded agent events to `filePath` as a Markdown report.
 *
 * Binary screenshots are written as PNG files into `imagesDir` (when given)
 * and linked from the report; without `imagesDir` only their byte size is
 * mentioned. String images are emitted as a "Screenshot URL" line.
 *
 * Differences from the previous implementation:
 * - image file names are unique within one export (a repeated base name gets
 *   a `_2`, `_3`, ... suffix), so plan images of the same phase - or steps
 *   that share a `step_num` - no longer overwrite each other;
 * - links are computed relative to the report's directory and always use "/"
 *   so they resolve wherever `imagesDir` lives;
 * - a step event without an image is skipped instead of throwing.
 *
 * @param events recorded events (types: step, action, log, split, image, plan).
 * @param filePath destination of the Markdown file; its directory is created.
 * @param imagesDir optional directory for screenshot files; created if given.
 */
var exportToMarkdown = (events, filePath, imagesDir) => {
  const nodeFs = import_node_fs.default;
  const nodePath = import_node_path.default;
  const outputDir = nodePath.dirname(filePath);
  ensureDir(outputDir);
  if (imagesDir) {
    ensureDir(imagesDir);
  }

  // How many times each image base name has been used in this export.
  const imageNameCounts = new Map();
  const uniqueImageName = (baseName) => {
    const seen = (imageNameCounts.get(baseName) ?? 0) + 1;
    imageNameCounts.set(baseName, seen);
    return seen === 1 ? `${baseName}.png` : `${baseName}_${seen}.png`;
  };

  // Produce the Markdown fragment for one screenshot, writing it to disk
  // when it is binary and an images directory was supplied.
  const renderImage = (image, altText, baseName) => {
    if (typeof image === "string") {
      return `\n**Screenshot URL:** ${image}\n`;
    }
    if (!imagesDir) {
      return `\n*[Screenshot captured - ${image.byteLength} bytes]*\n`;
    }
    const imagePath = nodePath.join(imagesDir, uniqueImageName(baseName));
    nodeFs.writeFileSync(imagePath, Buffer.from(image));
    // Markdown links are URLs: relative to the report, forward slashes only.
    const relPath = nodePath
      .relative(outputDir, imagePath)
      .split(nodePath.sep)
      .join("/");
    return `\n![${altText}](${relPath})\n`;
  };

  const lines = ["# Agent Execution Report\n"];
  for (const event of events) {
    const d = event.timestamp instanceof Date ? event.timestamp : new Date(event.timestamp);
    // First eight characters of toTimeString() are "HH:MM:SS" (local time).
    const timestamp = d.toTimeString().slice(0, 8);
    switch (event.type) {
      case "step":
        lines.push(`\n## Step ${event.step_num}\n`);
        lines.push(`**Time:** ${timestamp}\n`);
        if (event.task_id) {
          lines.push(`**Task ID:** \`${event.task_id}\`\n`);
        }
        if (event.image != null) {
          lines.push(
            renderImage(
              event.image,
              `Step ${event.step_num}`,
              `step_${event.step_num}`
            )
          );
        }
        if (event.step.reason) {
          lines.push(`\n**Reasoning:**\n> ${event.step.reason}\n`);
        }
        if (event.step.actions?.length) {
          lines.push("\n**Planned Actions:**\n");
          for (const action of event.step.actions) {
            const countStr = action.count && action.count > 1 ? ` (x${action.count})` : "";
            lines.push(`- \`${action.type}\`: ${action.argument}${countStr}\n`);
          }
        }
        if (event.step.stop) {
          lines.push("\n**Status:** Task Complete\n");
        }
        break;
      case "action":
        lines.push(`\n### Actions Executed (${timestamp})\n`);
        if (event.error) {
          lines.push(`\n**Error:** ${event.error}\n`);
        } else {
          lines.push("\n**Result:** Success\n");
        }
        break;
      case "log":
        lines.push(`\n> **Log (${timestamp}):** ${event.message}\n`);
        break;
      case "split":
        if (event.label) {
          lines.push(`\n---\n\n### ${event.label}\n`);
        } else {
          lines.push("\n---\n");
        }
        break;
      case "image":
        // Standalone image events are not rendered in the report.
        break;
      case "plan": {
        const phaseTitles = {
          initial: "Initial Planning",
          reflection: "Reflection",
          summary: "Summary"
        };
        const phaseTitle = phaseTitles[event.phase] ?? event.phase;
        lines.push(`\n### ${phaseTitle} (${timestamp})\n`);
        if (event.request_id) {
          lines.push(`**Request ID:** \`${event.request_id}\`\n`);
        }
        if (event.image) {
          lines.push(renderImage(event.image, phaseTitle, `plan_${event.phase}`));
        }
        if (event.reasoning) {
          lines.push(`\n**Reasoning:**\n> ${event.reasoning}\n`);
        }
        if (event.result) {
          lines.push(`\n**Result:** ${event.result}\n`);
        }
        break;
      }
    }
  }
  nodeFs.writeFileSync(filePath, lines.join(""), "utf-8");
};
2084
/**
 * Convert recorded events into the plain, JSON-serialisable records that the
 * HTML report consumes.
 *
 * Each output record carries `event_type` and an "HH:MM:SS" local-time
 * `timestamp`. Binary images become base64 strings, string images are passed
 * through, and standalone "image" events are dropped.
 *
 * A step event whose image is missing now yields `image: null` instead of
 * throwing inside `Buffer.from`, matching how plan events were already handled.
 *
 * @param events recorded events (types: step, action, log, split, image, plan).
 * @returns array of plain records, in the same order as the kept events.
 */
var convertEventsForHtml = (events) => {
  // null/undefined -> null, string -> as is, anything else -> base64.
  const encodeImage = (image) => {
    if (image == null) {
      return null;
    }
    return typeof image === "string" ? image : Buffer.from(image).toString("base64");
  };

  const result = [];
  for (const event of events) {
    const d = event.timestamp instanceof Date ? event.timestamp : new Date(event.timestamp);
    const timestamp = d.toTimeString().slice(0, 8);
    switch (event.type) {
      case "step": {
        const action_coords = [];
        const actions = [];
        if (event.step.actions?.length) {
          for (const action of event.step.actions) {
            // Only actions with parseable coordinates contribute an overlay.
            const coords = parseActionCoords(action);
            if (coords) {
              action_coords.push(coords);
            }
            actions.push({
              type: action.type,
              argument: action.argument,
              count: action.count ?? 1
            });
          }
        }
        result.push({
          event_type: "step",
          timestamp,
          step_num: event.step_num,
          image: encodeImage(event.image),
          action_coords,
          reason: event.step.reason,
          actions,
          stop: event.step.stop,
          task_id: event.task_id
        });
        break;
      }
      case "action":
        result.push({
          event_type: "action",
          timestamp,
          error: event.error ?? null
        });
        break;
      case "log":
        result.push({ event_type: "log", timestamp, message: event.message });
        break;
      case "split":
        result.push({ event_type: "split", timestamp, label: event.label });
        break;
      case "image":
        break;
      case "plan":
        result.push({
          event_type: "plan",
          timestamp,
          phase: event.phase,
          // Any falsy image (including "") is reported as null, as before.
          image: event.image ? encodeImage(event.image) : null,
          reasoning: event.reasoning,
          result: event.result ?? null,
          request_id: event.request_id ?? null
        });
        break;
    }
  }
  return result;
};
2164
/**
 * Write recorded agent events to `filePath` as a self-contained HTML report.
 *
 * The report is produced by substituting the first `{EVENTS_DATA}` placeholder
 * of `report_template.html` with the events serialised as JSON. The template is
 * looked up next to this module first, then under `../src/agent/observer/`.
 *
 * Fixes over the previous implementation:
 * - the JSON is inserted through a replacer function, so `$$`, `$&`, `` $` ``
 *   and `$'` occurring in event text are no longer interpreted as
 *   `String.prototype.replace` substitution patterns;
 * - every `<` in the JSON is written as `\u003c`, so event text such as
 *   `</script>` cannot terminate the surrounding markup. `\u003c` decodes to
 *   `<` both as JSON and as a JavaScript literal.
 *   NOTE(review): assumes the template places the placeholder in a script /
 *   JSON context - confirm against report_template.html.
 *
 * @param events recorded events, converted via `convertEventsForHtml`.
 * @param filePath destination of the HTML file; its directory is created.
 * @throws Error when neither template location exists.
 */
var exportToHtml = (events, filePath) => {
  const nodeFs = import_node_fs.default;
  const nodePath = import_node_path.default;
  ensureDir(nodePath.dirname(filePath));

  // Resolve this module's directory (ESM url when available, else __filename).
  const moduleUrl = import_meta2?.url ? import_meta2.url : (0, import_node_url.pathToFileURL)(__filename).href;
  const moduleDir = nodePath.dirname((0, import_node_url.fileURLToPath)(moduleUrl));
  const primaryTemplate = nodePath.join(moduleDir, "report_template.html");
  const fallbackTemplate = nodePath.resolve(
    moduleDir,
    "..",
    "src",
    "agent",
    "observer",
    "report_template.html"
  );
  const templatePath = [primaryTemplate, fallbackTemplate].find(
    (candidate) => nodeFs.existsSync(candidate)
  );
  if (templatePath === undefined) {
    throw new Error(
      `Report template not found at ${primaryTemplate} or ${fallbackTemplate}`
    );
  }

  const template = nodeFs.readFileSync(templatePath, "utf-8");
  const eventsJson = JSON.stringify(convertEventsForHtml(events)).replace(
    /</g,
    "\\u003c"
  );
  // A function replacer inserts the text verbatim (no "$" pattern expansion).
  const htmlContent = template.replace("{EVENTS_DATA}", () => eventsJson);
  nodeFs.writeFileSync(filePath, htmlContent, "utf-8");
};
2190
/**
 * Write recorded agent events to `filePath` as pretty-printed JSON.
 *
 * Every event is copied with its `timestamp` converted to an ISO-8601 string.
 * A binary `image` is replaced by its base64 text and flagged with
 * `image_encoding: "base64"`; string images are left as they are.
 *
 * Binary now means an `ArrayBuffer` *or* any ArrayBuffer view (Buffer,
 * Uint8Array, ...). Previously only a bare `ArrayBuffer` was encoded, so a
 * Buffer/typed-array image was serialised by `JSON.stringify` as a large
 * object of numbers, unlike the Markdown and HTML exporters which treat every
 * non-string image as binary.
 *
 * @param events recorded events.
 * @param filePath destination of the JSON file; its directory is created.
 */
var exportToJson = (events, filePath) => {
  const nodeFs = import_node_fs.default;
  ensureDir(import_node_path.default.dirname(filePath));

  // Returns base64 text for binary input, or null when `image` is not binary.
  const toBase64 = (image) => {
    if (image instanceof ArrayBuffer) {
      return Buffer.from(image).toString("base64");
    }
    if (ArrayBuffer.isView(image)) {
      // Wrap exactly the bytes the view covers, whatever its element type.
      return Buffer.from(image.buffer, image.byteOffset, image.byteLength).toString("base64");
    }
    return null;
  };

  const jsonEvents = events.map((event) => {
    const when = event.timestamp instanceof Date ? event.timestamp : new Date(event.timestamp);
    const timestamp = when.toISOString();
    const encoded = "image" in event ? toBase64(event.image) : null;
    if (encoded === null) {
      return { ...event, timestamp };
    }
    return { ...event, timestamp, image: encoded, image_encoding: "base64" };
  });
  nodeFs.writeFileSync(filePath, JSON.stringify(jsonEvents, null, 2), "utf-8");
};
2210
+
2211
+ // src/agent/observer/agent_observer.ts
2212
/**
 * Export formats accepted by the observer's `export` method.
 * Keys are the enum member names; values are the lowercase format strings.
 */
var ExportFormat = {
  MARKDOWN: "markdown",
  HTML: "html",
  JSON: "json"
};
2218
/**
 * Observer that records agent execution events in memory and can export
 * them as a Markdown, HTML or JSON report.
 *
 * Events arrive through `onEvent`; `addLog` and `addSplit` let callers insert
 * their own log lines and section separators into the same timeline.
 */
var AsyncAgentObserver = class extends StepObserver {
  /** Recorded events, in arrival order. */
  events = [];

  /** Store an incoming event. */
  async onEvent(event) {
    this.events.push(event);
  }

  /** Append a "log" event stamped with the current time. */
  addLog(message) {
    this.events.push({
      type: "log",
      timestamp: /* @__PURE__ */ new Date(),
      message
    });
  }

  /** Append a "split" (section separator) event; `label` defaults to "". */
  addSplit(label = "") {
    this.events.push({
      type: "split",
      timestamp: /* @__PURE__ */ new Date(),
      label
    });
  }

  /** Drop every recorded event. */
  clear() {
    this.events = [];
  }

  /**
   * Return the events whose `step_num` is defined and equals `step_num`.
   * Events without a `step_num` never match.
   */
  getEventsByStep(step_num) {
    return this.events.filter((recorded) => {
      const recordedStep = recorded.step_num;
      return recordedStep !== undefined && recordedStep === step_num;
    });
  }

  /**
   * Export the recorded events.
   *
   * @param format "markdown", "html" or "json"; strings are matched
   *   case-insensitively.
   * @param path2 destination file path.
   * @param images_dir optional screenshot directory (Markdown export only).
   * @throws Error for any other format.
   */
  export(format, path2, images_dir) {
    const wanted = typeof format === "string" ? format.toLowerCase() : format;
    if (wanted === "markdown" /* MARKDOWN */) {
      exportToMarkdown(this.events, path2, images_dir ?? undefined);
      return;
    }
    if (wanted === "html" /* HTML */) {
      exportToHtml(this.events, path2);
      return;
    }
    if (wanted === "json" /* JSON */) {
      exportToJson(this.events, path2);
      return;
    }
    throw new Error(`Unknown export format: ${String(format)}`);
  }
};
944
2271
 
945
2272
  // src/handler.ts
946
2273
  var import_robotjs = __toESM(require("robotjs"), 1);
947
2274
  var import_sharp = __toESM(require("sharp"), 1);
948
- var sleep2 = (ms) => new Promise((r) => setTimeout(r, ms));
2275
/** Resolve (with no value) after roughly `ms` milliseconds. */
var sleep3 = (ms) =>
  new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
949
2276
  var toSharpKernel = (resample) => {
950
2277
  switch (resample) {
951
2278
  case "NEAREST":
@@ -1106,7 +2433,7 @@ var DefaultActionHandler = class {
1106
2433
  import_robotjs.default.moveMouse(p1.x, p1.y);
1107
2434
  import_robotjs.default.mouseToggle("down", "left");
1108
2435
  import_robotjs.default.dragMouse(p2.x, p2.y);
1109
- await sleep2(this.#cfg.dragDurationMs);
2436
+ await sleep3(this.#cfg.dragDurationMs);
1110
2437
  import_robotjs.default.mouseToggle("up", "left");
1111
2438
  return;
1112
2439
  }
@@ -1126,7 +2453,7 @@ var DefaultActionHandler = class {
1126
2453
  if (!last) return;
1127
2454
  const modifiers = keys.slice(0, -1);
1128
2455
  import_robotjs.default.keyTap(last, modifiers.length ? modifiers : []);
1129
- await sleep2(this.#cfg.hotkeyDelayMs);
2456
+ await sleep3(this.#cfg.hotkeyDelayMs);
1130
2457
  return;
1131
2458
  }
1132
2459
  case "type": {
@@ -1146,10 +2473,11 @@ var DefaultActionHandler = class {
1146
2473
  return;
1147
2474
  }
1148
2475
  case "wait": {
1149
- await sleep2(this.#cfg.waitDurationMs);
2476
+ await sleep3(this.#cfg.waitDurationMs);
1150
2477
  return;
1151
2478
  }
1152
- case "finish": {
2479
+ case "finish":
2480
+ case "fail": {
1153
2481
  this.reset();
1154
2482
  return;
1155
2483
  }
@@ -1167,11 +2495,13 @@ var DefaultActionHandler = class {
1167
2495
  0 && (module.exports = {
1168
2496
  APIError,
1169
2497
  Actor,
2498
+ AsyncAgentObserver,
1170
2499
  AuthenticationError,
1171
2500
  Client,
1172
2501
  ConfigurationError,
1173
2502
  DefaultActionHandler,
1174
2503
  DefaultAgent,
2504
+ ExportFormat,
1175
2505
  NetworkError,
1176
2506
  NotFoundError,
1177
2507
  OAGIError,
@@ -1179,6 +2509,7 @@ var DefaultActionHandler = class {
1179
2509
  RequestTimeoutError,
1180
2510
  ScreenshotMaker,
1181
2511
  ServerError,
2512
+ TaskerAgent,
1182
2513
  ValidationError
1183
2514
  });
1184
2515
  //# sourceMappingURL=index.cjs.map