@oagi/oagi 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -41,11 +41,13 @@ var src_exports = {};
41
41
  __export(src_exports, {
42
42
  APIError: () => APIError,
43
43
  Actor: () => Actor,
44
+ AsyncAgentObserver: () => AsyncAgentObserver,
44
45
  AuthenticationError: () => AuthenticationError,
45
46
  Client: () => Client,
46
47
  ConfigurationError: () => ConfigurationError,
47
48
  DefaultActionHandler: () => DefaultActionHandler,
48
49
  DefaultAgent: () => DefaultAgent,
50
+ ExportFormat: () => ExportFormat,
49
51
  NetworkError: () => NetworkError,
50
52
  NotFoundError: () => NotFoundError,
51
53
  OAGIError: () => OAGIError,
@@ -53,6 +55,7 @@ __export(src_exports, {
53
55
  RequestTimeoutError: () => RequestTimeoutError,
54
56
  ScreenshotMaker: () => ScreenshotMaker,
55
57
  ServerError: () => ServerError,
58
+ TaskerAgent: () => TaskerAgent,
56
59
  ValidationError: () => ValidationError
57
60
  });
58
61
  module.exports = __toCommonJS(src_exports);
@@ -72,8 +75,11 @@ var MODEL_ACTOR = "lux-actor-1";
72
75
  var MODEL_THINKER = "lux-thinker-1";
73
76
  var DEFAULT_MAX_STEPS = 20;
74
77
  var DEFAULT_MAX_STEPS_THINKER = 100;
78
+ var DEFAULT_MAX_STEPS_TASKER = 60;
75
79
  var MAX_STEPS_ACTOR = 30;
76
80
  var MAX_STEPS_THINKER = 120;
81
+ var DEFAULT_REFLECTION_INTERVAL = 4;
82
+ var DEFAULT_REFLECTION_INTERVAL_TASKER = 20;
77
83
  var DEFAULT_STEP_DELAY = 0.3;
78
84
  var DEFAULT_TEMPERATURE = 0.5;
79
85
  var DEFAULT_TEMPERATURE_LOW = 0.1;
@@ -154,6 +160,39 @@ var logTraceOnFailure = (_, __, descriptor) => {
154
160
  return descriptor;
155
161
  };
156
162
 
163
+ // src/platform-info.ts
164
+ var import_module = require("module");
165
+ var import_meta = {};
166
+ var SDK_NAME = "oagi-typescript";
167
/**
 * Resolve the SDK version from the package's package.json.
 *
 * Tries "../package.json" and then "../../package.json" relative to this
 * module and returns the first `version` that is set and is not the
 * "0.0.0" placeholder.
 *
 * @returns {string} The package version, or "unknown" when none can be read.
 */
function getSdkVersion() {
  // In the CommonJS bundle `import_meta` is an empty object, so `import_meta.url`
  // is undefined and createRequire() would always throw. Fall back to the
  // CommonJS `__filename` so the lookup can actually succeed there.
  const base = import_meta.url ?? (typeof __filename === "string" ? __filename : void 0);
  if (!base) return "unknown";
  let require2;
  try {
    require2 = (0, import_module.createRequire)(base);
  } catch {
    // Version reporting is best-effort; never let it break client construction.
    return "unknown";
  }
  for (const candidate of ["../package.json", "../../package.json"]) {
    try {
      const pkg = require2(candidate);
      if (pkg.version && pkg.version !== "0.0.0") return pkg.version;
    } catch {
      // This candidate does not exist or is unreadable; try the next one.
    }
  }
  return "unknown";
}
181
/**
 * Build the User-Agent string sent with SDK requests.
 *
 * Format: "<sdk name>/<sdk version> (node <node version>; <platform>; <arch>)".
 *
 * @returns {string} The User-Agent header value.
 */
function getUserAgent() {
  const runtime = [`node ${process.version}`, process.platform, process.arch].join("; ");
  return `${SDK_NAME}/${getSdkVersion()} (${runtime})`;
}
184
/**
 * Build the set of headers that identify this SDK and its runtime.
 *
 * @returns {Record<string, string>} A fresh object holding the User-Agent
 *   plus the `x-sdk-*` headers (name, version, language, language version,
 *   OS and platform).
 */
function getSdkHeaders() {
  const { version: nodeVersion, platform: osName, arch: cpuArch } = process;
  const headers = {};
  headers["User-Agent"] = getUserAgent();
  headers["x-sdk-name"] = SDK_NAME;
  headers["x-sdk-version"] = getSdkVersion();
  headers["x-sdk-language"] = "typescript";
  headers["x-sdk-language-version"] = nodeVersion;
  headers["x-sdk-os"] = osName;
  headers["x-sdk-platform"] = cpuArch;
  return headers;
}
195
+
157
196
  // src/types/models/action.ts
158
197
  var z = __toESM(require("zod"), 1);
159
198
  var ActionTypeSchema = z.enum([
@@ -297,6 +336,24 @@ var PlanEventSchema = BaseEventSchema.extend({
297
336
  result: z4.string().optional(),
298
337
  request_id: z4.string().optional()
299
338
  });
339
/**
 * Base class for step observers. Subclasses are expected to provide an
 * `onEvent(event)` method; this base only contributes `chain`.
 */
var StepObserver = class {
  /**
   * Combine this observer with another one.
   *
   * @param {object|null|undefined} observer - Observer to run after this one;
   *   a missing observer is stored as `null` and skipped at dispatch time.
   * @returns {ChainedStepObserver} An observer that forwards each event to
   *   this observer first and then to `observer`.
   */
  chain(observer) {
    return new ChainedStepObserver([this, observer ?? null]);
  }
};
/**
 * Observer that forwards every event to a list of observers, one at a time.
 */
var ChainedStepObserver = class extends StepObserver {
  observers;
  /**
   * @param {Array<object|null>} observers - Observers in dispatch order;
   *   `null` entries are ignored.
   */
  constructor(observers) {
    super();
    this.observers = observers;
  }
  /**
   * Deliver `event` to each observer sequentially, waiting for one observer
   * to finish before calling the next. A rejection from any observer
   * propagates and stops the remaining observers from being called.
   *
   * @param {object} event - The event to forward.
   * @returns {Promise<void>}
   */
  async onEvent(event) {
    // A plain sequential loop replaces the promise-chaining reduce: same
    // ordering and error propagation, without building every link up front.
    for (const observer of this.observers) {
      if (observer) await observer.onEvent(event);
    }
  }
};
300
357
 
301
358
  // src/utils/output-parser.ts
302
359
  var splitActions = (actionBlock) => {
@@ -415,10 +472,12 @@ var _Client = class _Client {
415
472
  `OAGI API key must be provided either as 'api_key' parameter or OAGI_API_KEY environment variable. Get your API key at ${API_KEY_HELP_URL}`
416
473
  );
417
474
  }
475
+ const sdkHeaders = getSdkHeaders();
418
476
  this.client = new import_openai.default({
419
477
  baseURL: new URL("./v1", baseURL).href,
420
478
  apiKey,
421
- maxRetries
479
+ maxRetries,
480
+ defaultHeaders: sdkHeaders
422
481
  });
423
482
  logger2.info(`Client initialized with base_url: ${baseURL}`);
424
483
  }
@@ -434,7 +493,7 @@ var _Client = class _Client {
434
493
  return fetch(input, init);
435
494
  }
436
495
  buildHeaders(apiVersion) {
437
- const headers = {};
496
+ const headers = getSdkHeaders();
438
497
  if (apiVersion) {
439
498
  headers["x-api-version"] = apiVersion;
440
499
  }
@@ -881,6 +940,784 @@ var DefaultAgent = class {
881
940
  }
882
941
  };
883
942
 
943
+ // src/agent/tasker.ts
944
+ var logger5 = logger_default("agent.tasker");
945
/**
 * Call `handler.reset()` when the handler exposes a `reset` function.
 *
 * Handlers without a `reset` method — and a missing (`null`/`undefined`)
 * handler — are left alone instead of raising a TypeError.
 *
 * @param {object|null|undefined} handler - Action handler to reset.
 */
var resetHandler2 = (handler) => {
  if (typeof handler?.reset === "function") {
    handler.reset();
  }
};
950
/**
 * Pause for the given number of seconds.
 *
 * @param {number} seconds - Delay in seconds (converted to milliseconds).
 * @returns {Promise<void>} Resolves once the timer fires.
 */
var sleep2 = (seconds) => {
  const delayMs = seconds * 1e3;
  return new Promise((done) => {
    setTimeout(done, delayMs);
  });
};
951
/**
 * Extract the UUID that forms the last path segment of a URL.
 *
 * The UUID must directly follow a "/", may carry a file extension
 * (letters and digits), and must be followed by the end of the string,
 * a query string ("?") or a fragment ("#").
 *
 * @param {string} url - URL to inspect.
 * @returns {string|null} The UUID exactly as written in the URL, or `null`
 *   when the input is not a string or contains no such segment.
 */
var extractUuidFromUrl = (url) => {
  if (typeof url !== "string") return null;
  // Accept "#fragment" terminators and alphanumeric extensions in addition to
  // the "?query"/end-of-string and letters-only forms.
  const pattern = /\/([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})(?:\.[a-z0-9]+)?(?:[?#]|$)/i;
  const match = pattern.exec(url);
  return match ? match[1] : null;
};
956
/**
 * In-memory record of a multi-todo task: the task description, the todo
 * list with per-todo status, the execution history, and summaries.
 */
var PlannerMemory = class {
  taskDescription = "";
  todos = [];
  history = [];
  taskExecutionSummary = "";
  todoExecutionSummaries = {};
  /**
   * Store the task description and its todos.
   *
   * @param {string} taskDescription - Overall task description.
   * @param {Array<string|object>} todos - Plain strings become
   *   `{ description, status: "pending" }`; anything else is stored as given.
   */
  setTask(taskDescription, todos) {
    const normalize = (entry) => {
      if (typeof entry === "string") {
        return { description: entry, status: "pending" };
      }
      return entry;
    };
    this.taskDescription = taskDescription;
    this.todos = todos.map(normalize);
  }
  /**
   * Find the first todo whose status is "pending" or "in_progress".
   *
   * @returns {{todo: object, index: number}|null} The todo and its index,
   *   or `null` when no such todo exists.
   */
  getCurrentTodo() {
    const position = this.todos.findIndex(
      (item) => item.status === "pending" || item.status === "in_progress"
    );
    if (position === -1) return null;
    return { todo: this.todos[position], index: position };
  }
  /**
   * Set a todo's status and, when a truthy summary is given, record it.
   * Out-of-range indexes are ignored.
   */
  updateTodo(index, status, summary) {
    const inRange = index >= 0 && index < this.todos.length;
    if (!inRange) return;
    this.todos[index].status = status;
    if (summary) {
      this.todoExecutionSummaries[index] = summary;
    }
  }
  /**
   * Append a history entry for a todo. Out-of-range indexes are ignored.
   */
  addHistory(todoIndex, actions, summary, completed = false) {
    const inRange = todoIndex >= 0 && todoIndex < this.todos.length;
    if (!inRange) return;
    const entry = {
      todo_index: todoIndex,
      todo: this.todos[todoIndex].description,
      actions,
      summary,
      completed
    };
    this.history.push(entry);
  }
  /**
   * Snapshot of the memory in snake_case form. History entries carry an
   * `action_count` rather than the full action list.
   */
  getContext() {
    const todoViews = this.todos.map(({ description, status }, index) => ({
      index,
      description,
      status
    }));
    const historyViews = this.history.map((entry) => ({
      todo_index: entry.todo_index,
      todo: entry.todo,
      action_count: entry.actions.length,
      summary: entry.summary,
      completed: entry.completed
    }));
    return {
      task_description: this.taskDescription,
      todos: todoViews,
      history: historyViews,
      task_execution_summary: this.taskExecutionSummary,
      todo_execution_summaries: this.todoExecutionSummaries
    };
  }
  /**
   * Count todos per status. The five known statuses are always present.
   */
  getTodoStatusSummary() {
    const counts = {
      pending: 0,
      in_progress: 0,
      completed: 0,
      skipped: 0,
      blocked: 0
    };
    this.todos.forEach(({ status }) => {
      counts[status] = (counts[status] ?? 0) + 1;
    });
    return counts;
  }
  /** Add a new pending todo at the end of the list. */
  appendTodo(description) {
    this.todos.push({ description, status: "pending" });
  }
};
1030
/**
 * Thin wrapper around the worker API used for planning, reflection and
 * summarisation of todos.
 *
 * A client may be injected; otherwise one is created lazily from the given
 * API key and base URL, and only such a self-created client is closed by
 * `close()`.
 */
var Planner = class {
  /**
   * @param {object} [client] - Pre-built client to use for API calls.
   * @param {string} [apiKey] - API key for a lazily created client.
   * @param {string} [baseUrl] - Base URL for a lazily created client.
   */
  constructor(client, apiKey, baseUrl) {
    this.apiKey = apiKey;
    this.baseUrl = baseUrl;
    this.client = client;
  }
  client;
  ownsClient = false;
  /** Return the client, creating (and taking ownership of) one if needed. */
  ensureClient() {
    if (!this.client) {
      this.client = new Client(this.baseUrl, this.apiKey);
      this.ownsClient = true;
    }
    return this.client;
  }
  /** Public accessor for the (possibly lazily created) client. */
  getClient() {
    return this.ensureClient();
  }
  /**
   * Close the client if this planner created it. Injected clients are left
   * untouched. The closed client is dropped so a later call builds a fresh
   * one instead of reusing a closed instance.
   */
  async close() {
    if (!this.ownsClient || !this.client) return;
    const closable = this.client;
    this.client = void 0;
    this.ownsClient = false;
    if (typeof closable.close === "function") {
      await closable.close();
    }
  }
  /**
   * Collect the planning inputs either from a memory object (when both
   * `memory` and `todoIndex` are provided) or from a plain context object.
   *
   * @returns {{taskDescription: string, todos: Array, history: Array,
   *   taskExecutionSummary: (string|undefined), overallTodo: string}}
   */
  extractMemoryData(memory, context, todoIndex) {
    if (memory && todoIndex !== void 0) {
      const taskDescription = memory.taskDescription;
      const todos = memory.todos.map((todo, index) => ({
        index,
        description: todo.description,
        status: todo.status,
        execution_summary: memory.todoExecutionSummaries[index] ?? void 0
      }));
      const history = memory.history.map((history2) => ({
        todo_index: history2.todo_index,
        todo_description: history2.todo,
        action_count: history2.actions.length,
        summary: history2.summary ?? void 0,
        completed: history2.completed
      }));
      const taskExecutionSummary = memory.taskExecutionSummary || void 0;
      const overallTodo = memory.todos[todoIndex] ? memory.todos[todoIndex].description : "";
      return {
        taskDescription,
        todos,
        history,
        taskExecutionSummary,
        overallTodo
      };
    }
    const rawTodos = context.todos;
    const rawHistory = context.history;
    return {
      taskDescription: context.task_description ?? "",
      todos: Array.isArray(rawTodos) ? rawTodos : [],
      history: Array.isArray(rawHistory) ? rawHistory : [],
      taskExecutionSummary: void 0,
      overallTodo: context.current_todo ?? ""
    };
  }
  /**
   * Return the substring from the first "{" to the last "}" (inclusive),
   * or "" when no such span exists.
   */
  extractJsonString(text) {
    const start = text.indexOf("{");
    const end = text.lastIndexOf("}") + 1;
    if (start < 0 || end <= start) return "";
    return text.slice(start, end);
  }
  /**
   * Parse a planning response into `{ instruction, reasoning, subtodos }`.
   * `subtask` takes precedence over `instruction`. Unparseable responses
   * yield an empty instruction with an explanatory reasoning string.
   */
  parsePlannerOutput(response) {
    try {
      const jsonResponse = this.extractJsonString(response);
      const data = JSON.parse(jsonResponse);
      return {
        instruction: data.subtask ?? data.instruction ?? "",
        reasoning: data.reasoning ?? "",
        subtodos: data.subtodos ?? []
      };
    } catch {
      return {
        instruction: "",
        reasoning: "Failed to parse structured response",
        subtodos: []
      };
    }
  }
  /**
   * Parse a reflection response. `success` counts only when it is exactly
   * "yes"; the current approach continues only when there is neither
   * success nor a new subtask instruction. Unparseable responses default to
   * continuing the current approach.
   */
  parseReflectionOutput(response) {
    try {
      const jsonResponse = this.extractJsonString(response);
      const data = JSON.parse(jsonResponse);
      const success = data.success === "yes";
      const newSubtask = (data.subtask_instruction ?? "").trim();
      const continueCurrent = !success && !newSubtask;
      return {
        continue_current: continueCurrent,
        new_instruction: newSubtask || null,
        reasoning: data.reflection ?? data.reasoning ?? "",
        success_assessment: success
      };
    } catch {
      return {
        continue_current: true,
        new_instruction: null,
        reasoning: "Failed to parse reflection response, continuing current approach",
        success_assessment: false
      };
    }
  }
  /** Render `context.history` as newline-separated notes ("" when empty). */
  formatExecutionNotes(context) {
    const history = context.history;
    if (!history?.length) return "";
    const parts = [];
    for (const item of history) {
      parts.push(
        `Todo ${item.todo_index}: ${item.action_count} actions, completed: ${item.completed}`
      );
      if (item.summary) {
        parts.push(`Summary: ${item.summary}`);
      }
    }
    return parts.join("\n");
  }
  /**
   * Resolve a screenshot to `{ uuid, url }`. String inputs are treated as
   * URLs (the UUID is extracted from the URL when possible); any other
   * truthy value is uploaded through the client.
   */
  async ensureScreenshotUuid(screenshot) {
    if (!screenshot) return { uuid: void 0, url: void 0 };
    if (typeof screenshot === "string") {
      const uuid = extractUuidFromUrl(screenshot);
      return { uuid: uuid ?? void 0, url: screenshot };
    }
    const client = this.ensureClient();
    const upload = await client.putS3PresignedUrl(screenshot);
    return { uuid: upload.uuid, url: upload.download_url };
  }
  /**
   * Ask the "oagi_first" worker for the initial plan of a todo.
   *
   * @returns {Promise<{output: object, requestId: *}>} Parsed planner
   *   output plus the request id reported by the worker call.
   */
  async initialPlan(todo, context, screenshot, memory, todoIndex) {
    const client = this.ensureClient();
    const { uuid } = await this.ensureScreenshotUuid(screenshot);
    const { taskDescription, todos, history, taskExecutionSummary } = this.extractMemoryData(memory, context, todoIndex);
    const response = await client.callWorker({
      workerId: "oagi_first",
      overallTodo: todo,
      taskDescription,
      todos,
      history,
      currentTodoIndex: todoIndex,
      taskExecutionSummary,
      currentScreenshot: uuid
    });
    return {
      output: this.parsePlannerOutput(response.response),
      requestId: response.request_id
    };
  }
  /**
   * Ask the "oagi_follow" worker to reflect on the most recent actions.
   * Only the last `reflectionInterval` actions are sent as the window.
   *
   * @returns {Promise<{output: object, requestId: *}>} Parsed reflection
   *   output plus the request id reported by the worker call.
   */
  async reflect(actions, context, screenshot, memory, todoIndex, currentInstruction, reflectionInterval = DEFAULT_REFLECTION_INTERVAL) {
    const client = this.ensureClient();
    const { uuid } = await this.ensureScreenshotUuid(screenshot);
    const {
      taskDescription,
      todos,
      history,
      taskExecutionSummary,
      overallTodo
    } = this.extractMemoryData(memory, context, todoIndex);
    const windowActions = actions.slice(-reflectionInterval);
    const windowSteps = windowActions.map((action, index) => ({
      step_number: index + 1,
      action_type: action.action_type,
      target: action.target ?? "",
      reasoning: action.reasoning ?? ""
    }));
    const windowScreenshots = windowActions.map((action) => action.screenshot_uuid).filter(Boolean);
    const priorNotes = this.formatExecutionNotes(context);
    const response = await client.callWorker({
      workerId: "oagi_follow",
      overallTodo,
      taskDescription,
      todos,
      history,
      currentTodoIndex: todoIndex,
      taskExecutionSummary,
      currentSubtaskInstruction: currentInstruction ?? "",
      windowSteps,
      windowScreenshots,
      resultScreenshot: uuid,
      priorNotes
    });
    return {
      output: this.parseReflectionOutput(response.response),
      requestId: response.request_id
    };
  }
  /**
   * Ask the "oagi_task_summary" worker for a summary of the execution.
   *
   * The response is parsed like the planner/reflection responses: the JSON
   * object is first extracted from any surrounding text. When no
   * `task_summary` can be obtained the raw response text is returned.
   *
   * @returns {Promise<{summary: *, requestId: *}>}
   */
  async summarize(_executionHistory, context, memory, todoIndex) {
    const client = this.ensureClient();
    const {
      taskDescription,
      todos,
      history,
      taskExecutionSummary,
      overallTodo
    } = this.extractMemoryData(memory, context, todoIndex);
    // Always send a string: a todo without a recorded summary yields "".
    const latestTodoSummary = memory && todoIndex !== void 0 ? memory.todoExecutionSummaries[todoIndex] ?? "" : "";
    const response = await client.callWorker({
      workerId: "oagi_task_summary",
      overallTodo,
      taskDescription,
      todos,
      history,
      currentTodoIndex: todoIndex,
      taskExecutionSummary,
      latestTodoSummary
    });
    const raw = response.response;
    try {
      // Tolerate prose or code fences around the JSON object, consistent
      // with parsePlannerOutput / parseReflectionOutput.
      const parsed = JSON.parse(this.extractJsonString(raw) || raw);
      return {
        summary: parsed.task_summary ?? raw,
        requestId: response.request_id
      };
    } catch {
      return { summary: raw, requestId: response.request_id };
    }
  }
};
1248
/**
 * Agent that executes a single todo: it asks the planner for an initial
 * instruction, drives an Actor step by step, periodically reflects on
 * progress, and finally asks the planner for a summary.
 */
var TaskeeAgent = class {
  apiKey;
  baseUrl;
  model;
  maxSteps;
  reflectionInterval;
  temperature;
  planner;
  externalMemory;
  todoIndex;
  stepObserver;
  stepDelay;
  actor;
  currentTodo = "";
  currentInstruction = "";
  actions = [];
  totalActions = 0;
  sinceReflection = 0;
  success = false;
  /**
   * @param {string} apiKey - API key passed to the Actor and default Planner.
   * @param {string} baseUrl - Base URL passed to the Actor and default Planner.
   * @param {string} [model] - Model name passed to the Actor.
   * @param {number} [maxSteps] - Step budget for one `execute` call.
   * @param {number} [reflectionInterval] - Number of handled actions after
   *   which a subtask run stops so that reflection can happen.
   * @param {number} [temperature] - Temperature passed to the Actor.
   * @param {object} [planner] - Planner to use; a new one is created if omitted.
   * @param {object} [externalMemory] - Shared memory used to build context.
   * @param {number} [todoIndex] - Index of the todo in the shared memory.
   * @param {object} [stepObserver] - Receives plan/step/action events.
   * @param {number} [stepDelay] - Seconds to wait after each step.
   */
  constructor(apiKey, baseUrl, model = MODEL_ACTOR, maxSteps = DEFAULT_MAX_STEPS, reflectionInterval = DEFAULT_REFLECTION_INTERVAL, temperature = DEFAULT_TEMPERATURE, planner, externalMemory, todoIndex, stepObserver, stepDelay = DEFAULT_STEP_DELAY) {
    this.apiKey = apiKey;
    this.baseUrl = baseUrl;
    this.model = model;
    this.maxSteps = maxSteps;
    this.reflectionInterval = reflectionInterval;
    this.temperature = temperature;
    this.planner = planner ?? new Planner(void 0, apiKey, baseUrl);
    this.externalMemory = externalMemory;
    this.todoIndex = todoIndex;
    this.stepObserver = stepObserver;
    this.stepDelay = stepDelay;
  }
  /**
   * Execute one todo.
   *
   * @param {string} instruction - Description of the todo.
   * @param {object} actionHandler - Object whose `handle(actions)` performs
   *   the actions returned by the actor.
   * @param {object} imageProvider - Object whose `provide()` yields a
   *   screenshot (a URL string or an uploadable image).
   * @returns {Promise<boolean>} `true` only when reflection judged the todo
   *   successful; errors are logged, recorded and reported as `false`.
   */
  async execute(instruction, actionHandler, imageProvider) {
    resetHandler2(actionHandler);
    this.currentTodo = instruction;
    this.actions = [];
    this.totalActions = 0;
    this.sinceReflection = 0;
    this.success = false;
    try {
      this.actor = new Actor(
        this.apiKey,
        this.baseUrl,
        this.model,
        this.temperature
      );
      await this.initialPlan(imageProvider);
      // Awaited for consistency with the pivot path in reflectAndDecide.
      await this.actor.initTask(this.currentInstruction, this.maxSteps);
      let remainingSteps = this.maxSteps;
      while (remainingSteps > 0 && !this.success) {
        const stepsTaken = await this.executeSubtask(
          Math.min(this.maxSteps, remainingSteps),
          actionHandler,
          imageProvider
        );
        // A subtask run that fails before completing any step still costs one
        // unit of budget; otherwise a persistent screenshot/step error plus a
        // "continue" reflection would loop forever.
        remainingSteps -= Math.max(stepsTaken, 1);
        if (!this.success && remainingSteps > 0) {
          const shouldContinue = await this.reflectAndDecide(imageProvider);
          if (!shouldContinue) {
            break;
          }
        }
      }
      await this.generateSummary();
      return this.success;
    } catch (err) {
      logger5.error(`Error executing todo: ${err}`);
      this.recordAction("error", null, String(err));
      return false;
    } finally {
      this.actor = void 0;
    }
  }
  /** Context from the shared memory, or an empty object without memory. */
  getContext() {
    return this.externalMemory ? this.externalMemory.getContext() : {};
  }
  /** Append a timestamped entry to this agent's action log. */
  recordAction(actionType, target, reasoning, result, screenshotUuid) {
    this.actions.push({
      timestamp: (/* @__PURE__ */ new Date()).toISOString(),
      action_type: actionType,
      target,
      reasoning,
      result,
      details: {},
      screenshot_uuid: screenshotUuid ?? void 0
    });
  }
  /**
   * Take a screenshot, ask the planner for the initial instruction, record
   * it, notify the observer, and store it as the current instruction.
   */
  async initialPlan(imageProvider) {
    logger5.info("Generating initial plan for todo");
    const screenshot = await imageProvider.provide();
    const context = this.getContext();
    const { output, requestId } = await this.planner.initialPlan(
      this.currentTodo,
      context,
      screenshot,
      this.externalMemory,
      this.todoIndex
    );
    this.recordAction("plan", "initial", output.reasoning, output.instruction);
    if (this.stepObserver) {
      const event = {
        type: "plan",
        timestamp: /* @__PURE__ */ new Date(),
        phase: "initial",
        image: screenshot,
        reasoning: output.reasoning,
        result: output.instruction,
        request_id: requestId ?? void 0
      };
      await this.stepObserver.onEvent(event);
    }
    this.currentInstruction = output.instruction;
    logger5.info(`Initial instruction: ${this.currentInstruction}`);
  }
  /**
   * Run actor steps until the step limit, a stop signal, the reflection
   * interval, or a screenshot/step error ends the run.
   *
   * @param {number} maxSteps - Maximum number of steps for this run.
   * @returns {Promise<number>} Number of steps completed. Screenshot and
   *   actor-step errors are recorded and end the run; errors thrown by the
   *   action handler propagate to the caller.
   */
  async executeSubtask(maxSteps, actionHandler, imageProvider) {
    logger5.info(`Executing subtask with max ${maxSteps} steps`);
    let stepsTaken = 0;
    const client = this.planner.getClient();
    for (let stepNum = 0; stepNum < maxSteps; stepNum++) {
      const screenshot = await imageProvider.provide();
      let screenshotUuid;
      let screenshotUrl;
      try {
        if (typeof screenshot === "string") {
          screenshotUuid = extractUuidFromUrl(screenshot) ?? void 0;
          screenshotUrl = screenshot;
        } else {
          const upload = await client.putS3PresignedUrl(screenshot);
          screenshotUuid = upload.uuid;
          screenshotUrl = upload.download_url;
        }
      } catch (err) {
        logger5.error(`Error uploading screenshot: ${err}`);
        this.recordAction("error", "screenshot_upload", String(err));
        break;
      }
      let step;
      try {
        step = await this.actor.step(screenshotUrl ?? screenshot, void 0);
      } catch (err) {
        logger5.error(`Error getting step from OAGI: ${err}`);
        this.recordAction(
          "error",
          "oagi_step",
          String(err),
          null,
          screenshotUuid
        );
        break;
      }
      if (step.reason) {
        logger5.info(`Step ${this.totalActions + 1}: ${step.reason}`);
      }
      if (this.stepObserver) {
        const event = {
          type: "step",
          timestamp: /* @__PURE__ */ new Date(),
          step_num: this.totalActions + 1,
          image: screenshot,
          step,
          task_id: this.actor.taskId
        };
        await this.stepObserver.onEvent(event);
      }
      if (step.actions?.length) {
        logger5.info(`Actions (${step.actions.length}):`);
        for (const action of step.actions) {
          const countSuffix = action.count && action.count > 1 ? ` x${action.count}` : "";
          logger5.info(`  [${action.type}] ${action.argument}${countSuffix}`);
        }
        for (const action of step.actions) {
          this.recordAction(
            action.type,
            action.argument,
            step.reason ?? null,
            null,
            screenshotUuid
          );
        }
        let error = null;
        try {
          await actionHandler.handle(step.actions);
        } catch (err) {
          error = String(err);
          throw err;
        } finally {
          // The observer is told about the action batch whether or not the
          // handler succeeded; a failure is attached as `error`.
          if (this.stepObserver) {
            const event = {
              type: "action",
              timestamp: /* @__PURE__ */ new Date(),
              step_num: this.totalActions + 1,
              actions: step.actions,
              error: error ?? void 0
            };
            await this.stepObserver.onEvent(event);
          }
        }
        this.totalActions += step.actions.length;
        this.sinceReflection += step.actions.length;
      }
      if (this.stepDelay > 0) {
        await sleep2(this.stepDelay);
      }
      stepsTaken += 1;
      if (step.stop) {
        logger5.info("OAGI signaled task completion");
        break;
      }
      if (this.sinceReflection >= this.reflectionInterval) {
        logger5.info("Reflection interval reached");
        break;
      }
    }
    return stepsTaken;
  }
  /**
   * Ask the planner to reflect on the actions taken since the last
   * reflection and decide how to proceed.
   *
   * @returns {Promise<boolean>} `true` to keep executing (either continuing
   *   or after pivoting to a new instruction), `false` to stop. On a
   *   success assessment `this.success` is set and `false` is returned.
   */
  async reflectAndDecide(imageProvider) {
    logger5.info("Reflecting on progress");
    const screenshot = await imageProvider.provide();
    const context = this.getContext();
    context.current_todo = this.currentTodo;
    // slice(-0) is slice(0), i.e. the whole log; with nothing done since the
    // last reflection the window must be empty instead.
    const recentActions = this.sinceReflection > 0 ? this.actions.slice(-this.sinceReflection) : [];
    const { output, requestId } = await this.planner.reflect(
      recentActions,
      context,
      screenshot,
      this.externalMemory,
      this.todoIndex,
      this.currentInstruction,
      this.reflectionInterval
    );
    this.recordAction(
      "reflect",
      null,
      output.reasoning,
      output.continue_current ? "continue" : "pivot"
    );
    if (this.stepObserver) {
      const decision = output.success_assessment ? "success" : output.continue_current ? "continue" : "pivot";
      const event = {
        type: "plan",
        timestamp: /* @__PURE__ */ new Date(),
        phase: "reflection",
        image: screenshot,
        reasoning: output.reasoning,
        result: decision,
        request_id: requestId ?? void 0
      };
      await this.stepObserver.onEvent(event);
    }
    if (output.success_assessment) {
      this.success = true;
      logger5.info("Reflection indicates task is successful");
      return false;
    }
    this.sinceReflection = 0;
    if (!output.continue_current && output.new_instruction) {
      logger5.info(`Pivoting to new instruction: ${output.new_instruction}`);
      this.currentInstruction = output.new_instruction;
      await this.actor.initTask(this.currentInstruction, this.maxSteps);
      return true;
    }
    return output.continue_current;
  }
  /** Ask the planner for a summary, record it, and notify the observer. */
  async generateSummary() {
    logger5.info("Generating execution summary");
    const context = this.getContext();
    context.current_todo = this.currentTodo;
    const { summary, requestId } = await this.planner.summarize(
      this.actions,
      context,
      this.externalMemory,
      this.todoIndex
    );
    this.recordAction("summary", null, summary);
    if (this.stepObserver) {
      const event = {
        type: "plan",
        timestamp: /* @__PURE__ */ new Date(),
        phase: "summary",
        image: void 0,
        reasoning: summary,
        result: void 0,
        request_id: requestId ?? void 0
      };
      await this.stepObserver.onEvent(event);
    }
    logger5.info(`Execution summary: ${summary}`);
  }
  /**
   * Report the outcome of the last `execute` call.
   *
   * @returns {{success: boolean, actions: Array, summary: string,
   *   total_steps: number}} `summary` is the reasoning of the most recent
   *   "summary" log entry ("" if none); `total_steps` is the number of
   *   handled actions.
   */
  returnExecutionResults() {
    let summary = "";
    for (let i = this.actions.length - 1; i >= 0; i--) {
      if (this.actions[i].action_type === "summary") {
        summary = this.actions[i].reasoning ?? "";
        break;
      }
    }
    return {
      success: this.success,
      actions: this.actions,
      summary,
      total_steps: this.totalActions
    };
  }
};
1552
var TaskerAgent = class {
  /**
   * Hierarchical agent that manages multi-todo workflows: it keeps the
   * todo list in a PlannerMemory and runs one TaskeeAgent per todo.
   */
  apiKey;
  baseUrl;
  model;
  maxSteps;
  temperature;
  reflectionInterval;
  planner;
  stepObserver;
  stepDelay;
  memory = new PlannerMemory();
  currentTaskeeAgent;
  /**
   * Store the configuration that is later handed to each TaskeeAgent.
   * A Planner is created from `apiKey`/`baseUrl` when none is supplied.
   */
  constructor(apiKey, baseUrl, model = MODEL_ACTOR, maxSteps = DEFAULT_MAX_STEPS_TASKER, temperature = DEFAULT_TEMPERATURE, reflectionInterval = DEFAULT_REFLECTION_INTERVAL, planner, stepObserver, stepDelay = DEFAULT_STEP_DELAY) {
    Object.assign(this, {
      apiKey,
      baseUrl,
      model,
      maxSteps,
      temperature,
      reflectionInterval,
      planner: planner ?? new Planner(void 0, apiKey, baseUrl),
      stepObserver,
      stepDelay
    });
  }
  /** Record the task description and its todos in memory. */
  setTask(task, todos) {
    this.memory.setTask(task, todos);
    logger5.info(`Task set with ${todos.length} todos`);
  }
  /** snake_case alias of `setTask`. */
  set_task(task, todos) {
    this.setTask(task, todos);
  }
  /** Send a "split" marker event to the step observer, if there is one. */
  async emitSplit(label) {
    if (!this.stepObserver) return;
    await this.stepObserver.onEvent({
      type: "split",
      timestamp: /* @__PURE__ */ new Date(),
      label
    });
  }
  /**
   * Work through the todos in memory until none is left or a failed todo
   * remains "in_progress". The `_instruction` argument is not used.
   *
   * @returns {Promise<boolean>} `true` when no executed todo failed.
   */
  async execute(_instruction, actionHandler, imageProvider) {
    resetHandler2(actionHandler);
    let everyTodoSucceeded = true;
    for (;;) {
      const next = this.prepare();
      if (!next) {
        logger5.info("No more todos to execute");
        break;
      }
      const { todo, index } = next;
      logger5.info(`Executing todo ${index}: ${todo.description}`);
      await this.emitSplit(`Start of todo ${index + 1}: ${todo.description}`);
      const succeeded = await this.executeTodo(index, actionHandler, imageProvider);
      await this.emitSplit(`End of todo ${index + 1}: ${todo.description}`);
      if (!succeeded) {
        logger5.warn(`Todo ${index} failed`);
        everyTodoSucceeded = false;
        // A failed todo that is still "in_progress" would be selected again
        // by prepare(), so execution stops here.
        if (this.memory.todos[index]?.status === "in_progress") {
          logger5.error("Todo failed with exception, stopping execution");
          break;
        }
      }
      this.updateTaskSummary();
    }
    const counts = this.memory.getTodoStatusSummary();
    logger5.info(
      `Workflow complete. Status summary: ${JSON.stringify(counts)}`
    );
    return everyTodoSucceeded;
  }
  /**
   * Select the current todo, create a TaskeeAgent for it, and mark a
   * pending todo as "in_progress".
   *
   * @returns {{todo: object, index: number}|null} The selected todo, or
   *   `null` when memory has no pending or in-progress todo.
   */
  prepare() {
    const selected = this.memory.getCurrentTodo();
    if (!selected) return null;
    const { todo, index } = selected;
    this.currentTaskeeAgent = new TaskeeAgent(
      this.apiKey,
      this.baseUrl,
      this.model,
      this.maxSteps,
      this.reflectionInterval,
      this.temperature,
      this.planner,
      this.memory,
      index,
      this.stepObserver,
      this.stepDelay
    );
    if (todo.status === "pending") {
      this.memory.updateTodo(index, "in_progress");
    }
    logger5.info(`Prepared taskee agent for todo ${index}`);
    return selected;
  }
  /**
   * Run the prepared TaskeeAgent on the todo at `todoIndex` and fold its
   * results into memory. Errors are logged, stored as the todo's summary,
   * and reported as `false`.
   */
  async executeTodo(todoIndex, actionHandler, imageProvider) {
    const agent = this.currentTaskeeAgent;
    if (!agent || todoIndex < 0) {
      logger5.error("No taskee agent prepared");
      return false;
    }
    const todo = this.memory.todos[todoIndex];
    try {
      const succeeded = await agent.execute(todo.description, actionHandler, imageProvider);
      this.updateMemoryFromExecution(todoIndex, agent.returnExecutionResults(), succeeded);
      return succeeded;
    } catch (err) {
      logger5.error(`Error executing todo ${todoIndex}: ${err}`);
      this.memory.updateTodo(todoIndex, "in_progress", `Execution failed: ${String(err)}`);
      return false;
    }
  }
  /**
   * Update the todo's status and summary, append a history entry, and on
   * success add a "Completed todo" line to the task execution summary.
   */
  updateMemoryFromExecution(todoIndex, results, success) {
    const { memory } = this;
    const status = success ? "completed" : "in_progress";
    memory.updateTodo(todoIndex, status, results.summary);
    memory.addHistory(todoIndex, results.actions, results.summary, success);
    if (success) {
      const summaryLine = `- Completed todo ${todoIndex}: ${results.summary}`;
      const previous = memory.taskExecutionSummary;
      memory.taskExecutionSummary = previous ? [previous, summaryLine].join("\n") : summaryLine;
    }
    logger5.info(
      `Updated memory for todo ${todoIndex}: status=${status}, actions=${results.actions.length}`
    );
  }
  /**
   * Replace the task execution summary with a progress line followed by
   * one line (summary truncated to 100 characters) for each completed
   * entry among the last three history entries.
   */
  updateTaskSummary() {
    const completedCount = this.memory.getTodoStatusSummary().completed ?? 0;
    const todoCount = this.memory.todos.length;
    const recentLines = this.memory.history
      .slice(-3)
      .filter((entry) => entry.completed && entry.summary)
      .map((entry) => `- Todo ${entry.todo_index}: ${entry.summary.slice(0, 100)}`);
    this.memory.taskExecutionSummary = [
      `Progress: ${completedCount}/${todoCount} todos completed`,
      ...recentLines
    ].join("\n");
  }
  /** Return the PlannerMemory instance used by this agent. */
  getMemory() {
    return this.memory;
  }
  /** Append a new pending todo to memory. */
  appendTodo(description) {
    this.memory.appendTodo(description);
    logger5.info(`Appended new todo: ${description}`);
  }
};
1720
+
884
1721
  // src/agent/registry.ts
885
1722
  var agentRegistry = {};
886
1723
  var asyncAgentRegister = (mode) => {
@@ -936,16 +1773,504 @@ asyncAgentRegister("thinker")((options = {}) => {
936
1773
  stepDelay
937
1774
  );
938
1775
  });
1776
// Register the "tasker" mode: a factory that builds a bare TaskerAgent.
// Options left undefined fall back to the module defaults (MODEL_ACTOR,
// DEFAULT_MAX_STEPS_TASKER, DEFAULT_TEMPERATURE,
// DEFAULT_REFLECTION_INTERVAL_TASKER, DEFAULT_STEP_DELAY).
asyncAgentRegister("tasker")(
  ({
    apiKey,
    baseURL,
    model = MODEL_ACTOR,
    maxSteps = DEFAULT_MAX_STEPS_TASKER,
    temperature = DEFAULT_TEMPERATURE,
    reflectionInterval = DEFAULT_REFLECTION_INTERVAL_TASKER,
    stepObserver,
    stepDelay = DEFAULT_STEP_DELAY
  } = {}) => {
    // A null observer is normalized to undefined before it reaches the agent.
    const observer = stepObserver ?? undefined;
    return new TaskerAgent(
      apiKey,
      baseURL,
      model,
      maxSteps,
      temperature,
      reflectionInterval,
      undefined, // seventh constructor argument is intentionally not supplied
      observer,
      stepDelay
    );
  }
);
1799
// Register the "tasker:cvs_appointment" mode: a TaskerAgent pre-loaded with a
// fixed instruction and todo list for booking a CVS flu-shot appointment.
// The personal details are hard-coded placeholder values; only the agent
// construction options are taken from the caller.
asyncAgentRegister("tasker:cvs_appointment")(
  ({
    apiKey,
    baseURL,
    model = MODEL_ACTOR,
    maxSteps = DEFAULT_MAX_STEPS_TASKER,
    temperature = DEFAULT_TEMPERATURE,
    reflectionInterval = DEFAULT_REFLECTION_INTERVAL_TASKER,
    stepObserver,
    stepDelay = DEFAULT_STEP_DELAY
  } = {}) => {
    const agent = new TaskerAgent(
      apiKey,
      baseURL,
      model,
      maxSteps,
      temperature,
      reflectionInterval,
      undefined,
      stepObserver ?? undefined,
      stepDelay
    );
    const profile = {
      firstName: "First",
      lastName: "Last",
      email: "user@example.com",
      birthday: "01-01-1990",
      zipCode: "00000"
    };
    // The birthday placeholder is MM-DD-YYYY; the form takes the parts separately.
    const [month, day, year] = profile.birthday.split("-");
    const instruction = `Schedule an appointment at CVS for ${profile.firstName} ${profile.lastName} with email ${profile.email} and birthday ${profile.birthday}`;
    const todos = [
      "Open a new tab, go to www.cvs.com, type 'flu shot' in the search bar and press enter, wait for the page to load, then click on the button of Schedule vaccinations on the top of the page",
      `Enter the first name '${profile.firstName}', last name '${profile.lastName}', and email '${profile.email}' in the form. Do not use any suggested autofills. Make sure the mobile phone number is empty.`,
      `Slightly scroll down to see the date of birth, enter Month '${month}', Day '${day}', and Year '${year}' in the form`,
      "Click on 'Continue as guest' button, wait for the page to load with wait, click on 'Add vaccines' button, select 'Flu' and click on 'Add vaccines'",
      `Click on 'next' to enter the page with recommendation vaccines, then click on 'next' again, until on the page of entering zip code, enter '${profile.zipCode}', select the first option from the dropdown menu, and click on 'Search'`
    ];
    agent.setTask(instruction, todos);
    return agent;
  }
);
1840
// Register the "tasker:software_qa" mode: a TaskerAgent pre-loaded with one
// todo per sidebar entry of the Nuclear Player UI, in the order listed below.
asyncAgentRegister("tasker:software_qa")(
  ({
    apiKey,
    baseURL,
    model = MODEL_ACTOR,
    maxSteps = DEFAULT_MAX_STEPS_TASKER,
    temperature = DEFAULT_TEMPERATURE,
    reflectionInterval = DEFAULT_REFLECTION_INTERVAL_TASKER,
    stepObserver,
    stepDelay = DEFAULT_STEP_DELAY
  } = {}) => {
    const agent = new TaskerAgent(
      apiKey,
      baseURL,
      model,
      maxSteps,
      temperature,
      reflectionInterval,
      undefined,
      stepObserver ?? undefined,
      stepDelay
    );
    const sidebarEntries = [
      "Dashboard",
      "Downloads",
      "Lyrics",
      "Plugins",
      "Search Results",
      "Settings",
      "Equalizer",
      "Visualizer",
      "Listening History",
      "Favorite Albums",
      "Favorite Tracks",
      "Favorite Artists",
      "Local Library",
      "Playlists"
    ];
    const instruction = "QA: click through every sidebar button in the Nuclear Player UI";
    const todos = sidebarEntries.map(
      (entry) => `Click on '${entry}' in the left sidebar`
    );
    agent.setTask(instruction, todos);
    return agent;
  }
);
939
1884
 
940
1885
  // src/agent/observer/exporters.ts
941
1886
  var import_node_fs = __toESM(require("fs"), 1);
942
1887
  var import_node_path = __toESM(require("path"), 1);
943
1888
  var import_node_url = require("url");
1889
+ var import_meta2 = {};
1890
/**
 * Create `dirPath` (and any missing parent directories) synchronously.
 * Succeeds silently when the directory already exists.
 */
var ensureDir = (dirPath) => {
  const options = { recursive: true };
  import_node_fs.default.mkdirSync(dirPath, options);
};
1893
/**
 * Turn an action into a small coordinate record, or `null` when the action
 * carries no coordinates or its argument cannot be parsed.
 *
 * One leading "(" and one trailing ")" are stripped from `action.argument`
 * before parsing. All single/double/triple/right click variants are reported
 * with `type: "click"`.
 *
 * @param action Object with `type` and string `argument`.
 * @returns `{type:"click",x,y}`, `{type:"drag",x1,y1,x2,y2}`,
 *          `{type:"scroll",x,y,direction}` or `null`.
 */
var parseActionCoords = (action) => {
  const inner = action.argument.replace(/^\(|\)$/g, "");
  const kind = action.type;
  const isClickLike = kind === "click" || kind === "left_double" || kind === "left_triple" || kind === "right_single";
  if (isClickLike) {
    const point = parseCoords(inner);
    return point ? { type: "click", x: point[0], y: point[1] } : null;
  }
  if (kind === "drag") {
    const span = parseDragCoords(inner);
    if (!span) {
      return null;
    }
    return { type: "drag", x1: span[0], y1: span[1], x2: span[2], y2: span[3] };
  }
  if (kind === "scroll") {
    const parsed = parseScroll(inner);
    if (!parsed) {
      return null;
    }
    return { type: "scroll", x: parsed[0], y: parsed[1], direction: parsed[2] };
  }
  return null;
};
1935
/**
 * Write the recorded agent events to `filePath` as a Markdown report.
 *
 * The directory of `filePath` (and `imagesDir`, when given) is created if
 * missing. Binary screenshots are written into `imagesDir` and linked from
 * the report; without `imagesDir` only their byte size is mentioned. String
 * images are treated as URLs and printed as-is. Events of type "image" are
 * skipped.
 *
 * @param events Iterable of recorded events (`step`, `action`, `log`,
 *               `split`, `image`, `plan`); `timestamp` may be a Date or any
 *               value accepted by `new Date(...)`.
 * @param filePath Destination of the Markdown file.
 * @param imagesDir Optional directory that receives screenshot PNG files.
 */
var exportToMarkdown = (events, filePath, imagesDir) => {
  ensureDir(import_node_path.default.dirname(filePath));
  if (imagesDir) {
    ensureDir(imagesDir);
  }
  const phaseTitles = {
    initial: "Initial Planning",
    reflection: "Reflection",
    summary: "Summary"
  };
  // Plan screenshots are numbered per export. A wall-clock suffix is not
  // unique here because the whole loop usually finishes within one
  // millisecond, which made same-phase plan images overwrite each other.
  let planImageSeq = 0;
  // Renders the Markdown fragment for a screenshot, writing binary images to
  // disk when an images directory is available.
  const renderImage = (image, altText, makeFilename) => {
    if (typeof image === "string") {
      return `\n**Screenshot URL:** ${image}\n`;
    }
    if (!imagesDir) {
      return `\n*[Screenshot captured - ${image.byteLength} bytes]*\n`;
    }
    const imageFilename = makeFilename();
    import_node_fs.default.writeFileSync(
      import_node_path.default.join(imagesDir, imageFilename),
      Buffer.from(image)
    );
    // Markdown link targets must use "/" even on Windows, where path.join
    // would emit "\" (an escape character in Markdown).
    const relPath = import_node_path.default.posix.join(
      import_node_path.default.basename(imagesDir),
      imageFilename
    );
    return `\n![${altText}](${relPath})\n`;
  };
  const lines = ["# Agent Execution Report\n"];
  for (const event of events) {
    const d = event.timestamp instanceof Date ? event.timestamp : new Date(event.timestamp);
    // "HH:MM:SS" in local time.
    const timestamp = d.toTimeString().slice(0, 8);
    switch (event.type) {
      case "step":
        lines.push(`\n## Step ${event.step_num}\n`);
        lines.push(`**Time:** ${timestamp}\n`);
        if (event.task_id) {
          lines.push(`**Task ID:** \`${event.task_id}\`\n`);
        }
        lines.push(
          renderImage(
            event.image,
            `Step ${event.step_num}`,
            () => `step_${event.step_num}.png`
          )
        );
        if (event.step.reason) {
          lines.push(`\n**Reasoning:**\n> ${event.step.reason}\n`);
        }
        if (event.step.actions?.length) {
          lines.push("\n**Planned Actions:**\n");
          for (const action of event.step.actions) {
            const countStr = action.count && action.count > 1 ? ` (x${action.count})` : "";
            lines.push(`- \`${action.type}\`: ${action.argument}${countStr}\n`);
          }
        }
        if (event.step.stop) {
          lines.push("\n**Status:** Task Complete\n");
        }
        break;
      case "action":
        lines.push(`\n### Actions Executed (${timestamp})\n`);
        if (event.error) {
          lines.push(`\n**Error:** ${event.error}\n`);
        } else {
          lines.push("\n**Result:** Success\n");
        }
        break;
      case "log":
        lines.push(`\n> **Log (${timestamp}):** ${event.message}\n`);
        break;
      case "split":
        if (event.label) {
          lines.push(`\n---\n\n### ${event.label}\n`);
        } else {
          lines.push("\n---\n");
        }
        break;
      case "image":
        // Standalone image events are not rendered in the Markdown report.
        break;
      case "plan": {
        const phaseTitle = phaseTitles[event.phase] ?? event.phase;
        lines.push(`\n### ${phaseTitle} (${timestamp})\n`);
        if (event.request_id) {
          lines.push(`**Request ID:** \`${event.request_id}\`\n`);
        }
        if (event.image) {
          lines.push(
            renderImage(event.image, phaseTitle, () => {
              planImageSeq += 1;
              return `plan_${event.phase}_${planImageSeq}.png`;
            })
          );
        }
        if (event.reasoning) {
          lines.push(`\n**Reasoning:**\n> ${event.reasoning}\n`);
        }
        if (event.result) {
          lines.push(`\n**Result:** ${event.result}\n`);
        }
        break;
      }
    }
  }
  import_node_fs.default.writeFileSync(filePath, lines.join(""), "utf-8");
};
2082
/**
 * Flatten recorded events into plain JSON-friendly records for the HTML
 * report template.
 *
 * Every record gets an `event_type` and a local "HH:MM:SS" `timestamp`.
 * Binary images are base64-encoded, string images are passed through, and
 * events of type "image" (or any unknown type) produce no record.
 *
 * @param events Iterable of recorded events.
 * @returns Array of converted records, in input order.
 */
var convertEventsForHtml = (events) => {
  // Strings are kept verbatim; anything else is treated as raw bytes.
  const encodeImage = (image) => typeof image !== "string" ? Buffer.from(image).toString("base64") : image;
  const converted = [];
  for (const event of events) {
    const when = event.timestamp instanceof Date ? event.timestamp : new Date(event.timestamp);
    const timestamp = when.toTimeString().slice(0, 8);
    const kind = event.type;
    if (kind === "step") {
      const action_coords = [];
      const actions = [];
      const planned = event.step.actions?.length ? event.step.actions : [];
      for (const item of planned) {
        const located = parseActionCoords(item);
        if (located) {
          action_coords.push(located);
        }
        actions.push({
          type: item.type,
          argument: item.argument,
          count: item.count ?? 1
        });
      }
      converted.push({
        event_type: "step",
        timestamp,
        step_num: event.step_num,
        image: encodeImage(event.image),
        action_coords,
        reason: event.step.reason,
        actions,
        stop: event.step.stop,
        task_id: event.task_id
      });
    } else if (kind === "action") {
      converted.push({
        event_type: "action",
        timestamp,
        error: event.error ?? null
      });
    } else if (kind === "log") {
      converted.push({ event_type: "log", timestamp, message: event.message });
    } else if (kind === "split") {
      converted.push({ event_type: "split", timestamp, label: event.label });
    } else if (kind === "plan") {
      converted.push({
        event_type: "plan",
        timestamp,
        phase: event.phase,
        // A missing/empty plan image is reported as null.
        image: event.image ? encodeImage(event.image) : null,
        reasoning: event.reasoning,
        result: event.result ?? null,
        request_id: event.request_id ?? null
      });
    }
  }
  return converted;
};
2162
/**
 * Render the recorded events into the HTML report template and write the
 * result to `filePath`.
 *
 * The template `report_template.html` is looked up next to this module
 * first, then under `../src/agent/observer/` relative to it. The first
 * `{EVENTS_DATA}` placeholder in the template is replaced by the JSON
 * serialization of the converted events.
 *
 * @param events Recorded events; converted with `convertEventsForHtml`.
 * @param filePath Destination of the HTML file; its directory is created.
 * @throws Error when the template exists in neither location.
 */
var exportToHtml = (events, filePath) => {
  ensureDir(import_node_path.default.dirname(filePath));
  // Resolve this module's directory from import.meta.url when available,
  // otherwise from the CommonJS __filename.
  const moduleUrl = import_meta2?.url ? import_meta2.url : (0, import_node_url.pathToFileURL)(__filename).href;
  const moduleDir = import_node_path.default.dirname((0, import_node_url.fileURLToPath)(moduleUrl));
  const primaryTemplate = import_node_path.default.join(moduleDir, "report_template.html");
  const fallbackTemplate = import_node_path.default.resolve(
    moduleDir,
    "..",
    "src",
    "agent",
    "observer",
    "report_template.html"
  );
  const templatePath = import_node_fs.default.existsSync(primaryTemplate) ? primaryTemplate : fallbackTemplate;
  if (!import_node_fs.default.existsSync(templatePath)) {
    throw new Error(
      `Report template not found at ${primaryTemplate} or ${fallbackTemplate}`
    );
  }
  const template = import_node_fs.default.readFileSync(templatePath, "utf-8");
  const eventsJson = JSON.stringify(convertEventsForHtml(events));
  // Use a replacer function: with a plain replacement string, sequences such
  // as "$&", "$'" or "$$" inside the event data would be expanded by
  // String.prototype.replace and corrupt the embedded JSON.
  // NOTE(review): the JSON is inserted unescaped; if the template places it
  // inside a <script> element, event text containing "</script>" could end
  // the element early — confirm against the template.
  const htmlContent = template.replace("{EVENTS_DATA}", () => eventsJson);
  import_node_fs.default.writeFileSync(filePath, htmlContent, "utf-8");
};
2188
/**
 * Write the recorded events to `filePath` as pretty-printed JSON.
 *
 * Each event is shallow-copied with its `timestamp` converted to an ISO-8601
 * string. Binary images — an `ArrayBuffer` or any view over one, such as a
 * `Uint8Array` or Node `Buffer` — are replaced by a base64 string and tagged
 * with `image_encoding: "base64"`; string images are left untouched.
 *
 * @param events Array of recorded events.
 * @param filePath Destination of the JSON file; its directory is created.
 */
var exportToJson = (events, filePath) => {
  ensureDir(import_node_path.default.dirname(filePath));
  // Returns the base64 form of a binary image, or null for anything else.
  const toBase64 = (image) => {
    if (image instanceof ArrayBuffer) {
      return Buffer.from(image).toString("base64");
    }
    if (ArrayBuffer.isView(image)) {
      // Honour the view's window so only its own bytes are encoded; without
      // this branch typed arrays were dumped as index-keyed JSON objects.
      return Buffer.from(image.buffer, image.byteOffset, image.byteLength).toString("base64");
    }
    return null;
  };
  const jsonEvents = events.map((event) => {
    const timestamp = event.timestamp instanceof Date ? event.timestamp.toISOString() : new Date(event.timestamp).toISOString();
    const encoded = "image" in event ? toBase64(event.image) : null;
    if (encoded !== null) {
      return {
        ...event,
        timestamp,
        image: encoded,
        image_encoding: "base64"
      };
    }
    return {
      ...event,
      timestamp
    };
  });
  import_node_fs.default.writeFileSync(filePath, JSON.stringify(jsonEvents, null, 2), "utf-8");
};
2208
+
2209
+ // src/agent/observer/agent_observer.ts
2210
/**
 * String-valued enum of the report formats accepted by
 * `AsyncAgentObserver.export`: "markdown", "html" and "json".
 */
var ExportFormat = /* @__PURE__ */ ((target) => {
  const members = [
    ["MARKDOWN", "markdown"],
    ["HTML", "html"],
    ["JSON", "json"]
  ];
  for (const [key, value] of members) {
    target[key] = value;
  }
  return target;
})(ExportFormat || {});
2216
/**
 * Observer that records agent execution events in memory and exports them
 * as a Markdown, HTML or JSON report.
 */
var AsyncAgentObserver = class extends StepObserver {
  /** Recorded events, in arrival order. */
  events = [];
  /** Record an event emitted by the agent. */
  async onEvent(event) {
    this.events.push(event);
  }
  /** Append a "log" event carrying `message`, stamped with the current time. */
  addLog(message) {
    this.events.push({
      type: "log",
      timestamp: /* @__PURE__ */ new Date(),
      message
    });
  }
  /** Append a "split" (section separator) event with an optional label. */
  addSplit(label = "") {
    this.events.push({
      type: "split",
      timestamp: /* @__PURE__ */ new Date(),
      label
    });
  }
  /** Drop all recorded events by replacing the list with a new empty array. */
  clear() {
    this.events = [];
  }
  /**
   * Return the recorded events whose `step_num` equals `step_num`.
   * Events without a `step_num` never match.
   */
  getEventsByStep(step_num) {
    return this.events.filter((event) => {
      const recorded = event.step_num;
      return recorded !== undefined && recorded === step_num;
    });
  }
  /**
   * Export the recorded events to `path2`.
   *
   * @param format "markdown", "html" or "json"; strings are matched
   *               case-insensitively.
   * @param path2 Destination file path.
   * @param images_dir Optional image directory, used by the Markdown export only.
   * @throws Error when `format` is not one of the known formats.
   */
  export(format, path2, images_dir) {
    const normalized = typeof format === "string" ? format.toLowerCase() : format;
    if (normalized === "markdown" /* MARKDOWN */) {
      exportToMarkdown(this.events, path2, images_dir ?? undefined);
      return;
    }
    if (normalized === "html" /* HTML */) {
      exportToHtml(this.events, path2);
      return;
    }
    if (normalized === "json" /* JSON */) {
      exportToJson(this.events, path2);
      return;
    }
    throw new Error(`Unknown export format: ${String(format)}`);
  }
};
944
2269
 
945
2270
  // src/handler.ts
946
2271
  var import_robotjs = __toESM(require("robotjs"), 1);
947
2272
  var import_sharp = __toESM(require("sharp"), 1);
948
- var sleep2 = (ms) => new Promise((r) => setTimeout(r, ms));
2273
/** Resolve (with no value) after roughly `ms` milliseconds. */
var sleep3 = (ms) => new Promise((resolve) => {
  setTimeout(resolve, ms);
});
949
2274
  var toSharpKernel = (resample) => {
950
2275
  switch (resample) {
951
2276
  case "NEAREST":
@@ -1106,7 +2431,7 @@ var DefaultActionHandler = class {
1106
2431
  import_robotjs.default.moveMouse(p1.x, p1.y);
1107
2432
  import_robotjs.default.mouseToggle("down", "left");
1108
2433
  import_robotjs.default.dragMouse(p2.x, p2.y);
1109
- await sleep2(this.#cfg.dragDurationMs);
2434
+ await sleep3(this.#cfg.dragDurationMs);
1110
2435
  import_robotjs.default.mouseToggle("up", "left");
1111
2436
  return;
1112
2437
  }
@@ -1126,7 +2451,7 @@ var DefaultActionHandler = class {
1126
2451
  if (!last) return;
1127
2452
  const modifiers = keys.slice(0, -1);
1128
2453
  import_robotjs.default.keyTap(last, modifiers.length ? modifiers : []);
1129
- await sleep2(this.#cfg.hotkeyDelayMs);
2454
+ await sleep3(this.#cfg.hotkeyDelayMs);
1130
2455
  return;
1131
2456
  }
1132
2457
  case "type": {
@@ -1146,7 +2471,7 @@ var DefaultActionHandler = class {
1146
2471
  return;
1147
2472
  }
1148
2473
  case "wait": {
1149
- await sleep2(this.#cfg.waitDurationMs);
2474
+ await sleep3(this.#cfg.waitDurationMs);
1150
2475
  return;
1151
2476
  }
1152
2477
  case "finish": {
@@ -1167,11 +2492,13 @@ var DefaultActionHandler = class {
1167
2492
  0 && (module.exports = {
1168
2493
  APIError,
1169
2494
  Actor,
2495
+ AsyncAgentObserver,
1170
2496
  AuthenticationError,
1171
2497
  Client,
1172
2498
  ConfigurationError,
1173
2499
  DefaultActionHandler,
1174
2500
  DefaultAgent,
2501
+ ExportFormat,
1175
2502
  NetworkError,
1176
2503
  NotFoundError,
1177
2504
  OAGIError,
@@ -1179,6 +2506,7 @@ var DefaultActionHandler = class {
1179
2506
  RequestTimeoutError,
1180
2507
  ScreenshotMaker,
1181
2508
  ServerError,
2509
+ TaskerAgent,
1182
2510
  ValidationError
1183
2511
  });
1184
2512
  //# sourceMappingURL=index.cjs.map