@oagi/oagi 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -41,11 +41,13 @@ var src_exports = {};
41
41
  __export(src_exports, {
42
42
  APIError: () => APIError,
43
43
  Actor: () => Actor,
44
+ AsyncAgentObserver: () => AsyncAgentObserver,
44
45
  AuthenticationError: () => AuthenticationError,
45
46
  Client: () => Client,
46
47
  ConfigurationError: () => ConfigurationError,
47
48
  DefaultActionHandler: () => DefaultActionHandler,
48
49
  DefaultAgent: () => DefaultAgent,
50
+ ExportFormat: () => ExportFormat,
49
51
  NetworkError: () => NetworkError,
50
52
  NotFoundError: () => NotFoundError,
51
53
  OAGIError: () => OAGIError,
@@ -53,6 +55,7 @@ __export(src_exports, {
53
55
  RequestTimeoutError: () => RequestTimeoutError,
54
56
  ScreenshotMaker: () => ScreenshotMaker,
55
57
  ServerError: () => ServerError,
58
+ TaskerAgent: () => TaskerAgent,
56
59
  ValidationError: () => ValidationError
57
60
  });
58
61
  module.exports = __toCommonJS(src_exports);
@@ -72,8 +75,11 @@ var MODEL_ACTOR = "lux-actor-1";
72
75
  var MODEL_THINKER = "lux-thinker-1";
73
76
  var DEFAULT_MAX_STEPS = 20;
74
77
  var DEFAULT_MAX_STEPS_THINKER = 100;
78
+ var DEFAULT_MAX_STEPS_TASKER = 60;
75
79
  var MAX_STEPS_ACTOR = 30;
76
80
  var MAX_STEPS_THINKER = 120;
81
+ var DEFAULT_REFLECTION_INTERVAL = 4;
82
+ var DEFAULT_REFLECTION_INTERVAL_TASKER = 20;
77
83
  var DEFAULT_STEP_DELAY = 0.3;
78
84
  var DEFAULT_TEMPERATURE = 0.5;
79
85
  var DEFAULT_TEMPERATURE_LOW = 0.1;
@@ -154,6 +160,39 @@ var logTraceOnFailure = (_, __, descriptor) => {
154
160
  return descriptor;
155
161
  };
156
162
 
163
+ // src/platform-info.ts
164
+ var import_module = require("module");
165
+ var import_meta = {};
166
+ var SDK_NAME = "oagi-typescript";
167
/**
 * Look up the SDK version from a nearby package.json.
 *
 * Tries "../package.json" and then "../../package.json" relative to this
 * module and returns the first `version` that is set and is not the
 * placeholder "0.0.0".
 *
 * @returns {string} The package version, or "unknown" when none can be read.
 */
function getSdkVersion() {
  try {
    // In this CommonJS bundle `import_meta` is an empty stub object, so
    // `import_meta.url` is undefined and createRequire() would throw, making
    // the lookup always fail. Fall back to the CommonJS `__filename`.
    const anchor = import_meta.url ?? (typeof __filename === "string" ? __filename : void 0);
    const require2 = (0, import_module.createRequire)(anchor);
    for (const p of ["../package.json", "../../package.json"]) {
      try {
        const pkg = require2(p);
        if (pkg.version && pkg.version !== "0.0.0") return pkg.version;
      } catch {
        // This candidate is missing or unreadable; try the next one.
      }
    }
  } catch {
    // No usable anchor for createRequire; report "unknown" below.
  }
  return "unknown";
}
181
/**
 * Build the User-Agent string sent with SDK requests.
 *
 * @returns {string} "<sdk name>/<sdk version> (node <version>; <platform>; <arch>)".
 */
function getUserAgent() {
  const product = `${SDK_NAME}/${getSdkVersion()}`;
  const runtime = [`node ${process.version}`, process.platform, process.arch].join("; ");
  return `${product} (${runtime})`;
}
184
/**
 * Collect the identification headers attached to SDK requests.
 *
 * @returns {Object<string, string>} A fresh object holding the User-Agent plus
 *   the `x-sdk-*` headers (name, version, language, runtime version, OS, arch).
 */
function getSdkHeaders() {
  const headers = {};
  headers["User-Agent"] = getUserAgent();
  headers["x-sdk-name"] = SDK_NAME;
  headers["x-sdk-version"] = getSdkVersion();
  headers["x-sdk-language"] = "typescript";
  headers["x-sdk-language-version"] = process.version;
  headers["x-sdk-os"] = process.platform;
  headers["x-sdk-platform"] = process.arch;
  return headers;
}
195
+
157
196
  // src/types/models/action.ts
158
197
  var z = __toESM(require("zod"), 1);
159
198
  var ActionTypeSchema = z.enum([
@@ -297,6 +336,24 @@ var PlanEventSchema = BaseEventSchema.extend({
297
336
  result: z4.string().optional(),
298
337
  request_id: z4.string().optional()
299
338
  });
339
/**
 * Base class for observers of agent events. This class only provides
 * `chain`; the chained wrapper calls `onEvent(event)` on each member, so
 * subclasses are expected to supply it.
 */
var StepObserver = class {
  /**
   * Combine this observer with another one.
   *
   * @param observer Observer to notify after this one; may be null/undefined.
   * @returns A ChainedStepObserver wrapping both (a missing observer is stored as null).
   */
  chain(observer) {
    const next = observer ?? null;
    return new ChainedStepObserver([this, next]);
  }
};
/**
 * Observer that forwards every event to a list of observers, one after
 * another, skipping falsy entries.
 */
var ChainedStepObserver = class extends StepObserver {
  observers;
  constructor(observers) {
    super();
    this.observers = observers;
  }
  /**
   * Deliver `event` to each observer in list order, awaiting each call before
   * starting the next. A rejection stops delivery and propagates to the caller.
   */
  async onEvent(event) {
    // Iterate over a snapshot so the set of recipients is fixed at call time.
    for (const observer of [...this.observers]) {
      if (!observer) continue;
      await observer.onEvent(event);
    }
  }
};
300
357
 
301
358
  // src/utils/output-parser.ts
302
359
  var splitActions = (actionBlock) => {
@@ -397,28 +454,38 @@ ${taskDescription}
397
454
  // src/client.ts
398
455
  var logger2 = logger_default("client");
399
456
  var _Client = class _Client {
400
- constructor(baseUrl = process.env.OAGI_BASE_URL ?? DEFAULT_BASE_URL, apiKey = process.env.OAGI_API_KEY ?? null, maxRetries = DEFAULT_MAX_RETRIES) {
401
- this.baseUrl = baseUrl;
457
+ baseURL;
458
+ apiKey;
459
+ timeout = HTTP_CLIENT_TIMEOUT;
460
+ client;
461
+ constructor(baseURL, apiKey, maxRetries) {
462
+ if (typeof baseURL === "object") {
463
+ ({ baseURL, apiKey, maxRetries } = baseURL);
464
+ }
465
+ baseURL ??= process.env.OAGI_BASE_URL ?? DEFAULT_BASE_URL;
466
+ apiKey ??= process.env.OAGI_API_KEY;
467
+ maxRetries ??= DEFAULT_MAX_RETRIES;
468
+ this.baseURL = baseURL;
402
469
  this.apiKey = apiKey;
403
470
  if (!apiKey) {
404
471
  throw new ConfigurationError(
405
472
  `OAGI API key must be provided either as 'api_key' parameter or OAGI_API_KEY environment variable. Get your API key at ${API_KEY_HELP_URL}`
406
473
  );
407
474
  }
475
+ const sdkHeaders = getSdkHeaders();
408
476
  this.client = new import_openai.default({
409
- baseURL: new URL("./v1", baseUrl).href,
477
+ baseURL: new URL("./v1", baseURL).href,
410
478
  apiKey,
411
- maxRetries
479
+ maxRetries,
480
+ defaultHeaders: sdkHeaders
412
481
  });
413
- logger2.info(`Client initialized with base_url: ${baseUrl}`);
482
+ logger2.info(`Client initialized with base_url: ${baseURL}`);
414
483
  }
415
- timeout = HTTP_CLIENT_TIMEOUT;
416
- client;
417
484
  fetch(input, init) {
418
485
  if (typeof input === "string" || input instanceof URL) {
419
- input = new URL(input, this.baseUrl);
486
+ input = new URL(input, this.baseURL);
420
487
  } else {
421
- input = new URL(input.url, this.baseUrl);
488
+ input = new URL(input.url, this.baseURL);
422
489
  }
423
490
  init ??= {};
424
491
  const signal = AbortSignal.timeout(this.timeout * 1e3);
@@ -426,7 +493,7 @@ var _Client = class _Client {
426
493
  return fetch(input, init);
427
494
  }
428
495
  buildHeaders(apiVersion) {
429
- const headers = {};
496
+ const headers = getSdkHeaders();
430
497
  if (apiVersion) {
431
498
  headers["x-api-version"] = apiVersion;
432
499
  }
@@ -487,7 +554,10 @@ var _Client = class _Client {
487
554
  task_id: taskId
488
555
  });
489
556
  const rawOutput = response.choices[0].message.content ?? "";
490
- const step = parseRawOutput(rawOutput);
557
+ const step = {
558
+ ...parseRawOutput(rawOutput),
559
+ usage: response.usage
560
+ };
491
561
  taskId = response.task_id;
492
562
  const task = taskId ? `task_id: ${taskId}, ` : "";
493
563
  const usage = response.usage ? `, tokens: ${response.usage.prompt_tokens}+${response.usage.completion_tokens}` : "";
@@ -641,10 +711,10 @@ var Client = _Client;
641
711
  // src/actor.ts
642
712
  var logger3 = logger_default("task");
643
713
  var Actor = class {
644
- constructor(apiKey, baseUrl, model = MODEL_ACTOR, temperature) {
714
+ constructor(apiKey, baseURL, model = MODEL_ACTOR, temperature) {
645
715
  this.model = model;
646
716
  this.temperature = temperature;
647
- this.client = new Client(baseUrl, apiKey);
717
+ this.client = new Client(baseURL, apiKey);
648
718
  }
649
719
  /**
650
720
  * Client-side generated UUID
@@ -809,7 +879,7 @@ var DefaultAgent = class {
809
879
  async execute(instruction, action_handler, image_provider) {
810
880
  const actor = new Actor(this.api_key, this.base_url, this.model);
811
881
  logger4.info(`Starting async task execution: ${instruction}`);
812
- await actor.initTask(instruction, this.max_steps);
882
+ actor.initTask(instruction, this.max_steps);
813
883
  resetHandler(action_handler);
814
884
  for (let i = 0; i < this.max_steps; i++) {
815
885
  const step_num = i + 1;
@@ -870,6 +940,784 @@ var DefaultAgent = class {
870
940
  }
871
941
  };
872
942
 
943
+ // src/agent/tasker.ts
944
+ var logger5 = logger_default("agent.tasker");
945
/**
 * Call `handler.reset()` when the handler exposes a `reset` function;
 * handlers without one are left untouched.
 */
var resetHandler2 = (handler) => {
  if (typeof handler.reset !== "function") return;
  handler.reset();
};
950
/**
 * Return a promise that resolves after `seconds` seconds.
 */
var sleep2 = (seconds) => {
  const delayMs = seconds * 1e3;
  return new Promise((resolve) => {
    setTimeout(resolve, delayMs);
  });
};
951
/**
 * Extract a UUID that forms the last path segment of `url`.
 *
 * The UUID must follow a "/", may carry an alphabetic file extension, and must
 * be followed by "?" or the end of the string. Matching is case-insensitive.
 *
 * @returns {string|null} The UUID as written in the URL, or null when absent.
 */
var extractUuidFromUrl = (url) => {
  const found = /\/([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})(?:\.[a-z]+)?(?:\?|$)/i.exec(url);
  if (found === null) return null;
  return found[1];
};
956
/**
 * In-memory record of a multi-todo task: the task description, the todo list
 * with statuses, per-todo execution history, and running summaries.
 */
var PlannerMemory = class {
  taskDescription = "";
  todos = [];
  history = [];
  taskExecutionSummary = "";
  todoExecutionSummaries = {};
  /**
   * Store the task description and its todos. String entries become
   * `{ description, status: "pending" }`; object entries are kept as given.
   */
  setTask(taskDescription, todos) {
    const toTodo = (entry) => {
      if (typeof entry === "string") {
        return { description: entry, status: "pending" };
      }
      return entry;
    };
    this.taskDescription = taskDescription;
    this.todos = todos.map(toTodo);
  }
  /**
   * Find the first todo whose status is "pending" or "in_progress".
   *
   * @returns {{todo: object, index: number}|null} The todo and its index, or null.
   */
  getCurrentTodo() {
    const index = this.todos.findIndex(
      (todo) => todo.status === "pending" || todo.status === "in_progress"
    );
    if (index === -1) return null;
    return { todo: this.todos[index], index };
  }
  /**
   * Set the status of the todo at `index` and, when `summary` is truthy,
   * record it as that todo's execution summary. Out-of-range indexes are ignored.
   */
  updateTodo(index, status, summary) {
    if (index < 0 || index >= this.todos.length) return;
    const target = this.todos[index];
    target.status = status;
    if (!summary) return;
    this.todoExecutionSummaries[index] = summary;
  }
  /**
   * Append a history entry for the todo at `todoIndex`.
   * Out-of-range indexes are ignored.
   */
  addHistory(todoIndex, actions, summary, completed = false) {
    if (todoIndex < 0 || todoIndex >= this.todos.length) return;
    const { description } = this.todos[todoIndex];
    const entry = {
      todo_index: todoIndex,
      todo: description,
      actions,
      summary,
      completed
    };
    this.history.push(entry);
  }
  /**
   * Produce a snake_case snapshot of the memory. History entries expose the
   * number of actions rather than the actions themselves.
   */
  getContext() {
    const todos = this.todos.map(({ description, status }, index) => ({
      index,
      description,
      status
    }));
    const history = this.history.map((entry) => ({
      todo_index: entry.todo_index,
      todo: entry.todo,
      action_count: entry.actions.length,
      summary: entry.summary,
      completed: entry.completed
    }));
    return {
      task_description: this.taskDescription,
      todos,
      history,
      task_execution_summary: this.taskExecutionSummary,
      todo_execution_summaries: this.todoExecutionSummaries
    };
  }
  /**
   * Count todos per status. The five known statuses are always present
   * (starting at 0); any other status value is counted under its own key.
   */
  getTodoStatusSummary() {
    const counts = {
      pending: 0,
      in_progress: 0,
      completed: 0,
      skipped: 0,
      blocked: 0
    };
    this.todos.forEach(({ status }) => {
      counts[status] = (counts[status] ?? 0) + 1;
    });
    return counts;
  }
  /** Add a new pending todo to the end of the list. */
  appendTodo(description) {
    this.todos.push({ description, status: "pending" });
  }
};
1030
/**
 * Talks to the planning workers ("oagi_first", "oagi_follow",
 * "oagi_task_summary") through an API client and converts their text
 * responses into structured results.
 */
var Planner = class {
  /**
   * @param client  Optional client to use. When omitted, one is created
   *                lazily from `apiKey` / `baseUrl` on first use.
   * @param apiKey  API key used for the lazily created client.
   * @param baseUrl Base URL used for the lazily created client.
   */
  constructor(client, apiKey, baseUrl) {
    this.apiKey = apiKey;
    this.baseUrl = baseUrl;
    this.client = client;
  }
  client;
  // True only when this planner created the client itself (see ensureClient).
  ownsClient = false;
  /** Return the client, creating (and taking ownership of) one if needed. */
  ensureClient() {
    if (!this.client) {
      this.client = new Client(this.baseUrl, this.apiKey);
      this.ownsClient = true;
    }
    return this.client;
  }
  getClient() {
    return this.ensureClient();
  }
  /**
   * Close the client if this planner created it. Injected clients are left
   * alone. The closed client is forgotten so that a later call creates a fresh
   * one instead of reusing a closed instance.
   */
  async close() {
    if (!this.ownsClient || !this.client) return;
    const closable = this.client;
    this.client = void 0;
    this.ownsClient = false;
    if (typeof closable.close === "function") {
      await closable.close();
    }
  }
  /**
   * Gather the fields sent to the workers, preferring `memory` (when both
   * `memory` and `todoIndex` are given) and otherwise falling back to the
   * plain `context` object.
   */
  extractMemoryData(memory, context, todoIndex) {
    if (memory && todoIndex !== void 0) {
      const taskDescription = memory.taskDescription;
      const todos = memory.todos.map((todo, index) => ({
        index,
        description: todo.description,
        status: todo.status,
        execution_summary: memory.todoExecutionSummaries[index] ?? void 0
      }));
      const history = memory.history.map((history2) => ({
        todo_index: history2.todo_index,
        todo_description: history2.todo,
        action_count: history2.actions.length,
        summary: history2.summary ?? void 0,
        completed: history2.completed
      }));
      const taskExecutionSummary = memory.taskExecutionSummary || void 0;
      const overallTodo = memory.todos[todoIndex] ? memory.todos[todoIndex].description : "";
      return {
        taskDescription,
        todos,
        history,
        taskExecutionSummary,
        overallTodo
      };
    }
    const rawTodos = context.todos;
    const rawHistory = context.history;
    return {
      taskDescription: context.task_description ?? "",
      todos: Array.isArray(rawTodos) ? rawTodos : [],
      history: Array.isArray(rawHistory) ? rawHistory : [],
      taskExecutionSummary: void 0,
      overallTodo: context.current_todo ?? ""
    };
  }
  /**
   * Return the substring from the first "{" to the last "}" (inclusive),
   * or "" when no such span exists.
   */
  extractJsonString(text) {
    const start = text.indexOf("{");
    const end = text.lastIndexOf("}") + 1;
    if (start < 0 || end <= start) return "";
    return text.slice(start, end);
  }
  /**
   * Parse an initial-plan response. On any parse failure an empty
   * instruction with an explanatory `reasoning` is returned.
   */
  parsePlannerOutput(response) {
    try {
      const jsonResponse = this.extractJsonString(response);
      const data = JSON.parse(jsonResponse);
      return {
        instruction: data.subtask ?? data.instruction ?? "",
        reasoning: data.reasoning ?? "",
        subtodos: data.subtodos ?? []
      };
    } catch {
      return {
        instruction: "",
        reasoning: "Failed to parse structured response",
        subtodos: []
      };
    }
  }
  /**
   * Parse a reflection response. `success` must be exactly "yes" to count as
   * success. The current approach continues only when there is neither
   * success nor a new subtask instruction. Parse failures default to
   * "continue current approach".
   */
  parseReflectionOutput(response) {
    try {
      const jsonResponse = this.extractJsonString(response);
      const data = JSON.parse(jsonResponse);
      const success = data.success === "yes";
      const newSubtask = (data.subtask_instruction ?? "").trim();
      const continueCurrent = !success && !newSubtask;
      return {
        continue_current: continueCurrent,
        new_instruction: newSubtask || null,
        reasoning: data.reflection ?? data.reasoning ?? "",
        success_assessment: success
      };
    } catch {
      return {
        continue_current: true,
        new_instruction: null,
        reasoning: "Failed to parse reflection response, continuing current approach",
        success_assessment: false
      };
    }
  }
  /** Render `context.history` as newline-separated notes ("" when empty). */
  formatExecutionNotes(context) {
    const history = context.history;
    if (!history?.length) return "";
    const parts = [];
    for (const item of history) {
      parts.push(
        `Todo ${item.todo_index}: ${item.action_count} actions, completed: ${item.completed}`
      );
      if (item.summary) {
        parts.push(`Summary: ${item.summary}`);
      }
    }
    return parts.join("\n");
  }
  /**
   * Resolve a screenshot to `{ uuid, url }`. A string is treated as a URL
   * whose UUID is extracted when present; anything else is uploaded through
   * the client's `putS3PresignedUrl`.
   */
  async ensureScreenshotUuid(screenshot) {
    if (!screenshot) return { uuid: void 0, url: void 0 };
    if (typeof screenshot === "string") {
      const uuid = extractUuidFromUrl(screenshot);
      return { uuid: uuid ?? void 0, url: screenshot };
    }
    const client = this.ensureClient();
    const upload = await client.putS3PresignedUrl(screenshot);
    return { uuid: upload.uuid, url: upload.download_url };
  }
  /**
   * Ask the "oagi_first" worker for the first instruction of a todo.
   *
   * @returns {Promise<{output: object, requestId: *}>} Parsed plan plus the request id.
   */
  async initialPlan(todo, context, screenshot, memory, todoIndex) {
    const client = this.ensureClient();
    const { uuid } = await this.ensureScreenshotUuid(screenshot);
    const { taskDescription, todos, history, taskExecutionSummary } = this.extractMemoryData(memory, context, todoIndex);
    const response = await client.callWorker({
      workerId: "oagi_first",
      overallTodo: todo,
      taskDescription,
      todos,
      history,
      currentTodoIndex: todoIndex,
      taskExecutionSummary,
      currentScreenshot: uuid
    });
    return {
      output: this.parsePlannerOutput(response.response),
      requestId: response.request_id
    };
  }
  /**
   * Ask the "oagi_follow" worker to assess recent progress. Only the last
   * `reflectionInterval` entries of `actions` are sent as the step window.
   *
   * @returns {Promise<{output: object, requestId: *}>} Parsed reflection plus the request id.
   */
  async reflect(actions, context, screenshot, memory, todoIndex, currentInstruction, reflectionInterval = DEFAULT_REFLECTION_INTERVAL) {
    const client = this.ensureClient();
    const { uuid } = await this.ensureScreenshotUuid(screenshot);
    const {
      taskDescription,
      todos,
      history,
      taskExecutionSummary,
      overallTodo
    } = this.extractMemoryData(memory, context, todoIndex);
    const windowActions = actions.slice(-reflectionInterval);
    const windowSteps = windowActions.map((action, index) => ({
      step_number: index + 1,
      action_type: action.action_type,
      target: action.target ?? "",
      reasoning: action.reasoning ?? ""
    }));
    const windowScreenshots = windowActions.map((action) => action.screenshot_uuid).filter(Boolean);
    const priorNotes = this.formatExecutionNotes(context);
    const response = await client.callWorker({
      workerId: "oagi_follow",
      overallTodo,
      taskDescription,
      todos,
      history,
      currentTodoIndex: todoIndex,
      taskExecutionSummary,
      currentSubtaskInstruction: currentInstruction ?? "",
      windowSteps,
      windowScreenshots,
      resultScreenshot: uuid,
      priorNotes
    });
    return {
      output: this.parseReflectionOutput(response.response),
      requestId: response.request_id
    };
  }
  /**
   * Ask the "oagi_task_summary" worker for a summary.
   *
   * The response is parsed like the other worker responses (JSON object
   * extracted from surrounding text), so fenced or prose-wrapped JSON still
   * yields `task_summary`. When no `task_summary` can be read, the raw
   * response text is returned as the summary.
   *
   * @returns {Promise<{summary: *, requestId: *}>}
   */
  async summarize(_executionHistory, context, memory, todoIndex) {
    const client = this.ensureClient();
    const {
      taskDescription,
      todos,
      history,
      taskExecutionSummary,
      overallTodo
    } = this.extractMemoryData(memory, context, todoIndex);
    const latestTodoSummary = memory && todoIndex !== void 0 ? memory.todoExecutionSummaries[todoIndex] : "";
    const response = await client.callWorker({
      workerId: "oagi_task_summary",
      overallTodo,
      taskDescription,
      todos,
      history,
      currentTodoIndex: todoIndex,
      taskExecutionSummary,
      latestTodoSummary
    });
    const raw = response.response;
    try {
      const parsed = JSON.parse(this.extractJsonString(raw));
      return {
        summary: parsed.task_summary ?? raw,
        requestId: response.request_id
      };
    } catch {
      // Not JSON (or no object found): use the response text as-is.
      return { summary: raw, requestId: response.request_id };
    }
  }
};
1248
/**
 * Executes a single todo: asks the planner for an initial instruction, drives
 * the actor step by step, periodically asks the planner to reflect, and
 * finally requests a summary. Every plan, action, reflection, error and
 * summary is appended to `actions`.
 */
var TaskeeAgent = class {
  apiKey;
  baseUrl;
  model;
  maxSteps;
  reflectionInterval;
  temperature;
  planner;
  externalMemory;
  todoIndex;
  stepObserver;
  stepDelay;
  actor;
  currentTodo = "";
  currentInstruction = "";
  actions = [];
  // Number of actor actions executed overall / since the last reflection.
  totalActions = 0;
  sinceReflection = 0;
  success = false;
  /**
   * @param apiKey             API key passed to the actor (and to a default planner).
   * @param baseUrl            Base URL passed to the actor (and to a default planner).
   * @param model              Model name for the actor.
   * @param maxSteps           Step budget for one `execute` call.
   * @param reflectionInterval Number of actions after which a subtask pauses for reflection.
   * @param temperature        Temperature passed to the actor.
   * @param planner            Planner to use; a new Planner is created when omitted.
   * @param externalMemory     Optional memory object providing `getContext()`.
   * @param todoIndex          Index of this todo in the external memory, if any.
   * @param stepObserver       Optional observer notified via `onEvent`.
   * @param stepDelay          Seconds to wait after each step.
   */
  constructor(apiKey, baseUrl, model = MODEL_ACTOR, maxSteps = DEFAULT_MAX_STEPS, reflectionInterval = DEFAULT_REFLECTION_INTERVAL, temperature = DEFAULT_TEMPERATURE, planner, externalMemory, todoIndex, stepObserver, stepDelay = DEFAULT_STEP_DELAY) {
    this.apiKey = apiKey;
    this.baseUrl = baseUrl;
    this.model = model;
    this.maxSteps = maxSteps;
    this.reflectionInterval = reflectionInterval;
    this.temperature = temperature;
    this.planner = planner ?? new Planner(void 0, apiKey, baseUrl);
    this.externalMemory = externalMemory;
    this.todoIndex = todoIndex;
    this.stepObserver = stepObserver;
    this.stepDelay = stepDelay;
  }
  /**
   * Run the todo described by `instruction`.
   *
   * @param instruction   Description of the todo.
   * @param actionHandler Object with `handle(actions)` (and optionally `reset()`).
   * @param imageProvider Object with `provide()` returning a screenshot.
   * @returns {Promise<boolean>} True when reflection judged the todo successful;
   *   false otherwise, including when any error was thrown (errors are logged
   *   and recorded, not rethrown).
   */
  async execute(instruction, actionHandler, imageProvider) {
    resetHandler2(actionHandler);
    this.currentTodo = instruction;
    this.actions = [];
    this.totalActions = 0;
    this.sinceReflection = 0;
    this.success = false;
    try {
      this.actor = new Actor(
        this.apiKey,
        this.baseUrl,
        this.model,
        this.temperature
      );
      await this.initialPlan(imageProvider);
      this.actor.initTask(this.currentInstruction, this.maxSteps);
      let remainingSteps = this.maxSteps;
      while (remainingSteps > 0 && !this.success) {
        const stepsTaken = await this.executeSubtask(
          Math.min(this.maxSteps, remainingSteps),
          actionHandler,
          imageProvider
        );
        // A subtask that aborts before completing a step (screenshot upload or
        // actor error) reports 0 steps. Charge at least one step per attempt
        // so that repeated failures cannot loop forever while reflection
        // keeps answering "continue".
        remainingSteps -= Math.max(stepsTaken, 1);
        if (!this.success && remainingSteps > 0) {
          const shouldContinue = await this.reflectAndDecide(imageProvider);
          if (!shouldContinue) {
            break;
          }
        }
      }
      await this.generateSummary();
      return this.success;
    } catch (err) {
      logger5.error(`Error executing todo: ${err}`);
      this.recordAction("error", null, String(err));
      return false;
    } finally {
      this.actor = void 0;
    }
  }
  /** Context from the external memory, or an empty object when there is none. */
  getContext() {
    return this.externalMemory ? this.externalMemory.getContext() : {};
  }
  /** Append a timestamped record to `actions`. */
  recordAction(actionType, target, reasoning, result, screenshotUuid) {
    this.actions.push({
      timestamp: (/* @__PURE__ */ new Date()).toISOString(),
      action_type: actionType,
      target,
      reasoning,
      result,
      details: {},
      screenshot_uuid: screenshotUuid ?? void 0
    });
  }
  /**
   * Ask the planner for the first instruction, record it, notify the
   * observer, and store it as the current instruction.
   */
  async initialPlan(imageProvider) {
    logger5.info("Generating initial plan for todo");
    const screenshot = await imageProvider.provide();
    const context = this.getContext();
    const { output, requestId } = await this.planner.initialPlan(
      this.currentTodo,
      context,
      screenshot,
      this.externalMemory,
      this.todoIndex
    );
    this.recordAction("plan", "initial", output.reasoning, output.instruction);
    if (this.stepObserver) {
      const event = {
        type: "plan",
        timestamp: /* @__PURE__ */ new Date(),
        phase: "initial",
        image: screenshot,
        reasoning: output.reasoning,
        result: output.instruction,
        request_id: requestId ?? void 0
      };
      await this.stepObserver.onEvent(event);
    }
    this.currentInstruction = output.instruction;
    logger5.info(`Initial instruction: ${this.currentInstruction}`);
  }
  /**
   * Run actor steps until `maxSteps` is reached, the actor signals stop, the
   * reflection interval is reached, or a screenshot/actor error occurs.
   * Errors thrown by the action handler propagate to the caller.
   *
   * @returns {Promise<number>} Number of steps completed (0 when the first step failed).
   */
  async executeSubtask(maxSteps, actionHandler, imageProvider) {
    logger5.info(`Executing subtask with max ${maxSteps} steps`);
    let stepsTaken = 0;
    const client = this.planner.getClient();
    for (let stepNum = 0; stepNum < maxSteps; stepNum++) {
      const screenshot = await imageProvider.provide();
      let screenshotUuid;
      let screenshotUrl;
      try {
        if (typeof screenshot === "string") {
          screenshotUuid = extractUuidFromUrl(screenshot) ?? void 0;
          screenshotUrl = screenshot;
        } else {
          const upload = await client.putS3PresignedUrl(screenshot);
          screenshotUuid = upload.uuid;
          screenshotUrl = upload.download_url;
        }
      } catch (err) {
        logger5.error(`Error uploading screenshot: ${err}`);
        this.recordAction("error", "screenshot_upload", String(err));
        break;
      }
      let step;
      try {
        step = await this.actor.step(screenshotUrl ?? screenshot, void 0);
      } catch (err) {
        logger5.error(`Error getting step from OAGI: ${err}`);
        this.recordAction(
          "error",
          "oagi_step",
          String(err),
          null,
          screenshotUuid
        );
        break;
      }
      if (step.reason) {
        logger5.info(`Step ${this.totalActions + 1}: ${step.reason}`);
      }
      if (this.stepObserver) {
        const event = {
          type: "step",
          timestamp: /* @__PURE__ */ new Date(),
          step_num: this.totalActions + 1,
          image: screenshot,
          step,
          task_id: this.actor.taskId
        };
        await this.stepObserver.onEvent(event);
      }
      if (step.actions?.length) {
        logger5.info(`Actions (${step.actions.length}):`);
        for (const action of step.actions) {
          const countSuffix = action.count && action.count > 1 ? ` x${action.count}` : "";
          logger5.info(`  [${action.type}] ${action.argument}${countSuffix}`);
        }
        for (const action of step.actions) {
          this.recordAction(
            action.type,
            action.argument,
            step.reason ?? null,
            null,
            screenshotUuid
          );
        }
        let error = null;
        try {
          await actionHandler.handle(step.actions);
        } catch (err) {
          error = String(err);
          throw err;
        } finally {
          // The observer is told about the action batch whether or not the
          // handler succeeded; `error` is set only on failure.
          if (this.stepObserver) {
            const event = {
              type: "action",
              timestamp: /* @__PURE__ */ new Date(),
              step_num: this.totalActions + 1,
              actions: step.actions,
              error: error ?? void 0
            };
            await this.stepObserver.onEvent(event);
          }
        }
        this.totalActions += step.actions.length;
        this.sinceReflection += step.actions.length;
      }
      if (this.stepDelay > 0) {
        await sleep2(this.stepDelay);
      }
      stepsTaken += 1;
      if (step.stop) {
        logger5.info("OAGI signaled task completion");
        break;
      }
      if (this.sinceReflection >= this.reflectionInterval) {
        logger5.info("Reflection interval reached");
        break;
      }
    }
    return stepsTaken;
  }
  /**
   * Ask the planner to reflect on the actions taken since the last
   * reflection and act on its verdict.
   *
   * @returns {Promise<boolean>} True to keep executing (same or new
   *   instruction); false when the todo is judged successful or should stop.
   */
  async reflectAndDecide(imageProvider) {
    logger5.info("Reflecting on progress");
    const screenshot = await imageProvider.provide();
    const context = this.getContext();
    context.current_todo = this.currentTodo;
    // slice(-0) is slice(0) and would return the entire history, so an empty
    // window must be handled explicitly when nothing ran since the last reflection.
    const recentActions = this.sinceReflection > 0 ? this.actions.slice(-this.sinceReflection) : [];
    const { output, requestId } = await this.planner.reflect(
      recentActions,
      context,
      screenshot,
      this.externalMemory,
      this.todoIndex,
      this.currentInstruction,
      this.reflectionInterval
    );
    this.recordAction(
      "reflect",
      null,
      output.reasoning,
      output.continue_current ? "continue" : "pivot"
    );
    if (this.stepObserver) {
      const decision = output.success_assessment ? "success" : output.continue_current ? "continue" : "pivot";
      const event = {
        type: "plan",
        timestamp: /* @__PURE__ */ new Date(),
        phase: "reflection",
        image: screenshot,
        reasoning: output.reasoning,
        result: decision,
        request_id: requestId ?? void 0
      };
      await this.stepObserver.onEvent(event);
    }
    if (output.success_assessment) {
      this.success = true;
      logger5.info("Reflection indicates task is successful");
      return false;
    }
    this.sinceReflection = 0;
    if (!output.continue_current && output.new_instruction) {
      logger5.info(`Pivoting to new instruction: ${output.new_instruction}`);
      this.currentInstruction = output.new_instruction;
      await this.actor.initTask(this.currentInstruction, this.maxSteps);
      return true;
    }
    return output.continue_current;
  }
  /** Ask the planner for a summary, record it, and notify the observer. */
  async generateSummary() {
    logger5.info("Generating execution summary");
    const context = this.getContext();
    context.current_todo = this.currentTodo;
    const { summary, requestId } = await this.planner.summarize(
      this.actions,
      context,
      this.externalMemory,
      this.todoIndex
    );
    this.recordAction("summary", null, summary);
    if (this.stepObserver) {
      const event = {
        type: "plan",
        timestamp: /* @__PURE__ */ new Date(),
        phase: "summary",
        image: void 0,
        reasoning: summary,
        result: void 0,
        request_id: requestId ?? void 0
      };
      await this.stepObserver.onEvent(event);
    }
    logger5.info(`Execution summary: ${summary}`);
  }
  /**
   * Report the outcome of the last `execute` call. `summary` is taken from
   * the most recent "summary" record ("" when there is none) and
   * `total_steps` is the number of actor actions executed.
   */
  returnExecutionResults() {
    let summary = "";
    for (let i = this.actions.length - 1; i >= 0; i--) {
      if (this.actions[i].action_type === "summary") {
        summary = this.actions[i].reasoning ?? "";
        break;
      }
    }
    return {
      success: this.success,
      actions: this.actions,
      summary,
      total_steps: this.totalActions
    };
  }
};
1552
var TaskerAgent = class {
  /** Hierarchical agent that manages multi-todo workflows. */
  apiKey;
  baseUrl;
  model;
  maxSteps;
  temperature;
  reflectionInterval;
  planner;
  stepObserver;
  stepDelay;
  memory = new PlannerMemory();
  currentTaskeeAgent;
  /**
   * @param apiKey             API key handed to each per-todo agent (and a default planner).
   * @param baseUrl            Base URL handed to each per-todo agent (and a default planner).
   * @param model              Model name for the per-todo agents.
   * @param maxSteps           Step budget given to each per-todo agent.
   * @param temperature        Temperature given to each per-todo agent.
   * @param reflectionInterval Reflection interval given to each per-todo agent.
   * @param planner            Shared planner; a new Planner is created when omitted.
   * @param stepObserver       Optional observer notified via `onEvent`.
   * @param stepDelay          Per-step delay given to each per-todo agent.
   */
  constructor(apiKey, baseUrl, model = MODEL_ACTOR, maxSteps = DEFAULT_MAX_STEPS_TASKER, temperature = DEFAULT_TEMPERATURE, reflectionInterval = DEFAULT_REFLECTION_INTERVAL, planner, stepObserver, stepDelay = DEFAULT_STEP_DELAY) {
    Object.assign(this, {
      apiKey,
      baseUrl,
      model,
      maxSteps,
      temperature,
      reflectionInterval,
      planner: planner ?? new Planner(void 0, apiKey, baseUrl),
      stepObserver,
      stepDelay
    });
  }
  /** Store the task description and its todos in memory. */
  setTask(task, todos) {
    this.memory.setTask(task, todos);
    logger5.info(`Task set with ${todos.length} todos`);
  }
  /** snake_case alias of `setTask`. */
  set_task(task, todos) {
    this.setTask(task, todos);
  }
  /**
   * Work through the todos in memory, one per-todo agent at a time.
   * The `_instruction` argument is not used; the task comes from `setTask`.
   * Execution stops early when a todo fails and is left "in_progress".
   *
   * @returns {Promise<boolean>} True when no todo failed.
   */
  async execute(_instruction, actionHandler, imageProvider) {
    resetHandler2(actionHandler);
    // Sends a "split" marker event; callers check for an observer first.
    const emitSplit = (label) => this.stepObserver.onEvent({
      type: "split",
      timestamp: /* @__PURE__ */ new Date(),
      label
    });
    let overallSuccess = true;
    for (;;) {
      const todoInfo = this.prepare();
      if (!todoInfo) {
        logger5.info("No more todos to execute");
        break;
      }
      const { todo, index } = todoInfo;
      logger5.info(`Executing todo ${index}: ${todo.description}`);
      if (this.stepObserver) {
        await emitSplit(`Start of todo ${index + 1}: ${todo.description}`);
      }
      const success = await this.executeTodo(index, actionHandler, imageProvider);
      if (this.stepObserver) {
        await emitSplit(`End of todo ${index + 1}: ${todo.description}`);
      }
      if (!success) {
        logger5.warn(`Todo ${index} failed`);
        overallSuccess = false;
        // A failed todo that is still "in_progress" would be selected again
        // by prepare(), so stop instead of retrying it.
        if (this.memory.todos[index]?.status === "in_progress") {
          logger5.error("Todo failed with exception, stopping execution");
          break;
        }
      }
      this.updateTaskSummary();
    }
    const statusSummary = this.memory.getTodoStatusSummary();
    logger5.info(
      `Workflow complete. Status summary: ${JSON.stringify(statusSummary)}`
    );
    return overallSuccess;
  }
  /**
   * Create the per-todo agent for the next open todo and mark a pending todo
   * as "in_progress".
   *
   * @returns {{todo: object, index: number}|null} The selected todo, or null when none is open.
   */
  prepare() {
    const current = this.memory.getCurrentTodo();
    if (!current) return null;
    const { todo, index } = current;
    this.currentTaskeeAgent = new TaskeeAgent(
      this.apiKey,
      this.baseUrl,
      this.model,
      this.maxSteps,
      this.reflectionInterval,
      this.temperature,
      this.planner,
      this.memory,
      index,
      this.stepObserver,
      this.stepDelay
    );
    if (todo.status === "pending") {
      this.memory.updateTodo(index, "in_progress");
    }
    logger5.info(`Prepared taskee agent for todo ${index}`);
    return current;
  }
  /**
   * Run the prepared per-todo agent for `todoIndex` and fold its results into
   * memory. Thrown errors are logged and reported as failure.
   *
   * @returns {Promise<boolean>} Whether the todo succeeded.
   */
  async executeTodo(todoIndex, actionHandler, imageProvider) {
    const agent = this.currentTaskeeAgent;
    if (!agent || todoIndex < 0) {
      logger5.error("No taskee agent prepared");
      return false;
    }
    const todo = this.memory.todos[todoIndex];
    try {
      const success = await agent.execute(todo.description, actionHandler, imageProvider);
      this.updateMemoryFromExecution(todoIndex, agent.returnExecutionResults(), success);
      return success;
    } catch (err) {
      logger5.error(`Error executing todo ${todoIndex}: ${err}`);
      this.memory.updateTodo(todoIndex, "in_progress", `Execution failed: ${String(err)}`);
      return false;
    }
  }
  /**
   * Record a todo's results: update its status ("completed" or
   * "in_progress"), add a history entry, and on success append a line to the
   * task execution summary.
   */
  updateMemoryFromExecution(todoIndex, results, success) {
    const { actions, summary } = results;
    const status = success ? "completed" : "in_progress";
    this.memory.updateTodo(todoIndex, status, summary);
    this.memory.addHistory(todoIndex, actions, summary, success);
    if (success) {
      const summaryLine = `- Completed todo ${todoIndex}: ${summary}`;
      const previous = this.memory.taskExecutionSummary;
      this.memory.taskExecutionSummary = previous ? `${previous}\n${summaryLine}` : summaryLine;
    }
    logger5.info(
      `Updated memory for todo ${todoIndex}: status=${status}, actions=${actions.length}`
    );
  }
  /**
   * Replace the task execution summary with a progress line followed by the
   * summaries (first 100 characters each) of completed entries among the last
   * three history records.
   */
  updateTaskSummary() {
    const completed = this.memory.getTodoStatusSummary().completed ?? 0;
    const total = this.memory.todos.length;
    const recentLines = this.memory.history
      .slice(-3)
      .filter((entry) => entry.completed && entry.summary)
      .map((entry) => `- Todo ${entry.todo_index}: ${entry.summary.slice(0, 100)}`);
    this.memory.taskExecutionSummary = [
      `Progress: ${completed}/${total} todos completed`,
      ...recentLines
    ].join("\n");
  }
  /** The memory object backing this agent. */
  getMemory() {
    return this.memory;
  }
  /** Add a new pending todo to the end of the list. */
  appendTodo(description) {
    this.memory.appendTodo(description);
    logger5.info(`Appended new todo: ${description}`);
  }
};
1720
+
873
1721
  // src/agent/registry.ts
874
1722
  var agentRegistry = {};
875
1723
  var asyncAgentRegister = (mode) => {
@@ -888,7 +1736,7 @@ var asyncAgentRegister = (mode) => {
888
1736
  asyncAgentRegister("actor")((options = {}) => {
889
1737
  const {
890
1738
  apiKey,
891
- baseUrl,
1739
+ baseURL,
892
1740
  model = MODEL_ACTOR,
893
1741
  maxSteps = DEFAULT_MAX_STEPS,
894
1742
  temperature = DEFAULT_TEMPERATURE_LOW,
@@ -897,7 +1745,7 @@ asyncAgentRegister("actor")((options = {}) => {
897
1745
  } = options;
898
1746
  return new DefaultAgent(
899
1747
  apiKey,
900
- baseUrl,
1748
+ baseURL,
901
1749
  model,
902
1750
  maxSteps,
903
1751
  temperature,
@@ -908,7 +1756,7 @@ asyncAgentRegister("actor")((options = {}) => {
908
1756
  asyncAgentRegister("thinker")((options = {}) => {
909
1757
  const {
910
1758
  apiKey,
911
- baseUrl,
1759
+ baseURL,
912
1760
  model = MODEL_THINKER,
913
1761
  maxSteps = DEFAULT_MAX_STEPS_THINKER,
914
1762
  temperature = DEFAULT_TEMPERATURE_LOW,
@@ -917,24 +1765,512 @@ asyncAgentRegister("thinker")((options = {}) => {
917
1765
  } = options;
918
1766
  return new DefaultAgent(
919
1767
  apiKey,
920
- baseUrl,
1768
+ baseURL,
1769
+ model,
1770
+ maxSteps,
1771
+ temperature,
1772
+ stepObserver ?? void 0,
1773
+ stepDelay
1774
+ );
1775
+ });
1776
// Factory for the generic "tasker" mode: builds a TaskerAgent from the given
// options, falling back to the tasker defaults for anything left undefined.
asyncAgentRegister("tasker")((options = {}) => {
  // Mirrors destructuring defaults: only `undefined` triggers the fallback.
  const withDefault = (value, fallback) => value === void 0 ? fallback : value;
  const constructorArgs = [
    options.apiKey,
    options.baseURL,
    withDefault(options.model, MODEL_ACTOR),
    withDefault(options.maxSteps, DEFAULT_MAX_STEPS_TASKER),
    withDefault(options.temperature, DEFAULT_TEMPERATURE),
    withDefault(options.reflectionInterval, DEFAULT_REFLECTION_INTERVAL_TASKER),
    // Seventh constructor argument is deliberately left undefined.
    // NOTE(review): presumably an optional planner; confirm against TaskerAgent.
    void 0,
    // Normalize a null observer to undefined.
    options.stepObserver ?? void 0,
    withDefault(options.stepDelay, DEFAULT_STEP_DELAY)
  ];
  return new TaskerAgent(...constructorArgs);
});
1799
// Factory for the "tasker:cvs_appointment" preset: a TaskerAgent preloaded
// with a fixed instruction and todo list for booking a CVS flu-shot
// appointment using placeholder patient details.
asyncAgentRegister("tasker:cvs_appointment")((options = {}) => {
  const {
    apiKey,
    baseURL,
    model = MODEL_ACTOR,
    maxSteps = DEFAULT_MAX_STEPS_TASKER,
    temperature = DEFAULT_TEMPERATURE,
    reflectionInterval = DEFAULT_REFLECTION_INTERVAL_TASKER,
    stepObserver,
    stepDelay = DEFAULT_STEP_DELAY
  } = options;
  const agent = new TaskerAgent(
    apiKey,
    baseURL,
    model,
    maxSteps,
    temperature,
    reflectionInterval,
    void 0,
    stepObserver ?? void 0,
    stepDelay
  );

  // Placeholder patient data baked into the preset.
  const patient = {
    firstName: "First",
    lastName: "Last",
    email: "user@example.com",
    birthday: "01-01-1990",
    zipCode: "00000"
  };
  // The birthday literal is split on "-" into month, day and year.
  const [month, day, year] = patient.birthday.split("-");

  const instruction = `Schedule an appointment at CVS for ${patient.firstName} ${patient.lastName} with email ${patient.email} and birthday ${patient.birthday}`;
  const todos = [
    "Open a new tab, go to www.cvs.com, type 'flu shot' in the search bar and press enter, wait for the page to load, then click on the button of Schedule vaccinations on the top of the page",
    `Enter the first name '${patient.firstName}', last name '${patient.lastName}', and email '${patient.email}' in the form. Do not use any suggested autofills. Make sure the mobile phone number is empty.`,
    `Slightly scroll down to see the date of birth, enter Month '${month}', Day '${day}', and Year '${year}' in the form`,
    "Click on 'Continue as guest' button, wait for the page to load with wait, click on 'Add vaccines' button, select 'Flu' and click on 'Add vaccines'",
    `Click on 'next' to enter the page with recommendation vaccines, then click on 'next' again, until on the page of entering zip code, enter '${patient.zipCode}', select the first option from the dropdown menu, and click on 'Search'`
  ];
  agent.setTask(instruction, todos);
  return agent;
});
1840
// Factory for the "tasker:software_qa" preset: a TaskerAgent preloaded with
// one todo per sidebar entry of the Nuclear Player UI.
asyncAgentRegister("tasker:software_qa")((options = {}) => {
  const {
    apiKey,
    baseURL,
    model = MODEL_ACTOR,
    maxSteps = DEFAULT_MAX_STEPS_TASKER,
    temperature = DEFAULT_TEMPERATURE,
    reflectionInterval = DEFAULT_REFLECTION_INTERVAL_TASKER,
    stepObserver,
    stepDelay = DEFAULT_STEP_DELAY
  } = options;
  const agent = new TaskerAgent(
    apiKey,
    baseURL,
    model,
    maxSteps,
    temperature,
    reflectionInterval,
    void 0,
    stepObserver ?? void 0,
    stepDelay
  );

  // Sidebar entries, in the order they are to be clicked.
  const sidebarLabels = [
    "Dashboard",
    "Downloads",
    "Lyrics",
    "Plugins",
    "Search Results",
    "Settings",
    "Equalizer",
    "Visualizer",
    "Listening History",
    "Favorite Albums",
    "Favorite Tracks",
    "Favorite Artists",
    "Local Library",
    "Playlists"
  ];
  const todos = sidebarLabels.map(
    (label) => `Click on '${label}' in the left sidebar`
  );
  agent.setTask(
    "QA: click through every sidebar button in the Nuclear Player UI",
    todos
  );
  return agent;
});
928
1884
 
929
1885
  // src/agent/observer/exporters.ts
930
1886
  var import_node_fs = __toESM(require("fs"), 1);
931
1887
  var import_node_path = __toESM(require("path"), 1);
932
1888
  var import_node_url = require("url");
1889
+ var import_meta2 = {};
1890
/**
 * Creates `dirPath` together with any missing parent directories.
 *
 * @param dirPath Directory to create.
 */
var ensureDir = (dirPath) => {
  const mkdirOptions = { recursive: true };
  import_node_fs.default.mkdirSync(dirPath, mkdirOptions);
};
1893
/**
 * Extracts the screen coordinates an action refers to, for report overlays.
 *
 * One leading "(" and one trailing ")" are stripped from the action argument
 * before parsing. Every click variant (click, left_double, left_triple,
 * right_single) is reported with type "click".
 *
 * @param action Action with `type` and string `argument`.
 * @returns A click, drag or scroll coordinate object, or null when the action
 *   type carries no coordinates or its argument cannot be parsed.
 */
var parseActionCoords = (action) => {
  const bareArgument = action.argument.replace(/^\(|\)$/g, "");
  const kind = action.type;

  if (kind === "click" || kind === "left_double" || kind === "left_triple" || kind === "right_single") {
    const point = parseCoords(bareArgument);
    return point ? { type: "click", x: point[0], y: point[1] } : null;
  }

  if (kind === "drag") {
    const path = parseDragCoords(bareArgument);
    if (!path) {
      return null;
    }
    return { type: "drag", x1: path[0], y1: path[1], x2: path[2], y2: path[3] };
  }

  if (kind === "scroll") {
    const scroll = parseScroll(bareArgument);
    if (!scroll) {
      return null;
    }
    return { type: "scroll", x: scroll[0], y: scroll[1], direction: scroll[2] };
  }

  return null;
};
1935
/**
 * Renders recorded agent events as a Markdown report and writes it to disk.
 *
 * The report directory (and `imagesDir`, when given) is created if missing.
 * Binary screenshots are written into `imagesDir` and linked from the
 * report; without `imagesDir` only their byte size is mentioned. String
 * images are treated as URLs and printed as-is. "image" events and unknown
 * event types produce no output.
 *
 * @param events Recorded events; each has a `type` and a `timestamp`
 *   (a Date or a value accepted by `new Date`).
 * @param filePath Destination path of the Markdown file.
 * @param imagesDir Optional directory that receives screenshot PNG files.
 */
var exportToMarkdown = (events, filePath, imagesDir) => {
  const outputDir = import_node_path.default.dirname(filePath);
  ensureDir(outputDir);
  if (imagesDir) {
    ensureDir(imagesDir);
  }
  const lines = ["# Agent Execution Report\n"];
  // Sequence number used to name plan screenshots. A wall-clock timestamp is
  // not unique here: this loop is synchronous, so two plan events of the same
  // phase can be written within one millisecond and overwrite each other.
  let planImageCount = 0;

  // Appends the Markdown for one screenshot (URL, saved file link, or size note).
  const pushImage = (image, imageFilename, altText) => {
    if (typeof image === "string") {
      lines.push(`\n**Screenshot URL:** ${image}\n`);
      return;
    }
    if (!imagesDir) {
      lines.push(`\n*[Screenshot captured - ${image.byteLength} bytes]*\n`);
      return;
    }
    const imagePath = import_node_path.default.join(imagesDir, imageFilename);
    import_node_fs.default.writeFileSync(imagePath, Buffer.from(image));
    // Link relative to the report's own directory so the image resolves even
    // when imagesDir is not a direct child of it; Markdown links use "/".
    const relPath = import_node_path.default
      .relative(outputDir, imagePath)
      .split(import_node_path.default.sep)
      .join("/");
    lines.push(`\n![${altText}](${relPath})\n`);
  };

  for (const event of events) {
    const d = event.timestamp instanceof Date ? event.timestamp : new Date(event.timestamp);
    // "HH:MM:SS" in local time.
    const timestamp = d.toTimeString().slice(0, 8);
    switch (event.type) {
      case "step": {
        lines.push(`\n## Step ${event.step_num}\n`);
        lines.push(`**Time:** ${timestamp}\n`);
        if (event.task_id) {
          lines.push(`**Task ID:** \`${event.task_id}\`\n`);
        }
        pushImage(event.image, `step_${event.step_num}.png`, `Step ${event.step_num}`);
        if (event.step.reason) {
          lines.push(`\n**Reasoning:**\n> ${event.step.reason}\n`);
        }
        if (event.step.actions?.length) {
          lines.push("\n**Planned Actions:**\n");
          for (const action of event.step.actions) {
            const countStr = action.count && action.count > 1 ? ` (x${action.count})` : "";
            lines.push(`- \`${action.type}\`: ${action.argument}${countStr}\n`);
          }
        }
        if (event.step.stop) {
          lines.push("\n**Status:** Task Complete\n");
        }
        break;
      }
      case "action":
        lines.push(`\n### Actions Executed (${timestamp})\n`);
        if (event.error) {
          lines.push(`\n**Error:** ${event.error}\n`);
        } else {
          lines.push("\n**Result:** Success\n");
        }
        break;
      case "log":
        lines.push(`\n> **Log (${timestamp}):** ${event.message}\n`);
        break;
      case "split":
        if (event.label) {
          lines.push(`\n---\n\n### ${event.label}\n`);
        } else {
          lines.push("\n---\n");
        }
        break;
      case "image":
        // Standalone image events are not rendered in the Markdown report.
        break;
      case "plan": {
        const phaseTitles = {
          initial: "Initial Planning",
          reflection: "Reflection",
          summary: "Summary"
        };
        // Unknown phases fall back to the raw phase name.
        const phaseTitle = phaseTitles[event.phase] ?? event.phase;
        lines.push(`\n### ${phaseTitle} (${timestamp})\n`);
        if (event.request_id) {
          lines.push(`**Request ID:** \`${event.request_id}\`\n`);
        }
        if (event.image) {
          planImageCount += 1;
          pushImage(event.image, `plan_${event.phase}_${planImageCount}.png`, phaseTitle);
        }
        if (event.reasoning) {
          lines.push(`\n**Reasoning:**\n> ${event.reasoning}\n`);
        }
        if (event.result) {
          lines.push(`\n**Result:** ${event.result}\n`);
        }
        break;
      }
    }
  }
  // Every entry already carries its own newlines.
  import_node_fs.default.writeFileSync(filePath, lines.join(""), "utf-8");
};
2082
/**
 * Converts recorded events into the plain-object records that are embedded
 * into the HTML report.
 *
 * Binary images are base64-encoded and string images pass through
 * unchanged. Timestamps become local "HH:MM:SS" strings. "image" events and
 * unknown event types are dropped.
 *
 * @param events Recorded events; each has a `type` and a `timestamp`.
 * @returns Array of serializable records, in the original event order.
 */
var convertEventsForHtml = (events) => {
  const toClockTime = (value) => {
    const date = value instanceof Date ? value : new Date(value);
    return date.toTimeString().slice(0, 8);
  };
  const encodeImage = (image) => typeof image !== "string" ? Buffer.from(image).toString("base64") : image;

  const records = [];
  for (const event of events) {
    const timestamp = toClockTime(event.timestamp);
    const kind = event.type;

    if (kind === "step") {
      const plannedActions = event.step.actions?.length ? event.step.actions : [];
      const action_coords = [];
      const actions = [];
      for (const planned of plannedActions) {
        // Only actions that carry coordinates contribute an overlay marker.
        const marker = parseActionCoords(planned);
        if (marker) {
          action_coords.push(marker);
        }
        actions.push({
          type: planned.type,
          argument: planned.argument,
          count: planned.count ?? 1
        });
      }
      records.push({
        event_type: "step",
        timestamp,
        step_num: event.step_num,
        image: encodeImage(event.image),
        action_coords,
        reason: event.step.reason,
        actions,
        stop: event.step.stop,
        task_id: event.task_id
      });
    } else if (kind === "action") {
      records.push({ event_type: "action", timestamp, error: event.error ?? null });
    } else if (kind === "log") {
      records.push({ event_type: "log", timestamp, message: event.message });
    } else if (kind === "split") {
      records.push({ event_type: "split", timestamp, label: event.label });
    } else if (kind === "plan") {
      records.push({
        event_type: "plan",
        timestamp,
        phase: event.phase,
        // Plan events may have no screenshot at all.
        image: event.image ? encodeImage(event.image) : null,
        reasoning: event.reasoning,
        result: event.result ?? null,
        request_id: event.request_id ?? null
      });
    }
  }
  return records;
};
2162
/**
 * Renders recorded agent events into the HTML report template and writes
 * the result to `filePath`.
 *
 * The template `report_template.html` is looked up next to this module
 * first, then under `../src/agent/observer/` relative to it. The first
 * `{EVENTS_DATA}` placeholder in the template is replaced with the events
 * serialized as JSON.
 *
 * @param events Recorded events to embed.
 * @param filePath Destination path of the HTML file; its directory is
 *   created if missing.
 * @throws {Error} When the template exists at neither location.
 */
var exportToHtml = (events, filePath) => {
  const outputDir = import_node_path.default.dirname(filePath);
  ensureDir(outputDir);
  // Use the ESM module URL when available, otherwise the CommonJS filename.
  const moduleUrl = import_meta2?.url ? import_meta2.url : (0, import_node_url.pathToFileURL)(__filename).href;
  const moduleDir = import_node_path.default.dirname((0, import_node_url.fileURLToPath)(moduleUrl));
  const primaryTemplate = import_node_path.default.join(moduleDir, "report_template.html");
  const fallbackTemplate = import_node_path.default.resolve(
    moduleDir,
    "..",
    "src",
    "agent",
    "observer",
    "report_template.html"
  );
  const templatePath = import_node_fs.default.existsSync(primaryTemplate) ? primaryTemplate : fallbackTemplate;
  if (!import_node_fs.default.existsSync(templatePath)) {
    throw new Error(
      `Report template not found at ${primaryTemplate} or ${fallbackTemplate}`
    );
  }
  const template = import_node_fs.default.readFileSync(templatePath, "utf-8");
  const eventsData = convertEventsForHtml(events);
  // Escape "<" so event text such as "</script>" cannot terminate the
  // surrounding markup. "\u003c" is an equivalent escape in both JSON and
  // JavaScript string literals, so the parsed data is unchanged.
  // NOTE(review): assumes the placeholder sits in a script/JSON context.
  const eventsJson = JSON.stringify(eventsData).replace(/</g, "\\u003c");
  // A replacer function inserts the JSON verbatim; a replacement *string*
  // would expand "$&", "$'", "$$" etc. found in the event data.
  const htmlContent = template.replace("{EVENTS_DATA}", () => eventsJson);
  import_node_fs.default.writeFileSync(filePath, htmlContent, "utf-8");
};
2188
/**
 * Writes recorded agent events to `filePath` as pretty-printed JSON.
 *
 * Timestamps are converted to ISO-8601 strings. Binary images (an
 * ArrayBuffer, or any ArrayBuffer view such as a Buffer or Uint8Array) are
 * base64-encoded and flagged with `image_encoding: "base64"`; string images
 * are written unchanged.
 *
 * @param events Recorded events; each has a `timestamp` (a Date or a value
 *   accepted by `new Date`).
 * @param filePath Destination path of the JSON file; its directory is
 *   created if missing.
 */
var exportToJson = (events, filePath) => {
  const outputDir = import_node_path.default.dirname(filePath);
  ensureDir(outputDir);

  // Returns the base64 form of a binary image, or null for anything else.
  const toBase64 = (image) => {
    if (image instanceof ArrayBuffer) {
      return Buffer.from(image).toString("base64");
    }
    if (ArrayBuffer.isView(image)) {
      // Respect the view's window; the backing buffer may be larger (pooled
      // Buffers, subarrays).
      return Buffer.from(image.buffer, image.byteOffset, image.byteLength).toString("base64");
    }
    return null;
  };

  const jsonEvents = events.map((event) => {
    const timestamp = event.timestamp instanceof Date ? event.timestamp.toISOString() : new Date(event.timestamp).toISOString();
    const encodedImage = "image" in event ? toBase64(event.image) : null;
    if (encodedImage !== null) {
      return {
        ...event,
        timestamp,
        image: encodedImage,
        image_encoding: "base64"
      };
    }
    return {
      ...event,
      timestamp
    };
  });
  import_node_fs.default.writeFileSync(filePath, JSON.stringify(jsonEvents, null, 2), "utf-8");
};
2208
+
2209
+ // src/agent/observer/agent_observer.ts
2210
/**
 * Export formats understood by the agent observer. Each value is the
 * lowercase string that selects the corresponding exporter.
 */
var ExportFormat = {
  MARKDOWN: "markdown",
  HTML: "html",
  JSON: "json"
};
2216
/**
 * Observer that keeps every agent execution event in memory and can export
 * the recording as Markdown, HTML or JSON.
 */
var AsyncAgentObserver = class extends StepObserver {
  /** Recorded events, in arrival order. */
  events = [];

  /** Records an event emitted during agent execution. */
  async onEvent(event) {
    this.events.push(event);
  }

  /** Records a free-form log message stamped with the current time. */
  addLog(message) {
    this.events.push({
      type: "log",
      timestamp: /* @__PURE__ */ new Date(),
      message
    });
  }

  /** Records a section separator with an optional label. */
  addSplit(label = "") {
    this.events.push({
      type: "split",
      timestamp: /* @__PURE__ */ new Date(),
      label
    });
  }

  /** Discards all recorded events. */
  clear() {
    this.events = [];
  }

  /** Returns the recorded events whose `step_num` equals the given number. */
  getEventsByStep(step_num) {
    const matchesStep = (event) => event.step_num !== void 0 && event.step_num === step_num;
    return this.events.filter(matchesStep);
  }

  /**
   * Exports the recorded events.
   *
   * @param format "markdown", "html" or "json"; strings are matched
   *   case-insensitively.
   * @param path2 Destination file path.
   * @param images_dir Optional screenshot directory (Markdown export only).
   * @throws {Error} When the format is not recognized.
   */
  export(format, path2, images_dir) {
    const normalized = typeof format === "string" ? format.toLowerCase() : format;
    if (normalized === "markdown" /* MARKDOWN */) {
      exportToMarkdown(this.events, path2, images_dir ?? void 0);
      return;
    }
    if (normalized === "html" /* HTML */) {
      exportToHtml(this.events, path2);
      return;
    }
    if (normalized === "json" /* JSON */) {
      exportToJson(this.events, path2);
      return;
    }
    throw new Error(`Unknown export format: ${String(format)}`);
  }
};
933
2269
 
934
2270
  // src/handler.ts
935
2271
  var import_robotjs = __toESM(require("robotjs"), 1);
936
2272
  var import_sharp = __toESM(require("sharp"), 1);
937
- var sleep2 = (ms) => new Promise((r) => setTimeout(r, ms));
2273
/** Returns a promise that resolves (with no value) after `ms` milliseconds. */
var sleep3 = (ms) =>
  new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
938
2274
  var toSharpKernel = (resample) => {
939
2275
  switch (resample) {
940
2276
  case "NEAREST":
@@ -1095,7 +2431,7 @@ var DefaultActionHandler = class {
1095
2431
  import_robotjs.default.moveMouse(p1.x, p1.y);
1096
2432
  import_robotjs.default.mouseToggle("down", "left");
1097
2433
  import_robotjs.default.dragMouse(p2.x, p2.y);
1098
- await sleep2(this.#cfg.dragDurationMs);
2434
+ await sleep3(this.#cfg.dragDurationMs);
1099
2435
  import_robotjs.default.mouseToggle("up", "left");
1100
2436
  return;
1101
2437
  }
@@ -1115,7 +2451,7 @@ var DefaultActionHandler = class {
1115
2451
  if (!last) return;
1116
2452
  const modifiers = keys.slice(0, -1);
1117
2453
  import_robotjs.default.keyTap(last, modifiers.length ? modifiers : []);
1118
- await sleep2(this.#cfg.hotkeyDelayMs);
2454
+ await sleep3(this.#cfg.hotkeyDelayMs);
1119
2455
  return;
1120
2456
  }
1121
2457
  case "type": {
@@ -1135,7 +2471,7 @@ var DefaultActionHandler = class {
1135
2471
  return;
1136
2472
  }
1137
2473
  case "wait": {
1138
- await sleep2(this.#cfg.waitDurationMs);
2474
+ await sleep3(this.#cfg.waitDurationMs);
1139
2475
  return;
1140
2476
  }
1141
2477
  case "finish": {
@@ -1156,11 +2492,13 @@ var DefaultActionHandler = class {
1156
2492
  0 && (module.exports = {
1157
2493
  APIError,
1158
2494
  Actor,
2495
+ AsyncAgentObserver,
1159
2496
  AuthenticationError,
1160
2497
  Client,
1161
2498
  ConfigurationError,
1162
2499
  DefaultActionHandler,
1163
2500
  DefaultAgent,
2501
+ ExportFormat,
1164
2502
  NetworkError,
1165
2503
  NotFoundError,
1166
2504
  OAGIError,
@@ -1168,6 +2506,7 @@ var DefaultActionHandler = class {
1168
2506
  RequestTimeoutError,
1169
2507
  ScreenshotMaker,
1170
2508
  ServerError,
2509
+ TaskerAgent,
1171
2510
  ValidationError
1172
2511
  });
1173
2512
  //# sourceMappingURL=index.cjs.map