@oagi/oagi 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -58,8 +58,11 @@ var MODEL_THINKER = "lux-thinker-1";
58
58
  var MODE_ACTOR = "actor";
59
59
  var DEFAULT_MAX_STEPS = 20;
60
60
  var DEFAULT_MAX_STEPS_THINKER = 100;
61
+ var DEFAULT_MAX_STEPS_TASKER = 60;
61
62
  var MAX_STEPS_ACTOR = 30;
62
63
  var MAX_STEPS_THINKER = 120;
64
+ var DEFAULT_REFLECTION_INTERVAL = 4;
65
+ var DEFAULT_REFLECTION_INTERVAL_TASKER = 20;
63
66
  var DEFAULT_STEP_DELAY = 0.3;
64
67
  var DEFAULT_TEMPERATURE = 0.5;
65
68
  var DEFAULT_TEMPERATURE_LOW = 0.1;
@@ -105,6 +108,38 @@ var logTraceOnFailure = (_, __, descriptor) => {
105
108
  return descriptor;
106
109
  };
107
110
 
111
+ // src/platform-info.ts
112
+ import { createRequire } from "module";
113
+ var SDK_NAME = "oagi-typescript";
114
+ function getSdkVersion() {
115
+ try {
116
+ const require2 = createRequire(import.meta.url);
117
+ for (const p of ["../package.json", "../../package.json"]) {
118
+ try {
119
+ const pkg = require2(p);
120
+ if (pkg.version && pkg.version !== "0.0.0") return pkg.version;
121
+ } catch {
122
+ }
123
+ }
124
+ } catch {
125
+ }
126
+ return "unknown";
127
+ }
128
+ function getUserAgent() {
129
+ return `${SDK_NAME}/${getSdkVersion()} (node ${process.version}; ${process.platform}; ${process.arch})`;
130
+ }
131
+ function getSdkHeaders() {
132
+ return {
133
+ "User-Agent": getUserAgent(),
134
+ "x-sdk-name": SDK_NAME,
135
+ "x-sdk-version": getSdkVersion(),
136
+ "x-sdk-language": "typescript",
137
+ "x-sdk-language-version": process.version,
138
+ "x-sdk-os": process.platform,
139
+ "x-sdk-platform": process.arch
140
+ };
141
+ }
142
+
108
143
  // src/types/models/action.ts
109
144
  import * as z from "zod";
110
145
  var ActionTypeSchema = z.enum([
@@ -384,10 +419,12 @@ var _Client = class _Client {
384
419
  `OAGI API key must be provided either as 'api_key' parameter or OAGI_API_KEY environment variable. Get your API key at ${API_KEY_HELP_URL}`
385
420
  );
386
421
  }
422
+ const sdkHeaders = getSdkHeaders();
387
423
  this.client = new OpenAI({
388
424
  baseURL: new URL("./v1", baseURL).href,
389
425
  apiKey,
390
- maxRetries
426
+ maxRetries,
427
+ defaultHeaders: sdkHeaders
391
428
  });
392
429
  logger2.info(`Client initialized with base_url: ${baseURL}`);
393
430
  }
@@ -403,7 +440,7 @@ var _Client = class _Client {
403
440
  return fetch(input, init);
404
441
  }
405
442
  buildHeaders(apiVersion) {
406
- const headers = {};
443
+ const headers = getSdkHeaders();
407
444
  if (apiVersion) {
408
445
  headers["x-api-version"] = apiVersion;
409
446
  }
@@ -851,84 +888,784 @@ var DefaultAgent = class {
851
888
  }
852
889
  };
853
890
 
854
- // src/agent/registry.ts
855
- var agentRegistry = {};
856
- var asyncAgentRegister = (mode) => {
857
- return (func) => {
858
- if (mode in agentRegistry) {
859
- throw new Error(
860
- `Agent mode '${mode}' is already registered. Cannot register the same mode twice.`
891
+ // src/agent/tasker.ts
892
+ var logger5 = logger_default("agent.tasker");
893
+ var resetHandler2 = (handler) => {
894
+ if (typeof handler.reset === "function") {
895
+ handler.reset();
896
+ }
897
+ };
898
+ var sleep2 = (seconds) => new Promise((resolve) => setTimeout(resolve, seconds * 1e3));
899
+ var extractUuidFromUrl = (url) => {
900
+ const pattern = /\/([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})(?:\.[a-z]+)?(?:\?|$)/i;
901
+ const match = pattern.exec(url);
902
+ return match ? match[1] : null;
903
+ };
904
+ var PlannerMemory = class {
905
+ taskDescription = "";
906
+ todos = [];
907
+ history = [];
908
+ taskExecutionSummary = "";
909
+ todoExecutionSummaries = {};
910
+ setTask(taskDescription, todos) {
911
+ this.taskDescription = taskDescription;
912
+ this.todos = todos.map(
913
+ (todo) => typeof todo === "string" ? { description: todo, status: "pending" } : todo
914
+ );
915
+ }
916
+ getCurrentTodo() {
917
+ for (let i = 0; i < this.todos.length; i++) {
918
+ const todo = this.todos[i];
919
+ if (todo.status === "pending" || todo.status === "in_progress") {
920
+ return { todo, index: i };
921
+ }
922
+ }
923
+ return null;
924
+ }
925
+ updateTodo(index, status, summary) {
926
+ if (index < 0 || index >= this.todos.length) return;
927
+ this.todos[index].status = status;
928
+ if (summary) {
929
+ this.todoExecutionSummaries[index] = summary;
930
+ }
931
+ }
932
+ addHistory(todoIndex, actions, summary, completed = false) {
933
+ if (todoIndex < 0 || todoIndex >= this.todos.length) return;
934
+ this.history.push({
935
+ todo_index: todoIndex,
936
+ todo: this.todos[todoIndex].description,
937
+ actions,
938
+ summary,
939
+ completed
940
+ });
941
+ }
942
+ getContext() {
943
+ return {
944
+ task_description: this.taskDescription,
945
+ todos: this.todos.map((todo, index) => ({
946
+ index,
947
+ description: todo.description,
948
+ status: todo.status
949
+ })),
950
+ history: this.history.map((history) => ({
951
+ todo_index: history.todo_index,
952
+ todo: history.todo,
953
+ action_count: history.actions.length,
954
+ summary: history.summary,
955
+ completed: history.completed
956
+ })),
957
+ task_execution_summary: this.taskExecutionSummary,
958
+ todo_execution_summaries: this.todoExecutionSummaries
959
+ };
960
+ }
961
+ getTodoStatusSummary() {
962
+ const summary = {
963
+ pending: 0,
964
+ in_progress: 0,
965
+ completed: 0,
966
+ skipped: 0,
967
+ blocked: 0
968
+ };
969
+ for (const todo of this.todos) {
970
+ summary[todo.status] = (summary[todo.status] ?? 0) + 1;
971
+ }
972
+ return summary;
973
+ }
974
+ appendTodo(description) {
975
+ this.todos.push({ description, status: "pending" });
976
+ }
977
+ };
978
+ var Planner = class {
979
+ constructor(client, apiKey, baseUrl) {
980
+ this.apiKey = apiKey;
981
+ this.baseUrl = baseUrl;
982
+ this.client = client;
983
+ }
984
+ client;
985
+ ownsClient = false;
986
+ ensureClient() {
987
+ if (!this.client) {
988
+ this.client = new Client(this.baseUrl, this.apiKey);
989
+ this.ownsClient = true;
990
+ }
991
+ return this.client;
992
+ }
993
+ getClient() {
994
+ return this.ensureClient();
995
+ }
996
+ async close() {
997
+ if (!this.ownsClient || !this.client) return;
998
+ const closable = this.client;
999
+ if (typeof closable.close === "function") {
1000
+ await closable.close();
1001
+ }
1002
+ }
1003
+ extractMemoryData(memory, context, todoIndex) {
1004
+ if (memory && todoIndex !== void 0) {
1005
+ const taskDescription = memory.taskDescription;
1006
+ const todos = memory.todos.map((todo, index) => ({
1007
+ index,
1008
+ description: todo.description,
1009
+ status: todo.status,
1010
+ execution_summary: memory.todoExecutionSummaries[index] ?? void 0
1011
+ }));
1012
+ const history = memory.history.map((history2) => ({
1013
+ todo_index: history2.todo_index,
1014
+ todo_description: history2.todo,
1015
+ action_count: history2.actions.length,
1016
+ summary: history2.summary ?? void 0,
1017
+ completed: history2.completed
1018
+ }));
1019
+ const taskExecutionSummary = memory.taskExecutionSummary || void 0;
1020
+ const overallTodo = memory.todos[todoIndex] ? memory.todos[todoIndex].description : "";
1021
+ return {
1022
+ taskDescription,
1023
+ todos,
1024
+ history,
1025
+ taskExecutionSummary,
1026
+ overallTodo
1027
+ };
1028
+ }
1029
+ const rawTodos = context.todos;
1030
+ const rawHistory = context.history;
1031
+ return {
1032
+ taskDescription: context.task_description ?? "",
1033
+ todos: Array.isArray(rawTodos) ? rawTodos : [],
1034
+ history: Array.isArray(rawHistory) ? rawHistory : [],
1035
+ taskExecutionSummary: void 0,
1036
+ overallTodo: context.current_todo ?? ""
1037
+ };
1038
+ }
1039
+ extractJsonString(text) {
1040
+ const start = text.indexOf("{");
1041
+ const end = text.lastIndexOf("}") + 1;
1042
+ if (start < 0 || end <= start) return "";
1043
+ return text.slice(start, end);
1044
+ }
1045
+ parsePlannerOutput(response) {
1046
+ try {
1047
+ const jsonResponse = this.extractJsonString(response);
1048
+ const data = JSON.parse(jsonResponse);
1049
+ return {
1050
+ instruction: data.subtask ?? data.instruction ?? "",
1051
+ reasoning: data.reasoning ?? "",
1052
+ subtodos: data.subtodos ?? []
1053
+ };
1054
+ } catch {
1055
+ return {
1056
+ instruction: "",
1057
+ reasoning: "Failed to parse structured response",
1058
+ subtodos: []
1059
+ };
1060
+ }
1061
+ }
1062
+ parseReflectionOutput(response) {
1063
+ try {
1064
+ const jsonResponse = this.extractJsonString(response);
1065
+ const data = JSON.parse(jsonResponse);
1066
+ const success = data.success === "yes";
1067
+ const newSubtask = (data.subtask_instruction ?? "").trim();
1068
+ const continueCurrent = !success && !newSubtask;
1069
+ return {
1070
+ continue_current: continueCurrent,
1071
+ new_instruction: newSubtask || null,
1072
+ reasoning: data.reflection ?? data.reasoning ?? "",
1073
+ success_assessment: success
1074
+ };
1075
+ } catch {
1076
+ return {
1077
+ continue_current: true,
1078
+ new_instruction: null,
1079
+ reasoning: "Failed to parse reflection response, continuing current approach",
1080
+ success_assessment: false
1081
+ };
1082
+ }
1083
+ }
1084
+ formatExecutionNotes(context) {
1085
+ const history = context.history;
1086
+ if (!history?.length) return "";
1087
+ const parts = [];
1088
+ for (const item of history) {
1089
+ parts.push(
1090
+ `Todo ${item.todo_index}: ${item.action_count} actions, completed: ${item.completed}`
861
1091
  );
1092
+ if (item.summary) {
1093
+ parts.push(`Summary: ${item.summary}`);
1094
+ }
862
1095
  }
863
- agentRegistry[mode] = func;
864
- return func;
865
- };
1096
+ return parts.join("\n");
1097
+ }
1098
+ async ensureScreenshotUuid(screenshot) {
1099
+ if (!screenshot) return { uuid: void 0, url: void 0 };
1100
+ if (typeof screenshot === "string") {
1101
+ const uuid = extractUuidFromUrl(screenshot);
1102
+ return { uuid: uuid ?? void 0, url: screenshot };
1103
+ }
1104
+ const client = this.ensureClient();
1105
+ const upload = await client.putS3PresignedUrl(screenshot);
1106
+ return { uuid: upload.uuid, url: upload.download_url };
1107
+ }
1108
+ async initialPlan(todo, context, screenshot, memory, todoIndex) {
1109
+ const client = this.ensureClient();
1110
+ const { uuid } = await this.ensureScreenshotUuid(screenshot);
1111
+ const { taskDescription, todos, history, taskExecutionSummary } = this.extractMemoryData(memory, context, todoIndex);
1112
+ const response = await client.callWorker({
1113
+ workerId: "oagi_first",
1114
+ overallTodo: todo,
1115
+ taskDescription,
1116
+ todos,
1117
+ history,
1118
+ currentTodoIndex: todoIndex,
1119
+ taskExecutionSummary,
1120
+ currentScreenshot: uuid
1121
+ });
1122
+ return {
1123
+ output: this.parsePlannerOutput(response.response),
1124
+ requestId: response.request_id
1125
+ };
1126
+ }
1127
+ async reflect(actions, context, screenshot, memory, todoIndex, currentInstruction, reflectionInterval = DEFAULT_REFLECTION_INTERVAL) {
1128
+ const client = this.ensureClient();
1129
+ const { uuid } = await this.ensureScreenshotUuid(screenshot);
1130
+ const {
1131
+ taskDescription,
1132
+ todos,
1133
+ history,
1134
+ taskExecutionSummary,
1135
+ overallTodo
1136
+ } = this.extractMemoryData(memory, context, todoIndex);
1137
+ const windowActions = actions.slice(-reflectionInterval);
1138
+ const windowSteps = windowActions.map((action, index) => ({
1139
+ step_number: index + 1,
1140
+ action_type: action.action_type,
1141
+ target: action.target ?? "",
1142
+ reasoning: action.reasoning ?? ""
1143
+ }));
1144
+ const windowScreenshots = windowActions.map((action) => action.screenshot_uuid).filter(Boolean);
1145
+ const priorNotes = this.formatExecutionNotes(context);
1146
+ const response = await client.callWorker({
1147
+ workerId: "oagi_follow",
1148
+ overallTodo,
1149
+ taskDescription,
1150
+ todos,
1151
+ history,
1152
+ currentTodoIndex: todoIndex,
1153
+ taskExecutionSummary,
1154
+ currentSubtaskInstruction: currentInstruction ?? "",
1155
+ windowSteps,
1156
+ windowScreenshots,
1157
+ resultScreenshot: uuid,
1158
+ priorNotes
1159
+ });
1160
+ return {
1161
+ output: this.parseReflectionOutput(response.response),
1162
+ requestId: response.request_id
1163
+ };
1164
+ }
1165
+ async summarize(_executionHistory, context, memory, todoIndex) {
1166
+ const client = this.ensureClient();
1167
+ const {
1168
+ taskDescription,
1169
+ todos,
1170
+ history,
1171
+ taskExecutionSummary,
1172
+ overallTodo
1173
+ } = this.extractMemoryData(memory, context, todoIndex);
1174
+ const latestTodoSummary = memory && todoIndex !== void 0 ? memory.todoExecutionSummaries[todoIndex] : "";
1175
+ const response = await client.callWorker({
1176
+ workerId: "oagi_task_summary",
1177
+ overallTodo,
1178
+ taskDescription,
1179
+ todos,
1180
+ history,
1181
+ currentTodoIndex: todoIndex,
1182
+ taskExecutionSummary,
1183
+ latestTodoSummary
1184
+ });
1185
+ try {
1186
+ const parsed = JSON.parse(response.response);
1187
+ return {
1188
+ summary: parsed.task_summary ?? response.response,
1189
+ requestId: response.request_id
1190
+ };
1191
+ } catch {
1192
+ return { summary: response.response, requestId: response.request_id };
1193
+ }
1194
+ }
866
1195
  };
867
- var getAgentFactory = (mode) => {
868
- if (!(mode in agentRegistry)) {
869
- const availableModes = Object.keys(agentRegistry);
870
- throw new Error(
871
- `Unknown agent mode: '${mode}'. Available modes: ${availableModes}`
1196
+ var TaskeeAgent = class {
1197
+ apiKey;
1198
+ baseUrl;
1199
+ model;
1200
+ maxSteps;
1201
+ reflectionInterval;
1202
+ temperature;
1203
+ planner;
1204
+ externalMemory;
1205
+ todoIndex;
1206
+ stepObserver;
1207
+ stepDelay;
1208
+ actor;
1209
+ currentTodo = "";
1210
+ currentInstruction = "";
1211
+ actions = [];
1212
+ totalActions = 0;
1213
+ sinceReflection = 0;
1214
+ success = false;
1215
+ constructor(apiKey, baseUrl, model = MODEL_ACTOR, maxSteps = DEFAULT_MAX_STEPS, reflectionInterval = DEFAULT_REFLECTION_INTERVAL, temperature = DEFAULT_TEMPERATURE, planner, externalMemory, todoIndex, stepObserver, stepDelay = DEFAULT_STEP_DELAY) {
1216
+ this.apiKey = apiKey;
1217
+ this.baseUrl = baseUrl;
1218
+ this.model = model;
1219
+ this.maxSteps = maxSteps;
1220
+ this.reflectionInterval = reflectionInterval;
1221
+ this.temperature = temperature;
1222
+ this.planner = planner ?? new Planner(void 0, apiKey, baseUrl);
1223
+ this.externalMemory = externalMemory;
1224
+ this.todoIndex = todoIndex;
1225
+ this.stepObserver = stepObserver;
1226
+ this.stepDelay = stepDelay;
1227
+ }
1228
+ async execute(instruction, actionHandler, imageProvider) {
1229
+ resetHandler2(actionHandler);
1230
+ this.currentTodo = instruction;
1231
+ this.actions = [];
1232
+ this.totalActions = 0;
1233
+ this.sinceReflection = 0;
1234
+ this.success = false;
1235
+ try {
1236
+ this.actor = new Actor(
1237
+ this.apiKey,
1238
+ this.baseUrl,
1239
+ this.model,
1240
+ this.temperature
1241
+ );
1242
+ await this.initialPlan(imageProvider);
1243
+ this.actor.initTask(this.currentInstruction, this.maxSteps);
1244
+ let remainingSteps = this.maxSteps;
1245
+ while (remainingSteps > 0 && !this.success) {
1246
+ const stepsTaken = await this.executeSubtask(
1247
+ Math.min(this.maxSteps, remainingSteps),
1248
+ actionHandler,
1249
+ imageProvider
1250
+ );
1251
+ remainingSteps -= stepsTaken;
1252
+ if (!this.success && remainingSteps > 0) {
1253
+ const shouldContinue = await this.reflectAndDecide(imageProvider);
1254
+ if (!shouldContinue) {
1255
+ break;
1256
+ }
1257
+ }
1258
+ }
1259
+ await this.generateSummary();
1260
+ return this.success;
1261
+ } catch (err) {
1262
+ logger5.error(`Error executing todo: ${err}`);
1263
+ this.recordAction("error", null, String(err));
1264
+ return false;
1265
+ } finally {
1266
+ this.actor = void 0;
1267
+ }
1268
+ }
1269
+ getContext() {
1270
+ return this.externalMemory ? this.externalMemory.getContext() : {};
1271
+ }
1272
+ recordAction(actionType, target, reasoning, result, screenshotUuid) {
1273
+ this.actions.push({
1274
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
1275
+ action_type: actionType,
1276
+ target,
1277
+ reasoning,
1278
+ result,
1279
+ details: {},
1280
+ screenshot_uuid: screenshotUuid ?? void 0
1281
+ });
1282
+ }
1283
+ async initialPlan(imageProvider) {
1284
+ logger5.info("Generating initial plan for todo");
1285
+ const screenshot = await imageProvider.provide();
1286
+ const context = this.getContext();
1287
+ const { output, requestId } = await this.planner.initialPlan(
1288
+ this.currentTodo,
1289
+ context,
1290
+ screenshot,
1291
+ this.externalMemory,
1292
+ this.todoIndex
872
1293
  );
1294
+ this.recordAction("plan", "initial", output.reasoning, output.instruction);
1295
+ if (this.stepObserver) {
1296
+ const event = {
1297
+ type: "plan",
1298
+ timestamp: /* @__PURE__ */ new Date(),
1299
+ phase: "initial",
1300
+ image: screenshot,
1301
+ reasoning: output.reasoning,
1302
+ result: output.instruction,
1303
+ request_id: requestId ?? void 0
1304
+ };
1305
+ await this.stepObserver.onEvent(event);
1306
+ }
1307
+ this.currentInstruction = output.instruction;
1308
+ logger5.info(`Initial instruction: ${this.currentInstruction}`);
1309
+ }
1310
+ async executeSubtask(maxSteps, actionHandler, imageProvider) {
1311
+ logger5.info(`Executing subtask with max ${maxSteps} steps`);
1312
+ let stepsTaken = 0;
1313
+ const client = this.planner.getClient();
1314
+ for (let stepNum = 0; stepNum < maxSteps; stepNum++) {
1315
+ const screenshot = await imageProvider.provide();
1316
+ let screenshotUuid;
1317
+ let screenshotUrl;
1318
+ try {
1319
+ if (typeof screenshot === "string") {
1320
+ screenshotUuid = extractUuidFromUrl(screenshot) ?? void 0;
1321
+ screenshotUrl = screenshot;
1322
+ } else {
1323
+ const upload = await client.putS3PresignedUrl(screenshot);
1324
+ screenshotUuid = upload.uuid;
1325
+ screenshotUrl = upload.download_url;
1326
+ }
1327
+ } catch (err) {
1328
+ logger5.error(`Error uploading screenshot: ${err}`);
1329
+ this.recordAction("error", "screenshot_upload", String(err));
1330
+ break;
1331
+ }
1332
+ let step;
1333
+ try {
1334
+ step = await this.actor.step(screenshotUrl ?? screenshot, void 0);
1335
+ } catch (err) {
1336
+ logger5.error(`Error getting step from OAGI: ${err}`);
1337
+ this.recordAction(
1338
+ "error",
1339
+ "oagi_step",
1340
+ String(err),
1341
+ null,
1342
+ screenshotUuid
1343
+ );
1344
+ break;
1345
+ }
1346
+ if (step.reason) {
1347
+ logger5.info(`Step ${this.totalActions + 1}: ${step.reason}`);
1348
+ }
1349
+ if (this.stepObserver) {
1350
+ const event = {
1351
+ type: "step",
1352
+ timestamp: /* @__PURE__ */ new Date(),
1353
+ step_num: this.totalActions + 1,
1354
+ image: screenshot,
1355
+ step,
1356
+ task_id: this.actor.taskId
1357
+ };
1358
+ await this.stepObserver.onEvent(event);
1359
+ }
1360
+ if (step.actions?.length) {
1361
+ logger5.info(`Actions (${step.actions.length}):`);
1362
+ for (const action of step.actions) {
1363
+ const countSuffix = action.count && action.count > 1 ? ` x${action.count}` : "";
1364
+ logger5.info(` [${action.type}] ${action.argument}${countSuffix}`);
1365
+ }
1366
+ for (const action of step.actions) {
1367
+ this.recordAction(
1368
+ action.type,
1369
+ action.argument,
1370
+ step.reason ?? null,
1371
+ null,
1372
+ screenshotUuid
1373
+ );
1374
+ }
1375
+ let error = null;
1376
+ try {
1377
+ await actionHandler.handle(step.actions);
1378
+ } catch (err) {
1379
+ error = String(err);
1380
+ throw err;
1381
+ } finally {
1382
+ if (this.stepObserver) {
1383
+ const event = {
1384
+ type: "action",
1385
+ timestamp: /* @__PURE__ */ new Date(),
1386
+ step_num: this.totalActions + 1,
1387
+ actions: step.actions,
1388
+ error: error ?? void 0
1389
+ };
1390
+ await this.stepObserver.onEvent(event);
1391
+ }
1392
+ }
1393
+ this.totalActions += step.actions.length;
1394
+ this.sinceReflection += step.actions.length;
1395
+ }
1396
+ if (this.stepDelay > 0) {
1397
+ await sleep2(this.stepDelay);
1398
+ }
1399
+ stepsTaken += 1;
1400
+ if (step.stop) {
1401
+ logger5.info("OAGI signaled task completion");
1402
+ break;
1403
+ }
1404
+ if (this.sinceReflection >= this.reflectionInterval) {
1405
+ logger5.info("Reflection interval reached");
1406
+ break;
1407
+ }
1408
+ }
1409
+ return stepsTaken;
1410
+ }
1411
+ async reflectAndDecide(imageProvider) {
1412
+ logger5.info("Reflecting on progress");
1413
+ const screenshot = await imageProvider.provide();
1414
+ const context = this.getContext();
1415
+ context.current_todo = this.currentTodo;
1416
+ const recentActions = this.actions.slice(-this.sinceReflection);
1417
+ const { output, requestId } = await this.planner.reflect(
1418
+ recentActions,
1419
+ context,
1420
+ screenshot,
1421
+ this.externalMemory,
1422
+ this.todoIndex,
1423
+ this.currentInstruction,
1424
+ this.reflectionInterval
1425
+ );
1426
+ this.recordAction(
1427
+ "reflect",
1428
+ null,
1429
+ output.reasoning,
1430
+ output.continue_current ? "continue" : "pivot"
1431
+ );
1432
+ if (this.stepObserver) {
1433
+ const decision = output.success_assessment ? "success" : output.continue_current ? "continue" : "pivot";
1434
+ const event = {
1435
+ type: "plan",
1436
+ timestamp: /* @__PURE__ */ new Date(),
1437
+ phase: "reflection",
1438
+ image: screenshot,
1439
+ reasoning: output.reasoning,
1440
+ result: decision,
1441
+ request_id: requestId ?? void 0
1442
+ };
1443
+ await this.stepObserver.onEvent(event);
1444
+ }
1445
+ if (output.success_assessment) {
1446
+ this.success = true;
1447
+ logger5.info("Reflection indicates task is successful");
1448
+ return false;
1449
+ }
1450
+ this.sinceReflection = 0;
1451
+ if (!output.continue_current && output.new_instruction) {
1452
+ logger5.info(`Pivoting to new instruction: ${output.new_instruction}`);
1453
+ this.currentInstruction = output.new_instruction;
1454
+ await this.actor.initTask(this.currentInstruction, this.maxSteps);
1455
+ return true;
1456
+ }
1457
+ return output.continue_current;
1458
+ }
1459
+ async generateSummary() {
1460
+ logger5.info("Generating execution summary");
1461
+ const context = this.getContext();
1462
+ context.current_todo = this.currentTodo;
1463
+ const { summary, requestId } = await this.planner.summarize(
1464
+ this.actions,
1465
+ context,
1466
+ this.externalMemory,
1467
+ this.todoIndex
1468
+ );
1469
+ this.recordAction("summary", null, summary);
1470
+ if (this.stepObserver) {
1471
+ const event = {
1472
+ type: "plan",
1473
+ timestamp: /* @__PURE__ */ new Date(),
1474
+ phase: "summary",
1475
+ image: void 0,
1476
+ reasoning: summary,
1477
+ result: void 0,
1478
+ request_id: requestId ?? void 0
1479
+ };
1480
+ await this.stepObserver.onEvent(event);
1481
+ }
1482
+ logger5.info(`Execution summary: ${summary}`);
1483
+ }
1484
+ returnExecutionResults() {
1485
+ let summary = "";
1486
+ for (let i = this.actions.length - 1; i >= 0; i--) {
1487
+ if (this.actions[i].action_type === "summary") {
1488
+ summary = this.actions[i].reasoning ?? "";
1489
+ break;
1490
+ }
1491
+ }
1492
+ return {
1493
+ success: this.success,
1494
+ actions: this.actions,
1495
+ summary,
1496
+ total_steps: this.totalActions
1497
+ };
873
1498
  }
874
- return agentRegistry[mode];
875
- };
876
- var listAgentModes = () => {
877
- return Object.keys(agentRegistry);
878
1499
  };
879
- var createAgent = (mode, options = {}) => {
880
- const factory = getAgentFactory(mode);
881
- const agent = factory(options);
882
- if (!agent || typeof agent.execute !== "function") {
883
- throw new TypeError(
884
- `Factory for mode '${mode}' returned an object that doesn't implement Agent. Expected an object with an 'execute' method.`
1500
+ var TaskerAgent = class {
1501
+ /** Hierarchical agent that manages multi-todo workflows. */
1502
+ apiKey;
1503
+ baseUrl;
1504
+ model;
1505
+ maxSteps;
1506
+ temperature;
1507
+ reflectionInterval;
1508
+ planner;
1509
+ stepObserver;
1510
+ stepDelay;
1511
+ memory = new PlannerMemory();
1512
+ currentTaskeeAgent;
1513
+ constructor(apiKey, baseUrl, model = MODEL_ACTOR, maxSteps = DEFAULT_MAX_STEPS_TASKER, temperature = DEFAULT_TEMPERATURE, reflectionInterval = DEFAULT_REFLECTION_INTERVAL, planner, stepObserver, stepDelay = DEFAULT_STEP_DELAY) {
1514
+ this.apiKey = apiKey;
1515
+ this.baseUrl = baseUrl;
1516
+ this.model = model;
1517
+ this.maxSteps = maxSteps;
1518
+ this.temperature = temperature;
1519
+ this.reflectionInterval = reflectionInterval;
1520
+ this.planner = planner ?? new Planner(void 0, apiKey, baseUrl);
1521
+ this.stepObserver = stepObserver;
1522
+ this.stepDelay = stepDelay;
1523
+ }
1524
+ setTask(task, todos) {
1525
+ this.memory.setTask(task, todos);
1526
+ logger5.info(`Task set with ${todos.length} todos`);
1527
+ }
1528
+ set_task(task, todos) {
1529
+ this.setTask(task, todos);
1530
+ }
1531
+ async execute(_instruction, actionHandler, imageProvider) {
1532
+ resetHandler2(actionHandler);
1533
+ let overallSuccess = true;
1534
+ while (true) {
1535
+ const todoInfo = this.prepare();
1536
+ if (!todoInfo) {
1537
+ logger5.info("No more todos to execute");
1538
+ break;
1539
+ }
1540
+ const { todo, index } = todoInfo;
1541
+ logger5.info(`Executing todo ${index}: ${todo.description}`);
1542
+ if (this.stepObserver) {
1543
+ const event = {
1544
+ type: "split",
1545
+ timestamp: /* @__PURE__ */ new Date(),
1546
+ label: `Start of todo ${index + 1}: ${todo.description}`
1547
+ };
1548
+ await this.stepObserver.onEvent(event);
1549
+ }
1550
+ const success = await this.executeTodo(
1551
+ index,
1552
+ actionHandler,
1553
+ imageProvider
1554
+ );
1555
+ if (this.stepObserver) {
1556
+ const event = {
1557
+ type: "split",
1558
+ timestamp: /* @__PURE__ */ new Date(),
1559
+ label: `End of todo ${index + 1}: ${todo.description}`
1560
+ };
1561
+ await this.stepObserver.onEvent(event);
1562
+ }
1563
+ if (!success) {
1564
+ logger5.warn(`Todo ${index} failed`);
1565
+ overallSuccess = false;
1566
+ const currentStatus = this.memory.todos[index]?.status;
1567
+ if (currentStatus === "in_progress") {
1568
+ logger5.error("Todo failed with exception, stopping execution");
1569
+ break;
1570
+ }
1571
+ }
1572
+ this.updateTaskSummary();
1573
+ }
1574
+ const statusSummary = this.memory.getTodoStatusSummary();
1575
+ logger5.info(
1576
+ `Workflow complete. Status summary: ${JSON.stringify(statusSummary)}`
885
1577
  );
1578
+ return overallSuccess;
1579
+ }
1580
+ prepare() {
1581
+ const current = this.memory.getCurrentTodo();
1582
+ if (!current) return null;
1583
+ this.currentTaskeeAgent = new TaskeeAgent(
1584
+ this.apiKey,
1585
+ this.baseUrl,
1586
+ this.model,
1587
+ this.maxSteps,
1588
+ this.reflectionInterval,
1589
+ this.temperature,
1590
+ this.planner,
1591
+ this.memory,
1592
+ current.index,
1593
+ this.stepObserver,
1594
+ this.stepDelay
1595
+ );
1596
+ if (current.todo.status === "pending") {
1597
+ this.memory.updateTodo(current.index, "in_progress");
1598
+ }
1599
+ logger5.info(`Prepared taskee agent for todo ${current.index}`);
1600
+ return current;
1601
+ }
1602
+ async executeTodo(todoIndex, actionHandler, imageProvider) {
1603
+ if (!this.currentTaskeeAgent || todoIndex < 0) {
1604
+ logger5.error("No taskee agent prepared");
1605
+ return false;
1606
+ }
1607
+ const todo = this.memory.todos[todoIndex];
1608
+ try {
1609
+ const success = await this.currentTaskeeAgent.execute(
1610
+ todo.description,
1611
+ actionHandler,
1612
+ imageProvider
1613
+ );
1614
+ const results = this.currentTaskeeAgent.returnExecutionResults();
1615
+ this.updateMemoryFromExecution(todoIndex, results, success);
1616
+ return success;
1617
+ } catch (err) {
1618
+ logger5.error(`Error executing todo ${todoIndex}: ${err}`);
1619
+ this.memory.updateTodo(
1620
+ todoIndex,
1621
+ "in_progress",
1622
+ `Execution failed: ${String(err)}`
1623
+ );
1624
+ return false;
1625
+ }
1626
+ }
1627
+ updateMemoryFromExecution(todoIndex, results, success) {
1628
+ const status = success ? "completed" : "in_progress";
1629
+ this.memory.updateTodo(todoIndex, status, results.summary);
1630
+ this.memory.addHistory(
1631
+ todoIndex,
1632
+ results.actions,
1633
+ results.summary,
1634
+ success
1635
+ );
1636
+ if (success) {
1637
+ const summaryLine = `- Completed todo ${todoIndex}: ${results.summary}`;
1638
+ this.memory.taskExecutionSummary = this.memory.taskExecutionSummary ? `${this.memory.taskExecutionSummary}
1639
+ ${summaryLine}` : summaryLine;
1640
+ }
1641
+ logger5.info(
1642
+ `Updated memory for todo ${todoIndex}: status=${status}, actions=${results.actions.length}`
1643
+ );
1644
+ }
1645
+ updateTaskSummary() {
1646
+ const statusSummary = this.memory.getTodoStatusSummary();
1647
+ const completed = statusSummary.completed ?? 0;
1648
+ const total = this.memory.todos.length;
1649
+ const summaryParts = [`Progress: ${completed}/${total} todos completed`];
1650
+ const recentHistory = this.memory.history.slice(-3);
1651
+ for (const history of recentHistory) {
1652
+ if (history.completed && history.summary) {
1653
+ summaryParts.push(
1654
+ `- Todo ${history.todo_index}: ${history.summary.slice(0, 100)}`
1655
+ );
1656
+ }
1657
+ }
1658
+ this.memory.taskExecutionSummary = summaryParts.join("\n");
1659
+ }
1660
+ getMemory() {
1661
+ return this.memory;
1662
+ }
1663
+ appendTodo(description) {
1664
+ this.memory.appendTodo(description);
1665
+ logger5.info(`Appended new todo: ${description}`);
886
1666
  }
887
- return agent;
888
1667
  };
889
1668
 
890
- // src/agent/factories.ts
891
- asyncAgentRegister("actor")((options = {}) => {
892
- const {
893
- apiKey,
894
- baseURL,
895
- model = MODEL_ACTOR,
896
- maxSteps = DEFAULT_MAX_STEPS,
897
- temperature = DEFAULT_TEMPERATURE_LOW,
898
- stepObserver,
899
- stepDelay = DEFAULT_STEP_DELAY
900
- } = options;
901
- return new DefaultAgent(
902
- apiKey,
903
- baseURL,
904
- model,
905
- maxSteps,
906
- temperature,
907
- stepObserver ?? void 0,
908
- stepDelay
909
- );
910
- });
911
- asyncAgentRegister("thinker")((options = {}) => {
912
- const {
913
- apiKey,
914
- baseURL,
915
- model = MODEL_THINKER,
916
- maxSteps = DEFAULT_MAX_STEPS_THINKER,
917
- temperature = DEFAULT_TEMPERATURE_LOW,
918
- stepObserver,
919
- stepDelay = DEFAULT_STEP_DELAY
920
- } = options;
921
- return new DefaultAgent(
922
- apiKey,
923
- baseURL,
924
- model,
925
- maxSteps,
926
- temperature,
927
- stepObserver ?? void 0,
928
- stepDelay
929
- );
930
- });
931
-
932
1669
  // src/agent/observer/exporters.ts
933
1670
  import fs from "fs";
934
1671
  import path from "path";
@@ -1210,7 +1947,21 @@ var exportToHtml = (events, filePath) => {
1210
1947
  ensureDir(outputDir);
1211
1948
  const moduleUrl = import.meta?.url ? import.meta.url : pathToFileURL(__filename).href;
1212
1949
  const moduleDir = path.dirname(fileURLToPath(moduleUrl));
1213
- const templatePath = path.join(moduleDir, "report_template.html");
1950
+ const primaryTemplate = path.join(moduleDir, "report_template.html");
1951
+ const fallbackTemplate = path.resolve(
1952
+ moduleDir,
1953
+ "..",
1954
+ "src",
1955
+ "agent",
1956
+ "observer",
1957
+ "report_template.html"
1958
+ );
1959
+ const templatePath = fs.existsSync(primaryTemplate) ? primaryTemplate : fallbackTemplate;
1960
+ if (!fs.existsSync(templatePath)) {
1961
+ throw new Error(
1962
+ `Report template not found at ${primaryTemplate} or ${fallbackTemplate}`
1963
+ );
1964
+ }
1214
1965
  const template = fs.readFileSync(templatePath, "utf-8");
1215
1966
  const eventsData = convertEventsForHtml(events);
1216
1967
  const eventsJson = JSON.stringify(eventsData);
@@ -1239,6 +1990,12 @@ var exportToJson = (events, filePath) => {
1239
1990
  };
1240
1991
 
1241
1992
  // src/agent/observer/agent_observer.ts
1993
+ var ExportFormat = /* @__PURE__ */ ((ExportFormat2) => {
1994
+ ExportFormat2["MARKDOWN"] = "markdown";
1995
+ ExportFormat2["HTML"] = "html";
1996
+ ExportFormat2["JSON"] = "json";
1997
+ return ExportFormat2;
1998
+ })(ExportFormat || {});
1242
1999
  var AsyncAgentObserver = class extends StepObserver {
1243
2000
  /**
1244
2001
  * Records agent execution events and exports to various formats.
@@ -1293,10 +2050,196 @@ var AsyncAgentObserver = class extends StepObserver {
1293
2050
  }
1294
2051
  };
1295
2052
 
2053
+ // src/agent/registry.ts
2054
+ var agentRegistry = {};
2055
+ var asyncAgentRegister = (mode) => {
2056
+ return (func) => {
2057
+ if (mode in agentRegistry) {
2058
+ throw new Error(
2059
+ `Agent mode '${mode}' is already registered. Cannot register the same mode twice.`
2060
+ );
2061
+ }
2062
+ agentRegistry[mode] = func;
2063
+ return func;
2064
+ };
2065
+ };
2066
+ var getAgentFactory = (mode) => {
2067
+ if (!(mode in agentRegistry)) {
2068
+ const availableModes = Object.keys(agentRegistry);
2069
+ throw new Error(
2070
+ `Unknown agent mode: '${mode}'. Available modes: ${availableModes}`
2071
+ );
2072
+ }
2073
+ return agentRegistry[mode];
2074
+ };
2075
+ var listAgentModes = () => {
2076
+ return Object.keys(agentRegistry);
2077
+ };
2078
+ var createAgent = (mode, options = {}) => {
2079
+ const factory = getAgentFactory(mode);
2080
+ const agent = factory(options);
2081
+ if (!agent || typeof agent.execute !== "function") {
2082
+ throw new TypeError(
2083
+ `Factory for mode '${mode}' returned an object that doesn't implement Agent. Expected an object with an 'execute' method.`
2084
+ );
2085
+ }
2086
+ return agent;
2087
+ };
2088
+
2089
+ // src/agent/factories.ts
2090
+ asyncAgentRegister("actor")((options = {}) => {
2091
+ const {
2092
+ apiKey,
2093
+ baseURL,
2094
+ model = MODEL_ACTOR,
2095
+ maxSteps = DEFAULT_MAX_STEPS,
2096
+ temperature = DEFAULT_TEMPERATURE_LOW,
2097
+ stepObserver,
2098
+ stepDelay = DEFAULT_STEP_DELAY
2099
+ } = options;
2100
+ return new DefaultAgent(
2101
+ apiKey,
2102
+ baseURL,
2103
+ model,
2104
+ maxSteps,
2105
+ temperature,
2106
+ stepObserver ?? void 0,
2107
+ stepDelay
2108
+ );
2109
+ });
2110
+ asyncAgentRegister("thinker")((options = {}) => {
2111
+ const {
2112
+ apiKey,
2113
+ baseURL,
2114
+ model = MODEL_THINKER,
2115
+ maxSteps = DEFAULT_MAX_STEPS_THINKER,
2116
+ temperature = DEFAULT_TEMPERATURE_LOW,
2117
+ stepObserver,
2118
+ stepDelay = DEFAULT_STEP_DELAY
2119
+ } = options;
2120
+ return new DefaultAgent(
2121
+ apiKey,
2122
+ baseURL,
2123
+ model,
2124
+ maxSteps,
2125
+ temperature,
2126
+ stepObserver ?? void 0,
2127
+ stepDelay
2128
+ );
2129
+ });
2130
+ asyncAgentRegister("tasker")((options = {}) => {
2131
+ const {
2132
+ apiKey,
2133
+ baseURL,
2134
+ model = MODEL_ACTOR,
2135
+ maxSteps = DEFAULT_MAX_STEPS_TASKER,
2136
+ temperature = DEFAULT_TEMPERATURE,
2137
+ reflectionInterval = DEFAULT_REFLECTION_INTERVAL_TASKER,
2138
+ stepObserver,
2139
+ stepDelay = DEFAULT_STEP_DELAY
2140
+ } = options;
2141
+ return new TaskerAgent(
2142
+ apiKey,
2143
+ baseURL,
2144
+ model,
2145
+ maxSteps,
2146
+ temperature,
2147
+ reflectionInterval,
2148
+ void 0,
2149
+ stepObserver ?? void 0,
2150
+ stepDelay
2151
+ );
2152
+ });
2153
+ asyncAgentRegister("tasker:cvs_appointment")(
2154
+ (options = {}) => {
2155
+ const {
2156
+ apiKey,
2157
+ baseURL,
2158
+ model = MODEL_ACTOR,
2159
+ maxSteps = DEFAULT_MAX_STEPS_TASKER,
2160
+ temperature = DEFAULT_TEMPERATURE,
2161
+ reflectionInterval = DEFAULT_REFLECTION_INTERVAL_TASKER,
2162
+ stepObserver,
2163
+ stepDelay = DEFAULT_STEP_DELAY
2164
+ } = options;
2165
+ const tasker = new TaskerAgent(
2166
+ apiKey,
2167
+ baseURL,
2168
+ model,
2169
+ maxSteps,
2170
+ temperature,
2171
+ reflectionInterval,
2172
+ void 0,
2173
+ stepObserver ?? void 0,
2174
+ stepDelay
2175
+ );
2176
+ const firstName = "First";
2177
+ const lastName = "Last";
2178
+ const email = "user@example.com";
2179
+ const birthday = "01-01-1990";
2180
+ const zipCode = "00000";
2181
+ const [month, day, year] = birthday.split("-");
2182
+ const instruction = `Schedule an appointment at CVS for ${firstName} ${lastName} with email ${email} and birthday ${birthday}`;
2183
+ const todos = [
2184
+ "Open a new tab, go to www.cvs.com, type 'flu shot' in the search bar and press enter, wait for the page to load, then click on the button of Schedule vaccinations on the top of the page",
2185
+ `Enter the first name '${firstName}', last name '${lastName}', and email '${email}' in the form. Do not use any suggested autofills. Make sure the mobile phone number is empty.`,
2186
+ "Slightly scroll down to see the date of birth, enter Month '" + month + "', Day '" + day + "', and Year '" + year + "' in the form",
2187
+ "Click on 'Continue as guest' button, wait for the page to load with wait, click on 'Add vaccines' button, select 'Flu' and click on 'Add vaccines'",
2188
+ "Click on 'next' to enter the page with recommendation vaccines, then click on 'next' again, until on the page of entering zip code, enter '" + zipCode + "', select the first option from the dropdown menu, and click on 'Search'"
2189
+ ];
2190
+ tasker.setTask(instruction, todos);
2191
+ return tasker;
2192
+ }
2193
+ );
2194
+ asyncAgentRegister("tasker:software_qa")(
2195
+ (options = {}) => {
2196
+ const {
2197
+ apiKey,
2198
+ baseURL,
2199
+ model = MODEL_ACTOR,
2200
+ maxSteps = DEFAULT_MAX_STEPS_TASKER,
2201
+ temperature = DEFAULT_TEMPERATURE,
2202
+ reflectionInterval = DEFAULT_REFLECTION_INTERVAL_TASKER,
2203
+ stepObserver,
2204
+ stepDelay = DEFAULT_STEP_DELAY
2205
+ } = options;
2206
+ const tasker = new TaskerAgent(
2207
+ apiKey,
2208
+ baseURL,
2209
+ model,
2210
+ maxSteps,
2211
+ temperature,
2212
+ reflectionInterval,
2213
+ void 0,
2214
+ stepObserver ?? void 0,
2215
+ stepDelay
2216
+ );
2217
+ const instruction = "QA: click through every sidebar button in the Nuclear Player UI";
2218
+ const todos = [
2219
+ "Click on 'Dashboard' in the left sidebar",
2220
+ "Click on 'Downloads' in the left sidebar",
2221
+ "Click on 'Lyrics' in the left sidebar",
2222
+ "Click on 'Plugins' in the left sidebar",
2223
+ "Click on 'Search Results' in the left sidebar",
2224
+ "Click on 'Settings' in the left sidebar",
2225
+ "Click on 'Equalizer' in the left sidebar",
2226
+ "Click on 'Visualizer' in the left sidebar",
2227
+ "Click on 'Listening History' in the left sidebar",
2228
+ "Click on 'Favorite Albums' in the left sidebar",
2229
+ "Click on 'Favorite Tracks' in the left sidebar",
2230
+ "Click on 'Favorite Artists' in the left sidebar",
2231
+ "Click on 'Local Library' in the left sidebar",
2232
+ "Click on 'Playlists' in the left sidebar"
2233
+ ];
2234
+ tasker.setTask(instruction, todos);
2235
+ return tasker;
2236
+ }
2237
+ );
2238
+
1296
2239
  // src/handler.ts
1297
2240
  import robot from "robotjs";
1298
2241
  import sharp from "sharp";
1299
- var sleep2 = (ms) => new Promise((r) => setTimeout(r, ms));
2242
+ var sleep3 = (ms) => new Promise((r) => setTimeout(r, ms));
1300
2243
  var toSharpKernel = (resample) => {
1301
2244
  switch (resample) {
1302
2245
  case "NEAREST":
@@ -1457,7 +2400,7 @@ var DefaultActionHandler = class {
1457
2400
  robot.moveMouse(p1.x, p1.y);
1458
2401
  robot.mouseToggle("down", "left");
1459
2402
  robot.dragMouse(p2.x, p2.y);
1460
- await sleep2(this.#cfg.dragDurationMs);
2403
+ await sleep3(this.#cfg.dragDurationMs);
1461
2404
  robot.mouseToggle("up", "left");
1462
2405
  return;
1463
2406
  }
@@ -1477,7 +2420,7 @@ var DefaultActionHandler = class {
1477
2420
  if (!last) return;
1478
2421
  const modifiers = keys.slice(0, -1);
1479
2422
  robot.keyTap(last, modifiers.length ? modifiers : []);
1480
- await sleep2(this.#cfg.hotkeyDelayMs);
2423
+ await sleep3(this.#cfg.hotkeyDelayMs);
1481
2424
  return;
1482
2425
  }
1483
2426
  case "type": {
@@ -1497,7 +2440,7 @@ var DefaultActionHandler = class {
1497
2440
  return;
1498
2441
  }
1499
2442
  case "wait": {
1500
- await sleep2(this.#cfg.waitDurationMs);
2443
+ await sleep3(this.#cfg.waitDurationMs);
1501
2444
  return;
1502
2445
  }
1503
2446
  case "finish": {
@@ -1538,10 +2481,12 @@ export {
1538
2481
  Client,
1539
2482
  Actor,
1540
2483
  DefaultAgent,
2484
+ TaskerAgent,
1541
2485
  listAgentModes,
1542
2486
  createAgent,
2487
+ ExportFormat,
1543
2488
  AsyncAgentObserver,
1544
2489
  ScreenshotMaker,
1545
2490
  DefaultActionHandler
1546
2491
  };
1547
- //# sourceMappingURL=chunk-JVNTVY6W.js.map
2492
+ //# sourceMappingURL=chunk-SRTB44IH.js.map