@oagi/oagi 0.1.4 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.cjs CHANGED
@@ -46,6 +46,7 @@ var ActionTypeSchema = z.enum([
46
46
  "type",
47
47
  "scroll",
48
48
  "finish",
49
+ "fail",
49
50
  "wait",
50
51
  "call_user"
51
52
  ]);
@@ -478,7 +479,21 @@ var exportToHtml = (events, filePath) => {
478
479
  ensureDir(outputDir);
479
480
  const moduleUrl = import_meta?.url ? import_meta.url : (0, import_node_url.pathToFileURL)(__filename).href;
480
481
  const moduleDir = import_node_path.default.dirname((0, import_node_url.fileURLToPath)(moduleUrl));
481
- const templatePath = import_node_path.default.join(moduleDir, "report_template.html");
482
+ const primaryTemplate = import_node_path.default.join(moduleDir, "report_template.html");
483
+ const fallbackTemplate = import_node_path.default.resolve(
484
+ moduleDir,
485
+ "..",
486
+ "src",
487
+ "agent",
488
+ "observer",
489
+ "report_template.html"
490
+ );
491
+ const templatePath = import_node_fs.default.existsSync(primaryTemplate) ? primaryTemplate : fallbackTemplate;
492
+ if (!import_node_fs.default.existsSync(templatePath)) {
493
+ throw new Error(
494
+ `Report template not found at ${primaryTemplate} or ${fallbackTemplate}`
495
+ );
496
+ }
482
497
  const template = import_node_fs.default.readFileSync(templatePath, "utf-8");
483
498
  const eventsData = convertEventsForHtml(events);
484
499
  const eventsJson = JSON.stringify(eventsData);
@@ -571,9 +586,12 @@ var MODEL_THINKER = "lux-thinker-1";
571
586
  var MODE_ACTOR = "actor";
572
587
  var DEFAULT_MAX_STEPS = 20;
573
588
  var DEFAULT_MAX_STEPS_THINKER = 100;
574
- var MAX_STEPS_ACTOR = 30;
575
- var MAX_STEPS_THINKER = 120;
576
- var DEFAULT_STEP_DELAY = 0.3;
589
+ var DEFAULT_MAX_STEPS_TASKER = 60;
590
+ var MAX_STEPS_ACTOR = 100;
591
+ var MAX_STEPS_THINKER = 300;
592
+ var DEFAULT_REFLECTION_INTERVAL = 4;
593
+ var DEFAULT_REFLECTION_INTERVAL_TASKER = 20;
594
+ var DEFAULT_STEP_DELAY = 1;
577
595
  var DEFAULT_TEMPERATURE = 0.5;
578
596
  var DEFAULT_TEMPERATURE_LOW = 0.1;
579
597
  var HTTP_CLIENT_TIMEOUT = 60;
@@ -659,6 +677,39 @@ var logTraceOnFailure = (_, __, descriptor) => {
659
677
  return descriptor;
660
678
  };
661
679
 
680
+ // src/platform-info.ts
681
+ var import_module = require("module");
682
+ var import_meta2 = {};
683
+ var SDK_NAME = "oagi-typescript";
684
/**
 * Resolve the SDK version from the package manifest shipped next to the bundle.
 *
 * `import_meta2` is the bundler's stand-in for `import.meta`; in this CommonJS
 * build it is an empty object, so `import_meta2.url` is undefined and
 * `createRequire` would always throw. Fall back to `__filename` (an absolute
 * path, which `createRequire` also accepts) so the lookup can succeed.
 *
 * @returns {string} The first usable `version` found in `../package.json` or
 *   `../../package.json` relative to this file, or `"unknown"` when no anchor
 *   or manifest is available.
 */
function getSdkVersion() {
  const anchor = import_meta2.url ?? (typeof __filename === "string" ? __filename : void 0);
  if (!anchor) return "unknown";
  let require2;
  try {
    require2 = (0, import_module.createRequire)(anchor);
  } catch {
    // Anchor is not a usable file URL / absolute path: version stays unknown.
    return "unknown";
  }
  for (const p of ["../package.json", "../../package.json"]) {
    try {
      const pkg = require2(p);
      // "0.0.0" is treated as a placeholder version and skipped.
      if (pkg.version && pkg.version !== "0.0.0") return pkg.version;
    } catch {
      // No readable manifest at this location: try the next candidate.
    }
  }
  return "unknown";
}
698
/**
 * Build the User-Agent string sent with SDK requests.
 *
 * @returns {string} `<sdk name>/<sdk version> (node <version>; <platform>; <arch>)`.
 */
function getUserAgent() {
  const product = `${SDK_NAME}/${getSdkVersion()}`;
  const runtime = [`node ${process.version}`, process.platform, process.arch].join("; ");
  return `${product} (${runtime})`;
}
701
/**
 * Collect the identification headers attached to outgoing SDK requests.
 *
 * @returns {Object<string, string>} A fresh header map on every call, so
 *   callers may add their own entries to it.
 */
function getSdkHeaders() {
  const headers = {};
  headers["User-Agent"] = getUserAgent();
  headers["x-sdk-name"] = SDK_NAME;
  headers["x-sdk-version"] = getSdkVersion();
  headers["x-sdk-language"] = "typescript";
  headers["x-sdk-language-version"] = process.version;
  headers["x-sdk-os"] = process.platform;
  headers["x-sdk-platform"] = process.arch;
  return headers;
}
712
+
662
713
  // src/utils/output-parser.ts
663
714
  var splitActions = (actionBlock) => {
664
715
  const actions = [];
@@ -725,7 +776,9 @@ var parseRawOutput = (rawOutput) => {
725
776
  return {
726
777
  reason,
727
778
  actions,
728
- stop: actions.some((action2) => action2.type === "finish")
779
+ stop: actions.some(
780
+ (action2) => action2.type === "finish" || action2.type === "fail"
781
+ )
729
782
  };
730
783
  };
731
784
 
@@ -776,10 +829,12 @@ var _Client = class _Client {
776
829
  `OAGI API key must be provided either as 'api_key' parameter or OAGI_API_KEY environment variable. Get your API key at ${API_KEY_HELP_URL}`
777
830
  );
778
831
  }
832
+ const sdkHeaders = getSdkHeaders();
779
833
  this.client = new import_openai.default({
780
834
  baseURL: new URL("./v1", baseURL).href,
781
835
  apiKey,
782
- maxRetries
836
+ maxRetries,
837
+ defaultHeaders: sdkHeaders
783
838
  });
784
839
  logger2.info(`Client initialized with base_url: ${baseURL}`);
785
840
  }
@@ -795,7 +850,7 @@ var _Client = class _Client {
795
850
  return fetch(input, init);
796
851
  }
797
852
  buildHeaders(apiVersion) {
798
- const headers = {};
853
+ const headers = getSdkHeaders();
799
854
  if (apiVersion) {
800
855
  headers["x-api-version"] = apiVersion;
801
856
  }
@@ -1242,6 +1297,783 @@ var DefaultAgent = class {
1242
1297
  }
1243
1298
  };
1244
1299
 
1300
+ // src/agent/tasker.ts
1301
+ var logger5 = logger_default("agent.tasker");
1302
/**
 * Invoke `handler.reset()` when the handler exposes a callable `reset`;
 * handlers without one are left untouched.
 */
var resetHandler2 = (handler) => {
  const canReset = typeof handler.reset === "function";
  if (!canReset) {
    return;
  }
  handler.reset();
};
1307
/**
 * Resolve (with no value) after the given number of seconds.
 */
var sleep2 = (seconds) => {
  return new Promise((wake) => {
    const delayMs = seconds * 1e3;
    setTimeout(wake, delayMs);
  });
};
1308
/**
 * Pull a UUID out of the last path segment of a URL.
 *
 * The UUID must directly follow a `/`, may carry an alphabetic file extension,
 * and must be followed by either a query string or the end of the input.
 *
 * @returns the UUID exactly as written in the URL, or `null` when none matches.
 */
var extractUuidFromUrl = (url) => {
  const trailingUuid = /\/([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})(?:\.[a-z]+)?(?:\?|$)/i;
  const found = trailingUuid.exec(url);
  if (!found) {
    return null;
  }
  const [, uuid] = found;
  return uuid;
};
1313
/**
 * In-memory record of a multi-todo task: the todo list with statuses, the
 * per-todo execution history and the running summaries.
 */
var PlannerMemory = class {
  taskDescription = "";
  todos = [];
  history = [];
  taskExecutionSummary = "";
  todoExecutionSummaries = {};

  /**
   * Replace the task description and todo list. String entries become
   * pending todos; object entries are stored as given.
   */
  setTask(taskDescription, todos) {
    this.taskDescription = taskDescription;
    this.todos = todos.map((entry) => {
      if (typeof entry === "string") {
        return { description: entry, status: "pending" };
      }
      return entry;
    });
  }

  /**
   * @returns `{ todo, index }` for the first todo that is pending or in
   *   progress, or `null` when every todo has another status.
   */
  getCurrentTodo() {
    const index = this.todos.findIndex(
      (candidate) => candidate.status === "pending" || candidate.status === "in_progress"
    );
    if (index === -1) {
      return null;
    }
    return { todo: this.todos[index], index };
  }

  /**
   * Set a todo's status and, when a non-empty summary is given, remember it.
   * Out-of-range indexes are ignored.
   */
  updateTodo(index, status, summary) {
    if (index < 0 || index >= this.todos.length) {
      return;
    }
    const target = this.todos[index];
    target.status = status;
    if (summary) {
      this.todoExecutionSummaries[index] = summary;
    }
  }

  /**
   * Append one execution record for a todo. Out-of-range indexes are ignored.
   */
  addHistory(todoIndex, actions, summary, completed = false) {
    if (todoIndex < 0 || todoIndex >= this.todos.length) {
      return;
    }
    const { description } = this.todos[todoIndex];
    const record = {
      todo_index: todoIndex,
      todo: description,
      actions,
      summary,
      completed
    };
    this.history.push(record);
  }

  /**
   * @returns a snake_case snapshot of the memory; history entries carry an
   *   action count instead of the action list itself.
   */
  getContext() {
    const todoViews = this.todos.map(({ description, status }, index) => ({
      index,
      description,
      status
    }));
    const historyViews = this.history.map((record) => ({
      todo_index: record.todo_index,
      todo: record.todo,
      action_count: record.actions.length,
      summary: record.summary,
      completed: record.completed
    }));
    return {
      task_description: this.taskDescription,
      todos: todoViews,
      history: historyViews,
      task_execution_summary: this.taskExecutionSummary,
      todo_execution_summaries: this.todoExecutionSummaries
    };
  }

  /**
   * @returns a count of todos per status; the four known statuses are always
   *   present, any other status is counted under its own key.
   */
  getTodoStatusSummary() {
    const counts = {
      pending: 0,
      in_progress: 0,
      completed: 0,
      skipped: 0
    };
    this.todos.forEach(({ status }) => {
      counts[status] = (counts[status] ?? 0) + 1;
    });
    return counts;
  }

  /** Add a new pending todo at the end of the list. */
  appendTodo(description) {
    const todo = { description, status: "pending" };
    this.todos.push(todo);
  }
};
1386
/**
 * Wrapper around the planning workers ("oagi_first", "oagi_follow",
 * "oagi_task_summary") reached through `client.callWorker`, plus the parsing
 * of their JSON-in-text responses.
 */
var Planner = class {
  /**
   * @param client  Optional ready-made API client. When omitted, one is
   *   created lazily from `baseUrl` and `apiKey` on first use.
   * @param apiKey  API key forwarded to a lazily created client.
   * @param baseUrl Base URL forwarded to a lazily created client.
   */
  constructor(client, apiKey, baseUrl) {
    this.apiKey = apiKey;
    this.baseUrl = baseUrl;
    this.client = client;
  }
  client;
  // True only when this planner created the client itself and must close it.
  ownsClient = false;

  /** Return the client, creating (and taking ownership of) one if needed. */
  ensureClient() {
    if (!this.client) {
      this.client = new Client(this.baseUrl, this.apiKey);
      this.ownsClient = true;
    }
    return this.client;
  }

  /** Public accessor for the (lazily created) client. */
  getClient() {
    return this.ensureClient();
  }

  /**
   * Close the client if this planner created it; injected clients are left
   * to their owner. The reference is dropped before closing so that a repeated
   * `close()` is a no-op and a later call builds a fresh client instead of
   * reusing a closed one.
   */
  async close() {
    if (!this.ownsClient || !this.client) return;
    const closable = this.client;
    this.client = void 0;
    this.ownsClient = false;
    if (typeof closable.close === "function") {
      await closable.close();
    }
  }

  /**
   * Build the worker payload fields from `memory` when both `memory` and
   * `todoIndex` are available, otherwise from the plain `context` object.
   *
   * @returns `{ taskDescription, todos, history, taskExecutionSummary, overallTodo }`
   */
  extractMemoryData(memory, context, todoIndex) {
    if (memory && todoIndex !== void 0) {
      const taskDescription = memory.taskDescription;
      const todos = memory.todos.map((todo, index) => ({
        index,
        description: todo.description,
        status: todo.status,
        execution_summary: memory.todoExecutionSummaries[index] ?? void 0
      }));
      const history = memory.history.map((history2) => ({
        todo_index: history2.todo_index,
        todo_description: history2.todo,
        action_count: history2.actions.length,
        summary: history2.summary ?? void 0,
        completed: history2.completed
      }));
      const taskExecutionSummary = memory.taskExecutionSummary || void 0;
      const overallTodo = memory.todos[todoIndex] ? memory.todos[todoIndex].description : "";
      return {
        taskDescription,
        todos,
        history,
        taskExecutionSummary,
        overallTodo
      };
    }
    // Tolerate a missing context object instead of throwing on property access.
    const source = context ?? {};
    const rawTodos = source.todos;
    const rawHistory = source.history;
    return {
      taskDescription: source.task_description ?? "",
      todos: Array.isArray(rawTodos) ? rawTodos : [],
      history: Array.isArray(rawHistory) ? rawHistory : [],
      taskExecutionSummary: void 0,
      overallTodo: source.current_todo ?? ""
    };
  }

  /**
   * @returns the substring from the first `{` to the last `}` (inclusive), or
   *   `""` when the text contains no such span.
   */
  extractJsonString(text) {
    const start = text.indexOf("{");
    const end = text.lastIndexOf("}") + 1;
    if (start < 0 || end <= start) return "";
    return text.slice(start, end);
  }

  /**
   * Parse an initial-plan response.
   *
   * @returns `{ instruction, reasoning, subtodos }`; on unparseable input the
   *   instruction is empty and the reasoning explains the failure.
   */
  parsePlannerOutput(response) {
    try {
      const jsonResponse = this.extractJsonString(response);
      const data = JSON.parse(jsonResponse);
      return {
        instruction: data.subtask ?? data.instruction ?? "",
        reasoning: data.reasoning ?? "",
        // Callers receive a list; anything else is replaced by an empty one.
        subtodos: Array.isArray(data.subtodos) ? data.subtodos : []
      };
    } catch {
      return {
        instruction: "",
        reasoning: "Failed to parse structured response",
        subtodos: []
      };
    }
  }

  /**
   * Parse a reflection response.
   *
   * Success is recognised from `"yes"` in any letter case (surrounding
   * whitespace ignored) or boolean `true`. A non-string `subtask_instruction`
   * is treated as absent rather than invalidating the whole reflection.
   *
   * @returns `{ continue_current, new_instruction, reasoning, success_assessment }`;
   *   unparseable input yields "continue current approach".
   */
  parseReflectionOutput(response) {
    try {
      const jsonResponse = this.extractJsonString(response);
      const data = JSON.parse(jsonResponse);
      const rawSuccess = data.success;
      const success = rawSuccess === true || typeof rawSuccess === "string" && rawSuccess.trim().toLowerCase() === "yes";
      const rawSubtask = data.subtask_instruction;
      const newSubtask = typeof rawSubtask === "string" ? rawSubtask.trim() : "";
      const continueCurrent = !success && !newSubtask;
      return {
        continue_current: continueCurrent,
        new_instruction: newSubtask || null,
        reasoning: data.reflection ?? data.reasoning ?? "",
        success_assessment: success
      };
    } catch {
      return {
        continue_current: true,
        new_instruction: null,
        reasoning: "Failed to parse reflection response, continuing current approach",
        success_assessment: false
      };
    }
  }

  /**
   * Render `context.history` as plain-text notes, one line per entry plus an
   * optional summary line. Returns `""` when there is no history.
   */
  formatExecutionNotes(context) {
    const history = context.history;
    if (!history?.length) return "";
    const parts = [];
    for (const item of history) {
      parts.push(
        `Todo ${item.todo_index}: ${item.action_count} actions, completed: ${item.completed}`
      );
      if (item.summary) {
        parts.push(`Summary: ${item.summary}`);
      }
    }
    return parts.join("\n");
  }

  /**
   * Resolve a screenshot to `{ uuid, url }`. String screenshots are treated
   * as URLs (the UUID is parsed out of the URL when present); anything else is
   * uploaded through `client.putS3PresignedUrl`.
   */
  async ensureScreenshotUuid(screenshot) {
    if (!screenshot) return { uuid: void 0, url: void 0 };
    if (typeof screenshot === "string") {
      const uuid = extractUuidFromUrl(screenshot);
      return { uuid: uuid ?? void 0, url: screenshot };
    }
    const client = this.ensureClient();
    const upload = await client.putS3PresignedUrl(screenshot);
    return { uuid: upload.uuid, url: upload.download_url };
  }

  /**
   * Ask the "oagi_first" worker for the first instruction of a todo.
   *
   * @returns `{ output, requestId }` where `output` is the parsed plan.
   */
  async initialPlan(todo, context, screenshot, memory, todoIndex) {
    const client = this.ensureClient();
    const { uuid } = await this.ensureScreenshotUuid(screenshot);
    const { taskDescription, todos, history, taskExecutionSummary } = this.extractMemoryData(memory, context, todoIndex);
    const response = await client.callWorker({
      workerId: "oagi_first",
      overallTodo: todo,
      taskDescription,
      todos,
      history,
      currentTodoIndex: todoIndex,
      taskExecutionSummary,
      currentScreenshot: uuid
    });
    return {
      output: this.parsePlannerOutput(response.response),
      requestId: response.request_id
    };
  }

  /**
   * Ask the "oagi_follow" worker to assess the last `reflectionInterval`
   * actions against the current screenshot.
   *
   * @returns `{ output, requestId }` where `output` is the parsed reflection.
   */
  async reflect(actions, context, screenshot, memory, todoIndex, currentInstruction, reflectionInterval = DEFAULT_REFLECTION_INTERVAL) {
    const client = this.ensureClient();
    const { uuid } = await this.ensureScreenshotUuid(screenshot);
    const {
      taskDescription,
      todos,
      history,
      taskExecutionSummary,
      overallTodo
    } = this.extractMemoryData(memory, context, todoIndex);
    const windowActions = actions.slice(-reflectionInterval);
    const windowSteps = windowActions.map((action, index) => ({
      step_number: index + 1,
      action_type: action.action_type,
      target: action.target ?? "",
      reasoning: action.reasoning ?? ""
    }));
    const windowScreenshots = windowActions.map((action) => action.screenshot_uuid).filter(Boolean);
    const priorNotes = this.formatExecutionNotes(context);
    const response = await client.callWorker({
      workerId: "oagi_follow",
      overallTodo,
      taskDescription,
      todos,
      history,
      currentTodoIndex: todoIndex,
      taskExecutionSummary,
      currentSubtaskInstruction: currentInstruction ?? "",
      windowSteps,
      windowScreenshots,
      resultScreenshot: uuid,
      priorNotes
    });
    return {
      output: this.parseReflectionOutput(response.response),
      requestId: response.request_id
    };
  }

  /**
   * Ask the "oagi_task_summary" worker for a summary of the todo.
   *
   * The response is parsed like the other worker responses (JSON object,
   * possibly surrounded by text). The raw response text is used whenever no
   * string `task_summary` can be extracted, so callers always get the text
   * the worker produced.
   *
   * @returns `{ summary, requestId }`
   */
  async summarize(_executionHistory, context, memory, todoIndex) {
    const client = this.ensureClient();
    const {
      taskDescription,
      todos,
      history,
      taskExecutionSummary,
      overallTodo
    } = this.extractMemoryData(memory, context, todoIndex);
    const latestTodoSummary = memory && todoIndex !== void 0 ? memory.todoExecutionSummaries[todoIndex] : "";
    const response = await client.callWorker({
      workerId: "oagi_task_summary",
      overallTodo,
      taskDescription,
      todos,
      history,
      currentTodoIndex: todoIndex,
      taskExecutionSummary,
      latestTodoSummary
    });
    let summary = response.response;
    try {
      const parsed = JSON.parse(this.extractJsonString(response.response));
      if (typeof parsed.task_summary === "string") {
        summary = parsed.task_summary;
      }
    } catch {
      // Not structured JSON: keep the raw response text as the summary.
    }
    return { summary, requestId: response.request_id };
  }
};
1604
/**
 * Executes a single todo: asks the planner for an initial instruction, drives
 * an `Actor` step by step, and periodically asks the planner to reflect on
 * progress until the todo succeeds or the step budget is used up.
 */
var TaskeeAgent = class {
  apiKey;
  baseUrl;
  model;
  maxSteps;
  reflectionInterval;
  temperature;
  planner;
  externalMemory;
  todoIndex;
  stepObserver;
  stepDelay;
  actor;
  currentTodo = "";
  currentInstruction = "";
  // Chronological log of plan / step / reflect / summary / error records.
  actions = [];
  // Number of actor actions handled so far (not the number of log records).
  totalActions = 0;
  // Actor actions handled since the last reflection.
  sinceReflection = 0;
  success = false;

  /**
   * @param apiKey             API key for the actor and a default planner.
   * @param baseUrl            Base URL for the actor and a default planner.
   * @param model              Actor model name.
   * @param maxSteps           Step budget for one `execute` call.
   * @param reflectionInterval Number of actions after which a subtask run
   *                           pauses for reflection.
   * @param temperature        Sampling temperature passed to the actor.
   * @param planner            Planner to use; a new `Planner` is created
   *                           when omitted.
   * @param externalMemory     Optional shared memory supplying planner context.
   * @param todoIndex          Index of this todo inside `externalMemory`.
   * @param stepObserver       Optional observer receiving plan/step/action events.
   * @param stepDelay          Seconds to wait after each step.
   */
  constructor(apiKey, baseUrl, model = MODEL_ACTOR, maxSteps = DEFAULT_MAX_STEPS, reflectionInterval = DEFAULT_REFLECTION_INTERVAL, temperature = DEFAULT_TEMPERATURE, planner, externalMemory, todoIndex, stepObserver, stepDelay = DEFAULT_STEP_DELAY) {
    this.apiKey = apiKey;
    this.baseUrl = baseUrl;
    this.model = model;
    this.maxSteps = maxSteps;
    this.reflectionInterval = reflectionInterval;
    this.temperature = temperature;
    this.planner = planner ?? new Planner(void 0, apiKey, baseUrl);
    this.externalMemory = externalMemory;
    this.todoIndex = todoIndex;
    this.stepObserver = stepObserver;
    this.stepDelay = stepDelay;
  }

  /**
   * Run the todo described by `instruction`.
   *
   * @returns {Promise<boolean>} `true` when a reflection judged the todo
   *   successful; `false` otherwise, including when any error was thrown
   *   (errors are logged and recorded, never propagated).
   */
  async execute(instruction, actionHandler, imageProvider) {
    resetHandler2(actionHandler);
    this.currentTodo = instruction;
    this.actions = [];
    this.totalActions = 0;
    this.sinceReflection = 0;
    this.success = false;
    try {
      this.actor = new Actor(
        this.apiKey,
        this.baseUrl,
        this.model,
        this.temperature
      );
      await this.initialPlan(imageProvider);
      // Awaited (as in reflectAndDecide) so that a failing initTask is caught
      // below instead of surfacing as an unhandled rejection.
      await this.actor.initTask(this.currentInstruction, this.maxSteps);
      let remainingSteps = this.maxSteps;
      while (remainingSteps > 0 && !this.success) {
        const stepsTaken = await this.executeSubtask(
          Math.min(this.maxSteps, remainingSteps),
          actionHandler,
          imageProvider
        );
        // A subtask that aborted before completing a step (upload/actor
        // error) still consumes one unit of budget; otherwise a persistent
        // error combined with "continue" reflections would loop forever.
        remainingSteps -= Math.max(stepsTaken, 1);
        if (!this.success && remainingSteps > 0) {
          const shouldContinue = await this.reflectAndDecide(imageProvider);
          if (!shouldContinue) {
            break;
          }
        }
      }
      await this.generateSummary();
      return this.success;
    } catch (err) {
      logger5.error(`Error executing todo: ${err}`);
      this.recordAction("error", null, String(err));
      return false;
    } finally {
      this.actor = void 0;
    }
  }

  /** Planner context from the external memory, or `{}` when there is none. */
  getContext() {
    return this.externalMemory ? this.externalMemory.getContext() : {};
  }

  /** Append one timestamped record to the action log. */
  recordAction(actionType, target, reasoning, result, screenshotUuid) {
    this.actions.push({
      timestamp: (/* @__PURE__ */ new Date()).toISOString(),
      action_type: actionType,
      target,
      reasoning,
      result,
      details: {},
      screenshot_uuid: screenshotUuid ?? void 0
    });
  }

  /**
   * Ask the planner for the first instruction, record it, notify the
   * observer, and store it as the current instruction.
   */
  async initialPlan(imageProvider) {
    logger5.info("Generating initial plan for todo");
    const screenshot = await imageProvider.provide();
    const context = this.getContext();
    const { output, requestId } = await this.planner.initialPlan(
      this.currentTodo,
      context,
      screenshot,
      this.externalMemory,
      this.todoIndex
    );
    this.recordAction("plan", "initial", output.reasoning, output.instruction);
    if (this.stepObserver) {
      const event = {
        type: "plan",
        timestamp: /* @__PURE__ */ new Date(),
        phase: "initial",
        image: screenshot,
        reasoning: output.reasoning,
        result: output.instruction,
        request_id: requestId ?? void 0
      };
      await this.stepObserver.onEvent(event);
    }
    this.currentInstruction = output.instruction;
    logger5.info(`Initial instruction: ${this.currentInstruction}`);
  }

  /**
   * Run actor steps until the actor signals stop, the reflection interval is
   * reached, `maxSteps` steps were taken, or a screenshot/actor error occurs.
   * Errors thrown by `actionHandler.handle` propagate to the caller.
   *
   * @returns {Promise<number>} Number of steps completed in this run.
   */
  async executeSubtask(maxSteps, actionHandler, imageProvider) {
    logger5.info(`Executing subtask with max ${maxSteps} steps`);
    let stepsTaken = 0;
    const client = this.planner.getClient();
    for (let stepNum = 0; stepNum < maxSteps; stepNum++) {
      const screenshot = await imageProvider.provide();
      let screenshotUuid;
      let screenshotUrl;
      try {
        if (typeof screenshot === "string") {
          // String screenshots are already URLs; reuse the UUID in them if any.
          screenshotUuid = extractUuidFromUrl(screenshot) ?? void 0;
          screenshotUrl = screenshot;
        } else {
          const upload = await client.putS3PresignedUrl(screenshot);
          screenshotUuid = upload.uuid;
          screenshotUrl = upload.download_url;
        }
      } catch (err) {
        logger5.error(`Error uploading screenshot: ${err}`);
        this.recordAction("error", "screenshot_upload", String(err));
        break;
      }
      let step;
      try {
        step = await this.actor.step(screenshotUrl ?? screenshot, void 0);
      } catch (err) {
        logger5.error(`Error getting step from OAGI: ${err}`);
        this.recordAction(
          "error",
          "oagi_step",
          String(err),
          null,
          screenshotUuid
        );
        break;
      }
      if (step.reason) {
        logger5.info(`Step ${this.totalActions + 1}: ${step.reason}`);
      }
      if (this.stepObserver) {
        const event = {
          type: "step",
          timestamp: /* @__PURE__ */ new Date(),
          step_num: this.totalActions + 1,
          image: screenshot,
          step,
          task_id: this.actor.taskId
        };
        await this.stepObserver.onEvent(event);
      }
      if (step.actions?.length) {
        logger5.info(`Actions (${step.actions.length}):`);
        for (const action of step.actions) {
          const countSuffix = action.count && action.count > 1 ? ` x${action.count}` : "";
          logger5.info(`  [${action.type}] ${action.argument}${countSuffix}`);
        }
        for (const action of step.actions) {
          this.recordAction(
            action.type,
            action.argument,
            step.reason ?? null,
            null,
            screenshotUuid
          );
        }
        let error = null;
        try {
          await actionHandler.handle(step.actions);
        } catch (err) {
          error = String(err);
          throw err;
        } finally {
          // The observer sees the action event whether or not handling failed.
          if (this.stepObserver) {
            const event = {
              type: "action",
              timestamp: /* @__PURE__ */ new Date(),
              step_num: this.totalActions + 1,
              actions: step.actions,
              error: error ?? void 0
            };
            await this.stepObserver.onEvent(event);
          }
        }
        this.totalActions += step.actions.length;
        this.sinceReflection += step.actions.length;
      }
      if (this.stepDelay > 0) {
        await sleep2(this.stepDelay);
      }
      stepsTaken += 1;
      if (step.stop) {
        logger5.info("OAGI signaled task completion");
        break;
      }
      if (this.sinceReflection >= this.reflectionInterval) {
        logger5.info("Reflection interval reached");
        break;
      }
    }
    return stepsTaken;
  }

  /**
   * Ask the planner to assess the actions taken since the last reflection.
   *
   * @returns {Promise<boolean>} `true` to keep executing (same or new
   *   instruction); `false` to stop, which includes the success case.
   */
  async reflectAndDecide(imageProvider) {
    logger5.info("Reflecting on progress");
    const screenshot = await imageProvider.provide();
    const context = this.getContext();
    context.current_todo = this.currentTodo;
    // slice(-0) is slice(0) and would return the whole log, so an empty
    // window has to be spelled out when nothing happened since the last
    // reflection.
    const recentActions = this.sinceReflection > 0 ? this.actions.slice(-this.sinceReflection) : [];
    const { output, requestId } = await this.planner.reflect(
      recentActions,
      context,
      screenshot,
      this.externalMemory,
      this.todoIndex,
      this.currentInstruction,
      this.reflectionInterval
    );
    this.recordAction(
      "reflect",
      null,
      output.reasoning,
      output.continue_current ? "continue" : "pivot"
    );
    if (this.stepObserver) {
      const decision = output.success_assessment ? "success" : output.continue_current ? "continue" : "pivot";
      const event = {
        type: "plan",
        timestamp: /* @__PURE__ */ new Date(),
        phase: "reflection",
        image: screenshot,
        reasoning: output.reasoning,
        result: decision,
        request_id: requestId ?? void 0
      };
      await this.stepObserver.onEvent(event);
    }
    if (output.success_assessment) {
      this.success = true;
      logger5.info("Reflection indicates task is successful");
      return false;
    }
    this.sinceReflection = 0;
    if (!output.continue_current && output.new_instruction) {
      logger5.info(`Pivoting to new instruction: ${output.new_instruction}`);
      this.currentInstruction = output.new_instruction;
      await this.actor.initTask(this.currentInstruction, this.maxSteps);
      return true;
    }
    return output.continue_current;
  }

  /** Ask the planner for a summary, record it, and notify the observer. */
  async generateSummary() {
    logger5.info("Generating execution summary");
    const context = this.getContext();
    context.current_todo = this.currentTodo;
    const { summary, requestId } = await this.planner.summarize(
      this.actions,
      context,
      this.externalMemory,
      this.todoIndex
    );
    this.recordAction("summary", null, summary);
    if (this.stepObserver) {
      const event = {
        type: "plan",
        timestamp: /* @__PURE__ */ new Date(),
        phase: "summary",
        image: void 0,
        reasoning: summary,
        result: void 0,
        request_id: requestId ?? void 0
      };
      await this.stepObserver.onEvent(event);
    }
    logger5.info(`Execution summary: ${summary}`);
  }

  /**
   * @returns `{ success, actions, summary, total_steps }` for the last run;
   *   `summary` is the text of the most recent "summary" record, or `""`.
   */
  returnExecutionResults() {
    let summary = "";
    for (let i = this.actions.length - 1; i >= 0; i--) {
      if (this.actions[i].action_type === "summary") {
        summary = this.actions[i].reasoning ?? "";
        break;
      }
    }
    return {
      success: this.success,
      actions: this.actions,
      summary,
      total_steps: this.totalActions
    };
  }
};
1908
/**
 * Hierarchical agent that manages multi-todo workflows: it walks the todo
 * list held in a `PlannerMemory`, runs one `TaskeeAgent` per todo, and folds
 * each result back into the memory.
 */
var TaskerAgent = class {
  apiKey;
  baseUrl;
  model;
  maxSteps;
  temperature;
  reflectionInterval;
  planner;
  stepObserver;
  stepDelay;
  memory = new PlannerMemory();
  currentTaskeeAgent;

  /**
   * @param apiKey             API key handed to taskee agents and a default planner.
   * @param baseUrl            Base URL handed to taskee agents and a default planner.
   * @param model              Model name handed to taskee agents.
   * @param maxSteps           Step budget handed to each taskee agent.
   * @param temperature        Sampling temperature handed to taskee agents.
   * @param reflectionInterval Reflection interval handed to taskee agents.
   * @param planner            Shared planner; a new `Planner` is created when omitted.
   * @param stepObserver       Optional observer receiving "split" events per todo.
   * @param stepDelay          Per-step delay handed to taskee agents.
   */
  constructor(apiKey, baseUrl, model = MODEL_ACTOR, maxSteps = DEFAULT_MAX_STEPS_TASKER, temperature = DEFAULT_TEMPERATURE, reflectionInterval = DEFAULT_REFLECTION_INTERVAL, planner, stepObserver, stepDelay = DEFAULT_STEP_DELAY) {
    Object.assign(this, {
      apiKey,
      baseUrl,
      model,
      maxSteps,
      temperature,
      reflectionInterval,
      planner: planner ?? new Planner(void 0, apiKey, baseUrl),
      stepObserver,
      stepDelay
    });
  }

  /** Store the task description and its todos in memory. */
  setTask(task, todos) {
    this.memory.setTask(task, todos);
    logger5.info(`Task set with ${todos.length} todos`);
  }

  /** snake_case alias of `setTask`. */
  set_task(task, todos) {
    this.setTask(task, todos);
  }

  /**
   * Run every open todo in order. The first argument is not used; the work
   * comes from the todos set via `setTask`.
   *
   * @returns {Promise<boolean>} `true` only if no todo failed. Execution
   *   stops at the first failed todo that is left "in_progress".
   */
  async execute(_instruction, actionHandler, imageProvider) {
    resetHandler2(actionHandler);
    const emitSplit = (label) => this.stepObserver.onEvent({
      type: "split",
      timestamp: /* @__PURE__ */ new Date(),
      label
    });
    let allSucceeded = true;
    let stoppedEarly = false;
    for (let next = this.prepare(); next; next = this.prepare()) {
      const { todo, index } = next;
      logger5.info(`Executing todo ${index}: ${todo.description}`);
      if (this.stepObserver) {
        await emitSplit(`Start of todo ${index + 1}: ${todo.description}`);
      }
      const ok = await this.executeTodo(index, actionHandler, imageProvider);
      if (this.stepObserver) {
        await emitSplit(`End of todo ${index + 1}: ${todo.description}`);
      }
      if (!ok) {
        logger5.warn(`Todo ${index} failed`);
        allSucceeded = false;
        if (this.memory.todos[index]?.status === "in_progress") {
          logger5.error("Todo failed with exception, stopping execution");
          stoppedEarly = true;
          break;
        }
      }
      this.updateTaskSummary();
    }
    if (!stoppedEarly) {
      logger5.info("No more todos to execute");
    }
    const statusSummary = this.memory.getTodoStatusSummary();
    logger5.info(
      `Workflow complete. Status summary: ${JSON.stringify(statusSummary)}`
    );
    return allSucceeded;
  }

  /**
   * Create the taskee agent for the next open todo and mark a pending todo
   * as in progress.
   *
   * @returns `{ todo, index }` for that todo, or `null` when none is open.
   */
  prepare() {
    const current = this.memory.getCurrentTodo();
    if (!current) {
      return null;
    }
    const { todo, index } = current;
    this.currentTaskeeAgent = new TaskeeAgent(
      this.apiKey,
      this.baseUrl,
      this.model,
      this.maxSteps,
      this.reflectionInterval,
      this.temperature,
      this.planner,
      this.memory,
      index,
      this.stepObserver,
      this.stepDelay
    );
    if (todo.status === "pending") {
      this.memory.updateTodo(index, "in_progress");
    }
    logger5.info(`Prepared taskee agent for todo ${index}`);
    return current;
  }

  /**
   * Run the prepared taskee agent on one todo and record the outcome.
   * Exceptions are logged, stored as the todo's summary, and reported as
   * failure.
   *
   * @returns {Promise<boolean>} whether the todo succeeded.
   */
  async executeTodo(todoIndex, actionHandler, imageProvider) {
    const agent = this.currentTaskeeAgent;
    if (!agent || todoIndex < 0) {
      logger5.error("No taskee agent prepared");
      return false;
    }
    const todo = this.memory.todos[todoIndex];
    try {
      const succeeded = await agent.execute(todo.description, actionHandler, imageProvider);
      this.updateMemoryFromExecution(todoIndex, agent.returnExecutionResults(), succeeded);
      return succeeded;
    } catch (err) {
      logger5.error(`Error executing todo ${todoIndex}: ${err}`);
      this.memory.updateTodo(todoIndex, "in_progress", `Execution failed: ${String(err)}`);
      return false;
    }
  }

  /**
   * Write a finished run into memory: todo status and summary, a history
   * entry, and (on success) a line appended to the task execution summary.
   */
  updateMemoryFromExecution(todoIndex, results, success) {
    const { actions, summary } = results;
    let status = "in_progress";
    if (success) {
      status = "completed";
    }
    this.memory.updateTodo(todoIndex, status, summary);
    this.memory.addHistory(todoIndex, actions, summary, success);
    if (success) {
      const summaryLine = `- Completed todo ${todoIndex}: ${summary}`;
      const previous = this.memory.taskExecutionSummary;
      this.memory.taskExecutionSummary = previous ? `${previous}\n${summaryLine}` : summaryLine;
    }
    logger5.info(
      `Updated memory for todo ${todoIndex}: status=${status}, actions=${actions.length}`
    );
  }

  /**
   * Replace the task execution summary with a progress line followed by the
   * (truncated) summaries of the completed entries among the last three
   * history records.
   */
  updateTaskSummary() {
    const completed = this.memory.getTodoStatusSummary().completed ?? 0;
    const total = this.memory.todos.length;
    const recentLines = this.memory.history
      .slice(-3)
      .filter((entry) => entry.completed && entry.summary)
      .map((entry) => `- Todo ${entry.todo_index}: ${entry.summary.slice(0, 100)}`);
    this.memory.taskExecutionSummary = [
      `Progress: ${completed}/${total} todos completed`,
      ...recentLines
    ].join("\n");
  }

  /** @returns the memory object backing this agent. */
  getMemory() {
    return this.memory;
  }

  /** Add a new pending todo to the end of the list. */
  appendTodo(description) {
    this.memory.appendTodo(description);
    logger5.info(`Appended new todo: ${description}`);
  }
};
2076
+
1245
2077
  // src/agent/registry.ts
1246
2078
  var agentRegistry = {};
1247
2079
  var asyncAgentRegister = (mode) => {
@@ -1319,11 +2151,119 @@ asyncAgentRegister("thinker")((options = {}) => {
1319
2151
  stepDelay
1320
2152
  );
1321
2153
  });
2154
// Register the generic "tasker" mode. The factory builds a TaskerAgent from
// the caller's options, falling back to the tasker defaults for any option
// that is undefined.
asyncAgentRegister("tasker")((options = {}) => {
  const {
    apiKey,
    baseURL,
    model = MODEL_ACTOR,
    maxSteps = DEFAULT_MAX_STEPS_TASKER,
    temperature = DEFAULT_TEMPERATURE,
    reflectionInterval = DEFAULT_REFLECTION_INTERVAL_TASKER,
    stepObserver,
    stepDelay = DEFAULT_STEP_DELAY
  } = options;
  // Positional constructor arguments, in the order TaskerAgent receives them.
  const constructorArgs = [
    apiKey,
    baseURL,
    model,
    maxSteps,
    temperature,
    reflectionInterval,
    // Seventh argument is deliberately left undefined; its meaning is set by
    // the TaskerAgent constructor, which is not visible in this section.
    void 0,
    // A null observer is normalized to undefined.
    stepObserver ?? void 0,
    stepDelay
  ];
  return new TaskerAgent(...constructorArgs);
});
2177
// Register the "tasker:cvs_appointment" mode: a TaskerAgent preloaded with a
// fixed instruction and todo list for scheduling a CVS flu-shot appointment.
// The personal details below are hard-coded placeholder values.
asyncAgentRegister("tasker:cvs_appointment")((options = {}) => {
  const {
    apiKey,
    baseURL,
    model = MODEL_ACTOR,
    maxSteps = DEFAULT_MAX_STEPS_TASKER,
    temperature = DEFAULT_TEMPERATURE,
    reflectionInterval = DEFAULT_REFLECTION_INTERVAL_TASKER,
    stepObserver,
    stepDelay = DEFAULT_STEP_DELAY
  } = options;
  const agent = new TaskerAgent(
    apiKey,
    baseURL,
    model,
    maxSteps,
    temperature,
    reflectionInterval,
    void 0,
    // A null observer is normalized to undefined.
    stepObserver ?? void 0,
    stepDelay
  );

  // Placeholder profile used to fill in the appointment form.
  const profile = {
    firstName: "First",
    lastName: "Last",
    email: "user@example.com",
    birthday: "01-01-1990",
    zipCode: "00000"
  };
  // The birthday is written month-day-year, separated by dashes.
  const [month, day, year] = profile.birthday.split("-");

  const instruction = `Schedule an appointment at CVS for ${profile.firstName} ${profile.lastName} with email ${profile.email} and birthday ${profile.birthday}`;
  const todos = [
    "Open a new tab, go to www.cvs.com, type 'flu shot' in the search bar and press enter, wait for the page to load, then click on the button of Schedule vaccinations on the top of the page",
    `Enter the first name '${profile.firstName}', last name '${profile.lastName}', and email '${profile.email}' in the form. Do not use any suggested autofills. Make sure the mobile phone number is empty.`,
    `Slightly scroll down to see the date of birth, enter Month '${month}', Day '${day}', and Year '${year}' in the form`,
    "Click on 'Continue as guest' button, wait for the page to load with wait, click on 'Add vaccines' button, select 'Flu' and click on 'Add vaccines'",
    `Click on 'next' to enter the page with recommendation vaccines, then click on 'next' again, until on the page of entering zip code, enter '${profile.zipCode}', select the first option from the dropdown menu, and click on 'Search'`
  ];

  agent.setTask(instruction, todos);
  return agent;
});
2218
// Register the "tasker:software_qa" mode: a TaskerAgent preloaded with a QA
// task that clicks each listed sidebar entry of the Nuclear Player UI, one
// todo per entry, in the order given below.
asyncAgentRegister("tasker:software_qa")((options = {}) => {
  const {
    apiKey,
    baseURL,
    model = MODEL_ACTOR,
    maxSteps = DEFAULT_MAX_STEPS_TASKER,
    temperature = DEFAULT_TEMPERATURE,
    reflectionInterval = DEFAULT_REFLECTION_INTERVAL_TASKER,
    stepObserver,
    stepDelay = DEFAULT_STEP_DELAY
  } = options;
  const agent = new TaskerAgent(
    apiKey,
    baseURL,
    model,
    maxSteps,
    temperature,
    reflectionInterval,
    void 0,
    // A null observer is normalized to undefined.
    stepObserver ?? void 0,
    stepDelay
  );

  // Sidebar labels to visit; each becomes one "Click on ..." todo.
  const sidebarEntries = [
    "Dashboard",
    "Downloads",
    "Lyrics",
    "Plugins",
    "Search Results",
    "Settings",
    "Equalizer",
    "Visualizer",
    "Listening History",
    "Favorite Albums",
    "Favorite Tracks",
    "Favorite Artists",
    "Local Library",
    "Playlists"
  ];
  const todos = sidebarEntries.map(
    (label) => `Click on '${label}' in the left sidebar`
  );

  agent.setTask(
    "QA: click through every sidebar button in the Nuclear Player UI",
    todos
  );
  return agent;
});
1322
2262
 
1323
2263
  // src/handler.ts
1324
2264
  var import_robotjs = __toESM(require("robotjs"), 1);
1325
2265
  var import_sharp = __toESM(require("sharp"), 1);
1326
- var sleep2 = (ms) => new Promise((r) => setTimeout(r, ms));
2266
// Promise-based delay: resolves (with no value) once a timer of `ms`
// milliseconds has fired.
var sleep3 = (ms) => new Promise((resolve) => {
  setTimeout(resolve, ms);
});
1327
2267
  var toSharpKernel = (resample) => {
1328
2268
  switch (resample) {
1329
2269
  case "NEAREST":
@@ -1484,7 +2424,7 @@ var DefaultActionHandler = class {
1484
2424
  import_robotjs.default.moveMouse(p1.x, p1.y);
1485
2425
  import_robotjs.default.mouseToggle("down", "left");
1486
2426
  import_robotjs.default.dragMouse(p2.x, p2.y);
1487
- await sleep2(this.#cfg.dragDurationMs);
2427
+ await sleep3(this.#cfg.dragDurationMs);
1488
2428
  import_robotjs.default.mouseToggle("up", "left");
1489
2429
  return;
1490
2430
  }
@@ -1504,7 +2444,7 @@ var DefaultActionHandler = class {
1504
2444
  if (!last) return;
1505
2445
  const modifiers = keys.slice(0, -1);
1506
2446
  import_robotjs.default.keyTap(last, modifiers.length ? modifiers : []);
1507
- await sleep2(this.#cfg.hotkeyDelayMs);
2447
+ await sleep3(this.#cfg.hotkeyDelayMs);
1508
2448
  return;
1509
2449
  }
1510
2450
  case "type": {
@@ -1524,10 +2464,11 @@ var DefaultActionHandler = class {
1524
2464
  return;
1525
2465
  }
1526
2466
  case "wait": {
1527
- await sleep2(this.#cfg.waitDurationMs);
2467
+ await sleep3(this.#cfg.waitDurationMs);
1528
2468
  return;
1529
2469
  }
1530
- case "finish": {
2470
+ case "finish":
2471
+ case "fail": {
1531
2472
  this.reset();
1532
2473
  return;
1533
2474
  }
@@ -1614,7 +2555,7 @@ var StepTracker = class extends StepObserver {
1614
2555
 
1615
2556
  // src/cli/agent.ts
1616
2557
  var import_node_mac_permissions = __toESM(require("@hurdlegroup/node-mac-permissions"), 1);
1617
- var logger5 = logger_default("cli.agent");
2558
+ var logger6 = logger_default("cli.agent");
1618
2559
  var checkPermissions = async () => {
1619
2560
  if (process.platform !== "darwin") {
1620
2561
  process.stdout.write(
@@ -1728,7 +2669,7 @@ If you're using pnpm and robotjs is installed, you may need to run: pnpm approve
1728
2669
  if (interrupted) {
1729
2670
  process.exitCode = 130;
1730
2671
  } else {
1731
- logger5.error(`Error during agent execution: ${String(err)}`);
2672
+ logger6.error(`Error during agent execution: ${String(err)}`);
1732
2673
  process.exitCode = 1;
1733
2674
  }
1734
2675
  } finally {
@@ -1866,11 +2807,11 @@ var addConfigCommand = (program) => {
1866
2807
  };
1867
2808
 
1868
2809
  // src/cli/version.ts
1869
- var import_module = require("module");
1870
- var import_meta2 = {};
1871
- var getSdkVersion = () => {
2810
+ var import_module2 = require("module");
2811
+ var import_meta3 = {};
2812
+ var getSdkVersion2 = () => {
1872
2813
  try {
1873
- const require2 = (0, import_module.createRequire)(import_meta2.url);
2814
+ const require2 = (0, import_module2.createRequire)(import_meta3.url);
1874
2815
  for (const p of ["../package.json", "../../package.json"]) {
1875
2816
  try {
1876
2817
  const pkg = require2(p);
@@ -1883,7 +2824,7 @@ var getSdkVersion = () => {
1883
2824
  return "unknown";
1884
2825
  };
1885
2826
  var displayVersion = () => {
1886
- const sdkVersion = getSdkVersion();
2827
+ const sdkVersion = getSdkVersion2();
1887
2828
  process.stdout.write(`OAGI SDK version: ${sdkVersion}
1888
2829
  `);
1889
2830
  process.stdout.write(`Node version: ${process.version}