@oagi/oagi 0.1.5 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.cjs CHANGED
@@ -46,6 +46,7 @@ var ActionTypeSchema = z.enum([
46
46
  "type",
47
47
  "scroll",
48
48
  "finish",
49
+ "fail",
49
50
  "wait",
50
51
  "call_user"
51
52
  ]);
@@ -586,11 +587,11 @@ var MODE_ACTOR = "actor";
586
587
  var DEFAULT_MAX_STEPS = 20;
587
588
  var DEFAULT_MAX_STEPS_THINKER = 100;
588
589
  var DEFAULT_MAX_STEPS_TASKER = 60;
589
- var MAX_STEPS_ACTOR = 30;
590
- var MAX_STEPS_THINKER = 120;
590
+ var MAX_STEPS_ACTOR = 100;
591
+ var MAX_STEPS_THINKER = 300;
591
592
  var DEFAULT_REFLECTION_INTERVAL = 4;
592
593
  var DEFAULT_REFLECTION_INTERVAL_TASKER = 20;
593
- var DEFAULT_STEP_DELAY = 0.3;
594
+ var DEFAULT_STEP_DELAY = 1;
594
595
  var DEFAULT_TEMPERATURE = 0.5;
595
596
  var DEFAULT_TEMPERATURE_LOW = 0.1;
596
597
  var HTTP_CLIENT_TIMEOUT = 60;
@@ -775,7 +776,9 @@ var parseRawOutput = (rawOutput) => {
775
776
  return {
776
777
  reason,
777
778
  actions,
778
- stop: actions.some((action2) => action2.type === "finish")
779
+ stop: actions.some(
780
+ (action2) => action2.type === "finish" || action2.type === "fail"
781
+ )
779
782
  };
780
783
  };
781
784
 
@@ -1369,8 +1372,7 @@ var PlannerMemory = class {
1369
1372
  pending: 0,
1370
1373
  in_progress: 0,
1371
1374
  completed: 0,
1372
- skipped: 0,
1373
- blocked: 0
1375
+ skipped: 0
1374
1376
  };
1375
1377
  for (const todo of this.todos) {
1376
1378
  summary[todo.status] = (summary[todo.status] ?? 0) + 1;
@@ -2259,8 +2261,32 @@ asyncAgentRegister("tasker:software_qa")(
2259
2261
  );
2260
2262
 
2261
2263
  // src/handler.ts
2262
- var import_robotjs = __toESM(require("robotjs"), 1);
2263
- var import_sharp = __toESM(require("sharp"), 1);
2264
+ var _robot;
2265
+ async function getRobot() {
2266
+ if (!_robot) {
2267
+ try {
2268
+ _robot = (await import("robotjs")).default;
2269
+ } catch {
2270
+ throw new Error(
2271
+ "robotjs is not available. Install it with: npm install robotjs\nOn Linux, ensure libx11-dev and libxtst-dev are installed."
2272
+ );
2273
+ }
2274
+ }
2275
+ return _robot;
2276
+ }
2277
+ var _sharp;
2278
+ async function getSharp() {
2279
+ if (!_sharp) {
2280
+ try {
2281
+ _sharp = (await import("sharp")).default;
2282
+ } catch {
2283
+ throw new Error(
2284
+ "sharp is not available. Install it with: npm install sharp"
2285
+ );
2286
+ }
2287
+ }
2288
+ return _sharp;
2289
+ }
2264
2290
  var sleep3 = (ms) => new Promise((r) => setTimeout(r, ms));
2265
2291
  var toSharpKernel = (resample) => {
2266
2292
  switch (resample) {
@@ -2319,8 +2345,10 @@ var ScreenshotMaker = class _ScreenshotMaker {
2319
2345
  return arraybuffer;
2320
2346
  }
2321
2347
  async provide() {
2322
- const { width, height } = import_robotjs.default.getScreenSize();
2323
- const screenshot = import_robotjs.default.screen.capture(0, 0, width, height);
2348
+ const robot = await getRobot();
2349
+ const sharp = await getSharp();
2350
+ const { width, height } = robot.getScreenSize();
2351
+ const screenshot = robot.screen.capture(0, 0, width, height);
2324
2352
  const channels = 3;
2325
2353
  const data = new Uint8Array(
2326
2354
  screenshot.width * screenshot.height * channels
@@ -2334,7 +2362,7 @@ var ScreenshotMaker = class _ScreenshotMaker {
2334
2362
  data[offset + 2] = screenshot.image.readUInt8(offset2 + 0);
2335
2363
  }
2336
2364
  }
2337
- let p = (0, import_sharp.default)(Buffer.from(data), {
2365
+ let p = sharp(Buffer.from(data), {
2338
2366
  raw: {
2339
2367
  width: screenshot.width,
2340
2368
  height: screenshot.height,
@@ -2368,8 +2396,9 @@ var DefaultActionHandler = class {
2368
2396
  }
2369
2397
  }
2370
2398
  }
2371
- #denormalize(x, y) {
2372
- const { width, height } = import_robotjs.default.getScreenSize();
2399
+ async #denormalize(x, y) {
2400
+ const robot = await getRobot();
2401
+ const { width, height } = robot.getScreenSize();
2373
2402
  let px = Math.floor(x * width / 1e3);
2374
2403
  let py = Math.floor(y * height / 1e3);
2375
2404
  if (px < 1) px = 1;
@@ -2379,51 +2408,52 @@ var DefaultActionHandler = class {
2379
2408
  return { x: px, y: py };
2380
2409
  }
2381
2410
  async #handleOne(action) {
2411
+ const robot = await getRobot();
2382
2412
  const arg = stripOuterParens(action.argument);
2383
2413
  switch (action.type) {
2384
2414
  case "click": {
2385
2415
  const coords = parseCoords(arg);
2386
2416
  if (!coords) throw new Error(`Invalid coords: ${arg}`);
2387
- const p = this.#denormalize(coords[0], coords[1]);
2388
- import_robotjs.default.moveMouse(p.x, p.y);
2389
- import_robotjs.default.mouseClick("left", false);
2417
+ const p = await this.#denormalize(coords[0], coords[1]);
2418
+ robot.moveMouse(p.x, p.y);
2419
+ robot.mouseClick("left", false);
2390
2420
  return;
2391
2421
  }
2392
2422
  case "left_double": {
2393
2423
  const coords = parseCoords(arg);
2394
2424
  if (!coords) throw new Error(`Invalid coords: ${arg}`);
2395
- const p = this.#denormalize(coords[0], coords[1]);
2396
- import_robotjs.default.moveMouse(p.x, p.y);
2397
- import_robotjs.default.mouseClick("left", true);
2425
+ const p = await this.#denormalize(coords[0], coords[1]);
2426
+ robot.moveMouse(p.x, p.y);
2427
+ robot.mouseClick("left", true);
2398
2428
  return;
2399
2429
  }
2400
2430
  case "left_triple": {
2401
2431
  const coords = parseCoords(arg);
2402
2432
  if (!coords) throw new Error(`Invalid coords: ${arg}`);
2403
- const p = this.#denormalize(coords[0], coords[1]);
2404
- import_robotjs.default.moveMouse(p.x, p.y);
2405
- import_robotjs.default.mouseClick("left", true);
2406
- import_robotjs.default.mouseClick("left", false);
2433
+ const p = await this.#denormalize(coords[0], coords[1]);
2434
+ robot.moveMouse(p.x, p.y);
2435
+ robot.mouseClick("left", true);
2436
+ robot.mouseClick("left", false);
2407
2437
  return;
2408
2438
  }
2409
2439
  case "right_single": {
2410
2440
  const coords = parseCoords(arg);
2411
2441
  if (!coords) throw new Error(`Invalid coords: ${arg}`);
2412
- const p = this.#denormalize(coords[0], coords[1]);
2413
- import_robotjs.default.moveMouse(p.x, p.y);
2414
- import_robotjs.default.mouseClick("right", false);
2442
+ const p = await this.#denormalize(coords[0], coords[1]);
2443
+ robot.moveMouse(p.x, p.y);
2444
+ robot.mouseClick("right", false);
2415
2445
  return;
2416
2446
  }
2417
2447
  case "drag": {
2418
2448
  const coords = parseDragCoords(arg);
2419
2449
  if (!coords) throw new Error(`Invalid drag coords: ${arg}`);
2420
- const p1 = this.#denormalize(coords[0], coords[1]);
2421
- const p2 = this.#denormalize(coords[2], coords[3]);
2422
- import_robotjs.default.moveMouse(p1.x, p1.y);
2423
- import_robotjs.default.mouseToggle("down", "left");
2424
- import_robotjs.default.dragMouse(p2.x, p2.y);
2450
+ const p1 = await this.#denormalize(coords[0], coords[1]);
2451
+ const p2 = await this.#denormalize(coords[2], coords[3]);
2452
+ robot.moveMouse(p1.x, p1.y);
2453
+ robot.mouseToggle("down", "left");
2454
+ robot.dragMouse(p2.x, p2.y);
2425
2455
  await sleep3(this.#cfg.dragDurationMs);
2426
- import_robotjs.default.mouseToggle("up", "left");
2456
+ robot.mouseToggle("up", "left");
2427
2457
  return;
2428
2458
  }
2429
2459
  case "hotkey": {
@@ -2432,7 +2462,7 @@ var DefaultActionHandler = class {
2432
2462
  });
2433
2463
  if (keys.length === 1 && keys[0] === "capslock") {
2434
2464
  if (this.#cfg.capslockMode === "system") {
2435
- import_robotjs.default.keyTap("capslock");
2465
+ robot.keyTap("capslock");
2436
2466
  } else {
2437
2467
  this.#sessionCapsEnabled = !this.#sessionCapsEnabled;
2438
2468
  }
@@ -2441,31 +2471,32 @@ var DefaultActionHandler = class {
2441
2471
  const last = keys.at(-1);
2442
2472
  if (!last) return;
2443
2473
  const modifiers = keys.slice(0, -1);
2444
- import_robotjs.default.keyTap(last, modifiers.length ? modifiers : []);
2474
+ robot.keyTap(last, modifiers.length ? modifiers : []);
2445
2475
  await sleep3(this.#cfg.hotkeyDelayMs);
2446
2476
  return;
2447
2477
  }
2448
2478
  case "type": {
2449
2479
  const raw = arg.replace(/^['"]/, "").replace(/['"]$/, "");
2450
2480
  const text = applySessionCaps(raw, this.#sessionCapsEnabled);
2451
- import_robotjs.default.typeString(text);
2481
+ robot.typeString(text);
2452
2482
  return;
2453
2483
  }
2454
2484
  case "scroll": {
2455
2485
  const parsed = parseScroll(arg);
2456
2486
  if (!parsed) throw new Error(`Invalid scroll: ${arg}`);
2457
- const p = this.#denormalize(parsed[0], parsed[1]);
2487
+ const p = await this.#denormalize(parsed[0], parsed[1]);
2458
2488
  const direction = parsed[2];
2459
- import_robotjs.default.moveMouse(p.x, p.y);
2489
+ robot.moveMouse(p.x, p.y);
2460
2490
  const amount = direction === "up" ? this.#cfg.scrollAmount : -this.#cfg.scrollAmount;
2461
- import_robotjs.default.scrollMouse(0, amount);
2491
+ robot.scrollMouse(0, amount);
2462
2492
  return;
2463
2493
  }
2464
2494
  case "wait": {
2465
2495
  await sleep3(this.#cfg.waitDurationMs);
2466
2496
  return;
2467
2497
  }
2468
- case "finish": {
2498
+ case "finish":
2499
+ case "fail": {
2469
2500
  this.reset();
2470
2501
  return;
2471
2502
  }
@@ -2551,7 +2582,6 @@ var StepTracker = class extends StepObserver {
2551
2582
  };
2552
2583
 
2553
2584
  // src/cli/agent.ts
2554
- var import_node_mac_permissions = __toESM(require("@hurdlegroup/node-mac-permissions"), 1);
2555
2585
  var logger6 = logger_default("cli.agent");
2556
2586
  var checkPermissions = async () => {
2557
2587
  if (process.platform !== "darwin") {
@@ -2563,18 +2593,28 @@ var checkPermissions = async () => {
2563
2593
  );
2564
2594
  return;
2565
2595
  }
2566
- const screenPermission = import_node_mac_permissions.default.getAuthStatus("screen");
2567
- const accessibilityPermission = import_node_mac_permissions.default.getAuthStatus("accessibility");
2596
+ let macPerm;
2597
+ try {
2598
+ macPerm = (await import("@hurdlegroup/node-mac-permissions")).default;
2599
+ } catch {
2600
+ console.error(
2601
+ "node-mac-permissions not available. Install with: npm install @hurdlegroup/node-mac-permissions"
2602
+ );
2603
+ process.exitCode = 1;
2604
+ return;
2605
+ }
2606
+ const screenPermission = macPerm.getAuthStatus("screen");
2607
+ const accessibilityPermission = macPerm.getAuthStatus("accessibility");
2568
2608
  console.log("Checking permissions...");
2569
2609
  console.log(` ${screenPermission ? "[OK]" : "[MISSING]"} Screen Recording`);
2570
2610
  console.log(
2571
2611
  ` ${accessibilityPermission ? "[OK]" : "[MISSING]"} Accessibility`
2572
2612
  );
2573
2613
  if (!screenPermission) {
2574
- import_node_mac_permissions.default.askForScreenCaptureAccess(true);
2614
+ macPerm.askForScreenCaptureAccess(true);
2575
2615
  }
2576
2616
  if (!accessibilityPermission) {
2577
- import_node_mac_permissions.default.askForAccessibilityAccess();
2617
+ macPerm.askForAccessibilityAccess();
2578
2618
  }
2579
2619
  if (screenPermission && accessibilityPermission) {
2580
2620
  console.log("All permissions granted. You can run the agent.");