visionclaw 0.1.187-beta.9 → 0.1.187-dev.refactor-computer-use-direct-coordinates.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14607,63 +14607,6 @@ var init_types = __esm({
14607
14607
  });
14608
14608
 
14609
14609
  // dist/config/index.js
14610
- var config_exports = {};
14611
- __export(config_exports, {
14612
- addCommandIdToPendingUpgrade: () => addCommandIdToPendingUpgrade,
14613
- buildPricingUrl: () => buildPricingUrl,
14614
- configExists: () => configExists,
14615
- consumePendingRestart: () => consumePendingRestart,
14616
- consumePendingUpgrade: () => consumePendingUpgrade,
14617
- deleteOnboardingSession: () => deleteOnboardingSession,
14618
- ensureConfigDir: () => ensureConfigDir,
14619
- getBaseDir: () => getBaseDir,
14620
- getClaudeAutoMemoryDir: () => getClaudeAutoMemoryDir,
14621
- getClaudeLocalSettingsFile: () => getClaudeLocalSettingsFile,
14622
- getConfigDir: () => getConfigDir,
14623
- getConfigFile: () => getConfigFile,
14624
- getDynamicMcpServersFile: () => getDynamicMcpServersFile,
14625
- getFailedUploadsDir: () => getFailedUploadsDir,
14626
- getMemoriesDir: () => getMemoriesDir,
14627
- getOnboardingSessionFile: () => getOnboardingSessionFile,
14628
- getOwnerFile: () => getOwnerFile,
14629
- getPlaywrightProfileDir: () => getPlaywrightProfileDir,
14630
- getProfile: () => getProfile,
14631
- getProfilePhotosDir: () => getProfilePhotosDir,
14632
- getProfileTunnelConfigPath: () => getProfileTunnelConfigPath,
14633
- getProfileTunnelCredentialsPath: () => getProfileTunnelCredentialsPath,
14634
- getProfileTunnelDir: () => getProfileTunnelDir,
14635
- getProfilesDir: () => getProfilesDir,
14636
- getScreenshotsDir: () => getScreenshotsDir,
14637
- getSessionFile: () => getSessionFile,
14638
- getSkillsDir: () => getSkillsDir,
14639
- getTOTPSecretsFile: () => getTOTPSecretsFile,
14640
- getTelegramAccessFile: () => getTelegramAccessFile,
14641
- getTokensFile: () => getTokensFile,
14642
- getUserCatalogDir: () => getUserCatalogDir,
14643
- isOnboardingComplete: () => isOnboardingComplete,
14644
- loadConfig: () => loadConfig,
14645
- loadDynamicMcpServers: () => loadDynamicMcpServers,
14646
- loadGoogleTokens: () => loadGoogleTokens,
14647
- loadOnboardingSessionId: () => loadOnboardingSessionId,
14648
- loadOwnerConfig: () => loadOwnerConfig,
14649
- loadSessionId: () => loadSessionId,
14650
- loadTOTPSecrets: () => loadTOTPSecrets,
14651
- loadTelegramAccessConfig: () => loadTelegramAccessConfig,
14652
- loadUsageSnapshot: () => loadUsageSnapshot,
14653
- ownerConfigExists: () => ownerConfigExists,
14654
- saveConfig: () => saveConfig,
14655
- saveDynamicMcpServers: () => saveDynamicMcpServers,
14656
- saveGoogleTokens: () => saveGoogleTokens,
14657
- saveOnboardingSessionId: () => saveOnboardingSessionId,
14658
- saveOwnerConfig: () => saveOwnerConfig,
14659
- saveSessionId: () => saveSessionId,
14660
- saveTOTPSecrets: () => saveTOTPSecrets,
14661
- saveTelegramAccessConfig: () => saveTelegramAccessConfig,
14662
- saveUsageSnapshot: () => saveUsageSnapshot,
14663
- setProfile: () => setProfile,
14664
- writePendingRestart: () => writePendingRestart,
14665
- writePendingUpgrade: () => writePendingUpgrade
14666
- });
14667
14610
  function setProfile(name) {
14668
14611
  if (!/^[a-z0-9][a-z0-9-]*$/.test(name)) {
14669
14612
  throw new Error(`Invalid profile name "${name}". Use lowercase letters, numbers, and hyphens.`);
@@ -14676,9 +14619,6 @@ function getProfile() {
14676
14619
  function getBaseDir() {
14677
14620
  return BASE_DIR;
14678
14621
  }
14679
- function getProfilesDir() {
14680
- return PROFILES_DIR;
14681
- }
14682
14622
  function getConfigDir() {
14683
14623
  return import_node_path.default.join(PROFILES_DIR, currentProfile);
14684
14624
  }
@@ -15007,22 +14947,6 @@ function saveSessionId(sessionId, mode) {
15007
14947
  existing.updatedAt = (/* @__PURE__ */ new Date()).toISOString();
15008
14948
  import_node_fs.default.writeFileSync(sessionFile, JSON.stringify(existing, null, 2), "utf-8");
15009
14949
  }
15010
- function loadOnboardingSessionId() {
15011
- const file2 = getOnboardingSessionFile();
15012
- if (!import_node_fs.default.existsSync(file2))
15013
- return null;
15014
- try {
15015
- const raw = JSON.parse(import_node_fs.default.readFileSync(file2, "utf-8"));
15016
- const parsed = raw;
15017
- return parsed.sessionId ?? null;
15018
- } catch {
15019
- return null;
15020
- }
15021
- }
15022
- function saveOnboardingSessionId(sessionId) {
15023
- ensureConfigDir();
15024
- import_node_fs.default.writeFileSync(getOnboardingSessionFile(), JSON.stringify({ sessionId, updatedAt: (/* @__PURE__ */ new Date()).toISOString() }, null, 2), "utf-8");
15025
- }
15026
14950
  function deleteOnboardingSession() {
15027
14951
  const file2 = getOnboardingSessionFile();
15028
14952
  if (import_node_fs.default.existsSync(file2)) {
@@ -20445,7 +20369,7 @@ var require_prompts3 = __commonJS({
20445
20369
 
20446
20370
  // dist/utils/version-check.js
20447
20371
  function isBundled() {
20448
- const v16 = "0.1.187-beta.9";
20372
+ const v16 = "0.1.187-dev.refactor-computer-use-direct-coordinates.1";
20449
20373
  return typeof v16 === "string" && v16 !== "undefined";
20450
20374
  }
20451
20375
  function getPackageRoot() {
@@ -20462,7 +20386,7 @@ function getInstallationInfo() {
20462
20386
  };
20463
20387
  }
20464
20388
  function getCurrentVersion() {
20465
- const bundledVersion = "0.1.187-beta.9";
20389
+ const bundledVersion = "0.1.187-dev.refactor-computer-use-direct-coordinates.1";
20466
20390
  if (bundledVersion && bundledVersion !== "undefined") {
20467
20391
  return bundledVersion;
20468
20392
  }
@@ -24995,7 +24919,7 @@ var require_retry = __commonJS({
24995
24919
  Object.defineProperty(exports2, "__esModule", { value: true });
24996
24920
  exports2.getRetryConfig = getRetryConfig;
24997
24921
  async function getRetryConfig(err7) {
24998
- let config2 = getConfig4(err7);
24922
+ let config2 = getConfig3(err7);
24999
24923
  if (!err7 || !err7.config || !config2 && !err7.config.retry) {
25000
24924
  return { shouldRetry: false };
25001
24925
  }
@@ -25047,7 +24971,7 @@ var require_retry = __commonJS({
25047
24971
  }
25048
24972
  function shouldRetryRequest(err7) {
25049
24973
  var _a7;
25050
- const config2 = getConfig4(err7);
24974
+ const config2 = getConfig3(err7);
25051
24975
  if (err7.name === "AbortError" || ((_a7 = err7.error) === null || _a7 === void 0 ? void 0 : _a7.name) === "AbortError") {
25052
24976
  return false;
25053
24977
  }
@@ -25079,7 +25003,7 @@ var require_retry = __commonJS({
25079
25003
  }
25080
25004
  return true;
25081
25005
  }
25082
- function getConfig4(err7) {
25006
+ function getConfig3(err7) {
25083
25007
  if (err7 && err7.config && err7.config.retryConfig) {
25084
25008
  return err7.config.retryConfig;
25085
25009
  }
@@ -686554,108 +686478,14 @@ function generateAndSetWallpaper(agentName) {
686554
686478
  cleanupOldWallpapers(dest);
686555
686479
  return dest;
686556
686480
  }
686557
- function wallpaperHelperSwiftSource() {
686558
- return `
686559
- import AppKit
686560
-
686561
- let args = CommandLine.arguments
686562
- guard args.count >= 2 else {
686563
- fputs("Usage: wallpaper-helper set <path> | get\\n", stderr)
686564
- exit(1)
686565
- }
686566
-
686567
- let command = args[1]
686568
-
686569
- switch command {
686570
- case "set":
686571
- guard args.count >= 3 else {
686572
- fputs("Usage: wallpaper-helper set <imagePath>\\n", stderr)
686573
- exit(1)
686574
- }
686575
- let imagePath = args[2]
686576
- let url = URL(fileURLWithPath: imagePath)
686577
- do {
686578
- for screen in NSScreen.screens {
686579
- // Preserve the screen's existing display options (scaling, fill colour,
686580
- // clipping). Apple's docs note these come from System Settings; if we
686581
- // pass [:] we silently reset them to defaults, which can change the
686582
- // visual layout of the wallpaper on some setups.
686583
- let existingOptions = NSWorkspace.shared.desktopImageOptions(for: screen) ?? [:]
686584
- try NSWorkspace.shared.setDesktopImageURL(url, for: screen, options: existingOptions)
686585
- }
686586
- } catch {
686587
- fputs("Failed to set wallpaper: \\(error)\\n", stderr)
686588
- exit(1)
686589
- }
686590
-
686591
- case "get":
686592
- if let screen = NSScreen.main,
686593
- let url = NSWorkspace.shared.desktopImageURL(for: screen) {
686594
- print(url.path)
686595
- }
686596
-
686597
- default:
686598
- fputs("Unknown command: \\(command). Use 'set' or 'get'.\\n", stderr)
686599
- exit(1)
686600
- }
686601
- `;
686602
- }
686603
- function getHelperHash() {
686604
- _helperHash ??= (0, import_node_crypto3.createHash)("sha256").update(wallpaperHelperSwiftSource()).digest("hex").slice(0, 12);
686605
- return _helperHash;
686606
- }
686607
- function getWallpaperHelperBin() {
686608
- const hash3 = getHelperHash();
686609
- const binFile = import_node_path11.default.join(import_node_os7.default.tmpdir(), `visionclaw-wallpaper-helper-${hash3}`);
686610
- if (!import_node_fs14.default.existsSync(binFile)) {
686611
- const srcFile = import_node_path11.default.join(import_node_os7.default.tmpdir(), `visionclaw-wallpaper-helper-${hash3}.swift`);
686612
- import_node_fs14.default.writeFileSync(srcFile, wallpaperHelperSwiftSource(), "utf-8");
686613
- const compile = (0, import_node_child_process9.spawnSync)("swiftc", ["-O", "-o", binFile, srcFile], {
686614
- encoding: "utf-8",
686615
- timeout: 6e4,
686616
- stdio: ["pipe", "pipe", "pipe"]
686617
- });
686618
- try {
686619
- import_node_fs14.default.unlinkSync(srcFile);
686620
- } catch {
686621
- }
686622
- if (compile.status !== 0) {
686623
- throw new Error(`Wallpaper helper compilation failed: ${compile.stderr}`);
686624
- }
686625
- }
686626
- return binFile;
686627
- }
686628
686481
  function setDesktopWallpaper(imagePath) {
686629
686482
  if (process.platform !== "darwin")
686630
686483
  return;
686631
- try {
686632
- const bin = getWallpaperHelperBin();
686633
- const result = (0, import_node_child_process9.spawnSync)(bin, ["set", imagePath], {
686634
- encoding: "utf-8",
686635
- timeout: 15e3,
686636
- stdio: ["pipe", "pipe", "pipe"]
686637
- });
686638
- if (result.status === 0)
686639
- return;
686640
- } catch {
686641
- }
686642
686484
  (0, import_node_child_process9.execSync)(`osascript -e 'tell application "Finder" to set desktop picture to POSIX file "${imagePath}"'`, { stdio: "ignore", timeout: 15e3 });
686643
686485
  }
686644
686486
  function getCurrentDesktopWallpaper() {
686645
686487
  if (process.platform !== "darwin")
686646
686488
  return "";
686647
- try {
686648
- const bin = getWallpaperHelperBin();
686649
- const result = (0, import_node_child_process9.spawnSync)(bin, ["get"], {
686650
- encoding: "utf-8",
686651
- timeout: 15e3,
686652
- stdio: ["pipe", "pipe", "pipe"]
686653
- });
686654
- if (result.status === 0 && result.stdout.trim()) {
686655
- return result.stdout.trim();
686656
- }
686657
- } catch {
686658
- }
686659
686489
  try {
686660
686490
  return (0, import_node_child_process9.execSync)(`osascript -e 'tell application "Finder" to get POSIX path of (desktop picture as alias)'`, { encoding: "utf-8", timeout: 15e3, stdio: ["pipe", "pipe", "pipe"] }).trim();
686661
686491
  } catch {
@@ -686679,7 +686509,7 @@ function ensureWallpaper(agentName) {
686679
686509
  }
686680
686510
  return false;
686681
686511
  }
686682
- var import_node_child_process9, import_node_crypto3, import_node_fs14, import_node_os7, import_node_path11, WALLPAPER_META_FILENAME, BG_SETUP, BG_RUNNING, _rendererHash, MACOS_CODENAMES, _helperHash;
686512
+ var import_node_child_process9, import_node_crypto3, import_node_fs14, import_node_os7, import_node_path11, WALLPAPER_META_FILENAME, BG_SETUP, BG_RUNNING, _rendererHash, MACOS_CODENAMES;
686683
686513
  var init_generate_wallpaper = __esm({
686684
686514
  "dist/onboarding/generate-wallpaper.js"() {
686685
686515
  "use strict";
@@ -810608,7 +810438,7 @@ var require_retry4 = __commonJS({
810608
810438
  Object.defineProperty(exports2, "__esModule", { value: true });
810609
810439
  exports2.getRetryConfig = getRetryConfig;
810610
810440
  async function getRetryConfig(err7) {
810611
- let config2 = getConfig4(err7);
810441
+ let config2 = getConfig3(err7);
810612
810442
  if (!err7 || !err7.config || !config2 && !err7.config.retry) {
810613
810443
  return { shouldRetry: false };
810614
810444
  }
@@ -810659,7 +810489,7 @@ var require_retry4 = __commonJS({
810659
810489
  return { shouldRetry: true, config: err7.config };
810660
810490
  }
810661
810491
  function shouldRetryRequest(err7) {
810662
- const config2 = getConfig4(err7);
810492
+ const config2 = getConfig3(err7);
810663
810493
  if (err7.config.signal?.aborted && err7.code !== "TimeoutError" || err7.code === "AbortError") {
810664
810494
  return false;
810665
810495
  }
@@ -810691,7 +810521,7 @@ var require_retry4 = __commonJS({
810691
810521
  }
810692
810522
  return true;
810693
810523
  }
810694
- function getConfig4(err7) {
810524
+ function getConfig3(err7) {
810695
810525
  if (err7 && err7.config && err7.config.retryConfig) {
810696
810526
  return err7.config.retryConfig;
810697
810527
  }
@@ -974873,7 +974703,7 @@ async function sendMessage(params) {
974873
974703
  label: "sendMessage"
974874
974704
  });
974875
974705
  }
974876
- async function getConfig3(params) {
974706
+ async function getConfig2(params) {
974877
974707
  const rawText = await apiPostFetch({
974878
974708
  baseUrl: params.baseUrl,
974879
974709
  endpoint: "ilink/bot/getconfig",
@@ -974942,7 +974772,7 @@ var init_config_cache = __esm({
974942
974772
  if (shouldFetch) {
974943
974773
  let fetchOk = false;
974944
974774
  try {
974945
- const resp = await getConfig3({
974775
+ const resp = await getConfig2({
974946
974776
  baseUrl: this.apiOpts.baseUrl,
974947
974777
  token: this.apiOpts.token,
974948
974778
  ilinkUserId: userId,
@@ -976538,9 +976368,9 @@ var init_weixin = __esm({
976538
976368
  setContextToken(deps.accountId, fromUserId, full.context_token);
976539
976369
  this.contextTokens.set(fromUserId, full.context_token);
976540
976370
  }
976541
- const cachedConfig2 = await deps.configManager.getForUser(fromUserId, full.context_token);
976542
- if (cachedConfig2.typingTicket) {
976543
- this.typingTickets.set(fromUserId, cachedConfig2.typingTicket);
976371
+ const cachedConfig = await deps.configManager.getForUser(fromUserId, full.context_token);
976372
+ if (cachedConfig.typingTicket) {
976373
+ this.typingTickets.set(fromUserId, cachedConfig.typingTicket);
976544
976374
  }
976545
976375
  const textBody = extractTextBody(full.item_list);
976546
976376
  const attachments = [];
@@ -991680,9 +991510,7 @@ Even if you are using Playwright to operate the browser, you should bring the br
991680
991510
 
991681
991511
  ### Desktop Interactions (Computer Use)
991682
991512
 
991683
- For desktop applications and UI outside the browser, use the available desktop/computer-use tools. Some runtimes expose a native computer tool, while others expose the \`computer_use_*\` tool family with AI-powered coordinate resolution.
991684
-
991685
- If you describe something but the computer use tool can not resolve the coordinates, try describe the target in more details. Also, the screenshot will always show the current cursor location.
991513
+ For desktop applications and UI outside the browser, use the available desktop/computer-use tools. The \`computer_use_*\` tools accept pixel coordinates from screenshots \u2014 always take a screenshot first with \`computer_use_screenshot\` to see the screen and determine coordinates, then use those coordinates for click/move/scroll/drag actions.
991686
991514
 
991687
991515
  **Prefer ${config2.browserBackend === "playwriter" ? "Playwriter" : "Playwright"} browser tools** for all in-browser work. Use desktop/computer-use tools only for desktop apps, situations outside the browser, or as a fallback when the browser tools can't do the job.
991688
991516
 
@@ -996065,84 +995893,6 @@ init_transcribe_audio();
996065
995893
  init_zod();
996066
995894
  init_sdk2();
996067
995895
  init_desktop_executor_factory();
996068
-
996069
- // dist/tools/coordinate-resolver.js
996070
- init_logger();
996071
- init_screenshot();
996072
- init_client_factory();
996073
- function clamp(n6, min, max) {
996074
- return Math.max(min, Math.min(max, n6));
996075
- }
996076
- async function resolveCoordinates(screenshotBase64, instruction, displayWidth, displayHeight, config2, screenshotFilePath) {
996077
- const dims = screenshotFilePath ? await getImageDimensionsFromFile(screenshotFilePath) : await getImageDimensions(Buffer.from(screenshotBase64, "base64"));
996078
- if (!dims) {
996079
- logger.warn("Could not read screenshot dimensions, aborting coordinate resolution");
996080
- return null;
996081
- }
996082
- const imgW = dims.width;
996083
- const imgH = dims.height;
996084
- const scaleX = displayWidth / imgW;
996085
- const scaleY = displayHeight / imgH;
996086
- logger.info(`Resolving coordinates: "${instruction}" (display ${displayWidth}x${displayHeight}, image ${imgW}x${imgH}, scaleX=${scaleX.toFixed(3)}, scaleY=${scaleY.toFixed(3)})`, { instruction, displayWidth, displayHeight, imgW, imgH, scaleX, scaleY });
996087
- const client = createClient(config2);
996088
- const response = await client.beta.messages.create({
996089
- model: getModelId(config2),
996090
- max_tokens: 1024,
996091
- betas: ["computer-use-2025-11-24"],
996092
- tools: [
996093
- {
996094
- type: "computer_20251124",
996095
- name: "computer",
996096
- display_width_px: imgW,
996097
- display_height_px: imgH,
996098
- display_number: 1
996099
- }
996100
- ],
996101
- system: "You are a helpful assistant that can resolve natural language targets to pixel coordinates using the computer tool. Call the tool directly, no questions or explanations.",
996102
- tool_choice: { type: "tool", name: "computer" },
996103
- messages: [
996104
- {
996105
- role: "user",
996106
- content: [
996107
- {
996108
- type: "image",
996109
- source: {
996110
- type: "base64",
996111
- media_type: "image/png",
996112
- data: screenshotBase64
996113
- }
996114
- },
996115
- {
996116
- type: "text",
996117
- text: instruction
996118
- }
996119
- ]
996120
- }
996121
- ]
996122
- });
996123
- for (const block of response.content) {
996124
- if (block.type === "tool_use" && block.name === "computer") {
996125
- const input = block.input;
996126
- const coord = input.coordinate;
996127
- const action = input.action ?? "left_click";
996128
- logger.debug(`Coordinate resolver raw response: action=${action} coordinate=${JSON.stringify(coord)}`, { instruction, action, coordinate: coord, fullInput: input });
996129
- if (Array.isArray(coord) && coord.length >= 2) {
996130
- const rawX = coord[0];
996131
- const rawY = coord[1];
996132
- if (typeof rawX === "number" && typeof rawY === "number" && Number.isFinite(rawX) && Number.isFinite(rawY)) {
996133
- const x14 = clamp(Math.round(rawX * scaleX), 0, displayWidth);
996134
- const y12 = clamp(Math.round(rawY * scaleY), 0, displayHeight);
996135
- logger.info(`Resolved coordinates: action=${action} raw=(${rawX},${rawY}) unscaled=(${x14},${y12}) scaleX=${scaleX.toFixed(3)} scaleY=${scaleY.toFixed(3)}`, { instruction, action, rawX, rawY, x: x14, y: y12, scaleX, scaleY });
996136
- return { action, x: x14, y: y12 };
996137
- }
996138
- }
996139
- }
996140
- }
996141
- logger.info(`Could not resolve coordinates: "${instruction}"`, { instruction });
996142
- return null;
996143
- }
996144
-
996145
- // dist/tools/computer-use.js
996146
995896
  init_screenshot();
996147
995897
  init_logger();
996148
995898
  function requireDesktop() {
@@ -996165,48 +995915,32 @@ function requireDesktop() {
996165
995915
  }
996166
995916
  return null;
996167
995917
  }
996168
- var cachedConfig = null;
996169
- async function getConfig() {
996170
- if (cachedConfig)
996171
- return cachedConfig;
996172
- const { loadConfig: loadConfig2 } = await Promise.resolve().then(() => (init_config(), config_exports));
996173
- try {
996174
- cachedConfig = loadConfig2();
996175
- return cachedConfig;
996176
- } catch {
996177
- if (process.env.ANTHROPIC_API_KEY) {
996178
- cachedConfig = {
996179
- model: "claude-sonnet-4-6",
996180
- provider: "anthropic",
996181
- anthropicApiKey: process.env.ANTHROPIC_API_KEY
996182
- };
996183
- return cachedConfig;
996184
- }
996185
- return null;
996186
- }
995918
+ function getScreenshotSize(displaySize) {
995919
+ const scale = getVisionScaleFactor(displaySize.width, displaySize.height);
995920
+ if (scale >= 1)
995921
+ return { ...displaySize };
995922
+ return {
995923
+ width: Math.max(1, Math.round(displaySize.width * scale)),
995924
+ height: Math.max(1, Math.round(displaySize.height * scale))
995925
+ };
996187
995926
  }
996188
- async function resolveAndExecute(instruction, config2, fn2) {
996189
- const { base64: screenshot, filePath: screenshotPath } = await captureScreen3();
996190
- const size = getDisplaySize3();
996191
- const resolved = await resolveCoordinates(screenshot, instruction, size.width, size.height, config2, screenshotPath);
996192
- if (!resolved) {
996193
- return {
996194
- content: [
996195
- {
996196
- type: "text",
996197
- text: `Could not resolve coordinates for: ${instruction}. Please try again with a clearer instruction.`
996198
- }
996199
- ],
996200
- isError: true
996201
- };
996202
- }
996203
- await fn2(resolved.x, resolved.y);
996204
- await new Promise((resolve) => setTimeout(resolve, 1e3));
995927
+ function screenshotToDisplay(point, screenshotSize, displaySize) {
995928
+ return {
995929
+ x: Math.round(point.x / screenshotSize.width * displaySize.width),
995930
+ y: Math.round(point.y / screenshotSize.height * displaySize.height)
995931
+ };
995932
+ }
995933
+ var pointSchema = external_exports.object({
995934
+ x: external_exports.number().min(0).describe("X pixel coordinate in the screenshot image (0 = left edge)"),
995935
+ y: external_exports.number().min(0).describe("Y pixel coordinate in the screenshot image (0 = top edge)")
995936
+ });
995937
+ async function actionResult(text, delayMs = 1e3) {
995938
+ await new Promise((resolve) => setTimeout(resolve, delayMs));
996205
995939
  const { base64: resultScreenshot } = await captureScreen3();
996206
995940
  logger.screenshot(resultScreenshot);
996207
995941
  return {
996208
995942
  content: [
996209
- { type: "text", text: `Executed at (${resolved.x}, ${resolved.y})` },
995943
+ { type: "text", text },
996210
995944
  {
996211
995945
  type: "image",
996212
995946
  data: resultScreenshot,
@@ -996215,184 +995949,88 @@ async function resolveAndExecute(instruction, config2, fn2) {
996215
995949
  ]
996216
995950
  };
996217
995951
  }
996218
- var computerUseClickTool = gl("computer_use_click", "Click on a UI element described in natural language (e.g. 'the Chrome icon', 'the submit button'). Resolves target via AI, then performs left/right/double-click.", {
996219
- target: external_exports.string().describe("Natural language description of the element to click"),
995952
+ function withDesktopCheck(toolName, fn2) {
995953
+ return (async () => {
995954
+ const err7 = requireDesktop();
995955
+ if (err7)
995956
+ return err7;
995957
+ try {
995958
+ return await fn2();
995959
+ } catch (e11) {
995960
+ logger.err(`${toolName} failed`, { error: String(e11) });
995961
+ return {
995962
+ content: [
995963
+ {
995964
+ type: "text",
995965
+ text: `${toolName} failed: ${e11 instanceof Error ? e11.message : String(e11)}`
995966
+ }
995967
+ ],
995968
+ isError: true
995969
+ };
995970
+ }
995971
+ })();
995972
+ }
995973
+ var computerUseClickTool = gl("computer_use_click", "Click at a point on the desktop screen. Coordinates are pixel positions in the screenshot image returned by computer_use_screenshot. Supports left-click, right-click, and double-click.", {
995974
+ point: pointSchema.describe("Pixel coordinates in the screenshot image to click"),
996220
995975
  button: external_exports.enum(["left", "right", "double"]).optional().default("left")
996221
995976
  }, async (args) => {
996222
- const err7 = requireDesktop();
996223
- if (err7)
996224
- return err7;
996225
- const cfg = await getConfig();
996226
- if (!cfg) {
996227
- return {
996228
- content: [{ type: "text", text: "Claude API credentials not configured" }],
996229
- isError: true
996230
- };
996231
- }
996232
- const instruction = args.button === "right" ? `Right-click on: ${args.target}` : args.button === "double" ? `Double-click on: ${args.target}` : `Click on: ${args.target}`;
996233
- const fn2 = args.button === "right" ? (x14, y12) => rightClick3(x14, y12) : args.button === "double" ? (x14, y12) => doubleClick3(x14, y12) : (x14, y12) => click3(x14, y12);
996234
- try {
996235
- return await resolveAndExecute(instruction, cfg, fn2);
996236
- } catch (e11) {
996237
- logger.err("computer_use_click failed", { error: String(e11) });
996238
- return {
996239
- content: [
996240
- {
996241
- type: "text",
996242
- text: `computer_use_click failed: ${e11 instanceof Error ? e11.message : String(e11)}`
996243
- }
996244
- ],
996245
- isError: true
996246
- };
996247
- }
995977
+ return withDesktopCheck("computer_use_click", async () => {
995978
+ const displaySize = getDisplaySize3();
995979
+ const ssSize = getScreenshotSize(displaySize);
995980
+ const abs = screenshotToDisplay(args.point, ssSize, displaySize);
995981
+ if (args.button === "right") {
995982
+ await rightClick3(abs.x, abs.y);
995983
+ } else if (args.button === "double") {
995984
+ await doubleClick3(abs.x, abs.y);
995985
+ } else {
995986
+ await click3(abs.x, abs.y);
995987
+ }
995988
+ return actionResult(`${args.button === "double" ? "Double-clicked" : args.button === "right" ? "Right-clicked" : "Clicked"} at screenshot (${Math.round(args.point.x)}, ${Math.round(args.point.y)}) \u2192 display (${abs.x}, ${abs.y})`);
995989
+ });
996248
995990
  });
996249
- var computerUseMoveTool = gl("computer_use_move", "Move the mouse cursor to a UI element described in natural language (e.g. 'the search bar', 'the file menu'). Resolves target via AI, then moves cursor without clicking.", {
996250
- target: external_exports.string().describe("Natural language description of the element to move the cursor to")
995991
+ var computerUseMoveTool = gl("computer_use_move", "Move the mouse cursor to a point on the desktop screen. Coordinates are pixel positions in the screenshot image.", {
995992
+ point: pointSchema.describe("Pixel coordinates in the screenshot image to move the cursor to")
996251
995993
  }, async (args) => {
996252
- const err7 = requireDesktop();
996253
- if (err7)
996254
- return err7;
996255
- const cfg = await getConfig();
996256
- if (!cfg) {
996257
- return {
996258
- content: [{ type: "text", text: "Claude API credentials not configured" }],
996259
- isError: true
996260
- };
996261
- }
996262
- const instruction = `Move cursor to the center of: ${args.target}`;
996263
- try {
996264
- return await resolveAndExecute(instruction, cfg, async (x14, y12) => {
996265
- await moveTo3(x14, y12);
996266
- });
996267
- } catch (e11) {
996268
- logger.err("computer_use_move failed", { error: String(e11) });
996269
- return {
996270
- content: [
996271
- {
996272
- type: "text",
996273
- text: `computer_use_move failed: ${e11 instanceof Error ? e11.message : String(e11)}`
996274
- }
996275
- ],
996276
- isError: true
996277
- };
996278
- }
995994
+ return withDesktopCheck("computer_use_move", async () => {
995995
+ const displaySize = getDisplaySize3();
995996
+ const ssSize = getScreenshotSize(displaySize);
995997
+ const abs = screenshotToDisplay(args.point, ssSize, displaySize);
995998
+ await moveTo3(abs.x, abs.y);
995999
+ return actionResult(`Moved cursor to screenshot (${Math.round(args.point.x)}, ${Math.round(args.point.y)}) \u2192 display (${abs.x}, ${abs.y})`);
996000
+ });
996279
996001
  });
996280
- var computerUseScrollTool = gl("computer_use_scroll", "Scroll at a target location in a direction. Resolves the scroll target via AI, then scrolls.", {
996281
- target: external_exports.string().describe("Natural language description of where to scroll"),
996002
+ var computerUseScrollTool = gl("computer_use_scroll", "Scroll at a point on the desktop screen in a direction. Coordinates are pixel positions in the screenshot image.", {
996003
+ point: pointSchema.describe("Pixel coordinates in the screenshot image for scroll origin"),
996282
996004
  direction: external_exports.enum(["up", "down", "left", "right"]),
996283
996005
  amount: external_exports.number().optional().default(3).describe("Scroll amount (default 3)")
996284
996006
  }, async (args) => {
996285
- const err7 = requireDesktop();
996286
- if (err7)
996287
- return err7;
996288
- const cfg = await getConfig();
996289
- if (!cfg) {
996290
- return {
996291
- content: [{ type: "text", text: "Claude API credentials not configured" }],
996292
- isError: true
996293
- };
996294
- }
996295
- const instruction = `Move cursor to the center of: ${args.target} (for scrolling)`;
996296
- const amount = args.amount;
996297
- try {
996298
- return await resolveAndExecute(instruction, cfg, async (x14, y12) => {
996299
- await scroll3(x14, y12, args.direction, amount);
996300
- });
996301
- } catch (e11) {
996302
- logger.err("computer_use_scroll failed", { error: String(e11) });
996303
- return {
996304
- content: [
996305
- {
996306
- type: "text",
996307
- text: `computer_use_scroll failed: ${e11 instanceof Error ? e11.message : String(e11)}`
996308
- }
996309
- ],
996310
- isError: true
996311
- };
996312
- }
996007
+ return withDesktopCheck("computer_use_scroll", async () => {
996008
+ const displaySize = getDisplaySize3();
996009
+ const ssSize = getScreenshotSize(displaySize);
996010
+ const abs = screenshotToDisplay(args.point, ssSize, displaySize);
996011
+ await scroll3(abs.x, abs.y, args.direction, args.amount);
996012
+ return actionResult(`Scrolled ${args.direction} at screenshot (${Math.round(args.point.x)}, ${Math.round(args.point.y)}) \u2192 display (${abs.x}, ${abs.y})`);
996013
+ });
996313
996014
  });
996314
- var computerUseDragTool = gl("computer_use_drag", "Drag from one element to another. Resolves both 'from' and 'to' via AI, then performs drag.", {
996315
- from: external_exports.string().describe("Natural language description of drag start"),
996316
- to: external_exports.string().describe("Natural language description of drag end")
996015
+ var computerUseDragTool = gl("computer_use_drag", "Drag from one point to another on the desktop screen. Coordinates are pixel positions in the screenshot image.", {
996016
+ startPoint: pointSchema.describe("Pixel coordinates in the screenshot image (drag source)"),
996017
+ endPoint: pointSchema.describe("Pixel coordinates in the screenshot image (drop target)")
996317
996018
  }, async (args) => {
996318
- const err7 = requireDesktop();
996319
- if (err7)
996320
- return err7;
996321
- const cfg = await getConfig();
996322
- if (!cfg) {
996323
- return {
996324
- content: [{ type: "text", text: "Claude API credentials not configured" }],
996325
- isError: true
996326
- };
996327
- }
996328
- const { base64: screenshot, filePath: screenshotPath } = await captureScreen3();
996329
- const size = getDisplaySize3();
996330
- const [fromResolved, toResolved] = await Promise.all([
996331
- resolveCoordinates(screenshot, `Move cursor to the center of: ${args.from} (for dragging)`, size.width, size.height, cfg, screenshotPath),
996332
- resolveCoordinates(screenshot, `Move cursor to the center of: ${args.to} (for dragging)`, size.width, size.height, cfg, screenshotPath)
996333
- ]);
996334
- if (!fromResolved) {
996335
- return {
996336
- content: [
996337
- {
996338
- type: "text",
996339
- text: `Could not resolve 'from' coordinates: ${args.from}. Please try again with a clearer instruction.`
996340
- }
996341
- ],
996342
- isError: true
996343
- };
996344
- }
996345
- if (!toResolved) {
996346
- return {
996347
- content: [
996348
- {
996349
- type: "text",
996350
- text: `Could not resolve 'to' coordinates: ${args.to}. Please try again with a clearer instruction.`
996351
- }
996352
- ],
996353
- isError: true
996354
- };
996355
- }
996356
- try {
996357
- await drag3(fromResolved.x, fromResolved.y, toResolved.x, toResolved.y);
996358
- await new Promise((resolve) => setTimeout(resolve, 1e3));
996359
- const { base64: resultScreenshot } = await captureScreen3();
996360
- logger.screenshot(resultScreenshot);
996361
- return {
996362
- content: [
996363
- {
996364
- type: "text",
996365
- text: `Dragged from (${fromResolved.x}, ${fromResolved.y}) to (${toResolved.x}, ${toResolved.y})`
996366
- },
996367
- {
996368
- type: "image",
996369
- data: resultScreenshot,
996370
- mimeType: "image/png"
996371
- }
996372
- ]
996373
- };
996374
- } catch (e11) {
996375
- logger.err("computer_use_drag failed", { error: String(e11) });
996376
- return {
996377
- content: [
996378
- {
996379
- type: "text",
996380
- text: `computer_use_drag failed: ${e11 instanceof Error ? e11.message : String(e11)}`
996381
- }
996382
- ],
996383
- isError: true
996384
- };
996385
- }
996019
+ return withDesktopCheck("computer_use_drag", async () => {
996020
+ const displaySize = getDisplaySize3();
996021
+ const ssSize = getScreenshotSize(displaySize);
996022
+ const start = screenshotToDisplay(args.startPoint, ssSize, displaySize);
996023
+ const end = screenshotToDisplay(args.endPoint, ssSize, displaySize);
996024
+ await drag3(start.x, start.y, end.x, end.y);
996025
+ return actionResult(`Dragged from screenshot (${Math.round(args.startPoint.x)}, ${Math.round(args.startPoint.y)}) to (${Math.round(args.endPoint.x)}, ${Math.round(args.endPoint.y)}) \u2192 display (${start.x}, ${start.y}) to (${end.x}, ${end.y})`);
996026
+ });
996386
996027
  });
996387
996028
  var computerUseTypeTool = gl("computer_use_type", "Type text into the focused field. Optionally clear the field first (replace) and/or press Enter after.", {
996388
996029
  text: external_exports.string().describe("Text to type"),
996389
996030
  replace: external_exports.boolean().optional().default(false).describe("If true, select-all and replace existing text first"),
996390
996031
  press_enter: external_exports.boolean().optional().default(false).describe("If true, press Enter after typing")
996391
996032
  }, async (args) => {
996392
- const err7 = requireDesktop();
996393
- if (err7)
996394
- return err7;
996395
- try {
996033
+ return withDesktopCheck("computer_use_type", async () => {
996396
996034
  if (args.replace) {
996397
996035
  await pressKey3(process.platform === "win32" ? "ctrl+a" : "cmd+a");
996398
996036
  }
@@ -996400,78 +996038,33 @@ var computerUseTypeTool = gl("computer_use_type", "Type text into the focused fi
996400
996038
  if (args.press_enter) {
996401
996039
  await pressKey3("enter");
996402
996040
  }
996403
- await new Promise((resolve) => setTimeout(resolve, 1e3));
996404
- const { base64: resultScreenshot } = await captureScreen3();
996405
- logger.screenshot(resultScreenshot);
996406
- return {
996407
- content: [
996408
- { type: "text", text: `Typed text${args.press_enter ? " and pressed Enter" : ""}` },
996409
- {
996410
- type: "image",
996411
- data: resultScreenshot,
996412
- mimeType: "image/png"
996413
- }
996414
- ]
996415
- };
996416
- } catch (e11) {
996417
- logger.err("computer_use_type failed", { error: String(e11) });
996418
- return {
996419
- content: [
996420
- {
996421
- type: "text",
996422
- text: `computer_use_type failed: ${e11 instanceof Error ? e11.message : String(e11)}`
996423
- }
996424
- ],
996425
- isError: true
996426
- };
996427
- }
996041
+ return actionResult(`Typed text${args.press_enter ? " and pressed Enter" : ""}`);
996042
+ });
996428
996043
  });
996429
996044
  var computerUseKeyTool = gl("computer_use_key", "Press a key or key combination (e.g. 'enter', 'escape', 'cmd+s', 'ctrl+shift+t').", {
996430
996045
  key: external_exports.string().describe("Key or combo: 'enter', 'escape', 'cmd+s', 'ctrl+shift+t', etc.")
996431
996046
  }, async (args) => {
996432
- const err7 = requireDesktop();
996433
- if (err7)
996434
- return err7;
996435
- try {
996047
+ return withDesktopCheck("computer_use_key", async () => {
996436
996048
  await pressKey3(args.key);
996437
- await new Promise((resolve) => setTimeout(resolve, 1e3));
996438
- const { base64: resultScreenshot } = await captureScreen3();
996439
- logger.screenshot(resultScreenshot);
996440
- return {
996441
- content: [
996442
- { type: "text", text: `Pressed: ${args.key}` },
996443
- {
996444
- type: "image",
996445
- data: resultScreenshot,
996446
- mimeType: "image/png"
996447
- }
996448
- ]
996449
- };
996450
- } catch (e11) {
996451
- logger.err("computer_use_key failed", { error: String(e11) });
996452
- return {
996453
- content: [
996454
- {
996455
- type: "text",
996456
- text: `computer_use_key failed: ${e11 instanceof Error ? e11.message : String(e11)}`
996457
- }
996458
- ],
996459
- isError: true
996460
- };
996461
- }
996049
+ return actionResult(`Pressed: ${args.key}`);
996050
+ });
996462
996051
  });
996463
- var computerUseScreenshotTool = gl("computer_use_screenshot", "Capture a screenshot of the current desktop screen. Returns the image as base64 PNG and the file path where it is saved.", {}, async (_args) => {
996052
+ var computerUseScreenshotTool = gl("computer_use_screenshot", "Capture a screenshot of the current desktop screen. Returns the image as base64 PNG. Use the pixel coordinates from this image when calling click, move, scroll, or drag tools.", {}, async (_args) => {
996464
996053
  const err7 = requireDesktop();
996465
996054
  if (err7)
996466
996055
  return err7;
996467
996056
  try {
996057
+ const displaySize = getDisplaySize3();
996058
+ const ssSize = getScreenshotSize(displaySize);
996468
996059
  const { base64: base643, filePath } = await takeScreenshot();
996469
996060
  logger.screenshot(base643);
996470
996061
  return {
996471
996062
  content: [
996472
996063
  {
996473
996064
  type: "text",
996474
- text: `Screenshot saved to ${filePath}`
996065
+ text: `Screenshot saved to ${filePath}
996066
+ Screenshot image size: ${ssSize.width}x${ssSize.height}px | Display resolution: ${displaySize.width}x${displaySize.height}px
996067
+ Use pixel coordinates from this image when calling click/move/scroll/drag tools.`
996475
996068
  },
996476
996069
  {
996477
996070
  type: "image",
@@ -996910,7 +996503,7 @@ async function ensureDisplaySize() {
996910
996503
  await initDisplaySize4();
996911
996504
  }
996912
996505
  }
996913
- function getScreenshotSize(deviceSize) {
996506
+ function getScreenshotSize2(deviceSize) {
996914
996507
  const scale = getVisionScaleFactor(deviceSize.width, deviceSize.height);
996915
996508
  if (scale >= 1)
996916
996509
  return { ...deviceSize };
@@ -996925,11 +996518,11 @@ function screenshotToDevice(point, screenshotSize, deviceSize) {
996925
996518
  y: Math.round(point.y / screenshotSize.height * deviceSize.height)
996926
996519
  };
996927
996520
  }
996928
- var pointSchema = external_exports.object({
996521
+ var pointSchema2 = external_exports.object({
996929
996522
  x: external_exports.number().min(0).describe("X pixel coordinate in the screenshot image (0 = left edge)"),
996930
996523
  y: external_exports.number().min(0).describe("Y pixel coordinate in the screenshot image (0 = top edge)")
996931
996524
  });
996932
- async function actionResult(text, delayMs = 1e3) {
996525
+ async function actionResult2(text, delayMs = 1e3) {
996933
996526
  await new Promise((resolve) => setTimeout(resolve, delayMs));
996934
996527
  const { base64: resultScreenshot } = await captureScreen4();
996935
996528
  logger.screenshot(resultScreenshot);
@@ -996991,34 +996584,34 @@ function normalizeKey(raw) {
996991
996584
  return KEY_ALIASES[normalized] ?? normalized;
996992
996585
  }
996993
996586
  var androidUseClickTool = gl("android_use_click", "Click (tap) at a point on the Android device screen. Coordinates are pixel positions in the screenshot image returned by android_use_screenshot. Supports single tap, double tap, and button variants.", {
996994
- point: pointSchema.describe("Pixel coordinates in the screenshot image to click"),
996587
+ point: pointSchema2.describe("Pixel coordinates in the screenshot image to click"),
996995
996588
  button: external_exports.enum(["left", "right", "middle"]).optional().default("left").describe("Click button (default: left). On Android, all buttons map to tap."),
996996
996589
  double: external_exports.boolean().optional().default(false).describe("If true, perform a double-tap instead of a single tap")
996997
996590
  }, async (args) => {
996998
996591
  return withDeviceCheck("android_use_click", async () => {
996999
996592
  await ensureDisplaySize();
997000
996593
  const deviceSize = getDisplaySize4();
997001
- const ssSize = getScreenshotSize(deviceSize);
996594
+ const ssSize = getScreenshotSize2(deviceSize);
997002
996595
  const abs = screenshotToDevice(args.point, ssSize, deviceSize);
997003
996596
  if (args.double) {
997004
996597
  await doubleTap(abs.x, abs.y);
997005
996598
  } else {
997006
996599
  await tap(abs.x, abs.y);
997007
996600
  }
997008
- return actionResult(`${args.double ? "Double-clicked" : "Clicked"} at screenshot (${Math.round(args.point.x)}, ${Math.round(args.point.y)}) \u2192 device (${abs.x}, ${abs.y})`);
996601
+ return actionResult2(`${args.double ? "Double-clicked" : "Clicked"} at screenshot (${Math.round(args.point.x)}, ${Math.round(args.point.y)}) \u2192 device (${abs.x}, ${abs.y})`);
997009
996602
  });
997010
996603
  });
997011
996604
  var androidUseLongPressTool = gl("android_use_long_press", "Long press at a point on the Android device screen. Coordinates are pixel positions in the screenshot image.", {
997012
- point: pointSchema.describe("Pixel coordinates in the screenshot image to long-press"),
996605
+ point: pointSchema2.describe("Pixel coordinates in the screenshot image to long-press"),
997013
996606
  durationMs: external_exports.number().int().positive().optional().default(1e3).describe("Long press duration in milliseconds (default: 1000)")
997014
996607
  }, async (args) => {
997015
996608
  return withDeviceCheck("android_use_long_press", async () => {
997016
996609
  await ensureDisplaySize();
997017
996610
  const deviceSize = getDisplaySize4();
997018
- const ssSize = getScreenshotSize(deviceSize);
996611
+ const ssSize = getScreenshotSize2(deviceSize);
997019
996612
  const abs = screenshotToDevice(args.point, ssSize, deviceSize);
997020
996613
  await longPress(abs.x, abs.y, args.durationMs);
997021
- return actionResult(`Long-pressed at screenshot (${Math.round(args.point.x)}, ${Math.round(args.point.y)}) \u2192 device (${abs.x}, ${abs.y}) for ${args.durationMs}ms`);
996614
+ return actionResult2(`Long-pressed at screenshot (${Math.round(args.point.x)}, ${Math.round(args.point.y)}) \u2192 device (${abs.x}, ${abs.y}) for ${args.durationMs}ms`);
997022
996615
  });
997023
996616
  });
997024
996617
  var androidUseTypeTool = gl("android_use_type", "Type text into the currently focused input field on the Android device. Optionally replace existing text first (select all + delete before typing).", {
@@ -997027,18 +996620,18 @@ var androidUseTypeTool = gl("android_use_type", "Type text into the currently fo
997027
996620
  }, async (args) => {
997028
996621
  return withDeviceCheck("android_use_type", async () => {
997029
996622
  await typeText4(args.content, { replace: args.replace });
997030
- return actionResult(`Typed "${args.content.length > 50 ? args.content.slice(0, 50) + "..." : args.content}"${args.replace ? " (replaced existing text)" : ""}`);
996623
+ return actionResult2(`Typed "${args.content.length > 50 ? args.content.slice(0, 50) + "..." : args.content}"${args.replace ? " (replaced existing text)" : ""}`);
997031
996624
  });
997032
996625
  });
997033
996626
  var androidUseScrollTool = gl("android_use_scroll", "Scroll the Android device screen in a direction from a point. Coordinates are pixel positions in the screenshot image. Amount is a float in [0, 1] representing scroll distance relative to screen size.", {
997034
996627
  direction: external_exports.enum(["up", "down", "left", "right"]).describe("Scroll direction"),
997035
996628
  amount: external_exports.number().min(0).max(1).describe("Scroll distance as fraction of screen size (0.0 to 1.0, e.g. 0.25 = quarter screen)"),
997036
- point: pointSchema.optional().describe("Pixel coordinates in the screenshot image for scroll origin (default: center of screen)")
996629
+ point: pointSchema2.optional().describe("Pixel coordinates in the screenshot image for scroll origin (default: center of screen)")
997037
996630
  }, async (args) => {
997038
996631
  return withDeviceCheck("android_use_scroll", async () => {
997039
996632
  await ensureDisplaySize();
997040
996633
  const deviceSize = getDisplaySize4();
997041
- const ssSize = getScreenshotSize(deviceSize);
996634
+ const ssSize = getScreenshotSize2(deviceSize);
997042
996635
  const ssPoint = args.point ?? {
997043
996636
  x: ssSize.width / 2,
997044
996637
  y: ssSize.height / 2
@@ -997065,22 +996658,22 @@ var androidUseScrollTool = gl("android_use_scroll", "Scroll the Android device s
997065
996658
  endX = Math.max(0, Math.min(deviceSize.width, endX));
997066
996659
  endY = Math.max(0, Math.min(deviceSize.height, endY));
997067
996660
  await swipe(origin.x, origin.y, endX, endY, 300);
997068
- return actionResult(`Scrolled ${args.direction} by ${(args.amount * 100).toFixed(0)}% from screenshot (${Math.round(ssPoint.x)}, ${Math.round(ssPoint.y)})`);
996661
+ return actionResult2(`Scrolled ${args.direction} by ${(args.amount * 100).toFixed(0)}% from screenshot (${Math.round(ssPoint.x)}, ${Math.round(ssPoint.y)})`);
997069
996662
  });
997070
996663
  });
997071
996664
  var androidUseDragTool = gl("android_use_drag", "Drag from one point to another on the Android device. Coordinates are pixel positions in the screenshot image. Suitable for drag-and-drop operations (moving icons, reordering list items).", {
997072
- startPoint: pointSchema.describe("Pixel coordinates in the screenshot image (drag source)"),
997073
- endPoint: pointSchema.describe("Pixel coordinates in the screenshot image (drop target)"),
996665
+ startPoint: pointSchema2.describe("Pixel coordinates in the screenshot image (drag source)"),
996666
+ endPoint: pointSchema2.describe("Pixel coordinates in the screenshot image (drop target)"),
997074
996667
  durationMs: external_exports.number().int().positive().optional().default(800).describe("Drag duration in milliseconds (default: 800, longer than swipe for drag-and-drop)")
997075
996668
  }, async (args) => {
997076
996669
  return withDeviceCheck("android_use_drag", async () => {
997077
996670
  await ensureDisplaySize();
997078
996671
  const deviceSize = getDisplaySize4();
997079
- const ssSize = getScreenshotSize(deviceSize);
996672
+ const ssSize = getScreenshotSize2(deviceSize);
997080
996673
  const start = screenshotToDevice(args.startPoint, ssSize, deviceSize);
997081
996674
  const end = screenshotToDevice(args.endPoint, ssSize, deviceSize);
997082
996675
  await swipe(start.x, start.y, end.x, end.y, args.durationMs);
997083
- return actionResult(`Dragged from screenshot (${Math.round(args.startPoint.x)}, ${Math.round(args.startPoint.y)}) to (${Math.round(args.endPoint.x)}, ${Math.round(args.endPoint.y)}) over ${args.durationMs}ms`);
996676
+ return actionResult2(`Dragged from screenshot (${Math.round(args.startPoint.x)}, ${Math.round(args.startPoint.y)}) to (${Math.round(args.endPoint.x)}, ${Math.round(args.endPoint.y)}) over ${args.durationMs}ms`);
997084
996677
  });
997085
996678
  });
997086
996679
  var androidUsePressTool = gl("android_use_press", "Press a key or button on the Android device. Supports standard keys: enter, tab, escape, backspace, delete, space, arrow keys (up/down/left/right), mobile keys (home, back, menu, app_switch, volume_up, volume_down, power), and function keys (f1-f12).", {
@@ -997089,7 +996682,7 @@ var androidUsePressTool = gl("android_use_press", "Press a key or button on the
997089
996682
  return withDeviceCheck("android_use_press", async () => {
997090
996683
  const normalized = normalizeKey(args.key);
997091
996684
  await pressKey4(normalized);
997092
- return actionResult(`Pressed: ${normalized}${normalized !== args.key ? ` (normalized from "${args.key}")` : ""}`);
996685
+ return actionResult2(`Pressed: ${normalized}${normalized !== args.key ? ` (normalized from "${args.key}")` : ""}`);
997093
996686
  });
997094
996687
  });
997095
996688
  var androidUseScreenshotTool = gl("android_use_screenshot", "Capture a screenshot of the connected Android device screen. Returns the image as base64 PNG. The image is resized for vision model processing. Use the pixel coordinates from this image when calling click, long_press, scroll, or drag tools.", {}, async (_args) => {
@@ -997099,7 +996692,7 @@ var androidUseScreenshotTool = gl("android_use_screenshot", "Capture a screensho
997099
996692
  try {
997100
996693
  await ensureDisplaySize();
997101
996694
  const deviceSize = getDisplaySize4();
997102
- const ssSize = getScreenshotSize(deviceSize);
996695
+ const ssSize = getScreenshotSize2(deviceSize);
997103
996696
  const { base64: base643, filePath } = await captureScreen4();
997104
996697
  logger.screenshot(base643);
997105
996698
  return {
@@ -997137,7 +996730,7 @@ var androidUseOpenTool = gl("android_use_open", "Open (launch) an app on the And
997137
996730
  await new Promise((resolve) => setTimeout(resolve, 2e3));
997138
996731
  await ensureDisplaySize();
997139
996732
  const deviceSize = getDisplaySize4();
997140
- const ssSize = getScreenshotSize(deviceSize);
996733
+ const ssSize = getScreenshotSize2(deviceSize);
997141
996734
  const { base64: resultScreenshot } = await captureScreen4();
997142
996735
  logger.screenshot(resultScreenshot);
997143
996736
  let activity = "";
@@ -1029367,7 +1028960,7 @@ var _config = void 0;
1029367
1028960
  function resetArchiveConfigCache() {
1029368
1028961
  _config = void 0;
1029369
1028962
  }
1029370
- function getConfig2() {
1028963
+ function getConfig() {
1029371
1028964
  if (_config === void 0) {
1029372
1028965
  try {
1029373
1028966
  _config = buildArchiveConfig(loadConfig());
@@ -1029378,7 +1028971,7 @@ function getConfig2() {
1029378
1028971
  return _config;
1029379
1028972
  }
1029380
1028973
  function isArchiveEnabled() {
1029381
- return getConfig2() !== null;
1028974
+ return getConfig() !== null;
1029382
1028975
  }
1029383
1028976
  var TEXT_CHUNK_KEYWORD = "vc-archive-id";
1029384
1028977
  function pngCrc32(data2) {
@@ -1029526,7 +1029119,7 @@ async function drainFailedUploads(config2) {
1029526
1029119
  }
1029527
1029120
  }
1029528
1029121
  function archiveImage(base64Data, mediaType, sessionId) {
1029529
- const config2 = getConfig2();
1029122
+ const config2 = getConfig();
1029530
1029123
  if (!config2)
1029531
1029124
  return null;
1029532
1029125
  const now = /* @__PURE__ */ new Date();