visionclaw 0.1.187-beta.9 → 0.1.187-dev.refactor-computer-use-direct-coordinates.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/providers/client-factory.d.ts +1 -1
- package/dist/agent/providers/client-factory.js +1 -1
- package/dist/agent/system-prompt.d.ts.map +1 -1
- package/dist/agent/system-prompt.js +1 -3
- package/dist/agent/system-prompt.js.map +1 -1
- package/dist/builtin-skills/macos-automation/SKILL.md +13 -10
- package/dist/onboarding/generate-wallpaper.d.ts +3 -8
- package/dist/onboarding/generate-wallpaper.d.ts.map +1 -1
- package/dist/onboarding/generate-wallpaper.js +3 -123
- package/dist/onboarding/generate-wallpaper.js.map +1 -1
- package/dist/tools/computer-use.d.ts +56 -6
- package/dist/tools/computer-use.d.ts.map +1 -1
- package/dist/tools/computer-use.js +129 -286
- package/dist/tools/computer-use.js.map +1 -1
- package/dist-agent/bundle.cjs +136 -543
- package/package.json +1 -1
- package/dist/tools/coordinate-resolver.d.ts +0 -30
- package/dist/tools/coordinate-resolver.d.ts.map +0 -1
- package/dist/tools/coordinate-resolver.js +0 -104
- package/dist/tools/coordinate-resolver.js.map +0 -1
package/dist-agent/bundle.cjs
CHANGED
|
@@ -14607,63 +14607,6 @@ var init_types = __esm({
|
|
|
14607
14607
|
});
|
|
14608
14608
|
|
|
14609
14609
|
// dist/config/index.js
|
|
14610
|
-
var config_exports = {};
|
|
14611
|
-
__export(config_exports, {
|
|
14612
|
-
addCommandIdToPendingUpgrade: () => addCommandIdToPendingUpgrade,
|
|
14613
|
-
buildPricingUrl: () => buildPricingUrl,
|
|
14614
|
-
configExists: () => configExists,
|
|
14615
|
-
consumePendingRestart: () => consumePendingRestart,
|
|
14616
|
-
consumePendingUpgrade: () => consumePendingUpgrade,
|
|
14617
|
-
deleteOnboardingSession: () => deleteOnboardingSession,
|
|
14618
|
-
ensureConfigDir: () => ensureConfigDir,
|
|
14619
|
-
getBaseDir: () => getBaseDir,
|
|
14620
|
-
getClaudeAutoMemoryDir: () => getClaudeAutoMemoryDir,
|
|
14621
|
-
getClaudeLocalSettingsFile: () => getClaudeLocalSettingsFile,
|
|
14622
|
-
getConfigDir: () => getConfigDir,
|
|
14623
|
-
getConfigFile: () => getConfigFile,
|
|
14624
|
-
getDynamicMcpServersFile: () => getDynamicMcpServersFile,
|
|
14625
|
-
getFailedUploadsDir: () => getFailedUploadsDir,
|
|
14626
|
-
getMemoriesDir: () => getMemoriesDir,
|
|
14627
|
-
getOnboardingSessionFile: () => getOnboardingSessionFile,
|
|
14628
|
-
getOwnerFile: () => getOwnerFile,
|
|
14629
|
-
getPlaywrightProfileDir: () => getPlaywrightProfileDir,
|
|
14630
|
-
getProfile: () => getProfile,
|
|
14631
|
-
getProfilePhotosDir: () => getProfilePhotosDir,
|
|
14632
|
-
getProfileTunnelConfigPath: () => getProfileTunnelConfigPath,
|
|
14633
|
-
getProfileTunnelCredentialsPath: () => getProfileTunnelCredentialsPath,
|
|
14634
|
-
getProfileTunnelDir: () => getProfileTunnelDir,
|
|
14635
|
-
getProfilesDir: () => getProfilesDir,
|
|
14636
|
-
getScreenshotsDir: () => getScreenshotsDir,
|
|
14637
|
-
getSessionFile: () => getSessionFile,
|
|
14638
|
-
getSkillsDir: () => getSkillsDir,
|
|
14639
|
-
getTOTPSecretsFile: () => getTOTPSecretsFile,
|
|
14640
|
-
getTelegramAccessFile: () => getTelegramAccessFile,
|
|
14641
|
-
getTokensFile: () => getTokensFile,
|
|
14642
|
-
getUserCatalogDir: () => getUserCatalogDir,
|
|
14643
|
-
isOnboardingComplete: () => isOnboardingComplete,
|
|
14644
|
-
loadConfig: () => loadConfig,
|
|
14645
|
-
loadDynamicMcpServers: () => loadDynamicMcpServers,
|
|
14646
|
-
loadGoogleTokens: () => loadGoogleTokens,
|
|
14647
|
-
loadOnboardingSessionId: () => loadOnboardingSessionId,
|
|
14648
|
-
loadOwnerConfig: () => loadOwnerConfig,
|
|
14649
|
-
loadSessionId: () => loadSessionId,
|
|
14650
|
-
loadTOTPSecrets: () => loadTOTPSecrets,
|
|
14651
|
-
loadTelegramAccessConfig: () => loadTelegramAccessConfig,
|
|
14652
|
-
loadUsageSnapshot: () => loadUsageSnapshot,
|
|
14653
|
-
ownerConfigExists: () => ownerConfigExists,
|
|
14654
|
-
saveConfig: () => saveConfig,
|
|
14655
|
-
saveDynamicMcpServers: () => saveDynamicMcpServers,
|
|
14656
|
-
saveGoogleTokens: () => saveGoogleTokens,
|
|
14657
|
-
saveOnboardingSessionId: () => saveOnboardingSessionId,
|
|
14658
|
-
saveOwnerConfig: () => saveOwnerConfig,
|
|
14659
|
-
saveSessionId: () => saveSessionId,
|
|
14660
|
-
saveTOTPSecrets: () => saveTOTPSecrets,
|
|
14661
|
-
saveTelegramAccessConfig: () => saveTelegramAccessConfig,
|
|
14662
|
-
saveUsageSnapshot: () => saveUsageSnapshot,
|
|
14663
|
-
setProfile: () => setProfile,
|
|
14664
|
-
writePendingRestart: () => writePendingRestart,
|
|
14665
|
-
writePendingUpgrade: () => writePendingUpgrade
|
|
14666
|
-
});
|
|
14667
14610
|
function setProfile(name) {
|
|
14668
14611
|
if (!/^[a-z0-9][a-z0-9-]*$/.test(name)) {
|
|
14669
14612
|
throw new Error(`Invalid profile name "${name}". Use lowercase letters, numbers, and hyphens.`);
|
|
@@ -14676,9 +14619,6 @@ function getProfile() {
|
|
|
14676
14619
|
function getBaseDir() {
|
|
14677
14620
|
return BASE_DIR;
|
|
14678
14621
|
}
|
|
14679
|
-
function getProfilesDir() {
|
|
14680
|
-
return PROFILES_DIR;
|
|
14681
|
-
}
|
|
14682
14622
|
function getConfigDir() {
|
|
14683
14623
|
return import_node_path.default.join(PROFILES_DIR, currentProfile);
|
|
14684
14624
|
}
|
|
@@ -15007,22 +14947,6 @@ function saveSessionId(sessionId, mode) {
|
|
|
15007
14947
|
existing.updatedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
15008
14948
|
import_node_fs.default.writeFileSync(sessionFile, JSON.stringify(existing, null, 2), "utf-8");
|
|
15009
14949
|
}
|
|
15010
|
-
function loadOnboardingSessionId() {
|
|
15011
|
-
const file2 = getOnboardingSessionFile();
|
|
15012
|
-
if (!import_node_fs.default.existsSync(file2))
|
|
15013
|
-
return null;
|
|
15014
|
-
try {
|
|
15015
|
-
const raw = JSON.parse(import_node_fs.default.readFileSync(file2, "utf-8"));
|
|
15016
|
-
const parsed = raw;
|
|
15017
|
-
return parsed.sessionId ?? null;
|
|
15018
|
-
} catch {
|
|
15019
|
-
return null;
|
|
15020
|
-
}
|
|
15021
|
-
}
|
|
15022
|
-
function saveOnboardingSessionId(sessionId) {
|
|
15023
|
-
ensureConfigDir();
|
|
15024
|
-
import_node_fs.default.writeFileSync(getOnboardingSessionFile(), JSON.stringify({ sessionId, updatedAt: (/* @__PURE__ */ new Date()).toISOString() }, null, 2), "utf-8");
|
|
15025
|
-
}
|
|
15026
14950
|
function deleteOnboardingSession() {
|
|
15027
14951
|
const file2 = getOnboardingSessionFile();
|
|
15028
14952
|
if (import_node_fs.default.existsSync(file2)) {
|
|
@@ -20445,7 +20369,7 @@ var require_prompts3 = __commonJS({
|
|
|
20445
20369
|
|
|
20446
20370
|
// dist/utils/version-check.js
|
|
20447
20371
|
function isBundled() {
|
|
20448
|
-
const v16 = "0.1.187-beta.9";
|
|
20372
|
+
const v16 = "0.1.187-dev.refactor-computer-use-direct-coordinates.1";
|
|
20449
20373
|
return typeof v16 === "string" && v16 !== "undefined";
|
|
20450
20374
|
}
|
|
20451
20375
|
function getPackageRoot() {
|
|
@@ -20462,7 +20386,7 @@ function getInstallationInfo() {
|
|
|
20462
20386
|
};
|
|
20463
20387
|
}
|
|
20464
20388
|
function getCurrentVersion() {
|
|
20465
|
-
const bundledVersion = "0.1.187-beta.9";
|
|
20389
|
+
const bundledVersion = "0.1.187-dev.refactor-computer-use-direct-coordinates.1";
|
|
20466
20390
|
if (bundledVersion && bundledVersion !== "undefined") {
|
|
20467
20391
|
return bundledVersion;
|
|
20468
20392
|
}
|
|
@@ -24995,7 +24919,7 @@ var require_retry = __commonJS({
|
|
|
24995
24919
|
Object.defineProperty(exports2, "__esModule", { value: true });
|
|
24996
24920
|
exports2.getRetryConfig = getRetryConfig;
|
|
24997
24921
|
async function getRetryConfig(err7) {
|
|
24998
|
-
let config2 =
|
|
24922
|
+
let config2 = getConfig3(err7);
|
|
24999
24923
|
if (!err7 || !err7.config || !config2 && !err7.config.retry) {
|
|
25000
24924
|
return { shouldRetry: false };
|
|
25001
24925
|
}
|
|
@@ -25047,7 +24971,7 @@ var require_retry = __commonJS({
|
|
|
25047
24971
|
}
|
|
25048
24972
|
function shouldRetryRequest(err7) {
|
|
25049
24973
|
var _a7;
|
|
25050
|
-
const config2 =
|
|
24974
|
+
const config2 = getConfig3(err7);
|
|
25051
24975
|
if (err7.name === "AbortError" || ((_a7 = err7.error) === null || _a7 === void 0 ? void 0 : _a7.name) === "AbortError") {
|
|
25052
24976
|
return false;
|
|
25053
24977
|
}
|
|
@@ -25079,7 +25003,7 @@ var require_retry = __commonJS({
|
|
|
25079
25003
|
}
|
|
25080
25004
|
return true;
|
|
25081
25005
|
}
|
|
25082
|
-
function
|
|
25006
|
+
function getConfig3(err7) {
|
|
25083
25007
|
if (err7 && err7.config && err7.config.retryConfig) {
|
|
25084
25008
|
return err7.config.retryConfig;
|
|
25085
25009
|
}
|
|
@@ -686554,108 +686478,14 @@ function generateAndSetWallpaper(agentName) {
|
|
|
686554
686478
|
cleanupOldWallpapers(dest);
|
|
686555
686479
|
return dest;
|
|
686556
686480
|
}
|
|
686557
|
-
function wallpaperHelperSwiftSource() {
|
|
686558
|
-
return `
|
|
686559
|
-
import AppKit
|
|
686560
|
-
|
|
686561
|
-
let args = CommandLine.arguments
|
|
686562
|
-
guard args.count >= 2 else {
|
|
686563
|
-
fputs("Usage: wallpaper-helper set <path> | get\\n", stderr)
|
|
686564
|
-
exit(1)
|
|
686565
|
-
}
|
|
686566
|
-
|
|
686567
|
-
let command = args[1]
|
|
686568
|
-
|
|
686569
|
-
switch command {
|
|
686570
|
-
case "set":
|
|
686571
|
-
guard args.count >= 3 else {
|
|
686572
|
-
fputs("Usage: wallpaper-helper set <imagePath>\\n", stderr)
|
|
686573
|
-
exit(1)
|
|
686574
|
-
}
|
|
686575
|
-
let imagePath = args[2]
|
|
686576
|
-
let url = URL(fileURLWithPath: imagePath)
|
|
686577
|
-
do {
|
|
686578
|
-
for screen in NSScreen.screens {
|
|
686579
|
-
// Preserve the screen's existing display options (scaling, fill colour,
|
|
686580
|
-
// clipping). Apple's docs note these come from System Settings; if we
|
|
686581
|
-
// pass [:] we silently reset them to defaults, which can change the
|
|
686582
|
-
// visual layout of the wallpaper on some setups.
|
|
686583
|
-
let existingOptions = NSWorkspace.shared.desktopImageOptions(for: screen) ?? [:]
|
|
686584
|
-
try NSWorkspace.shared.setDesktopImageURL(url, for: screen, options: existingOptions)
|
|
686585
|
-
}
|
|
686586
|
-
} catch {
|
|
686587
|
-
fputs("Failed to set wallpaper: \\(error)\\n", stderr)
|
|
686588
|
-
exit(1)
|
|
686589
|
-
}
|
|
686590
|
-
|
|
686591
|
-
case "get":
|
|
686592
|
-
if let screen = NSScreen.main,
|
|
686593
|
-
let url = NSWorkspace.shared.desktopImageURL(for: screen) {
|
|
686594
|
-
print(url.path)
|
|
686595
|
-
}
|
|
686596
|
-
|
|
686597
|
-
default:
|
|
686598
|
-
fputs("Unknown command: \\(command). Use 'set' or 'get'.\\n", stderr)
|
|
686599
|
-
exit(1)
|
|
686600
|
-
}
|
|
686601
|
-
`;
|
|
686602
|
-
}
|
|
686603
|
-
function getHelperHash() {
|
|
686604
|
-
_helperHash ??= (0, import_node_crypto3.createHash)("sha256").update(wallpaperHelperSwiftSource()).digest("hex").slice(0, 12);
|
|
686605
|
-
return _helperHash;
|
|
686606
|
-
}
|
|
686607
|
-
function getWallpaperHelperBin() {
|
|
686608
|
-
const hash3 = getHelperHash();
|
|
686609
|
-
const binFile = import_node_path11.default.join(import_node_os7.default.tmpdir(), `visionclaw-wallpaper-helper-${hash3}`);
|
|
686610
|
-
if (!import_node_fs14.default.existsSync(binFile)) {
|
|
686611
|
-
const srcFile = import_node_path11.default.join(import_node_os7.default.tmpdir(), `visionclaw-wallpaper-helper-${hash3}.swift`);
|
|
686612
|
-
import_node_fs14.default.writeFileSync(srcFile, wallpaperHelperSwiftSource(), "utf-8");
|
|
686613
|
-
const compile = (0, import_node_child_process9.spawnSync)("swiftc", ["-O", "-o", binFile, srcFile], {
|
|
686614
|
-
encoding: "utf-8",
|
|
686615
|
-
timeout: 6e4,
|
|
686616
|
-
stdio: ["pipe", "pipe", "pipe"]
|
|
686617
|
-
});
|
|
686618
|
-
try {
|
|
686619
|
-
import_node_fs14.default.unlinkSync(srcFile);
|
|
686620
|
-
} catch {
|
|
686621
|
-
}
|
|
686622
|
-
if (compile.status !== 0) {
|
|
686623
|
-
throw new Error(`Wallpaper helper compilation failed: ${compile.stderr}`);
|
|
686624
|
-
}
|
|
686625
|
-
}
|
|
686626
|
-
return binFile;
|
|
686627
|
-
}
|
|
686628
686481
|
function setDesktopWallpaper(imagePath) {
|
|
686629
686482
|
if (process.platform !== "darwin")
|
|
686630
686483
|
return;
|
|
686631
|
-
try {
|
|
686632
|
-
const bin = getWallpaperHelperBin();
|
|
686633
|
-
const result = (0, import_node_child_process9.spawnSync)(bin, ["set", imagePath], {
|
|
686634
|
-
encoding: "utf-8",
|
|
686635
|
-
timeout: 15e3,
|
|
686636
|
-
stdio: ["pipe", "pipe", "pipe"]
|
|
686637
|
-
});
|
|
686638
|
-
if (result.status === 0)
|
|
686639
|
-
return;
|
|
686640
|
-
} catch {
|
|
686641
|
-
}
|
|
686642
686484
|
(0, import_node_child_process9.execSync)(`osascript -e 'tell application "Finder" to set desktop picture to POSIX file "${imagePath}"'`, { stdio: "ignore", timeout: 15e3 });
|
|
686643
686485
|
}
|
|
686644
686486
|
function getCurrentDesktopWallpaper() {
|
|
686645
686487
|
if (process.platform !== "darwin")
|
|
686646
686488
|
return "";
|
|
686647
|
-
try {
|
|
686648
|
-
const bin = getWallpaperHelperBin();
|
|
686649
|
-
const result = (0, import_node_child_process9.spawnSync)(bin, ["get"], {
|
|
686650
|
-
encoding: "utf-8",
|
|
686651
|
-
timeout: 15e3,
|
|
686652
|
-
stdio: ["pipe", "pipe", "pipe"]
|
|
686653
|
-
});
|
|
686654
|
-
if (result.status === 0 && result.stdout.trim()) {
|
|
686655
|
-
return result.stdout.trim();
|
|
686656
|
-
}
|
|
686657
|
-
} catch {
|
|
686658
|
-
}
|
|
686659
686489
|
try {
|
|
686660
686490
|
return (0, import_node_child_process9.execSync)(`osascript -e 'tell application "Finder" to get POSIX path of (desktop picture as alias)'`, { encoding: "utf-8", timeout: 15e3, stdio: ["pipe", "pipe", "pipe"] }).trim();
|
|
686661
686491
|
} catch {
|
|
@@ -686679,7 +686509,7 @@ function ensureWallpaper(agentName) {
|
|
|
686679
686509
|
}
|
|
686680
686510
|
return false;
|
|
686681
686511
|
}
|
|
686682
|
-
var import_node_child_process9, import_node_crypto3, import_node_fs14, import_node_os7, import_node_path11, WALLPAPER_META_FILENAME, BG_SETUP, BG_RUNNING, _rendererHash, MACOS_CODENAMES
|
|
686512
|
+
var import_node_child_process9, import_node_crypto3, import_node_fs14, import_node_os7, import_node_path11, WALLPAPER_META_FILENAME, BG_SETUP, BG_RUNNING, _rendererHash, MACOS_CODENAMES;
|
|
686683
686513
|
var init_generate_wallpaper = __esm({
|
|
686684
686514
|
"dist/onboarding/generate-wallpaper.js"() {
|
|
686685
686515
|
"use strict";
|
|
@@ -810608,7 +810438,7 @@ var require_retry4 = __commonJS({
|
|
|
810608
810438
|
Object.defineProperty(exports2, "__esModule", { value: true });
|
|
810609
810439
|
exports2.getRetryConfig = getRetryConfig;
|
|
810610
810440
|
async function getRetryConfig(err7) {
|
|
810611
|
-
let config2 =
|
|
810441
|
+
let config2 = getConfig3(err7);
|
|
810612
810442
|
if (!err7 || !err7.config || !config2 && !err7.config.retry) {
|
|
810613
810443
|
return { shouldRetry: false };
|
|
810614
810444
|
}
|
|
@@ -810659,7 +810489,7 @@ var require_retry4 = __commonJS({
|
|
|
810659
810489
|
return { shouldRetry: true, config: err7.config };
|
|
810660
810490
|
}
|
|
810661
810491
|
function shouldRetryRequest(err7) {
|
|
810662
|
-
const config2 =
|
|
810492
|
+
const config2 = getConfig3(err7);
|
|
810663
810493
|
if (err7.config.signal?.aborted && err7.code !== "TimeoutError" || err7.code === "AbortError") {
|
|
810664
810494
|
return false;
|
|
810665
810495
|
}
|
|
@@ -810691,7 +810521,7 @@ var require_retry4 = __commonJS({
|
|
|
810691
810521
|
}
|
|
810692
810522
|
return true;
|
|
810693
810523
|
}
|
|
810694
|
-
function
|
|
810524
|
+
function getConfig3(err7) {
|
|
810695
810525
|
if (err7 && err7.config && err7.config.retryConfig) {
|
|
810696
810526
|
return err7.config.retryConfig;
|
|
810697
810527
|
}
|
|
@@ -974873,7 +974703,7 @@ async function sendMessage(params) {
|
|
|
974873
974703
|
label: "sendMessage"
|
|
974874
974704
|
});
|
|
974875
974705
|
}
|
|
974876
|
-
async function
|
|
974706
|
+
async function getConfig2(params) {
|
|
974877
974707
|
const rawText = await apiPostFetch({
|
|
974878
974708
|
baseUrl: params.baseUrl,
|
|
974879
974709
|
endpoint: "ilink/bot/getconfig",
|
|
@@ -974942,7 +974772,7 @@ var init_config_cache = __esm({
|
|
|
974942
974772
|
if (shouldFetch) {
|
|
974943
974773
|
let fetchOk = false;
|
|
974944
974774
|
try {
|
|
974945
|
-
const resp = await
|
|
974775
|
+
const resp = await getConfig2({
|
|
974946
974776
|
baseUrl: this.apiOpts.baseUrl,
|
|
974947
974777
|
token: this.apiOpts.token,
|
|
974948
974778
|
ilinkUserId: userId,
|
|
@@ -976538,9 +976368,9 @@ var init_weixin = __esm({
|
|
|
976538
976368
|
setContextToken(deps.accountId, fromUserId, full.context_token);
|
|
976539
976369
|
this.contextTokens.set(fromUserId, full.context_token);
|
|
976540
976370
|
}
|
|
976541
|
-
const
|
|
976542
|
-
if (
|
|
976543
|
-
this.typingTickets.set(fromUserId,
|
|
976371
|
+
const cachedConfig = await deps.configManager.getForUser(fromUserId, full.context_token);
|
|
976372
|
+
if (cachedConfig.typingTicket) {
|
|
976373
|
+
this.typingTickets.set(fromUserId, cachedConfig.typingTicket);
|
|
976544
976374
|
}
|
|
976545
976375
|
const textBody = extractTextBody(full.item_list);
|
|
976546
976376
|
const attachments = [];
|
|
@@ -991680,9 +991510,7 @@ Even if you are using Playwright to operate the browser, you should bring the br
|
|
|
991680
991510
|
|
|
991681
991511
|
### Desktop Interactions (Computer Use)
|
|
991682
991512
|
|
|
991683
|
-
For desktop applications and UI outside the browser, use the available desktop/computer-use tools.
|
|
991684
|
-
|
|
991685
|
-
If you describe something but the computer use tool can not resolve the coordinates, try describe the target in more details. Also, the screenshot will always show the current cursor location.
|
|
991513
|
+
For desktop applications and UI outside the browser, use the available desktop/computer-use tools. The \`computer_use_*\` tools accept pixel coordinates from screenshots \u2014 always take a screenshot first with \`computer_use_screenshot\` to see the screen and determine coordinates, then use those coordinates for click/move/scroll/drag actions.
|
|
991686
991514
|
|
|
991687
991515
|
**Prefer ${config2.browserBackend === "playwriter" ? "Playwriter" : "Playwright"} browser tools** for all in-browser work. Use desktop/computer-use tools only for desktop apps, situations outside the browser, or as a fallback when the browser tools can't do the job.
|
|
991688
991516
|
|
|
@@ -996065,84 +995893,6 @@ init_transcribe_audio();
|
|
|
996065
995893
|
init_zod();
|
|
996066
995894
|
init_sdk2();
|
|
996067
995895
|
init_desktop_executor_factory();
|
|
996068
|
-
|
|
996069
|
-
// dist/tools/coordinate-resolver.js
|
|
996070
|
-
init_logger();
|
|
996071
|
-
init_screenshot();
|
|
996072
|
-
init_client_factory();
|
|
996073
|
-
function clamp(n6, min, max) {
|
|
996074
|
-
return Math.max(min, Math.min(max, n6));
|
|
996075
|
-
}
|
|
996076
|
-
async function resolveCoordinates(screenshotBase64, instruction, displayWidth, displayHeight, config2, screenshotFilePath) {
|
|
996077
|
-
const dims = screenshotFilePath ? await getImageDimensionsFromFile(screenshotFilePath) : await getImageDimensions(Buffer.from(screenshotBase64, "base64"));
|
|
996078
|
-
if (!dims) {
|
|
996079
|
-
logger.warn("Could not read screenshot dimensions, aborting coordinate resolution");
|
|
996080
|
-
return null;
|
|
996081
|
-
}
|
|
996082
|
-
const imgW = dims.width;
|
|
996083
|
-
const imgH = dims.height;
|
|
996084
|
-
const scaleX = displayWidth / imgW;
|
|
996085
|
-
const scaleY = displayHeight / imgH;
|
|
996086
|
-
logger.info(`Resolving coordinates: "${instruction}" (display ${displayWidth}x${displayHeight}, image ${imgW}x${imgH}, scaleX=${scaleX.toFixed(3)}, scaleY=${scaleY.toFixed(3)})`, { instruction, displayWidth, displayHeight, imgW, imgH, scaleX, scaleY });
|
|
996087
|
-
const client = createClient(config2);
|
|
996088
|
-
const response = await client.beta.messages.create({
|
|
996089
|
-
model: getModelId(config2),
|
|
996090
|
-
max_tokens: 1024,
|
|
996091
|
-
betas: ["computer-use-2025-11-24"],
|
|
996092
|
-
tools: [
|
|
996093
|
-
{
|
|
996094
|
-
type: "computer_20251124",
|
|
996095
|
-
name: "computer",
|
|
996096
|
-
display_width_px: imgW,
|
|
996097
|
-
display_height_px: imgH,
|
|
996098
|
-
display_number: 1
|
|
996099
|
-
}
|
|
996100
|
-
],
|
|
996101
|
-
system: "You are a helpful assistant that can resolve natural language targets to pixel coordinates using the computer tool. Call the tool directly, no questions or explanations.",
|
|
996102
|
-
tool_choice: { type: "tool", name: "computer" },
|
|
996103
|
-
messages: [
|
|
996104
|
-
{
|
|
996105
|
-
role: "user",
|
|
996106
|
-
content: [
|
|
996107
|
-
{
|
|
996108
|
-
type: "image",
|
|
996109
|
-
source: {
|
|
996110
|
-
type: "base64",
|
|
996111
|
-
media_type: "image/png",
|
|
996112
|
-
data: screenshotBase64
|
|
996113
|
-
}
|
|
996114
|
-
},
|
|
996115
|
-
{
|
|
996116
|
-
type: "text",
|
|
996117
|
-
text: instruction
|
|
996118
|
-
}
|
|
996119
|
-
]
|
|
996120
|
-
}
|
|
996121
|
-
]
|
|
996122
|
-
});
|
|
996123
|
-
for (const block of response.content) {
|
|
996124
|
-
if (block.type === "tool_use" && block.name === "computer") {
|
|
996125
|
-
const input = block.input;
|
|
996126
|
-
const coord = input.coordinate;
|
|
996127
|
-
const action = input.action ?? "left_click";
|
|
996128
|
-
logger.debug(`Coordinate resolver raw response: action=${action} coordinate=${JSON.stringify(coord)}`, { instruction, action, coordinate: coord, fullInput: input });
|
|
996129
|
-
if (Array.isArray(coord) && coord.length >= 2) {
|
|
996130
|
-
const rawX = coord[0];
|
|
996131
|
-
const rawY = coord[1];
|
|
996132
|
-
if (typeof rawX === "number" && typeof rawY === "number" && Number.isFinite(rawX) && Number.isFinite(rawY)) {
|
|
996133
|
-
const x14 = clamp(Math.round(rawX * scaleX), 0, displayWidth);
|
|
996134
|
-
const y12 = clamp(Math.round(rawY * scaleY), 0, displayHeight);
|
|
996135
|
-
logger.info(`Resolved coordinates: action=${action} raw=(${rawX},${rawY}) unscaled=(${x14},${y12}) scaleX=${scaleX.toFixed(3)} scaleY=${scaleY.toFixed(3)}`, { instruction, action, rawX, rawY, x: x14, y: y12, scaleX, scaleY });
|
|
996136
|
-
return { action, x: x14, y: y12 };
|
|
996137
|
-
}
|
|
996138
|
-
}
|
|
996139
|
-
}
|
|
996140
|
-
}
|
|
996141
|
-
logger.info(`Could not resolve coordinates: "${instruction}"`, { instruction });
|
|
996142
|
-
return null;
|
|
996143
|
-
}
|
|
996144
|
-
|
|
996145
|
-
// dist/tools/computer-use.js
|
|
996146
995896
|
init_screenshot();
|
|
996147
995897
|
init_logger();
|
|
996148
995898
|
function requireDesktop() {
|
|
@@ -996165,48 +995915,32 @@ function requireDesktop() {
|
|
|
996165
995915
|
}
|
|
996166
995916
|
return null;
|
|
996167
995917
|
}
|
|
996168
|
-
|
|
996169
|
-
|
|
996170
|
-
if (
|
|
996171
|
-
return
|
|
996172
|
-
|
|
996173
|
-
|
|
996174
|
-
|
|
996175
|
-
|
|
996176
|
-
} catch {
|
|
996177
|
-
if (process.env.ANTHROPIC_API_KEY) {
|
|
996178
|
-
cachedConfig = {
|
|
996179
|
-
model: "claude-sonnet-4-6",
|
|
996180
|
-
provider: "anthropic",
|
|
996181
|
-
anthropicApiKey: process.env.ANTHROPIC_API_KEY
|
|
996182
|
-
};
|
|
996183
|
-
return cachedConfig;
|
|
996184
|
-
}
|
|
996185
|
-
return null;
|
|
996186
|
-
}
|
|
995918
|
+
function getScreenshotSize(displaySize) {
|
|
995919
|
+
const scale = getVisionScaleFactor(displaySize.width, displaySize.height);
|
|
995920
|
+
if (scale >= 1)
|
|
995921
|
+
return { ...displaySize };
|
|
995922
|
+
return {
|
|
995923
|
+
width: Math.max(1, Math.round(displaySize.width * scale)),
|
|
995924
|
+
height: Math.max(1, Math.round(displaySize.height * scale))
|
|
995925
|
+
};
|
|
996187
995926
|
}
|
|
996188
|
-
|
|
996189
|
-
|
|
996190
|
-
|
|
996191
|
-
|
|
996192
|
-
|
|
996193
|
-
|
|
996194
|
-
|
|
996195
|
-
|
|
996196
|
-
|
|
996197
|
-
|
|
996198
|
-
|
|
996199
|
-
|
|
996200
|
-
isError: true
|
|
996201
|
-
};
|
|
996202
|
-
}
|
|
996203
|
-
await fn2(resolved.x, resolved.y);
|
|
996204
|
-
await new Promise((resolve) => setTimeout(resolve, 1e3));
|
|
995927
|
+
function screenshotToDisplay(point, screenshotSize, displaySize) {
|
|
995928
|
+
return {
|
|
995929
|
+
x: Math.round(point.x / screenshotSize.width * displaySize.width),
|
|
995930
|
+
y: Math.round(point.y / screenshotSize.height * displaySize.height)
|
|
995931
|
+
};
|
|
995932
|
+
}
|
|
995933
|
+
var pointSchema = external_exports.object({
|
|
995934
|
+
x: external_exports.number().min(0).describe("X pixel coordinate in the screenshot image (0 = left edge)"),
|
|
995935
|
+
y: external_exports.number().min(0).describe("Y pixel coordinate in the screenshot image (0 = top edge)")
|
|
995936
|
+
});
|
|
995937
|
+
async function actionResult(text, delayMs = 1e3) {
|
|
995938
|
+
await new Promise((resolve) => setTimeout(resolve, delayMs));
|
|
996205
995939
|
const { base64: resultScreenshot } = await captureScreen3();
|
|
996206
995940
|
logger.screenshot(resultScreenshot);
|
|
996207
995941
|
return {
|
|
996208
995942
|
content: [
|
|
996209
|
-
{ type: "text", text
|
|
995943
|
+
{ type: "text", text },
|
|
996210
995944
|
{
|
|
996211
995945
|
type: "image",
|
|
996212
995946
|
data: resultScreenshot,
|
|
@@ -996215,184 +995949,88 @@ async function resolveAndExecute(instruction, config2, fn2) {
|
|
|
996215
995949
|
]
|
|
996216
995950
|
};
|
|
996217
995951
|
}
|
|
996218
|
-
|
|
996219
|
-
|
|
995952
|
+
function withDesktopCheck(toolName, fn2) {
|
|
995953
|
+
return (async () => {
|
|
995954
|
+
const err7 = requireDesktop();
|
|
995955
|
+
if (err7)
|
|
995956
|
+
return err7;
|
|
995957
|
+
try {
|
|
995958
|
+
return await fn2();
|
|
995959
|
+
} catch (e11) {
|
|
995960
|
+
logger.err(`${toolName} failed`, { error: String(e11) });
|
|
995961
|
+
return {
|
|
995962
|
+
content: [
|
|
995963
|
+
{
|
|
995964
|
+
type: "text",
|
|
995965
|
+
text: `${toolName} failed: ${e11 instanceof Error ? e11.message : String(e11)}`
|
|
995966
|
+
}
|
|
995967
|
+
],
|
|
995968
|
+
isError: true
|
|
995969
|
+
};
|
|
995970
|
+
}
|
|
995971
|
+
})();
|
|
995972
|
+
}
|
|
995973
|
+
var computerUseClickTool = gl("computer_use_click", "Click at a point on the desktop screen. Coordinates are pixel positions in the screenshot image returned by computer_use_screenshot. Supports left-click, right-click, and double-click.", {
|
|
995974
|
+
point: pointSchema.describe("Pixel coordinates in the screenshot image to click"),
|
|
996220
995975
|
button: external_exports.enum(["left", "right", "double"]).optional().default("left")
|
|
996221
995976
|
}, async (args) => {
|
|
996222
|
-
|
|
996223
|
-
|
|
996224
|
-
|
|
996225
|
-
|
|
996226
|
-
|
|
996227
|
-
|
|
996228
|
-
|
|
996229
|
-
|
|
996230
|
-
}
|
|
996231
|
-
|
|
996232
|
-
|
|
996233
|
-
|
|
996234
|
-
|
|
996235
|
-
return await resolveAndExecute(instruction, cfg, fn2);
|
|
996236
|
-
} catch (e11) {
|
|
996237
|
-
logger.err("computer_use_click failed", { error: String(e11) });
|
|
996238
|
-
return {
|
|
996239
|
-
content: [
|
|
996240
|
-
{
|
|
996241
|
-
type: "text",
|
|
996242
|
-
text: `computer_use_click failed: ${e11 instanceof Error ? e11.message : String(e11)}`
|
|
996243
|
-
}
|
|
996244
|
-
],
|
|
996245
|
-
isError: true
|
|
996246
|
-
};
|
|
996247
|
-
}
|
|
995977
|
+
return withDesktopCheck("computer_use_click", async () => {
|
|
995978
|
+
const displaySize = getDisplaySize3();
|
|
995979
|
+
const ssSize = getScreenshotSize(displaySize);
|
|
995980
|
+
const abs = screenshotToDisplay(args.point, ssSize, displaySize);
|
|
995981
|
+
if (args.button === "right") {
|
|
995982
|
+
await rightClick3(abs.x, abs.y);
|
|
995983
|
+
} else if (args.button === "double") {
|
|
995984
|
+
await doubleClick3(abs.x, abs.y);
|
|
995985
|
+
} else {
|
|
995986
|
+
await click3(abs.x, abs.y);
|
|
995987
|
+
}
|
|
995988
|
+
return actionResult(`${args.button === "double" ? "Double-clicked" : args.button === "right" ? "Right-clicked" : "Clicked"} at screenshot (${Math.round(args.point.x)}, ${Math.round(args.point.y)}) \u2192 display (${abs.x}, ${abs.y})`);
|
|
995989
|
+
});
|
|
996248
995990
|
});
|
|
996249
|
-
var computerUseMoveTool = gl("computer_use_move", "Move the mouse cursor to a
|
|
996250
|
-
|
|
995991
|
+
var computerUseMoveTool = gl("computer_use_move", "Move the mouse cursor to a point on the desktop screen. Coordinates are pixel positions in the screenshot image.", {
|
|
995992
|
+
point: pointSchema.describe("Pixel coordinates in the screenshot image to move the cursor to")
|
|
996251
995993
|
}, async (args) => {
|
|
996252
|
-
|
|
996253
|
-
|
|
996254
|
-
|
|
996255
|
-
|
|
996256
|
-
|
|
996257
|
-
return {
|
|
996258
|
-
|
|
996259
|
-
isError: true
|
|
996260
|
-
};
|
|
996261
|
-
}
|
|
996262
|
-
const instruction = `Move cursor to the center of: ${args.target}`;
|
|
996263
|
-
try {
|
|
996264
|
-
return await resolveAndExecute(instruction, cfg, async (x14, y12) => {
|
|
996265
|
-
await moveTo3(x14, y12);
|
|
996266
|
-
});
|
|
996267
|
-
} catch (e11) {
|
|
996268
|
-
logger.err("computer_use_move failed", { error: String(e11) });
|
|
996269
|
-
return {
|
|
996270
|
-
content: [
|
|
996271
|
-
{
|
|
996272
|
-
type: "text",
|
|
996273
|
-
text: `computer_use_move failed: ${e11 instanceof Error ? e11.message : String(e11)}`
|
|
996274
|
-
}
|
|
996275
|
-
],
|
|
996276
|
-
isError: true
|
|
996277
|
-
};
|
|
996278
|
-
}
|
|
995994
|
+
return withDesktopCheck("computer_use_move", async () => {
|
|
995995
|
+
const displaySize = getDisplaySize3();
|
|
995996
|
+
const ssSize = getScreenshotSize(displaySize);
|
|
995997
|
+
const abs = screenshotToDisplay(args.point, ssSize, displaySize);
|
|
995998
|
+
await moveTo3(abs.x, abs.y);
|
|
995999
|
+
return actionResult(`Moved cursor to screenshot (${Math.round(args.point.x)}, ${Math.round(args.point.y)}) \u2192 display (${abs.x}, ${abs.y})`);
|
|
996000
|
+
});
|
|
996279
996001
|
});
|
|
996280
|
-
var computerUseScrollTool = gl("computer_use_scroll", "Scroll at a
|
|
996281
|
-
|
|
996002
|
+
var computerUseScrollTool = gl("computer_use_scroll", "Scroll at a point on the desktop screen in a direction. Coordinates are pixel positions in the screenshot image.", {
|
|
996003
|
+
point: pointSchema.describe("Pixel coordinates in the screenshot image for scroll origin"),
|
|
996282
996004
|
direction: external_exports.enum(["up", "down", "left", "right"]),
|
|
996283
996005
|
amount: external_exports.number().optional().default(3).describe("Scroll amount (default 3)")
|
|
996284
996006
|
}, async (args) => {
|
|
996285
|
-
|
|
996286
|
-
|
|
996287
|
-
|
|
996288
|
-
|
|
996289
|
-
|
|
996290
|
-
return {
|
|
996291
|
-
|
|
996292
|
-
isError: true
|
|
996293
|
-
};
|
|
996294
|
-
}
|
|
996295
|
-
const instruction = `Move cursor to the center of: ${args.target} (for scrolling)`;
|
|
996296
|
-
const amount = args.amount;
|
|
996297
|
-
try {
|
|
996298
|
-
return await resolveAndExecute(instruction, cfg, async (x14, y12) => {
|
|
996299
|
-
await scroll3(x14, y12, args.direction, amount);
|
|
996300
|
-
});
|
|
996301
|
-
} catch (e11) {
|
|
996302
|
-
logger.err("computer_use_scroll failed", { error: String(e11) });
|
|
996303
|
-
return {
|
|
996304
|
-
content: [
|
|
996305
|
-
{
|
|
996306
|
-
type: "text",
|
|
996307
|
-
text: `computer_use_scroll failed: ${e11 instanceof Error ? e11.message : String(e11)}`
|
|
996308
|
-
}
|
|
996309
|
-
],
|
|
996310
|
-
isError: true
|
|
996311
|
-
};
|
|
996312
|
-
}
|
|
996007
|
+
return withDesktopCheck("computer_use_scroll", async () => {
|
|
996008
|
+
const displaySize = getDisplaySize3();
|
|
996009
|
+
const ssSize = getScreenshotSize(displaySize);
|
|
996010
|
+
const abs = screenshotToDisplay(args.point, ssSize, displaySize);
|
|
996011
|
+
await scroll3(abs.x, abs.y, args.direction, args.amount);
|
|
996012
|
+
return actionResult(`Scrolled ${args.direction} at screenshot (${Math.round(args.point.x)}, ${Math.round(args.point.y)}) \u2192 display (${abs.x}, ${abs.y})`);
|
|
996013
|
+
});
|
|
996313
996014
|
});
|
|
996314
|
-
var computerUseDragTool = gl("computer_use_drag", "Drag from one
|
|
996315
|
-
|
|
996316
|
-
|
|
996015
|
+
var computerUseDragTool = gl("computer_use_drag", "Drag from one point to another on the desktop screen. Coordinates are pixel positions in the screenshot image.", {
|
|
996016
|
+
startPoint: pointSchema.describe("Pixel coordinates in the screenshot image (drag source)"),
|
|
996017
|
+
endPoint: pointSchema.describe("Pixel coordinates in the screenshot image (drop target)")
|
|
996317
996018
|
}, async (args) => {
|
|
996318
|
-
|
|
996319
|
-
|
|
996320
|
-
|
|
996321
|
-
|
|
996322
|
-
|
|
996323
|
-
|
|
996324
|
-
|
|
996325
|
-
|
|
996326
|
-
};
|
|
996327
|
-
}
|
|
996328
|
-
const { base64: screenshot, filePath: screenshotPath } = await captureScreen3();
|
|
996329
|
-
const size = getDisplaySize3();
|
|
996330
|
-
const [fromResolved, toResolved] = await Promise.all([
|
|
996331
|
-
resolveCoordinates(screenshot, `Move cursor to the center of: ${args.from} (for dragging)`, size.width, size.height, cfg, screenshotPath),
|
|
996332
|
-
resolveCoordinates(screenshot, `Move cursor to the center of: ${args.to} (for dragging)`, size.width, size.height, cfg, screenshotPath)
|
|
996333
|
-
]);
|
|
996334
|
-
if (!fromResolved) {
|
|
996335
|
-
return {
|
|
996336
|
-
content: [
|
|
996337
|
-
{
|
|
996338
|
-
type: "text",
|
|
996339
|
-
text: `Could not resolve 'from' coordinates: ${args.from}. Please try again with a clearer instruction.`
|
|
996340
|
-
}
|
|
996341
|
-
],
|
|
996342
|
-
isError: true
|
|
996343
|
-
};
|
|
996344
|
-
}
|
|
996345
|
-
if (!toResolved) {
|
|
996346
|
-
return {
|
|
996347
|
-
content: [
|
|
996348
|
-
{
|
|
996349
|
-
type: "text",
|
|
996350
|
-
text: `Could not resolve 'to' coordinates: ${args.to}. Please try again with a clearer instruction.`
|
|
996351
|
-
}
|
|
996352
|
-
],
|
|
996353
|
-
isError: true
|
|
996354
|
-
};
|
|
996355
|
-
}
|
|
996356
|
-
try {
|
|
996357
|
-
await drag3(fromResolved.x, fromResolved.y, toResolved.x, toResolved.y);
|
|
996358
|
-
await new Promise((resolve) => setTimeout(resolve, 1e3));
|
|
996359
|
-
const { base64: resultScreenshot } = await captureScreen3();
|
|
996360
|
-
logger.screenshot(resultScreenshot);
|
|
996361
|
-
return {
|
|
996362
|
-
content: [
|
|
996363
|
-
{
|
|
996364
|
-
type: "text",
|
|
996365
|
-
text: `Dragged from (${fromResolved.x}, ${fromResolved.y}) to (${toResolved.x}, ${toResolved.y})`
|
|
996366
|
-
},
|
|
996367
|
-
{
|
|
996368
|
-
type: "image",
|
|
996369
|
-
data: resultScreenshot,
|
|
996370
|
-
mimeType: "image/png"
|
|
996371
|
-
}
|
|
996372
|
-
]
|
|
996373
|
-
};
|
|
996374
|
-
} catch (e11) {
|
|
996375
|
-
logger.err("computer_use_drag failed", { error: String(e11) });
|
|
996376
|
-
return {
|
|
996377
|
-
content: [
|
|
996378
|
-
{
|
|
996379
|
-
type: "text",
|
|
996380
|
-
text: `computer_use_drag failed: ${e11 instanceof Error ? e11.message : String(e11)}`
|
|
996381
|
-
}
|
|
996382
|
-
],
|
|
996383
|
-
isError: true
|
|
996384
|
-
};
|
|
996385
|
-
}
|
|
996019
|
+
return withDesktopCheck("computer_use_drag", async () => {
|
|
996020
|
+
const displaySize = getDisplaySize3();
|
|
996021
|
+
const ssSize = getScreenshotSize(displaySize);
|
|
996022
|
+
const start = screenshotToDisplay(args.startPoint, ssSize, displaySize);
|
|
996023
|
+
const end = screenshotToDisplay(args.endPoint, ssSize, displaySize);
|
|
996024
|
+
await drag3(start.x, start.y, end.x, end.y);
|
|
996025
|
+
return actionResult(`Dragged from screenshot (${Math.round(args.startPoint.x)}, ${Math.round(args.startPoint.y)}) to (${Math.round(args.endPoint.x)}, ${Math.round(args.endPoint.y)}) \u2192 display (${start.x}, ${start.y}) to (${end.x}, ${end.y})`);
|
|
996026
|
+
});
|
|
996386
996027
|
});
|
|
996387
996028
|
var computerUseTypeTool = gl("computer_use_type", "Type text into the focused field. Optionally clear the field first (replace) and/or press Enter after.", {
|
|
996388
996029
|
text: external_exports.string().describe("Text to type"),
|
|
996389
996030
|
replace: external_exports.boolean().optional().default(false).describe("If true, select-all and replace existing text first"),
|
|
996390
996031
|
press_enter: external_exports.boolean().optional().default(false).describe("If true, press Enter after typing")
|
|
996391
996032
|
}, async (args) => {
|
|
996392
|
-
|
|
996393
|
-
if (err7)
|
|
996394
|
-
return err7;
|
|
996395
|
-
try {
|
|
996033
|
+
return withDesktopCheck("computer_use_type", async () => {
|
|
996396
996034
|
if (args.replace) {
|
|
996397
996035
|
await pressKey3(process.platform === "win32" ? "ctrl+a" : "cmd+a");
|
|
996398
996036
|
}
|
|
@@ -996400,78 +996038,33 @@ var computerUseTypeTool = gl("computer_use_type", "Type text into the focused fi
|
|
|
996400
996038
|
if (args.press_enter) {
|
|
996401
996039
|
await pressKey3("enter");
|
|
996402
996040
|
}
|
|
996403
|
-
|
|
996404
|
-
|
|
996405
|
-
logger.screenshot(resultScreenshot);
|
|
996406
|
-
return {
|
|
996407
|
-
content: [
|
|
996408
|
-
{ type: "text", text: `Typed text${args.press_enter ? " and pressed Enter" : ""}` },
|
|
996409
|
-
{
|
|
996410
|
-
type: "image",
|
|
996411
|
-
data: resultScreenshot,
|
|
996412
|
-
mimeType: "image/png"
|
|
996413
|
-
}
|
|
996414
|
-
]
|
|
996415
|
-
};
|
|
996416
|
-
} catch (e11) {
|
|
996417
|
-
logger.err("computer_use_type failed", { error: String(e11) });
|
|
996418
|
-
return {
|
|
996419
|
-
content: [
|
|
996420
|
-
{
|
|
996421
|
-
type: "text",
|
|
996422
|
-
text: `computer_use_type failed: ${e11 instanceof Error ? e11.message : String(e11)}`
|
|
996423
|
-
}
|
|
996424
|
-
],
|
|
996425
|
-
isError: true
|
|
996426
|
-
};
|
|
996427
|
-
}
|
|
996041
|
+
return actionResult(`Typed text${args.press_enter ? " and pressed Enter" : ""}`);
|
|
996042
|
+
});
|
|
996428
996043
|
});
|
|
996429
996044
|
var computerUseKeyTool = gl("computer_use_key", "Press a key or key combination (e.g. 'enter', 'escape', 'cmd+s', 'ctrl+shift+t').", {
|
|
996430
996045
|
key: external_exports.string().describe("Key or combo: 'enter', 'escape', 'cmd+s', 'ctrl+shift+t', etc.")
|
|
996431
996046
|
}, async (args) => {
|
|
996432
|
-
|
|
996433
|
-
if (err7)
|
|
996434
|
-
return err7;
|
|
996435
|
-
try {
|
|
996047
|
+
return withDesktopCheck("computer_use_key", async () => {
|
|
996436
996048
|
await pressKey3(args.key);
|
|
996437
|
-
|
|
996438
|
-
|
|
996439
|
-
logger.screenshot(resultScreenshot);
|
|
996440
|
-
return {
|
|
996441
|
-
content: [
|
|
996442
|
-
{ type: "text", text: `Pressed: ${args.key}` },
|
|
996443
|
-
{
|
|
996444
|
-
type: "image",
|
|
996445
|
-
data: resultScreenshot,
|
|
996446
|
-
mimeType: "image/png"
|
|
996447
|
-
}
|
|
996448
|
-
]
|
|
996449
|
-
};
|
|
996450
|
-
} catch (e11) {
|
|
996451
|
-
logger.err("computer_use_key failed", { error: String(e11) });
|
|
996452
|
-
return {
|
|
996453
|
-
content: [
|
|
996454
|
-
{
|
|
996455
|
-
type: "text",
|
|
996456
|
-
text: `computer_use_key failed: ${e11 instanceof Error ? e11.message : String(e11)}`
|
|
996457
|
-
}
|
|
996458
|
-
],
|
|
996459
|
-
isError: true
|
|
996460
|
-
};
|
|
996461
|
-
}
|
|
996049
|
+
return actionResult(`Pressed: ${args.key}`);
|
|
996050
|
+
});
|
|
996462
996051
|
});
|
|
996463
|
-
var computerUseScreenshotTool = gl("computer_use_screenshot", "Capture a screenshot of the current desktop screen. Returns the image as base64 PNG
|
|
996052
|
+
var computerUseScreenshotTool = gl("computer_use_screenshot", "Capture a screenshot of the current desktop screen. Returns the image as base64 PNG. Use the pixel coordinates from this image when calling click, move, scroll, or drag tools.", {}, async (_args) => {
|
|
996464
996053
|
const err7 = requireDesktop();
|
|
996465
996054
|
if (err7)
|
|
996466
996055
|
return err7;
|
|
996467
996056
|
try {
|
|
996057
|
+
const displaySize = getDisplaySize3();
|
|
996058
|
+
const ssSize = getScreenshotSize(displaySize);
|
|
996468
996059
|
const { base64: base643, filePath } = await takeScreenshot();
|
|
996469
996060
|
logger.screenshot(base643);
|
|
996470
996061
|
return {
|
|
996471
996062
|
content: [
|
|
996472
996063
|
{
|
|
996473
996064
|
type: "text",
|
|
996474
|
-
text: `Screenshot saved to ${filePath}
|
|
996065
|
+
text: `Screenshot saved to ${filePath}
|
|
996066
|
+
Screenshot image size: ${ssSize.width}x${ssSize.height}px | Display resolution: ${displaySize.width}x${displaySize.height}px
|
|
996067
|
+
Use pixel coordinates from this image when calling click/move/scroll/drag tools.`
|
|
996475
996068
|
},
|
|
996476
996069
|
{
|
|
996477
996070
|
type: "image",
|
|
@@ -996910,7 +996503,7 @@ async function ensureDisplaySize() {
|
|
|
996910
996503
|
await initDisplaySize4();
|
|
996911
996504
|
}
|
|
996912
996505
|
}
|
|
996913
|
-
function
|
|
996506
|
+
function getScreenshotSize2(deviceSize) {
|
|
996914
996507
|
const scale = getVisionScaleFactor(deviceSize.width, deviceSize.height);
|
|
996915
996508
|
if (scale >= 1)
|
|
996916
996509
|
return { ...deviceSize };
|
|
@@ -996925,11 +996518,11 @@ function screenshotToDevice(point, screenshotSize, deviceSize) {
|
|
|
996925
996518
|
y: Math.round(point.y / screenshotSize.height * deviceSize.height)
|
|
996926
996519
|
};
|
|
996927
996520
|
}
|
|
996928
|
-
var
|
|
996521
|
+
var pointSchema2 = external_exports.object({
|
|
996929
996522
|
x: external_exports.number().min(0).describe("X pixel coordinate in the screenshot image (0 = left edge)"),
|
|
996930
996523
|
y: external_exports.number().min(0).describe("Y pixel coordinate in the screenshot image (0 = top edge)")
|
|
996931
996524
|
});
|
|
996932
|
-
async function
|
|
996525
|
+
async function actionResult2(text, delayMs = 1e3) {
|
|
996933
996526
|
await new Promise((resolve) => setTimeout(resolve, delayMs));
|
|
996934
996527
|
const { base64: resultScreenshot } = await captureScreen4();
|
|
996935
996528
|
logger.screenshot(resultScreenshot);
|
|
@@ -996991,34 +996584,34 @@ function normalizeKey(raw) {
|
|
|
996991
996584
|
return KEY_ALIASES[normalized] ?? normalized;
|
|
996992
996585
|
}
|
|
996993
996586
|
var androidUseClickTool = gl("android_use_click", "Click (tap) at a point on the Android device screen. Coordinates are pixel positions in the screenshot image returned by android_use_screenshot. Supports single tap, double tap, and button variants.", {
|
|
996994
|
-
point:
|
|
996587
|
+
point: pointSchema2.describe("Pixel coordinates in the screenshot image to click"),
|
|
996995
996588
|
button: external_exports.enum(["left", "right", "middle"]).optional().default("left").describe("Click button (default: left). On Android, all buttons map to tap."),
|
|
996996
996589
|
double: external_exports.boolean().optional().default(false).describe("If true, perform a double-tap instead of a single tap")
|
|
996997
996590
|
}, async (args) => {
|
|
996998
996591
|
return withDeviceCheck("android_use_click", async () => {
|
|
996999
996592
|
await ensureDisplaySize();
|
|
997000
996593
|
const deviceSize = getDisplaySize4();
|
|
997001
|
-
const ssSize =
|
|
996594
|
+
const ssSize = getScreenshotSize2(deviceSize);
|
|
997002
996595
|
const abs = screenshotToDevice(args.point, ssSize, deviceSize);
|
|
997003
996596
|
if (args.double) {
|
|
997004
996597
|
await doubleTap(abs.x, abs.y);
|
|
997005
996598
|
} else {
|
|
997006
996599
|
await tap(abs.x, abs.y);
|
|
997007
996600
|
}
|
|
997008
|
-
return
|
|
996601
|
+
return actionResult2(`${args.double ? "Double-clicked" : "Clicked"} at screenshot (${Math.round(args.point.x)}, ${Math.round(args.point.y)}) \u2192 device (${abs.x}, ${abs.y})`);
|
|
997009
996602
|
});
|
|
997010
996603
|
});
|
|
997011
996604
|
var androidUseLongPressTool = gl("android_use_long_press", "Long press at a point on the Android device screen. Coordinates are pixel positions in the screenshot image.", {
|
|
997012
|
-
point:
|
|
996605
|
+
point: pointSchema2.describe("Pixel coordinates in the screenshot image to long-press"),
|
|
997013
996606
|
durationMs: external_exports.number().int().positive().optional().default(1e3).describe("Long press duration in milliseconds (default: 1000)")
|
|
997014
996607
|
}, async (args) => {
|
|
997015
996608
|
return withDeviceCheck("android_use_long_press", async () => {
|
|
997016
996609
|
await ensureDisplaySize();
|
|
997017
996610
|
const deviceSize = getDisplaySize4();
|
|
997018
|
-
const ssSize =
|
|
996611
|
+
const ssSize = getScreenshotSize2(deviceSize);
|
|
997019
996612
|
const abs = screenshotToDevice(args.point, ssSize, deviceSize);
|
|
997020
996613
|
await longPress(abs.x, abs.y, args.durationMs);
|
|
997021
|
-
return
|
|
996614
|
+
return actionResult2(`Long-pressed at screenshot (${Math.round(args.point.x)}, ${Math.round(args.point.y)}) \u2192 device (${abs.x}, ${abs.y}) for ${args.durationMs}ms`);
|
|
997022
996615
|
});
|
|
997023
996616
|
});
|
|
997024
996617
|
var androidUseTypeTool = gl("android_use_type", "Type text into the currently focused input field on the Android device. Optionally replace existing text first (select all + delete before typing).", {
|
|
@@ -997027,18 +996620,18 @@ var androidUseTypeTool = gl("android_use_type", "Type text into the currently fo
|
|
|
997027
996620
|
}, async (args) => {
|
|
997028
996621
|
return withDeviceCheck("android_use_type", async () => {
|
|
997029
996622
|
await typeText4(args.content, { replace: args.replace });
|
|
997030
|
-
return
|
|
996623
|
+
return actionResult2(`Typed "${args.content.length > 50 ? args.content.slice(0, 50) + "..." : args.content}"${args.replace ? " (replaced existing text)" : ""}`);
|
|
997031
996624
|
});
|
|
997032
996625
|
});
|
|
997033
996626
|
var androidUseScrollTool = gl("android_use_scroll", "Scroll the Android device screen in a direction from a point. Coordinates are pixel positions in the screenshot image. Amount is a float in [0, 1] representing scroll distance relative to screen size.", {
|
|
997034
996627
|
direction: external_exports.enum(["up", "down", "left", "right"]).describe("Scroll direction"),
|
|
997035
996628
|
amount: external_exports.number().min(0).max(1).describe("Scroll distance as fraction of screen size (0.0 to 1.0, e.g. 0.25 = quarter screen)"),
|
|
997036
|
-
point:
|
|
996629
|
+
point: pointSchema2.optional().describe("Pixel coordinates in the screenshot image for scroll origin (default: center of screen)")
|
|
997037
996630
|
}, async (args) => {
|
|
997038
996631
|
return withDeviceCheck("android_use_scroll", async () => {
|
|
997039
996632
|
await ensureDisplaySize();
|
|
997040
996633
|
const deviceSize = getDisplaySize4();
|
|
997041
|
-
const ssSize =
|
|
996634
|
+
const ssSize = getScreenshotSize2(deviceSize);
|
|
997042
996635
|
const ssPoint = args.point ?? {
|
|
997043
996636
|
x: ssSize.width / 2,
|
|
997044
996637
|
y: ssSize.height / 2
|
|
@@ -997065,22 +996658,22 @@ var androidUseScrollTool = gl("android_use_scroll", "Scroll the Android device s
|
|
|
997065
996658
|
endX = Math.max(0, Math.min(deviceSize.width, endX));
|
|
997066
996659
|
endY = Math.max(0, Math.min(deviceSize.height, endY));
|
|
997067
996660
|
await swipe(origin.x, origin.y, endX, endY, 300);
|
|
997068
|
-
return
|
|
996661
|
+
return actionResult2(`Scrolled ${args.direction} by ${(args.amount * 100).toFixed(0)}% from screenshot (${Math.round(ssPoint.x)}, ${Math.round(ssPoint.y)})`);
|
|
997069
996662
|
});
|
|
997070
996663
|
});
|
|
997071
996664
|
var androidUseDragTool = gl("android_use_drag", "Drag from one point to another on the Android device. Coordinates are pixel positions in the screenshot image. Suitable for drag-and-drop operations (moving icons, reordering list items).", {
|
|
997072
|
-
startPoint:
|
|
997073
|
-
endPoint:
|
|
996665
|
+
startPoint: pointSchema2.describe("Pixel coordinates in the screenshot image (drag source)"),
|
|
996666
|
+
endPoint: pointSchema2.describe("Pixel coordinates in the screenshot image (drop target)"),
|
|
997074
996667
|
durationMs: external_exports.number().int().positive().optional().default(800).describe("Drag duration in milliseconds (default: 800, longer than swipe for drag-and-drop)")
|
|
997075
996668
|
}, async (args) => {
|
|
997076
996669
|
return withDeviceCheck("android_use_drag", async () => {
|
|
997077
996670
|
await ensureDisplaySize();
|
|
997078
996671
|
const deviceSize = getDisplaySize4();
|
|
997079
|
-
const ssSize =
|
|
996672
|
+
const ssSize = getScreenshotSize2(deviceSize);
|
|
997080
996673
|
const start = screenshotToDevice(args.startPoint, ssSize, deviceSize);
|
|
997081
996674
|
const end = screenshotToDevice(args.endPoint, ssSize, deviceSize);
|
|
997082
996675
|
await swipe(start.x, start.y, end.x, end.y, args.durationMs);
|
|
997083
|
-
return
|
|
996676
|
+
return actionResult2(`Dragged from screenshot (${Math.round(args.startPoint.x)}, ${Math.round(args.startPoint.y)}) to (${Math.round(args.endPoint.x)}, ${Math.round(args.endPoint.y)}) over ${args.durationMs}ms`);
|
|
997084
996677
|
});
|
|
997085
996678
|
});
|
|
997086
996679
|
var androidUsePressTool = gl("android_use_press", "Press a key or button on the Android device. Supports standard keys: enter, tab, escape, backspace, delete, space, arrow keys (up/down/left/right), mobile keys (home, back, menu, app_switch, volume_up, volume_down, power), and function keys (f1-f12).", {
|
|
@@ -997089,7 +996682,7 @@ var androidUsePressTool = gl("android_use_press", "Press a key or button on the
|
|
|
997089
996682
|
return withDeviceCheck("android_use_press", async () => {
|
|
997090
996683
|
const normalized = normalizeKey(args.key);
|
|
997091
996684
|
await pressKey4(normalized);
|
|
997092
|
-
return
|
|
996685
|
+
return actionResult2(`Pressed: ${normalized}${normalized !== args.key ? ` (normalized from "${args.key}")` : ""}`);
|
|
997093
996686
|
});
|
|
997094
996687
|
});
|
|
997095
996688
|
var androidUseScreenshotTool = gl("android_use_screenshot", "Capture a screenshot of the connected Android device screen. Returns the image as base64 PNG. The image is resized for vision model processing. Use the pixel coordinates from this image when calling click, long_press, scroll, or drag tools.", {}, async (_args) => {
|
|
@@ -997099,7 +996692,7 @@ var androidUseScreenshotTool = gl("android_use_screenshot", "Capture a screensho
|
|
|
997099
996692
|
try {
|
|
997100
996693
|
await ensureDisplaySize();
|
|
997101
996694
|
const deviceSize = getDisplaySize4();
|
|
997102
|
-
const ssSize =
|
|
996695
|
+
const ssSize = getScreenshotSize2(deviceSize);
|
|
997103
996696
|
const { base64: base643, filePath } = await captureScreen4();
|
|
997104
996697
|
logger.screenshot(base643);
|
|
997105
996698
|
return {
|
|
@@ -997137,7 +996730,7 @@ var androidUseOpenTool = gl("android_use_open", "Open (launch) an app on the And
|
|
|
997137
996730
|
await new Promise((resolve) => setTimeout(resolve, 2e3));
|
|
997138
996731
|
await ensureDisplaySize();
|
|
997139
996732
|
const deviceSize = getDisplaySize4();
|
|
997140
|
-
const ssSize =
|
|
996733
|
+
const ssSize = getScreenshotSize2(deviceSize);
|
|
997141
996734
|
const { base64: resultScreenshot } = await captureScreen4();
|
|
997142
996735
|
logger.screenshot(resultScreenshot);
|
|
997143
996736
|
let activity = "";
|
|
@@ -1029367,7 +1028960,7 @@ var _config = void 0;
|
|
|
1029367
1028960
|
function resetArchiveConfigCache() {
|
|
1029368
1028961
|
_config = void 0;
|
|
1029369
1028962
|
}
|
|
1029370
|
-
function
|
|
1028963
|
+
function getConfig() {
|
|
1029371
1028964
|
if (_config === void 0) {
|
|
1029372
1028965
|
try {
|
|
1029373
1028966
|
_config = buildArchiveConfig(loadConfig());
|
|
@@ -1029378,7 +1028971,7 @@ function getConfig2() {
|
|
|
1029378
1028971
|
return _config;
|
|
1029379
1028972
|
}
|
|
1029380
1028973
|
function isArchiveEnabled() {
|
|
1029381
|
-
return
|
|
1028974
|
+
return getConfig() !== null;
|
|
1029382
1028975
|
}
|
|
1029383
1028976
|
var TEXT_CHUNK_KEYWORD = "vc-archive-id";
|
|
1029384
1028977
|
function pngCrc32(data2) {
|
|
@@ -1029526,7 +1029119,7 @@ async function drainFailedUploads(config2) {
|
|
|
1029526
1029119
|
}
|
|
1029527
1029120
|
}
|
|
1029528
1029121
|
function archiveImage(base64Data, mediaType, sessionId) {
|
|
1029529
|
-
const config2 =
|
|
1029122
|
+
const config2 = getConfig();
|
|
1029530
1029123
|
if (!config2)
|
|
1029531
1029124
|
return null;
|
|
1029532
1029125
|
const now = /* @__PURE__ */ new Date();
|