visionclaw 0.1.187-beta.9 → 0.1.187-dev.refactor-computer-use-direct-coordinates.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "visionclaw",
3
- "version": "0.1.187-beta.9",
3
+ "version": "0.1.187-dev.refactor-computer-use-direct-coordinates.1",
4
4
  "description": "A personal assistant agent that runs on your desktop, receives commands from messaging channels, and executes tasks autonomously.",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -1,30 +0,0 @@
1
- /**
2
- * Resolves natural language targets to pixel coordinates using
3
- * Anthropic's computer_20251124 tool via the Messages API.
4
- *
5
- * Sends screenshot + instruction to Claude; Claude returns a tool_use
6
- * with action and coordinate. We extract coordinates only (no execution).
7
- *
8
- * IMPORTANT: The input screenshot is assumed to be already resized to fit
9
- * Claude's vision constraints (done by takeScreenshot). We read the actual
10
- * image dimensions, tell the computer-use tool those are the display size,
11
- * and unscale returned coordinates back to the real display space.
12
- * Reference: https://platform.claude.com/docs/en/agents-and-tools/tool-use/computer-use-tool
13
- */
14
- import type { VisionClawConfig } from "../config/types.js";
15
- export interface ResolvedAction {
16
- action: string;
17
- x: number;
18
- y: number;
19
- }
20
- /**
21
- * Resolve a natural language target to pixel coordinates.
22
- * Uses Messages API with computer_20251124 tool; returns the first tool_use result.
23
- *
24
- * The input screenshot is expected to already be resized to fit Claude's
25
- * vision constraints (by takeScreenshot). We read its actual pixel dimensions,
26
- * use those as display_width_px / display_height_px, and unscale the returned
27
- * coordinates back to the real display space using imageWidth/displayWidth ratio.
28
- */
29
- export declare function resolveCoordinates(screenshotBase64: string, instruction: string, displayWidth: number, displayHeight: number, config: VisionClawConfig, screenshotFilePath?: string): Promise<ResolvedAction | null>;
30
- //# sourceMappingURL=coordinate-resolver.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"coordinate-resolver.d.ts","sourceRoot":"","sources":["../../src/tools/coordinate-resolver.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAIH,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AAG3D,MAAM,WAAW,cAAc;IAC7B,MAAM,EAAE,MAAM,CAAC;IACf,CAAC,EAAE,MAAM,CAAC;IACV,CAAC,EAAE,MAAM,CAAC;CACX;AAQD;;;;;;;;GAQG;AACH,wBAAsB,kBAAkB,CACtC,gBAAgB,EAAE,MAAM,EACxB,WAAW,EAAE,MAAM,EACnB,YAAY,EAAE,MAAM,EACpB,aAAa,EAAE,MAAM,EACrB,MAAM,EAAE,gBAAgB,EACxB,kBAAkB,CAAC,EAAE,MAAM,GAC1B,OAAO,CAAC,cAAc,GAAG,IAAI,CAAC,CAyFhC"}
@@ -1,104 +0,0 @@
1
- /**
2
- * Resolves natural language targets to pixel coordinates using
3
- * Anthropic's computer_20251124 tool via the Messages API.
4
- *
5
- * Sends screenshot + instruction to Claude; Claude returns a tool_use
6
- * with action and coordinate. We extract coordinates only (no execution).
7
- *
8
- * IMPORTANT: The input screenshot is assumed to be already resized to fit
9
- * Claude's vision constraints (done by takeScreenshot). We read the actual
10
- * image dimensions, tell the computer-use tool those are the display size,
11
- * and unscale returned coordinates back to the real display space.
12
- * Reference: https://platform.claude.com/docs/en/agents-and-tools/tool-use/computer-use-tool
13
- */
14
- import { logger } from "../logger.js";
15
- import { getImageDimensions, getImageDimensionsFromFile } from "./screenshot.js";
16
- import { createClient, getModelId } from "../agent/providers/client-factory.js";
17
- function clamp(n, min, max) {
18
- return Math.max(min, Math.min(max, n));
19
- }
20
- // ── Resolver ───────────────────────────────────────────────────────────
21
- /**
22
- * Resolve a natural language target to pixel coordinates.
23
- * Uses Messages API with computer_20251124 tool; returns the first tool_use result.
24
- *
25
- * The input screenshot is expected to already be resized to fit Claude's
26
- * vision constraints (by takeScreenshot). We read its actual pixel dimensions,
27
- * use those as display_width_px / display_height_px, and unscale the returned
28
- * coordinates back to the real display space using imageWidth/displayWidth ratio.
29
- */
30
- export async function resolveCoordinates(screenshotBase64, instruction, displayWidth, displayHeight, config, screenshotFilePath) {
31
- // Read actual pixel dimensions of the (already resized) screenshot.
32
- // Prefer file path to avoid base64 decode + temp file round-trip.
33
- const dims = screenshotFilePath
34
- ? await getImageDimensionsFromFile(screenshotFilePath)
35
- : await getImageDimensions(Buffer.from(screenshotBase64, "base64"));
36
- if (!dims) {
37
- logger.warn("Could not read screenshot dimensions, aborting coordinate resolution");
38
- return null;
39
- }
40
- const imgW = dims.width;
41
- const imgH = dims.height;
42
- // Scale factors to convert from image space back to actual display space
43
- const scaleX = displayWidth / imgW;
44
- const scaleY = displayHeight / imgH;
45
- logger.info(`Resolving coordinates: "${instruction}" (display ${displayWidth}x${displayHeight}, image ${imgW}x${imgH}, scaleX=${scaleX.toFixed(3)}, scaleY=${scaleY.toFixed(3)})`, { instruction, displayWidth, displayHeight, imgW, imgH, scaleX, scaleY });
46
- const client = createClient(config);
47
- const response = await client.beta.messages.create({
48
- model: getModelId(config),
49
- max_tokens: 1024,
50
- betas: ["computer-use-2025-11-24"],
51
- tools: [
52
- {
53
- type: "computer_20251124",
54
- name: "computer",
55
- display_width_px: imgW,
56
- display_height_px: imgH,
57
- display_number: 1,
58
- },
59
- ],
60
- system: "You are a helpful assistant that can resolve natural language targets to pixel coordinates using the computer tool. Call the tool directly, no questions or explanations.",
61
- tool_choice: { type: "tool", name: "computer" },
62
- messages: [
63
- {
64
- role: "user",
65
- content: [
66
- {
67
- type: "image",
68
- source: {
69
- type: "base64",
70
- media_type: "image/png",
71
- data: screenshotBase64,
72
- },
73
- },
74
- {
75
- type: "text",
76
- text: instruction,
77
- },
78
- ],
79
- },
80
- ],
81
- });
82
- for (const block of response.content) {
83
- if (block.type === "tool_use" && block.name === "computer") {
84
- const input = block.input;
85
- const coord = input.coordinate;
86
- const action = input.action ?? "left_click";
87
- logger.debug(`Coordinate resolver raw response: action=${action} coordinate=${JSON.stringify(coord)}`, { instruction, action, coordinate: coord, fullInput: input });
88
- if (Array.isArray(coord) && coord.length >= 2) {
89
- const rawX = coord[0];
90
- const rawY = coord[1];
91
- if (typeof rawX === "number" && typeof rawY === "number" && Number.isFinite(rawX) && Number.isFinite(rawY)) {
92
- // Unscale: Claude returned coords in image space → convert to actual display space
93
- const x = clamp(Math.round(rawX * scaleX), 0, displayWidth);
94
- const y = clamp(Math.round(rawY * scaleY), 0, displayHeight);
95
- logger.info(`Resolved coordinates: action=${action} raw=(${rawX},${rawY}) unscaled=(${x},${y}) scaleX=${scaleX.toFixed(3)} scaleY=${scaleY.toFixed(3)}`, { instruction, action, rawX, rawY, x, y, scaleX, scaleY });
96
- return { action, x, y };
97
- }
98
- }
99
- }
100
- }
101
- logger.info(`Could not resolve coordinates: "${instruction}"`, { instruction });
102
- return null;
103
- }
104
- //# sourceMappingURL=coordinate-resolver.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"coordinate-resolver.js","sourceRoot":"","sources":["../../src/tools/coordinate-resolver.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAEH,OAAO,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AACtC,OAAO,EAAE,kBAAkB,EAAE,0BAA0B,EAAE,MAAM,iBAAiB,CAAC;AAEjF,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,MAAM,sCAAsC,CAAC;AAQhF,SAAS,KAAK,CAAC,CAAS,EAAE,GAAW,EAAE,GAAW;IAChD,OAAO,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;AACzC,CAAC;AAED,0EAA0E;AAE1E;;;;;;;;GAQG;AACH,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,gBAAwB,EACxB,WAAmB,EACnB,YAAoB,EACpB,aAAqB,EACrB,MAAwB,EACxB,kBAA2B;IAE3B,oEAAoE;IACpE,kEAAkE;IAClE,MAAM,IAAI,GAAG,kBAAkB;QAC7B,CAAC,CAAC,MAAM,0BAA0B,CAAC,kBAAkB,CAAC;QACtD,CAAC,CAAC,MAAM,kBAAkB,CAAC,MAAM,CAAC,IAAI,CAAC,gBAAgB,EAAE,QAAQ,CAAC,CAAC,CAAC;IACtE,IAAI,CAAC,IAAI,EAAE,CAAC;QACV,MAAM,CAAC,IAAI,CAAC,sEAAsE,CAAC,CAAC;QACpF,OAAO,IAAI,CAAC;IACd,CAAC;IACD,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC;IACxB,MAAM,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC;IAEzB,yEAAyE;IACzE,MAAM,MAAM,GAAG,YAAY,GAAG,IAAI,CAAC;IACnC,MAAM,MAAM,GAAG,aAAa,GAAG,IAAI,CAAC;IAEpC,MAAM,CAAC,IAAI,CACT,2BAA2B,WAAW,cAAc,YAAY,IAAI,aAAa,WAAW,IAAI,IAAI,IAAI,YAAY,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,YAAY,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,EACrK,EAAE,WAAW,EAAE,YAAY,EAAE,aAAa,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,CACzE,CAAC;IAEF,MAAM,MAAM,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC;IAEpC,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC;QACjD,KAAK,EAAE,UAAU,CAAC,MAAM,CAAC;QACzB,UAAU,EAAE,IAAI;QAChB,KAAK,EAAE,CAAC,yBAAyB,CAAC;QAClC,KAAK,EAAE;YACL;gBACE,IAAI,EAAE,mBAAmB;gBACzB,IAAI,EAAE,UAAU;gBAChB,gBAAgB,EAAE,IAAI;gBACtB,iBAAiB,EAAE,IAAI;gBACvB,cAAc,EAAE,CAAC;aAClB;SACF;QACD,MAAM,EAAE,2KAA2K;QACnL,WAAW,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU,EAAE;QAC/C,QAAQ,EAAE;YACR;gBACE,IAAI,EAAE,MAAM;gBACZ,OAAO,EAAE;oBACP;wBACE,IAAI,EAAE,OAAO;wBACb,MAAM,EAAE;4BACN,IAAI,EAAE,QAAQ;4BACd,UAAU,EAAE,WAAW;4BACvB,IAAI,EAAE,gBAAgB;yBACvB;qBACF;oBACD;wBACE,IAAI,EAAE,MAAM;wBACZ,IAAI,EAAE,WAAW;qBAClB;iBACF;aACF;SACF;KACF,CAAC,CAAC;IAEH,KAAK,MAAM,KAAK,IAAI,QAAQ,CAAC,OAAO,EAAE,CAAC;QACrC,IAAI,KAAK,CAAC,IAAI,KAAK,UAAU,IAAI,KAAK,CAAC,IAAI,KAAK,UAAU,EAAE,CAAC;YAC3D,MAAM,KAAK,GAAG,KAAK,CAAC,KAAgC,CAAC;YACrD,MAAM,KAAK,GAAG,KAAK,CAAC,UAAU,CAAC;YAC/B,MAAM,MAAM,GAAI,KAAK,CAAC,MAA6B,IAAI,YAAY,CAAC;YAEpE,MAAM,CAAC,KAAK,CACV,4CAA4C,MAAM,eAAe,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,EAAE,EACxF,EAAE,WAAW,EAAE,MAAM,EAAE,UAAU,EAAE,KAAK,EAAE,SAAS,EAAE,KAAK,EAAE,CAC7D,CAAC;YAEF,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;gBAC9C,MAAM,IAAI,GAAY,KAAK,CAAC,CAAC,CAAC,CAAC;gBAC/B,MAAM,IAAI,GAAY,KAAK,CAAC,CAAC,CAAC,CAAC;gBAC/B,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;oBAC3G,mFAAmF;oBACnF,MAAM,CAAC,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,GAAG,MAAM,CAAC,EAAE,CAAC,EAAE,YAAY,CAAC,CAAC;oBAC5D,MAAM,CAAC,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,GAAG,MAAM,CAAC,EAAE,CAAC,EAAE,aAAa,CAAC,CAAC;oBAC7D,MAAM,CAAC,IAAI,CACT,gCAAgC,MAAM,SAAS,IAAI,IAAI,IAAI,eAAe,CAAC,IAAI,CAAC,YAAY,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,WAAW,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAC3I,EAAE,WAAW,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,EAAE,CAAC,EAAE,MAAM,EAAE,MAAM,EAAE,CAC1D,CAAC;oBACF,OAAO,EAAE,MAAM,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC;gBAC1B,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IACD,MAAM,CAAC,IAAI,CAAC,mCAAmC,WAAW,GAAG,EAAE,EAAE,WAAW,EAAE,CAAC,CAAC;IAChF,OAAO,IAAI,CAAC;AACd,CAAC"}