appium-desktop-driver 1.6.0 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +77 -0
- package/build/lib/commands/extension.js +1 -1
- package/build/lib/commands/extension.js.map +1 -1
- package/build/lib/commands/index.d.ts +1 -1
- package/build/lib/commands/index.d.ts.map +1 -1
- package/build/lib/commands/vision.d.ts +1 -1
- package/build/lib/commands/vision.d.ts.map +1 -1
- package/build/lib/commands/vision.js +11 -2
- package/build/lib/commands/vision.js.map +1 -1
- package/build/lib/driver.js +2 -2
- package/build/lib/driver.js.map +1 -1
- package/build/lib/mcp/index.js +1 -61
- package/build/lib/mcp/index.js.map +1 -1
- package/build/lib/mcp/session.d.ts.map +1 -1
- package/build/lib/mcp/session.js +57 -0
- package/build/lib/mcp/session.js.map +1 -1
- package/build/lib/mcp/tools/vision.d.ts.map +1 -1
- package/build/lib/mcp/tools/vision.js +54 -6
- package/build/lib/mcp/tools/vision.js.map +1 -1
- package/build/lib/vision-utils.d.ts +4 -4
- package/build/lib/vision-utils.d.ts.map +1 -1
- package/build/lib/vision-utils.js +43 -6
- package/build/lib/vision-utils.js.map +1 -1
- package/build/tsconfig.tsbuildinfo +1 -1
- package/package.json +2 -1
|
@@ -5,7 +5,6 @@ const zod_1 = require("zod");
|
|
|
5
5
|
const errors_js_1 = require("../errors.js");
|
|
6
6
|
const util_1 = require("../../util");
|
|
7
7
|
const vision_utils_1 = require("../../vision-utils");
|
|
8
|
-
const DEFAULT_MODEL = 'claude-opus-4-6';
|
|
9
8
|
async function buildCoordMapping(driver, ssW, ssH) {
|
|
10
9
|
try {
|
|
11
10
|
const rect = await driver.getWindowRect();
|
|
@@ -26,28 +25,77 @@ async function buildCoordMapping(driver, ssW, ssH) {
|
|
|
26
25
|
}
|
|
27
26
|
}
|
|
28
27
|
function registerVisionTools(server, session) {
|
|
28
|
+
server.registerTool('analyze_screen', {
|
|
29
|
+
description: 'Take a screenshot and return it to the calling agent for visual analysis — no external API key needed. ' +
|
|
30
|
+
'Automatically computes a DPI-aware coordinate mapping: when the agent identifies an element, ' +
|
|
31
|
+
'it receives the conversion formula (img_x/img_y → screen_x/screen_y) so returned coordinates ' +
|
|
32
|
+
'are ready to pass directly to click tools.',
|
|
33
|
+
inputSchema: {
|
|
34
|
+
prompt: zod_1.z.string().min(1).describe('Question or instruction about the screenshot. ' +
|
|
35
|
+
'For coordinate queries (e.g. "find the Submit button") the agent will return DPI-corrected screen coordinates.'),
|
|
36
|
+
},
|
|
37
|
+
annotations: { readOnlyHint: true },
|
|
38
|
+
}, async ({ prompt }) => {
|
|
39
|
+
try {
|
|
40
|
+
const driver = session.getDriver();
|
|
41
|
+
const base64 = await driver.takeScreenshot();
|
|
42
|
+
const { width: ssW, height: ssH } = (0, util_1.getPngDimensions)(base64);
|
|
43
|
+
const mapping = await buildCoordMapping(driver, ssW, ssH);
|
|
44
|
+
let instruction = `${prompt}\n\nThe image is ${ssW}×${ssH} pixels. When identifying coordinates, reason in the full ${ssW}×${ssH} pixel space — not a scaled-down view.`;
|
|
45
|
+
if (mapping) {
|
|
46
|
+
instruction +=
|
|
47
|
+
`\n\nIf your answer includes screen coordinates:\n` +
|
|
48
|
+
` Step 1 — Find the element center in the image: img_x (0–${ssW}), img_y (0–${ssH}).\n` +
|
|
49
|
+
` Step 2 — Convert to screen coordinates:\n` +
|
|
50
|
+
` screen_x = round(${mapping.offsetX} + img_x × ${mapping.scaleX})\n` +
|
|
51
|
+
` screen_y = round(${mapping.offsetY} + img_y × ${mapping.scaleY})\n` +
|
|
52
|
+
` Report img_x, img_y, screen_x, and screen_y.`;
|
|
53
|
+
}
|
|
54
|
+
return {
|
|
55
|
+
content: [
|
|
56
|
+
{ type: 'image', data: base64, mimeType: 'image/png' },
|
|
57
|
+
{ type: 'text', text: instruction },
|
|
58
|
+
],
|
|
59
|
+
};
|
|
60
|
+
}
|
|
61
|
+
catch (err) {
|
|
62
|
+
return { isError: true, content: [{ type: 'text', text: (0, errors_js_1.formatError)(err) }] };
|
|
63
|
+
}
|
|
64
|
+
});
|
|
29
65
|
server.registerTool('find_by_vision', {
|
|
30
66
|
description: 'Take a screenshot and analyze it with a vision model, returning the result directly. ' +
|
|
31
67
|
'For "coordinates" format, locates a UI element and returns {x,y,label} with actual screen ' +
|
|
32
68
|
'coordinates (DPI-corrected) ready to pass to click tools. ' +
|
|
33
69
|
'For "text" format, answers a general question about the screen in plain text. ' +
|
|
34
|
-
'Requires ANTHROPIC_API_KEY (Claude), OPENAI_API_KEY (GPT-4o / o-series),
|
|
35
|
-
'GEMINI_API_KEY (Gemini)
|
|
70
|
+
'Requires ANTHROPIC_API_KEY (Claude), OPENAI_API_KEY (GPT-4o / o-series), ' +
|
|
71
|
+
'GEMINI_API_KEY (Gemini), or AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY (Amazon Nova via Bedrock) ' +
|
|
72
|
+
'depending on the chosen model.',
|
|
36
73
|
inputSchema: {
|
|
37
74
|
prompt: zod_1.z.string().min(1).describe('Question or instruction about the screenshot'),
|
|
38
75
|
responseFormat: zod_1.z.enum(['coordinates', 'text']).default('coordinates').describe('"coordinates" (default) locates an element and returns JSON {x,y,label} with converted screen coordinates. ' +
|
|
39
76
|
'"text" answers a general question about the screen in plain text.'),
|
|
40
|
-
model: zod_1.z.string().
|
|
77
|
+
model: zod_1.z.string().min(1).describe('Vision model to use. Determines which credentials are required: ' +
|
|
78
|
+
'claude-* → ANTHROPIC_API_KEY, gpt-*/o-series → OPENAI_API_KEY, ' +
|
|
79
|
+
'gemini-* → GEMINI_API_KEY, amazon.nova-* → AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY.'),
|
|
41
80
|
},
|
|
42
81
|
annotations: { readOnlyHint: true },
|
|
43
82
|
}, async ({ prompt, responseFormat, model }) => {
|
|
44
83
|
try {
|
|
45
|
-
|
|
46
|
-
|
|
84
|
+
if (!model) {
|
|
85
|
+
throw new Error('find_by_vision requires a "model" argument. ' +
|
|
86
|
+
'Supported prefixes: claude-* (ANTHROPIC_API_KEY), gpt-*/o-series (OPENAI_API_KEY), ' +
|
|
87
|
+
'gemini-* (GEMINI_API_KEY), amazon.nova-*/us.amazon.nova-*/eu.amazon.nova-*/ap.amazon.nova-* (AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY).');
|
|
88
|
+
}
|
|
89
|
+
const visionModel = model;
|
|
90
|
+
const provider = (0, vision_utils_1.getProviderForModel)(visionModel);
|
|
91
|
+
const envVar = (0, vision_utils_1.getApiKeyEnvVar)(provider);
|
|
47
92
|
const apiKey = process.env[envVar];
|
|
48
93
|
if (!apiKey) {
|
|
49
94
|
throw new Error(`${envVar} environment variable is required for find_by_vision (model: ${visionModel})`);
|
|
50
95
|
}
|
|
96
|
+
if (provider === 'amazon' && !process.env.AWS_SECRET_ACCESS_KEY) {
|
|
97
|
+
throw new Error('AWS_SECRET_ACCESS_KEY environment variable is required for Amazon Bedrock models');
|
|
98
|
+
}
|
|
51
99
|
const driver = session.getDriver();
|
|
52
100
|
const base64 = await driver.takeScreenshot();
|
|
53
101
|
const { width: ssW, height: ssH } = (0, util_1.getPngDimensions)(base64);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"vision.js","sourceRoot":"","sources":["../../../../lib/mcp/tools/vision.ts"],"names":[],"mappings":";;
|
|
1
|
+
{"version":3,"file":"vision.js","sourceRoot":"","sources":["../../../../lib/mcp/tools/vision.ts"],"names":[],"mappings":";;AA6CA,kDA2HC;AAvKD,6BAAwB;AAGxB,4CAA2C;AAC3C,qCAA8C;AAC9C,qDAS4B;AAE5B,KAAK,UAAU,iBAAiB,CAAC,MAAe,EAAE,GAAW,EAAE,GAAW;IACtE,IAAI,CAAC;QACD,MAAM,IAAI,GAAG,MAAM,MAAM,CAAC,aAAa,EAAE,CAAC;QAC1C,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;QAElC,IAAI,MAAM,EAAE,CAAC;YACT,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,aAAa,CAAC,sBAAsB,EAAE,EAAE,CAAU,CAAC;YACjF,MAAM,OAAO,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,QAAQ,CAAC,CAAC,CAAC,CAAC;YACpE,IAAI,CAAC,OAAO,EAAE,CAAC;gBAAC,OAAO,SAAS,CAAC;YAAC,CAAC;YACnC,OAAO,IAAA,kCAAmB,EACtB,IAAI,EACJ,IAAI,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,EAAE,IAAI,CAAC,MAAM,EACvC,CAAC,EAAE,GAAG,EAAE,GAAG,EACX,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,OAAO,CAAC,MAAM,CAAC,MAAM,CAC9C,CAAC;QACN,CAAC;QAED,MAAM,QAAQ,GAAG,CAAC,MAAM,MAAM,CAAC,aAAa,CAAC,sBAAsB,EAAE,EAAE,CAAC,CAAW,CAAC;QACpF,OAAO,IAAA,kCAAmB,EACtB,KAAK,EACL,IAAI,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,EAAE,IAAI,CAAC,MAAM,EACvC,QAAQ,EAAE,GAAG,EAAE,GAAG,CACrB,CAAC;IACN,CAAC;IAAC,MAAM,CAAC;QACL,OAAO,SAAS,CAAC;IACrB,CAAC;AACL,CAAC;AAED,SAAgB,mBAAmB,CAAC,MAAiB,EAAE,OAAsB;IACzE,MAAM,CAAC,YAAY,CACf,gBAAgB,EAChB;QACI,WAAW,EACP,yGAAyG;YACzG,+FAA+F;YAC/F,+FAA+F;YAC/F,4CAA4C;QAChD,WAAW,EAAE;YACT,MAAM,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAC9B,gDAAgD;gBAChD,gHAAgH,CACnH;SACJ;QACD,WAAW,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE;KACtC,EACD,KAAK,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE;QACjB,IAAI,CAAC;YACD,MAAM,MAAM,GAAG,OAAO,CAAC,SAAS,EAAE,CAAC;YACnC,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,cAAc,EAAY,CAAC;YACvD,MAAM,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE,GAAG,IAAA,uBAAgB,EAAC,MAAM,CAAC,CAAC;YAC7D,MAAM,OAAO,GAAG,MAAM,iBAAiB,CAAC,MAAM,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC;YAE1D,IAAI,WAAW,GAAG,GAAG,MAAM,oBAAoB,GAAG,IAAI,GAAG,6DAA6D,GAAG,IAAI,GAAG,wCAAwC,CAAC;YACzK,IAAI,OAAO,EAAE,CAAC;gBACV,WAAW;oBACP,mDAAmD;wBACnD,6DAA6D,GAAG,eAAe,GAAG,MAAM;wBACxF,6CAA6C;wBAC7C,wBAAwB,OAAO,CAAC,OAAO,cAAc,OAAO,CAAC,MAAM,KAAK;wBACxE,wBAAwB,OAAO,CAAC,OAAO,cAAc,OAAO,CAAC,MAAM,KAAK;wBACxE,gDAAgD,CAAC;YACzD,CAAC;YAED,OAAO;gBACH,OAAO,EAAE;oBACL,EAAE,IAAI,EAAE,OAAgB,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,WAAoB,EAAE;oBACxE,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,WAAW,EAAE;iBAC/C;aACJ,CAAC;QACN,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACX,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,IAAA,uBAAW,EAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC;QAC3F,CAAC;IACL,CAAC,CACJ,CAAC;IAEF,MAAM,CAAC,YAAY,CACf,gBAAgB,EAChB;QACI,WAAW,EACP,uFAAuF;YACvF,4FAA4F;YAC5F,4DAA4D;YAC5D,gFAAgF;YAChF,2EAA2E;YAC3E,kGAAkG;YAClG,gCAAgC;QACpC,WAAW,EAAE;YACT,MAAM,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,8CAA8C,CAAC;YAClF,cAAc,EAAE,OAAC,CAAC,IAAI,CAAC,CAAC,aAAa,EAAE,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC,QAAQ,CAC3E,6GAA6G;gBAC7G,mEAAmE,CACtE;YACD,KAAK,EAAE,OAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAC7B,kEAAkE;gBAClE,iEAAiE;gBACjE,uFAAuF,CAC1F;SACJ;QACD,WAAW,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE;KACtC,EACD,KAAK,EAAE,EAAE,MAAM,EAAE,cAAc,EAAE,KAAK,EAAE,EAAE,EAAE;QACxC,IAAI,CAAC;YACD,IAAI,CAAC,KAAK,EAAE,CAAC;gBACT,MAAM,IAAI,KAAK,CACX,8CAA8C;oBAC9C,qFAAqF;oBACrF,0IAA0I,CAC7I,CAAC;YACN,CAAC;YACD,MAAM,WAAW,GAAG,KAAK,CAAC;YAC1B,MAAM,QAAQ,GAAG,IAAA,kCAAmB,EAAC,WAAW,CAAC,CAAC;YAClD,MAAM,MAAM,GAAG,IAAA,8BAAe,EAAC,QAAQ,CAAC,CAAC;YACzC,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;YACnC,IAAI,CAAC,MAAM,EAAE,CAAC;gBACV,MAAM,IAAI,KAAK,CACX,GAAG,MAAM,gEAAgE,WAAW,GAAG,CAC1F,CAAC;YACN,CAAC;YACD,IAAI,QAAQ,KAAK,QAAQ,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,qBAAqB,EAAE,CAAC;gBAC9D,MAAM,IAAI,KAAK,CAAC,kFAAkF,CAAC,CAAC;YACxG,CAAC;YAED,MAAM,MAAM,GAAG,OAAO,CAAC,SAAS,EAAE,CAAC;YACnC,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,cAAc,EAAY,CAAC;YACvD,MAAM,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE,GAAG,IAAA,uBAAgB,EAAC,MAAM,CAAC,CAAC;YAE7D,IAAI,cAAc,KAAK,MAAM,EAAE,CAAC;gBAC5B,MAAM,UAAU,GAAG,gDAAgD,MAAM,6BAA6B,CAAC;gBACvG,MAAM,IAAI,GAAG,MAAM,IAAA,4BAAa,EAAC,MAAM,EAAE,UAAU,EAAE,WAAW,EAAE,MAAM,EAAE,IAAI,CAAC,CAAC;gBAChF,OAAO,EAAE,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,CAAC,EAAE,CAAC;YAC1D,CAAC;YAED,sFAAsF;YACtF,MAAM,GAAG,GAAG,MAAM,IAAA,4BAAa,EAAC,MAAM,EAAE,IAAA,gCAAiB,EAAC,MAAM,EAAE,GAAG,EAAE,GAAG,CAAC,EAAE,WAAW,EAAE,MAAM,CAAC,CAAC;YAClG,MAAM,MAAM,GAAG,IAAA,gCAAiB,EAAC,GAAG,EAAE,MAAM,CAAC,CAAC;YAC9C,MAAM,OAAO,GAAG,MAAM,iBAAiB,CAAC,MAAM,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC;YAC1D,MAAM,MAAM,GAAG,OAAO;gBAClB,CAAC,CAAC,IAAA,gCAAiB,EAAC,OAAO,EAAE,MAAM,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC;gBAChD,CAAC,CAAC,EAAE,CAAC,EAAE,MAAM,CAAC,CAAC,EAAE,CAAC,EAAE,MAAM,CAAC,CAAC,EAAE,CAAC;YAEnC,OAAO;gBACH,OAAO,EAAE,CAAC;wBACN,IAAI,EAAE,MAAe;wBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,EAAE,GAAG,MAAM,EAAE,KAAK,EAAE,MAAM,CAAC,KAAK,EAAE,CAAC;qBAC3D,CAAC;aACL,CAAC;QACN,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACX,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,IAAA,uBAAW,EAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC;QAC3F,CAAC;IACL,CAAC,CACJ,CAAC;AACN,CAAC"}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
export type LLMProvider = 'anthropic' | 'openai' | 'google';
|
|
1
|
+
export type LLMProvider = 'anthropic' | 'openai' | 'google' | 'amazon';
|
|
2
2
|
export declare function getProviderForModel(model: string): LLMProvider;
|
|
3
3
|
/** Returns the environment variable name that holds the API key for the given provider. */
|
|
4
4
|
export declare function getApiKeyEnvVar(provider: LLMProvider): string;
|
|
@@ -30,9 +30,9 @@ export declare function parseVisionCoords(raw: string, prompt: string): {
|
|
|
30
30
|
};
|
|
31
31
|
/**
|
|
32
32
|
* Sends a base64 screenshot + text prompt to a vision model and returns the raw
|
|
33
|
-
* text response. Dispatches to Anthropic, OpenAI,
|
|
34
|
-
* model name prefix. The caller is responsible for building the prompt
|
|
35
|
-
* parsing the result.
|
|
33
|
+
* text response. Dispatches to Anthropic, OpenAI, Google Gemini, or Amazon Bedrock
|
|
34
|
+
* based on the model name prefix. The caller is responsible for building the prompt
|
|
35
|
+
* and parsing the result.
|
|
36
36
|
*/
|
|
37
37
|
export declare function callVisionLLM(base64: string, textPrompt: string, model: string, apiKey: string, maxTokens?: number): Promise<string>;
|
|
38
38
|
//# sourceMappingURL=vision-utils.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"vision-utils.d.ts","sourceRoot":"","sources":["../../lib/vision-utils.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"vision-utils.d.ts","sourceRoot":"","sources":["../../lib/vision-utils.ts"],"names":[],"mappings":"AAGA,MAAM,MAAM,WAAW,GAAG,WAAW,GAAG,QAAQ,GAAG,QAAQ,GAAG,QAAQ,CAAC;AAcvE,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,MAAM,GAAG,WAAW,CAkB9D;AAED,2FAA2F;AAC3F,wBAAgB,eAAe,CAAC,QAAQ,EAAE,WAAW,GAAG,MAAM,CAO7D;AAED,MAAM,WAAW,YAAY;IACzB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,gFAAgF;IAChF,MAAM,EAAE,MAAM,CAAC;IACf,wCAAwC;IACxC,MAAM,EAAE,MAAM,CAAC;CAClB;AAED;;;GAGG;AACH,wBAAgB,mBAAmB,CAC/B,MAAM,EAAE,OAAO,EACf,KAAK,EAAE,MAAM,EACb,KAAK,EAAE,MAAM,EACb,KAAK,EAAE,MAAM,EACb,KAAK,EAAE,MAAM,EACb,QAAQ,EAAE,MAAM,EAChB,GAAG,EAAE,MAAM,EACX,GAAG,EAAE,MAAM,EACX,QAAQ,CAAC,EAAE,MAAM,EACjB,QAAQ,CAAC,EAAE,MAAM,GAClB,YAAY,CAqBd;AAED,0EAA0E;AAC1E,wBAAgB,iBAAiB,CAC7B,OAAO,EAAE,YAAY,EACrB,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,MAAM,GACb;IAAE,CAAC,EAAE,MAAM,CAAC;IAAC,CAAC,EAAE,MAAM,CAAA;CAAE,CAK1B;AAED,sEAAsE;AACtE,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,GAAG,MAAM,CAWlF;AAED,sFAAsF;AACtF,wBAAgB,iBAAiB,CAC7B,GAAG,EAAE,MAAM,EACX,MAAM,EAAE,MAAM,GACf;IAAE,CAAC,EAAE,MAAM,CAAC;IAAC,CAAC,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,CAUzC;AAgJD;;;;;GAKG;AACH,wBAAsB,aAAa,CAC/B,MAAM,EAAE,MAAM,EACd,UAAU,EAAE,MAAM,EAClB,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,MAAM,EACd,SAAS,SAAM,GAChB,OAAO,CAAC,MAAM,CAAC,CAQjB"}
|
|
@@ -11,12 +11,17 @@ exports.buildVisionPrompt = buildVisionPrompt;
|
|
|
11
11
|
exports.parseVisionCoords = parseVisionCoords;
|
|
12
12
|
exports.callVisionLLM = callVisionLLM;
|
|
13
13
|
const sdk_1 = __importDefault(require("@anthropic-ai/sdk"));
|
|
14
|
+
const client_bedrock_runtime_1 = require("@aws-sdk/client-bedrock-runtime");
|
|
14
15
|
/** Infers the LLM provider from the model identifier. */
|
|
15
16
|
const SUPPORTED_MODELS = [
|
|
16
|
-
'claude-*
|
|
17
|
-
'gpt-*
|
|
17
|
+
'claude-* (e.g. claude-sonnet-4-6)',
|
|
18
|
+
'gpt-* (e.g. gpt-4o)',
|
|
18
19
|
'o1, o3, o4, o1-mini, o3-pro, …',
|
|
19
|
-
'gemini-*
|
|
20
|
+
'gemini-* (e.g. gemini-1.5-pro)',
|
|
21
|
+
'amazon.nova-* (e.g. amazon.nova-pro-v1:0)',
|
|
22
|
+
'us.amazon.nova-* (cross-region inference, e.g. us.amazon.nova-pro-v1:0)',
|
|
23
|
+
'eu.amazon.nova-* (cross-region inference)',
|
|
24
|
+
'ap.amazon.nova-* (cross-region inference)',
|
|
20
25
|
];
|
|
21
26
|
function getProviderForModel(model) {
|
|
22
27
|
const lower = model.toLowerCase();
|
|
@@ -29,6 +34,9 @@ function getProviderForModel(model) {
|
|
|
29
34
|
if (lower.startsWith('claude-')) {
|
|
30
35
|
return 'anthropic';
|
|
31
36
|
}
|
|
37
|
+
if (/^(us\.|eu\.|ap\.)?amazon\./.test(lower)) {
|
|
38
|
+
return 'amazon';
|
|
39
|
+
}
|
|
32
40
|
throw new Error(`Unsupported model: "${model}". ` +
|
|
33
41
|
`Supported model prefixes are:\n ${SUPPORTED_MODELS.join('\n ')}`);
|
|
34
42
|
}
|
|
@@ -37,6 +45,7 @@ function getApiKeyEnvVar(provider) {
|
|
|
37
45
|
switch (provider) {
|
|
38
46
|
case 'openai': return 'OPENAI_API_KEY';
|
|
39
47
|
case 'google': return 'GEMINI_API_KEY';
|
|
48
|
+
case 'amazon': return 'AWS_ACCESS_KEY_ID';
|
|
40
49
|
default: return 'ANTHROPIC_API_KEY';
|
|
41
50
|
}
|
|
42
51
|
}
|
|
@@ -183,17 +192,45 @@ async function callGoogleVision(base64, textPrompt, model, apiKey, maxTokens) {
|
|
|
183
192
|
}
|
|
184
193
|
return text;
|
|
185
194
|
}
|
|
195
|
+
async function callAmazonBedrockVision(base64, textPrompt, model, maxTokens) {
|
|
196
|
+
const client = new client_bedrock_runtime_1.BedrockRuntimeClient({
|
|
197
|
+
region: process.env.AWS_REGION ?? process.env.AWS_DEFAULT_REGION ?? 'us-east-1',
|
|
198
|
+
});
|
|
199
|
+
const command = new client_bedrock_runtime_1.ConverseCommand({
|
|
200
|
+
modelId: model,
|
|
201
|
+
messages: [{
|
|
202
|
+
role: 'user',
|
|
203
|
+
content: [
|
|
204
|
+
{
|
|
205
|
+
image: {
|
|
206
|
+
format: 'png',
|
|
207
|
+
source: { bytes: Buffer.from(base64, 'base64') },
|
|
208
|
+
},
|
|
209
|
+
},
|
|
210
|
+
{ text: textPrompt },
|
|
211
|
+
],
|
|
212
|
+
}],
|
|
213
|
+
inferenceConfig: { maxTokens },
|
|
214
|
+
});
|
|
215
|
+
const response = await client.send(command);
|
|
216
|
+
const text = response.output?.message?.content?.find((b) => 'text' in b && typeof b.text === 'string');
|
|
217
|
+
if (!text) {
|
|
218
|
+
throw new Error(`Unexpected response from Amazon Bedrock model "${model}": no text content in output`);
|
|
219
|
+
}
|
|
220
|
+
return text.text;
|
|
221
|
+
}
|
|
186
222
|
/**
|
|
187
223
|
* Sends a base64 screenshot + text prompt to a vision model and returns the raw
|
|
188
|
-
* text response. Dispatches to Anthropic, OpenAI,
|
|
189
|
-
* model name prefix. The caller is responsible for building the prompt
|
|
190
|
-
* parsing the result.
|
|
224
|
+
* text response. Dispatches to Anthropic, OpenAI, Google Gemini, or Amazon Bedrock
|
|
225
|
+
* based on the model name prefix. The caller is responsible for building the prompt
|
|
226
|
+
* and parsing the result.
|
|
191
227
|
*/
|
|
192
228
|
async function callVisionLLM(base64, textPrompt, model, apiKey, maxTokens = 256) {
|
|
193
229
|
const provider = getProviderForModel(model);
|
|
194
230
|
switch (provider) {
|
|
195
231
|
case 'openai': return callOpenAIVision(base64, textPrompt, model, apiKey, maxTokens);
|
|
196
232
|
case 'google': return callGoogleVision(base64, textPrompt, model, apiKey, maxTokens);
|
|
233
|
+
case 'amazon': return callAmazonBedrockVision(base64, textPrompt, model, maxTokens);
|
|
197
234
|
default: return callAnthropicVision(base64, textPrompt, model, apiKey, maxTokens);
|
|
198
235
|
}
|
|
199
236
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"vision-utils.js","sourceRoot":"","sources":["../../lib/vision-utils.ts"],"names":[],"mappings":";;;;;
|
|
1
|
+
{"version":3,"file":"vision-utils.js","sourceRoot":"","sources":["../../lib/vision-utils.ts"],"names":[],"mappings":";;;;;AAiBA,kDAkBC;AAGD,0CAOC;AAeD,kDAgCC;AAGD,8CASC;AAGD,8CAWC;AAGD,8CAaC;AAsJD,sCAcC;AA1SD,4DAA0C;AAC1C,4EAAwF;AAIxF,yDAAyD;AACzD,MAAM,gBAAgB,GAAG;IACrB,2CAA2C;IAC3C,gCAAgC;IAChC,gCAAgC;IAChC,wCAAwC;IACxC,8CAA8C;IAC9C,yEAAyE;IACzE,2CAA2C;IAC3C,2CAA2C;CAC9C,CAAC;AAEF,SAAgB,mBAAmB,CAAC,KAAa;IAC7C,MAAM,KAAK,GAAG,KAAK,CAAC,WAAW,EAAE,CAAC;IAClC,IAAI,KAAK,CAAC,UAAU,CAAC,MAAM,CAAC,IAAI,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;QACjD,OAAO,QAAQ,CAAC;IACpB,CAAC;IACD,IAAI,KAAK,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QAC9B,OAAO,QAAQ,CAAC;IACpB,CAAC;IACD,IAAI,KAAK,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QAC9B,OAAO,WAAW,CAAC;IACvB,CAAC;IACD,IAAI,4BAA4B,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;QAC3C,OAAO,QAAQ,CAAC;IACpB,CAAC;IACD,MAAM,IAAI,KAAK,CACX,uBAAuB,KAAK,KAAK;QACjC,oCAAoC,gBAAgB,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CACtE,CAAC;AACN,CAAC;AAED,2FAA2F;AAC3F,SAAgB,eAAe,CAAC,QAAqB;IACjD,QAAQ,QAAQ,EAAE,CAAC;QACf,KAAK,QAAQ,CAAC,CAAC,OAAO,gBAAgB,CAAC;QACvC,KAAK,QAAQ,CAAC,CAAC,OAAO,gBAAgB,CAAC;QACvC,KAAK,QAAQ,CAAC,CAAC,OAAO,mBAAmB,CAAC;QAC1C,OAAO,CAAC,CAAC,OAAO,mBAAmB,CAAC;IACxC,CAAC;AACL,CAAC;AAWD;;;GAGG;AACH,SAAgB,mBAAmB,CAC/B,MAAe,EACf,KAAa,EACb,KAAa,EACb,KAAa,EACb,KAAa,EACb,QAAgB,EAChB,GAAW,EACX,GAAW,EACX,QAAiB,EACjB,QAAiB;IAEjB,IAAI,MAAM,EAAE,CAAC;QACT,OAAO;YACH,OAAO,EAAE,CAAC;YACV,OAAO,EAAE,CAAC;YACV,MAAM,EAAE,CAAC,QAAQ,IAAI,GAAG,CAAC,GAAG,GAAG;YAC/B,MAAM,EAAE,CAAC,QAAQ,IAAI,GAAG,CAAC,GAAG,GAAG;SAClC,CAAC;IACN,CAAC;IAED,8EAA8E;IAC9E,4DAA4D;IAC5D,MAAM,SAAS,GAAG,QAAQ,GAAG,IAAI,IAAI,IAAI,CAAC,GAAG,CAAC,KAAK,GAAG,QAAQ,GAAG,GAAG,CAAC,GAAG,GAAG,GAAG,IAAI,CAAC;IACnF,MAAM,KAAK,GAAG,SAAS,CAAC,CAAC,CAAC,KAAK,GAAG,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC;IACnD,MAAM,KAAK,GAAG,SAAS,CAAC,CAAC,CAAC,KAAK,GAAG,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC;IACnD,OAAO;QACH,OAAO,EAAE,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK;QACzD,OAAO,EAAE,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK;QACzD,MAAM,EAAE,KAAK,GAAG,GAAG;QACnB,MAAM,EAAE,KAAK,GAAG,GAAG;KACtB,CAAC;AACN,CAAC;AAED,0EAA0E;AAC1E,SAAgB,iBAAiB,CAC7B,OAAqB,EACrB,IAAY,EACZ,IAAY;IAEZ,OAAO;QACH,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,OAAO,GAAG,IAAI,GAAG,OAAO,CAAC,MAAM,CAAC;QACtD,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,OAAO,GAAG,IAAI,GAAG,OAAO,CAAC,MAAM,CAAC;KACzD,CAAC;AACN,CAAC;AAED,sEAAsE;AACtE,SAAgB,iBAAiB,CAAC,MAAc,EAAE,GAAW,EAAE,GAAW;IACtE,OAAO,CACH,oDAAoD,MAAM,OAAO;QACjE,gBAAgB,GAAG,IAAI,GAAG,cAAc;QACxC,oDAAoD;QACpD,0FAA0F;QAC1F,oFAAoF;QACpF,2BAA2B,GAAG,6BAA6B,GAAG,OAAO;QACrE,iDAAiD;QACjD,4CAA4C,CAC/C,CAAC;AACN,CAAC;AAED,sFAAsF;AACtF,SAAgB,iBAAiB,CAC7B,GAAW,EACX,MAAc;IAEd,MAAM,SAAS,GAAG,GAAG,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;IAC3C,IAAI,CAAC,SAAS,EAAE,CAAC;QACb,MAAM,IAAI,KAAK,CAAC,4BAA4B,GAAG,EAAE,CAAC,CAAC;IACvD,CAAC;IACD,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,CAA4C,CAAC;IACnF,IAAI,MAAM,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC;QAClB,MAAM,IAAI,KAAK,CAAC,uBAAuB,MAAM,GAAG,CAAC,CAAC;IACtD,CAAC;IACD,OAAO,MAAM,CAAC;AAClB,CAAC;AAED,KAAK,UAAU,mBAAmB,CAC9B,MAAc,EACd,UAAkB,EAClB,KAAa,EACb,MAAc,EACd,SAAiB;IAEjB,MAAM,MAAM,GAAG,IAAI,aAAS,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;IACzC,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC;QAC1C,KAAK;QACL,UAAU,EAAE,SAAS;QACrB,QAAQ,EAAE,CAAC;gBACP,IAAI,EAAE,MAAM;gBACZ,OAAO,EAAE;oBACL;wBACI,IAAI,EAAE,OAAO;wBACb,MAAM,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,UAAU,EAAE,WAAW,EAAE,IAAI,EAAE,MAAM,EAAE;qBACpE;oBACD,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU,EAAE;iBACrC;aACJ,CAAC;KACL,CAAC,CAAC;IACH,OAAO,QAAQ,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,EAAE,IAAI,IAAI,EAAE,CAAC;AACvE,CAAC;AAED,KAAK,UAAU,gBAAgB,CAC3B,MAAc,EACd,UAAkB,EAClB,KAAa,EACb,MAAc,EACd,SAAiB;IAEjB,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,4CAA4C,EAAE;QAClE,MAAM,EAAE,MAAM;QACd,OAAO,EAAE;YACL,cAAc,EAAE,kBAAkB;YAClC,eAAe,EAAE,UAAU,MAAM,EAAE;SACtC;QACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;YACjB,KAAK;YACL,UAAU,EAAE,SAAS;YACrB,QAAQ,EAAE,CAAC;oBACP,IAAI,EAAE,MAAM;oBACZ,OAAO,EAAE;wBACL,EAAE,IAAI,EAAE,WAAW,EAAE,SAAS,EAAE,EAAE,GAAG,EAAE,yBAAyB,MAAM,EAAE,EAAE,EAAE;wBAC5E,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU,EAAE;qBACrC;iBACJ,CAAC;SACL,CAAC;KACL,CAAC,CAAC;IACH,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;QACV,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC;QAC9B,IAAI,OAAe,CAAC;QACpB,IAAI,CAAC;YACD,OAAO,GAAI,IAAI,CAAC,KAAK,CAAC,IAAI,CAAqC,CAAC,KAAK,EAAE,OAAO,IAAI,IAAI,CAAC;QAC3F,CAAC;QAAC,MAAM,CAAC;YACL,OAAO,GAAG,IAAI,IAAI,GAAG,CAAC,UAAU,CAAC;QACrC,CAAC;QACD,MAAM,IAAI,KAAK,CAAC,qBAAqB,OAAO,EAAE,CAAC,CAAC;IACpD,CAAC;IACD,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,IAAI,EAA2D,CAAC;IACvF,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC;IACpD,IAAI,OAAO,OAAO,KAAK,QAAQ,EAAE,CAAC;QAC9B,MAAM,IAAI,KAAK,CAAC,0CAA0C,KAAK,kDAAkD,CAAC,CAAC;IACvH,CAAC;IACD,OAAO,OAAO,CAAC;AACnB,CAAC;AAED,KAAK,UAAU,gBAAgB,CAC3B,MAAc,EACd,UAAkB,EAClB,KAAa,EACb,MAAc,EACd,SAAiB;IAEjB,MAAM,GAAG,GAAG,2DAA2D,KAAK,kBAAkB,CAAC;IAC/F,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;QACzB,MAAM,EAAE,MAAM;QACd,OAAO,EAAE,EAAE,cAAc,EAAE,kBAAkB,EAAE,gBAAgB,EAAE,MAAM,EAAE;QACzE,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;YACjB,QAAQ,EAAE,CAAC;oBACP,KAAK,EAAE;wBACH,EAAE,WAAW,EAAE,EAAE,SAAS,EAAE,WAAW,EAAE,IAAI,EAAE,MAAM,EAAE,EAAE;wBACzD,EAAE,IAAI,EAAE,UAAU,EAAE;qBACvB;iBACJ,CAAC;YACF,gBAAgB,EAAE,EAAE,eAAe,EAAE,SAAS,EAAE;SACnD,CAAC;KACL,CAAC,CAAC;IACH,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;QACV,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC;QAC9B,IAAI,OAAe,CAAC;QACpB,IAAI,CAAC;YACD,OAAO,GAAI,IAAI,CAAC,KAAK,CAAC,IAAI,CAAqC,CAAC,KAAK,EAAE,OAAO,IAAI,IAAI,CAAC;QAC3F,CAAC;QAAC,MAAM,CAAC;YACL,OAAO,GAAG,IAAI,IAAI,GAAG,CAAC,UAAU,CAAC;QACrC,CAAC;QACD,MAAM,IAAI,KAAK,CAAC,qBAAqB,OAAO,EAAE,CAAC,CAAC;IACpD,CAAC;IACD,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,IAAI,EAE1B,CAAC;IACF,MAAM,IAAI,GAAG,IAAI,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC;IAC7D,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;QAC3B,MAAM,IAAI,KAAK,CAAC,0CAA0C,KAAK,mDAAmD,CAAC,CAAC;IACxH,CAAC;IACD,OAAO,IAAI,CAAC;AAChB,CAAC;AAED,KAAK,UAAU,uBAAuB,CAClC,MAAc,EACd,UAAkB,EAClB,KAAa,EACb,SAAiB;IAEjB,MAAM,MAAM,GAAG,IAAI,6CAAoB,CAAC;QACpC,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,UAAU,IAAI,OAAO,CAAC,GAAG,CAAC,kBAAkB,IAAI,WAAW;KAClF,CAAC,CAAC;IACH,MAAM,OAAO,GAAG,IAAI,wCAAe,CAAC;QAChC,OAAO,EAAE,KAAK;QACd,QAAQ,EAAE,CAAC;gBACP,IAAI,EAAE,MAAM;gBACZ,OAAO,EAAE;oBACL;wBACI,KAAK,EAAE;4BACH,MAAM,EAAE,KAAK;4BACb,MAAM,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,IAAI,CAAC,MAAM,EAAE,QAAQ,CAAC,EAAE;yBACnD;qBACJ;oBACD,EAAE,IAAI,EAAE,UAAU,EAAE;iBACvB;aACJ,CAAC;QACF,eAAe,EAAE,EAAE,SAAS,EAAE;KACjC,CAAC,CAAC;IACH,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAC5C,MAAM,IAAI,GAAG,QAAQ,CAAC,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,IAAI,CAAC,IAAI,OAAO,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAiC,CAAC;IACvI,IAAI,CAAC,IAAI,EAAE,CAAC;QACR,MAAM,IAAI,KAAK,CAAC,kDAAkD,KAAK,8BAA8B,CAAC,CAAC;IAC3G,CAAC;IACD,OAAO,IAAI,CAAC,IAAI,CAAC;AACrB,CAAC;AAED;;;;;GAKG;AACI,KAAK,UAAU,aAAa,CAC/B,MAAc,EACd,UAAkB,EAClB,KAAa,EACb,MAAc,EACd,SAAS,GAAG,GAAG;IAEf,MAAM,QAAQ,GAAG,mBAAmB,CAAC,KAAK,CAAC,CAAC;IAC5C,QAAQ,QAAQ,EAAE,CAAC;QACf,KAAK,QAAQ,CAAC,CAAC,OAAO,gBAAgB,CAAC,MAAM,EAAE,UAAU,EAAE,KAAK,EAAE,MAAM,EAAE,SAAS,CAAC,CAAC;QACrF,KAAK,QAAQ,CAAC,CAAC,OAAO,gBAAgB,CAAC,MAAM,EAAE,UAAU,EAAE,KAAK,EAAE,MAAM,EAAE,SAAS,CAAC,CAAC;QACrF,KAAK,QAAQ,CAAC,CAAC,OAAO,uBAAuB,CAAC,MAAM,EAAE,UAAU,EAAE,KAAK,EAAE,SAAS,CAAC,CAAC;QACpF,OAAO,CAAC,CAAC,OAAO,mBAAmB,CAAC,MAAM,EAAE,UAAU,EAAE,KAAK,EAAE,MAAM,EAAE,SAAS,CAAC,CAAC;IACtF,CAAC;AACL,CAAC"}
|