appium-mcp 1.34.0 → 1.34.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. package/CHANGELOG.md +12 -0
  2. package/package.json +6 -2
  3. package/scripts/zip-assets.mjs +55 -0
  4. package/server.json +2 -2
  5. package/src/resources/submodules.zip +0 -0
  6. package/src/tests/README.md +0 -83
  7. package/src/tests/__mocks__/@appium/support.ts +0 -73
  8. package/src/tests/benchmark_model/TEST_REPORT.md +0 -331
  9. package/src/tests/benchmark_model/benchmark_model.ts +0 -798
  10. package/src/tests/benchmark_model/image.png +0 -0
  11. package/src/tests/benchmark_model/output/DeepSeek_V3_2_annotated.png +0 -0
  12. package/src/tests/benchmark_model/output/Qwen3_VL_235B_A22B_Instruct_annotated.png +0 -0
  13. package/src/tests/benchmark_model/output/claude_sonnet_4_6_annotated.png +0 -0
  14. package/src/tests/benchmark_model/output/doubao_seed_2_0_pro_260215_annotated.png +0 -0
  15. package/src/tests/benchmark_model/output/gemini_2_5_flash_annotated.png +0 -0
  16. package/src/tests/benchmark_model/output/gemini_2_5_pro_annotated.png +0 -0
  17. package/src/tests/benchmark_model/output/gemini_3_flash_preview_annotated.png +0 -0
  18. package/src/tests/benchmark_model/output/gemini_3_pro_preview_annotated.png +0 -0
  19. package/src/tests/benchmark_model/output/gpt_5_1_annotated.png +0 -0
  20. package/src/tests/benchmark_model/output/gpt_5_2_annotated.png +0 -0
  21. package/src/tests/benchmark_model/output/gpt_5_2_pro_annotated.png +0 -0
  22. package/src/tests/benchmark_model/output/gpt_5_nano_annotated.png +0 -0
  23. package/src/tests/benchmark_model/output/grok_4_1_fast_annotated.png +0 -0
  24. package/src/tests/benchmark_model/output/kimi_k2_5_annotated.png +0 -0
  25. package/src/tests/benchmark_model/output/qwen3_vl_8b_instruct_annotated.png +0 -0
  26. package/src/tests/benchmark_model/output/qwen3_vl_plus_annotated.png +0 -0
  27. package/src/tests/generate-all-locators.test.ts +0 -175
  28. package/src/tests/screenshot.test.ts +0 -332
  29. package/src/tests/session-store.test.ts +0 -498
  30. package/src/tests/test-setup-wda.ts +0 -240
  31. package/src/tests/tools/session/battery-info.test.ts +0 -102
  32. package/src/tests/tools/session/create-session.test.ts +0 -145
  33. package/src/tests/tools/session/file-transfer.test.ts +0 -158
  34. package/src/tests/vision-finder.test.ts +0 -728
package/CHANGELOG.md CHANGED
@@ -1,3 +1,15 @@
1
+ ## [1.34.2](https://github.com/appium/appium-mcp/compare/v1.34.1...v1.34.2) (2026-03-23)
2
+
3
+ ### Miscellaneous Chores
4
+
5
+ * add zip/unzip assets to distribute them in small size ([#227](https://github.com/appium/appium-mcp/issues/227)) ([126f2dd](https://github.com/appium/appium-mcp/commit/126f2dd7ede8696853eb887d7310d6aa22ea8e68))
6
+
7
+ ## [1.34.1](https://github.com/appium/appium-mcp/compare/v1.34.0...v1.34.1) (2026-03-23)
8
+
9
+ ### Miscellaneous Chores
10
+
11
+ * exclude src/tests ([#226](https://github.com/appium/appium-mcp/issues/226)) ([0c2a72d](https://github.com/appium/appium-mcp/commit/0c2a72db5bde392891482b9c3849bf9b71b51a87))
12
+
1
13
  ## [1.34.0](https://github.com/appium/appium-mcp/compare/v1.33.0...v1.34.0) (2026-03-22)
2
14
 
3
15
  ### Features
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "appium-mcp",
3
3
  "mcpName": "io.github.appium/appium-mcp",
4
- "version": "1.34.0",
4
+ "version": "1.34.2",
5
5
  "type": "module",
6
6
  "repository": {
7
7
  "type": "git",
@@ -36,7 +36,10 @@
36
36
  "index-docs": "node dist/scripts/simple-index-documentation.js",
37
37
  "query-docs": "node dist/scripts/simple-query-documentation.js",
38
38
  "sync-version": "node scripts/sync-version.mjs",
39
- "version": "npm run sync-version"
39
+ "version": "npm run sync-version",
40
+ "zip-assets": "node scripts/zip-assets.mjs zip",
41
+ "unzip-assets": "node scripts/zip-assets.mjs unzip",
42
+ "postinstall": "node scripts/zip-assets.mjs unzip"
40
43
  },
41
44
  "author": "",
42
45
  "license": "Apache-2.0",
@@ -85,6 +88,7 @@
85
88
  "files": [
86
89
  "scripts",
87
90
  "src",
91
+ "!src/tests",
88
92
  "dist",
89
93
  "CHANGELOG.md",
90
94
  "README.md",
@@ -0,0 +1,55 @@
1
+ import path from 'node:path';
2
+ import {fileURLToPath} from 'node:url';
3
+ import fs from 'node:fs/promises';
4
+
5
+ import {zip as appiumZip} from '@appium/support';
6
+
7
+ const __filename = fileURLToPath(import.meta.url);
8
+ const __dirname = path.dirname(__filename);
9
+
10
+ // Paths are resolved relative to this script: ../src/resources/submodules is zipped to ../src/resources/submodules.zip and unzipped back into the same directory.
11
+ const ZIP_SOURCE_DIR = path.join(__dirname, '..', 'src', 'resources', 'submodules');
12
+ const ZIP_OUTPUT_PATH = path.join(__dirname, '..', 'src', 'resources', 'submodules.zip');
13
+ const UNZIP_TARGET_DIR = path.join(__dirname, '..', 'src', 'resources', 'submodules');
14
+ // Zip the contents of ZIP_SOURCE_DIR, write the archive to ZIP_OUTPUT_PATH, and log both paths.
15
+ export async function zipAssets() {
16
+ const zipBase64 = await appiumZip.toInMemoryZip(ZIP_SOURCE_DIR);
17
+ const zipBuffer = Buffer.from(zipBase64, 'base64'); // NOTE(review): assumes toInMemoryZip yields base64 text; if it already returns a Buffer the encoding argument is ignored and this is just a copy - confirm
18
+ await fs.writeFile(ZIP_OUTPUT_PATH, zipBuffer);
19
+ console.log(`Zipped ${ZIP_SOURCE_DIR} -> ${ZIP_OUTPUT_PATH}`);
20
+ }
21
+ // Resolve to true when fs.access succeeds for filePath, and to false when it rejects for any reason.
22
+ const fileExists = async (filePath) => {
23
+ try {
24
+ await fs.access(filePath);
25
+ return true;
26
+ } catch {
27
+ return false;
28
+ }
29
+ };
30
+
31
+ export async function unzipAssets() {
32
+ if (!(await fileExists(ZIP_OUTPUT_PATH))) {
33
+ console.log(`Target directory ${ZIP_OUTPUT_PATH} does not exist. Skipping unzip.`);
34
+ return;
35
+ }
36
+ await appiumZip.extractAllTo(ZIP_OUTPUT_PATH, UNZIP_TARGET_DIR);
37
+ console.log(`Unzipped ${ZIP_OUTPUT_PATH} -> ${UNZIP_TARGET_DIR}`);
38
+ }
39
+
40
+ if (import.meta.url === `file://${process.argv[1]}`) {
41
+ const cmd = process.argv[2];
42
+ if (cmd === 'zip') {
43
+ zipAssets().catch((e) => {
44
+ console.error(e);
45
+ process.exitCode = 1;
46
+ });
47
+ } else if (cmd === 'unzip') {
48
+ unzipAssets().catch((e) => {
49
+ console.error(e);
50
+ process.exitCode = 1;
51
+ });
52
+ } else {
53
+ console.log('Usage: node zip-assets.mjs [zip|unzip]');
54
+ }
55
+ }
package/server.json CHANGED
@@ -3,12 +3,12 @@
3
3
  "name": "io.github.appium/appium-mcp",
4
4
  "title": "MCP Appium - Mobile Development and Automation Server",
5
5
  "description": "MCP server for Appium mobile automation on iOS and Android devices with test creation tools.",
6
- "version": "1.34.0",
6
+ "version": "1.34.2",
7
7
  "packages": [
8
8
  {
9
9
  "registryType": "npm",
10
10
  "identifier": "appium-mcp",
11
- "version": "1.34.0",
11
+ "version": "1.34.2",
12
12
  "transport": {
13
13
  "type": "stdio"
14
14
  }
Binary file
@@ -1,83 +0,0 @@
1
- # Tests for Mobile Agent
2
-
3
- This directory contains unit tests for the Mobile Agent project.
4
-
5
- ## Running Tests
6
-
7
- To run all tests:
8
-
9
- ```bash
10
- npm test
11
- ```
12
-
13
- To run specific test files:
14
-
15
- ```bash
16
- # Run tests for the locator generation functionality
17
- npm run test:locators
18
- ```
19
-
20
- ## Test Files
21
-
22
- ### generate-all-locators.test.ts
23
-
24
- This file contains tests for the `generateAllElementLocators` function, which is responsible for generating locators for all elements in a page source XML.
25
-
26
- The tests cover:
27
-
28
- 1. Basic functionality with valid XML
29
- 2. Handling of invalid/empty XML
30
- 3. Various filtering options:
31
- - `includeTagNames` - Include only specific element types
32
- - `excludeTagNames` - Exclude specific element types
33
- - `minAttributeCount` - Filter elements by minimum attribute count
34
- - `fetchableOnly` - Include only interactable elements (platform-specific)
35
- - `clickableOnly` - Include only clickable elements
36
-
37
- ## Adding New Tests
38
-
39
- When adding new tests:
40
-
41
- 1. Create a new test file in the `src/tests` directory with the `.test.ts` extension
42
- 2. Import the necessary functions and types from the source files
43
- 3. Use Jest's `describe`, `test`, and `expect` functions to structure your tests
44
- 4. Add a new script to `package.json` for running your specific test file
45
-
46
- ## Test Structure
47
-
48
- Tests should follow this general structure:
49
-
50
- ```typescript
51
- import { describe, test, expect } from '@jest/globals';
52
- import { functionToTest } from '../path/to/function.js';
53
-
54
- describe('functionToTest', () => {
55
- test('should do something specific', () => {
56
- // Arrange - set up test data
57
- const input = 'some input';
58
-
59
- // Act - call the function
60
- const result = functionToTest(input);
61
-
62
- // Assert - verify the result
63
- expect(result).toBe('expected output');
64
- });
65
- });
66
- ```
67
-
68
- ## Mocking
69
-
70
- For tests that require mocking dependencies, use Jest's mocking capabilities:
71
-
72
- ```typescript
73
- import { jest } from '@jest/globals';
74
-
75
- // Mock a module
76
- jest.mock('../path/to/module.js');
77
-
78
- // Create a mock function
79
- const mockFunction = jest.fn();
80
- mockFunction.mockReturnValue('mocked value');
81
- ```
82
-
83
- Note that when working with ESM modules, you may need to use different mocking approaches than with CommonJS modules.
@@ -1,73 +0,0 @@
1
- // Mock @appium/support for Jest tests
2
- // This avoids the ESM/CommonJS mismatch with uuid dependency
3
-
4
- export const logger = {
5
- getLogger: (_name: string) =>
6
- // Simple logger implementation for tests
7
- // No-op functions that match the logger interface
8
- ({
9
- debug: (_message: string, ..._args: any[]) => {
10
- // Silent in tests by default
11
- },
12
- info: (_message: string, ..._args: any[]) => {
13
- // Silent in tests by default
14
- },
15
- warn: (_message: string, ..._args: any[]) => {
16
- // Silent in tests by default
17
- },
18
- error: (_message: string, ..._args: any[]) => {
19
- // Silent in tests by default
20
- },
21
- trace: (_message: string, ..._args: any[]) => {
22
- // Silent in tests by default
23
- },
24
- }),
25
- };
26
-
27
- /**
28
- * Mock imageUtil for Jest tests.
29
- *
30
- * A single shared sharpInstance is used across all calls so tests can
31
- * inspect and override its methods (resize / jpeg / toBuffer) via
32
- * mockSharpInstance exported below.
33
- */
34
-
35
- export type MockSharpInstance = {
36
- resizeCalls: Array<[number, number]>;
37
- toBufferImpl: () => Promise<Buffer>;
38
- resize: (w: number, h: number) => MockSharpInstance;
39
- jpeg: (_opts?: unknown) => MockSharpInstance;
40
- toBuffer: () => Promise<Buffer>;
41
- reset: () => void;
42
- };
43
-
44
- /** Shared instance – tests can mutate toBufferImpl or inspect resizeCalls */
45
- export const mockSharpInstance: MockSharpInstance = {
46
- resizeCalls: [],
47
- toBufferImpl: () => Promise.resolve(Buffer.from('mock-compressed-image')),
48
- resize(w: number, h: number) {
49
- this.resizeCalls.push([w, h]);
50
- return this;
51
- },
52
- jpeg(_opts?: unknown) {
53
- return this;
54
- },
55
- toBuffer() {
56
- return this.toBufferImpl();
57
- },
58
- reset() {
59
- this.resizeCalls = [];
60
- this.toBufferImpl = () =>
61
- Promise.resolve(Buffer.from('mock-compressed-image'));
62
- },
63
- };
64
-
65
- export const imageUtil = {
66
- requireSharp: () => (_input: Buffer) => mockSharpInstance,
67
- };
68
-
69
- // Export other commonly used utilities from @appium/support if needed
70
- export default {
71
- logger,
72
- imageUtil,
73
- };
@@ -1,331 +0,0 @@
1
- # Model Benchmark Test Report
2
-
3
- **Test Date:** 2026/2/26 17:38:15
4
- **Test Type:** Automation Testing - Click Action Recognition
5
-
6
- ---
7
-
8
- ## Summary
9
-
10
- | Model Name | Duration(ms) | Status | Accuracy Score | Annotated Image |
11
- |------------|--------------|--------|----------------|-----------------|
12
- | qwen3-vl-plus | 12649 | ✅ Success | 100% | [View](output/qwen3_vl_plus_annotated.png) |
13
- | qwen3-vl-8b-instruct | 10809 | ✅ Success | 100% | [View](output/qwen3_vl_8b_instruct_annotated.png) |
14
- | Qwen3-VL-235B-A22B-Instruct | 8417 | ✅ Success | 100% | [View](output/Qwen3_VL_235B_A22B_Instruct_annotated.png) |
15
- | doubao-seed-2-0-pro-260215 | 24796 | ✅ Success | 100% | [View](output/doubao_seed_2_0_pro_260215_annotated.png) |
16
- | gemini-3-flash-preview | 17353 | ✅ Success | 100% | [View](output/gemini_3_flash_preview_annotated.png) |
17
- | gemini-3-pro-preview | 51574 | ✅ Success | 100% | [View](output/gemini_3_pro_preview_annotated.png) |
18
- | gemini-2.5-pro | 28762 | ✅ Success | 100% | [View](output/gemini_2_5_pro_annotated.png) |
19
- | gemini-2.5-flash | 17583 | ✅ Success | 100% | [View](output/gemini_2_5_flash_annotated.png) |
20
- | gpt-5.2 | 18461 | ✅ Success | 95% | [View](output/gpt_5_2_annotated.png) |
21
- | gpt-5.2-pro | 43517 | ✅ Success | 75% | [View](output/gpt_5_2_pro_annotated.png) |
22
- | kimi-k2.5 | 13021 | ✅ Success | 45% | [View](output/kimi_k2_5_annotated.png) |
23
- | gpt-5.1 | 18604 | ✅ Success | 45% | [View](output/gpt_5_1_annotated.png) |
24
- | gpt-5-nano | 25101 | ✅ Success | 20% | [View](output/gpt_5_nano_annotated.png) |
25
- | DeepSeek-V3.2 | 10187 | ✅ Success | 0% | [View](output/DeepSeek_V3_2_annotated.png) |
26
- | claude-sonnet-4-6 | 68981 | ✅ Success | 0% | [View](output/claude_sonnet_4_6_annotated.png) |
27
- | grok-4.1-fast | 16239 | ✅ Success | 0% | [View](output/grok_4_1_fast_annotated.png) |
28
-
29
- ### Statistics
30
-
31
- - **Total**: 18 models
32
- - **Success**: 16 (88.9%)
33
- - **Failed**: 2
34
- - **High Accuracy (≥70%)**: 10 (55.6%)
35
- - **Avg Accuracy Score**: 60.0%
36
- - **Average Duration**: 31332.22ms
37
- - **Min Duration**: 8417ms
38
- - **Max Duration**: 120021ms
39
-
40
- ---
41
-
42
- ## Detailed Results
43
-
44
-
45
- ============================================================
46
- ## DeepSeek-V3.2
47
-
48
- **Started at:** 2026/2/26 17:38:15
49
-
50
-
51
- ============================================================
52
- ## qwen3-vl-plus
53
-
54
- **Started at:** 2026/2/26 17:38:15
55
-
56
-
57
- ============================================================
58
- ## qwen3-vl-8b-instruct
59
-
60
- **Started at:** 2026/2/26 17:38:15
61
-
62
-
63
- ============================================================
64
- ## Qwen3-VL-235B-A22B-Instruct
65
-
66
- **Started at:** 2026/2/26 17:38:15
67
-
68
-
69
- ============================================================
70
- ## doubao-seed-2-0-pro-260215
71
-
72
- **Started at:** 2026/2/26 17:38:15
73
-
74
-
75
- ============================================================
76
- ## kimi-k2.5
77
-
78
- **Started at:** 2026/2/26 17:38:15
79
-
80
-
81
- ============================================================
82
- ## gpt-5.2-pro
83
-
84
- **Started at:** 2026/2/26 17:38:15
85
-
86
-
87
- ============================================================
88
- ## gpt-5.2
89
-
90
- **Started at:** 2026/2/26 17:38:15
91
-
92
-
93
- ============================================================
94
- ## gpt-5.1
95
-
96
- **Started at:** 2026/2/26 17:38:15
97
-
98
-
99
- ============================================================
100
- ## gpt-5-nano
101
-
102
- **Started at:** 2026/2/26 17:38:15
103
-
104
-
105
- ============================================================
106
- ## claude-sonnet-4-6
107
-
108
- **Started at:** 2026/2/26 17:38:15
109
-
110
-
111
- ============================================================
112
- ## gemini-3-flash-preview
113
-
114
- **Started at:** 2026/2/26 17:38:15
115
-
116
-
117
- ============================================================
118
- ## gemini-3-pro-preview
119
-
120
- **Started at:** 2026/2/26 17:38:15
121
-
122
-
123
- ============================================================
124
- ## gemini-2.5-pro
125
-
126
- **Started at:** 2026/2/26 17:38:15
127
-
128
-
129
- ============================================================
130
- ## gemini-2.5-flash
131
-
132
- **Started at:** 2026/2/26 17:38:15
133
-
134
-
135
- ============================================================
136
- ## grok-4.1-fast
137
-
138
- **Started at:** 2026/2/26 17:38:15
139
-
140
-
141
- **BBox:** [45, 526, 958, 579]
142
- **Target:** 搜索酒店
143
- **Annotated Image:** [Qwen3_VL_235B_A22B_Instruct_annotated.png](output/Qwen3_VL_235B_A22B_Instruct_annotated.png)
144
- **BBox:** [135, 2150, 1035, 2270]
145
- **Target:** 搜索酒店
146
- **Annotated Image:** [DeepSeek_V3_2_annotated.png](output/DeepSeek_V3_2_annotated.png)
147
- **BBox:** [46, 527, 963, 580]
148
- **Target:** 搜索酒店
149
- **Annotated Image:** [qwen3_vl_8b_instruct_annotated.png](output/qwen3_vl_8b_instruct_annotated.png)
150
- **BBox:** [42, 526, 958, 578]
151
- **Target:** 搜索酒店
152
- **Annotated Image:** [qwen3_vl_plus_annotated.png](output/qwen3_vl_plus_annotated.png)
153
- **BBox:** [50, 1120, 1120, 1220]
154
- **Target:** 搜索酒店
155
- **Annotated Image:** [kimi_k2_5_annotated.png](output/kimi_k2_5_annotated.png)
156
- **Accuracy Score:** 100%
157
- **Status:** ✅ Success
158
- **Duration:** 8417ms
159
- **Response:**
160
- ```
161
- action: **CLICK**
162
- Parameters: {"target": "搜索酒店", "bbox_2d": [45, 526, 958, 579]}
163
- ```
164
-
165
- **Accuracy Score:** 0%
166
- **Status:** ✅ Success
167
- **Duration:** 10187ms
168
- **Response:**
169
- ```
170
- action: **CLICK**
171
- Parameters: {"target": "搜索酒店", "bbox_2d": [135, 2150, 1035, 2270]}
172
- ```
173
-
174
- **BBox:** [150, 2100, 1020, 2280]
175
- **Target:** 搜索酒店
176
- **Annotated Image:** [grok_4_1_fast_annotated.png](output/grok_4_1_fast_annotated.png)
177
- **Accuracy Score:** 100%
178
- **Status:** ✅ Success
179
- **Duration:** 10809ms
180
- **Response:**
181
- ```
182
- action: **CLICK**
183
- Parameters: {"target": "搜索酒店", "bbox_2d": [46, 527, 963, 580]}
184
- ```
185
-
186
- **BBox:** [44, 1323, 1126, 1471]
187
- **Target:** 搜索酒店
188
- **Annotated Image:** [gemini_3_flash_preview_annotated.png](output/gemini_3_flash_preview_annotated.png)
189
- **BBox:** [58, 528, 1112, 598]
190
- **Target:** 搜索酒店
191
- **Annotated Image:** [gemini_2_5_flash_annotated.png](output/gemini_2_5_flash_annotated.png)
192
- **BBox:** [89, 1408, 1080, 1563]
193
- **Target:** 搜索酒店
194
- **Annotated Image:** [gpt_5_2_annotated.png](output/gpt_5_2_annotated.png)
195
- **BBox:** [119, 921, 1051, 1096]
196
- **Target:** 搜索酒店
197
- **Annotated Image:** [gpt_5_1_annotated.png](output/gpt_5_1_annotated.png)
198
- **Accuracy Score:** 100%
199
- **Status:** ✅ Success
200
- **Duration:** 12649ms
201
- **Response:**
202
- ```
203
- action: **CLICK**
204
- Parameters: {"target": "搜索酒店", "bbox_2d": [42, 526, 958, 578]}
205
- ```
206
-
207
- **Accuracy Score:** 45%
208
- **Status:** ✅ Success
209
- **Duration:** 13021ms
210
- **Response:**
211
- ```
212
- action: **CLICK**
213
- Parameters: {"target": "搜索酒店", "bbox_2d": [50, 1120, 1120, 1220]}
214
- ```
215
-
216
- **Accuracy Score:** 0%
217
- **Status:** ✅ Success
218
- **Duration:** 16239ms
219
- **Response:**
220
- ```
221
- action: **CLICK**
222
- Parameters: {"target": "搜索酒店", "bbox_2d": [150, 2100, 1020, 2280]}
223
- ```
224
-
225
- **Accuracy Score:** 100%
226
- **Status:** ✅ Success
227
- **Duration:** 17583ms
228
- **Response:**
229
- ```
230
- action: **CLICK**
231
- Parameters: {"target": "搜索酒店", "bbox_2d": [58, 528, 1112, 598]}
232
- ```
233
-
234
- **Accuracy Score:** 100%
235
- **Status:** ✅ Success
236
- **Duration:** 17353ms
237
- **Response:**
238
- ```
239
- action: **CLICK**
240
- Parameters: {"target": "搜索酒店", "bbox_2d": [44, 1323, 1126, 1471]}
241
- ```
242
-
243
- **Accuracy Score:** 95%
244
- **Status:** ✅ Success
245
- **Duration:** 18461ms
246
- **Response:**
247
- ```
248
- action: **CLICK**
249
- Parameters: {"target": "搜索酒店", "bbox_2d": [89, 1408, 1080, 1563]}
250
- ```
251
-
252
- **Accuracy Score:** 45%
253
- **Status:** ✅ Success
254
- **Duration:** 18604ms
255
- **Response:**
256
- ```
257
- action: **CLICK**
258
- Parameters: {"target": "搜索酒店", "bbox_2d": [119, 921, 1051, 1096]}
259
- ```
260
-
261
- **BBox:** [38, 525, 1132, 579]
262
- **Target:** 搜索酒店
263
- **Annotated Image:** [doubao_seed_2_0_pro_260215_annotated.png](output/doubao_seed_2_0_pro_260215_annotated.png)
264
- **BBox:** [60, 1500, 1110, 1690]
265
- **Target:** 搜索酒店
266
- **Annotated Image:** [gpt_5_nano_annotated.png](output/gpt_5_nano_annotated.png)
267
- **BBox:** [48, 535, 1122, 638]
268
- **Target:** 搜索酒店
269
- **Annotated Image:** [gemini_2_5_pro_annotated.png](output/gemini_2_5_pro_annotated.png)
270
- **Accuracy Score:** 100%
271
- **Status:** ✅ Success
272
- **Duration:** 24796ms
273
- **Response:**
274
- ```
275
- action: **CLICK**
276
- Parameters: {"target": "搜索酒店", "bbox_2d": [38, 525, 1132, 579]}
277
- ```
278
-
279
- **Accuracy Score:** 20%
280
- **Status:** ✅ Success
281
- **Duration:** 25101ms
282
- **Response:**
283
- ```
284
- action: **CLICK**
285
- Parameters: {"target": "搜索酒店", "bbox_2d": [60, 1500, 1110, 1690]}
286
- ```
287
-
288
- **Accuracy Score:** 100%
289
- **Status:** ✅ Success
290
- **Duration:** 28762ms
291
- **Response:**
292
- ```
293
- action: **CLICK**
294
- Parameters: {"target": "搜索酒店", "bbox_2d": [48, 535, 1122, 638]}
295
- ```
296
-
297
- **BBox:** [112, 1412, 1058, 1565]
298
- **Target:** 搜索酒店
299
- **Annotated Image:** [gpt_5_2_pro_annotated.png](output/gpt_5_2_pro_annotated.png)
300
- **Accuracy Score:** 75%
301
- **Status:** ✅ Success
302
- **Duration:** 43517ms
303
- **Response:**
304
- ```
305
- action: **CLICK**
306
- Parameters: {"target": "搜索酒店", "bbox_2d": [112, 1412, 1058, 1565]}
307
- ```
308
-
309
- **BBox:** [42, 1328, 1128, 1468]
310
- **Target:** 搜索酒店
311
- **Annotated Image:** [gemini_3_pro_preview_annotated.png](output/gemini_3_pro_preview_annotated.png)
312
- **Accuracy Score:** 100%
313
- **Status:** ✅ Success
314
- **Duration:** 51574ms
315
- **Response:**
316
- ```
317
- action: **CLICK**
318
- Parameters: {"target": "搜索酒店", "bbox_2d": [42, 1328, 1128, 1468]}
319
- ```
320
-
321
- **BBox:** [33, 835, 693, 900]
322
- **Target:** 搜索酒店
323
- **Annotated Image:** [claude_sonnet_4_6_annotated.png](output/claude_sonnet_4_6_annotated.png)
324
- **Accuracy Score:** 0%
325
- **Status:** ✅ Success
326
- **Duration:** 68981ms
327
- **Response:**
328
- ```
329
- action: **CLICK**
330
- Parameters: {"target": "搜索酒店", "bbox_2d": [33, 835, 693, 900]}
331
- ```