appium-mcp 1.34.0 → 1.34.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/package.json +6 -2
- package/scripts/zip-assets.mjs +55 -0
- package/server.json +2 -2
- package/src/resources/submodules.zip +0 -0
- package/src/tests/README.md +0 -83
- package/src/tests/__mocks__/@appium/support.ts +0 -73
- package/src/tests/benchmark_model/TEST_REPORT.md +0 -331
- package/src/tests/benchmark_model/benchmark_model.ts +0 -798
- package/src/tests/benchmark_model/image.png +0 -0
- package/src/tests/benchmark_model/output/DeepSeek_V3_2_annotated.png +0 -0
- package/src/tests/benchmark_model/output/Qwen3_VL_235B_A22B_Instruct_annotated.png +0 -0
- package/src/tests/benchmark_model/output/claude_sonnet_4_6_annotated.png +0 -0
- package/src/tests/benchmark_model/output/doubao_seed_2_0_pro_260215_annotated.png +0 -0
- package/src/tests/benchmark_model/output/gemini_2_5_flash_annotated.png +0 -0
- package/src/tests/benchmark_model/output/gemini_2_5_pro_annotated.png +0 -0
- package/src/tests/benchmark_model/output/gemini_3_flash_preview_annotated.png +0 -0
- package/src/tests/benchmark_model/output/gemini_3_pro_preview_annotated.png +0 -0
- package/src/tests/benchmark_model/output/gpt_5_1_annotated.png +0 -0
- package/src/tests/benchmark_model/output/gpt_5_2_annotated.png +0 -0
- package/src/tests/benchmark_model/output/gpt_5_2_pro_annotated.png +0 -0
- package/src/tests/benchmark_model/output/gpt_5_nano_annotated.png +0 -0
- package/src/tests/benchmark_model/output/grok_4_1_fast_annotated.png +0 -0
- package/src/tests/benchmark_model/output/kimi_k2_5_annotated.png +0 -0
- package/src/tests/benchmark_model/output/qwen3_vl_8b_instruct_annotated.png +0 -0
- package/src/tests/benchmark_model/output/qwen3_vl_plus_annotated.png +0 -0
- package/src/tests/generate-all-locators.test.ts +0 -175
- package/src/tests/screenshot.test.ts +0 -332
- package/src/tests/session-store.test.ts +0 -498
- package/src/tests/test-setup-wda.ts +0 -240
- package/src/tests/tools/session/battery-info.test.ts +0 -102
- package/src/tests/tools/session/create-session.test.ts +0 -145
- package/src/tests/tools/session/file-transfer.test.ts +0 -158
- package/src/tests/vision-finder.test.ts +0 -728
package/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,15 @@
|
|
|
1
|
+
## [1.34.2](https://github.com/appium/appium-mcp/compare/v1.34.1...v1.34.2) (2026-03-23)
|
|
2
|
+
|
|
3
|
+
### Miscellaneous Chores
|
|
4
|
+
|
|
5
|
+
* add zip/unzip asserts to distribute them in small size ([#227](https://github.com/appium/appium-mcp/issues/227)) ([126f2dd](https://github.com/appium/appium-mcp/commit/126f2dd7ede8696853eb887d7310d6aa22ea8e68))
|
|
6
|
+
|
|
7
|
+
## [1.34.1](https://github.com/appium/appium-mcp/compare/v1.34.0...v1.34.1) (2026-03-23)
|
|
8
|
+
|
|
9
|
+
### Miscellaneous Chores
|
|
10
|
+
|
|
11
|
+
* exclude src/tests ([#226](https://github.com/appium/appium-mcp/issues/226)) ([0c2a72d](https://github.com/appium/appium-mcp/commit/0c2a72db5bde392891482b9c3849bf9b71b51a87))
|
|
12
|
+
|
|
1
13
|
## [1.34.0](https://github.com/appium/appium-mcp/compare/v1.33.0...v1.34.0) (2026-03-22)
|
|
2
14
|
|
|
3
15
|
### Features
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "appium-mcp",
|
|
3
3
|
"mcpName": "io.github.appium/appium-mcp",
|
|
4
|
-
"version": "1.34.0",
|
|
4
|
+
"version": "1.34.2",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"repository": {
|
|
7
7
|
"type": "git",
|
|
@@ -36,7 +36,10 @@
|
|
|
36
36
|
"index-docs": "node dist/scripts/simple-index-documentation.js",
|
|
37
37
|
"query-docs": "node dist/scripts/simple-query-documentation.js",
|
|
38
38
|
"sync-version": "node scripts/sync-version.mjs",
|
|
39
|
-
"version": "npm run sync-version"
|
|
39
|
+
"version": "npm run sync-version",
|
|
40
|
+
"zip-assets": "node scripts/zip-assets.mjs zip",
|
|
41
|
+
"unzip-assets": "node scripts/zip-assets.mjs unzip",
|
|
42
|
+
"postinstall": "node scripts/zip-assets.mjs unzip"
|
|
40
43
|
},
|
|
41
44
|
"author": "",
|
|
42
45
|
"license": "Apache-2.0",
|
|
@@ -85,6 +88,7 @@
|
|
|
85
88
|
"files": [
|
|
86
89
|
"scripts",
|
|
87
90
|
"src",
|
|
91
|
+
"!src/tests",
|
|
88
92
|
"dist",
|
|
89
93
|
"CHANGELOG.md",
|
|
90
94
|
"README.md",
|
|
package/scripts/zip-assets.mjs
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import path from 'node:path';
|
|
2
|
+
import {fileURLToPath} from 'node:url';
|
|
3
|
+
import fs from 'node:fs/promises';
|
|
4
|
+
|
|
5
|
+
import {zip as appiumZip} from '@appium/support';
|
|
6
|
+
|
|
7
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
8
|
+
const __dirname = path.dirname(__filename);
|
|
9
|
+
|
|
10
|
+
// Adjust these paths for your use case
|
|
11
|
+
const ZIP_SOURCE_DIR = path.join(__dirname, '..', 'src', 'resources', 'submodules');
|
|
12
|
+
const ZIP_OUTPUT_PATH = path.join(__dirname, '..', 'src', 'resources', 'submodules.zip');
|
|
13
|
+
const UNZIP_TARGET_DIR = path.join(__dirname, '..', 'src', 'resources', 'submodules');
|
|
14
|
+
|
|
15
|
+
export async function zipAssets() {
|
|
16
|
+
const zipBase64 = await appiumZip.toInMemoryZip(ZIP_SOURCE_DIR);
|
|
17
|
+
const zipBuffer = Buffer.from(zipBase64, 'base64');
|
|
18
|
+
await fs.writeFile(ZIP_OUTPUT_PATH, zipBuffer);
|
|
19
|
+
console.log(`Zipped ${ZIP_SOURCE_DIR} -> ${ZIP_OUTPUT_PATH}`);
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
const fileExists = async (filePath) => {
|
|
23
|
+
try {
|
|
24
|
+
await fs.access(filePath);
|
|
25
|
+
return true;
|
|
26
|
+
} catch {
|
|
27
|
+
return false;
|
|
28
|
+
}
|
|
29
|
+
};
|
|
30
|
+
|
|
31
|
+
export async function unzipAssets() {
|
|
32
|
+
if (!(await fileExists(ZIP_OUTPUT_PATH))) {
|
|
33
|
+
console.log(`Target directory ${ZIP_OUTPUT_PATH} does not exist. Skipping unzip.`);
|
|
34
|
+
return;
|
|
35
|
+
}
|
|
36
|
+
await appiumZip.extractAllTo(ZIP_OUTPUT_PATH, UNZIP_TARGET_DIR);
|
|
37
|
+
console.log(`Unzipped ${ZIP_OUTPUT_PATH} -> ${UNZIP_TARGET_DIR}`);
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
if (import.meta.url === `file://${process.argv[1]}`) {
|
|
41
|
+
const cmd = process.argv[2];
|
|
42
|
+
if (cmd === 'zip') {
|
|
43
|
+
zipAssets().catch((e) => {
|
|
44
|
+
console.error(e);
|
|
45
|
+
process.exitCode = 1;
|
|
46
|
+
});
|
|
47
|
+
} else if (cmd === 'unzip') {
|
|
48
|
+
unzipAssets().catch((e) => {
|
|
49
|
+
console.error(e);
|
|
50
|
+
process.exitCode = 1;
|
|
51
|
+
});
|
|
52
|
+
} else {
|
|
53
|
+
console.log('Usage: node zip-assets.mjs [zip|unzip]');
|
|
54
|
+
}
|
|
55
|
+
}
|
package/server.json
CHANGED
|
@@ -3,12 +3,12 @@
|
|
|
3
3
|
"name": "io.github.appium/appium-mcp",
|
|
4
4
|
"title": "MCP Appium - Mobile Development and Automation Server",
|
|
5
5
|
"description": "MCP server for Appium mobile automation on iOS and Android devices with test creation tools.",
|
|
6
|
-
"version": "1.34.0",
|
|
6
|
+
"version": "1.34.2",
|
|
7
7
|
"packages": [
|
|
8
8
|
{
|
|
9
9
|
"registryType": "npm",
|
|
10
10
|
"identifier": "appium-mcp",
|
|
11
|
-
"version": "1.34.0",
|
|
11
|
+
"version": "1.34.2",
|
|
12
12
|
"transport": {
|
|
13
13
|
"type": "stdio"
|
|
14
14
|
}
|
|
package/src/resources/submodules.zip
ADDED
|
Binary file
|
package/src/tests/README.md
DELETED
|
@@ -1,83 +0,0 @@
|
|
|
1
|
-
# Tests for Mobile Agent
|
|
2
|
-
|
|
3
|
-
This directory contains unit tests for the Mobile Agent project.
|
|
4
|
-
|
|
5
|
-
## Running Tests
|
|
6
|
-
|
|
7
|
-
To run all tests:
|
|
8
|
-
|
|
9
|
-
```bash
|
|
10
|
-
npm test
|
|
11
|
-
```
|
|
12
|
-
|
|
13
|
-
To run specific test files:
|
|
14
|
-
|
|
15
|
-
```bash
|
|
16
|
-
# Run tests for the locator generation functionality
|
|
17
|
-
npm run test:locators
|
|
18
|
-
```
|
|
19
|
-
|
|
20
|
-
## Test Files
|
|
21
|
-
|
|
22
|
-
### generate-all-locators.test.ts
|
|
23
|
-
|
|
24
|
-
This file contains tests for the `generateAllElementLocators` function, which is responsible for generating locators for all elements in a page source XML.
|
|
25
|
-
|
|
26
|
-
The tests cover:
|
|
27
|
-
|
|
28
|
-
1. Basic functionality with valid XML
|
|
29
|
-
2. Handling of invalid/empty XML
|
|
30
|
-
3. Various filtering options:
|
|
31
|
-
- `includeTagNames` - Include only specific element types
|
|
32
|
-
- `excludeTagNames` - Exclude specific element types
|
|
33
|
-
- `minAttributeCount` - Filter elements by minimum attribute count
|
|
34
|
-
- `fetchableOnly` - Include only interactable elements (platform-specific)
|
|
35
|
-
- `clickableOnly` - Include only clickable elements
|
|
36
|
-
|
|
37
|
-
## Adding New Tests
|
|
38
|
-
|
|
39
|
-
When adding new tests:
|
|
40
|
-
|
|
41
|
-
1. Create a new test file in the `src/tests` directory with the `.test.ts` extension
|
|
42
|
-
2. Import the necessary functions and types from the source files
|
|
43
|
-
3. Use Jest's `describe`, `test`, and `expect` functions to structure your tests
|
|
44
|
-
4. Add a new script to `package.json` for running your specific test file
|
|
45
|
-
|
|
46
|
-
## Test Structure
|
|
47
|
-
|
|
48
|
-
Tests should follow this general structure:
|
|
49
|
-
|
|
50
|
-
```typescript
|
|
51
|
-
import { describe, test, expect } from '@jest/globals';
|
|
52
|
-
import { functionToTest } from '../path/to/function.js';
|
|
53
|
-
|
|
54
|
-
describe('functionToTest', () => {
|
|
55
|
-
test('should do something specific', () => {
|
|
56
|
-
// Arrange - set up test data
|
|
57
|
-
const input = 'some input';
|
|
58
|
-
|
|
59
|
-
// Act - call the function
|
|
60
|
-
const result = functionToTest(input);
|
|
61
|
-
|
|
62
|
-
// Assert - verify the result
|
|
63
|
-
expect(result).toBe('expected output');
|
|
64
|
-
});
|
|
65
|
-
});
|
|
66
|
-
```
|
|
67
|
-
|
|
68
|
-
## Mocking
|
|
69
|
-
|
|
70
|
-
For tests that require mocking dependencies, use Jest's mocking capabilities:
|
|
71
|
-
|
|
72
|
-
```typescript
|
|
73
|
-
import { jest } from '@jest/globals';
|
|
74
|
-
|
|
75
|
-
// Mock a module
|
|
76
|
-
jest.mock('../path/to/module.js');
|
|
77
|
-
|
|
78
|
-
// Create a mock function
|
|
79
|
-
const mockFunction = jest.fn();
|
|
80
|
-
mockFunction.mockReturnValue('mocked value');
|
|
81
|
-
```
|
|
82
|
-
|
|
83
|
-
Note that when working with ESM modules, you may need to use different mocking approaches than with CommonJS modules.
|
|
package/src/tests/__mocks__/@appium/support.ts
DELETED
|
@@ -1,73 +0,0 @@
|
|
|
1
|
-
// Mock @appium/support for Jest tests
|
|
2
|
-
// This avoids the ESM/CommonJS mismatch with uuid dependency
|
|
3
|
-
|
|
4
|
-
export const logger = {
|
|
5
|
-
getLogger: (_name: string) =>
|
|
6
|
-
// Simple logger implementation for tests
|
|
7
|
-
// No-op functions that match the logger interface
|
|
8
|
-
({
|
|
9
|
-
debug: (_message: string, ..._args: any[]) => {
|
|
10
|
-
// Silent in tests by default
|
|
11
|
-
},
|
|
12
|
-
info: (_message: string, ..._args: any[]) => {
|
|
13
|
-
// Silent in tests by default
|
|
14
|
-
},
|
|
15
|
-
warn: (_message: string, ..._args: any[]) => {
|
|
16
|
-
// Silent in tests by default
|
|
17
|
-
},
|
|
18
|
-
error: (_message: string, ..._args: any[]) => {
|
|
19
|
-
// Silent in tests by default
|
|
20
|
-
},
|
|
21
|
-
trace: (_message: string, ..._args: any[]) => {
|
|
22
|
-
// Silent in tests by default
|
|
23
|
-
},
|
|
24
|
-
}),
|
|
25
|
-
};
|
|
26
|
-
|
|
27
|
-
/**
|
|
28
|
-
* Mock imageUtil for Jest tests.
|
|
29
|
-
*
|
|
30
|
-
* A single shared sharpInstance is used across all calls so tests can
|
|
31
|
-
* inspect and override its methods (resize / jpeg / toBuffer) via
|
|
32
|
-
* mockSharpInstance exported below.
|
|
33
|
-
*/
|
|
34
|
-
|
|
35
|
-
export type MockSharpInstance = {
|
|
36
|
-
resizeCalls: Array<[number, number]>;
|
|
37
|
-
toBufferImpl: () => Promise<Buffer>;
|
|
38
|
-
resize: (w: number, h: number) => MockSharpInstance;
|
|
39
|
-
jpeg: (_opts?: unknown) => MockSharpInstance;
|
|
40
|
-
toBuffer: () => Promise<Buffer>;
|
|
41
|
-
reset: () => void;
|
|
42
|
-
};
|
|
43
|
-
|
|
44
|
-
/** Shared instance – tests can mutate toBufferImpl or inspect resizeCalls */
|
|
45
|
-
export const mockSharpInstance: MockSharpInstance = {
|
|
46
|
-
resizeCalls: [],
|
|
47
|
-
toBufferImpl: () => Promise.resolve(Buffer.from('mock-compressed-image')),
|
|
48
|
-
resize(w: number, h: number) {
|
|
49
|
-
this.resizeCalls.push([w, h]);
|
|
50
|
-
return this;
|
|
51
|
-
},
|
|
52
|
-
jpeg(_opts?: unknown) {
|
|
53
|
-
return this;
|
|
54
|
-
},
|
|
55
|
-
toBuffer() {
|
|
56
|
-
return this.toBufferImpl();
|
|
57
|
-
},
|
|
58
|
-
reset() {
|
|
59
|
-
this.resizeCalls = [];
|
|
60
|
-
this.toBufferImpl = () =>
|
|
61
|
-
Promise.resolve(Buffer.from('mock-compressed-image'));
|
|
62
|
-
},
|
|
63
|
-
};
|
|
64
|
-
|
|
65
|
-
export const imageUtil = {
|
|
66
|
-
requireSharp: () => (_input: Buffer) => mockSharpInstance,
|
|
67
|
-
};
|
|
68
|
-
|
|
69
|
-
// Export other commonly used utilities from @appium/support if needed
|
|
70
|
-
export default {
|
|
71
|
-
logger,
|
|
72
|
-
imageUtil,
|
|
73
|
-
};
|
|
package/src/tests/benchmark_model/TEST_REPORT.md
DELETED
|
@@ -1,331 +0,0 @@
|
|
|
1
|
-
# Model Benchmark Test Report
|
|
2
|
-
|
|
3
|
-
**Test Date:** 2026/2/26 17:38:15
|
|
4
|
-
**Test Type:** Automation Testing - Click Action Recognition
|
|
5
|
-
|
|
6
|
-
---
|
|
7
|
-
|
|
8
|
-
## Summary
|
|
9
|
-
|
|
10
|
-
| Model Name | Duration(ms) | Status | Accuracy Score | Annotated Image |
|
|
11
|
-
|------------|--------------|--------|----------------|-----------------|
|
|
12
|
-
| qwen3-vl-plus | 12649 | ✅ Success | 100% | [View](output/qwen3_vl_plus_annotated.png) |
|
|
13
|
-
| qwen3-vl-8b-instruct | 10809 | ✅ Success | 100% | [View](output/qwen3_vl_8b_instruct_annotated.png) |
|
|
14
|
-
| Qwen3-VL-235B-A22B-Instruct | 8417 | ✅ Success | 100% | [View](output/Qwen3_VL_235B_A22B_Instruct_annotated.png) |
|
|
15
|
-
| doubao-seed-2-0-pro-260215 | 24796 | ✅ Success | 100% | [View](output/doubao_seed_2_0_pro_260215_annotated.png) |
|
|
16
|
-
| gemini-3-flash-preview | 17353 | ✅ Success | 100% | [View](output/gemini_3_flash_preview_annotated.png) |
|
|
17
|
-
| gemini-3-pro-preview | 51574 | ✅ Success | 100% | [View](output/gemini_3_pro_preview_annotated.png) |
|
|
18
|
-
| gemini-2.5-pro | 28762 | ✅ Success | 100% | [View](output/gemini_2_5_pro_annotated.png) |
|
|
19
|
-
| gemini-2.5-flash | 17583 | ✅ Success | 100% | [View](output/gemini_2_5_flash_annotated.png) |
|
|
20
|
-
| gpt-5.2 | 18461 | ✅ Success | 95% | [View](output/gpt_5_2_annotated.png) |
|
|
21
|
-
| gpt-5.2-pro | 43517 | ✅ Success | 75% | [View](output/gpt_5_2_pro_annotated.png) |
|
|
22
|
-
| kimi-k2.5 | 13021 | ✅ Success | 45% | [View](output/kimi_k2_5_annotated.png) |
|
|
23
|
-
| gpt-5.1 | 18604 | ✅ Success | 45% | [View](output/gpt_5_1_annotated.png) |
|
|
24
|
-
| gpt-5-nano | 25101 | ✅ Success | 20% | [View](output/gpt_5_nano_annotated.png) |
|
|
25
|
-
| DeepSeek-V3.2 | 10187 | ✅ Success | 0% | [View](output/DeepSeek_V3_2_annotated.png) |
|
|
26
|
-
| claude-sonnet-4-6 | 68981 | ✅ Success | 0% | [View](output/claude_sonnet_4_6_annotated.png) |
|
|
27
|
-
| grok-4.1-fast | 16239 | ✅ Success | 0% | [View](output/grok_4_1_fast_annotated.png) |
|
|
28
|
-
|
|
29
|
-
### Statistics
|
|
30
|
-
|
|
31
|
-
- **Total**: 18 models
|
|
32
|
-
- **Success**: 16 (88.9%)
|
|
33
|
-
- **Failed**: 2
|
|
34
|
-
- **High Accuracy (≥70%)**: 10 (55.6%)
|
|
35
|
-
- **Avg Accuracy Score**: 60.0%
|
|
36
|
-
- **Average Duration**: 31332.22ms
|
|
37
|
-
- **Min Duration**: 8417ms
|
|
38
|
-
- **Max Duration**: 120021ms
|
|
39
|
-
|
|
40
|
-
---
|
|
41
|
-
|
|
42
|
-
## Detailed Results
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
============================================================
|
|
46
|
-
## DeepSeek-V3.2
|
|
47
|
-
|
|
48
|
-
**Started at:** 2026/2/26 17:38:15
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
============================================================
|
|
52
|
-
## qwen3-vl-plus
|
|
53
|
-
|
|
54
|
-
**Started at:** 2026/2/26 17:38:15
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
============================================================
|
|
58
|
-
## qwen3-vl-8b-instruct
|
|
59
|
-
|
|
60
|
-
**Started at:** 2026/2/26 17:38:15
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
============================================================
|
|
64
|
-
## Qwen3-VL-235B-A22B-Instruct
|
|
65
|
-
|
|
66
|
-
**Started at:** 2026/2/26 17:38:15
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
============================================================
|
|
70
|
-
## doubao-seed-2-0-pro-260215
|
|
71
|
-
|
|
72
|
-
**Started at:** 2026/2/26 17:38:15
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
============================================================
|
|
76
|
-
## kimi-k2.5
|
|
77
|
-
|
|
78
|
-
**Started at:** 2026/2/26 17:38:15
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
============================================================
|
|
82
|
-
## gpt-5.2-pro
|
|
83
|
-
|
|
84
|
-
**Started at:** 2026/2/26 17:38:15
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
============================================================
|
|
88
|
-
## gpt-5.2
|
|
89
|
-
|
|
90
|
-
**Started at:** 2026/2/26 17:38:15
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
============================================================
|
|
94
|
-
## gpt-5.1
|
|
95
|
-
|
|
96
|
-
**Started at:** 2026/2/26 17:38:15
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
============================================================
|
|
100
|
-
## gpt-5-nano
|
|
101
|
-
|
|
102
|
-
**Started at:** 2026/2/26 17:38:15
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
============================================================
|
|
106
|
-
## claude-sonnet-4-6
|
|
107
|
-
|
|
108
|
-
**Started at:** 2026/2/26 17:38:15
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
============================================================
|
|
112
|
-
## gemini-3-flash-preview
|
|
113
|
-
|
|
114
|
-
**Started at:** 2026/2/26 17:38:15
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
============================================================
|
|
118
|
-
## gemini-3-pro-preview
|
|
119
|
-
|
|
120
|
-
**Started at:** 2026/2/26 17:38:15
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
============================================================
|
|
124
|
-
## gemini-2.5-pro
|
|
125
|
-
|
|
126
|
-
**Started at:** 2026/2/26 17:38:15
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
============================================================
|
|
130
|
-
## gemini-2.5-flash
|
|
131
|
-
|
|
132
|
-
**Started at:** 2026/2/26 17:38:15
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
============================================================
|
|
136
|
-
## grok-4.1-fast
|
|
137
|
-
|
|
138
|
-
**Started at:** 2026/2/26 17:38:15
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
**BBox:** [45, 526, 958, 579]
|
|
142
|
-
**Target:** 搜索酒店
|
|
143
|
-
**Annotated Image:** [Qwen3_VL_235B_A22B_Instruct_annotated.png](output/Qwen3_VL_235B_A22B_Instruct_annotated.png)
|
|
144
|
-
**BBox:** [135, 2150, 1035, 2270]
|
|
145
|
-
**Target:** 搜索酒店
|
|
146
|
-
**Annotated Image:** [DeepSeek_V3_2_annotated.png](output/DeepSeek_V3_2_annotated.png)
|
|
147
|
-
**BBox:** [46, 527, 963, 580]
|
|
148
|
-
**Target:** 搜索酒店
|
|
149
|
-
**Annotated Image:** [qwen3_vl_8b_instruct_annotated.png](output/qwen3_vl_8b_instruct_annotated.png)
|
|
150
|
-
**BBox:** [42, 526, 958, 578]
|
|
151
|
-
**Target:** 搜索酒店
|
|
152
|
-
**Annotated Image:** [qwen3_vl_plus_annotated.png](output/qwen3_vl_plus_annotated.png)
|
|
153
|
-
**BBox:** [50, 1120, 1120, 1220]
|
|
154
|
-
**Target:** 搜索酒店
|
|
155
|
-
**Annotated Image:** [kimi_k2_5_annotated.png](output/kimi_k2_5_annotated.png)
|
|
156
|
-
**Accuracy Score:** 100%
|
|
157
|
-
**Status:** ✅ Success
|
|
158
|
-
**Duration:** 8417ms
|
|
159
|
-
**Response:**
|
|
160
|
-
```
|
|
161
|
-
action: **CLICK**
|
|
162
|
-
Parameters: {"target": "搜索酒店", "bbox_2d": [45, 526, 958, 579]}
|
|
163
|
-
```
|
|
164
|
-
|
|
165
|
-
**Accuracy Score:** 0%
|
|
166
|
-
**Status:** ✅ Success
|
|
167
|
-
**Duration:** 10187ms
|
|
168
|
-
**Response:**
|
|
169
|
-
```
|
|
170
|
-
action: **CLICK**
|
|
171
|
-
Parameters: {"target": "搜索酒店", "bbox_2d": [135, 2150, 1035, 2270]}
|
|
172
|
-
```
|
|
173
|
-
|
|
174
|
-
**BBox:** [150, 2100, 1020, 2280]
|
|
175
|
-
**Target:** 搜索酒店
|
|
176
|
-
**Annotated Image:** [grok_4_1_fast_annotated.png](output/grok_4_1_fast_annotated.png)
|
|
177
|
-
**Accuracy Score:** 100%
|
|
178
|
-
**Status:** ✅ Success
|
|
179
|
-
**Duration:** 10809ms
|
|
180
|
-
**Response:**
|
|
181
|
-
```
|
|
182
|
-
action: **CLICK**
|
|
183
|
-
Parameters: {"target": "搜索酒店", "bbox_2d": [46, 527, 963, 580]}
|
|
184
|
-
```
|
|
185
|
-
|
|
186
|
-
**BBox:** [44, 1323, 1126, 1471]
|
|
187
|
-
**Target:** 搜索酒店
|
|
188
|
-
**Annotated Image:** [gemini_3_flash_preview_annotated.png](output/gemini_3_flash_preview_annotated.png)
|
|
189
|
-
**BBox:** [58, 528, 1112, 598]
|
|
190
|
-
**Target:** 搜索酒店
|
|
191
|
-
**Annotated Image:** [gemini_2_5_flash_annotated.png](output/gemini_2_5_flash_annotated.png)
|
|
192
|
-
**BBox:** [89, 1408, 1080, 1563]
|
|
193
|
-
**Target:** 搜索酒店
|
|
194
|
-
**Annotated Image:** [gpt_5_2_annotated.png](output/gpt_5_2_annotated.png)
|
|
195
|
-
**BBox:** [119, 921, 1051, 1096]
|
|
196
|
-
**Target:** 搜索酒店
|
|
197
|
-
**Annotated Image:** [gpt_5_1_annotated.png](output/gpt_5_1_annotated.png)
|
|
198
|
-
**Accuracy Score:** 100%
|
|
199
|
-
**Status:** ✅ Success
|
|
200
|
-
**Duration:** 12649ms
|
|
201
|
-
**Response:**
|
|
202
|
-
```
|
|
203
|
-
action: **CLICK**
|
|
204
|
-
Parameters: {"target": "搜索酒店", "bbox_2d": [42, 526, 958, 578]}
|
|
205
|
-
```
|
|
206
|
-
|
|
207
|
-
**Accuracy Score:** 45%
|
|
208
|
-
**Status:** ✅ Success
|
|
209
|
-
**Duration:** 13021ms
|
|
210
|
-
**Response:**
|
|
211
|
-
```
|
|
212
|
-
action: **CLICK**
|
|
213
|
-
Parameters: {"target": "搜索酒店", "bbox_2d": [50, 1120, 1120, 1220]}
|
|
214
|
-
```
|
|
215
|
-
|
|
216
|
-
**Accuracy Score:** 0%
|
|
217
|
-
**Status:** ✅ Success
|
|
218
|
-
**Duration:** 16239ms
|
|
219
|
-
**Response:**
|
|
220
|
-
```
|
|
221
|
-
action: **CLICK**
|
|
222
|
-
Parameters: {"target": "搜索酒店", "bbox_2d": [150, 2100, 1020, 2280]}
|
|
223
|
-
```
|
|
224
|
-
|
|
225
|
-
**Accuracy Score:** 100%
|
|
226
|
-
**Status:** ✅ Success
|
|
227
|
-
**Duration:** 17583ms
|
|
228
|
-
**Response:**
|
|
229
|
-
```
|
|
230
|
-
action: **CLICK**
|
|
231
|
-
Parameters: {"target": "搜索酒店", "bbox_2d": [58, 528, 1112, 598]}
|
|
232
|
-
```
|
|
233
|
-
|
|
234
|
-
**Accuracy Score:** 100%
|
|
235
|
-
**Status:** ✅ Success
|
|
236
|
-
**Duration:** 17353ms
|
|
237
|
-
**Response:**
|
|
238
|
-
```
|
|
239
|
-
action: **CLICK**
|
|
240
|
-
Parameters: {"target": "搜索酒店", "bbox_2d": [44, 1323, 1126, 1471]}
|
|
241
|
-
```
|
|
242
|
-
|
|
243
|
-
**Accuracy Score:** 95%
|
|
244
|
-
**Status:** ✅ Success
|
|
245
|
-
**Duration:** 18461ms
|
|
246
|
-
**Response:**
|
|
247
|
-
```
|
|
248
|
-
action: **CLICK**
|
|
249
|
-
Parameters: {"target": "搜索酒店", "bbox_2d": [89, 1408, 1080, 1563]}
|
|
250
|
-
```
|
|
251
|
-
|
|
252
|
-
**Accuracy Score:** 45%
|
|
253
|
-
**Status:** ✅ Success
|
|
254
|
-
**Duration:** 18604ms
|
|
255
|
-
**Response:**
|
|
256
|
-
```
|
|
257
|
-
action: **CLICK**
|
|
258
|
-
Parameters: {"target": "搜索酒店", "bbox_2d": [119, 921, 1051, 1096]}
|
|
259
|
-
```
|
|
260
|
-
|
|
261
|
-
**BBox:** [38, 525, 1132, 579]
|
|
262
|
-
**Target:** 搜索酒店
|
|
263
|
-
**Annotated Image:** [doubao_seed_2_0_pro_260215_annotated.png](output/doubao_seed_2_0_pro_260215_annotated.png)
|
|
264
|
-
**BBox:** [60, 1500, 1110, 1690]
|
|
265
|
-
**Target:** 搜索酒店
|
|
266
|
-
**Annotated Image:** [gpt_5_nano_annotated.png](output/gpt_5_nano_annotated.png)
|
|
267
|
-
**BBox:** [48, 535, 1122, 638]
|
|
268
|
-
**Target:** 搜索酒店
|
|
269
|
-
**Annotated Image:** [gemini_2_5_pro_annotated.png](output/gemini_2_5_pro_annotated.png)
|
|
270
|
-
**Accuracy Score:** 100%
|
|
271
|
-
**Status:** ✅ Success
|
|
272
|
-
**Duration:** 24796ms
|
|
273
|
-
**Response:**
|
|
274
|
-
```
|
|
275
|
-
action: **CLICK**
|
|
276
|
-
Parameters: {"target": "搜索酒店", "bbox_2d": [38, 525, 1132, 579]}
|
|
277
|
-
```
|
|
278
|
-
|
|
279
|
-
**Accuracy Score:** 20%
|
|
280
|
-
**Status:** ✅ Success
|
|
281
|
-
**Duration:** 25101ms
|
|
282
|
-
**Response:**
|
|
283
|
-
```
|
|
284
|
-
action: **CLICK**
|
|
285
|
-
Parameters: {"target": "搜索酒店", "bbox_2d": [60, 1500, 1110, 1690]}
|
|
286
|
-
```
|
|
287
|
-
|
|
288
|
-
**Accuracy Score:** 100%
|
|
289
|
-
**Status:** ✅ Success
|
|
290
|
-
**Duration:** 28762ms
|
|
291
|
-
**Response:**
|
|
292
|
-
```
|
|
293
|
-
action: **CLICK**
|
|
294
|
-
Parameters: {"target": "搜索酒店", "bbox_2d": [48, 535, 1122, 638]}
|
|
295
|
-
```
|
|
296
|
-
|
|
297
|
-
**BBox:** [112, 1412, 1058, 1565]
|
|
298
|
-
**Target:** 搜索酒店
|
|
299
|
-
**Annotated Image:** [gpt_5_2_pro_annotated.png](output/gpt_5_2_pro_annotated.png)
|
|
300
|
-
**Accuracy Score:** 75%
|
|
301
|
-
**Status:** ✅ Success
|
|
302
|
-
**Duration:** 43517ms
|
|
303
|
-
**Response:**
|
|
304
|
-
```
|
|
305
|
-
action: **CLICK**
|
|
306
|
-
Parameters: {"target": "搜索酒店", "bbox_2d": [112, 1412, 1058, 1565]}
|
|
307
|
-
```
|
|
308
|
-
|
|
309
|
-
**BBox:** [42, 1328, 1128, 1468]
|
|
310
|
-
**Target:** 搜索酒店
|
|
311
|
-
**Annotated Image:** [gemini_3_pro_preview_annotated.png](output/gemini_3_pro_preview_annotated.png)
|
|
312
|
-
**Accuracy Score:** 100%
|
|
313
|
-
**Status:** ✅ Success
|
|
314
|
-
**Duration:** 51574ms
|
|
315
|
-
**Response:**
|
|
316
|
-
```
|
|
317
|
-
action: **CLICK**
|
|
318
|
-
Parameters: {"target": "搜索酒店", "bbox_2d": [42, 1328, 1128, 1468]}
|
|
319
|
-
```
|
|
320
|
-
|
|
321
|
-
**BBox:** [33, 835, 693, 900]
|
|
322
|
-
**Target:** 搜索酒店
|
|
323
|
-
**Annotated Image:** [claude_sonnet_4_6_annotated.png](output/claude_sonnet_4_6_annotated.png)
|
|
324
|
-
**Accuracy Score:** 0%
|
|
325
|
-
**Status:** ✅ Success
|
|
326
|
-
**Duration:** 68981ms
|
|
327
|
-
**Response:**
|
|
328
|
-
```
|
|
329
|
-
action: **CLICK**
|
|
330
|
-
Parameters: {"target": "搜索酒店", "bbox_2d": [33, 835, 693, 900]}
|
|
331
|
-
```
|