preflite 1.1.0 → 1.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +113 -62
- package/dist/infrastructure/midscene/MidsceneRuntimeReal.js +41 -3
- package/dist/mcp/agentHttpClient.js +19 -6
- package/dist/mcp/cli.js +27 -1
- package/dist/mcp/exploration/tools-intelligent.js +50 -7
- package/dist/mcp/exploration/tools-session.js +1 -0
- package/dist/mcp/network-mocks/NetworkMockServer.js +579 -0
- package/dist/mcp/network-mocks/NetworkMockService.js +156 -0
- package/dist/mcp/network-mocks/device-proxy.js +31 -0
- package/dist/mcp/network-mocks/index.js +3 -0
- package/dist/mcp/network-mocks/types.js +1 -0
- package/dist/mcp/runManager.js +4 -1
- package/dist/mcp/runtimeInstall.js +44 -10
- package/dist/mcp/server.js +229 -23
- package/dist/mcp/setup.js +9 -5
- package/dist/mcp/visual-flow/types.js +1 -1
- package/dist/mcp/visual-flow/validate.js +171 -0
- package/docs/visual-flow-ir-llm.md +221 -0
- package/package.json +3 -1
- package/scripts/hdc-bridge.sh +4 -0
- package/scripts/nvm-use-repo.sh +31 -0
- package/scripts/run-midscene-task.sh +43 -0
- package/scripts/start-ios-wda.sh +328 -0
- package/scripts/stop-ios-wda.sh +44 -0
package/README.md
CHANGED
|
@@ -1,39 +1,36 @@
|
|
|
1
1
|
# Preflight
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Local MCP server for testing mobile apps on real Android, iOS, and HarmonyOS devices with AI coding assistants.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
[npm package](https://www.npmjs.com/package/preflite)
|
|
6
|
+
[License](./LICENSE)
|
|
7
|
+
[Release workflow](https://github.com/zifengjiang/Preflight/actions/workflows/release.yml)
|
|
8
|
+
|
|
9
|
+
Preflight gives Claude Code, Cursor, Codex, and other MCP clients a real mobile device to inspect, operate, and test. It turns natural-language testing requests into structured visual-flow runs with live viewing, screenshots, and saved reports.
|
|
6
10
|
|
|
7
11
|
<p align="center">
|
|
8
|
-
<img src="docs/demo.gif" alt="Preflight
|
|
12
|
+
<img src="docs/demo.gif" alt="Preflight running a mobile test through an AI coding assistant" width="720">
|
|
9
13
|
</p>
|
|
10
14
|
|
|
11
|
-
##
|
|
15
|
+
## Why Preflight?
|
|
12
16
|
|
|
13
|
-
|
|
17
|
+
- Test on real devices instead of mocked browser views or screenshots.
|
|
18
|
+
- Let coding agents inspect screens, tap, type, swipe, wait, and assert UI state.
|
|
19
|
+
- Generate visual-flow tests without fragile XPath selectors, accessibility-id assumptions, or fixed coordinates.
|
|
20
|
+
- Keep the runtime local: devices, reports, configuration, and the MCP server stay on your machine.
|
|
21
|
+
- Use the same workflow across Android, iOS, and HarmonyOS.
|
|
14
22
|
|
|
15
|
-
|
|
16
|
-
|-----------|-------------|-------|
|
|
17
|
-
| **Node.js** ≥ 20.11 | All platforms | Install via [nvm](https://github.com/nvm-sh/nvm) or [nodejs.org](https://nodejs.org/) |
|
|
18
|
-
| **adb** | Android | Ship with Android SDK / [platform-tools](https://developer.android.com/studio/releases/platform-tools). Ensure `adb` is on your `PATH`, or set `ADB_BINARY_PATH` in config. |
|
|
19
|
-
| **Xcode** + **iproxy** | iOS | Xcode from [Mac App Store](https://apps.apple.com/app/xcode/id497799835). `iproxy` ships with `brew install libimobiledevice`. |
|
|
20
|
-
| **WebDriverAgent** | iOS | [Facebook WebDriverAgent](https://github.com/facebook/WebDriverAgent) — build and deploy to your device from Xcode. Set `WDA_PROJECT_ROOT` in config to point at your local copy. |
|
|
21
|
-
| **hdc** | HarmonyOS | Ships with [DevEco Studio](https://developer.huawei.com/consumer/cn/deveco-studio/). Ensure `hdc` is on your `PATH`. |
|
|
22
|
-
| **AI model API key** | All platforms | A [Midscene](https://midscenejs.com/)-compatible vision model. Supports OpenAI, Anthropic, Doubao, and more. |
|
|
23
|
+
## Quick Start
|
|
23
24
|
|
|
24
|
-
|
|
25
|
+
Install the local runtime and register the MCP server:
|
|
25
26
|
|
|
26
27
|
```bash
|
|
27
|
-
npx preflite setup
|
|
28
|
+
npx -y preflite@latest setup
|
|
28
29
|
```
|
|
29
30
|
|
|
30
|
-
This
|
|
31
|
+
This installs the packaged runtime under `~/.preflight/runtime`, writes a Preflight MCP entry for Codex (`~/.codex/config.toml`), and writes a Cursor project entry when you run it inside a repository (`.cursor/mcp.json`).
|
|
31
32
|
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
### 2. Configure your AI model
|
|
35
|
-
|
|
36
|
-
Create `~/.preflight/config.json`:
|
|
33
|
+
Create `~/.preflight/config.json` with a Midscene-compatible vision model:
|
|
37
34
|
|
|
38
35
|
```json
|
|
39
36
|
{
|
|
@@ -41,24 +38,50 @@ Create `~/.preflight/config.json`:
|
|
|
41
38
|
"MIDSCENE_MODEL_BASE_URL": "https://ark.cn-beijing.volces.com/api/v3",
|
|
42
39
|
"MIDSCENE_MODEL_API_KEY": "sk-xxxxxxxxxxxxxxxx",
|
|
43
40
|
"MIDSCENE_MODEL_NAME": "doubao-seed-2-0-lite-260215",
|
|
44
|
-
"MIDSCENE_MODEL_FAMILY": "doubao-seed"
|
|
41
|
+
"MIDSCENE_MODEL_FAMILY": "doubao-seed",
|
|
42
|
+
"MIDSCENE_MODEL_REASONING_ENABLED": "false"
|
|
45
43
|
}
|
|
46
44
|
}
|
|
47
45
|
```
|
|
48
46
|
|
|
49
|
-
|
|
47
|
+
Restart your AI coding assistant, then ask it to use Preflight:
|
|
48
|
+
|
|
49
|
+
> Check my connected devices and run a smoke test on the app.
|
|
50
50
|
|
|
51
|
-
|
|
52
|
-
|
|
51
|
+
## What You Can Ask
|
|
52
|
+
|
|
53
|
+
Once the MCP server is available, your coding agent can work with real devices through requests like:
|
|
54
|
+
|
|
55
|
+
- "List my connected Android and iOS devices."
|
|
56
|
+
- "Open the app and verify the login flow."
|
|
57
|
+
- "Explore the settings screen and turn it into a visual-flow test."
|
|
58
|
+
- "Install this APK and run a smoke test."
|
|
59
|
+
- "Run the flow, watch it live, and save the report."
|
|
60
|
+
|
|
61
|
+
## Requirements
|
|
62
|
+
|
|
63
|
+
| Dependency | Required for | Notes |
|
|
64
|
+
|-----------|-------------|-------|
|
|
65
|
+
| Node.js >= 20.11 | All platforms | Install via [nvm](https://github.com/nvm-sh/nvm) or [nodejs.org](https://nodejs.org/). |
|
|
66
|
+
| AI model API key | All platforms | Any [Midscene](https://midscenejs.com/)-compatible vision model. |
|
|
67
|
+
| adb | Android | Ships with Android SDK [platform-tools](https://developer.android.com/studio/releases/platform-tools). Ensure `adb` is on your `PATH`, or set `ADB_BINARY_PATH`. |
|
|
68
|
+
| Xcode + iproxy | iOS | Xcode from the [Mac App Store](https://apps.apple.com/app/xcode/id497799835). `iproxy` ships with `brew install libimobiledevice`. |
|
|
69
|
+
| WebDriverAgent | iOS | Build and deploy [WebDriverAgent](https://github.com/facebook/WebDriverAgent) to your device, then set `WDA_PROJECT_ROOT`. |
|
|
70
|
+
| hdc | HarmonyOS | Ships with [DevEco Studio](https://developer.huawei.com/consumer/cn/deveco-studio/). Ensure `hdc` is on your `PATH`. |
|
|
71
|
+
|
|
72
|
+
## Model Configuration
|
|
73
|
+
|
|
74
|
+
Preflight loads model configuration from `~/.preflight/config.json`, `~/.preflight/config.yaml`, or `~/.preflight/config.yml`. You can also rely on standard provider environment variables such as `OPENAI_API_KEY` or `ANTHROPIC_API_KEY`.
|
|
75
|
+
|
|
76
|
+
| Provider | `MIDSCENE_MODEL_BASE_URL` | Example `MIDSCENE_MODEL_NAME` |
|
|
77
|
+
|----------|--------------------------|-------------------------------|
|
|
53
78
|
| OpenAI | `https://api.openai.com/v1` | `gpt-4o` |
|
|
54
79
|
| Anthropic | `https://api.anthropic.com/v1` | `claude-sonnet-4-20250514` |
|
|
55
|
-
| Doubao
|
|
56
|
-
|
|
57
|
-
You can also set standard env vars (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, etc.) — Preflight picks them up automatically.
|
|
80
|
+
| Doubao / Volcengine | `https://ark.cn-beijing.volces.com/api/v3` | `doubao-seed-2-0-lite-260215` |
|
|
58
81
|
|
|
59
|
-
|
|
82
|
+
## iOS Setup
|
|
60
83
|
|
|
61
|
-
iOS testing requires
|
|
84
|
+
iOS testing requires WebDriverAgent running on your device:
|
|
62
85
|
|
|
63
86
|
```bash
|
|
64
87
|
git clone https://github.com/facebook/WebDriverAgent.git
|
|
@@ -67,7 +90,7 @@ cd WebDriverAgent
|
|
|
67
90
|
open WebDriverAgent.xcodeproj
|
|
68
91
|
```
|
|
69
92
|
|
|
70
|
-
Build the `WebDriverAgentRunner` scheme targeting your device. Then add to `~/.preflight/config.json`:
|
|
93
|
+
Build the `WebDriverAgentRunner` scheme targeting your device. Then add the project path to `~/.preflight/config.json`:
|
|
71
94
|
|
|
72
95
|
```json
|
|
73
96
|
{
|
|
@@ -78,38 +101,28 @@ Build the `WebDriverAgentRunner` scheme targeting your device. Then add to `~/.p
|
|
|
78
101
|
}
|
|
79
102
|
```
|
|
80
103
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
Restart your AI coding assistant. Now you can say things like:
|
|
84
|
-
|
|
85
|
-
> *"Check my devices, explore the Notes app, and write a test for creating a new note."*
|
|
86
|
-
|
|
87
|
-
The AI calls `list_devices` → `exploration_start` → explores your app naturally → generates and runs a visual-flow test on the real device.
|
|
88
|
-
|
|
89
|
-
## How It Uses Midscene
|
|
90
|
-
|
|
91
|
-
[**Midscene**](https://midscenejs.com/) is the visual AI engine that powers Preflight's ability to see and act on mobile screens. It works by feeding screenshots and structured instructions to a multimodal LLM, then parsing the model's response into concrete UI actions.
|
|
104
|
+
## How It Works
|
|
92
105
|
|
|
93
|
-
Preflight
|
|
106
|
+
Preflight connects your AI assistant to local device automation through MCP. The assistant does not need to write platform-specific automation scripts; it asks Preflight for device state, explores screens, validates visual-flow JSON, and runs the flow through the local runtime.
|
|
94
107
|
|
|
95
108
|
```mermaid
|
|
96
109
|
flowchart LR
|
|
97
|
-
subgraph AI["
|
|
98
|
-
MCP["MCP
|
|
110
|
+
subgraph AI["AI Coding Assistant"]
|
|
111
|
+
MCP["MCP Client"]
|
|
99
112
|
end
|
|
100
113
|
|
|
101
114
|
subgraph Preflight["Preflight MCP Server"]
|
|
102
|
-
EXP["Exploration
|
|
103
|
-
IR["Visual Flow IR
|
|
104
|
-
AGENT["
|
|
115
|
+
EXP["Exploration Tools"]
|
|
116
|
+
IR["Visual Flow IR"]
|
|
117
|
+
AGENT["Local Runtime"]
|
|
105
118
|
end
|
|
106
119
|
|
|
107
120
|
subgraph Midscene["Midscene SDK"]
|
|
108
|
-
MS["
|
|
121
|
+
MS["Vision Agent"]
|
|
109
122
|
end
|
|
110
123
|
|
|
111
|
-
subgraph Device["
|
|
112
|
-
SCREEN["Screen
|
|
124
|
+
subgraph Device["Real Device"]
|
|
125
|
+
SCREEN["Screen and UI Tree"]
|
|
113
126
|
end
|
|
114
127
|
|
|
115
128
|
AI <-->|stdio| MCP
|
|
@@ -122,22 +135,60 @@ flowchart LR
|
|
|
122
135
|
SCREEN -.->|screenshots| MS
|
|
123
136
|
```
|
|
124
137
|
|
|
125
|
-
|
|
138
|
+
The main pieces are:
|
|
126
139
|
|
|
127
|
-
1. **Exploration
|
|
128
|
-
2. **Visual Flow IR**
|
|
129
|
-
3. **Midscene SDK**
|
|
140
|
+
1. **Exploration tools** help the assistant understand the current screen and choose what to test.
|
|
141
|
+
2. **Visual Flow IR** captures a test as structured JSON with steps, assertions, and app context.
|
|
142
|
+
3. **Midscene SDK** converts high-level visual instructions into device actions such as tap, type, swipe, wait, and assert.
|
|
130
143
|
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
## Tools Overview
|
|
144
|
+
## MCP Tools
|
|
134
145
|
|
|
135
146
|
| Category | Tools |
|
|
136
147
|
|----------|-------|
|
|
137
|
-
|
|
|
138
|
-
|
|
|
139
|
-
|
|
|
140
|
-
|
|
|
148
|
+
| Agent | `agent_health` · `start_agent` · `stop_agent` · `doctor` · `config_status` |
|
|
149
|
+
| Device | `list_devices` · `install_app` |
|
|
150
|
+
| Exploration | `exploration_start` · `exploration_end` · `exploration_get_page_summary` · `exploration_ai_act` · `exploration_ask_about_screen` · `exploration_screenshot` · `exploration_type` · `exploration_wait` |
|
|
151
|
+
| Visual Flow | `get_visual_flow_ir_rules` · `validate_visual_flow` · `run_flow` · `watch_run` · `cancel_run` · `save_report` · `read_report` |
|
|
152
|
+
|
|
153
|
+
## Reports
|
|
154
|
+
|
|
155
|
+
Runs write report assets under:
|
|
156
|
+
|
|
157
|
+
```text
|
|
158
|
+
~/.preflight/midscene_run/report/<reportName>/
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
A report can include the HTML summary, execution JSON, screenshots, and compressed recordings when the platform recorder and `ffmpeg` are available.
|
|
162
|
+
|
|
163
|
+
## Development
|
|
164
|
+
|
|
165
|
+
Clone the repository when you want to work on Preflight itself:
|
|
166
|
+
|
|
167
|
+
```bash
|
|
168
|
+
git clone https://github.com/zifengjiang/Preflight.git
|
|
169
|
+
cd Preflight
|
|
170
|
+
npm install
|
|
171
|
+
npm test
|
|
172
|
+
npm run check
|
|
173
|
+
npm run build
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
Register the local development MCP server:
|
|
177
|
+
|
|
178
|
+
```bash
|
|
179
|
+
npm run mcp:setup
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
## Release
|
|
183
|
+
|
|
184
|
+
Releases are triggered by version tags:
|
|
185
|
+
|
|
186
|
+
```bash
|
|
187
|
+
git tag v1.1.1
|
|
188
|
+
git push origin v1.1.1
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
The release workflow publishes the npm package and creates a GitHub Release.
|
|
141
192
|
|
|
142
193
|
## License
|
|
143
194
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { mkdir, readFile, stat, writeFile } from "node:fs/promises";
|
|
1
|
+
import { mkdir, readFile, readdir, stat, writeFile } from "node:fs/promises";
|
|
2
2
|
import net from "node:net";
|
|
3
3
|
import { tmpdir } from "node:os";
|
|
4
4
|
import path from "node:path";
|
|
@@ -542,9 +542,47 @@ export class MidsceneRuntimeReal {
|
|
|
542
542
|
console.warn(`[MidsceneRuntimeReal] report artifact missing or unreadable: ${err instanceof Error ? err.message : String(err)}`);
|
|
543
543
|
}
|
|
544
544
|
}
|
|
545
|
+
let stepFailureMessage;
|
|
546
|
+
if (reportPaths && result.ok) {
|
|
547
|
+
const execDir = reportPaths.bundleDir ?? path.dirname(reportPaths.reportHtmlPath);
|
|
548
|
+
try {
|
|
549
|
+
const dirEntries = await readdir(execDir);
|
|
550
|
+
for (const name of dirEntries) {
|
|
551
|
+
if (!/^\d+\.execution\.json$/.test(name))
|
|
552
|
+
continue;
|
|
553
|
+
try {
|
|
554
|
+
const raw = await readFile(path.join(execDir, name), "utf8");
|
|
555
|
+
const parsed = JSON.parse(raw);
|
|
556
|
+
const executions = parsed.executions;
|
|
557
|
+
if (!Array.isArray(executions))
|
|
558
|
+
continue;
|
|
559
|
+
for (const exec of executions) {
|
|
560
|
+
if (!exec || typeof exec !== "object")
|
|
561
|
+
continue;
|
|
562
|
+
const tasks = exec.tasks;
|
|
563
|
+
if (!Array.isArray(tasks))
|
|
564
|
+
continue;
|
|
565
|
+
for (const task of tasks) {
|
|
566
|
+
if (task && typeof task === "object" && task.status === "failed") {
|
|
567
|
+
const msg = task.errorMessage;
|
|
568
|
+
stepFailureMessage = typeof msg === "string" && msg.trim() ? msg.trim() : "midscene step execution failed";
|
|
569
|
+
}
|
|
570
|
+
}
|
|
571
|
+
}
|
|
572
|
+
}
|
|
573
|
+
catch {
|
|
574
|
+
// skip unparseable files
|
|
575
|
+
}
|
|
576
|
+
}
|
|
577
|
+
}
|
|
578
|
+
catch {
|
|
579
|
+
// directory not found or unreadable
|
|
580
|
+
}
|
|
581
|
+
}
|
|
582
|
+
const ok = result.ok && stepFailureMessage === undefined;
|
|
545
583
|
return {
|
|
546
|
-
ok
|
|
547
|
-
message: result.ok ? "midscene real execution success" : result.stderr || "midscene execution failed",
|
|
584
|
+
ok,
|
|
585
|
+
message: stepFailureMessage ?? (result.ok ? "midscene real execution success" : result.stderr || "midscene execution failed"),
|
|
548
586
|
artifacts,
|
|
549
587
|
reportInfo,
|
|
550
588
|
};
|
|
@@ -64,13 +64,26 @@ export class AgentHttpClient {
|
|
|
64
64
|
const headers = new Headers(init.headers);
|
|
65
65
|
if (this.config.token)
|
|
66
66
|
headers.set("Authorization", `Bearer ${this.config.token}`);
|
|
67
|
-
const
|
|
68
|
-
const
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
67
|
+
const controller = new AbortController();
|
|
68
|
+
const timeout = setTimeout(() => controller.abort(), 8_000);
|
|
69
|
+
try {
|
|
70
|
+
const resp = await fetch(url, { ...init, headers, signal: controller.signal });
|
|
71
|
+
const text = await resp.text();
|
|
72
|
+
const body = text ? JSON.parse(text) : null;
|
|
73
|
+
if (!resp.ok) {
|
|
74
|
+
throw new AgentHttpError(resp.status, body && typeof body === "object" ? JSON.stringify(body) : text);
|
|
75
|
+
}
|
|
76
|
+
return body;
|
|
77
|
+
}
|
|
78
|
+
catch (error) {
|
|
79
|
+
if (controller.signal.aborted) {
|
|
80
|
+
throw new AgentHttpError(408, `Agent HTTP request timed out after 8000ms: ${url.pathname}`);
|
|
81
|
+
}
|
|
82
|
+
throw error;
|
|
83
|
+
}
|
|
84
|
+
finally {
|
|
85
|
+
clearTimeout(timeout);
|
|
72
86
|
}
|
|
73
|
-
return body;
|
|
74
87
|
}
|
|
75
88
|
}
|
|
76
89
|
export class AgentHttpError extends Error {
|
package/dist/mcp/cli.js
CHANGED
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
3
|
+
import { access } from "node:fs/promises";
|
|
4
|
+
import { dirname, join } from "node:path";
|
|
5
|
+
import { fileURLToPath } from "node:url";
|
|
3
6
|
import { createPreflightMcpServer } from "./server.js";
|
|
4
7
|
import { setupLocalMcp } from "./setup.js";
|
|
5
8
|
// Prevent unhandled rejections / exceptions from crashing the server mid-session
|
|
@@ -17,11 +20,12 @@ if (command === "serve") {
|
|
|
17
20
|
}
|
|
18
21
|
else if (command === "setup") {
|
|
19
22
|
const projectRoot = argValue("--project-root") ?? process.cwd();
|
|
23
|
+
const runtimeSourceRoot = argValue("--runtime-source-root") ?? await findPackageRoot();
|
|
20
24
|
const agentBaseUrl = argValue("--agent-base-url") ?? process.env.AGENT_BASE_URL ?? "http://127.0.0.1:18998";
|
|
21
25
|
const livePort = Number(argValue("--live-port") ?? process.env.MCP_LIVE_PORT ?? "18999");
|
|
22
26
|
const runtimeRoot = argValue("--runtime-root") ?? process.env.AGENT_RUNTIME_ROOT?.trim() ?? undefined;
|
|
23
27
|
const installRuntime = !process.argv.includes("--no-install-runtime");
|
|
24
|
-
const result = await setupLocalMcp({ projectRoot, agentBaseUrl, livePort, runtimeRoot, installRuntime });
|
|
28
|
+
const result = await setupLocalMcp({ projectRoot, runtimeSourceRoot, agentBaseUrl, livePort, runtimeRoot, installRuntime });
|
|
25
29
|
console.log(JSON.stringify({ ok: true, ...result }, null, 2));
|
|
26
30
|
}
|
|
27
31
|
else {
|
|
@@ -34,3 +38,25 @@ function argValue(name) {
|
|
|
34
38
|
return undefined;
|
|
35
39
|
return process.argv[idx + 1];
|
|
36
40
|
}
|
|
41
|
+
/**
 * Locate the nearest ancestor directory (starting from the directory that
 * contains this module) which holds a `package.json` file.
 *
 * @returns {Promise<string>} Absolute path of the first directory, walking
 *   upward, that contains a `package.json`.
 * @throws {Error} When the filesystem root is reached without finding one.
 */
async function findPackageRoot() {
    let candidate = dirname(fileURLToPath(import.meta.url));
    let next = candidate;
    do {
        candidate = next;
        const manifestPath = join(candidate, "package.json");
        if (await exists(manifestPath)) {
            return candidate;
        }
        next = dirname(candidate);
        // dirname() of the filesystem root is the root itself; that ends the walk.
    } while (next !== candidate);
    throw new Error("Could not locate the preflite package root.");
}
|
|
54
|
+
/**
 * Report whether `path` can be accessed on the filesystem.
 *
 * @param {string} path - Filesystem path to probe.
 * @returns {Promise<boolean>} `true` when `access(path)` resolves, `false`
 *   when it rejects for any reason.
 */
async function exists(path) {
    let reachable = true;
    try {
        await access(path);
    }
    catch {
        // Any failure is reported as "does not exist".
        reachable = false;
    }
    return reachable;
}
|
|
@@ -58,17 +58,60 @@ async function detectForegroundApp(session) {
|
|
|
58
58
|
return null;
|
|
59
59
|
}
|
|
60
60
|
// ---------------------------------------------------------------------------
|
|
61
|
+
// Safe aiAsk: uses aiQuery under the hood to avoid doubao key-mismatch errors
|
|
62
|
+
// ---------------------------------------------------------------------------
|
|
63
|
+
/**
 * Ask the session's agent a free-form question through `aiQuery` and coerce
 * whatever comes back into a string.
 *
 * The prompt is sent with a `", string"` suffix appended.
 *
 * @param {object} session - Object exposing `agent.aiQuery(prompt)`.
 * @param {string} prompt - Question or instruction for the agent.
 * @returns {Promise<string>} The answer itself when it is a string; for an
 *   object, its first string-valued property if that string is non-empty,
 *   otherwise the object's JSON serialization; for anything else,
 *   `String(answer)`.
 */
async function aiAskSafe(session, prompt) {
    const answer = await session.agent.aiQuery(`${prompt}, string`);
    if (typeof answer === "string") {
        return answer;
    }
    if (!answer || typeof answer !== "object") {
        return String(answer);
    }
    for (const value of Object.values(answer)) {
        if (typeof value === "string") {
            // Only the first string property is considered; an empty one
            // falls back to serializing the whole object.
            return value ? value : JSON.stringify(answer);
        }
    }
    return JSON.stringify(answer);
}
|
|
76
|
+
// ---------------------------------------------------------------------------
|
|
77
|
+
// Retry wrapper for transient Midscene model errors
|
|
78
|
+
// ---------------------------------------------------------------------------
|
|
79
|
+
// Maximum number of attempts (the first call included) made by withRetry.
const RETRY_MAX = 3;
// Base delay in milliseconds; the wait before the next attempt is this value
// multiplied by the number of the attempt that just failed.
const RETRY_DELAY_MS = 2000;
/**
 * Run `fn`, retrying when it fails with an error whose message contains one
 * of a fixed set of marker substrings.
 *
 * At most RETRY_MAX attempts are made. Errors that do not match a marker, and
 * the error from the final attempt, are rethrown unchanged.
 *
 * @param {() => Promise<any>} fn - Operation to execute.
 * @param {string} label - Caller-supplied name for the operation; currently
 *   not used by this function.
 * @returns {Promise<any>} Whatever `fn` resolves to.
 */
async function withRetry(fn, label) {
    const retryableMarkers = [
        "No result in query data",
        "BadRequestError",
        "RateLimitError",
        "ServiceUnavailableError",
        "InternalServerError",
    ];
    let attempt = 1;
    for (;;) {
        try {
            return await fn();
        }
        catch (err) {
            const text = err instanceof Error ? err.message : String(err);
            const isRetryable = retryableMarkers.some((marker) => text.includes(marker));
            if (!isRetryable || attempt === RETRY_MAX) {
                throw err;
            }
            // Occasional Midscene model error: wait briefly, then try again.
            await new Promise((resolve) => setTimeout(resolve, RETRY_DELAY_MS * attempt));
            attempt += 1;
        }
    }
}
|
|
103
|
+
// ---------------------------------------------------------------------------
|
|
61
104
|
// Tool handlers
|
|
62
105
|
// ---------------------------------------------------------------------------
|
|
63
106
|
export function getPageSummaryHandler(ctx) {
|
|
64
107
|
return async (input) => {
|
|
65
108
|
const session = await resolveSession(input.sessionId, ctx);
|
|
66
|
-
const summary = await session
|
|
109
|
+
const summary = await withRetry(() => aiAskSafe(session, "详细描述当前页面。请按从上到下的顺序列出所有可见区域、交互元素和文案。\n" +
|
|
67
110
|
"特别注意:\n" +
|
|
68
111
|
"1) 页面底部是否有更多内容(是否可滚动)?如果底部紧贴导航栏/状态栏则说明是固定单屏布局\n" +
|
|
69
112
|
"2) 是否有弹窗、广告或遮挡物?\n" +
|
|
70
113
|
"3) 整体布局类型:固定单屏 / 可滚动长页面 / 多Tab / 列表\n" +
|
|
71
|
-
"先判断布局类型,再逐一描述每个区域的内容。");
|
|
114
|
+
"先判断布局类型,再逐一描述每个区域的内容。"), "getPageSummary");
|
|
72
115
|
const state = getSession(input.sessionId);
|
|
73
116
|
const foregroundApp = await detectForegroundApp(session);
|
|
74
117
|
const appRef = state.appRef;
|
|
@@ -88,7 +131,7 @@ export function getPageSummaryHandler(ctx) {
|
|
|
88
131
|
export function askAboutScreenHandler(ctx) {
|
|
89
132
|
return async (input) => {
|
|
90
133
|
const session = await resolveSession(input.sessionId, ctx);
|
|
91
|
-
const answer = await session
|
|
134
|
+
const answer = await withRetry(() => aiAskSafe(session, input.question), "askAboutScreen");
|
|
92
135
|
return { answer };
|
|
93
136
|
};
|
|
94
137
|
}
|
|
@@ -97,16 +140,16 @@ export function aiActHandler(ctx) {
|
|
|
97
140
|
const session = await resolveSession(input.sessionId, ctx);
|
|
98
141
|
const state = getSession(input.sessionId);
|
|
99
142
|
// Before-state: reuse from get_page_summary if available, otherwise grab a quick one
|
|
100
|
-
const beforeSummary = state.lastPageSummary ?? await session
|
|
101
|
-
await session.agent.aiAct(input.intent);
|
|
102
|
-
const afterSummary = await session
|
|
143
|
+
const beforeSummary = state.lastPageSummary ?? await withRetry(() => aiAskSafe(session, "用一句话描述当前页面的最关键特征:什么类型的页面(列表/表单/弹窗/首页等),最显著的内容是什么。"), "aiAct.beforeSummary");
|
|
144
|
+
await withRetry(() => session.agent.aiAct(input.intent), "aiAct");
|
|
145
|
+
const afterSummary = await withRetry(() => aiAskSafe(session, `操作前的页面:${beforeSummary}\n` +
|
|
103
146
|
`执行的操作:${input.intent}\n` +
|
|
104
147
|
"请判断操作结果:\n" +
|
|
105
148
|
"1) 页面内容是否发生了变化?(进入了新页面、弹出弹窗、滚动到底部、输入框获得焦点等)\n" +
|
|
106
149
|
"2) 如果操作是滑动,是否已到达底部或页面没有变化?\n" +
|
|
107
150
|
"3) 当前页面布局类型是固定单屏还是可滚动长页面?\n" +
|
|
108
151
|
"4) 当前页面最关键的变化是什么?\n" +
|
|
109
|
-
"如果操作没有产生任何实际变化(比如反复滑动但没有新内容),请明确指出\"页面没有变化\"。");
|
|
152
|
+
"如果操作没有产生任何实际变化(比如反复滑动但没有新内容),请明确指出\"页面没有变化\"。"), "aiAct.afterSummary");
|
|
110
153
|
// Update for next aiAct call
|
|
111
154
|
state.lastPageSummary = afterSummary;
|
|
112
155
|
const foregroundApp = await detectForegroundApp(session);
|
|
@@ -64,6 +64,7 @@ export async function ensureIosWdaStarted(resourceId, runtimeEnv, projectRoot) {
|
|
|
64
64
|
const child = spawn("/bin/bash", [scriptPath, udid, String(port)], {
|
|
65
65
|
stdio: ["ignore", "pipe", "pipe"],
|
|
66
66
|
timeout: 120_000,
|
|
67
|
+
env: { ...process.env, ...runtimeEnv },
|
|
67
68
|
});
|
|
68
69
|
let stdout = "";
|
|
69
70
|
let stderr = "";
|