preflite 1.0.1 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -53,7 +53,7 @@ export function registerExplorationTools(server, ctx) {
|
|
|
53
53
|
// ---------------------------------------------------------------------------
|
|
54
54
|
server.registerTool("exploration_get_page_summary", {
|
|
55
55
|
title: "Get Page Summary",
|
|
56
|
-
description: "Get a natural language summary of the current screen, including layout type (fixed single-screen / scrollable long page / multi-tab / list). Call this first when entering a new page to understand its structure before acting. Use this INSTEAD of ai_act if you just want to observe — ai_act is for changing state, not for looking around.",
|
|
56
|
+
description: "Get a natural language summary of the current screen, including layout type (fixed single-screen / scrollable long page / multi-tab / list). Call this first when entering a new page to understand its structure before acting. The summary is saved internally; if you then call ai_act, it will be used as the before-state for change comparison. Use this INSTEAD of ai_act if you just want to observe — ai_act is for changing state, not for looking around.",
|
|
57
57
|
inputSchema: {
|
|
58
58
|
sessionId: z.string().describe("Session ID from exploration_start"),
|
|
59
59
|
},
|
|
@@ -76,7 +76,7 @@ export function registerExplorationTools(server, ctx) {
|
|
|
76
76
|
// ---------------------------------------------------------------------------
|
|
77
77
|
server.registerTool("exploration_ai_act", {
|
|
78
78
|
title: "AI Act",
|
|
79
|
-
description: "Perform a high-level UI interaction described in natural language. Examples: 'Go back', 'Tap the settings icon', 'Type text in the search box'. After execution, returns a summary of the new page state including whether the action actually changed anything.\n\nIMPORTANT: Use get_page_summary FIRST to check if the page is a fixed single-screen layout. If it is, do NOT request scroll actions — there is nothing to scroll to. Use ai_act only for meaningful interactions (tap, type, swipe between tabs), not for 'look around' or 'scroll to see more'. If the post-action summary reports that the page did not change, stop acting on this page and move on.",
|
|
79
|
+
description: "Perform a high-level UI interaction described in natural language. Examples: 'Go back', 'Tap the settings icon', 'Type text in the search box'. After execution, returns a summary of the new page state including whether the action actually changed anything.\n\nIMPORTANT: Use get_page_summary FIRST to check if the page is a fixed single-screen layout. If it is, do NOT request scroll actions — there is nothing to scroll to. Use ai_act only for meaningful interactions (tap, type, swipe between tabs), not for 'look around' or 'scroll to see more'. If the post-action summary reports that the page did not change, stop acting on this page and move on.\n\nNote on before/after comparison: The AI model is stateless — each summary only sees the current screenshot. To detect changes, ai_act internally captures a before-state description (either from your prior get_page_summary call, or by grabbing a quick snapshot before acting) and passes it to the after-summary for comparison. For richer change detection, call get_page_summary before ai_act.",
|
|
80
80
|
inputSchema: {
|
|
81
81
|
sessionId: z.string().describe("Session ID from exploration_start"),
|
|
82
82
|
intent: z.string().describe("Description of what to do"),
|
|
@@ -24,6 +24,7 @@ async function persistMeta(state, env) {
|
|
|
24
24
|
resourceId: state.resourceId,
|
|
25
25
|
platform: state.platform,
|
|
26
26
|
env,
|
|
27
|
+
appRef: state.appRef,
|
|
27
28
|
createdAt: state.createdAt,
|
|
28
29
|
lastActivityAt: state.lastActivityAt,
|
|
29
30
|
};
|
|
@@ -50,12 +51,13 @@ async function removeMeta(id) {
|
|
|
50
51
|
// Session lifecycle
|
|
51
52
|
// ---------------------------------------------------------------------------
|
|
52
53
|
/** Create a new exploration session and persist metadata to disk. */
|
|
53
|
-
export function createSession(id, resourceId, platform, session, env) {
|
|
54
|
+
export function createSession(id, resourceId, platform, session, env, appRef) {
|
|
54
55
|
const state = {
|
|
55
56
|
id,
|
|
56
57
|
resourceId,
|
|
57
58
|
platform,
|
|
58
59
|
session,
|
|
60
|
+
appRef,
|
|
59
61
|
createdAt: Date.now(),
|
|
60
62
|
lastActivityAt: Date.now(),
|
|
61
63
|
};
|
|
@@ -88,6 +90,7 @@ export function storeSession(id, session, meta) {
|
|
|
88
90
|
resourceId: meta.resourceId,
|
|
89
91
|
platform: meta.platform,
|
|
90
92
|
session,
|
|
93
|
+
appRef: meta.appRef,
|
|
91
94
|
createdAt: meta.createdAt,
|
|
92
95
|
lastActivityAt: Date.now(),
|
|
93
96
|
};
|
|
@@ -1,4 +1,65 @@
|
|
|
1
|
+
import http from "node:http";
|
|
2
|
+
import { execSync } from "node:child_process";
|
|
1
3
|
import { resolveSession } from "./tools-session.js";
|
|
4
|
+
import { getSession } from "./sessionManager.js";
|
|
5
|
+
/**
 * Ask the session's WebDriverAgent endpoint (`/wda/activeAppInfo`) which app
 * is currently active.
 *
 * This is a best-effort probe: every failure path (non-iOS session, missing
 * target info, invalid URL, network error, 3 s socket timeout, truncated or
 * non-JSON response, payload without `value.bundleId`) resolves to `null`
 * instead of rejecting, so callers never need a try/catch.
 *
 * @param {object} session - Session exposing `platform` and, for iOS,
 *   `target.wdaHost` / `target.wdaPort`.
 * @returns {Promise<{ bundleId: string, name: * } | null>} The `bundleId` and
 *   `name` fields reported by the endpoint, or `null` when unavailable.
 */
async function detectIosForegroundApp(session) {
    if (session.platform !== "ios")
        return null;
    const { wdaHost, wdaPort } = session.target ?? {};
    if (wdaHost == null || wdaPort == null)
        return null;
    return new Promise((resolve) => {
        // Several events can fire for one request (timeout + error, end + close);
        // only the first outcome counts.
        let settled = false;
        const settle = (value) => {
            if (settled)
                return;
            settled = true;
            resolve(value);
        };
        let req;
        try {
            req = http.get(`http://${wdaHost}:${wdaPort}/wda/activeAppInfo`, { timeout: 3000 }, (res) => {
                // Decode as a stream so a multi-byte character split across two
                // chunks (e.g. a non-ASCII app name) is not corrupted.
                res.setEncoding("utf8");
                let body = "";
                res.on("data", (chunk) => { body += chunk; });
                res.on("end", () => {
                    try {
                        const info = JSON.parse(body)?.value;
                        settle(info?.bundleId ? { bundleId: info.bundleId, name: info.name } : null);
                    }
                    catch {
                        settle(null);
                    }
                });
                // A connection dropped mid-response never emits "end"; without
                // these handlers the promise would stay pending forever.
                res.on("error", () => settle(null));
                res.on("close", () => settle(null));
            });
        }
        catch {
            // http.get throws synchronously on an unparsable URL.
            settle(null);
            return;
        }
        req.on("error", () => settle(null));
        req.on("timeout", () => { req.destroy(); settle(null); });
    });
}
|
|
32
|
+
/**
 * Determine the Android foreground package by running `adb ... shell dumpsys
 * window` and inspecting its `mCurrentFocus` / `mFocusedApp` lines.
 *
 * Best-effort and synchronous (`execSync`, 5 s timeout): a non-Android
 * session, missing target info, an adb failure, or output with no
 * recognisable package all yield `null`.
 *
 * Lookup order:
 *   1. `mCurrentFocus=Window{<hash> u0 <package>/<activity>}`
 *   2. `appPackageName=<package>`
 *   3. `mFocusedApp=...{<hash> u0 <package>/<activity> ...}`
 *
 * @param {object} session - Session exposing `platform` and, for Android,
 *   `target.serial` / `target.adbHost` / `target.adbPort`.
 * @returns {{ bundleId: string } | null} The detected package name, or `null`.
 */
function detectAndroidForegroundApp(session) {
    if (session.platform !== "android")
        return null;
    const { serial, adbHost, adbPort } = session.target ?? {};
    if (serial == null || adbHost == null || adbPort == null)
        return null;
    // The command goes through a shell (for the grep pipeline), so every
    // interpolated value is single-quoted to keep it a single literal argument.
    const quote = (value) => `'${String(value).replace(/'/g, "'\\''")}'`;
    try {
        const output = execSync(`adb -H ${quote(adbHost)} -P ${quote(adbPort)} -s ${quote(serial)} shell dumpsys window 2>/dev/null | grep -E 'mCurrentFocus|mFocusedApp'`, { encoding: "utf8", timeout: 5000 });
        const lines = output.split(/\r?\n/);
        // The package is the whitespace-delimited token directly before "/".
        // Anchoring on the slash skips earlier tokens such as the window hash
        // and the "u0" user id, and ignores focus lines with no component
        // (e.g. "mCurrentFocus=null" or a popup window title).
        const packageOn = (label) => {
            for (const line of lines) {
                if (!line.includes(label))
                    continue;
                const found = line.match(/\s([^\s/}]+)\//);
                if (found)
                    return found[1];
            }
            return undefined;
        };
        const bundleId = packageOn("mCurrentFocus")
            ?? output.match(/appPackageName=([^\s}]+)/)?.[1]
            ?? packageOn("mFocusedApp");
        if (bundleId)
            return { bundleId };
    }
    catch {
        // adb not reachable or no foreground window
    }
    return null;
}
|
|
53
|
+
/**
 * Route foreground-app detection to the helper matching the session's
 * platform.
 *
 * @param {object} session - Session whose `platform` selects the helper.
 * @returns {Promise<object | null>} Whatever the platform helper produces, or
 *   `null` for any platform other than "ios" / "android".
 */
async function detectForegroundApp(session) {
    switch (session.platform) {
        case "ios":
            return detectIosForegroundApp(session);
        case "android":
            return detectAndroidForegroundApp(session);
        default:
            return null;
    }
}
|
|
60
|
+
// ---------------------------------------------------------------------------
|
|
61
|
+
// Tool handlers
|
|
62
|
+
// ---------------------------------------------------------------------------
|
|
2
63
|
export function getPageSummaryHandler(ctx) {
|
|
3
64
|
return async (input) => {
|
|
4
65
|
const session = await resolveSession(input.sessionId, ctx);
|
|
@@ -8,7 +69,20 @@ export function getPageSummaryHandler(ctx) {
|
|
|
8
69
|
"2) 是否有弹窗、广告或遮挡物?\n" +
|
|
9
70
|
"3) 整体布局类型:固定单屏 / 可滚动长页面 / 多Tab / 列表\n" +
|
|
10
71
|
"先判断布局类型,再逐一描述每个区域的内容。");
|
|
11
|
-
|
|
72
|
+
const state = getSession(input.sessionId);
|
|
73
|
+
const foregroundApp = await detectForegroundApp(session);
|
|
74
|
+
const appRef = state.appRef;
|
|
75
|
+
// Save for aiActHandler's before/after comparison
|
|
76
|
+
state.lastPageSummary = summary;
|
|
77
|
+
return {
|
|
78
|
+
summary,
|
|
79
|
+
app: {
|
|
80
|
+
platform: session.platform,
|
|
81
|
+
resourceId: state.resourceId,
|
|
82
|
+
...(appRef ? { appRef } : {}),
|
|
83
|
+
...(foregroundApp ? { foregroundApp } : {}),
|
|
84
|
+
},
|
|
85
|
+
};
|
|
12
86
|
};
|
|
13
87
|
}
|
|
14
88
|
export function askAboutScreenHandler(ctx) {
|
|
@@ -21,13 +95,30 @@ export function askAboutScreenHandler(ctx) {
|
|
|
21
95
|
export function aiActHandler(ctx) {
|
|
22
96
|
return async (input) => {
|
|
23
97
|
const session = await resolveSession(input.sessionId, ctx);
|
|
98
|
+
const state = getSession(input.sessionId);
|
|
99
|
+
// Before-state: reuse from get_page_summary if available, otherwise grab a quick one
|
|
100
|
+
const beforeSummary = state.lastPageSummary ?? await session.agent.aiAsk("用一句话描述当前页面的最关键特征:什么类型的页面(列表/表单/弹窗/首页等),最显著的内容是什么。");
|
|
24
101
|
await session.agent.aiAct(input.intent);
|
|
25
|
-
const afterSummary = await session.agent.aiAsk(
|
|
26
|
-
|
|
27
|
-
"
|
|
102
|
+
const afterSummary = await session.agent.aiAsk(`操作前的页面:${beforeSummary}\n` +
|
|
103
|
+
`执行的操作:${input.intent}\n` +
|
|
104
|
+
"请判断操作结果:\n" +
|
|
105
|
+
"1) 页面内容是否发生了变化?(进入了新页面、弹出弹窗、滚动到底部、输入框获得焦点等)\n" +
|
|
106
|
+
"2) 如果操作是滑动,是否已到达底部或页面没有变化?\n" +
|
|
28
107
|
"3) 当前页面布局类型是固定单屏还是可滚动长页面?\n" +
|
|
29
|
-
"4)
|
|
30
|
-
"
|
|
31
|
-
|
|
108
|
+
"4) 当前页面最关键的变化是什么?\n" +
|
|
109
|
+
"如果操作没有产生任何实际变化(比如反复滑动但没有新内容),请明确指出\"页面没有变化\"。");
|
|
110
|
+
// Update for next aiAct call
|
|
111
|
+
state.lastPageSummary = afterSummary;
|
|
112
|
+
const foregroundApp = await detectForegroundApp(session);
|
|
113
|
+
return {
|
|
114
|
+
ok: true,
|
|
115
|
+
summary: afterSummary,
|
|
116
|
+
app: {
|
|
117
|
+
platform: session.platform,
|
|
118
|
+
resourceId: state.resourceId,
|
|
119
|
+
...(state.appRef ? { appRef: state.appRef } : {}),
|
|
120
|
+
...(foregroundApp ? { foregroundApp } : {}),
|
|
121
|
+
},
|
|
122
|
+
};
|
|
32
123
|
};
|
|
33
124
|
}
|
|
@@ -241,25 +241,26 @@ export function getExplorationStartHandler(ctx) {
|
|
|
241
241
|
}
|
|
242
242
|
const session = await createMidsceneSessionFromResourceId(resourceId, runtimeEnv);
|
|
243
243
|
const sessionId = generateSessionId();
|
|
244
|
-
|
|
244
|
+
const appRef = input.appRef;
|
|
245
|
+
createSession(sessionId, resourceId, platform, session, runtimeEnv, appRef);
|
|
245
246
|
try {
|
|
246
|
-
if (
|
|
247
|
-
if (isInstallableRef(
|
|
248
|
-
await ctx.client.installApp(resourceId,
|
|
247
|
+
if (appRef) {
|
|
248
|
+
if (isInstallableRef(appRef)) {
|
|
249
|
+
await ctx.client.installApp(resourceId, appRef);
|
|
249
250
|
return {
|
|
250
251
|
sessionId,
|
|
251
252
|
device: { platform, resourceId },
|
|
252
|
-
note: `App installed from ${
|
|
253
|
+
note: `App installed from ${appRef}. Use exploration_ai_act to launch it.`,
|
|
253
254
|
};
|
|
254
255
|
}
|
|
255
|
-
await session.agent.launch(
|
|
256
|
+
await session.agent.launch(appRef);
|
|
256
257
|
}
|
|
257
258
|
}
|
|
258
259
|
catch (err) {
|
|
259
260
|
await destroySessionById(sessionId).catch(() => { });
|
|
260
261
|
throw err;
|
|
261
262
|
}
|
|
262
|
-
return { sessionId, device: { platform, resourceId } };
|
|
263
|
+
return { sessionId, device: { platform, resourceId }, appRef: appRef ?? undefined };
|
|
263
264
|
};
|
|
264
265
|
}
|
|
265
266
|
export function getExplorationEndHandler() {
|