preflite 1.0.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -53,7 +53,7 @@ export function registerExplorationTools(server, ctx) {
53
53
  // ---------------------------------------------------------------------------
54
54
  server.registerTool("exploration_get_page_summary", {
55
55
  title: "Get Page Summary",
56
- description: "Get a natural language summary of the current screen, including layout type (fixed single-screen / scrollable long page / multi-tab / list). Call this first when entering a new page to understand its structure before acting. Use this INSTEAD of ai_act if you just want to observe — ai_act is for changing state, not for looking around.",
56
+ description: "Get a natural language summary of the current screen, including layout type (fixed single-screen / scrollable long page / multi-tab / list). Call this first when entering a new page to understand its structure before acting. The summary is saved internally; if you then call ai_act, it will be used as the before-state for change comparison. Use this INSTEAD of ai_act if you just want to observe — ai_act is for changing state, not for looking around.",
57
57
  inputSchema: {
58
58
  sessionId: z.string().describe("Session ID from exploration_start"),
59
59
  },
@@ -76,7 +76,7 @@ export function registerExplorationTools(server, ctx) {
76
76
  // ---------------------------------------------------------------------------
77
77
  server.registerTool("exploration_ai_act", {
78
78
  title: "AI Act",
79
- description: "Perform a high-level UI interaction described in natural language. Examples: 'Go back', 'Tap the settings icon', 'Type text in the search box'. After execution, returns a summary of the new page state including whether the action actually changed anything.\n\nIMPORTANT: Use get_page_summary FIRST to check if the page is a fixed single-screen layout. If it is, do NOT request scroll actions — there is nothing to scroll to. Use ai_act only for meaningful interactions (tap, type, swipe between tabs), not for 'look around' or 'scroll to see more'. If the post-action summary reports that the page did not change, stop acting on this page and move on.",
79
+ description: "Perform a high-level UI interaction described in natural language. Examples: 'Go back', 'Tap the settings icon', 'Type text in the search box'. After execution, returns a summary of the new page state including whether the action actually changed anything.\n\nIMPORTANT: Use get_page_summary FIRST to check if the page is a fixed single-screen layout. If it is, do NOT request scroll actions — there is nothing to scroll to. Use ai_act only for meaningful interactions (tap, type, swipe between tabs), not for 'look around' or 'scroll to see more'. If the post-action summary reports that the page did not change, stop acting on this page and move on.\n\nNote on before/after comparison: The AI model is stateless — each summary only sees the current screenshot. To detect changes, ai_act internally captures a before-state description (either from your prior get_page_summary call, or by grabbing a quick snapshot before acting) and passes it to the after-summary for comparison. For richer change detection, call get_page_summary before ai_act.",
80
80
  inputSchema: {
81
81
  sessionId: z.string().describe("Session ID from exploration_start"),
82
82
  intent: z.string().describe("Description of what to do"),
@@ -24,6 +24,7 @@ async function persistMeta(state, env) {
24
24
  resourceId: state.resourceId,
25
25
  platform: state.platform,
26
26
  env,
27
+ appRef: state.appRef,
27
28
  createdAt: state.createdAt,
28
29
  lastActivityAt: state.lastActivityAt,
29
30
  };
@@ -50,12 +51,13 @@ async function removeMeta(id) {
50
51
  // Session lifecycle
51
52
  // ---------------------------------------------------------------------------
52
53
  /** Create a new exploration session and persist metadata to disk. */
53
- export function createSession(id, resourceId, platform, session, env) {
54
+ export function createSession(id, resourceId, platform, session, env, appRef) {
54
55
  const state = {
55
56
  id,
56
57
  resourceId,
57
58
  platform,
58
59
  session,
60
+ appRef,
59
61
  createdAt: Date.now(),
60
62
  lastActivityAt: Date.now(),
61
63
  };
@@ -88,6 +90,7 @@ export function storeSession(id, session, meta) {
88
90
  resourceId: meta.resourceId,
89
91
  platform: meta.platform,
90
92
  session,
93
+ appRef: meta.appRef,
91
94
  createdAt: meta.createdAt,
92
95
  lastActivityAt: Date.now(),
93
96
  };
@@ -1,4 +1,65 @@
1
+ import http from "node:http";
2
+ import { execSync } from "node:child_process";
1
3
  import { resolveSession } from "./tools-session.js";
4
+ import { getSession } from "./sessionManager.js";
5
/**
 * Ask WebDriverAgent which app is currently in the foreground on an iOS device.
 *
 * Issues `GET http://<wdaHost>:<wdaPort>/wda/activeAppInfo` with a 3 second
 * socket timeout and reads `value.bundleId` / `value.name` from the JSON reply.
 * Detection is best-effort: every failure mode (wrong platform, missing target
 * info, network error, timeout, truncated or malformed response, reply without
 * a bundle id) resolves to `null` instead of rejecting.
 *
 * @param {{ platform: string, target?: { wdaHost?: string, wdaPort?: number|string } }} session
 *   Session object; only `platform` and `target.wdaHost` / `target.wdaPort` are read.
 * @returns {Promise<{ bundleId: string, name: (string|undefined) } | null>}
 *   The foreground app, or `null` when it cannot be determined.
 */
async function detectIosForegroundApp(session) {
    if (session.platform !== "ios")
        return null;
    // A session without WDA connection details cannot be probed; treat it like
    // any other detection failure rather than throwing out of a best-effort helper.
    const { wdaHost, wdaPort } = session.target ?? {};
    if (!wdaHost || !wdaPort)
        return null;
    return new Promise((resolve) => {
        // resolve() is idempotent, so the handlers below may each call it; only
        // the first call decides the result.
        const req = http.get(`http://${wdaHost}:${wdaPort}/wda/activeAppInfo`, { timeout: 3000 }, (res) => {
            // Decode as one UTF-8 stream so a multi-byte character (e.g. a CJK
            // app name) split across two chunks is not corrupted.
            res.setEncoding("utf8");
            let body = "";
            res.on("data", (chunk) => { body += chunk; });
            res.on("end", () => {
                try {
                    const data = JSON.parse(body);
                    if (data?.value?.bundleId) {
                        resolve({ bundleId: data.value.bundleId, name: data.value.name });
                    }
                    else {
                        resolve(null);
                    }
                }
                catch {
                    // Non-JSON reply (e.g. an error page): no app info available.
                    resolve(null);
                }
            });
            // If the connection drops mid-body, "end" never fires; make sure the
            // promise still settles. After a normal "end" these are no-ops.
            res.on("error", () => resolve(null));
            res.on("close", () => resolve(null));
        });
        req.on("error", () => resolve(null));
        req.on("timeout", () => { req.destroy(); resolve(null); });
    });
}
32
/**
 * Quote a value as a single POSIX shell word so it cannot be interpreted as
 * shell syntax when interpolated into a command line.
 */
function quoteAdbArg(value) {
    return `'${String(value).replace(/'/g, `'\\''`)}'`;
}
/**
 * Extract the foreground package name from `dumpsys window` focus lines.
 *
 * Recognised shapes:
 *   mCurrentFocus=Window{<hash> u0 com.example.app/com.example.app.MainActivity}
 *   mFocusedApp=ActivityRecord{<hash> u0 com.example.app/.MainActivity t12}
 *   mFocusedApp=AppWindowToken{... appPackageName=com.example.app}
 *
 * @param {string} output Text containing the mCurrentFocus / mFocusedApp lines.
 * @returns {string|null} The package name, or null when none can be found.
 */
function parseAndroidForegroundPackage(output) {
    const text = String(output);
    const lines = text.split(/\r?\n/);
    // A component is "<package>/<activity>"; the package is a dotted identifier
    // preceded by whitespace. Requiring the dot and the slash skips the window
    // hash and the user id ("u0") that precede it inside the braces.
    const componentPattern = /\s([A-Za-z]\w*(?:\.\w+)+)\/[^\s}]*/;
    // Prefer the focused window, then the focused app record.
    for (const key of ["mCurrentFocus", "mFocusedApp"]) {
        for (const line of lines) {
            if (!line.includes(key))
                continue;
            const componentMatch = line.match(componentPattern);
            if (componentMatch)
                return componentMatch[1];
        }
    }
    const packageMatch = text.match(/appPackageName=([^\s}]+)/);
    return packageMatch ? packageMatch[1] : null;
}
/**
 * Determine the foreground app of an Android device via `adb shell dumpsys window`.
 *
 * Runs synchronously (blocks until adb returns or the 5 second timeout hits).
 * Detection is best-effort: wrong platform, missing target info, adb failures
 * and unparseable output all yield `null`.
 *
 * @param {{ platform: string, target?: { serial?: string, adbHost?: string, adbPort?: number|string } }} session
 *   Session object; only `platform` and `target.serial` / `adbHost` / `adbPort` are read.
 * @returns {{ bundleId: string } | null} The foreground package, or null.
 */
function detectAndroidForegroundApp(session) {
    if (session.platform !== "android")
        return null;
    const { serial, adbHost, adbPort } = session.target ?? {};
    if (serial == null || adbHost == null || adbPort == null)
        return null;
    try {
        // Every interpolated value is shell-quoted: the command goes through a
        // shell (needed for the pipe to grep), so raw values could inject syntax.
        const output = execSync(`adb -H ${quoteAdbArg(adbHost)} -P ${quoteAdbArg(adbPort)} -s ${quoteAdbArg(serial)} shell dumpsys window 2>/dev/null | grep -E 'mCurrentFocus|mFocusedApp'`, { encoding: "utf8", timeout: 5000 });
        const bundleId = parseAndroidForegroundPackage(output);
        if (bundleId)
            return { bundleId };
    }
    catch {
        // adb not reachable or no foreground window
    }
    return null;
}
53
/**
 * Route foreground-app detection to the probe matching the session's platform.
 *
 * @param {{ platform: string }} session Session whose `platform` selects the probe.
 * @returns {Promise<object|null>} Whatever the platform probe returns, or
 *   `null` for any platform other than "ios" and "android".
 */
async function detectForegroundApp(session) {
    switch (session.platform) {
        case "ios":
            return detectIosForegroundApp(session);
        case "android":
            return detectAndroidForegroundApp(session);
        default:
            return null;
    }
}
60
+ // ---------------------------------------------------------------------------
61
+ // Tool handlers
62
+ // ---------------------------------------------------------------------------
2
63
  export function getPageSummaryHandler(ctx) {
3
64
  return async (input) => {
4
65
  const session = await resolveSession(input.sessionId, ctx);
@@ -8,7 +69,20 @@ export function getPageSummaryHandler(ctx) {
8
69
  "2) 是否有弹窗、广告或遮挡物?\n" +
9
70
  "3) 整体布局类型:固定单屏 / 可滚动长页面 / 多Tab / 列表\n" +
10
71
  "先判断布局类型,再逐一描述每个区域的内容。");
11
- return { summary };
72
+ const state = getSession(input.sessionId);
73
+ const foregroundApp = await detectForegroundApp(session);
74
+ const appRef = state.appRef;
75
+ // Save for aiActHandler's before/after comparison
76
+ state.lastPageSummary = summary;
77
+ return {
78
+ summary,
79
+ app: {
80
+ platform: session.platform,
81
+ resourceId: state.resourceId,
82
+ ...(appRef ? { appRef } : {}),
83
+ ...(foregroundApp ? { foregroundApp } : {}),
84
+ },
85
+ };
12
86
  };
13
87
  }
14
88
  export function askAboutScreenHandler(ctx) {
@@ -21,13 +95,30 @@ export function askAboutScreenHandler(ctx) {
21
95
  /**
   * Build the handler for the AI "act" tool.
   *
   * In this diff view, lines marked "-" belong to the previous version and
   * lines marked "+" to the new one. The new version of the returned handler:
   *   1. resolves the session via resolveSession and its stored state via getSession;
   *   2. takes a before-state description: state.lastPageSummary when set,
   *      otherwise a one-sentence description requested from session.agent.aiAsk;
   *   3. performs input.intent through session.agent.aiAct;
   *   4. asks the agent to judge the result, passing the before-state and the intent;
   *   5. stores that answer in state.lastPageSummary;
   *   6. returns { ok, summary, app }, where app carries platform and resourceId,
   *      plus appRef and foregroundApp only when they are truthy.
   *
   * @param ctx Context forwarded to resolveSession.
   * @returns An async function taking an input with `sessionId` and `intent`.
   */
  export function aiActHandler(ctx) {
22
96
  return async (input) => {
23
97
  const session = await resolveSession(input.sessionId, ctx);
98
+ const state = getSession(input.sessionId);
99
+ // Before-state: reuse from get_page_summary if available, otherwise grab a quick one
100
+ const beforeSummary = state.lastPageSummary ?? await session.agent.aiAsk("用一句话描述当前页面的最关键特征:什么类型的页面(列表/表单/弹窗/首页等),最显著的内容是什么。");
24
101
  await session.agent.aiAct(input.intent);
25
- const afterSummary = await session.agent.aiAsk("刚刚的操作已完成。请判断:\n" +
26
- "1) 操作是否改变了页面内容?(新页面、弹窗、滚动到底部、输入框获得焦点等)\n" +
27
- "2) 如果操作是滑动页面,是否滑到了底部或页面内容没有变化?\n" +
102
+ const afterSummary = await session.agent.aiAsk(`操作前的页面:${beforeSummary}\n` +
103
+ `执行的操作:${input.intent}\n` +
104
+ "请判断操作结果:\n" +
105
+ "1) 页面内容是否发生了变化?(进入了新页面、弹出弹窗、滚动到底部、输入框获得焦点等)\n" +
106
+ "2) 如果操作是滑动,是否已到达底部或页面没有变化?\n" +
28
107
  "3) 当前页面布局类型是固定单屏还是可滚动长页面?\n" +
29
- "4) 当前页面出现的最关键变化是什么?\n" +
30
- "如果你发现操作没有产生任何实际变化(比如反复滑动但没有新内容),请明确指出\"页面没有变化\"。");
31
- return { ok: true, summary: afterSummary };
108
+ "4) 当前页面最关键的变化是什么?\n" +
109
+ "如果操作没有产生任何实际变化(比如反复滑动但没有新内容),请明确指出\"页面没有变化\"。");
110
+ // Update for next aiAct call
111
+ state.lastPageSummary = afterSummary;
// Foreground app is probed after the action, so it reflects the post-action state.
112
+ const foregroundApp = await detectForegroundApp(session);
113
+ return {
114
+ ok: true,
115
+ summary: afterSummary,
116
+ app: {
117
+ platform: session.platform,
118
+ resourceId: state.resourceId,
119
+ ...(state.appRef ? { appRef: state.appRef } : {}),
120
+ ...(foregroundApp ? { foregroundApp } : {}),
121
+ },
122
+ };
32
123
  };
33
124
  }
@@ -241,25 +241,26 @@ export function getExplorationStartHandler(ctx) {
241
241
  }
242
242
  const session = await createMidsceneSessionFromResourceId(resourceId, runtimeEnv);
243
243
  const sessionId = generateSessionId();
244
- createSession(sessionId, resourceId, platform, session, runtimeEnv);
244
+ const appRef = input.appRef;
245
+ createSession(sessionId, resourceId, platform, session, runtimeEnv, appRef);
245
246
  try {
246
- if (input.appRef) {
247
- if (isInstallableRef(input.appRef)) {
248
- await ctx.client.installApp(resourceId, input.appRef);
247
+ if (appRef) {
248
+ if (isInstallableRef(appRef)) {
249
+ await ctx.client.installApp(resourceId, appRef);
249
250
  return {
250
251
  sessionId,
251
252
  device: { platform, resourceId },
252
- note: `App installed from ${input.appRef}. Use exploration_ai_act to launch it.`,
253
+ note: `App installed from ${appRef}. Use exploration_ai_act to launch it.`,
253
254
  };
254
255
  }
255
- await session.agent.launch(input.appRef);
256
+ await session.agent.launch(appRef);
256
257
  }
257
258
  }
258
259
  catch (err) {
259
260
  await destroySessionById(sessionId).catch(() => { });
260
261
  throw err;
261
262
  }
262
- return { sessionId, device: { platform, resourceId } };
263
+ return { sessionId, device: { platform, resourceId }, appRef: appRef ?? undefined };
263
264
  };
264
265
  }
265
266
  export function getExplorationEndHandler() {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "preflite",
3
- "version": "1.0.1",
3
+ "version": "1.1.0",
4
4
  "description": "Preflight — Local mobile AI testing via MCP. AI-assisted testing on real Android/iOS/Harmony devices.",
5
5
  "license": "Apache-2.0",
6
6
  "type": "module",