@mobileai/react-native 0.9.27 → 0.9.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +24 -11
- package/android/build.gradle +17 -0
- package/android/src/main/java/com/mobileai/overlay/FloatingOverlayDialogRootViewGroup.kt +243 -0
- package/android/src/main/java/com/mobileai/overlay/FloatingOverlayView.kt +281 -87
- package/android/src/newarch/com/mobileai/overlay/FloatingOverlayViewManager.kt +52 -17
- package/android/src/oldarch/com/mobileai/overlay/FloatingOverlayViewManager.kt +49 -2
- package/bin/generate-map.cjs +45 -6
- package/ios/Podfile +63 -0
- package/ios/Podfile.lock +2290 -0
- package/ios/Podfile.properties.json +4 -0
- package/ios/mobileaireactnative/AppDelegate.swift +69 -0
- package/ios/mobileaireactnative/Images.xcassets/AppIcon.appiconset/Contents.json +13 -0
- package/ios/mobileaireactnative/Images.xcassets/Contents.json +6 -0
- package/ios/mobileaireactnative/Images.xcassets/SplashScreenLegacy.imageset/Contents.json +21 -0
- package/ios/mobileaireactnative/Images.xcassets/SplashScreenLegacy.imageset/SplashScreenLegacy.png +0 -0
- package/ios/mobileaireactnative/Info.plist +55 -0
- package/ios/mobileaireactnative/PrivacyInfo.xcprivacy +48 -0
- package/ios/mobileaireactnative/SplashScreen.storyboard +47 -0
- package/ios/mobileaireactnative/Supporting/Expo.plist +6 -0
- package/ios/mobileaireactnative/mobileaireactnative-Bridging-Header.h +3 -0
- package/ios/mobileaireactnative.xcodeproj/project.pbxproj +547 -0
- package/ios/mobileaireactnative.xcodeproj/xcshareddata/xcschemes/mobileaireactnative.xcscheme +88 -0
- package/ios/mobileaireactnative.xcworkspace/contents.xcworkspacedata +10 -0
- package/lib/module/components/AIAgent.js +405 -168
- package/lib/module/components/AgentChatBar.js +250 -59
- package/lib/module/components/FloatingOverlayWrapper.js +68 -32
- package/lib/module/config/endpoints.js +22 -1
- package/lib/module/core/AgentRuntime.js +103 -1
- package/lib/module/core/FiberTreeWalker.js +98 -0
- package/lib/module/core/OutcomeVerifier.js +149 -0
- package/lib/module/core/systemPrompt.js +96 -25
- package/lib/module/providers/GeminiProvider.js +9 -3
- package/lib/module/services/telemetry/TelemetryService.js +21 -2
- package/lib/module/services/telemetry/TouchAutoCapture.js +45 -35
- package/lib/module/specs/FloatingOverlayNativeComponent.ts +7 -1
- package/lib/module/support/supportPrompt.js +22 -7
- package/lib/module/support/supportStyle.js +55 -0
- package/lib/module/support/types.js +2 -0
- package/lib/module/tools/typeTool.js +20 -0
- package/lib/module/utils/humanizeScreenName.js +49 -0
- package/lib/typescript/src/components/AIAgent.d.ts +6 -2
- package/lib/typescript/src/components/AgentChatBar.d.ts +15 -1
- package/lib/typescript/src/components/FloatingOverlayWrapper.d.ts +22 -10
- package/lib/typescript/src/config/endpoints.d.ts +4 -0
- package/lib/typescript/src/core/AgentRuntime.d.ts +9 -0
- package/lib/typescript/src/core/FiberTreeWalker.d.ts +12 -1
- package/lib/typescript/src/core/OutcomeVerifier.d.ts +46 -0
- package/lib/typescript/src/core/systemPrompt.d.ts +3 -10
- package/lib/typescript/src/core/types.d.ts +35 -0
- package/lib/typescript/src/index.d.ts +1 -0
- package/lib/typescript/src/services/telemetry/TelemetryService.d.ts +7 -1
- package/lib/typescript/src/services/telemetry/types.d.ts +1 -1
- package/lib/typescript/src/specs/FloatingOverlayNativeComponent.d.ts +5 -0
- package/lib/typescript/src/support/index.d.ts +1 -0
- package/lib/typescript/src/support/supportStyle.d.ts +9 -0
- package/lib/typescript/src/support/types.d.ts +3 -0
- package/lib/typescript/src/utils/humanizeScreenName.d.ts +6 -0
- package/package.json +5 -2
- package/src/specs/FloatingOverlayNativeComponent.ts +7 -1
- package/ios/MobileAIFloatingOverlayComponentView.mm +0 -73
- package/ios/MobileAIPilotIntents.swift +0 -51
|
@@ -15,10 +15,12 @@ import { logger } from "../utils/logger.js";
|
|
|
15
15
|
import { walkFiberTree } from "./FiberTreeWalker.js";
|
|
16
16
|
import { dehydrateScreen } from "./ScreenDehydrator.js";
|
|
17
17
|
import { buildSystemPrompt, buildKnowledgeOnlyPrompt } from "./systemPrompt.js";
|
|
18
|
+
import { buildVerificationAction, createVerificationSnapshot, OutcomeVerifier } from "./OutcomeVerifier.js";
|
|
18
19
|
import { KnowledgeBaseService } from "../services/KnowledgeBaseService.js";
|
|
19
20
|
import { installAlertInterceptor, uninstallAlertInterceptor } from "./NativeAlertInterceptor.js";
|
|
20
21
|
import { createTapTool, createLongPressTool, createTypeTool, createScrollTool, createSliderTool, createPickerTool, createDatePickerTool, createKeyboardTool, createGuideTool, createSimplifyTool, createRestoreTool } from "../tools/index.js";
|
|
21
22
|
import { actionRegistry } from "./ActionRegistry.js";
|
|
23
|
+
import { createProvider } from "../providers/ProviderFactory.js";
|
|
22
24
|
const DEFAULT_MAX_STEPS = 25;
|
|
23
25
|
function generateTraceId() {
|
|
24
26
|
return `trace_${Date.now()}_${Math.random().toString(36).slice(2, 10)}`;
|
|
@@ -39,6 +41,10 @@ export class AgentRuntime {
|
|
|
39
41
|
knowledgeService = null;
|
|
40
42
|
lastDehydratedRoot = null;
|
|
41
43
|
currentTraceId = null;
|
|
44
|
+
currentUserGoal = '';
|
|
45
|
+
verifierProvider = null;
|
|
46
|
+
outcomeVerifier = null;
|
|
47
|
+
pendingCriticalVerification = null;
|
|
42
48
|
|
|
43
49
|
// ─── Task-scoped error suppression ──────────────────────────
|
|
44
50
|
// Installed once at execute() start, removed after grace period.
|
|
@@ -147,6 +153,77 @@ export class AgentRuntime {
|
|
|
147
153
|
}
|
|
148
154
|
}
|
|
149
155
|
}
|
|
156
|
+
getVerifier() {
|
|
157
|
+
if (this.config.verifier?.enabled === false) {
|
|
158
|
+
return null;
|
|
159
|
+
}
|
|
160
|
+
if (!this.outcomeVerifier) {
|
|
161
|
+
const verifierConfig = this.config.verifier;
|
|
162
|
+
if (verifierConfig?.provider || verifierConfig?.model || verifierConfig?.proxyUrl || verifierConfig?.proxyHeaders) {
|
|
163
|
+
this.verifierProvider = createProvider(verifierConfig.provider || this.config.provider || 'gemini', this.config.apiKey, verifierConfig.model || this.config.model, verifierConfig.proxyUrl || this.config.proxyUrl, verifierConfig.proxyHeaders || this.config.proxyHeaders);
|
|
164
|
+
} else {
|
|
165
|
+
this.verifierProvider = this.provider;
|
|
166
|
+
}
|
|
167
|
+
this.outcomeVerifier = new OutcomeVerifier(this.verifierProvider, this.config);
|
|
168
|
+
}
|
|
169
|
+
return this.outcomeVerifier;
|
|
170
|
+
}
|
|
171
|
+
createCurrentVerificationSnapshot(screenName, screenContent, elements, screenshot) {
|
|
172
|
+
return createVerificationSnapshot(screenName, screenContent, elements, screenshot);
|
|
173
|
+
}
|
|
174
|
+
async updateCriticalVerification(screenName, screenContent, elements, screenshot, stepIndex) {
|
|
175
|
+
if (!this.pendingCriticalVerification) return;
|
|
176
|
+
const verifier = this.getVerifier();
|
|
177
|
+
if (!verifier) {
|
|
178
|
+
this.pendingCriticalVerification = null;
|
|
179
|
+
return;
|
|
180
|
+
}
|
|
181
|
+
const postAction = this.createCurrentVerificationSnapshot(screenName, screenContent, elements, screenshot);
|
|
182
|
+
this.pendingCriticalVerification.followupSteps += 1;
|
|
183
|
+
const result = await verifier.verify({
|
|
184
|
+
goal: this.pendingCriticalVerification.goal,
|
|
185
|
+
action: this.pendingCriticalVerification.action,
|
|
186
|
+
preAction: this.pendingCriticalVerification.preAction,
|
|
187
|
+
postAction
|
|
188
|
+
});
|
|
189
|
+
this.emitTrace('critical_action_verified', {
|
|
190
|
+
action: this.pendingCriticalVerification.action.toolName,
|
|
191
|
+
label: this.pendingCriticalVerification.action.label,
|
|
192
|
+
status: result.status,
|
|
193
|
+
failureKind: result.failureKind,
|
|
194
|
+
evidence: result.evidence,
|
|
195
|
+
source: result.source,
|
|
196
|
+
followupSteps: this.pendingCriticalVerification.followupSteps
|
|
197
|
+
}, stepIndex);
|
|
198
|
+
if (result.status === 'success') {
|
|
199
|
+
this.pendingCriticalVerification = null;
|
|
200
|
+
return;
|
|
201
|
+
}
|
|
202
|
+
if (result.status === 'error') {
|
|
203
|
+
this.observations.push(`Outcome verifier: The previous action "${this.pendingCriticalVerification.action.label}" did NOT complete successfully. ${result.evidence} Treat this as a ${result.failureKind} failure, do not claim success, and either recover or explain the issue clearly.`);
|
|
204
|
+
return;
|
|
205
|
+
}
|
|
206
|
+
const maxFollowupSteps = verifier.getMaxFollowupSteps();
|
|
207
|
+
const ageNote = this.pendingCriticalVerification.followupSteps >= maxFollowupSteps ? ` This critical action is still unverified after ${this.pendingCriticalVerification.followupSteps} follow-up checks.` : '';
|
|
208
|
+
this.observations.push(`Outcome verifier: The previous action "${this.pendingCriticalVerification.action.label}" is still unverified. ${result.evidence}${ageNote} Before calling done(success=true), keep checking for success or error evidence on the current screen.`);
|
|
209
|
+
}
|
|
210
|
+
maybeStartCriticalVerification(toolName, args, preAction) {
|
|
211
|
+
const verifier = this.getVerifier();
|
|
212
|
+
if (!verifier) return;
|
|
213
|
+
const action = buildVerificationAction(toolName, args, preAction.elements, this.getToolStatusLabel(toolName, args));
|
|
214
|
+
if (!verifier.isCriticalAction(action)) {
|
|
215
|
+
return;
|
|
216
|
+
}
|
|
217
|
+
this.pendingCriticalVerification = {
|
|
218
|
+
goal: this.currentUserGoal,
|
|
219
|
+
action,
|
|
220
|
+
preAction,
|
|
221
|
+
followupSteps: 0
|
|
222
|
+
};
|
|
223
|
+
}
|
|
224
|
+
shouldBlockSuccessCompletion() {
|
|
225
|
+
return this.pendingCriticalVerification !== null;
|
|
226
|
+
}
|
|
150
227
|
|
|
151
228
|
// ─── Tool Registration ─────────────────────────────────────
|
|
152
229
|
|
|
@@ -1328,6 +1405,10 @@ ${screen.elementsText}
|
|
|
1328
1405
|
this.currentTraceId = generateTraceId();
|
|
1329
1406
|
this.observations = [];
|
|
1330
1407
|
this.lastScreenName = '';
|
|
1408
|
+
this.pendingCriticalVerification = null;
|
|
1409
|
+
this.outcomeVerifier = null;
|
|
1410
|
+
this.verifierProvider = null;
|
|
1411
|
+
this.currentUserGoal = userMessage;
|
|
1331
1412
|
// Reset workflow approval for each new task
|
|
1332
1413
|
this.resetAppActionApproval('new task');
|
|
1333
1414
|
const maxSteps = this.config.maxSteps || DEFAULT_MAX_STEPS;
|
|
@@ -1347,6 +1428,7 @@ ${screen.elementsText}
|
|
|
1347
1428
|
contextualMessage = `(Note: You just asked the user: "${this.lastAskUserQuestion}")\n\nUser replied: ${userMessage}`;
|
|
1348
1429
|
this.lastAskUserQuestion = null; // Consume the question
|
|
1349
1430
|
}
|
|
1431
|
+
this.currentUserGoal = contextualMessage;
|
|
1350
1432
|
logger.info('AgentRuntime', `Starting execution: "${contextualMessage}"`);
|
|
1351
1433
|
|
|
1352
1434
|
// Lifecycle: onBeforeTask
|
|
@@ -1491,12 +1573,13 @@ ${screen.elementsText}
|
|
|
1491
1573
|
|
|
1492
1574
|
// 4.5. Capture screenshot for Gemini vision (optional)
|
|
1493
1575
|
const screenshot = await this.captureScreenshot();
|
|
1576
|
+
await this.updateCriticalVerification(screenName, screenContent, screen.elements, screenshot, step);
|
|
1494
1577
|
|
|
1495
1578
|
// 5. Send to AI provider
|
|
1496
1579
|
this.config.onStatusUpdate?.('Thinking...');
|
|
1497
1580
|
const hasKnowledge = !!this.knowledgeService;
|
|
1498
1581
|
const isCopilot = this.config.interactionMode !== 'autopilot';
|
|
1499
|
-
const systemPrompt = buildSystemPrompt('en', hasKnowledge, isCopilot);
|
|
1582
|
+
const systemPrompt = buildSystemPrompt('en', hasKnowledge, isCopilot, this.config.supportStyle);
|
|
1500
1583
|
const tools = this.buildToolsForProvider();
|
|
1501
1584
|
logger.info('AgentRuntime', `Sending to AI with ${tools.length} tools...`);
|
|
1502
1585
|
logger.debug('AgentRuntime', 'System prompt length:', systemPrompt.length);
|
|
@@ -1561,6 +1644,13 @@ ${screen.elementsText}
|
|
|
1561
1644
|
|
|
1562
1645
|
// 6. Process tool calls
|
|
1563
1646
|
if (!response.toolCalls || response.toolCalls.length === 0) {
|
|
1647
|
+
if (this.shouldBlockSuccessCompletion()) {
|
|
1648
|
+
this.emitTrace('task_completion_blocked_needs_verification', {
|
|
1649
|
+
responseText: response.text,
|
|
1650
|
+
pendingVerification: this.pendingCriticalVerification
|
|
1651
|
+
}, step);
|
|
1652
|
+
continue;
|
|
1653
|
+
}
|
|
1564
1654
|
logger.warn('AgentRuntime', 'No tool calls in response. Text:', response.text);
|
|
1565
1655
|
this.emitTrace('task_completed_without_tool', {
|
|
1566
1656
|
responseText: response.text
|
|
@@ -1605,6 +1695,7 @@ ${screen.elementsText}
|
|
|
1605
1695
|
// Prefer the human-readable plan over the raw tool status if available to avoid double statuses
|
|
1606
1696
|
const statusDisplay = reasoning.plan || statusLabel;
|
|
1607
1697
|
this.config.onStatusUpdate?.(statusDisplay);
|
|
1698
|
+
const preActionSnapshot = this.createCurrentVerificationSnapshot(screenName, screenContent, screen.elements, screenshot);
|
|
1608
1699
|
|
|
1609
1700
|
// Find and execute the tool
|
|
1610
1701
|
const tool = this.tools.get(toolCall.name) || this.buildToolsForProvider().find(t => t.name === toolCall.name);
|
|
@@ -1624,6 +1715,11 @@ ${screen.elementsText}
|
|
|
1624
1715
|
args: toolCall.args,
|
|
1625
1716
|
output
|
|
1626
1717
|
}, step);
|
|
1718
|
+
if (output.startsWith('✅')) {
|
|
1719
|
+
this.maybeStartCriticalVerification(toolCall.name, toolCall.args, preActionSnapshot);
|
|
1720
|
+
} else if (toolCall.name !== 'done') {
|
|
1721
|
+
this.pendingCriticalVerification = null;
|
|
1722
|
+
}
|
|
1627
1723
|
if (output === APPROVAL_ALREADY_DONE_TOKEN) {
|
|
1628
1724
|
const result = {
|
|
1629
1725
|
success: true,
|
|
@@ -1652,6 +1748,12 @@ ${screen.elementsText}
|
|
|
1652
1748
|
|
|
1653
1749
|
// Check if done
|
|
1654
1750
|
if (toolCall.name === 'done') {
|
|
1751
|
+
if (toolCall.args.success !== false && this.shouldBlockSuccessCompletion()) {
|
|
1752
|
+
this.emitTrace('done_blocked_needs_verification', {
|
|
1753
|
+
pendingVerification: this.pendingCriticalVerification
|
|
1754
|
+
}, step);
|
|
1755
|
+
continue;
|
|
1756
|
+
}
|
|
1655
1757
|
const result = {
|
|
1656
1758
|
success: toolCall.args.success !== false,
|
|
1657
1759
|
message: toolCall.args.text || toolCall.args.message || output || reasoning.plan || (toolCall.args.success === false ? 'Action stopped.' : 'Action completed.'),
|
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
*
|
|
10
10
|
*/
|
|
11
11
|
|
|
12
|
+
import { Dimensions } from 'react-native';
|
|
12
13
|
import { logger } from "../utils/logger.js";
|
|
13
14
|
import { getChild, getSibling, getParent, getProps, getStateNode, getType, getDisplayName } from "./FiberAdapter.js";
|
|
14
15
|
import { getActiveAlert } from "./NativeAlertInterceptor.js";
|
|
@@ -1171,4 +1172,101 @@ function resolveNativeScrollRef(fiberNode) {
|
|
|
1171
1172
|
logger.debug('FiberTreeWalker', 'Could not resolve native scroll ref — returning stateNode as fallback');
|
|
1172
1173
|
return stateNode;
|
|
1173
1174
|
}
|
|
1175
|
+
|
|
1176
|
+
// ─── Wireframe Capture ─────────────────────────────────────────
|
|
1177
|
+
|
|
1178
|
+
/**
 * Max elements to measure — keeps bridge work bounded.
 * captureWireframe slices the interactive elements returned by walkFiberTree
 * to this length before measuring any of them.
 */
|
|
1179
|
+
const WIREFRAME_MAX_ELEMENTS = 50;
|
|
1180
|
+
/**
 * Measure this many elements per frame, then yield.
 * captureWireframe measures one batch of this size with Promise.all and calls
 * yieldFrame() before starting the next batch.
 */
|
|
1181
|
+
const WIREFRAME_BATCH_SIZE = 10;
|
|
1182
|
+
|
|
1183
|
+
/**
 * Measures one element through its state node's `measure` callback.
 *
 * Resolves with `{ type, label, x, y, width, height }`, where x/y are the
 * page coordinates reported by `measure` and `label` falls back to the
 * element type when the element has no label.
 *
 * Resolves with null (never rejects) when the state node is missing, has no
 * `measure` function, reports a non-positive width or height, or when the
 * lookup or the `measure` call throws.
 *
 * @param el Element descriptor carrying `fiberNode`, `type` and `label`.
 * @returns {Promise<object|null>} The measured component, or null.
 */
function measureElement(el) {
  return new Promise(resolve => {
    const onMeasured = (_x, _y, width, height, pageX, pageY) => {
      const hasArea = width > 0 && height > 0;
      resolve(hasArea ? {
        type: el.type,
        label: el.label || el.type,
        x: pageX,
        y: pageY,
        width,
        height
      } : null);
    };

    try {
      const node = getStateNode(el.fiberNode);
      if (node && typeof node.measure === 'function') {
        node.measure(onMeasured);
      } else {
        resolve(null);
      }
    } catch {
      resolve(null);
    }
  });
}
|
|
1214
|
+
|
|
1215
|
+
/**
 * Waits for roughly one frame before resolving.
 *
 * Uses requestAnimationFrame when that global is a function; otherwise falls
 * back to a 16 ms setTimeout. The promise always resolves with undefined.
 *
 * @returns {Promise<void>}
 */
function yieldFrame() {
  const canUseAnimationFrame = typeof requestAnimationFrame === 'function';
  return new Promise(resolve => {
    if (!canUseAnimationFrame) {
      setTimeout(resolve, 16);
      return;
    }
    requestAnimationFrame(() => resolve());
  });
}
|
|
1228
|
+
|
|
1229
|
+
/**
 * Builds a wireframe description of the current screen from the positions
 * and sizes of its interactive elements.
 *
 * How the work is bounded:
 * - only the first WIREFRAME_MAX_ELEMENTS interactive elements are measured;
 * - elements are measured in batches of WIREFRAME_BATCH_SIZE, and a frame is
 *   yielded between batches (not after the last one).
 *
 * NOTE(review): the original comment states that the caller (AIAgent) defers
 * this via InteractionManager — that is not visible from this function.
 *
 * @param rootRef Root reference handed to walkFiberTree.
 * @param config Walker config; `config.screenName` names the wireframe
 *   ('Unknown' when falsy).
 * @returns {Promise<object|null>} `{ screen, components, deviceWidth,
 *   deviceHeight, capturedAt }`, or null when there are no interactive
 *   elements or none of them could be measured.
 */
export async function captureWireframe(rootRef, config = {}) {
  const { interactives } = walkFiberTree(rootRef, config);
  if (interactives.length === 0) {
    return null;
  }

  const limited = interactives.slice(0, WIREFRAME_MAX_ELEMENTS);
  const components = [];
  let offset = 0;
  while (offset < limited.length) {
    const nextOffset = offset + WIREFRAME_BATCH_SIZE;
    const measured = await Promise.all(limited.slice(offset, nextOffset).map(measureElement));
    components.push(...measured.filter(Boolean));

    // Hand the frame back before the next batch, but not after the final one.
    if (nextOffset < limited.length) {
      await yieldFrame();
    }
    offset = nextOffset;
  }

  if (components.length === 0) {
    return null;
  }

  const windowSize = Dimensions.get('window');
  return {
    screen: config.screenName || 'Unknown',
    components,
    deviceWidth: windowSize.width,
    deviceHeight: windowSize.height,
    capturedAt: new Date().toISOString()
  };
}
|
|
1174
1272
|
//# sourceMappingURL=FiberTreeWalker.js.map
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
// Whole-word, case-insensitive match for commit-style verbs. isCriticalVerificationAction
// tests an action's label against it to decide whether the action needs outcome verification.
const COMMIT_ACTION_PATTERN = /\b(save|submit|confirm|apply|pay|place|update|continue|finish|send|checkout|complete|verify|review|publish|post|delete|cancel)\b/i;
|
|
4
|
+
// Phrases that deterministicVerify treats as success evidence when any of them
// matches the normalized post-action screen content.
const SUCCESS_SIGNAL_PATTERNS = [/\b(success|successful|saved|updated|submitted|completed|done|confirmed|applied|verified)\b/i, /\bthank you\b/i, /\border confirmed\b/i, /\bchanges saved\b/i];
|
|
5
|
+
// Phrases that deterministicVerify treats as failure evidence. The last two entries
// match "verification" / "code" followed within 30 characters by an error word.
const ERROR_SIGNAL_PATTERNS = [/\berror\b/i, /\bfailed\b/i, /\binvalid\b/i, /\brequired\b/i, /\bincorrect\b/i, /\btry again\b/i, /\bcould not\b/i, /\bunable to\b/i, /\bverification\b.{0,30}\b(error|failed|invalid|required)\b/i, /\bcode\b.{0,30}\b(error|failed|invalid|required)\b/i];
|
|
6
|
+
// Once an error signal has matched, deterministicVerify reports failureKind
// 'uncontrollable' if any of these also matches, and 'controllable' otherwise.
const UNCONTROLLABLE_ERROR_PATTERNS = [/\bnetwork\b/i, /\bserver\b/i, /\bservice unavailable\b/i, /\btemporarily unavailable\b/i, /\btimeout\b/i, /\btry later\b/i, /\bconnection\b/i];
|
|
7
|
+
/**
 * Normalizes screen text before pattern matching.
 *
 * Every non-empty bracketed segment (`[...]`) is replaced by a space, runs of
 * whitespace are collapsed to a single space, and the result is trimmed.
 * (Presumably the bracketed segments are element markers in the dehydrated
 * screen text — confirm against the dehydrator.)
 *
 * @param text Screen text; null/undefined is treated as empty text instead of
 *   throwing, and other non-string values are stringified.
 * @returns {string} The normalized text.
 */
function normalizeText(text) {
  if (text == null) {
    return '';
  }
  return String(text).replace(/\[[^\]]+\]/g, ' ').replace(/\s+/g, ' ').trim();
}
|
|
10
|
+
/**
 * Checks whether `target` can be found among `elements`.
 *
 * An element matches when it has the same `index` as the target, or when it
 * has the same `type` and the same non-empty trimmed `label`.
 *
 * Elements or targets without a string label no longer throw; they simply
 * cannot match by label. A missing or non-array `elements` yields false.
 *
 * @param elements Elements of a screen snapshot.
 * @param target Element to look for; falsy means "nothing to look for".
 * @returns {boolean} true when a matching element exists.
 */
function elementStillPresent(elements, target) {
  if (!target || !Array.isArray(elements)) {
    return false;
  }
  const targetLabel = typeof target.label === 'string' ? target.label.trim() : '';
  return elements.some(element => {
    if (!element) return false;
    if (element.index === target.index) return true;
    if (element.type !== target.type) return false;
    const label = typeof element.label === 'string' ? element.label.trim() : '';
    return label.length > 0 && label === targetLabel;
  });
}
|
|
14
|
+
/**
 * Packs the four pieces of screen state into one snapshot object.
 *
 * The values are stored as given (no copying), under the keys `screenName`,
 * `screenContent`, `elements` and `screenshot`.
 *
 * @param screenName Name of the screen.
 * @param screenContent Text content of the screen.
 * @param elements Elements found on the screen.
 * @param screenshot Screenshot of the screen, if any.
 * @returns {object} The snapshot.
 */
export function createVerificationSnapshot(screenName, screenContent, elements, screenshot) {
  const snapshot = {};
  snapshot.screenName = screenName;
  snapshot.screenContent = screenContent;
  snapshot.elements = elements;
  snapshot.screenshot = screenshot;
  return snapshot;
}
|
|
22
|
+
/**
 * Describes a tool call in the form the outcome verifier works with.
 *
 * When `args.index` is a number, the element with that index is looked up in
 * `elements` and becomes `targetElement`; its label (when truthy) is used as
 * the action label, otherwise `fallbackLabel` is used.
 *
 * Missing `args` or a missing/non-array `elements` no longer throws: the
 * action is simply built without a target element.
 *
 * @param toolName Name of the executed tool.
 * @param args Tool arguments as passed by the caller (returned unchanged).
 * @param elements Elements of the pre-action screen.
 * @param fallbackLabel Label to use when no target label is available.
 * @returns {{toolName, args, label, targetElement}} The action descriptor.
 */
export function buildVerificationAction(toolName, args, elements, fallbackLabel) {
  const targetIndex = args?.index;
  const canLookUp = typeof targetIndex === 'number' && Array.isArray(elements);
  const targetElement = canLookUp
    ? elements.find(element => element?.index === targetIndex)
    : undefined;
  return {
    toolName,
    args,
    label: targetElement?.label || fallbackLabel,
    targetElement
  };
}
|
|
31
|
+
/**
 * Decides whether an action's outcome must be verified.
 *
 * An action is critical when its target element is flagged
 * `requiresConfirmation`, or when it was performed with one of the
 * tap / long_press / adjust_slider / select_picker / set_date tools and its
 * label matches COMMIT_ACTION_PATTERN.
 *
 * @param action Action descriptor with `toolName`, `label`, `targetElement`.
 * @returns {boolean} true when the action is critical.
 */
export function isCriticalVerificationAction(action) {
  if (action.targetElement?.requiresConfirmation) {
    return true;
  }
  switch (action.toolName) {
    case 'tap':
    case 'long_press':
    case 'adjust_slider':
    case 'select_picker':
    case 'set_date':
      return COMMIT_ACTION_PATTERN.test(action.label || '');
    default:
      return false;
  }
}
|
|
39
|
+
/**
 * Rule-based verification of a critical action, using only the pre- and
 * post-action snapshots. Checks run in this order:
 *
 * 1. error wording on the post-action screen            -> 'error'
 * 2. the screen name changed                             -> 'success'
 * 3. success wording on the post-action screen           -> 'success'
 * 4. the target element was present before, gone after   -> 'success'
 * 5. otherwise                                           -> 'uncertain'
 *
 * When the app stayed on the same screen, wording only counts as a signal if
 * it was NOT already part of the pre-action content. Text that was visible
 * before the action (a "required" field hint, "Last updated ...") says
 * nothing about what the action did, so those cases now fall through to the
 * later checks instead of producing a false error/success. When the screen
 * name changed, the previous screen is not a meaningful baseline and any
 * matching wording counts.
 *
 * @param context `{ action, preAction, postAction }`; the snapshots provide
 *   `screenName`, `screenContent` and `elements`.
 * @returns {{status, failureKind, evidence, source}} Result with
 *   `source: 'deterministic'`.
 */
function deterministicVerify(context) {
  const { preAction, postAction, action } = context;
  const normalizedPost = normalizeText(postAction.screenContent ?? '');
  const stayedOnScreen = postAction.screenName === preAction.screenName;
  const baseline = stayedOnScreen ? normalizeText(preAction.screenContent ?? '') : '';
  // The patterns carry no g/y flag, so repeated test() calls are stateless.
  const appearedAfterAction = pattern => pattern.test(normalizedPost) && !pattern.test(baseline);

  if (ERROR_SIGNAL_PATTERNS.some(appearedAfterAction)) {
    const isUncontrollable = UNCONTROLLABLE_ERROR_PATTERNS.some(pattern => pattern.test(normalizedPost));
    return {
      status: 'error',
      failureKind: isUncontrollable ? 'uncontrollable' : 'controllable',
      evidence: 'Visible validation or error feedback appeared after the action.',
      source: 'deterministic'
    };
  }

  if (!stayedOnScreen) {
    return {
      status: 'success',
      failureKind: 'controllable',
      evidence: `The app navigated from "${preAction.screenName}" to "${postAction.screenName}".`,
      source: 'deterministic'
    };
  }

  if (SUCCESS_SIGNAL_PATTERNS.some(appearedAfterAction)) {
    return {
      status: 'success',
      failureKind: 'controllable',
      evidence: 'The current screen shows explicit success or completion language.',
      source: 'deterministic'
    };
  }

  const target = action.targetElement;
  if (target && elementStillPresent(preAction.elements, target) && !elementStillPresent(postAction.elements, target)) {
    return {
      status: 'success',
      failureKind: 'controllable',
      evidence: 'The commit control is no longer present on the current screen.',
      source: 'deterministic'
    };
  }

  return {
    status: 'uncertain',
    failureKind: 'controllable',
    evidence: 'The current UI does not yet prove either success or failure.',
    source: 'deterministic'
  };
}
|
|
81
|
+
/**
 * Asks the LLM provider to judge whether the critical action succeeded.
 *
 * The provider is given a single `report_verification` tool plus a prompt
 * containing the goal, the action, and the pre-/post-action screen content
 * (and the post-action screenshot). Its first tool call is read back as the
 * verdict.
 *
 * Returns null — so the caller can fall back to its deterministic result —
 * whenever the reply is unusable: no response or tool call, a different tool
 * name, missing arguments, a `status` or `failureKind` outside the declared
 * enums, or an `evidence` that is not a non-empty string. Model output is not
 * trusted to respect the enum on its own.
 *
 * @param provider Object exposing `generateContent(systemPrompt, userPrompt,
 *   tools, history, screenshot)`.
 * @param context `{ goal, action, preAction, postAction }`.
 * @returns {Promise<{status, failureKind, evidence, source: 'llm'}|null>}
 * @throws Whatever `provider.generateContent` rejects with.
 */
async function llmVerify(provider, context) {
  const allowedStatuses = ['success', 'error', 'uncertain'];
  const allowedFailureKinds = ['controllable', 'uncontrollable'];
  const verificationTool = {
    name: 'report_verification',
    description: 'Report whether the action succeeded, failed, or remains uncertain based only on the UI evidence.',
    parameters: {
      status: {
        type: 'string',
        description: 'success, error, or uncertain',
        required: true,
        enum: allowedStatuses
      },
      failureKind: {
        type: 'string',
        description: 'controllable or uncontrollable',
        required: true,
        enum: allowedFailureKinds
      },
      evidence: {
        type: 'string',
        description: 'Brief explanation grounded in the current UI evidence',
        required: true
      }
    },
    execute: async () => 'reported'
  };
  const systemPrompt = ['You are an outcome verifier for a mobile app agent.', 'Your job is to decide whether the last critical UI action actually succeeded.', 'The current UI is the source of truth. Ignore the actor model’s prior claims when they conflict with the UI.', 'Return success only when the current UI clearly proves completion.', 'Return error when the UI shows validation, verification, submission, or other failure feedback.', 'Return uncertain when the UI does not yet prove either success or error.'].join(' ');
  const userPrompt = [`<goal>${context.goal}</goal>`, `<action tool="${context.action.toolName}" label="${context.action.label}">${JSON.stringify(context.action.args)}</action>`, `<pre_action screen="${context.preAction.screenName}">\n${context.preAction.screenContent}\n</pre_action>`, `<post_action screen="${context.postAction.screenName}">\n${context.postAction.screenContent}\n</post_action>`].join('\n\n');

  const response = await provider.generateContent(systemPrompt, userPrompt, [verificationTool], [], context.postAction.screenshot);
  const toolCall = response?.toolCalls?.[0];
  if (!toolCall || toolCall.name !== 'report_verification') {
    return null;
  }

  const { status, failureKind, evidence } = toolCall.args ?? {};
  if (!allowedStatuses.includes(status) || !allowedFailureKinds.includes(failureKind)) {
    return null;
  }
  if (typeof evidence !== 'string' || evidence.length === 0) {
    return null;
  }
  return {
    status,
    failureKind,
    evidence,
    source: 'llm'
  };
}
|
|
126
|
+
/**
 * Two-stage verifier for critical UI actions: a deterministic check first,
 * then an LLM check only when the deterministic stage is 'uncertain'.
 */
export class OutcomeVerifier {
  /**
   * @param provider LLM provider used for the second stage.
   * @param config Runtime config; only `config.verifier` is read here.
   *   A missing config is treated as an empty one.
   */
  constructor(provider, config) {
    this.provider = provider;
    this.config = config ?? {};
  }

  /** @returns {boolean} false only when `config.verifier.enabled` is exactly false. */
  isEnabled() {
    return this.config.verifier?.enabled !== false;
  }

  /** @returns `config.verifier.maxFollowupSteps`, or 2 when it is not set. */
  getMaxFollowupSteps() {
    return this.config.verifier?.maxFollowupSteps ?? 2;
  }

  /** @returns {boolean} Whether the action needs verification (see isCriticalVerificationAction). */
  isCriticalAction(action) {
    return isCriticalVerificationAction(action);
  }

  /**
   * Verifies the outcome described by `context`.
   *
   * A conclusive deterministic result is returned as is. Otherwise the LLM
   * stage decides; when it yields nothing usable, the deterministic
   * 'uncertain' result is returned. If the LLM stage throws (for example the
   * provider is unreachable), verification does not reject: the deterministic
   * result is returned with a note appended to its evidence, so the failure
   * stays visible to whoever records the result.
   *
   * @param context `{ goal, action, preAction, postAction }`.
   * @returns {Promise<{status, failureKind, evidence, source}>}
   */
  async verify(context) {
    const stageA = deterministicVerify(context);
    if (stageA.status !== 'uncertain') {
      return stageA;
    }
    try {
      const stageB = await llmVerify(this.provider, context);
      return stageB ?? stageA;
    } catch {
      // The raw error text is deliberately not echoed: the evidence is fed
      // back into prompts and traces, and provider errors may embed request details.
      return {
        ...stageA,
        evidence: `${stageA.evidence} (The LLM verification stage failed, so only deterministic evidence is available.)`
      };
    }
  }
}
|
|
149
|
+
//# sourceMappingURL=OutcomeVerifier.js.map
|