npm - @mobileai/react-native - Versions diffs - 0.9.27 → 0.9.28 - Mend

@mobileai/react-native 0.9.27 → 0.9.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61) hide show

package/lib/module/core/AgentRuntime.js CHANGED Viewed

@@ -15,10 +15,12 @@ import { logger } from "../utils/logger.js";
 import { walkFiberTree } from "./FiberTreeWalker.js";
 import { dehydrateScreen } from "./ScreenDehydrator.js";
 import { buildSystemPrompt, buildKnowledgeOnlyPrompt } from "./systemPrompt.js";
+import { buildVerificationAction, createVerificationSnapshot, OutcomeVerifier } from "./OutcomeVerifier.js";
 import { KnowledgeBaseService } from "../services/KnowledgeBaseService.js";
 import { installAlertInterceptor, uninstallAlertInterceptor } from "./NativeAlertInterceptor.js";
 import { createTapTool, createLongPressTool, createTypeTool, createScrollTool, createSliderTool, createPickerTool, createDatePickerTool, createKeyboardTool, createGuideTool, createSimplifyTool, createRestoreTool } from "../tools/index.js";
 import { actionRegistry } from "./ActionRegistry.js";
+import { createProvider } from "../providers/ProviderFactory.js";
 const DEFAULT_MAX_STEPS = 25;
 function generateTraceId() {
   return `trace_${Date.now()}_${Math.random().toString(36).slice(2, 10)}`;
@@ -39,6 +41,10 @@ export class AgentRuntime {
   knowledgeService = null;
   lastDehydratedRoot = null;
   currentTraceId = null;
+  currentUserGoal = '';
+  verifierProvider = null;
+  outcomeVerifier = null;
+  pendingCriticalVerification = null;
   // ─── Task-scoped error suppression ──────────────────────────
   // Installed once at execute() start, removed after grace period.
@@ -147,6 +153,77 @@ export class AgentRuntime {
       }
     }
   }
+  getVerifier() { // Lazily builds and caches the OutcomeVerifier; returns null when config.verifier.enabled is explicitly false.
+    if (this.config.verifier?.enabled === false) {
+      return null;
+    }
+    if (!this.outcomeVerifier) {
+      const verifierConfig = this.config.verifier;
+      if (verifierConfig?.provider || verifierConfig?.model || verifierConfig?.proxyUrl || verifierConfig?.proxyHeaders) { // Any verifier-specific override triggers creation of a dedicated provider.
+        this.verifierProvider = createProvider(verifierConfig.provider || this.config.provider || 'gemini', this.config.apiKey, verifierConfig.model || this.config.model, verifierConfig.proxyUrl || this.config.proxyUrl, verifierConfig.proxyHeaders || this.config.proxyHeaders); // Each override falls back to the main config value; the API key is always this.config.apiKey.
+      } else {
+        this.verifierProvider = this.provider; // No overrides: reuse the runtime's own provider.
+      }
+      this.outcomeVerifier = new OutcomeVerifier(this.verifierProvider, this.config); // Receives the whole runtime config, not only config.verifier.
+    }
+    return this.outcomeVerifier;
+  }
+  createCurrentVerificationSnapshot(screenName, screenContent, elements, screenshot) { // Thin wrapper: forwards all four arguments unchanged to createVerificationSnapshot.
+    return createVerificationSnapshot(screenName, screenContent, elements, screenshot);
+  }
+  async updateCriticalVerification(screenName, screenContent, elements, screenshot, stepIndex) { // Re-checks the pending critical action against the current screen, emits a trace, and records an observation unless it verified.
+    if (!this.pendingCriticalVerification) return; // Nothing pending: no-op.
+    const verifier = this.getVerifier();
+    if (!verifier) {
+      this.pendingCriticalVerification = null; // getVerifier() returned null: drop the pending check.
+      return;
+    }
+    const postAction = this.createCurrentVerificationSnapshot(screenName, screenContent, elements, screenshot);
+    this.pendingCriticalVerification.followupSteps += 1; // Incremented before verifying, so the count includes this attempt.
+    const result = await verifier.verify({
+      goal: this.pendingCriticalVerification.goal,
+      action: this.pendingCriticalVerification.action,
+      preAction: this.pendingCriticalVerification.preAction,
+      postAction
+    });
+    this.emitTrace('critical_action_verified', { // Emitted for every verdict (success, error, or uncertain), despite the event name.
+      action: this.pendingCriticalVerification.action.toolName,
+      label: this.pendingCriticalVerification.action.label,
+      status: result.status,
+      failureKind: result.failureKind,
+      evidence: result.evidence,
+      source: result.source,
+      followupSteps: this.pendingCriticalVerification.followupSteps
+    }, stepIndex);
+    if (result.status === 'success') {
+      this.pendingCriticalVerification = null; // Verified: clearing this makes shouldBlockSuccessCompletion() return false.
+      return;
+    }
+    if (result.status === 'error') { // Failure: an observation is pushed, but the pending check is NOT cleared here.
+      this.observations.push(`Outcome verifier: The previous action "${this.pendingCriticalVerification.action.label}" did NOT complete successfully. ${result.evidence} Treat this as a ${result.failureKind} failure, do not claim success, and either recover or explain the issue clearly.`);
+      return;
+    }
+    const maxFollowupSteps = verifier.getMaxFollowupSteps(); // Only the uncertain verdict reaches this point.
+    const ageNote = this.pendingCriticalVerification.followupSteps >= maxFollowupSteps ? ` This critical action is still unverified after ${this.pendingCriticalVerification.followupSteps} follow-up checks.` : ''; // NOTE(review): reaching the limit only adds this note; the pending check stays set — confirm the block on done(success) is meant to persist.
+    this.observations.push(`Outcome verifier: The previous action "${this.pendingCriticalVerification.action.label}" is still unverified. ${result.evidence}${ageNote} Before calling done(success=true), keep checking for success or error evidence on the current screen.`);
+  }
+  maybeStartCriticalVerification(toolName, args, preAction) { // Arms a pending verification when the tool call is classified as critical; otherwise does nothing.
+    const verifier = this.getVerifier();
+    if (!verifier) return;
+    const action = buildVerificationAction(toolName, args, preAction.elements, this.getToolStatusLabel(toolName, args)); // The status label is supplied as the fallback label for the action.
+    if (!verifier.isCriticalAction(action)) {
+      return; // Non-critical call: any verification that is already pending is left untouched.
+    }
+    this.pendingCriticalVerification = { // Replaces whatever verification was pending before.
+      goal: this.currentUserGoal,
+      action,
+      preAction,
+      followupSteps: 0 // Incremented by updateCriticalVerification on each check.
+    };
+  }
+  shouldBlockSuccessCompletion() { // True while a critical action is still recorded as pending verification.
+    return this.pendingCriticalVerification !== null;
+  }
   // ─── Tool Registration ─────────────────────────────────────
@@ -1328,6 +1405,10 @@ ${screen.elementsText}
     this.currentTraceId = generateTraceId();
     this.observations = [];
     this.lastScreenName = '';
+    this.pendingCriticalVerification = null;
+    this.outcomeVerifier = null;
+    this.verifierProvider = null;
+    this.currentUserGoal = userMessage;
     // Reset workflow approval for each new task
     this.resetAppActionApproval('new task');
     const maxSteps = this.config.maxSteps || DEFAULT_MAX_STEPS;
@@ -1347,6 +1428,7 @@ ${screen.elementsText}
       contextualMessage = `(Note: You just asked the user: "${this.lastAskUserQuestion}")\n\nUser replied: ${userMessage}`;
       this.lastAskUserQuestion = null; // Consume the question
     }
+    this.currentUserGoal = contextualMessage;
     logger.info('AgentRuntime', `Starting execution: "${contextualMessage}"`);
     // Lifecycle: onBeforeTask
@@ -1491,12 +1573,13 @@ ${screen.elementsText}
         // 4.5. Capture screenshot for Gemini vision (optional)
         const screenshot = await this.captureScreenshot();
+        await this.updateCriticalVerification(screenName, screenContent, screen.elements, screenshot, step);
         // 5. Send to AI provider
         this.config.onStatusUpdate?.('Thinking...');
         const hasKnowledge = !!this.knowledgeService;
         const isCopilot = this.config.interactionMode !== 'autopilot';
-        const systemPrompt = buildSystemPrompt('en', hasKnowledge, isCopilot);
+        const systemPrompt = buildSystemPrompt('en', hasKnowledge, isCopilot, this.config.supportStyle);
         const tools = this.buildToolsForProvider();
         logger.info('AgentRuntime', `Sending to AI with ${tools.length} tools...`);
         logger.debug('AgentRuntime', 'System prompt length:', systemPrompt.length);
@@ -1561,6 +1644,13 @@ ${screen.elementsText}
         // 6. Process tool calls
         if (!response.toolCalls || response.toolCalls.length === 0) {
+          if (this.shouldBlockSuccessCompletion()) {
+            this.emitTrace('task_completion_blocked_needs_verification', {
+              responseText: response.text,
+              pendingVerification: this.pendingCriticalVerification
+            }, step);
+            continue;
+          }
           logger.warn('AgentRuntime', 'No tool calls in response. Text:', response.text);
           this.emitTrace('task_completed_without_tool', {
             responseText: response.text
@@ -1605,6 +1695,7 @@ ${screen.elementsText}
         // Prefer the human-readable plan over the raw tool status if available to avoid double statuses
         const statusDisplay = reasoning.plan || statusLabel;
         this.config.onStatusUpdate?.(statusDisplay);
+        const preActionSnapshot = this.createCurrentVerificationSnapshot(screenName, screenContent, screen.elements, screenshot);
         // Find and execute the tool
         const tool = this.tools.get(toolCall.name) || this.buildToolsForProvider().find(t => t.name === toolCall.name);
@@ -1624,6 +1715,11 @@ ${screen.elementsText}
           args: toolCall.args,
           output
         }, step);
+        if (output.startsWith('✅')) {
+          this.maybeStartCriticalVerification(toolCall.name, toolCall.args, preActionSnapshot);
+        } else if (toolCall.name !== 'done') {
+          this.pendingCriticalVerification = null;
+        }
         if (output === APPROVAL_ALREADY_DONE_TOKEN) {
           const result = {
             success: true,
@@ -1652,6 +1748,12 @@ ${screen.elementsText}
         // Check if done
         if (toolCall.name === 'done') {
+          if (toolCall.args.success !== false && this.shouldBlockSuccessCompletion()) {
+            this.emitTrace('done_blocked_needs_verification', {
+              pendingVerification: this.pendingCriticalVerification
+            }, step);
+            continue;
+          }
           const result = {
             success: toolCall.args.success !== false,
             message: toolCall.args.text || toolCall.args.message || output || reasoning.plan || (toolCall.args.success === false ? 'Action stopped.' : 'Action completed.'),

package/lib/module/core/FiberTreeWalker.js CHANGED Viewed

@@ -9,6 +9,7 @@
  *
  */
+import { Dimensions } from 'react-native';
 import { logger } from "../utils/logger.js";
 import { getChild, getSibling, getParent, getProps, getStateNode, getType, getDisplayName } from "./FiberAdapter.js";
 import { getActiveAlert } from "./NativeAlertInterceptor.js";
@@ -1171,4 +1172,101 @@ function resolveNativeScrollRef(fiberNode) {
   logger.debug('FiberTreeWalker', 'Could not resolve native scroll ref — returning stateNode as fallback');
   return stateNode;
 }
+// ─── Wireframe Capture ─────────────────────────────────────────
+/** Max elements to measure — keeps bridge work bounded */
+const WIREFRAME_MAX_ELEMENTS = 50;
+/** Measure this many elements per frame, then yield */
+const WIREFRAME_BATCH_SIZE = 10;
+/**
+ * Measure a single element through the measure() callback of its state node.
+ * Resolves to null when there is no measurable state node, the reported size is not positive, or measuring throws.
+ */
+function measureElement(el) {
+  return new Promise(resolve => {
+    try {
+      const stateNode = getStateNode(el.fiberNode);
+      if (!stateNode || typeof stateNode.measure !== 'function') {
+        resolve(null);
+        return;
+      }
+      stateNode.measure((_x, _y, width, height, pageX, pageY) => { // NOTE(review): if measure() never invokes this callback the promise never settles — confirm callers tolerate that.
+        if (width > 0 && height > 0) {
+          resolve({
+            type: el.type,
+            label: el.label || el.type, // A falsy label falls back to the element type.
+            x: pageX, // Uses the pageX/pageY callback arguments; _x/_y are ignored.
+            y: pageY,
+            width,
+            height
+          });
+        } else {
+          resolve(null);
+        }
+      });
+    } catch { // A synchronous throw from the lookup or the measure() call is treated as unmeasurable.
+      resolve(null);
+    }
+  });
+}
+/**
+ * Yield one frame so measure work doesn't block gestures/animations.
+ * Uses requestAnimationFrame where available, falls back to setTimeout(16ms); the promise resolves with no value.
+ */
+function yieldFrame() {
+  return new Promise(resolve => {
+    if (typeof requestAnimationFrame === 'function') { // typeof guard: no ReferenceError when the global is missing.
+      requestAnimationFrame(() => resolve()); // Wrapped so the frame timestamp is not passed on to resolve().
+    } else {
+      setTimeout(resolve, 16); // 16 ms ≈ one frame at 60 Hz.
+    }
+  });
+}
+/**
+ * Capture a privacy-safe wireframe of the current screen.
+ *
+ * Performance guarantees:
+ * - Capped at WIREFRAME_MAX_ELEMENTS (50) — enough for wireframe context
+ * - Measures in batches of WIREFRAME_BATCH_SIZE (10), yielding a frame
+ *   between batches so the bridge stays free for user interactions
+ * - The caller (AIAgent) is expected to defer this via InteractionManager so it
+ *   does not compete with screen transitions or gestures (not enforced here)
+ */
+export async function captureWireframe(rootRef, config = {}) { // Resolves to the wireframe object, or null when nothing interactive was found or measured.
+  const result = walkFiberTree(rootRef, config); // Reuses the regular fiber walk; only result.interactives is measured.
+  const elements = result.interactives;
+  if (elements.length === 0) return null;
+  // Cap the number of elements to keep bridge work bounded
+  const capped = elements.slice(0, WIREFRAME_MAX_ELEMENTS); // Keeps the first N in walk order; the rest are dropped.
+  const components = [];
+  for (let i = 0; i < capped.length; i += WIREFRAME_BATCH_SIZE) {
+    const batch = capped.slice(i, i + WIREFRAME_BATCH_SIZE);
+    const batchResults = await Promise.all(batch.map(measureElement)); // Elements within one batch are measured concurrently.
+    for (const r of batchResults) {
+      if (r) components.push(r); // Null (unmeasurable) results are skipped.
+    }
+    // Yield between batches — never monopolize the bridge
+    if (i + WIREFRAME_BATCH_SIZE < capped.length) { // No yield after the final batch.
+      await yieldFrame();
+    }
+  }
+  if (components.length === 0) return null; // Every measurement came back null.
+  const {
+    width: deviceWidth,
+    height: deviceHeight
+  } = Dimensions.get('window'); // Window dimensions are used, not 'screen'.
+  return {
+    screen: config.screenName || 'Unknown',
+    components, // Each entry carries the element label — NOTE(review): confirm labels cannot contain sensitive text, given the "privacy-safe" claim.
+    deviceWidth,
+    deviceHeight,
+    capturedAt: new Date().toISOString()
+  };
+}
 //# sourceMappingURL=FiberTreeWalker.js.map

package/lib/module/core/OutcomeVerifier.js ADDED Viewed

@@ -0,0 +1,149 @@
+"use strict";
+const COMMIT_ACTION_PATTERN = /\b(save|submit|confirm|apply|pay|place|update|continue|finish|send|checkout|complete|verify|review|publish|post|delete|cancel)\b/i; // Whole-word, case-insensitive verbs; tested against an action label in isCriticalVerificationAction.
+const SUCCESS_SIGNAL_PATTERNS = [/\b(success|successful|saved|updated|submitted|completed|done|confirmed|applied|verified)\b/i, /\bthank you\b/i, /\border confirmed\b/i, /\bchanges saved\b/i]; // Any match in the normalized post-action screen text counts as a success signal in deterministicVerify.
+const ERROR_SIGNAL_PATTERNS = [/\berror\b/i, /\bfailed\b/i, /\binvalid\b/i, /\brequired\b/i, /\bincorrect\b/i, /\btry again\b/i, /\bcould not\b/i, /\bunable to\b/i, /\bverification\b.{0,30}\b(error|failed|invalid|required)\b/i, /\bcode\b.{0,30}\b(error|failed|invalid|required)\b/i]; // Checked before the success patterns in deterministicVerify, so an error match wins.
+const UNCONTROLLABLE_ERROR_PATTERNS = [/\bnetwork\b/i, /\bserver\b/i, /\bservice unavailable\b/i, /\btemporarily unavailable\b/i, /\btimeout\b/i, /\btry later\b/i, /\bconnection\b/i]; // Consulted only after an error match, to label the failure 'uncontrollable' instead of 'controllable'.
+function normalizeText(text) { // Replaces [bracketed] segments with a space, collapses whitespace runs to one space, and trims.
+  return text.replace(/\[[^\]]+\]/g, ' ').replace(/\s+/g, ' ').trim(); // NOTE(review): throws if text is not a string — confirm screenContent is always defined.
+}
+function elementStillPresent(elements, target) { // True when some element has the target's index, or has the same type and the same non-empty trimmed label.
+  if (!target) return false;
+  return elements.some(element => element.index === target.index || element.type === target.type && element.label.trim().length > 0 && element.label.trim() === target.label.trim()); // NOTE(review): label.trim() throws when a same-type element (or the target) has no label; an index match alone also counts even if a different element now holds that index — confirm intended.
+}
+export function createVerificationSnapshot(screenName, screenContent, elements, screenshot) { // Bundles the four values into a plain object; nothing is copied, so elements is held by reference.
+  return {
+    screenName,
+    screenContent,
+    elements,
+    screenshot
+  };
+}
+export function buildVerificationAction(toolName, args, elements, fallbackLabel) { // Describes a tool call for verification and resolves the element it targeted, if any.
+  const targetElement = typeof args.index === 'number' ? elements.find(element => element.index === args.index) : undefined; // Only a numeric args.index is looked up; otherwise there is no target element.
+  return {
+    toolName,
+    args,
+    label: targetElement?.label || fallbackLabel, // A missing or empty element label falls back to fallbackLabel.
+    targetElement
+  };
+}
+export function isCriticalVerificationAction(action) { // Decides whether an action's outcome must be verified before success may be reported.
+  if (action.targetElement?.requiresConfirmation) return true; // Elements flagged requiresConfirmation are critical regardless of tool name.
+  if (!['tap', 'long_press', 'adjust_slider', 'select_picker', 'set_date'].includes(action.toolName)) { // Only these tools are eligible for label-based classification.
+    return false;
+  }
+  const label = action.label || '';
+  return COMMIT_ACTION_PATTERN.test(label); // Critical when the label contains one of the commit-style verbs as a whole word.
+}
+function deterministicVerify(context) { // Rule-based verdict from the pre/post snapshots; rules run in order and the first match wins.
+  const normalizedPost = normalizeText(context.postAction.screenContent);
+  if (ERROR_SIGNAL_PATTERNS.some(pattern => pattern.test(normalizedPost))) { // Rule 1: error wording on the post-action screen outranks every success signal below.
+    const failureKind = UNCONTROLLABLE_ERROR_PATTERNS.some(pattern => pattern.test(normalizedPost)) ? 'uncontrollable' : 'controllable';
+    return {
+      status: 'error',
+      failureKind,
+      evidence: 'Visible validation or error feedback appeared after the action.',
+      source: 'deterministic'
+    };
+  }
+  if (context.postAction.screenName !== context.preAction.screenName) { // Rule 2: a change of screen name counts as success.
+    return {
+      status: 'success',
+      failureKind: 'controllable', // failureKind is populated on non-error results too.
+      evidence: `The app navigated from "${context.preAction.screenName}" to "${context.postAction.screenName}".`,
+      source: 'deterministic'
+    };
+  }
+  if (SUCCESS_SIGNAL_PATTERNS.some(pattern => pattern.test(normalizedPost))) { // Rule 3: explicit success wording on the post-action screen.
+    return {
+      status: 'success',
+      failureKind: 'controllable',
+      evidence: 'The current screen shows explicit success or completion language.',
+      source: 'deterministic'
+    };
+  }
+  if (context.action.targetElement && elementStillPresent(context.preAction.elements, context.action.targetElement) && !elementStillPresent(context.postAction.elements, context.action.targetElement)) { // Rule 4: the acted-on control was present before the action and is gone afterwards.
+    return {
+      status: 'success',
+      failureKind: 'controllable',
+      evidence: 'The commit control is no longer present on the current screen.',
+      source: 'deterministic'
+    };
+  }
+  return { // No rule matched: the caller may escalate to the LLM check.
+    status: 'uncertain',
+    failureKind: 'controllable',
+    evidence: 'The current UI does not yet prove either success or failure.',
+    source: 'deterministic'
+  };
+}
+async function llmVerify(provider, context) { // Asks the provider for a verdict via a single report_verification tool call; resolves to null when the reply is unusable.
+  const verificationTool = {
+    name: 'report_verification',
+    description: 'Report whether the action succeeded, failed, or remains uncertain based only on the UI evidence.',
+    parameters: {
+      status: {
+        type: 'string',
+        description: 'success, error, or uncertain',
+        required: true,
+        enum: ['success', 'error', 'uncertain']
+      },
+      failureKind: {
+        type: 'string',
+        description: 'controllable or uncontrollable',
+        required: true,
+        enum: ['controllable', 'uncontrollable']
+      },
+      evidence: {
+        type: 'string',
+        description: 'Brief explanation grounded in the current UI evidence',
+        required: true
+      }
+    },
+    execute: async () => 'reported' // Stub executor; only the arguments of the returned tool call are read below.
+  };
+  const systemPrompt = ['You are an outcome verifier for a mobile app agent.', 'Your job is to decide whether the last critical UI action actually succeeded.', 'The current UI is the source of truth. Ignore the actor model’s prior claims when they conflict with the UI.', 'Return success only when the current UI clearly proves completion.', 'Return error when the UI shows validation, verification, submission, or other failure feedback.', 'Return uncertain when the UI does not yet prove either success or error.'].join(' ');
+  const userPrompt = [`<goal>${context.goal}</goal>`, `<action tool="${context.action.toolName}" label="${context.action.label}">${JSON.stringify(context.action.args)}</action>`, `<pre_action screen="${context.preAction.screenName}">\n${context.preAction.screenContent}\n</pre_action>`, `<post_action screen="${context.postAction.screenName}">\n${context.postAction.screenContent}\n</post_action>`].join('\n\n');
+  const response = await provider.generateContent(systemPrompt, userPrompt, [verificationTool], [], context.postAction.screenshot); // The post-action screenshot is passed as the final argument.
+  const toolCall = response.toolCalls?.[0]; // Only the first tool call is considered.
+  if (!toolCall || toolCall.name !== 'report_verification') {
+    return null;
+  }
+  const status = toolCall.args.status; // NOTE(review): status and failureKind are only checked for truthiness below, not against the declared enums.
+  const failureKind = toolCall.args.failureKind;
+  const evidence = typeof toolCall.args.evidence === 'string' ? toolCall.args.evidence : '';
+  if (!status || !failureKind || !evidence) { // Any missing field makes the reply unusable.
+    return null;
+  }
+  return {
+    status,
+    failureKind,
+    evidence,
+    source: 'llm'
+  };
+}
+export class OutcomeVerifier { // Two-stage verifier: deterministic rules first, then a provider-backed check only when the rules are inconclusive.
+  constructor(provider, config) {
+    this.provider = provider;
+    this.config = config;
+  }
+  isEnabled() {
+    return this.config.verifier?.enabled !== false; // Enabled unless config.verifier.enabled is explicitly false.
+  }
+  getMaxFollowupSteps() {
+    return this.config.verifier?.maxFollowupSteps ?? 2; // Falls back to 2 only when the setting is null or undefined.
+  }
+  isCriticalAction(action) {
+    return isCriticalVerificationAction(action);
+  }
+  async verify(context) {
+    const stageA = deterministicVerify(context);
+    if (stageA.status !== 'uncertain') { // A definite deterministic verdict skips the provider call entirely.
+      return stageA;
+    }
+    const stageB = await llmVerify(this.provider, context); // NOTE(review): a provider rejection propagates to the caller; there is no try/catch here.
+    return stageB ?? stageA; // An unusable LLM reply (null) falls back to the deterministic 'uncertain' result.
+  }
+}
+//# sourceMappingURL=OutcomeVerifier.js.map