npm - @mobileai/react-native - Versions diffs - 0.1.0 → 0.3.0 - Mend

@mobileai/react-native 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

package/README.md +78 -7
package/lib/module/components/AIAgent.js +40 -4
package/lib/module/components/AIAgent.js.map +1 -1
package/lib/module/components/AgentChatBar.js +177 -29
package/lib/module/components/AgentChatBar.js.map +1 -1
package/lib/module/core/AgentRuntime.js +268 -126
package/lib/module/core/AgentRuntime.js.map +1 -1
package/lib/module/core/FiberTreeWalker.js +74 -20
package/lib/module/core/FiberTreeWalker.js.map +1 -1
package/lib/module/core/systemPrompt.js +164 -0
package/lib/module/core/systemPrompt.js.map +1 -0
package/lib/module/providers/GeminiProvider.js +189 -73
package/lib/module/providers/GeminiProvider.js.map +1 -1
package/lib/typescript/src/components/AIAgent.d.ts +9 -1
package/lib/typescript/src/components/AIAgent.d.ts.map +1 -1
package/lib/typescript/src/components/AgentChatBar.d.ts +4 -3
package/lib/typescript/src/components/AgentChatBar.d.ts.map +1 -1
package/lib/typescript/src/core/AgentRuntime.d.ts +16 -0
package/lib/typescript/src/core/AgentRuntime.d.ts.map +1 -1
package/lib/typescript/src/core/FiberTreeWalker.d.ts +5 -0
package/lib/typescript/src/core/FiberTreeWalker.d.ts.map +1 -1
package/lib/typescript/src/core/systemPrompt.d.ts +9 -0
package/lib/typescript/src/core/systemPrompt.d.ts.map +1 -0
package/lib/typescript/src/core/types.d.ts +51 -13
package/lib/typescript/src/core/types.d.ts.map +1 -1
package/lib/typescript/src/providers/GeminiProvider.d.ts +33 -13
package/lib/typescript/src/providers/GeminiProvider.d.ts.map +1 -1
package/package.json +16 -14
package/src/components/AIAgent.tsx +41 -1
package/src/components/AgentChatBar.tsx +150 -28
package/src/core/AgentRuntime.ts +287 -131
package/src/core/FiberTreeWalker.ts +74 -19
package/src/core/systemPrompt.ts +162 -0
package/src/core/types.ts +58 -10
package/src/providers/GeminiProvider.ts +174 -101

package/src/core/FiberTreeWalker.ts CHANGED Viewed

@@ -37,6 +37,55 @@ const SWITCH_TYPES = new Set(['Switch', 'RCTSwitch']);
 const TEXT_TYPES = new Set(['Text', 'RCTText']);
 // ScrollView/FlatList/SectionList detection can be added later for scroll tool
+// ─── State Extraction (mirrors page-agent DEFAULT_INCLUDE_ATTRIBUTES) ──
+/**
+ * Prop names probed directly on a component as a fallback source of state
+ * attributes — covers components that skip accessibility props.
+ * extractStateAttributes reads them in the order listed here.
+ */
+const STATE_PROPS = ['value', 'checked', 'selected', 'active', 'on', 'isOn', 'toggled', 'enabled'];
+/**
+ * Extract state attributes from a fiber node's props.
+ * Mirrors page-agent's DEFAULT_INCLUDE_ATTRIBUTES extraction.
+ *
+ * Three sources are read in a fixed order and every source that is present
+ * contributes to the output:
+ *   1. each entry of accessibilityState
+ *   2. accessibilityRole, emitted under the key "role"
+ *   3. direct scalar props named in STATE_PROPS
+ * The order only determines position in the result. A later source is not
+ * suppressed by an earlier one, so the same key can appear twice (for example
+ * from accessibilityState.checked and props.checked).
+ *
+ * Values are interpolated as-is; quotes inside a value are not escaped.
+ *
+ * @param props - Props object of the node. It is dereferenced without a guard,
+ *   so callers must pass a non-null object.
+ * @returns Space-separated `key="value"` pairs, or an empty string when no
+ *   source produced anything.
+ */
+function extractStateAttributes(props: any): string {
+  const parts: string[] = [];
+  // Source 1: accessibilityState (proper ARIA equivalent) — only undefined values are skipped, null is kept
+  if (props.accessibilityState && typeof props.accessibilityState === 'object') {
+    for (const [k, v] of Object.entries(props.accessibilityState)) {
+      if (v !== undefined) parts.push(`${k}="${v}"`);
+    }
+  }
+  // Source 2: accessibilityRole, emitted under the key "role"
+  if (props.accessibilityRole) {
+    parts.push(`role="${props.accessibilityRole}"`);
+  }
+  // Source 3: direct props named in STATE_PROPS — functions and objects (including null) are skipped
+  for (const key of STATE_PROPS) {
+    if (props[key] !== undefined && typeof props[key] !== 'function' && typeof props[key] !== 'object') {
+      parts.push(`${key}="${props[key]}"`);
+    }
+  }
+  return parts.join(' ');
+}
+/**
+ * Check if a node has ANY event handler prop (on* function).
+ * Mirrors RNTL's getEventHandlerFromProps pattern.
+ *
+ * A prop counts when its name starts with "on" and its value is a function.
+ * Only the object's own enumerable keys are inspected. The prefix test is a
+ * plain startsWith, so a function-valued prop such as `once` matches as well.
+ *
+ * @param props - Props object to inspect; null, undefined and non-object
+ *   values yield false.
+ * @returns true as soon as one matching prop is found, otherwise false.
+ */
+export function hasAnyEventHandler(props: any): boolean {
+  if (!props || typeof props !== 'object') return false;
+  for (const key of Object.keys(props)) {
+    if (key.startsWith('on') && typeof props[key] === 'function') {
+      return true;
+    }
+  }
+  return false;
+}
 // ─── Fiber Node Helpers ────────────────────────────────────────
 /**
@@ -66,23 +115,27 @@ function getElementType(fiber: any): ElementType | null {
   const name = getComponentName(fiber);
   const props = fiber.memoizedProps || {};
-  // Check by component name
+  // Check by component name (known React Native types)
   if (name && PRESSABLE_TYPES.has(name)) return 'pressable';
   if (name && TEXT_INPUT_TYPES.has(name)) return 'text-input';
   if (name && SWITCH_TYPES.has(name)) return 'switch';
-  // Check by props — any component with onPress is interactive
-  if (props.onPress && typeof props.onPress === 'function') return 'pressable';
-  // Check by accessibility role
+  // Check by accessibilityRole (covers custom components with proper ARIA)
   const role = props.accessibilityRole || props.role;
+  if (role === 'switch') return 'switch';
   if (role === 'button' || role === 'link' || role === 'checkbox' || role === 'radio') {
-    if (props.onPress) return 'pressable';
+    return 'pressable';
   }
+  // Check by props — any component with onPress is interactive
+  if (props.onPress && typeof props.onPress === 'function') return 'pressable';
   // TextInput detection by props
   if (props.onChangeText && typeof props.onChangeText === 'function') return 'text-input';
+  // Switch detection by props (custom switches with onValueChange)
+  if (props.onValueChange && typeof props.onValueChange === 'function') return 'switch';
   return null;
 }
@@ -267,7 +320,7 @@ export function walkFiberTree(rootRef: any, config?: WalkConfig): WalkResult {
   let currentIndex = 0;
   const hasWhitelist = config?.interactiveWhitelist && (config.interactiveWhitelist.length ?? 0) > 0;
-  function processNode(node: any, depth: number = 0): string {
+  function processNode(node: any, depth: number = 0, isInsideInteractive: boolean = false): string {
     if (!node) return '';
     const props = node.memoizedProps || {};
@@ -275,29 +328,28 @@ export function walkFiberTree(rootRef: any, config?: WalkConfig): WalkResult {
     // ── Security Constraints ──
     if (props.aiIgnore === true) return '';
     if (matchesRefList(node, config?.interactiveBlacklist)) {
-      // Blacklisted nodes themselves aren't interactive, but we still walk children for structure
       let childText = '';
       let currentChild = node.child;
       while (currentChild) {
-        childText += processNode(currentChild, depth);
+        childText += processNode(currentChild, depth, isInsideInteractive);
         currentChild = currentChild.sibling;
       }
       return childText;
     }
-    // Process all children first
+    // Interactive check — skip if already inside an interactive ancestor (dedup nested TextInput layers)
+    const isWhitelisted = matchesRefList(node, config?.interactiveWhitelist);
+    const elementType = getElementType(node);
+    const shouldInclude = !isInsideInteractive && (hasWhitelist ? isWhitelisted : (elementType && !isDisabled(node)));
+    // Process children — if this node IS interactive, children won't register as separate interactives
     let childrenText = '';
     let currentChild = node.child;
     while (currentChild) {
-      childrenText += processNode(currentChild, depth + 1);
+      childrenText += processNode(currentChild, depth + 1, isInsideInteractive || !!shouldInclude);
       currentChild = currentChild.sibling;
     }
-    // Interactive Check
-    const isWhitelisted = matchesRefList(node, config?.interactiveWhitelist);
-    const elementType = getElementType(node);
-    const shouldInclude = hasWhitelist ? isWhitelisted : (elementType && !isDisabled(node));
     const indent = '  '.repeat(depth);
     if (shouldInclude) {
@@ -307,16 +359,19 @@ export function walkFiberTree(rootRef: any, config?: WalkConfig): WalkResult {
         label = props.placeholder;
       }
-      // Record interactive element
       interactives.push({
         index: currentIndex,
         type: resolvedType,
         label: label || `[${resolvedType}]`,
         fiberNode: node,
-        props: { ...props }, // snapshot
+        props: { ...props },
       });
-      const elementOutput = `${indent}[${currentIndex}]<${resolvedType}>${label ? label + ' ' : ''}${childrenText.trim() ? childrenText.trim() : ''}</>\n`;
+      // Build output tag with state attributes (mirrors page-agent format)
+      const stateAttrs = extractStateAttributes(props);
+      const attrStr = stateAttrs ? ` ${stateAttrs}` : '';
+      const textContent = label || '';
+      const elementOutput = `${indent}[${currentIndex}]<${resolvedType}${attrStr}>${textContent} />${childrenText.trim() ? '\n' + childrenText : ''}\n`;
       currentIndex++;
       return elementOutput;
     }

package/src/core/systemPrompt.ts ADDED Viewed

@@ -0,0 +1,162 @@
+/**
+ * System prompt for the AI agent — adapted from page-agent reference.
+ *
+ * Separated into its own file for maintainability.
+ * The prompt uses XML-style tags (matching page-agent's structure)
+ * to give the LLM clear, structured instructions.
+ *
+ * Only the working-language line inside the language_settings section depends
+ * on the argument; the rest of the prompt is fixed text.
+ *
+ * @param language - Language code. Exactly 'ar' selects the Arabic
+ *   working-language line; any other value selects the English one.
+ * @returns The complete system prompt as a single string.
+ */
+export function buildSystemPrompt(language: string): string {
+  const isArabic = language === 'ar';
+  return `You are an AI agent designed to operate in an iterative loop to automate tasks in a React Native mobile app. Your ultimate goal is accomplishing the task provided in <user_request>.
+<intro>
+You excel at the following tasks:
+1. Reading and understanding mobile app screens to extract precise information
+2. Automating UI interactions like tapping buttons and filling forms
+3. Gathering information from the screen and reporting it to the user
+4. Operating effectively in an agent loop
+5. Answering user questions based on what is visible on screen
+</intro>
+<language_settings>
+${isArabic ? '- Working language: **Arabic**. Respond in Arabic.' : '- Working language: **English**. Respond in English.'}
+- Use the language that the user is using. Return in user's language.
+</language_settings>
+<input>
+At every step, your input will consist of:
+1. <agent_history>: Your previous steps and their results.
+2. <user_request>: The user's original request.
+3. <screen_state>: Current screen name, available screens, and interactive elements indexed for actions.
+Agent history uses the following format per step:
+<step_N>
+Previous Goal Eval: Assessment of last action
+Memory: Key facts to remember
+Plan: What you did next
+Action Result: Result of the action
+</step_N>
+System messages may appear as <sys>...</sys> between steps.
+</input>
+<screen_state>
+Interactive elements are listed as [index]<type attrs>label />
+- index: numeric identifier for interaction
+- type: element type (pressable, text-input, switch)
+- attrs: state attributes like value="true", checked="false", role="switch"
+- label: visible text content of the element
+Only elements with [index] are interactive. Use the index to tap or type into them.
+Pure text elements without [] are NOT interactive — they are informational content you can read.
+</screen_state>
+<tools>
+Available tools:
+- tap(index): Tap an interactive element by its index. Works universally on buttons, switches, and custom components. For switches, this toggles their state.
+- type(index, text): Type text into a text-input element by its index.
+- navigate(screen, params): Navigate to a specific screen. params is optional JSON object.
+- done(text, success): Complete task. Text is your final response to the user — keep it concise unless the user explicitly asks for detail.
+- ask_user(question): Ask the user for clarification ONLY when you cannot determine what action to take.
+</tools>
+<rules>
+- There are 2 types of requests — always determine which type BEFORE acting:
+  1. Information requests (e.g. "what's available?", "how much is X?", "list the items"):
+     Read the screen content and call done() with the answer. Do NOT perform any tap/type/navigate actions.
+  2. Action requests (e.g. "add margherita to cart", "go to checkout", "fill in my name"):
+     Execute the required UI interactions using tap/type/navigate tools.
+- For action requests, determine whether the user gave specific step-by-step instructions or an open-ended task:
+  1. Specific instructions: Follow each step precisely, do not skip.
+  2. Open-ended tasks: Plan the steps yourself.
+- Only interact with elements that have an [index].
+- After tapping an element, the screen may change. Wait for the next step to see updated elements.
+- If the current screen doesn't have what you need, use navigate() to go to another screen.
+- If a tap navigates to another screen, the next step will show the new screen's elements.
+- Do not repeat one action for more than 3 times unless some conditions changed.
+- After typing into a text input, check if the screen changed (e.g., suggestions or autocomplete appeared). If so, interact with the new elements.
+- After typing into a search field, you may need to tap a search button, press enter, or select from a dropdown to complete the search.
+- If the user request includes specific details (product type, price, category), use available filters or search to be more efficient.
+- Do not fill in login/signup forms unless the user provides credentials. If asked to log in, use ask_user to request their email and password first.
+- Do not guess or auto-fill sensitive data (passwords, payment info, personal details). Always ask the user.
+- Trying too hard can be harmful. If stuck, call done() with partial results rather than repeating failed actions.
+- If you do not know how to proceed with the current screen, use ask_user to request specific instructions from the user.
+</rules>
+<task_completion_rules>
+You must call the done action in one of these cases:
+- When you have fully completed the USER REQUEST.
+- When the user asked for information and you can see the answer on screen.
+- When you reach the final allowed step, even if the task is incomplete.
+- When you feel stuck or unable to solve the user request.
+BEFORE calling done() for action requests that changed state (added items, submitted forms, etc.):
+1. First, navigate to the result screen (e.g., Cart, confirmation, order summary) so the user can see the outcome.
+2. Wait for the next step to see the result screen content.
+3. THEN call done() with a summary of what you did.
+Do NOT call done() immediately after the last action — the user needs to SEE the result.
+The done action is your opportunity to communicate findings and provide a coherent reply to the user:
+- Set success to true only if the full USER REQUEST has been completed.
+- Use the text field to answer questions, summarize what you found, or explain what you did.
+- You are ONLY ALLOWED to call done as a single action. Do not call it together with other actions.
+The ask_user action should ONLY be used when the user gave an action request but you lack specific information to execute it (e.g., user says "order a pizza" but there are multiple options and you don't know which one).
+- Do NOT use ask_user to confirm actions the user explicitly requested. If they said "place my order", just do it.
+- NEVER ask for the same confirmation twice. If the user already answered, proceed with their answer.
+- For destructive/purchase actions (place order, delete, pay), tap the button exactly ONCE. Do not repeat the same action — the user could be charged multiple times.
+</task_completion_rules>
+<capability>
+- It is ok to just provide information without performing any actions.
+- User can ask questions about what's on screen — answer them directly via done().
+- It is ok to fail the task. User would rather you report failure than repeat failed actions endlessly.
+- The user can be wrong. If the request is not achievable, tell the user via done().
+- The app can have bugs. If something is not working as expected, report it to the user.
+</capability>
+<ux_rules>
+UX best practices for mobile agent interactions:
+- Confirm what you did: When completing actions, summarize exactly what happened (e.g., "Added 2x Margherita ($10 each) to your cart. Total: $20").
+- Be transparent about errors: If an action fails, explain what failed and why — do not silently skip it or pretend it succeeded.
+- Track multi-item progress: For requests involving multiple items, keep track and report which ones succeeded and which did not.
+- Stay on the user's screen: For information requests, read from the current screen. Only navigate away if the needed information is on another screen.
+- Fail gracefully: If stuck after multiple attempts, call done() with what you accomplished and what remains, rather than repeating failed actions.
+- Be concise: Keep responses short and actionable. Users are on mobile — avoid walls of text.
+- Suggest next steps: After completing an action, briefly suggest what the user might want to do next (e.g., "Added to cart. Would you like to checkout or add more items?").
+- When a request is ambiguous, pick the most common interpretation rather than always asking. State your assumption in the done() text.
+</ux_rules>
+<reasoning_rules>
+Exhibit the following reasoning patterns to successfully achieve the <user_request>:
+- Reason about <agent_history> to track progress and context toward <user_request>.
+- Analyze the most recent action result in <agent_history> and clearly state what you previously tried to achieve.
+- Explicitly judge success/failure of the last action. If the expected change is missing, mark the last action as failed and plan a recovery.
+- Analyze whether you are stuck, e.g. when you repeat the same actions multiple times without any progress. Then consider alternative approaches.
+- If you see information relevant to <user_request>, include it in your response via done().
+- Always compare the current trajectory with the user request — make sure every action moves you closer to the goal.
+- Save important information to memory: field values you collected, items found, pages visited, etc.
+</reasoning_rules>
+<output>
+You MUST call the agent_step tool on every step. Provide:
+1. previous_goal_eval: "One-sentence result of your last action — success, failure, or uncertain. Skip on first step."
+2. memory: "Key facts to persist: values collected, items found, progress so far. Be specific."
+3. plan: "Your immediate next goal — what action you will take and why."
+4. action_name: Choose one action to execute
+5. Action parameters (index, text, screen, etc. depending on the action)
+Examples:
+previous_goal_eval: "Typed email into field [0]. Verdict: Success"
+memory: "Email: user@test.com entered. Still need password."
+plan: "Ask the user for their password using ask_user."
+previous_goal_eval: "Navigated to Cart screen. Verdict: Success"
+memory: "Added 2x Margherita pizza. Cart total visible."
+plan: "Call done to report the cart contents to the user."
+</output>`;
+}

package/src/core/types.ts CHANGED Viewed

@@ -19,7 +19,8 @@ export interface InteractiveElement {
   props: {
     onPress?: (...args: any[]) => void;
     onChangeText?: (text: string) => void;
-    value?: string;
+    onValueChange?: (value: boolean) => void;
+    value?: string | boolean;
     placeholder?: string;
     checked?: boolean;
     disabled?: boolean;
@@ -45,11 +46,7 @@ export interface DehydratedScreen {
 export interface AgentStep {
   stepIndex: number;
-  reflection: {
-    evaluationPreviousGoal: string;
-    memory: string;
-    nextGoal: string;
-  };
+  reflection: AgentReasoning;
   action: {
     name: string;
     input: Record<string, any>;
@@ -129,6 +126,40 @@ export interface AgentConfig {
   /** Delay between steps in ms (page-agent default: 400ms). */
   stepDelay?: number;
+  // ─── Status Updates ──────────────────────────────────────────────────────
+  /**
+   * Called with a human-readable status string at each step.
+   * Use this to show dynamic loading text (e.g., "Tapping 'Add'...").
+   */
+  onStatusUpdate?: (status: string) => void;
+  /**
+   * Callback for when agent needs user input (ask_user tool).
+   * Mirrors page-agent: the agent loop blocks until the user responds.
+   * If not set, ask_user tool will break the loop (legacy behavior).
+   * @example onAskUser: (q) => new Promise(resolve => showPrompt(q, resolve))
+   */
+  onAskUser?: (question: string) => Promise<string>;
+  // ─── Expo Router Support ─────────────────────────────────────────────────
+  /**
+   * Expo Router instance (from useRouter()).
+   * When provided, the navigate tool uses router.push('/path') instead of navRef.navigate().
+   */
+  router?: {
+    push: (href: string) => void;
+    replace: (href: string) => void;
+    back: () => void;
+  };
+  /**
+   * Current pathname from Expo Router (from usePathname()).
+   * Used to determine the current screen when using Expo Router.
+   */
+  pathname?: string;
   // ─── MCP Bridge Integration ──────────────────────────────────────────────
   /**
@@ -172,14 +203,31 @@ export interface ActionDefinition {
 // ─── Provider Interface ──────────────────────────────────────
+/**
+ * Structured reasoning returned per step via the agent_step tool.
+ * Used as the type of AgentStep.reflection and ProviderResult.reasoning.
+ * All three fields are required strings.
+ */
+export interface AgentReasoning {
+  /** Assessment of whether the previous action succeeded or failed. */
+  previousGoalEval: string;
+  /** What to remember for future steps (progress, items found, etc). */
+  memory: string;
+  /** The immediate next goal and why. */
+  plan: string;
+}
+/**
+ * Result from the AI provider's generateContent call.
+ * This is the value the AIProvider.generateContent promise resolves to.
+ */
+export interface ProviderResult {
+  /** Extracted action tool calls; each entry carries the tool name and its arguments. */
+  toolCalls: Array<{ name: string; args: Record<string, any> }>;
+  /** Structured reasoning from MacroTool (previousGoalEval, memory, plan). */
+  reasoning: AgentReasoning;
+  /** Raw text response (if any). */
+  text?: string;
+}
 export interface AIProvider {
   generateContent(
     systemPrompt: string,
     userMessage: string,
     tools: ToolDefinition[],
     history: AgentStep[],
-  ): Promise<{
-    toolCalls: Array<{ name: string; args: Record<string, any> }>;
-    text?: string;
-  }>;
+  ): Promise<ProviderResult>;
 }