npm - @mobileai/react-native - Versions diffs - 0.1.0 → 0.3.0 - Mend

@mobileai/react-native 0.1.0 → 0.3.0

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (35)

package/README.md +78 -7
package/lib/module/components/AIAgent.js +40 -4
package/lib/module/components/AIAgent.js.map +1 -1
package/lib/module/components/AgentChatBar.js +177 -29
package/lib/module/components/AgentChatBar.js.map +1 -1
package/lib/module/core/AgentRuntime.js +268 -126
package/lib/module/core/AgentRuntime.js.map +1 -1
package/lib/module/core/FiberTreeWalker.js +74 -20
package/lib/module/core/FiberTreeWalker.js.map +1 -1
package/lib/module/core/systemPrompt.js +164 -0
package/lib/module/core/systemPrompt.js.map +1 -0
package/lib/module/providers/GeminiProvider.js +189 -73
package/lib/module/providers/GeminiProvider.js.map +1 -1
package/lib/typescript/src/components/AIAgent.d.ts +9 -1
package/lib/typescript/src/components/AIAgent.d.ts.map +1 -1
package/lib/typescript/src/components/AgentChatBar.d.ts +4 -3
package/lib/typescript/src/components/AgentChatBar.d.ts.map +1 -1
package/lib/typescript/src/core/AgentRuntime.d.ts +16 -0
package/lib/typescript/src/core/AgentRuntime.d.ts.map +1 -1
package/lib/typescript/src/core/FiberTreeWalker.d.ts +5 -0
package/lib/typescript/src/core/FiberTreeWalker.d.ts.map +1 -1
package/lib/typescript/src/core/systemPrompt.d.ts +9 -0
package/lib/typescript/src/core/systemPrompt.d.ts.map +1 -0
package/lib/typescript/src/core/types.d.ts +51 -13
package/lib/typescript/src/core/types.d.ts.map +1 -1
package/lib/typescript/src/providers/GeminiProvider.d.ts +33 -13
package/lib/typescript/src/providers/GeminiProvider.d.ts.map +1 -1
package/package.json +16 -14
package/src/components/AIAgent.tsx +41 -1
package/src/components/AgentChatBar.tsx +150 -28
package/src/core/AgentRuntime.ts +287 -131
package/src/core/FiberTreeWalker.ts +74 -19
package/src/core/systemPrompt.ts +162 -0
package/src/core/types.ts +58 -10
package/src/providers/GeminiProvider.ts +174 -101

package/src/core/AgentRuntime.ts CHANGED Viewed

@@ -13,6 +13,7 @@ import { logger } from '../utils/logger';
 import { walkFiberTree } from './FiberTreeWalker';
 import type { WalkConfig } from './FiberTreeWalker';
 import { dehydrateScreen } from './ScreenDehydrator';
+import { buildSystemPrompt } from './systemPrompt';
 import type {
   AIProvider,
   AgentConfig,
@@ -24,51 +25,6 @@ import type {
 const DEFAULT_MAX_STEPS = 10;
-// ─── System Prompt ─────────────────────────────────────────────
-function buildSystemPrompt(language: string): string {
-  const isArabic = language === 'ar';
-  return `You are an AI agent that controls a React Native mobile app. You operate in an iterative loop to accomplish user requests.
-${isArabic ? 'Respond to the user in Arabic.' : 'Respond to the user in English.'}
-<input>
-At every step you receive:
-1. <screen_state>: Current screen name, available screens, and interactive elements indexed for actions.
-2. <agent_history>: Your previous steps and their results.
-3. <user_request>: The user's original request.
-</input>
-<screen_state>
-Interactive elements are listed as [index]<type attrs>label</>
-- index: numeric identifier for interaction
-- type: element type (pressable, text-input, switch)
-- label: visible text content of the element
-Only elements with [index] are interactive. Use the index to tap or type into them.
-</screen_state>
-<tools>
-Available tools:
-- tap(index): Tap an interactive element by its index. This triggers its onPress handler.
-- type(index, text): Type text into a text-input element by its index.
-- navigate(screen, params): Navigate to a specific screen. params is optional JSON object.
-- done(text, success): Complete the task. text is your response to the user.
-- ask_user(question): Ask the user for clarification if needed.
-</tools>
-<rules>
-- Only interact with elements that have an [index].
-- After tapping an element, the screen may change. Wait for the next step to see updated elements.
-- If the current screen doesn't have what you need, use navigate() to go to another screen.
-- If you're stuck or need more info, use ask_user().
-- When the task is complete, ALWAYS call done() with a summary.
-- Be efficient — complete tasks in as few steps as possible.
-- If a tap navigates to another screen, the next step will show the new screen's elements.
-</rules>`;
-}
 // ─── Agent Runtime ─────────────────────────────────────────────
 export class AgentRuntime {
@@ -112,10 +68,10 @@ export class AgentRuntime {
   // ─── Tool Registration ─────────────────────────────────────
   private registerBuiltInTools(): void {
-    // tap — tap an interactive element by index
+    // tap — universal interaction (mirrors RNTL's dispatchEvent pattern)
     this.tools.set('tap', {
       name: 'tap',
-      description: 'Tap an interactive element by its index to trigger its onPress handler.',
+      description: 'Tap an interactive element by its index. Works universally on buttons, switches, and custom components.',
       parameters: {
         index: { type: 'number', description: 'The index of the element to tap', required: true },
       },
@@ -125,17 +81,48 @@ export class AgentRuntime {
         if (!element) {
           return `❌ Element with index ${args.index} not found. Available indexes: ${elements.map(e => e.index).join(', ')}`;
         }
-        if (!element.props.onPress) {
-          return `❌ Element [${args.index}] "${element.label}" does not have an onPress handler.`;
+        // Strategy 1: Switch — call onValueChange (like RNTL's fireEvent('valueChange'))
+        if (element.type === 'switch' && element.props.onValueChange) {
+          try {
+            element.props.onValueChange(!element.props.value);
+            await new Promise(resolve => setTimeout(resolve, 500));
+            return `✅ Toggled [${args.index}] "${element.label}" to ${!element.props.value}`;
+          } catch (error: any) {
+            return `❌ Error toggling [${args.index}]: ${error.message}`;
+          }
         }
-        try {
-          element.props.onPress();
-          // Wait for UI to update after tap
-          await new Promise(resolve => setTimeout(resolve, 500));
-          return `✅ Tapped [${args.index}] "${element.label}"`;
-        } catch (error: any) {
-          return `❌ Error tapping [${args.index}]: ${error.message}`;
+        // Strategy 2: Direct onPress (covers Pressable, Button, custom components)
+        if (element.props.onPress) {
+          try {
+            element.props.onPress();
+            await new Promise(resolve => setTimeout(resolve, 500));
+            return `✅ Tapped [${args.index}] "${element.label}"`;
+          } catch (error: any) {
+            return `❌ Error tapping [${args.index}]: ${error.message}`;
+          }
         }
+        // Strategy 3: Bubble up Fiber tree (like RNTL's findEventHandler → element.parent)
+        let fiber = element.fiberNode?.return;
+        let bubbleDepth = 0;
+        while (fiber && bubbleDepth < 5) {
+          const parentProps = fiber.memoizedProps || {};
+          if (parentProps.onPress && typeof parentProps.onPress === 'function') {
+            try {
+              parentProps.onPress();
+              await new Promise(resolve => setTimeout(resolve, 500));
+              return `✅ Tapped parent of [${args.index}] "${element.label}"`;
+            } catch (error: any) {
+              return `❌ Error tapping parent of [${args.index}]: ${error.message}`;
+            }
+          }
+          fiber = fiber.return;
+          bubbleDepth++;
+        }
+        return `❌ Element [${args.index}] "${element.label}" has no tap handler (no onPress or onValueChange found).`;
       },
     });
@@ -165,25 +152,35 @@ export class AgentRuntime {
       },
     });
-    // navigate — navigate to a screen
+    // navigate — navigate to a screen (supports React Navigation + Expo Router)
     this.tools.set('navigate', {
       name: 'navigate',
       description: 'Navigate to a specific screen in the app.',
       parameters: {
-        screen: { type: 'string', description: 'Screen name to navigate to', required: true },
+        screen: { type: 'string', description: 'Screen name or path to navigate to', required: true },
         params: { type: 'string', description: 'Optional JSON params object', required: false },
       },
       execute: async (args) => {
+        // Expo Router path: use router.push()
+        if (this.config.router) {
+          try {
+            const path = args.screen.startsWith('/') ? args.screen : `/${args.screen}`;
+            this.config.router.push(path);
+            await new Promise(resolve => setTimeout(resolve, 500));
+            return `✅ Navigated to "${path}"`;
+          } catch (error: any) {
+            return `❌ Navigation error: ${error.message}`;
+          }
+        }
+        // React Navigation path: use navRef.navigate()
         if (!this.navRef) {
           return '❌ Navigation ref not available.';
         }
-        // Per React Navigation docs: must check isReady() before navigate
-        // https://reactnavigation.org/docs/navigating-without-navigation-prop#handling-initialization
         if (!this.navRef.isReady()) {
-          // Wait a bit and retry — navigator may still be mounting
           await new Promise(resolve => setTimeout(resolve, 1000));
           if (!this.navRef.isReady()) {
-            return '❌ Navigation is not ready yet. The navigator may not have finished mounting.';
+            return '❌ Navigation is not ready yet.';
           }
         }
         try {
@@ -210,14 +207,21 @@ export class AgentRuntime {
       },
     });
-    // ask_user — ask for clarification
+    // ask_user — ask for clarification (mirrors page-agent: blocks until user responds)
     this.tools.set('ask_user', {
       name: 'ask_user',
-      description: 'Ask the user for clarification or more information.',
+      description: 'Ask the user a question and wait for their answer. Use this if you need more information or clarification.',
       parameters: {
         question: { type: 'string', description: 'Question to ask the user', required: true },
       },
       execute: async (args) => {
+        if (this.config.onAskUser) {
+          // Page-agent pattern: block until user responds, then continue the loop
+          this.config.onStatusUpdate?.('Waiting for your answer...');
+          const answer = await this.config.onAskUser(args.question);
+          return `User answered: ${answer}`;
+        }
+        // Legacy fallback: break the loop (context will be lost)
         return `❓ ${args.question}`;
       },
     });
@@ -236,30 +240,85 @@ export class AgentRuntime {
   // ─── Navigation Helpers ────────────────────────────────────
+  /**
+   * Recursively collect ALL screen names from the navigation state tree.
+   * This handles tabs, drawers, and nested stacks.
+   */
   private getRouteNames(): string[] {
     try {
       if (!this.navRef?.isReady?.()) return [];
       const state = this.navRef?.getRootState?.() || this.navRef?.getState?.();
-      if (state?.routeNames) return state.routeNames;
-      if (state?.routes) return state.routes.map((r: any) => r.name);
-      return [];
+      if (!state) return [];
+      return this.collectRouteNames(state);
     } catch {
       return [];
     }
   }
+  private collectRouteNames(state: any): string[] {
+    const names: string[] = [];
+    if (state?.routes) {
+      for (const route of state.routes) {
+        names.push(route.name);
+        // Recurse into nested navigator states
+        if (route.state) {
+          names.push(...this.collectRouteNames(route.state));
+        }
+      }
+    }
+    return [...new Set(names)];
+  }
+  /**
+   * Recursively find the deepest active screen name.
+   * For tabs: follows active tab → active screen inside that tab.
+   */
   private getCurrentScreenName(): string {
+    // Expo Router: use pathname
+    if (this.config.pathname) {
+      const segments = this.config.pathname.split('/').filter(Boolean);
+      return segments[segments.length - 1] || 'Unknown';
+    }
     try {
       if (!this.navRef?.isReady?.()) return 'Unknown';
       const state = this.navRef?.getRootState?.() || this.navRef?.getState?.();
       if (!state) return 'Unknown';
-      const route = state.routes[state.index];
-      return route?.name || 'Unknown';
+      return this.getDeepestScreenName(state);
     } catch {
       return 'Unknown';
     }
   }
+  private getDeepestScreenName(state: any): string {
+    if (!state?.routes || state.index == null) return 'Unknown';
+    const route = state.routes[state.index];
+    if (!route) return 'Unknown';
+    // If this route has a nested state, recurse deeper
+    if (route.state) {
+      return this.getDeepestScreenName(route.state);
+    }
+    return route.name || 'Unknown';
+  }
+  /** Maps a tool call to a user-friendly status label for the loading overlay. */
+  private getToolStatusLabel(toolName: string, args: Record<string, any>): string {
+    switch (toolName) {
+      case 'tap':
+        return `Tapping element ${args.index ?? ''}...`;
+      case 'type':
+        return `Typing into field...`;
+      case 'navigate':
+        return `Navigating to ${args.screen || 'screen'}...`;
+      case 'done':
+        return 'Wrapping up...';
+      case 'ask_user':
+        return 'Asking you a question...';
+      default:
+        return `Running ${toolName}...`;
+    }
+  }
   // ─── Build Tools Array for Provider ────────────────────────
   private buildToolsForProvider(): ToolDefinition[] {
@@ -324,7 +383,87 @@ export class AgentRuntime {
     return result ? `<instructions>\n${result}</instructions>\n\n` : '';
   }
-  // ─── Main Execution Loop (mirrors PageAgentCore.execute) ───────
+  // ─── Observation System (mirrors PageAgentCore.#handleObservations) ──
+  private observations: string[] = [];
+  private lastScreenName: string = '';
+  private handleObservations(step: number, maxSteps: number, screenName: string): void {
+    // Screen change detection
+    if (this.lastScreenName && screenName !== this.lastScreenName) {
+      this.observations.push(`Screen navigated to → ${screenName}`);
+    }
+    this.lastScreenName = screenName;
+    // Remaining steps warning
+    const remaining = maxSteps - step;
+    if (remaining === 5) {
+      this.observations.push(
+        `⚠️ Only ${remaining} steps remaining. Consider wrapping up or calling done with partial results.`
+      );
+    } else if (remaining === 2) {
+      this.observations.push(
+        `⚠️ Critical: Only ${remaining} steps left! You must finish the task or call done immediately.`
+      );
+    }
+  }
+  // ─── User Prompt Assembly (mirrors PageAgentCore.#assembleUserPrompt) ──
+  private assembleUserPrompt(
+    step: number,
+    maxSteps: number,
+    contextualMessage: string,
+    screenName: string,
+    screenContent: string,
+  ): string {
+    let prompt = '';
+    // 1. <instructions> (optional system/screen instructions)
+    prompt += this.getInstructions(screenName);
+    // 2. <agent_state> — user request + step info (mirrors page-agent)
+    prompt += '<agent_state>\n';
+    prompt += '<user_request>\n';
+    prompt += `${contextualMessage}\n`;
+    prompt += '</user_request>\n';
+    prompt += '<step_info>\n';
+    prompt += `Step ${step + 1} of ${maxSteps} max possible steps\n`;
+    prompt += '</step_info>\n';
+    prompt += '</agent_state>\n\n';
+    // 3. <agent_history> — structured per-step history (mirrors page-agent)
+    prompt += '<agent_history>\n';
+    let stepIndex = 0;
+    for (const event of this.history) {
+      stepIndex++;
+      prompt += `<step_${stepIndex}>\n`;
+      prompt += `Previous Goal Eval: ${event.reflection.previousGoalEval}\n`;
+      prompt += `Memory: ${event.reflection.memory}\n`;
+      prompt += `Plan: ${event.reflection.plan}\n`;
+      prompt += `Action Result: ${event.action.output}\n`;
+      prompt += `</step_${stepIndex}>\n`;
+    }
+    // Inject system observations
+    for (const obs of this.observations) {
+      prompt += `<sys>${obs}</sys>\n`;
+    }
+    this.observations = [];
+    prompt += '</agent_history>\n\n';
+    // 4. <screen_state> — dehydrated screen content
+    prompt += '<screen_state>\n';
+    prompt += `Current Screen: ${screenName}\n`;
+    prompt += screenContent + '\n';
+    prompt += '</screen_state>\n';
+    return prompt;
+  }
+  // ─── Main Execution Loop ──────────────────────────────────────
   async execute(userMessage: string): Promise<ExecutionResult> {
     if (this.isRunning) {
@@ -333,6 +472,8 @@ export class AgentRuntime {
     this.isRunning = true;
     this.history = [];
+    this.observations = [];
+    this.lastScreenName = '';
     const maxSteps = this.config.maxSteps || DEFAULT_MAX_STEPS;
     const stepDelay = this.config.stepDelay ?? 300;
@@ -374,13 +515,16 @@ export class AgentRuntime {
           screenContent = await this.config.transformScreenContent(screenContent);
         }
-        // 3. Build context message with instructions + screen state
-        const instructionsBlock = this.getInstructions(screenName);
-        const contextMessage = step === 0
-          ? `${instructionsBlock}<user_request>${contextualMessage}</user_request>\n\n<screen_state>\n${screenContent}\n</screen_state>`
-          : `${instructionsBlock}<screen_state>\n${screenContent}\n</screen_state>`;
+        // 3. Handle observations (mirrors page-agent #handleObservations)
+        this.handleObservations(step, maxSteps, screenName);
+        // 4. Assemble structured user prompt (mirrors page-agent #assembleUserPrompt)
+        const contextMessage = this.assembleUserPrompt(
+          step, maxSteps, contextualMessage, screenName, screenContent,
+        );
-        // 4. Send to AI provider
+        // 5. Send to AI provider
+        this.config.onStatusUpdate?.('Analyzing screen...');
         const systemPrompt = buildSystemPrompt(this.config.language || 'en');
         const tools = this.buildToolsForProvider();
@@ -393,7 +537,7 @@ export class AgentRuntime {
           this.history,
         );
-        // 5. Process tool calls
+        // 6. Process tool calls
         if (!response.toolCalls || response.toolCalls.length === 0) {
           logger.warn('AgentRuntime', 'No tool calls in response. Text:', response.text);
           const result: ExecutionResult = {
@@ -405,65 +549,77 @@ export class AgentRuntime {
           return result;
         }
-        for (const toolCall of response.toolCalls) {
-          logger.info('AgentRuntime', `Tool: ${toolCall.name}(${JSON.stringify(toolCall.args)})`);
+        // 7. Structured reasoning from provider (no regex parsing needed)
+        const { reasoning } = response;
+        logger.info('AgentRuntime', `🧠 Plan: ${reasoning.plan}`);
+        if (reasoning.memory) {
+          logger.debug('AgentRuntime', `💾 Memory: ${reasoning.memory}`);
+        }
+        // Only process the FIRST tool call per step (one action per step).
+        // After one action, the loop re-reads the screen with fresh indexes.
+        const toolCall = response.toolCalls[0]!;
+        if (response.toolCalls.length > 1) {
+          logger.warn('AgentRuntime', `AI returned ${response.toolCalls.length} tool calls, executing only the first one.`);
+        }
-          // Find and execute the tool
-          const tool = this.tools.get(toolCall.name) ||
-            this.buildToolsForProvider().find(t => t.name === toolCall.name);
+        logger.info('AgentRuntime', `Tool: ${toolCall.name}(${JSON.stringify(toolCall.args)})`);
-          let output: string;
-          if (tool) {
-            output = await tool.execute(toolCall.args);
-          } else {
-            output = `❌ Unknown tool: ${toolCall.name}`;
-          }
+        // Dynamic status update based on tool being executed
+        const statusLabel = this.getToolStatusLabel(toolCall.name, toolCall.args);
+        this.config.onStatusUpdate?.(statusLabel);
-          logger.info('AgentRuntime', `Result: ${output}`);
-          // Record step
-          const agentStep: AgentStep = {
-            stepIndex: step,
-            reflection: {
-              evaluationPreviousGoal: step > 0 ? 'Evaluating...' : 'First step',
-              memory: '',
-              nextGoal: '',
-            },
-            action: {
-              name: toolCall.name,
-              input: toolCall.args,
-              output,
-            },
+        // Find and execute the tool
+        const tool = this.tools.get(toolCall.name) ||
+          this.buildToolsForProvider().find(t => t.name === toolCall.name);
+        let output: string;
+        if (tool) {
+          output = await tool.execute(toolCall.args);
+        } else {
+          output = `❌ Unknown tool: ${toolCall.name}`;
+        }
+        logger.info('AgentRuntime', `Result: ${output}`);
+        // Record step with structured reasoning
+        const agentStep: AgentStep = {
+          stepIndex: step,
+          reflection: reasoning,
+          action: {
+            name: toolCall.name,
+            input: toolCall.args,
+            output,
+          },
+        };
+        this.history.push(agentStep);
+        // Lifecycle: onAfterStep (mirrors page-agent)
+        await this.config.onAfterStep?.(this.history);
+        // Check if done
+        if (toolCall.name === 'done') {
+          const result: ExecutionResult = {
+            success: toolCall.args.success !== false,
+            message: toolCall.args.text || output,
+            steps: this.history,
           };
-          this.history.push(agentStep);
-          // Lifecycle: onAfterStep (mirrors page-agent)
-          await this.config.onAfterStep?.(this.history);
-          // Check if done
-          if (toolCall.name === 'done') {
-            const result: ExecutionResult = {
-              success: toolCall.args.success !== false,
-              message: output,
-              steps: this.history,
-            };
-            logger.info('AgentRuntime', `Task completed: ${output}`);
-            await this.config.onAfterTask?.(result);
-            return result;
-          }
+          logger.info('AgentRuntime', `Task completed: ${result.message}`);
+          await this.config.onAfterTask?.(result);
+          return result;
+        }
-          // Check if asking user
-          if (toolCall.name === 'ask_user') {
-            this.lastAskUserQuestion = toolCall.args.question || output;
-            const result: ExecutionResult = {
-              success: true,
-              message: output,
-              steps: this.history,
-            };
-            await this.config.onAfterTask?.(result);
-            return result;
-          }
+        // Check if asking user (legacy path — only breaks loop when onAskUser is NOT set)
+        if (toolCall.name === 'ask_user' && !this.config.onAskUser) {
+          this.lastAskUserQuestion = toolCall.args.question || output;
+          const result: ExecutionResult = {
+            success: true,
+            message: output,
+            steps: this.history,
+          };
+          await this.config.onAfterTask?.(result);
+          return result;
         }
         // Step delay (mirrors page-agent stepDelay)