npm - @mobileai/react-native - Versions diffs - 0.1.0 → 0.3.0 - Mend

@mobileai/react-native 0.1.0 → 0.3.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (35)

package/README.md +78 -7
package/lib/module/components/AIAgent.js +40 -4
package/lib/module/components/AIAgent.js.map +1 -1
package/lib/module/components/AgentChatBar.js +177 -29
package/lib/module/components/AgentChatBar.js.map +1 -1
package/lib/module/core/AgentRuntime.js +268 -126
package/lib/module/core/AgentRuntime.js.map +1 -1
package/lib/module/core/FiberTreeWalker.js +74 -20
package/lib/module/core/FiberTreeWalker.js.map +1 -1
package/lib/module/core/systemPrompt.js +164 -0
package/lib/module/core/systemPrompt.js.map +1 -0
package/lib/module/providers/GeminiProvider.js +189 -73
package/lib/module/providers/GeminiProvider.js.map +1 -1
package/lib/typescript/src/components/AIAgent.d.ts +9 -1
package/lib/typescript/src/components/AIAgent.d.ts.map +1 -1
package/lib/typescript/src/components/AgentChatBar.d.ts +4 -3
package/lib/typescript/src/components/AgentChatBar.d.ts.map +1 -1
package/lib/typescript/src/core/AgentRuntime.d.ts +16 -0
package/lib/typescript/src/core/AgentRuntime.d.ts.map +1 -1
package/lib/typescript/src/core/FiberTreeWalker.d.ts +5 -0
package/lib/typescript/src/core/FiberTreeWalker.d.ts.map +1 -1
package/lib/typescript/src/core/systemPrompt.d.ts +9 -0
package/lib/typescript/src/core/systemPrompt.d.ts.map +1 -0
package/lib/typescript/src/core/types.d.ts +51 -13
package/lib/typescript/src/core/types.d.ts.map +1 -1
package/lib/typescript/src/providers/GeminiProvider.d.ts +33 -13
package/lib/typescript/src/providers/GeminiProvider.d.ts.map +1 -1
package/package.json +16 -14
package/src/components/AIAgent.tsx +41 -1
package/src/components/AgentChatBar.tsx +150 -28
package/src/core/AgentRuntime.ts +287 -131
package/src/core/FiberTreeWalker.ts +74 -19
package/src/core/systemPrompt.ts +162 -0
package/src/core/types.ts +58 -10
package/src/providers/GeminiProvider.ts +174 -101

package/src/providers/GeminiProvider.ts CHANGED Viewed

@@ -1,30 +1,30 @@
 /**
- * GeminiProvider — Simplified Gemini API integration.
- * Sends dehydrated screen state + tools to Gemini and parses tool call responses.
+ * GeminiProvider — Gemini API integration with structured action pattern.
+ *
+ * Uses a single forced function call (`agent_step`) that bundles
+ * structured reasoning (evaluation, memory, plan) alongside the action.
+ * This replaces free-form text + separate tool calls for stability.
  */
 import { logger } from '../utils/logger';
-import type { AIProvider, ToolDefinition, AgentStep } from '../core/types';
+import type { AIProvider, ToolDefinition, AgentStep, ProviderResult, AgentReasoning } from '../core/types';
-// ─── Gemini API Types ──────────────────────────────────────────
+// ─── Constants ─────────────────────────────────────────────────
-interface GeminiTool {
-  functionDeclarations: GeminiFunctionDeclaration[];
-}
+const AGENT_STEP_FN = 'agent_step';
-interface GeminiFunctionDeclaration {
-  name: string;
-  description: string;
-  parameters: {
-    type: string;
-    properties: Record<string, { type: string; description: string; enum?: string[] }>;
-    required: string[];
-  };
-}
+// Reasoning fields that are always present in the agent_step schema
+const REASONING_FIELDS = ['previous_goal_eval', 'memory', 'plan'] as const;
+// ─── Gemini API Types ──────────────────────────────────────────
 interface GeminiContent {
   role: 'user' | 'model';
-  parts: Array<{ text?: string; functionCall?: { name: string; args: any }; functionResponse?: { name: string; response: any } }>;
+  parts: Array<{
+    text?: string;
+    functionCall?: { name: string; args: any };
+    functionResponse?: { name: string; response: any };
+  }>;
 }
 // ─── Provider ──────────────────────────────────────────────────
@@ -33,7 +33,6 @@ export class GeminiProvider implements AIProvider {
   private apiKey: string;
   private model: string;
   constructor(apiKey: string, model: string = 'gemini-2.5-flash') {
     this.apiKey = apiKey;
     this.model = model;
@@ -44,14 +43,14 @@ export class GeminiProvider implements AIProvider {
     userMessage: string,
     tools: ToolDefinition[],
     history: AgentStep[],
-  ): Promise<{ toolCalls: Array<{ name: string; args: Record<string, any> }>; text?: string }> {
+  ): Promise<ProviderResult> {
     logger.info('GeminiProvider', `Sending request. Model: ${this.model}, Tools: ${tools.length}`);
-    // Build Gemini tools
-    const geminiTools = this.buildGeminiTools(tools);
+    // Build single agent_step function declaration
+    const agentStepDeclaration = this.buildAgentStepDeclaration(tools);
-    // Build conversation history
+    // Build the single-turn request contents; history is not replayed as function call/response pairs
     const contents = this.buildContents(userMessage, history);
     // Make API request
@@ -59,8 +58,15 @@ export class GeminiProvider implements AIProvider {
     const body: any = {
       contents,
-      tools: geminiTools.length > 0 ? geminiTools : undefined,
+      tools: [{ functionDeclarations: [agentStepDeclaration] }],
       systemInstruction: { parts: [{ text: systemPrompt }] },
+      // Force the model to always call agent_step
+      tool_config: {
+        function_calling_config: {
+          mode: 'ANY',
+          allowed_function_names: [AGENT_STEP_FN],
+        },
+      },
       generationConfig: {
         temperature: 0.2,
         maxOutputTokens: 2048,
@@ -87,44 +93,85 @@ export class GeminiProvider implements AIProvider {
       const data = await response.json();
-      // Parse response
-      return this.parseResponse(data);
+      return this.parseAgentStepResponse(data, tools);
     } catch (error: any) {
       logger.error('GeminiProvider', 'Request failed:', error.message);
       throw error;
     }
   }
-  // ─── Build Gemini Tools ────────────────────────────────────
+  // ─── Build agent_step Declaration ──────────────────────────
+  /**
+   * Builds a single `agent_step` function declaration that combines:
+   * - Structured reasoning fields (previous_goal_eval, memory, plan)
+   * - action_name (enum of all available tool names)
+   * - All tool parameter fields as flat top-level properties
+   *
+   * Flat schema avoids Gemini's "deeply nested schema" rejection in ANY mode.
+   */
+  private buildAgentStepDeclaration(tools: ToolDefinition[]): any {
+    const toolNames = tools.map(t => t.name);
+    // Collect all unique parameter fields across all tools
+    const actionProperties: Record<string, any> = {};
+    for (const tool of tools) {
+      for (const [paramName, param] of Object.entries(tool.parameters)) {
+        // Skip if already added (shared field names like 'text', 'index')
+        if (actionProperties[paramName]) continue;
+        actionProperties[paramName] = {
+          type: this.mapParamType(param.type),
+          description: param.description,
+          ...(param.enum ? { enum: param.enum } : {}),
+        };
+      }
+    }
-  private buildGeminiTools(tools: ToolDefinition[]): GeminiTool[] {
-    const declarations: GeminiFunctionDeclaration[] = tools.map(tool => ({
-      name: tool.name,
-      description: tool.description,
+    // Build the per-tool summary lines that are listed in the agent_step function description
+    const toolDescriptions = tools
+      .map(t => {
+        const params = Object.keys(t.parameters).join(', ');
+        return `- ${t.name}(${params}): ${t.description}`;
+      })
+      .join('\n');
+    return {
+      name: AGENT_STEP_FN,
+      description: `Execute one agent step. Choose an action and provide reasoning.\n\nAvailable actions:\n${toolDescriptions}`,
       parameters: {
         type: 'OBJECT',
-        properties: Object.fromEntries(
-          Object.entries(tool.parameters).map(([key, param]) => [
-            key,
-            {
-              type: this.mapParamType(param.type),
-              description: param.description,
-              ...(param.enum ? { enum: param.enum } : {}),
-            },
-          ]),
-        ),
-        required: Object.entries(tool.parameters)
-          .filter(([, param]) => param.required !== false)
-          .map(([key]) => key),
+        properties: {
+          // ── Reasoning fields ──
+          previous_goal_eval: {
+            type: 'STRING',
+            description: 'One-sentence assessment of your last action. State success, failure, or uncertain. Skip on first step.',
+          },
+          memory: {
+            type: 'STRING',
+            description: 'Key facts to remember for future steps: progress made, items found, counters, field values already collected.',
+          },
+          plan: {
+            type: 'STRING',
+            description: 'Your immediate next goal — what action you will take and why.',
+          },
+          // ── Action selection ──
+          action_name: {
+            type: 'STRING',
+            description: 'Which action to execute.',
+            enum: toolNames,
+          },
+          // ── Action parameters (flat) ──
+          ...actionProperties,
+        },
+        required: ['plan', 'action_name'],
       },
-    }));
-    return [{ functionDeclarations: declarations }];
+    };
   }
   private mapParamType(type: string): string {
     switch (type) {
       case 'number': return 'NUMBER';
+      case 'integer': return 'INTEGER';
       case 'boolean': return 'BOOLEAN';
       case 'string':
       default: return 'STRING';
@@ -133,78 +180,104 @@ export class GeminiProvider implements AIProvider {
   // ─── Build Contents ────────────────────────────────────────
-  private buildContents(userMessage: string, history: AgentStep[]): GeminiContent[] {
-    const contents: GeminiContent[] = [];
-    // Add history as conversation turns
-    for (const step of history) {
-      // User turn (screen state was sent)
-      contents.push({
-        role: 'user',
-        parts: [{ text: `Step ${step.stepIndex + 1} result: ${step.action.output}` }],
-      });
-    }
-    // Current user message
-    contents.push({
+  /**
+   * Builds Gemini conversation contents.
+   *
+   * Each step is a STATELESS single-turn request (matching page-agent's approach):
+   * - System prompt has general instructions
+   * - User message contains full context: task, history, screen state
+   * - Model responds with agent_step function call
+   *
+   * History is embedded as text in assembleUserPrompt (via <agent_history>),
+   * NOT as functionCall/functionResponse pairs. This avoids Gemini's
+   * conversation format requirements and thought_signature complexity.
+   */
+  private buildContents(userMessage: string, _history: AgentStep[]): GeminiContent[] {
+    return [{
       role: 'user',
       parts: [{ text: userMessage }],
-    });
-    // Ensure alternating roles (Gemini requirement)
-    return this.ensureAlternatingRoles(contents);
-  }
-  private ensureAlternatingRoles(contents: GeminiContent[]): GeminiContent[] {
-    if (contents.length <= 1) return contents;
-    const merged: GeminiContent[] = [contents[0]!];
-    for (let i = 1; i < contents.length; i++) {
-      const prev = merged[merged.length - 1]!;
-      const curr = contents[i]!;
-      if (prev.role === curr.role) {
-        // Merge same-role messages
-        prev.parts.push(...curr.parts);
-      } else {
-        merged.push(curr);
-      }
-    }
-    return merged;
+    }];
   }
   // ─── Parse Response ────────────────────────────────────────
-  private parseResponse(data: any): { toolCalls: Array<{ name: string; args: Record<string, any> }>; text?: string } {
-    const toolCalls: Array<{ name: string; args: Record<string, any> }> = [];
-    let text: string | undefined;
+  /**
+   * Parses the Gemini response expecting a single agent_step function call.
+   * Extracts structured reasoning + action, and determines which tool to execute.
+   */
+  private parseAgentStepResponse(data: any, tools: ToolDefinition[]): ProviderResult {
     if (!data.candidates || data.candidates.length === 0) {
       logger.warn('GeminiProvider', 'No candidates in response');
-      return { toolCalls, text: 'No response generated.' };
+      return {
+        toolCalls: [{ name: 'done', args: { text: 'No response generated.', success: false } }],
+        reasoning: { previousGoalEval: '', memory: '', plan: '' },
+        text: 'No response generated.',
+      };
     }
     const candidate = data.candidates[0];
     const parts = candidate.content?.parts || [];
-    for (const part of parts) {
-      if (part.functionCall) {
-        toolCalls.push({
-          name: part.functionCall.name,
-          args: part.functionCall.args || {},
-        });
+    // Find the function call part
+    const fnCallPart = parts.find((p: any) => p.functionCall);
+    const textPart = parts.find((p: any) => p.text);
+    if (!fnCallPart?.functionCall) {
+      logger.warn('GeminiProvider', 'No function call in response. Text:', textPart?.text);
+      return {
+        toolCalls: [{ name: 'done', args: { text: textPart?.text || 'No action taken.', success: false } }],
+        reasoning: { previousGoalEval: '', memory: '', plan: '' },
+        text: textPart?.text,
+      };
+    }
+    const args = fnCallPart.functionCall.args || {};
+    // Extract reasoning fields
+    const reasoning: AgentReasoning = {
+      previousGoalEval: args.previous_goal_eval || '',
+      memory: args.memory || '',
+      plan: args.plan || '',
+    };
+    // Extract action
+    const actionName = args.action_name;
+    if (!actionName) {
+      logger.warn('GeminiProvider', 'No action_name in agent_step. Falling back to done.');
+      return {
+        toolCalls: [{ name: 'done', args: { text: 'Agent did not choose an action.', success: false } }],
+        reasoning,
+        text: textPart?.text,
+      };
+    }
+    // Build action args: everything except reasoning fields and action_name
+    const actionArgs: Record<string, any> = {};
+    const reservedKeys = new Set([...REASONING_FIELDS, 'action_name']);
+    // Find the matching tool to know which params belong to it
+    const matchedTool = tools.find(t => t.name === actionName);
+    if (matchedTool) {
+      for (const paramName of Object.keys(matchedTool.parameters)) {
+        if (args[paramName] !== undefined) {
+          actionArgs[paramName] = args[paramName];
+        }
       }
-      if (part.text) {
-        text = (text || '') + part.text;
+    } else {
+      // Custom/registered tool — grab all non-reserved fields
+      for (const [key, value] of Object.entries(args)) {
+        if (!reservedKeys.has(key)) {
+          actionArgs[key] = value;
+        }
       }
     }
-    logger.info('GeminiProvider', `Parsed: ${toolCalls.length} tool calls, text: ${text ? 'yes' : 'no'}`);
+    logger.info('GeminiProvider', `Parsed: action=${actionName}, plan="${reasoning.plan}"`);
-    return { toolCalls, text };
+    return {
+      toolCalls: [{ name: actionName, args: actionArgs }],
+      reasoning,
+      text: textPart?.text,
+    };
   }
 }