@mobileai/react-native 0.4.5 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/README.md +80 -15
  2. package/lib/module/components/AIAgent.js +181 -38
  3. package/lib/module/components/AIAgent.js.map +1 -1
  4. package/lib/module/components/AgentChatBar.js +53 -29
  5. package/lib/module/components/AgentChatBar.js.map +1 -1
  6. package/lib/module/components/Icons.js +337 -0
  7. package/lib/module/components/Icons.js.map +1 -0
  8. package/lib/module/core/AgentRuntime.js +74 -3
  9. package/lib/module/core/AgentRuntime.js.map +1 -1
  10. package/lib/module/core/systemPrompt.js +87 -34
  11. package/lib/module/core/systemPrompt.js.map +1 -1
  12. package/lib/module/services/AudioInputService.js +73 -2
  13. package/lib/module/services/AudioInputService.js.map +1 -1
  14. package/lib/module/services/AudioOutputService.js +58 -5
  15. package/lib/module/services/AudioOutputService.js.map +1 -1
  16. package/lib/module/services/VoiceService.js +284 -239
  17. package/lib/module/services/VoiceService.js.map +1 -1
  18. package/lib/typescript/src/components/AIAgent.d.ts.map +1 -1
  19. package/lib/typescript/src/components/AgentChatBar.d.ts.map +1 -1
  20. package/lib/typescript/src/components/Icons.d.ts +43 -0
  21. package/lib/typescript/src/components/Icons.d.ts.map +1 -0
  22. package/lib/typescript/src/core/AgentRuntime.d.ts +12 -0
  23. package/lib/typescript/src/core/AgentRuntime.d.ts.map +1 -1
  24. package/lib/typescript/src/core/systemPrompt.d.ts +7 -4
  25. package/lib/typescript/src/core/systemPrompt.d.ts.map +1 -1
  26. package/lib/typescript/src/services/AudioInputService.d.ts +13 -0
  27. package/lib/typescript/src/services/AudioInputService.d.ts.map +1 -1
  28. package/lib/typescript/src/services/AudioOutputService.d.ts.map +1 -1
  29. package/lib/typescript/src/services/VoiceService.d.ts +41 -24
  30. package/lib/typescript/src/services/VoiceService.d.ts.map +1 -1
  31. package/package.json +1 -1
  32. package/src/components/AIAgent.tsx +194 -38
  33. package/src/components/AgentChatBar.tsx +44 -25
  34. package/src/components/Icons.tsx +253 -0
  35. package/src/core/AgentRuntime.ts +70 -3
  36. package/src/core/systemPrompt.ts +87 -34
  37. package/src/services/AudioInputService.ts +77 -2
  38. package/src/services/AudioOutputService.ts +59 -5
  39. package/src/services/VoiceService.ts +280 -252
@@ -0,0 +1,253 @@
1
+ /**
2
+ * Icons — Zero-dependency, View-based icons for the AI Agent chat bar.
3
+ *
4
+ * Why not emoji? iOS Simulator 26+ has a bug where emoji renders as "?".
5
+ * Why not Unicode symbols? They look obscure and unprofessional.
6
+ * Why not icon libraries? This is a library — zero runtime dependencies.
7
+ *
8
+ * These icons are built purely from React Native View components,
9
+ * rendering identically on every platform and screen size.
10
+ */
11
+
12
+ import { View } from 'react-native';
13
+
14
+ // ─── Mic Icon (pill + stem + base) ────────────────────────────
15
+
16
+ export function MicIcon({ size = 20, color = '#fff' }: { size?: number; color?: string }) {
17
+ const pillW = size * 0.4;
18
+ const pillH = size * 0.5;
19
+ const stemW = size * 0.08;
20
+ const stemH = size * 0.18;
21
+ const baseW = size * 0.35;
22
+ const arcW = size * 0.55;
23
+ const arcH = size * 0.35;
24
+ const arcBorder = size * 0.07;
25
+
26
+ return (
27
+ <View style={{ width: size, height: size, alignItems: 'center', justifyContent: 'center' }}>
28
+ {/* Pill (mic head) */}
29
+ <View style={{
30
+ width: pillW,
31
+ height: pillH,
32
+ borderRadius: pillW / 2,
33
+ backgroundColor: color,
34
+ }} />
35
+ {/* Arc (U-shape around mic) */}
36
+ <View style={{
37
+ width: arcW,
38
+ height: arcH,
39
+ borderBottomLeftRadius: arcW / 2,
40
+ borderBottomRightRadius: arcW / 2,
41
+ borderWidth: arcBorder,
42
+ borderTopWidth: 0,
43
+ borderColor: color,
44
+ marginTop: -(pillH * 0.3),
45
+ }} />
46
+ {/* Stem */}
47
+ <View style={{
48
+ width: stemW,
49
+ height: stemH,
50
+ backgroundColor: color,
51
+ marginTop: -1,
52
+ }} />
53
+ {/* Base */}
54
+ <View style={{
55
+ width: baseW,
56
+ height: stemW,
57
+ backgroundColor: color,
58
+ borderRadius: stemW / 2,
59
+ }} />
60
+ </View>
61
+ );
62
+ }
63
+
64
+ // ─── Speaker Icon (cone + sound waves) ────────────────────────
65
+
66
+ export function SpeakerIcon({ size = 20, color = '#fff', muted = false }: { size?: number; color?: string; muted?: boolean }) {
67
+ const bodyW = size * 0.25;
68
+ const bodyH = size * 0.3;
69
+ const coneW = size * 0.2;
70
+
71
+ return (
72
+ <View style={{ width: size, height: size, alignItems: 'center', justifyContent: 'center', flexDirection: 'row' }}>
73
+ {/* Speaker body (rectangle) */}
74
+ <View style={{
75
+ width: bodyW,
76
+ height: bodyH,
77
+ backgroundColor: color,
78
+ borderRadius: size * 0.03,
79
+ }} />
80
+ {/* Speaker cone (triangle via borders) */}
81
+ <View style={{
82
+ width: 0,
83
+ height: 0,
84
+ borderTopWidth: size * 0.25,
85
+ borderTopColor: 'transparent',
86
+ borderBottomWidth: size * 0.25,
87
+ borderBottomColor: 'transparent',
88
+ borderLeftWidth: coneW,
89
+ borderLeftColor: color,
90
+ marginLeft: -1,
91
+ }} />
92
+ {muted ? (
93
+ /* Mute slash */
94
+ <View style={{
95
+ position: 'absolute',
96
+ width: size * 0.08,
97
+ height: size * 0.8,
98
+ backgroundColor: color,
99
+ borderRadius: size * 0.04,
100
+ transform: [{ rotate: '45deg' }],
101
+ }} />
102
+ ) : (
103
+ /* Sound waves */
104
+ <View style={{ marginLeft: size * 0.05 }}>
105
+ <View style={{
106
+ width: size * 0.15,
107
+ height: size * 0.3,
108
+ borderWidth: size * 0.05,
109
+ borderColor: color,
110
+ borderLeftWidth: 0,
111
+ borderTopLeftRadius: 0,
112
+ borderBottomLeftRadius: 0,
113
+ borderTopRightRadius: size * 0.15,
114
+ borderBottomRightRadius: size * 0.15,
115
+ }} />
116
+ </View>
117
+ )}
118
+ </View>
119
+ );
120
+ }
121
+
122
+ // ─── Send Arrow (upward arrow) ────────────────────────────────
123
+
124
+ export function SendArrowIcon({ size = 18, color = '#fff' }: { size?: number; color?: string }) {
125
+ // Filled right-pointing triangle (like iOS Messages send button)
126
+ const triH = size * 0.55;
127
+ return (
128
+ <View style={{ width: size, height: size, alignItems: 'center', justifyContent: 'center' }}>
129
+ <View style={{
130
+ width: 0,
131
+ height: 0,
132
+ borderTopWidth: triH / 2,
133
+ borderTopColor: 'transparent',
134
+ borderBottomWidth: triH / 2,
135
+ borderBottomColor: 'transparent',
136
+ borderLeftWidth: triH * 0.85,
137
+ borderLeftColor: color,
138
+ marginLeft: size * 0.1,
139
+ }} />
140
+ </View>
141
+ );
142
+ }
143
+
144
+ // ─── Stop Icon (filled square) ────────────────────────────────
145
+
146
+ export function StopIcon({ size = 18, color = '#fff' }: { size?: number; color?: string }) {
147
+ const sq = size * 0.45;
148
+ return (
149
+ <View style={{ width: size, height: size, alignItems: 'center', justifyContent: 'center' }}>
150
+ <View style={{
151
+ width: sq,
152
+ height: sq,
153
+ backgroundColor: color,
154
+ borderRadius: size * 0.05,
155
+ }} />
156
+ </View>
157
+ );
158
+ }
159
+
160
+ // ─── Recording Dot (pulsing filled circle) ────────────────────
161
+
162
+ export function RecordingDot({ size = 18, color = '#FF3B30' }: { size?: number; color?: string }) {
163
+ const dotSize = size * 0.45;
164
+ return (
165
+ <View style={{ width: size, height: size, alignItems: 'center', justifyContent: 'center' }}>
166
+ <View style={{
167
+ width: dotSize,
168
+ height: dotSize,
169
+ borderRadius: dotSize / 2,
170
+ backgroundColor: color,
171
+ }} />
172
+ </View>
173
+ );
174
+ }
175
+
176
+ // ─── Loading Spinner (three dots) ─────────────────────────────
177
+
178
+ export function LoadingDots({ size = 18, color = '#fff' }: { size?: number; color?: string }) {
179
+ const dotSize = size * 0.15;
180
+ return (
181
+ <View style={{ width: size, height: size, alignItems: 'center', justifyContent: 'center', flexDirection: 'row', gap: dotSize * 0.8 }}>
182
+ {[0.4, 0.7, 1].map((opacity, i) => (
183
+ <View key={i} style={{
184
+ width: dotSize,
185
+ height: dotSize,
186
+ borderRadius: dotSize / 2,
187
+ backgroundColor: color,
188
+ opacity,
189
+ }} />
190
+ ))}
191
+ </View>
192
+ );
193
+ }
194
+
195
+ // ─── Close / Dismiss (X mark) ─────────────────────────────────
196
+
197
+ export function CloseIcon({ size = 14, color = 'rgba(255,255,255,0.6)' }: { size?: number; color?: string }) {
198
+ const barW = size * 0.7;
199
+ const barH = size * 0.12;
200
+ return (
201
+ <View style={{ width: size, height: size, alignItems: 'center', justifyContent: 'center' }}>
202
+ <View style={{
203
+ position: 'absolute',
204
+ width: barW,
205
+ height: barH,
206
+ backgroundColor: color,
207
+ borderRadius: barH,
208
+ transform: [{ rotate: '45deg' }],
209
+ }} />
210
+ <View style={{
211
+ position: 'absolute',
212
+ width: barW,
213
+ height: barH,
214
+ backgroundColor: color,
215
+ borderRadius: barH,
216
+ transform: [{ rotate: '-45deg' }],
217
+ }} />
218
+ </View>
219
+ );
220
+ }
221
+
222
+ // ─── AI Badge (for FAB) ───────────────────────────────────────
223
+
224
+ export function AIBadge({ size = 28 }: { size?: number }) {
225
+ // Chat bubble — clean, universally represents AI assistant
226
+ const bubbleW = size * 0.6;
227
+ const bubbleH = size * 0.45;
228
+ const tailSize = size * 0.12;
229
+ return (
230
+ <View style={{ width: size, height: size, alignItems: 'center', justifyContent: 'center' }}>
231
+ {/* Bubble body */}
232
+ <View style={{
233
+ width: bubbleW,
234
+ height: bubbleH,
235
+ backgroundColor: '#fff',
236
+ borderRadius: size * 0.12,
237
+ marginBottom: tailSize * 0.5,
238
+ }} />
239
+ {/* Tail (small triangle at bottom-left) */}
240
+ <View style={{
241
+ position: 'absolute',
242
+ bottom: size * 0.18,
243
+ left: size * 0.22,
244
+ width: 0,
245
+ height: 0,
246
+ borderTopWidth: tailSize,
247
+ borderTopColor: '#fff',
248
+ borderRightWidth: tailSize,
249
+ borderRightColor: 'transparent',
250
+ }} />
251
+ </View>
252
+ );
253
+ }
@@ -146,6 +146,9 @@ export class AgentRuntime {
146
146
  }
147
147
  try {
148
148
  element.props.onChangeText(args.text);
149
+ // Wait for React to process the state update and re-render
150
+ // (same pattern as navigate tool's 500ms post-action delay)
151
+ await new Promise(resolve => setTimeout(resolve, 500));
149
152
  return `✅ Typed "${args.text}" into [${args.index}] "${element.label}"`;
150
153
  } catch (error: any) {
151
154
  return `❌ Error typing: ${error.message}`;
@@ -174,7 +177,7 @@ export class AgentRuntime {
174
177
  }
175
178
  }
176
179
 
177
- // React Navigation path: use navRef.navigate()
180
+ // React Navigation path: use navRef
178
181
  if (!this.navRef) {
179
182
  return '❌ Navigation ref not available.';
180
183
  }
@@ -188,10 +191,31 @@ export class AgentRuntime {
188
191
  const params = args.params ? (typeof args.params === 'string' ? JSON.parse(args.params) : args.params) : undefined;
189
192
  // Case-insensitive screen name matching
190
193
  const availableRoutes = this.getRouteNames();
194
+ logger.info('AgentRuntime', `🧭 Navigate requested: "${args.screen}" | Available: [${availableRoutes.join(', ')}] | Params: ${JSON.stringify(params)}`);
191
195
  const matchedScreen = availableRoutes.find(
192
196
  r => r.toLowerCase() === args.screen.toLowerCase()
193
- ) || args.screen;
194
- this.navRef.navigate(matchedScreen, params);
197
+ );
198
+
199
+ // Guard: screen must exist in the navigation tree
200
+ if (!matchedScreen) {
201
+ const errMsg = `❌ "${args.screen}" is not a screen — it may be content within a screen. Available screens: ${availableRoutes.join(', ')}. Look at the current screen context for "${args.screen}" as a section, category, or element, and scroll/tap to find it. If it's on a different screen, navigate to the correct screen first.`;
202
+ logger.warn('AgentRuntime', `🧭 Navigate REJECTED: ${errMsg}`);
203
+ return errMsg;
204
+ }
205
+ logger.info('AgentRuntime', `🧭 Navigate matched: "${args.screen}" → "${matchedScreen}"`);
206
+
207
+ // Find the path to the screen (handles nested navigators)
208
+ const screenPath = this.findScreenPath(matchedScreen);
209
+ if (screenPath.length > 1) {
210
+ // Nested screen: navigate using parent → { screen: child } pattern
211
+ // e.g. navigate('HomeTab', { screen: 'Home', params })
212
+ logger.info('AgentRuntime', `Nested navigation: ${screenPath.join(' → ')}`);
213
+ const nestedParams = this.buildNestedParams(screenPath, params);
214
+ this.navRef.navigate(screenPath[0], nestedParams);
215
+ } else {
216
+ // Top-level screen: direct navigate
217
+ this.navRef.navigate(matchedScreen, params);
218
+ }
195
219
  await new Promise(resolve => setTimeout(resolve, 500));
196
220
  return `✅ Navigated to "${matchedScreen}"${params ? ` with params: ${JSON.stringify(params)}` : ''}`;
197
221
  } catch (error: any) {
@@ -289,6 +313,49 @@ export class AgentRuntime {
289
313
  return [...new Set(names)];
290
314
  }
291
315
 
316
+ /**
317
+ * Find the path from root navigator to a target screen.
318
+ * Returns [parentTab, screen] for nested screens, or [screen] for top-level.
319
+ * Example: findScreenPath('Home') → ['HomeTab', 'Home']
320
+ */
321
+ private findScreenPath(targetScreen: string): string[] {
322
+ try {
323
+ const state = this.navRef?.getRootState?.() || this.navRef?.getState?.();
324
+ if (!state?.routes) return [targetScreen];
325
+
326
+ // Check if target is a direct top-level route
327
+ if (state.routes.some((r: any) => r.name === targetScreen)) {
328
+ return [targetScreen];
329
+ }
330
+
331
+ // Search nested navigators
332
+ for (const route of state.routes) {
333
+ const nestedNames = route.state ? this.collectRouteNames(route.state) : [];
334
+ if (nestedNames.includes(targetScreen)) {
335
+ return [route.name, targetScreen];
336
+ }
337
+ }
338
+
339
+ return [targetScreen]; // Fallback: try direct
340
+ } catch {
341
+ return [targetScreen];
342
+ }
343
+ }
344
+
345
+ /**
346
+ * Build nested params for React Navigation nested screen navigation.
347
+ * ['HomeTab', 'Home'] → { screen: 'Home', params }
348
+ * ['Tab', 'Stack', 'Screen'] → { screen: 'Stack', params: { screen: 'Screen', params } }
349
+ */
350
+ private buildNestedParams(path: string[], leafParams?: any): any {
351
+ // Build from the end: innermost screen gets the leafParams
352
+ let result = leafParams;
353
+ for (let i = path.length - 1; i >= 1; i--) {
354
+ result = { screen: path[i], ...(result !== undefined ? { params: result } : {}) };
355
+ }
356
+ return result;
357
+ }
358
+
292
359
  /**
293
360
  * Recursively find the deepest active screen name.
294
361
  * For tabs: follows active tab → active screen inside that tab.
@@ -62,6 +62,12 @@ Available tools:
62
62
  - ask_user(question): Ask the user for clarification ONLY when you cannot determine what action to take.
63
63
  </tools>
64
64
 
65
+ <custom_actions>
66
+ In addition to the built-in tools above, the app may register custom actions (e.g. checkout, addToCart). These appear as additional callable tools in your tool list.
67
+ When a custom action exists for something the user wants to do, ALWAYS call the action instead of tapping a UI button — even if you see a matching button on screen. Custom actions may include security flows like user confirmation dialogs.
68
+ If a UI element is hidden (aiIgnore) but a matching custom action exists, use the action.
69
+ </custom_actions>
70
+
65
71
  <rules>
66
72
  - There are 2 types of requests — always determine which type BEFORE acting:
67
73
  1. Information requests (e.g. "what's available?", "how much is X?", "list the items"):
@@ -162,11 +168,14 @@ plan: "Call done to report the cart contents to the user."
162
168
  }
163
169
 
164
170
  /**
165
- * Voice-optimized system prompt for the Gemini Live API.
171
+ * Voice-adapted system prompt for the Gemini Live API.
166
172
  *
167
- * Includes the same screen format and tool semantics as text mode,
168
- * but condensed for voice context and with guardrails against
169
- * unprompted actions.
173
+ * Uses the same core rules/tools/screen format as text mode (buildSystemPrompt)
174
+ * but adapted for voice interaction:
175
+ * - No agent-loop directives (no "MUST call agent_step on every step")
176
+ * - No agent_history/user_request references (voice is conversational)
177
+ * - Explicit "wait for user voice command" guardrails
178
+ * - Voice-specific UX (concise spoken responses)
170
179
  */
171
180
  export function buildVoiceSystemPrompt(
172
181
  language: string,
@@ -174,46 +183,90 @@ export function buildVoiceSystemPrompt(
174
183
  ): string {
175
184
  const isArabic = language === 'ar';
176
185
 
177
- let prompt = `You are a voice-controlled AI agent operating a React Native mobile app. You can see the screen content and interact with UI elements using tools.
178
-
179
- <language>
180
- ${isArabic ? 'Respond in Arabic.' : 'Respond in English.'}
181
- Use the same language as the user.
182
- </language>
186
+ let prompt = `You are a voice-controlled AI assistant for a React Native mobile app.
183
187
 
184
- <screen_format>
185
- You receive periodic screen updates showing the current UI. Interactive elements appear as:
186
- [index]<type attrs>label />
188
+ You always have access to the current screen context — it shows you exactly what the user sees on their phone. Use it to answer questions and execute actions when the user speaks a command. Wait for the user to speak a clear voice command before taking any action. Screen context updates arrive automatically as the UI changes.
187
189
 
188
- - index: numeric ID for interaction (use with tap/type tools)
190
+ <screen_state>
191
+ Interactive elements are listed as [index]<type attrs>label />
192
+ - index: numeric identifier for interaction
189
193
  - type: element type (pressable, text-input, switch)
190
- - attrs: state like value="true", checked="false", role="switch"
191
- - label: visible text content
194
+ - attrs: state attributes like value="true", checked="false", role="switch"
195
+ - label: visible text content of the element
192
196
 
193
- Only elements with [index] are interactive. Text without [] is display-only.
194
- Example: [5]<switch value="true">Order Updates /> means element 5 is a switch currently ON.
195
- </screen_format>
197
+ Only elements with [index] are interactive. Use the index to tap or type into them.
198
+ Pure text elements without [] are NOT interactive they are informational content you can read.
199
+ </screen_state>
196
200
 
197
201
  <tools>
198
202
  Available tools:
199
- - tap(index): Tap an element. For switches, this toggles their value.
200
- - type(index, text): Type text into a text-input.
201
- - navigate(screen): Navigate to a named screen.
202
- - done(text, success): Complete the task with a spoken response.
203
- - ask_user(question): Ask the user for clarification.
203
+ - tap(index): Tap an interactive element by its index. Works universally on buttons, switches, and custom components. For switches, this toggles their state.
204
+ - type(index, text): Type text into a text-input element by its index. ONLY works on text-input elements.
205
+ - navigate(screen, params): Navigate to a screen listed in Available Screens. ONLY use screen names from the Available Screens list — section titles, category names, or other visible text are content within a screen, not navigable screens.
206
+ - done(text, success): Complete task and respond to the user.
207
+
208
+ CRITICAL — tool call protocol:
209
+ When you decide to use a tool, emit the function call IMMEDIATELY as the first thing in your response — before any speech or audio output.
210
+ Speaking before a tool call causes a fatal connection error. Always: call the tool first, wait for the result, then speak about what happened.
211
+ Correct: [function call] → receive result → speak to user about the outcome.
212
+ Wrong: "Sure, let me tap on..." → [function call] → crash.
204
213
  </tools>
205
214
 
215
+ <custom_actions>
216
+ In addition to the built-in tools above, the app may register custom actions (e.g. checkout, addToCart). These appear as additional callable tools in your tool list.
217
+ When a custom action exists for something the user wants to do, ALWAYS call the action instead of tapping a UI button — even if you see a matching button on screen. Custom actions may include security flows like user confirmation dialogs.
218
+ If a UI element is hidden but a matching custom action exists, use the action.
219
+ </custom_actions>
220
+
206
221
  <rules>
207
- CRITICAL ACTION RULES:
208
- - ONLY perform actions (tap, type, navigate) when the user explicitly asks you to do something.
209
- - NEVER tap or navigate on your own initiative wait for the user's voice command.
210
- - When the user asks a question about what's on screen, answer verbally via done(). Do NOT tap anything.
211
- - When the user asks to toggle/enable/disable something, find the matching element by its label and use tap(index).
212
- - When a screen update arrives, do NOT interact with elements unless the user asked you to.
213
- - Use element indexes from the most recent screen update — they refresh every few seconds.
214
- - For switches: tap(index) toggles the value. You do NOT need to find a separate button.
215
- - Keep spoken responses concise the user is listening, not reading.
216
- </rules>`;
222
+ - There are 2 types of requests — always determine which type BEFORE acting:
223
+ 1. Information requests (e.g. "what's available?", "how much is X?", "list the items"):
224
+ Read the screen content and answer by speaking. Do NOT perform any tap/type/navigate actions.
225
+ 2. Action requests (e.g. "add margherita to cart", "go to checkout", "fill in my name"):
226
+ Execute the required UI interactions using tap/type/navigate tools.
227
+ - For action requests, determine whether the user gave specific step-by-step instructions or an open-ended task:
228
+ 1. Specific instructions: Follow each step precisely, do not skip.
229
+ 2. Open-ended tasks: Plan the steps yourself.
230
+ - Only interact with elements that have an [index].
231
+ - After tapping an element, the screen may change. Wait for updated screen context before the next action.
232
+ - If the current screen doesn't have what you need, use navigate() to go to another screen from the Available Screens list.
233
+ - If a tap navigates to another screen, the next screen context update will show the new screen's elements.
234
+ - Do not repeat one action more than 3 times unless conditions changed.
235
+ - After typing into a text input, check if the screen changed (e.g., suggestions or autocomplete appeared). If so, interact with the new elements.
236
+ - After typing into a search field, you may need to tap a search button, press enter, or select from a dropdown to complete the search.
237
+ - If the user request includes specific details (product type, price, category), use available filters or search to be more efficient.
238
+ - For destructive/purchase actions (place order, delete, pay), tap the button exactly ONCE. Do not repeat — the user could be charged multiple times.
239
+ - SECURITY & PRIVACY: Do not guess or auto-fill sensitive data (passwords, payment info, personal details). Ask the user verbally.
240
+ - SECURITY & PRIVACY: Do not fill in login/signup forms unless the user provides credentials.
241
+ - Do NOT ask for confirmation of actions the user explicitly requested. If they said "place my order", just do it.
242
+ </rules>
243
+
244
+ <capability>
245
+ - You can see the current screen context — use it to answer questions directly.
246
+ - It is ok to just provide information without performing any actions.
247
+ - It is ok to fail the task. The user would rather you report failure than repeat failed actions endlessly.
248
+ - The user can be wrong. If the request is not achievable, tell them.
249
+ - The app can have bugs. If something is not working as expected, tell the user.
250
+ - Trying too hard can be harmful. If stuck, tell the user what you accomplished and what remains.
251
+ </capability>
252
+
253
+ <speech_rules>
254
+ - Keep spoken output to 1-2 short sentences.
255
+ - Speak naturally — no markdown, no headers, no bullet points.
256
+ - Only speak confirmations and answers. Do not narrate your reasoning.
257
+ - Confirm what you did: summarize the action result briefly (e.g., "Added to cart" or "Navigated to Settings").
258
+ - Be transparent about errors: If an action fails, explain what failed and why.
259
+ - Track multi-item progress: For requests involving multiple items, keep track and report which ones succeeded and which did not.
260
+ - Stay on the user's screen: For information requests, read from the current screen. Only navigate away if the needed information is on another screen.
261
+ - When a request is ambiguous, pick the most common interpretation rather than always asking. State your assumption in your spoken response.
262
+ - Suggest next steps: After completing an action, briefly suggest what the user might want to do next.
263
+ - Be concise: Users are on mobile — avoid long speech.
264
+ </speech_rules>
265
+
266
+ <language_settings>
267
+ ${isArabic ? '- Working language: **Arabic**. Respond in Arabic.' : '- Working language: **English**. Respond in English.'}
268
+ - Use the same language as the user.
269
+ </language_settings>`;
217
270
 
218
271
  // Append user-provided instructions if any
219
272
  if (userInstructions?.trim()) {
@@ -5,6 +5,9 @@
5
5
  * PCM streaming from the microphone. Each chunk is converted from Float32
6
6
  * to Int16 PCM and base64-encoded for the Gemini Live API.
7
7
  *
8
+ * Echo cancellation is handled at the OS/hardware level via
9
+ * react-native-incall-manager (VOICE_COMMUNICATION mode) — not in JS.
10
+ *
8
11
  * Requires: react-native-audio-api (development build only, not Expo Go)
9
12
  */
10
13
 
@@ -32,6 +35,14 @@ export class AudioInputService {
32
35
  private status: RecordingStatus = 'idle';
33
36
  private recorder: any = null;
34
37
 
38
+ // Auto-recovery: detect when mic session dies after audio playback.
39
+ // This is a react-native-audio-api bug where AudioRecorder loses mic access
40
+ // after AudioBufferQueueSourceNode plays audio (audio session conflict).
41
+ private consecutiveSilentFrames = 0;
42
+ private isRecovering = false;
43
+ private static readonly SILENT_THRESHOLD = 0.01;
44
+ private static readonly SILENT_FRAMES_BEFORE_RESTART = 15;
45
+
35
46
  constructor(config: AudioInputConfig) {
36
47
  this.config = config;
37
48
  }
@@ -71,6 +82,7 @@ export class AudioInputService {
71
82
 
72
83
  // Create AudioRecorder
73
84
  this.recorder = new audioApi.AudioRecorder();
85
+ this.consecutiveSilentFrames = 0;
74
86
 
75
87
  const sampleRate = this.config.sampleRate || 16000;
76
88
  const bufferLength = this.config.bufferLength || 4096;
@@ -84,9 +96,53 @@ export class AudioInputService {
84
96
  try {
85
97
  // event.buffer is an AudioBuffer — get Float32 channel data
86
98
  const float32Data = event.buffer.getChannelData(0);
87
- // Convert Float32 → Int16 → base64 for Gemini
99
+
100
+ // Measure peak amplitude for diagnostics + silent detection
101
+ let maxAmp = 0;
102
+ for (let i = 0; i < float32Data.length; i++) {
103
+ const abs = Math.abs(float32Data[i] || 0);
104
+ if (abs > maxAmp) maxAmp = abs;
105
+ }
106
+
107
+ // Diagnostic: log amplitude on first 5 frames, then every 10th
108
+ if (frameCount <= 5 || frameCount % 10 === 0) {
109
+ logger.info('AudioInput', `🔬 Frame #${frameCount}: maxAmp=${maxAmp.toFixed(6)}, samples=${float32Data.length}`);
110
+ }
111
+
112
+ // ─── Auto-Recovery: Silent mic detection ─────────────
113
+ // After audio playback, react-native-audio-api's AudioRecorder
114
+ // can lose its mic session (all-zero frames). Detect this and
115
+ // restart the recorder to re-acquire the audio session.
116
+ if (maxAmp < AudioInputService.SILENT_THRESHOLD) {
117
+ this.consecutiveSilentFrames++;
118
+ if (
119
+ this.consecutiveSilentFrames >= AudioInputService.SILENT_FRAMES_BEFORE_RESTART &&
120
+ !this.isRecovering
121
+ ) {
122
+ this.isRecovering = true;
123
+ logger.warn('AudioInput', `⚠️ ${this.consecutiveSilentFrames} silent frames — restarting recorder...`);
124
+ this.restartRecorder().then(() => {
125
+ this.isRecovering = false;
126
+ this.consecutiveSilentFrames = 0;
127
+ logger.info('AudioInput', '✅ Recorder restarted — mic session re-acquired');
128
+ }).catch((err: any) => {
129
+ this.isRecovering = false;
130
+ logger.error('AudioInput', `❌ Recorder restart failed: ${err?.message || err}`);
131
+ });
132
+ return; // Skip this frame
133
+ }
134
+ } else {
135
+ // Got real audio — reset counter
136
+ if (this.consecutiveSilentFrames > 5) {
137
+ logger.info('AudioInput', `🎤 Mic recovered after ${this.consecutiveSilentFrames} silent frames`);
138
+ }
139
+ this.consecutiveSilentFrames = 0;
140
+ }
141
+
88
142
  const base64Chunk = float32ToInt16Base64(float32Data);
89
- logger.debug('AudioInput', `🎤 Frame #${frameCount}: size=${base64Chunk.length}`);
143
+ if (frameCount <= 5 || frameCount % 10 === 0) {
144
+ logger.info('AudioInput', `🎤 Frame #${frameCount}: chunk=${base64Chunk.length} chars, calling onAudioChunk...`);
145
+ }
90
146
  this.config.onAudioChunk(base64Chunk);
91
147
  } catch (err: any) {
92
148
  logger.error('AudioInput', `Frame processing error: ${err.message}`);
@@ -121,6 +177,7 @@ export class AudioInputService {
121
177
  }
122
178
  this.recorder = null;
123
179
  this.status = 'idle';
180
+ this.consecutiveSilentFrames = 0;
124
181
  logger.info('AudioInput', 'Streaming stopped');
125
182
  } catch (error: any) {
126
183
  logger.error('AudioInput', `Failed to stop: ${error.message}`);
@@ -129,6 +186,24 @@ export class AudioInputService {
129
186
  }
130
187
  }
131
188
 
189
+ // ─── Auto-Recovery ─────────────────────────────────────────
190
+
191
+ /**
192
+ * Restart the recorder to re-acquire the audio session.
193
+ * Fixes react-native-audio-api bug where AudioRecorder loses mic access
194
+ * after AudioBufferQueueSourceNode plays audio.
195
+ */
196
+ private async restartRecorder(): Promise<void> {
197
+ logger.info('AudioInput', '🔄 Restarting recorder for mic recovery...');
198
+ await this.stop();
199
+ // Brief pause to let the audio system release resources
200
+ await new Promise(resolve => setTimeout(resolve, 300));
201
+ const ok = await this.start();
202
+ if (!ok) {
203
+ throw new Error('Recorder restart failed');
204
+ }
205
+ }
206
+
132
207
  // ─── Status ───────────────────────────────────────────────
133
208
 
134
209
  get isRecording(): boolean {