npm - @mobileai/react-native - Versions diffs - 0.4.6 → 0.5.0 - Mend

@mobileai/react-native 0.4.6 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43) hide show

package/README.md +80 -4
package/lib/module/components/AIAgent.js +179 -38
package/lib/module/components/AIAgent.js.map +1 -1
package/lib/module/components/AgentChatBar.js +53 -29
package/lib/module/components/AgentChatBar.js.map +1 -1
package/lib/module/components/Icons.js +337 -0
package/lib/module/components/Icons.js.map +1 -0
package/lib/module/core/AgentRuntime.js +74 -3
package/lib/module/core/AgentRuntime.js.map +1 -1
package/lib/module/core/systemPrompt.js +57 -38
package/lib/module/core/systemPrompt.js.map +1 -1
package/lib/module/index.js +3 -9
package/lib/module/index.js.map +1 -1
package/lib/module/services/AudioInputService.js +73 -2
package/lib/module/services/AudioInputService.js.map +1 -1
package/lib/module/services/AudioOutputService.js +58 -5
package/lib/module/services/AudioOutputService.js.map +1 -1
package/lib/module/services/VoiceService.js +281 -275
package/lib/module/services/VoiceService.js.map +1 -1
package/lib/typescript/src/components/AIAgent.d.ts.map +1 -1
package/lib/typescript/src/components/AgentChatBar.d.ts.map +1 -1
package/lib/typescript/src/components/Icons.d.ts +43 -0
package/lib/typescript/src/components/Icons.d.ts.map +1 -0
package/lib/typescript/src/core/AgentRuntime.d.ts +12 -0
package/lib/typescript/src/core/AgentRuntime.d.ts.map +1 -1
package/lib/typescript/src/core/systemPrompt.d.ts.map +1 -1
package/lib/typescript/src/index.d.ts +4 -0
package/lib/typescript/src/index.d.ts.map +1 -1
package/lib/typescript/src/services/AudioInputService.d.ts +13 -0
package/lib/typescript/src/services/AudioInputService.d.ts.map +1 -1
package/lib/typescript/src/services/AudioOutputService.d.ts.map +1 -1
package/lib/typescript/src/services/VoiceService.d.ts +38 -29
package/lib/typescript/src/services/VoiceService.d.ts.map +1 -1
package/package.json +1 -1
package/src/components/AIAgent.tsx +192 -39
package/src/components/AgentChatBar.tsx +44 -25
package/src/components/Icons.tsx +253 -0
package/src/core/AgentRuntime.ts +70 -3
package/src/core/systemPrompt.ts +57 -38
package/src/index.ts +8 -8
package/src/services/AudioInputService.ts +77 -2
package/src/services/AudioOutputService.ts +59 -5
package/src/services/VoiceService.ts +278 -290

package/src/components/Icons.tsx ADDED Viewed

@@ -0,0 +1,253 @@
+/**
+ * Icons — Zero-dependency, View-based icons for the AI Agent chat bar.
+ *
+ * Why not emoji? iOS Simulator 26+ has a bug where emoji renders as "?".
+ * Why not Unicode symbols? They look obscure and unprofessional.
+ * Why not icon libraries? This is a library — zero runtime dependencies.
+ *
+ * These icons are built purely from React Native View components,
+ * rendering identically on every platform and screen size.
+ */
+import { View } from 'react-native';
+// ─── Mic Icon (pill + arc + stem + base) ──────────────────────
+/**
+ * Microphone glyph stacked top-to-bottom from four Views: a rounded pill
+ * (the head), a U-shaped arc that wraps the lower part of the head, a thin
+ * stem, and a rounded base.
+ *
+ * @param size  Edge length of the square bounding box; every part scales from it.
+ * @param color Fill / border colour shared by all four parts.
+ */
+export function MicIcon({ size = 20, color = '#fff' }: { size?: number; color?: string }) {
+  const headWidth = size * 0.4;
+  const headHeight = size * 0.5;
+  const cradleWidth = size * 0.55;
+  const stemThickness = size * 0.08;
+  const frameStyle = {
+    width: size,
+    height: size,
+    alignItems: 'center',
+    justifyContent: 'center',
+  } as const;
+  const headStyle = {
+    width: headWidth,
+    height: headHeight,
+    borderRadius: headWidth / 2,
+    backgroundColor: color,
+  };
+  const cradleStyle = {
+    width: cradleWidth,
+    height: size * 0.35,
+    borderBottomLeftRadius: cradleWidth / 2,
+    borderBottomRightRadius: cradleWidth / 2,
+    borderWidth: size * 0.07,
+    // No top border: leaves the arc open at the top so it reads as a "U".
+    borderTopWidth: 0,
+    borderColor: color,
+    // Negative margin pulls the arc upward so it overlaps the bottom of the head.
+    marginTop: -(headHeight * 0.3),
+  };
+  const stemStyle = {
+    width: stemThickness,
+    height: size * 0.18,
+    backgroundColor: color,
+    // 1px overlap with the arc above.
+    marginTop: -1,
+  };
+  const footStyle = {
+    width: size * 0.35,
+    height: stemThickness,
+    backgroundColor: color,
+    borderRadius: stemThickness / 2,
+  };
+  return (
+    <View style={frameStyle}>
+      <View style={headStyle} />
+      <View style={cradleStyle} />
+      <View style={stemStyle} />
+      <View style={footStyle} />
+    </View>
+  );
+}
+// ─── Speaker Icon (body + cone + sound wave / mute slash) ─────
+/**
+ * Loudspeaker glyph laid out in a row: a small rectangular body, a triangular
+ * cone built from borders, and then either a curved sound wave (unmuted) or a
+ * diagonal slash drawn across the whole icon (muted).
+ *
+ * @param size  Edge length of the square bounding box.
+ * @param color Colour used for every visible part.
+ * @param muted When true, renders the slash instead of the sound wave.
+ */
+export function SpeakerIcon({ size = 20, color = '#fff', muted = false }: { size?: number; color?: string; muted?: boolean }) {
+  const coneHalfHeight = size * 0.25;
+  const waveRadius = size * 0.15;
+  const rowFrameStyle = {
+    width: size,
+    height: size,
+    alignItems: 'center',
+    justifyContent: 'center',
+    flexDirection: 'row',
+  } as const;
+  const boxStyle = {
+    width: size * 0.25,
+    height: size * 0.3,
+    backgroundColor: color,
+    borderRadius: size * 0.03,
+  };
+  // Zero-size view: only the coloured left border is visible, forming a triangle.
+  const coneStyle = {
+    width: 0,
+    height: 0,
+    borderTopWidth: coneHalfHeight,
+    borderTopColor: 'transparent',
+    borderBottomWidth: coneHalfHeight,
+    borderBottomColor: 'transparent',
+    borderLeftWidth: size * 0.2,
+    borderLeftColor: color,
+    marginLeft: -1,
+  };
+  // Half-ring: full border minus the left edge, rounded on the right side only.
+  const waveStyle = {
+    width: waveRadius,
+    height: size * 0.3,
+    borderWidth: size * 0.05,
+    borderColor: color,
+    borderLeftWidth: 0,
+    borderTopLeftRadius: 0,
+    borderBottomLeftRadius: 0,
+    borderTopRightRadius: waveRadius,
+    borderBottomRightRadius: waveRadius,
+  };
+  const overlay = muted ? (
+    <View style={{
+      position: 'absolute',
+      width: size * 0.08,
+      height: size * 0.8,
+      backgroundColor: color,
+      borderRadius: size * 0.04,
+      transform: [{ rotate: '45deg' }],
+    }} />
+  ) : (
+    <View style={{ marginLeft: size * 0.05 }}>
+      <View style={waveStyle} />
+    </View>
+  );
+  return (
+    <View style={rowFrameStyle}>
+      <View style={boxStyle} />
+      <View style={coneStyle} />
+      {overlay}
+    </View>
+  );
+}
+// ─── Send Arrow (right-pointing triangle) ─────────────────────
+/**
+ * Send glyph: a filled triangle pointing to the right, drawn with the
+ * zero-size-view border technique (only the left border is coloured).
+ *
+ * @param size  Edge length of the square bounding box.
+ * @param color Fill colour of the triangle.
+ */
+export function SendArrowIcon({ size = 18, color = '#fff' }: { size?: number; color?: string }) {
+  const triangleHeight = size * 0.55;
+  const halfHeight = triangleHeight / 2;
+  const frameStyle = {
+    width: size,
+    height: size,
+    alignItems: 'center',
+    justifyContent: 'center',
+  } as const;
+  const triangleStyle = {
+    width: 0,
+    height: 0,
+    borderTopWidth: halfHeight,
+    borderTopColor: 'transparent',
+    borderBottomWidth: halfHeight,
+    borderBottomColor: 'transparent',
+    borderLeftWidth: triangleHeight * 0.85,
+    borderLeftColor: color,
+    // Shifts the triangle slightly to the right of the layout centre.
+    marginLeft: size * 0.1,
+  };
+  return (
+    <View style={frameStyle}>
+      <View style={triangleStyle} />
+    </View>
+  );
+}
+// ─── Stop Icon (filled square) ────────────────────────────────
+/**
+ * Stop glyph: a filled, slightly rounded square centred in the bounding box.
+ *
+ * @param size  Edge length of the square bounding box.
+ * @param color Fill colour of the inner square.
+ */
+export function StopIcon({ size = 18, color = '#fff' }: { size?: number; color?: string }) {
+  const edge = size * 0.45;
+  const frameStyle = {
+    width: size,
+    height: size,
+    alignItems: 'center',
+    justifyContent: 'center',
+  } as const;
+  const squareStyle = {
+    width: edge,
+    height: edge,
+    backgroundColor: color,
+    borderRadius: size * 0.05,
+  };
+  return (
+    <View style={frameStyle}>
+      <View style={squareStyle} />
+    </View>
+  );
+}
+// ─── Recording Dot (filled circle) ────────────────────────────
+/**
+ * Recording indicator: a filled circle centred in the bounding box.
+ * The component itself is static — it contains no animation; any pulsing
+ * effect would have to be applied by the caller.
+ *
+ * @param size  Edge length of the square bounding box.
+ * @param color Fill colour of the dot (defaults to a red tone).
+ */
+export function RecordingDot({ size = 18, color = '#FF3B30' }: { size?: number; color?: string }) {
+  const diameter = size * 0.45;
+  const frameStyle = {
+    width: size,
+    height: size,
+    alignItems: 'center',
+    justifyContent: 'center',
+  } as const;
+  const circleStyle = {
+    width: diameter,
+    height: diameter,
+    borderRadius: diameter / 2,
+    backgroundColor: color,
+  };
+  return (
+    <View style={frameStyle}>
+      <View style={circleStyle} />
+    </View>
+  );
+}
+// ─── Loading Indicator (three dots) ───────────────────────────
+/**
+ * Loading glyph: three small circles in a row with increasing opacity
+ * (0.4, 0.7, 1). The component is static — it does not animate.
+ *
+ * @param size  Edge length of the square bounding box.
+ * @param color Fill colour shared by all three dots.
+ */
+export function LoadingDots({ size = 18, color = '#fff' }: { size?: number; color?: string }) {
+  const diameter = size * 0.15;
+  const opacitySteps = [0.4, 0.7, 1];
+  const rowStyle = {
+    width: size,
+    height: size,
+    alignItems: 'center',
+    justifyContent: 'center',
+    flexDirection: 'row',
+    gap: diameter * 0.8,
+  } as const;
+  const dots = opacitySteps.map((opacity, i) => (
+    <View
+      key={i}
+      style={{
+        width: diameter,
+        height: diameter,
+        borderRadius: diameter / 2,
+        backgroundColor: color,
+        opacity,
+      }}
+    />
+  ));
+  return <View style={rowStyle}>{dots}</View>;
+}
+// ─── Close / Dismiss (X mark) ─────────────────────────────────
+/**
+ * Close glyph: two identical rounded bars, absolutely positioned on top of
+ * each other and rotated by +45° and -45° to form an "X".
+ *
+ * @param size  Edge length of the square bounding box.
+ * @param color Fill colour of both bars (defaults to translucent white).
+ */
+export function CloseIcon({ size = 14, color = 'rgba(255,255,255,0.6)' }: { size?: number; color?: string }) {
+  const length = size * 0.7;
+  const thickness = size * 0.12;
+  // Both strokes share every style except the rotation angle.
+  const stroke = (angle: string) => (
+    <View style={{
+      position: 'absolute',
+      width: length,
+      height: thickness,
+      backgroundColor: color,
+      borderRadius: thickness,
+      transform: [{ rotate: angle }],
+    }} />
+  );
+  return (
+    <View style={{ width: size, height: size, alignItems: 'center', justifyContent: 'center' }}>
+      {stroke('45deg')}
+      {stroke('-45deg')}
+    </View>
+  );
+}
+// ─── AI Badge (for FAB) ───────────────────────────────────────
+/**
+ * Chat-bubble glyph: a rounded rectangle (the bubble) plus a small triangular
+ * tail anchored near its bottom-left corner.
+ *
+ * @param size  Edge length of the square bounding box.
+ * @param color Fill colour of the bubble and its tail. Optional; defaults to
+ *              '#fff', which is what the badge always used before this prop
+ *              existed, so existing call sites render unchanged. Added so the
+ *              badge can be tinted like every other icon in this file.
+ */
+export function AIBadge({ size = 28, color = '#fff' }: { size?: number; color?: string }) {
+  const bubbleW = size * 0.6;
+  const bubbleH = size * 0.45;
+  const tailSize = size * 0.12;
+  return (
+    <View style={{ width: size, height: size, alignItems: 'center', justifyContent: 'center' }}>
+      {/* Bubble body */}
+      <View style={{
+        width: bubbleW,
+        height: bubbleH,
+        backgroundColor: color,
+        borderRadius: size * 0.12,
+        // Nudges the bubble upward to leave room for the tail below it.
+        marginBottom: tailSize * 0.5,
+      }} />
+      {/* Tail: zero-size view whose coloured top border + transparent right border form a triangle */}
+      <View style={{
+        position: 'absolute',
+        bottom: size * 0.18,
+        left: size * 0.22,
+        width: 0,
+        height: 0,
+        borderTopWidth: tailSize,
+        borderTopColor: color,
+        borderRightWidth: tailSize,
+        borderRightColor: 'transparent',
+      }} />
+    </View>
+  );
+}

package/src/core/AgentRuntime.ts CHANGED Viewed

@@ -146,6 +146,9 @@ export class AgentRuntime {
         }
         try {
           element.props.onChangeText(args.text);
+          // Wait for React to process the state update and re-render
+          // (same pattern as navigate tool's 500ms post-action delay)
+          await new Promise(resolve => setTimeout(resolve, 500));
           return `✅ Typed "${args.text}" into [${args.index}] "${element.label}"`;
         } catch (error: any) {
           return `❌ Error typing: ${error.message}`;
@@ -174,7 +177,7 @@ export class AgentRuntime {
           }
         }
-        // React Navigation path: use navRef.navigate()
+        // React Navigation path: use navRef
         if (!this.navRef) {
           return '❌ Navigation ref not available.';
         }
@@ -188,10 +191,31 @@ export class AgentRuntime {
           const params = args.params ? (typeof args.params === 'string' ? JSON.parse(args.params) : args.params) : undefined;
           // Case-insensitive screen name matching
           const availableRoutes = this.getRouteNames();
+          logger.info('AgentRuntime', `🧭 Navigate requested: "${args.screen}" | Available: [${availableRoutes.join(', ')}] | Params: ${JSON.stringify(params)}`);
           const matchedScreen = availableRoutes.find(
             r => r.toLowerCase() === args.screen.toLowerCase()
-          ) || args.screen;
-          this.navRef.navigate(matchedScreen, params);
+          );
+          // Guard: screen must exist in the navigation tree
+          if (!matchedScreen) {
+            const errMsg = `❌ "${args.screen}" is not a screen — it may be content within a screen. Available screens: ${availableRoutes.join(', ')}. Look at the current screen context for "${args.screen}" as a section, category, or element, and scroll/tap to find it. If it's on a different screen, navigate to the correct screen first.`;
+            logger.warn('AgentRuntime', `🧭 Navigate REJECTED: ${errMsg}`);
+            return errMsg;
+          }
+          logger.info('AgentRuntime', `🧭 Navigate matched: "${args.screen}" → "${matchedScreen}"`);
+          // Find the path to the screen (handles nested navigators)
+          const screenPath = this.findScreenPath(matchedScreen);
+          if (screenPath.length > 1) {
+            // Nested screen: navigate using parent → { screen: child } pattern
+            // e.g. navigate('HomeTab', { screen: 'Home', params })
+            logger.info('AgentRuntime', `Nested navigation: ${screenPath.join(' → ')}`);
+            const nestedParams = this.buildNestedParams(screenPath, params);
+            this.navRef.navigate(screenPath[0], nestedParams);
+          } else {
+            // Top-level screen: direct navigate
+            this.navRef.navigate(matchedScreen, params);
+          }
           await new Promise(resolve => setTimeout(resolve, 500));
           return `✅ Navigated to "${matchedScreen}"${params ? ` with params: ${JSON.stringify(params)}` : ''}`;
         } catch (error: any) {
@@ -289,6 +313,49 @@ export class AgentRuntime {
     return [...new Set(names)];
   }
+  /**
+   * Find the path from the root navigator down to a target screen.
+   *
+   * Returns `[screen]` for a top-level route, or the full chain of parent
+   * route names followed by the screen for nested routes, e.g.
+   *   findScreenPath('Home')    → ['HomeTab', 'Home']
+   *   findScreenPath('Details') → ['HomeTab', 'HomeStack', 'Details']
+   *
+   * Previously the result was capped at two entries (`[topRoute, screen]`),
+   * which produced a wrong `navigate(parent, { screen })` call whenever the
+   * target lived more than one navigator below the root. The chain is now
+   * followed through every nested state that reports the target.
+   *
+   * Only navigators that currently expose a `state` on their route can be
+   * descended into. When the exact position cannot be resolved, the remaining
+   * path falls back to just the target name (the original behavior).
+   *
+   * @param targetScreen Exact route name to locate (already case-matched by the caller).
+   * @returns Route names ordered from outermost parent to the target screen.
+   *          Never empty; on any error the result is `[targetScreen]`.
+   */
+  private findScreenPath(targetScreen: string): string[] {
+    // Resolves the path *below* a navigator whose subtree is known to contain the target.
+    const descend = (navState: any): string[] => {
+      const routes: any[] = Array.isArray(navState?.routes) ? navState.routes : [];
+      // Direct child: either a mounted route, or listed in the navigator's
+      // `routeNames` (NOTE(review): assumes the standard React Navigation state shape).
+      const isDirectChild =
+        routes.some((r: any) => r.name === targetScreen) ||
+        (Array.isArray(navState?.routeNames) && navState.routeNames.includes(targetScreen));
+      if (isDirectChild) return [targetScreen];
+      for (const route of routes) {
+        if (route.state && this.collectRouteNames(route.state).includes(targetScreen)) {
+          return [route.name, ...descend(route.state)];
+        }
+      }
+      // Could not pin down the exact level — same fallback the two-level lookup used.
+      return [targetScreen];
+    };
+    try {
+      const state = this.navRef?.getRootState?.() || this.navRef?.getState?.();
+      if (!state?.routes) return [targetScreen];
+      // Check if target is a direct top-level route
+      if (state.routes.some((r: any) => r.name === targetScreen)) {
+        return [targetScreen];
+      }
+      // Search nested navigators, following the chain as deep as it goes
+      for (const route of state.routes) {
+        const nestedNames = route.state ? this.collectRouteNames(route.state) : [];
+        if (nestedNames.includes(targetScreen)) {
+          return [route.name, ...descend(route.state)];
+        }
+      }
+      return [targetScreen]; // Fallback: try direct
+    } catch {
+      // Best-effort lookup: any failure degrades to a direct navigate attempt.
+      return [targetScreen];
+    }
+  }
+  /**
+   * Turn a screen path into the nested `params` object React Navigation
+   * expects as the second argument of `navigate(path[0], …)`.
+   *
+   *   ['HomeTab', 'Home']        → { screen: 'Home', params }
+   *   ['Tab', 'Stack', 'Screen'] → { screen: 'Stack', params: { screen: 'Screen', params } }
+   *
+   * The first path entry is the navigate target itself and is therefore not
+   * part of the result. A `params` key is only emitted when there is
+   * something to put in it. For a path with fewer than two entries the
+   * `leafParams` value is returned unchanged.
+   *
+   * @param path       Route names from outermost parent to target screen.
+   * @param leafParams Params intended for the innermost (target) screen.
+   */
+  private buildNestedParams(path: string[], leafParams?: any): any {
+    // Fold from the innermost screen outwards, wrapping the accumulated value each step.
+    const [, ...descendants] = path;
+    return descendants.reduceRight(
+      (inner: any, screen: string) =>
+        inner === undefined ? { screen } : { screen, params: inner },
+      leafParams,
+    );
+  }
   /**
    * Recursively find the deepest active screen name.
    * For tabs: follows active tab → active screen inside that tab.

package/src/core/systemPrompt.ts CHANGED Viewed

@@ -62,6 +62,12 @@ Available tools:
 - ask_user(question): Ask the user for clarification ONLY when you cannot determine what action to take.
 </tools>
+<custom_actions>
+In addition to the built-in tools above, the app may register custom actions (e.g. checkout, addToCart). These appear as additional callable tools in your tool list.
+When a custom action exists for something the user wants to do, ALWAYS call the action instead of tapping a UI button — even if you see a matching button on screen. Custom actions may include security flows like user confirmation dialogs.
+If a UI element is hidden (aiIgnore) but a matching custom action exists, use the action.
+</custom_actions>
 <rules>
 - There are 2 types of requests — always determine which type BEFORE acting:
   1. Information requests (e.g. "what's available?", "how much is X?", "list the items"):
@@ -177,12 +183,9 @@ export function buildVoiceSystemPrompt(
 ): string {
   const isArabic = language === 'ar';
-  let prompt = `You are a voice-controlled AI agent operating a React Native mobile app. You receive periodic screen updates showing what's currently visible, and you can interact with UI elements using tools. You respond to the user via spoken audio.
+  let prompt = `You are a voice-controlled AI assistant for a React Native mobile app.
-<language_settings>
-${isArabic ? '- Working language: **Arabic**. Respond in Arabic.' : '- Working language: **English**. Respond in English.'}
-- Use the same language as the user. Return in user's language.
-</language_settings>
+You always have access to the current screen context — it shows you exactly what the user sees on their phone. Use it to answer questions and execute actions when the user speaks a command. Wait for the user to speak a clear voice command before taking any action. Screen context updates arrive automatically as the UI changes.
 <screen_state>
 Interactive elements are listed as [index]<type attrs>label />
@@ -198,56 +201,72 @@ Pure text elements without [] are NOT interactive — they are informational con
 <tools>
 Available tools:
 - tap(index): Tap an interactive element by its index. Works universally on buttons, switches, and custom components. For switches, this toggles their state.
-- type(index, text): Type text into a text-input element by its index.
-- navigate(screen, params): Navigate to a specific screen. params is optional JSON object.
-- done(text, success): Complete task. Text is your final response to the user.
-- ask_user(question): Ask the user for clarification ONLY when you cannot determine what action to take.
-When you need to perform an action, call the appropriate tool function directly.
+- type(index, text): Type text into a text-input element by its index. ONLY works on text-input elements.
+- navigate(screen, params): Navigate to a screen listed in Available Screens. ONLY use screen names from the Available Screens list — section titles, category names, or other visible text are content within a screen, not navigable screens.
+- done(text, success): Complete task and respond to the user.
+CRITICAL — tool call protocol:
+When you decide to use a tool, emit the function call IMMEDIATELY as the first thing in your response — before any speech or audio output.
+Speaking before a tool call causes a fatal connection error. Always: call the tool first, wait for the result, then speak about what happened.
+Correct: [function call] → receive result → speak to user about the outcome.
+Wrong: "Sure, let me tap on..." → [function call] → crash.
 </tools>
-<voice_interaction_rules>
-CRITICAL — THESE RULES OVERRIDE EVERYTHING ELSE:
-- You are in a LIVE VOICE conversation. Wait for the user to SPEAK before doing anything.
-- Screen updates arrive as passive context — they are NOT commands. Do NOT act on them.
-- ONLY take action (tap, type, navigate) when the user explicitly asks you to via voice.
-- When you have NO voice command from the user, stay silent. Do NOT narrate the screen.
-- When the user speaks, determine the request type BEFORE acting:
-  1. Information requests ("what's on screen?", "how much is X?"): Respond with spoken audio. Do NOT call any tools.
-  2. Action requests ("go to settings", "add pizza to cart"): Call the appropriate tool function directly (e.g. navigate, tap).
-- After completing an action, speak a brief confirmation to the user.
-- Keep all spoken responses concise — the user is listening, not reading.
-</voice_interaction_rules>
+<custom_actions>
+In addition to the built-in tools above, the app may register custom actions (e.g. checkout, addToCart). These appear as additional callable tools in your tool list.
+When a custom action exists for something the user wants to do, ALWAYS call the action instead of tapping a UI button — even if you see a matching button on screen. Custom actions may include security flows like user confirmation dialogs.
+If a UI element is hidden but a matching custom action exists, use the action.
+</custom_actions>
 <rules>
 - There are 2 types of requests — always determine which type BEFORE acting:
   1. Information requests (e.g. "what's available?", "how much is X?", "list the items"):
-     Respond verbally with the answer. Do NOT perform any tap/type/navigate actions.
+     Read the screen content and answer by speaking. Do NOT perform any tap/type/navigate actions.
   2. Action requests (e.g. "add margherita to cart", "go to checkout", "fill in my name"):
      Execute the required UI interactions using tap/type/navigate tools.
+- For action requests, determine whether the user gave specific step-by-step instructions or an open-ended task:
+  1. Specific instructions: Follow each step precisely, do not skip.
+  2. Open-ended tasks: Plan the steps yourself.
 - Only interact with elements that have an [index].
-- If the current screen doesn't have what you need, use navigate() to go to another screen.
-- When the user asks to go to a specific screen by name and it's listed in Available Screens, use navigate(screen) instead of tapping.
-- Do not repeat one action for more than 3 times unless conditions changed.
-- Do not fill in login/signup forms unless the user provides credentials. If asked to log in, use ask_user to request their email and password first.
-- Do not guess or auto-fill sensitive data (passwords, payment info, personal details). Always ask the user.
-- If stuck, tell the user what happened rather than repeating failed actions.
+- After tapping an element, the screen may change. Wait for updated screen context before the next action.
+- If the current screen doesn't have what you need, use navigate() to go to another screen from the Available Screens list.
+- If a tap navigates to another screen, the next screen context update will show the new screen's elements.
+- Do not repeat one action more than 3 times unless conditions changed.
+- After typing into a text input, check if the screen changed (e.g., suggestions or autocomplete appeared). If so, interact with the new elements.
+- After typing into a search field, you may need to tap a search button, press enter, or select from a dropdown to complete the search.
+- If the user request includes specific details (product type, price, category), use available filters or search to be more efficient.
+- For destructive/purchase actions (place order, delete, pay), tap the button exactly ONCE. Do not repeat — the user could be charged multiple times.
+- SECURITY & PRIVACY: Do not guess or auto-fill sensitive data (passwords, payment info, personal details). Ask the user verbally.
+- SECURITY & PRIVACY: Do not fill in login/signup forms unless the user provides credentials.
+- Do NOT ask for confirmation of actions the user explicitly requested. If they said "place my order", just do it.
 </rules>
 <capability>
+- You can see the current screen context — use it to answer questions directly.
 - It is ok to just provide information without performing any actions.
-- User can ask questions about what's on screen — answer them directly by speaking.
-- It is ok to fail the task. User would rather you report failure than repeat failed actions endlessly.
-- The user can be wrong. If the request is not achievable, tell the user.
+- It is ok to fail the task. The user would rather you report failure than repeat failed actions endlessly.
+- The user can be wrong. If the request is not achievable, tell them.
+- The app can have bugs. If something is not working as expected, tell the user.
+- Trying too hard can be harmful. If stuck, tell the user what you accomplished and what remains.
 </capability>
-<ux_rules>
-- Confirm what you did: When completing actions, briefly say what happened.
+<speech_rules>
+- Keep spoken output to 1-2 short sentences.
+- Speak naturally — no markdown, no headers, no bullet points.
+- Only speak confirmations and answers. Do not narrate your reasoning.
+- Confirm what you did: summarize the action result briefly (e.g., "Added to cart" or "Navigated to Settings").
 - Be transparent about errors: If an action fails, explain what failed and why.
-- Be concise: Keep spoken responses short and clear. No walls of text.
+- Track multi-item progress: For requests involving multiple items, keep track and report which ones succeeded and which did not.
+- Stay on the user's screen: For information requests, read from the current screen. Only navigate away if the needed information is on another screen.
+- When a request is ambiguous, pick the most common interpretation rather than always asking. State your assumption in your spoken response.
 - Suggest next steps: After completing an action, briefly suggest what the user might want to do next.
-- When a request is ambiguous, pick the most common interpretation and state your assumption.
-</ux_rules>`;
+- Be concise: Users are on mobile — avoid long speech.
+</speech_rules>
+<language_settings>
+${isArabic ? '- Working language: **Arabic**. Respond in Arabic.' : '- Working language: **English**. Respond in English.'}
+- Use the same language as the user.
+</language_settings>`;
   // Append user-provided instructions if any
   if (userInstructions?.trim()) {

package/src/index.ts CHANGED Viewed

@@ -12,9 +12,9 @@ export { AIAgent } from './components/AIAgent';
 export { useAction } from './hooks/useAction';
 // ─── Services ────────────────────────────────────────────────
-// export { VoiceService } from './services/VoiceService';
-// export { AudioInputService } from './services/AudioInputService';
-// export { AudioOutputService } from './services/AudioOutputService';
+export { VoiceService } from './services/VoiceService';
+export { AudioInputService } from './services/AudioInputService';
+export { AudioOutputService } from './services/AudioOutputService';
 // ─── Utilities ───────────────────────────────────────────────
 export { logger } from './utils/logger';
@@ -31,8 +31,8 @@ export type {
   TokenUsage,
 } from './core/types';
-// export type {
-//   VoiceServiceConfig,
-//   VoiceServiceCallbacks,
-//   VoiceStatus,
-// } from './services/VoiceService';
+export type {
+  VoiceServiceConfig,
+  VoiceServiceCallbacks,
+  VoiceStatus,
+} from './services/VoiceService';

package/src/services/AudioInputService.ts CHANGED Viewed

@@ -5,6 +5,9 @@
  * PCM streaming from the microphone. Each chunk is converted from Float32
  * to Int16 PCM and base64-encoded for the Gemini Live API.
  *
+ * Echo cancellation is handled at the OS/hardware level via
+ * react-native-incall-manager (VOICE_COMMUNICATION mode) — not in JS.
+ *
  * Requires: react-native-audio-api (development build only, not Expo Go)
  */
@@ -32,6 +35,14 @@ export class AudioInputService {
   private status: RecordingStatus = 'idle';
   private recorder: any = null;
+  // Auto-recovery: detect when mic session dies after audio playback.
+  // This is a react-native-audio-api bug where AudioRecorder loses mic access
+  // after AudioBufferQueueSourceNode plays audio (audio session conflict).
+  private consecutiveSilentFrames = 0;
+  private isRecovering = false;
+  private static readonly SILENT_THRESHOLD = 0.01;
+  private static readonly SILENT_FRAMES_BEFORE_RESTART = 15;
   constructor(config: AudioInputConfig) {
     this.config = config;
   }
@@ -71,6 +82,7 @@ export class AudioInputService {
       // Create AudioRecorder
       this.recorder = new audioApi.AudioRecorder();
+      this.consecutiveSilentFrames = 0;
       const sampleRate = this.config.sampleRate || 16000;
       const bufferLength = this.config.bufferLength || 4096;
@@ -84,9 +96,53 @@ export class AudioInputService {
           try {
             // event.buffer is an AudioBuffer — get Float32 channel data
             const float32Data = event.buffer.getChannelData(0);
-            // Convert Float32 → Int16 → base64 for Gemini
+            // Measure peak amplitude for diagnostics + silent detection
+            let maxAmp = 0;
+            for (let i = 0; i < float32Data.length; i++) {
+              const abs = Math.abs(float32Data[i] || 0);
+              if (abs > maxAmp) maxAmp = abs;
+            }
+            // Diagnostic: log amplitude on first 5 frames, then every 10th
+            if (frameCount <= 5 || frameCount % 10 === 0) {
+              logger.info('AudioInput', `🔬 Frame #${frameCount}: maxAmp=${maxAmp.toFixed(6)}, samples=${float32Data.length}`);
+            }
+            // ─── Auto-Recovery: Silent mic detection ─────────────
+            // After audio playback, react-native-audio-api's AudioRecorder
+            // can lose its mic session (all-zero frames). Detect this and
+            // restart the recorder to re-acquire the audio session.
+            if (maxAmp < AudioInputService.SILENT_THRESHOLD) {
+              this.consecutiveSilentFrames++;
+              if (
+                this.consecutiveSilentFrames >= AudioInputService.SILENT_FRAMES_BEFORE_RESTART &&
+                !this.isRecovering
+              ) {
+                this.isRecovering = true;
+                logger.warn('AudioInput', `⚠️ ${this.consecutiveSilentFrames} silent frames — restarting recorder...`);
+                this.restartRecorder().then(() => {
+                  this.isRecovering = false;
+                  this.consecutiveSilentFrames = 0;
+                  logger.info('AudioInput', '✅ Recorder restarted — mic session re-acquired');
+                }).catch((err: any) => {
+                  this.isRecovering = false;
+                  logger.error('AudioInput', `❌ Recorder restart failed: ${err?.message || err}`);
+                });
+                return; // Skip this frame
+              }
+            } else {
+              // Got real audio — reset counter
+              if (this.consecutiveSilentFrames > 5) {
+                logger.info('AudioInput', `🎤 Mic recovered after ${this.consecutiveSilentFrames} silent frames`);
+              }
+              this.consecutiveSilentFrames = 0;
+            }
             const base64Chunk = float32ToInt16Base64(float32Data);
-            logger.debug('AudioInput', `🎤 Frame #${frameCount}: size=${base64Chunk.length}`);
+            if (frameCount <= 5 || frameCount % 10 === 0) {
+              logger.info('AudioInput', `🎤 Frame #${frameCount}: chunk=${base64Chunk.length} chars, calling onAudioChunk...`);
+            }
             this.config.onAudioChunk(base64Chunk);
           } catch (err: any) {
             logger.error('AudioInput', `Frame processing error: ${err.message}`);
@@ -121,6 +177,7 @@ export class AudioInputService {
       }
       this.recorder = null;
       this.status = 'idle';
+      this.consecutiveSilentFrames = 0;
       logger.info('AudioInput', 'Streaming stopped');
     } catch (error: any) {
       logger.error('AudioInput', `Failed to stop: ${error.message}`);
@@ -129,6 +186,24 @@ export class AudioInputService {
     }
   }
+  // ─── Auto-Recovery ─────────────────────────────────────────
+  /**
+   * Tear the recorder down and bring it back up so the microphone session is
+   * re-acquired. Works around a react-native-audio-api issue where
+   * AudioRecorder loses mic access after AudioBufferQueueSourceNode plays audio.
+   *
+   * Sequence: stop() → 300 ms pause → start().
+   *
+   * @throws Error('Recorder restart failed') when start() reports failure.
+   */
+  private async restartRecorder(): Promise<void> {
+    logger.info('AudioInput', '🔄 Restarting recorder for mic recovery...');
+    await this.stop();
+    // Give the audio system a moment to release its resources before re-opening.
+    const releaseDelayMs = 300;
+    await new Promise(done => setTimeout(done, releaseDelayMs));
+    const restarted = await this.start();
+    if (restarted) {
+      return;
+    }
+    throw new Error('Recorder restart failed');
+  }
   // ─── Status ───────────────────────────────────────────────
   get isRecording(): boolean {