@mobileai/react-native 0.4.5 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/README.md +80 -15
  2. package/lib/module/components/AIAgent.js +181 -38
  3. package/lib/module/components/AIAgent.js.map +1 -1
  4. package/lib/module/components/AgentChatBar.js +53 -29
  5. package/lib/module/components/AgentChatBar.js.map +1 -1
  6. package/lib/module/components/Icons.js +337 -0
  7. package/lib/module/components/Icons.js.map +1 -0
  8. package/lib/module/core/AgentRuntime.js +74 -3
  9. package/lib/module/core/AgentRuntime.js.map +1 -1
  10. package/lib/module/core/systemPrompt.js +87 -34
  11. package/lib/module/core/systemPrompt.js.map +1 -1
  12. package/lib/module/services/AudioInputService.js +73 -2
  13. package/lib/module/services/AudioInputService.js.map +1 -1
  14. package/lib/module/services/AudioOutputService.js +58 -5
  15. package/lib/module/services/AudioOutputService.js.map +1 -1
  16. package/lib/module/services/VoiceService.js +284 -239
  17. package/lib/module/services/VoiceService.js.map +1 -1
  18. package/lib/typescript/src/components/AIAgent.d.ts.map +1 -1
  19. package/lib/typescript/src/components/AgentChatBar.d.ts.map +1 -1
  20. package/lib/typescript/src/components/Icons.d.ts +43 -0
  21. package/lib/typescript/src/components/Icons.d.ts.map +1 -0
  22. package/lib/typescript/src/core/AgentRuntime.d.ts +12 -0
  23. package/lib/typescript/src/core/AgentRuntime.d.ts.map +1 -1
  24. package/lib/typescript/src/core/systemPrompt.d.ts +7 -4
  25. package/lib/typescript/src/core/systemPrompt.d.ts.map +1 -1
  26. package/lib/typescript/src/services/AudioInputService.d.ts +13 -0
  27. package/lib/typescript/src/services/AudioInputService.d.ts.map +1 -1
  28. package/lib/typescript/src/services/AudioOutputService.d.ts.map +1 -1
  29. package/lib/typescript/src/services/VoiceService.d.ts +41 -24
  30. package/lib/typescript/src/services/VoiceService.d.ts.map +1 -1
  31. package/package.json +1 -1
  32. package/src/components/AIAgent.tsx +194 -38
  33. package/src/components/AgentChatBar.tsx +44 -25
  34. package/src/components/Icons.tsx +253 -0
  35. package/src/core/AgentRuntime.ts +70 -3
  36. package/src/core/systemPrompt.ts +87 -34
  37. package/src/services/AudioInputService.ts +77 -2
  38. package/src/services/AudioOutputService.ts +59 -5
  39. package/src/services/VoiceService.ts +280 -252
@@ -0,0 +1,253 @@
1
+ /**
2
+ * Icons — Zero-dependency, View-based icons for the AI Agent chat bar.
3
+ *
4
+ * Why not emoji? iOS Simulator 26+ has a bug where emoji renders as "?".
5
+ * Why not Unicode symbols? They look obscure and unprofessional.
6
+ * Why not icon libraries? This is a library — zero runtime dependencies.
7
+ *
8
+ * These icons are built purely from React Native View components,
9
+ * rendering identically on every platform and screen size.
10
+ */
11
+
12
+ import { View } from 'react-native';
13
+
14
+ // ─── Mic Icon (pill + stem + base) ────────────────────────────
15
+
16
+ export function MicIcon({ size = 20, color = '#fff' }: { size?: number; color?: string }) {
17
+ const pillW = size * 0.4;
18
+ const pillH = size * 0.5;
19
+ const stemW = size * 0.08;
20
+ const stemH = size * 0.18;
21
+ const baseW = size * 0.35;
22
+ const arcW = size * 0.55;
23
+ const arcH = size * 0.35;
24
+ const arcBorder = size * 0.07;
25
+
26
+ return (
27
+ <View style={{ width: size, height: size, alignItems: 'center', justifyContent: 'center' }}>
28
+ {/* Pill (mic head) */}
29
+ <View style={{
30
+ width: pillW,
31
+ height: pillH,
32
+ borderRadius: pillW / 2,
33
+ backgroundColor: color,
34
+ }} />
35
+ {/* Arc (U-shape around mic) */}
36
+ <View style={{
37
+ width: arcW,
38
+ height: arcH,
39
+ borderBottomLeftRadius: arcW / 2,
40
+ borderBottomRightRadius: arcW / 2,
41
+ borderWidth: arcBorder,
42
+ borderTopWidth: 0,
43
+ borderColor: color,
44
+ marginTop: -(pillH * 0.3),
45
+ }} />
46
+ {/* Stem */}
47
+ <View style={{
48
+ width: stemW,
49
+ height: stemH,
50
+ backgroundColor: color,
51
+ marginTop: -1,
52
+ }} />
53
+ {/* Base */}
54
+ <View style={{
55
+ width: baseW,
56
+ height: stemW,
57
+ backgroundColor: color,
58
+ borderRadius: stemW / 2,
59
+ }} />
60
+ </View>
61
+ );
62
+ }
63
+
64
+ // ─── Speaker Icon (cone + sound waves) ────────────────────────
65
+
66
+ export function SpeakerIcon({ size = 20, color = '#fff', muted = false }: { size?: number; color?: string; muted?: boolean }) {
67
+ const bodyW = size * 0.25;
68
+ const bodyH = size * 0.3;
69
+ const coneW = size * 0.2;
70
+
71
+ return (
72
+ <View style={{ width: size, height: size, alignItems: 'center', justifyContent: 'center', flexDirection: 'row' }}>
73
+ {/* Speaker body (rectangle) */}
74
+ <View style={{
75
+ width: bodyW,
76
+ height: bodyH,
77
+ backgroundColor: color,
78
+ borderRadius: size * 0.03,
79
+ }} />
80
+ {/* Speaker cone (triangle via borders) */}
81
+ <View style={{
82
+ width: 0,
83
+ height: 0,
84
+ borderTopWidth: size * 0.25,
85
+ borderTopColor: 'transparent',
86
+ borderBottomWidth: size * 0.25,
87
+ borderBottomColor: 'transparent',
88
+ borderLeftWidth: coneW,
89
+ borderLeftColor: color,
90
+ marginLeft: -1,
91
+ }} />
92
+ {muted ? (
93
+ /* Mute slash */
94
+ <View style={{
95
+ position: 'absolute',
96
+ width: size * 0.08,
97
+ height: size * 0.8,
98
+ backgroundColor: color,
99
+ borderRadius: size * 0.04,
100
+ transform: [{ rotate: '45deg' }],
101
+ }} />
102
+ ) : (
103
+ /* Sound waves */
104
+ <View style={{ marginLeft: size * 0.05 }}>
105
+ <View style={{
106
+ width: size * 0.15,
107
+ height: size * 0.3,
108
+ borderWidth: size * 0.05,
109
+ borderColor: color,
110
+ borderLeftWidth: 0,
111
+ borderTopLeftRadius: 0,
112
+ borderBottomLeftRadius: 0,
113
+ borderTopRightRadius: size * 0.15,
114
+ borderBottomRightRadius: size * 0.15,
115
+ }} />
116
+ </View>
117
+ )}
118
+ </View>
119
+ );
120
+ }
121
+
122
+ // ─── Send Arrow (upward arrow) ────────────────────────────────
123
+
124
+ export function SendArrowIcon({ size = 18, color = '#fff' }: { size?: number; color?: string }) {
125
+ // Filled right-pointing triangle (like iOS Messages send button)
126
+ const triH = size * 0.55;
127
+ return (
128
+ <View style={{ width: size, height: size, alignItems: 'center', justifyContent: 'center' }}>
129
+ <View style={{
130
+ width: 0,
131
+ height: 0,
132
+ borderTopWidth: triH / 2,
133
+ borderTopColor: 'transparent',
134
+ borderBottomWidth: triH / 2,
135
+ borderBottomColor: 'transparent',
136
+ borderLeftWidth: triH * 0.85,
137
+ borderLeftColor: color,
138
+ marginLeft: size * 0.1,
139
+ }} />
140
+ </View>
141
+ );
142
+ }
143
+
144
+ // ─── Stop Icon (filled square) ────────────────────────────────
145
+
146
+ export function StopIcon({ size = 18, color = '#fff' }: { size?: number; color?: string }) {
147
+ const sq = size * 0.45;
148
+ return (
149
+ <View style={{ width: size, height: size, alignItems: 'center', justifyContent: 'center' }}>
150
+ <View style={{
151
+ width: sq,
152
+ height: sq,
153
+ backgroundColor: color,
154
+ borderRadius: size * 0.05,
155
+ }} />
156
+ </View>
157
+ );
158
+ }
159
+
160
+ // ─── Recording Dot (pulsing filled circle) ────────────────────
161
+
162
+ export function RecordingDot({ size = 18, color = '#FF3B30' }: { size?: number; color?: string }) {
163
+ const dotSize = size * 0.45;
164
+ return (
165
+ <View style={{ width: size, height: size, alignItems: 'center', justifyContent: 'center' }}>
166
+ <View style={{
167
+ width: dotSize,
168
+ height: dotSize,
169
+ borderRadius: dotSize / 2,
170
+ backgroundColor: color,
171
+ }} />
172
+ </View>
173
+ );
174
+ }
175
+
176
+ // ─── Loading Spinner (three dots) ─────────────────────────────
177
+
178
+ export function LoadingDots({ size = 18, color = '#fff' }: { size?: number; color?: string }) {
179
+ const dotSize = size * 0.15;
180
+ return (
181
+ <View style={{ width: size, height: size, alignItems: 'center', justifyContent: 'center', flexDirection: 'row', gap: dotSize * 0.8 }}>
182
+ {[0.4, 0.7, 1].map((opacity, i) => (
183
+ <View key={i} style={{
184
+ width: dotSize,
185
+ height: dotSize,
186
+ borderRadius: dotSize / 2,
187
+ backgroundColor: color,
188
+ opacity,
189
+ }} />
190
+ ))}
191
+ </View>
192
+ );
193
+ }
194
+
195
+ // ─── Close / Dismiss (X mark) ─────────────────────────────────
196
+
197
+ export function CloseIcon({ size = 14, color = 'rgba(255,255,255,0.6)' }: { size?: number; color?: string }) {
198
+ const barW = size * 0.7;
199
+ const barH = size * 0.12;
200
+ return (
201
+ <View style={{ width: size, height: size, alignItems: 'center', justifyContent: 'center' }}>
202
+ <View style={{
203
+ position: 'absolute',
204
+ width: barW,
205
+ height: barH,
206
+ backgroundColor: color,
207
+ borderRadius: barH,
208
+ transform: [{ rotate: '45deg' }],
209
+ }} />
210
+ <View style={{
211
+ position: 'absolute',
212
+ width: barW,
213
+ height: barH,
214
+ backgroundColor: color,
215
+ borderRadius: barH,
216
+ transform: [{ rotate: '-45deg' }],
217
+ }} />
218
+ </View>
219
+ );
220
+ }
221
+
222
+ // ─── AI Badge (for FAB) ───────────────────────────────────────
223
+
224
+ export function AIBadge({ size = 28 }: { size?: number }) {
225
+ // Chat bubble — clean, universally represents AI assistant
226
+ const bubbleW = size * 0.6;
227
+ const bubbleH = size * 0.45;
228
+ const tailSize = size * 0.12;
229
+ return (
230
+ <View style={{ width: size, height: size, alignItems: 'center', justifyContent: 'center' }}>
231
+ {/* Bubble body */}
232
+ <View style={{
233
+ width: bubbleW,
234
+ height: bubbleH,
235
+ backgroundColor: '#fff',
236
+ borderRadius: size * 0.12,
237
+ marginBottom: tailSize * 0.5,
238
+ }} />
239
+ {/* Tail (small triangle at bottom-left) */}
240
+ <View style={{
241
+ position: 'absolute',
242
+ bottom: size * 0.18,
243
+ left: size * 0.22,
244
+ width: 0,
245
+ height: 0,
246
+ borderTopWidth: tailSize,
247
+ borderTopColor: '#fff',
248
+ borderRightWidth: tailSize,
249
+ borderRightColor: 'transparent',
250
+ }} />
251
+ </View>
252
+ );
253
+ }
@@ -146,6 +146,9 @@ export class AgentRuntime {
146
146
  }
147
147
  try {
148
148
  element.props.onChangeText(args.text);
149
+ // Wait for React to process the state update and re-render
150
+ // (same pattern as navigate tool's 500ms post-action delay)
151
+ await new Promise(resolve => setTimeout(resolve, 500));
149
152
  return `✅ Typed "${args.text}" into [${args.index}] "${element.label}"`;
150
153
  } catch (error: any) {
151
154
  return `❌ Error typing: ${error.message}`;
@@ -174,7 +177,7 @@ export class AgentRuntime {
174
177
  }
175
178
  }
176
179
 
177
- // React Navigation path: use navRef.navigate()
180
+ // React Navigation path: use navRef
178
181
  if (!this.navRef) {
179
182
  return '❌ Navigation ref not available.';
180
183
  }
@@ -188,10 +191,31 @@ export class AgentRuntime {
188
191
  const params = args.params ? (typeof args.params === 'string' ? JSON.parse(args.params) : args.params) : undefined;
189
192
  // Case-insensitive screen name matching
190
193
  const availableRoutes = this.getRouteNames();
194
+ logger.info('AgentRuntime', `🧭 Navigate requested: "${args.screen}" | Available: [${availableRoutes.join(', ')}] | Params: ${JSON.stringify(params)}`);
191
195
  const matchedScreen = availableRoutes.find(
192
196
  r => r.toLowerCase() === args.screen.toLowerCase()
193
- ) || args.screen;
194
- this.navRef.navigate(matchedScreen, params);
197
+ );
198
+
199
+ // Guard: screen must exist in the navigation tree
200
+ if (!matchedScreen) {
201
+ const errMsg = `❌ "${args.screen}" is not a screen — it may be content within a screen. Available screens: ${availableRoutes.join(', ')}. Look at the current screen context for "${args.screen}" as a section, category, or element, and scroll/tap to find it. If it's on a different screen, navigate to the correct screen first.`;
202
+ logger.warn('AgentRuntime', `🧭 Navigate REJECTED: ${errMsg}`);
203
+ return errMsg;
204
+ }
205
+ logger.info('AgentRuntime', `🧭 Navigate matched: "${args.screen}" → "${matchedScreen}"`);
206
+
207
+ // Find the path to the screen (handles nested navigators)
208
+ const screenPath = this.findScreenPath(matchedScreen);
209
+ if (screenPath.length > 1) {
210
+ // Nested screen: navigate using parent → { screen: child } pattern
211
+ // e.g. navigate('HomeTab', { screen: 'Home', params })
212
+ logger.info('AgentRuntime', `Nested navigation: ${screenPath.join(' → ')}`);
213
+ const nestedParams = this.buildNestedParams(screenPath, params);
214
+ this.navRef.navigate(screenPath[0], nestedParams);
215
+ } else {
216
+ // Top-level screen: direct navigate
217
+ this.navRef.navigate(matchedScreen, params);
218
+ }
195
219
  await new Promise(resolve => setTimeout(resolve, 500));
196
220
  return `✅ Navigated to "${matchedScreen}"${params ? ` with params: ${JSON.stringify(params)}` : ''}`;
197
221
  } catch (error: any) {
@@ -289,6 +313,49 @@ export class AgentRuntime {
289
313
  return [...new Set(names)];
290
314
  }
291
315
 
316
+ /**
317
+ * Find the path from root navigator to a target screen.
318
+ * Returns [parentTab, screen] for nested screens, or [screen] for top-level.
319
+ * Example: findScreenPath('Home') → ['HomeTab', 'Home']
320
+ */
321
+ private findScreenPath(targetScreen: string): string[] {
322
+ try {
323
+ const state = this.navRef?.getRootState?.() || this.navRef?.getState?.();
324
+ if (!state?.routes) return [targetScreen];
325
+
326
+ // Check if target is a direct top-level route
327
+ if (state.routes.some((r: any) => r.name === targetScreen)) {
328
+ return [targetScreen];
329
+ }
330
+
331
+ // Search nested navigators
332
+ for (const route of state.routes) {
333
+ const nestedNames = route.state ? this.collectRouteNames(route.state) : [];
334
+ if (nestedNames.includes(targetScreen)) {
335
+ return [route.name, targetScreen];
336
+ }
337
+ }
338
+
339
+ return [targetScreen]; // Fallback: try direct
340
+ } catch {
341
+ return [targetScreen];
342
+ }
343
+ }
344
+
345
+ /**
346
+ * Build nested params for React Navigation nested screen navigation.
347
+ * ['HomeTab', 'Home'] → { screen: 'Home', params }
348
+ * ['Tab', 'Stack', 'Screen'] → { screen: 'Stack', params: { screen: 'Screen', params } }
349
+ */
350
+ private buildNestedParams(path: string[], leafParams?: any): any {
351
+ // Build from the end: innermost screen gets the leafParams
352
+ let result = leafParams;
353
+ for (let i = path.length - 1; i >= 1; i--) {
354
+ result = { screen: path[i], ...(result !== undefined ? { params: result } : {}) };
355
+ }
356
+ return result;
357
+ }
358
+
292
359
  /**
293
360
  * Recursively find the deepest active screen name.
294
361
  * For tabs: follows active tab → active screen inside that tab.
@@ -62,6 +62,12 @@ Available tools:
62
62
  - ask_user(question): Ask the user for clarification ONLY when you cannot determine what action to take.
63
63
  </tools>
64
64
 
65
+ <custom_actions>
66
+ In addition to the built-in tools above, the app may register custom actions (e.g. checkout, addToCart). These appear as additional callable tools in your tool list.
67
+ When a custom action exists for something the user wants to do, ALWAYS call the action instead of tapping a UI button — even if you see a matching button on screen. Custom actions may include security flows like user confirmation dialogs.
68
+ If a UI element is hidden (aiIgnore) but a matching custom action exists, use the action.
69
+ </custom_actions>
70
+
65
71
  <rules>
66
72
  - There are 2 types of requests — always determine which type BEFORE acting:
67
73
  1. Information requests (e.g. "what's available?", "how much is X?", "list the items"):
@@ -162,11 +168,14 @@ plan: "Call done to report the cart contents to the user."
162
168
  }
163
169
 
164
170
  /**
165
- * Voice-optimized system prompt for the Gemini Live API.
171
+ * Voice-adapted system prompt for the Gemini Live API.
166
172
  *
167
- * Includes the same screen format and tool semantics as text mode,
168
- * but condensed for voice context and with guardrails against
169
- * unprompted actions.
173
+ * Uses the same core rules/tools/screen format as text mode (buildSystemPrompt)
174
+ * but adapted for voice interaction:
175
+ * - No agent-loop directives (no "MUST call agent_step on every step")
176
+ * - No agent_history/user_request references (voice is conversational)
177
+ * - Explicit "wait for user voice command" guardrails
178
+ * - Voice-specific UX (concise spoken responses)
170
179
  */
171
180
  export function buildVoiceSystemPrompt(
172
181
  language: string,
@@ -174,46 +183,90 @@ export function buildVoiceSystemPrompt(
174
183
  ): string {
175
184
  const isArabic = language === 'ar';
176
185
 
177
- let prompt = `You are a voice-controlled AI agent operating a React Native mobile app. You can see the screen content and interact with UI elements using tools.
178
-
179
- <language>
180
- ${isArabic ? 'Respond in Arabic.' : 'Respond in English.'}
181
- Use the same language as the user.
182
- </language>
186
+ let prompt = `You are a voice-controlled AI assistant for a React Native mobile app.
183
187
 
184
- <screen_format>
185
- You receive periodic screen updates showing the current UI. Interactive elements appear as:
186
- [index]<type attrs>label />
188
+ You always have access to the current screen context — it shows you exactly what the user sees on their phone. Use it to answer questions and execute actions when the user speaks a command. Wait for the user to speak a clear voice command before taking any action. Screen context updates arrive automatically as the UI changes.
187
189
 
188
- - index: numeric ID for interaction (use with tap/type tools)
190
+ <screen_state>
191
+ Interactive elements are listed as [index]<type attrs>label />
192
+ - index: numeric identifier for interaction
189
193
  - type: element type (pressable, text-input, switch)
190
- - attrs: state like value="true", checked="false", role="switch"
191
- - label: visible text content
194
+ - attrs: state attributes like value="true", checked="false", role="switch"
195
+ - label: visible text content of the element
192
196
 
193
- Only elements with [index] are interactive. Text without [] is display-only.
194
- Example: [5]<switch value="true">Order Updates /> means element 5 is a switch currently ON.
195
- </screen_format>
197
+ Only elements with [index] are interactive. Use the index to tap or type into them.
198
+ Pure text elements without [] are NOT interactive they are informational content you can read.
199
+ </screen_state>
196
200
 
197
201
  <tools>
198
202
  Available tools:
199
- - tap(index): Tap an element. For switches, this toggles their value.
200
- - type(index, text): Type text into a text-input.
201
- - navigate(screen): Navigate to a named screen.
202
- - done(text, success): Complete the task with a spoken response.
203
- - ask_user(question): Ask the user for clarification.
203
+ - tap(index): Tap an interactive element by its index. Works universally on buttons, switches, and custom components. For switches, this toggles their state.
204
+ - type(index, text): Type text into a text-input element by its index. ONLY works on text-input elements.
205
+ - navigate(screen, params): Navigate to a screen listed in Available Screens. ONLY use screen names from the Available Screens list — section titles, category names, or other visible text are content within a screen, not navigable screens.
206
+ - done(text, success): Complete task and respond to the user.
207
+
208
+ CRITICAL — tool call protocol:
209
+ When you decide to use a tool, emit the function call IMMEDIATELY as the first thing in your response — before any speech or audio output.
210
+ Speaking before a tool call causes a fatal connection error. Always: call the tool first, wait for the result, then speak about what happened.
211
+ Correct: [function call] → receive result → speak to user about the outcome.
212
+ Wrong: "Sure, let me tap on..." → [function call] → crash.
204
213
  </tools>
205
214
 
215
+ <custom_actions>
216
+ In addition to the built-in tools above, the app may register custom actions (e.g. checkout, addToCart). These appear as additional callable tools in your tool list.
217
+ When a custom action exists for something the user wants to do, ALWAYS call the action instead of tapping a UI button — even if you see a matching button on screen. Custom actions may include security flows like user confirmation dialogs.
218
+ If a UI element is hidden but a matching custom action exists, use the action.
219
+ </custom_actions>
220
+
206
221
  <rules>
207
- CRITICAL ACTION RULES:
208
- - ONLY perform actions (tap, type, navigate) when the user explicitly asks you to do something.
209
- - NEVER tap or navigate on your own initiative wait for the user's voice command.
210
- - When the user asks a question about what's on screen, answer verbally via done(). Do NOT tap anything.
211
- - When the user asks to toggle/enable/disable something, find the matching element by its label and use tap(index).
212
- - When a screen update arrives, do NOT interact with elements unless the user asked you to.
213
- - Use element indexes from the most recent screen update — they refresh every few seconds.
214
- - For switches: tap(index) toggles the value. You do NOT need to find a separate button.
215
- - Keep spoken responses concise the user is listening, not reading.
216
- </rules>`;
222
+ - There are 2 types of requests — always determine which type BEFORE acting:
223
+ 1. Information requests (e.g. "what's available?", "how much is X?", "list the items"):
224
+ Read the screen content and answer by speaking. Do NOT perform any tap/type/navigate actions.
225
+ 2. Action requests (e.g. "add margherita to cart", "go to checkout", "fill in my name"):
226
+ Execute the required UI interactions using tap/type/navigate tools.
227
+ - For action requests, determine whether the user gave specific step-by-step instructions or an open-ended task:
228
+ 1. Specific instructions: Follow each step precisely, do not skip.
229
+ 2. Open-ended tasks: Plan the steps yourself.
230
+ - Only interact with elements that have an [index].
231
+ - After tapping an element, the screen may change. Wait for updated screen context before the next action.
232
+ - If the current screen doesn't have what you need, use navigate() to go to another screen from the Available Screens list.
233
+ - If a tap navigates to another screen, the next screen context update will show the new screen's elements.
234
+ - Do not repeat one action more than 3 times unless conditions changed.
235
+ - After typing into a text input, check if the screen changed (e.g., suggestions or autocomplete appeared). If so, interact with the new elements.
236
+ - After typing into a search field, you may need to tap a search button, press enter, or select from a dropdown to complete the search.
237
+ - If the user request includes specific details (product type, price, category), use available filters or search to be more efficient.
238
+ - For destructive/purchase actions (place order, delete, pay), tap the button exactly ONCE. Do not repeat — the user could be charged multiple times.
239
+ - SECURITY & PRIVACY: Do not guess or auto-fill sensitive data (passwords, payment info, personal details). Ask the user verbally.
240
+ - SECURITY & PRIVACY: Do not fill in login/signup forms unless the user provides credentials.
241
+ - Do NOT ask for confirmation of actions the user explicitly requested. If they said "place my order", just do it.
242
+ </rules>
243
+
244
+ <capability>
245
+ - You can see the current screen context — use it to answer questions directly.
246
+ - It is ok to just provide information without performing any actions.
247
+ - It is ok to fail the task. The user would rather you report failure than repeat failed actions endlessly.
248
+ - The user can be wrong. If the request is not achievable, tell them.
249
+ - The app can have bugs. If something is not working as expected, tell the user.
250
+ - Trying too hard can be harmful. If stuck, tell the user what you accomplished and what remains.
251
+ </capability>
252
+
253
+ <speech_rules>
254
+ - Keep spoken output to 1-2 short sentences.
255
+ - Speak naturally — no markdown, no headers, no bullet points.
256
+ - Only speak confirmations and answers. Do not narrate your reasoning.
257
+ - Confirm what you did: summarize the action result briefly (e.g., "Added to cart" or "Navigated to Settings").
258
+ - Be transparent about errors: If an action fails, explain what failed and why.
259
+ - Track multi-item progress: For requests involving multiple items, keep track and report which ones succeeded and which did not.
260
+ - Stay on the user's screen: For information requests, read from the current screen. Only navigate away if the needed information is on another screen.
261
+ - When a request is ambiguous, pick the most common interpretation rather than always asking. State your assumption in your spoken response.
262
+ - Suggest next steps: After completing an action, briefly suggest what the user might want to do next.
263
+ - Be concise: Users are on mobile — avoid long speech.
264
+ </speech_rules>
265
+
266
+ <language_settings>
267
+ ${isArabic ? '- Working language: **Arabic**. Respond in Arabic.' : '- Working language: **English**. Respond in English.'}
268
+ - Use the same language as the user.
269
+ </language_settings>`;
217
270
 
218
271
  // Append user-provided instructions if any
219
272
  if (userInstructions?.trim()) {
@@ -5,6 +5,9 @@
5
5
  * PCM streaming from the microphone. Each chunk is converted from Float32
6
6
  * to Int16 PCM and base64-encoded for the Gemini Live API.
7
7
  *
8
+ * Echo cancellation is handled at the OS/hardware level via
9
+ * react-native-incall-manager (VOICE_COMMUNICATION mode) — not in JS.
10
+ *
8
11
  * Requires: react-native-audio-api (development build only, not Expo Go)
9
12
  */
10
13
 
@@ -32,6 +35,14 @@ export class AudioInputService {
32
35
  private status: RecordingStatus = 'idle';
33
36
  private recorder: any = null;
34
37
 
38
+ // Auto-recovery: detect when mic session dies after audio playback.
39
+ // This is a react-native-audio-api bug where AudioRecorder loses mic access
40
+ // after AudioBufferQueueSourceNode plays audio (audio session conflict).
41
+ private consecutiveSilentFrames = 0;
42
+ private isRecovering = false;
43
+ private static readonly SILENT_THRESHOLD = 0.01;
44
+ private static readonly SILENT_FRAMES_BEFORE_RESTART = 15;
45
+
35
46
  constructor(config: AudioInputConfig) {
36
47
  this.config = config;
37
48
  }
@@ -71,6 +82,7 @@ export class AudioInputService {
71
82
 
72
83
  // Create AudioRecorder
73
84
  this.recorder = new audioApi.AudioRecorder();
85
+ this.consecutiveSilentFrames = 0;
74
86
 
75
87
  const sampleRate = this.config.sampleRate || 16000;
76
88
  const bufferLength = this.config.bufferLength || 4096;
@@ -84,9 +96,53 @@ export class AudioInputService {
84
96
  try {
85
97
  // event.buffer is an AudioBuffer — get Float32 channel data
86
98
  const float32Data = event.buffer.getChannelData(0);
87
- // Convert Float32 → Int16 → base64 for Gemini
99
+
100
+ // Measure peak amplitude for diagnostics + silent detection
101
+ let maxAmp = 0;
102
+ for (let i = 0; i < float32Data.length; i++) {
103
+ const abs = Math.abs(float32Data[i] || 0);
104
+ if (abs > maxAmp) maxAmp = abs;
105
+ }
106
+
107
+ // Diagnostic: log amplitude on first 5 frames, then every 10th
108
+ if (frameCount <= 5 || frameCount % 10 === 0) {
109
+ logger.info('AudioInput', `🔬 Frame #${frameCount}: maxAmp=${maxAmp.toFixed(6)}, samples=${float32Data.length}`);
110
+ }
111
+
112
+ // ─── Auto-Recovery: Silent mic detection ─────────────
113
+ // After audio playback, react-native-audio-api's AudioRecorder
114
+ // can lose its mic session (all-zero frames). Detect this and
115
+ // restart the recorder to re-acquire the audio session.
116
+ if (maxAmp < AudioInputService.SILENT_THRESHOLD) {
117
+ this.consecutiveSilentFrames++;
118
+ if (
119
+ this.consecutiveSilentFrames >= AudioInputService.SILENT_FRAMES_BEFORE_RESTART &&
120
+ !this.isRecovering
121
+ ) {
122
+ this.isRecovering = true;
123
+ logger.warn('AudioInput', `⚠️ ${this.consecutiveSilentFrames} silent frames — restarting recorder...`);
124
+ this.restartRecorder().then(() => {
125
+ this.isRecovering = false;
126
+ this.consecutiveSilentFrames = 0;
127
+ logger.info('AudioInput', '✅ Recorder restarted — mic session re-acquired');
128
+ }).catch((err: any) => {
129
+ this.isRecovering = false;
130
+ logger.error('AudioInput', `❌ Recorder restart failed: ${err?.message || err}`);
131
+ });
132
+ return; // Skip this frame
133
+ }
134
+ } else {
135
+ // Got real audio — reset counter
136
+ if (this.consecutiveSilentFrames > 5) {
137
+ logger.info('AudioInput', `🎤 Mic recovered after ${this.consecutiveSilentFrames} silent frames`);
138
+ }
139
+ this.consecutiveSilentFrames = 0;
140
+ }
141
+
88
142
  const base64Chunk = float32ToInt16Base64(float32Data);
89
- logger.debug('AudioInput', `🎤 Frame #${frameCount}: size=${base64Chunk.length}`);
143
+ if (frameCount <= 5 || frameCount % 10 === 0) {
144
+ logger.info('AudioInput', `🎤 Frame #${frameCount}: chunk=${base64Chunk.length} chars, calling onAudioChunk...`);
145
+ }
90
146
  this.config.onAudioChunk(base64Chunk);
91
147
  } catch (err: any) {
92
148
  logger.error('AudioInput', `Frame processing error: ${err.message}`);
@@ -121,6 +177,7 @@ export class AudioInputService {
121
177
  }
122
178
  this.recorder = null;
123
179
  this.status = 'idle';
180
+ this.consecutiveSilentFrames = 0;
124
181
  logger.info('AudioInput', 'Streaming stopped');
125
182
  } catch (error: any) {
126
183
  logger.error('AudioInput', `Failed to stop: ${error.message}`);
@@ -129,6 +186,24 @@ export class AudioInputService {
129
186
  }
130
187
  }
131
188
 
189
+ // ─── Auto-Recovery ─────────────────────────────────────────
190
+
191
+ /**
192
+ * Restart the recorder to re-acquire the audio session.
193
+ * Fixes react-native-audio-api bug where AudioRecorder loses mic access
194
+ * after AudioBufferQueueSourceNode plays audio.
195
+ */
196
+ private async restartRecorder(): Promise<void> {
197
+ logger.info('AudioInput', '🔄 Restarting recorder for mic recovery...');
198
+ await this.stop();
199
+ // Brief pause to let the audio system release resources
200
+ await new Promise(resolve => setTimeout(resolve, 300));
201
+ const ok = await this.start();
202
+ if (!ok) {
203
+ throw new Error('Recorder restart failed');
204
+ }
205
+ }
206
+
132
207
  // ─── Status ───────────────────────────────────────────────
133
208
 
134
209
  get isRecording(): boolean {