@mobileai/react-native 0.4.5 → 0.5.0

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/README.md +80 -15
  2. package/lib/module/components/AIAgent.js +181 -38
  3. package/lib/module/components/AIAgent.js.map +1 -1
  4. package/lib/module/components/AgentChatBar.js +53 -29
  5. package/lib/module/components/AgentChatBar.js.map +1 -1
  6. package/lib/module/components/Icons.js +337 -0
  7. package/lib/module/components/Icons.js.map +1 -0
  8. package/lib/module/core/AgentRuntime.js +74 -3
  9. package/lib/module/core/AgentRuntime.js.map +1 -1
  10. package/lib/module/core/systemPrompt.js +87 -34
  11. package/lib/module/core/systemPrompt.js.map +1 -1
  12. package/lib/module/services/AudioInputService.js +73 -2
  13. package/lib/module/services/AudioInputService.js.map +1 -1
  14. package/lib/module/services/AudioOutputService.js +58 -5
  15. package/lib/module/services/AudioOutputService.js.map +1 -1
  16. package/lib/module/services/VoiceService.js +284 -239
  17. package/lib/module/services/VoiceService.js.map +1 -1
  18. package/lib/typescript/src/components/AIAgent.d.ts.map +1 -1
  19. package/lib/typescript/src/components/AgentChatBar.d.ts.map +1 -1
  20. package/lib/typescript/src/components/Icons.d.ts +43 -0
  21. package/lib/typescript/src/components/Icons.d.ts.map +1 -0
  22. package/lib/typescript/src/core/AgentRuntime.d.ts +12 -0
  23. package/lib/typescript/src/core/AgentRuntime.d.ts.map +1 -1
  24. package/lib/typescript/src/core/systemPrompt.d.ts +7 -4
  25. package/lib/typescript/src/core/systemPrompt.d.ts.map +1 -1
  26. package/lib/typescript/src/services/AudioInputService.d.ts +13 -0
  27. package/lib/typescript/src/services/AudioInputService.d.ts.map +1 -1
  28. package/lib/typescript/src/services/AudioOutputService.d.ts.map +1 -1
  29. package/lib/typescript/src/services/VoiceService.d.ts +41 -24
  30. package/lib/typescript/src/services/VoiceService.d.ts.map +1 -1
  31. package/package.json +1 -1
  32. package/src/components/AIAgent.tsx +194 -38
  33. package/src/components/AgentChatBar.tsx +44 -25
  34. package/src/components/Icons.tsx +253 -0
  35. package/src/core/AgentRuntime.ts +70 -3
  36. package/src/core/systemPrompt.ts +87 -34
  37. package/src/services/AudioInputService.ts +77 -2
  38. package/src/services/AudioOutputService.ts +59 -5
  39. package/src/services/VoiceService.ts +280 -252
@@ -148,6 +148,10 @@ export function AIAgent({
148
148
  const voiceServiceRef = useRef<VoiceService | null>(null);
149
149
  const audioInputRef = useRef<AudioInputService | null>(null);
150
150
  const audioOutputRef = useRef<AudioOutputService | null>(null);
151
+ const toolLockRef = useRef<boolean>(false);
152
+ const userHasSpokenRef = useRef<boolean>(false);
153
+ const lastScreenContextRef = useRef<string>('');
154
+ const screenPollIntervalRef = useRef<ReturnType<typeof setInterval> | null>(null);
151
155
 
152
156
  // Compute available modes from props
153
157
  const availableModes: AgentMode[] = useMemo(() => {
@@ -173,8 +177,7 @@ export function AIAgent({
173
177
  onAfterStep,
174
178
  onBeforeTask,
175
179
  onAfterTask,
176
- transformScreenContent,
177
- customTools,
180
+ customTools: mode === 'voice' ? { ...customTools, ask_user: null } : customTools,
178
181
  instructions,
179
182
  stepDelay,
180
183
  mcpServerUrl,
@@ -183,7 +186,7 @@ export function AIAgent({
183
186
  onStatusUpdate: setStatusText,
184
187
  onTokenUsage,
185
188
  // Page-agent pattern: block the agent loop until user responds
186
- onAskUser: (question: string) => {
189
+ onAskUser: mode === 'voice' ? undefined : ((question: string) => {
187
190
  return new Promise<string>((resolve) => {
188
191
  askUserResolverRef.current = resolve;
189
192
  // Show question in chat bar, allow user input
@@ -191,9 +194,9 @@ export function AIAgent({
191
194
  setIsThinking(false);
192
195
  setStatusText('');
193
196
  });
194
- },
197
+ }),
195
198
  }), [
196
- apiKey, model, language, maxSteps,
199
+ mode, apiKey, model, language, maxSteps,
197
200
  interactiveBlacklist, interactiveWhitelist,
198
201
  onBeforeStep, onAfterStep, onBeforeTask, onAfterTask,
199
202
  transformScreenContent, customTools, instructions, stepDelay,
@@ -237,21 +240,25 @@ export function AIAgent({
237
240
 
238
241
  logger.info('AIAgent', `Mode changed to "${mode}" — initializing voice services...`);
239
242
 
243
+ // Track async audio output init — mic MUST wait for this
244
+ let audioOutputInitPromise: Promise<void> = Promise.resolve();
245
+
240
246
  // Create VoiceService with runtime's built-in tools (navigate, tap, type, done, etc.)
241
247
  if (!voiceServiceRef.current) {
242
248
  logger.info('AIAgent', 'Creating VoiceService...');
243
249
  const runtimeTools = runtime.getTools();
244
250
  logger.info('AIAgent', `Registering ${runtimeTools.length} tools with VoiceService: ${runtimeTools.map(t => t.name).join(', ')}`);
245
- // Build the full voice system prompt (screen format + tool descriptions + guardrails)
246
- // This gives voice mode the same screen understanding as text mode
251
+ // Use voice-adapted system prompt same core rules as text mode
252
+ // but without agent-loop directives that trigger autonomous actions
247
253
  const voicePrompt = buildVoiceSystemPrompt(language, instructions?.system);
254
+ logger.info('AIAgent', `📝 Voice system prompt (${voicePrompt.length} chars):\n${voicePrompt}`);
248
255
  voiceServiceRef.current = new VoiceService({
249
256
  apiKey,
250
257
  systemPrompt: voicePrompt,
251
258
  tools: runtimeTools,
252
259
  language,
253
260
  });
254
- logger.info('AIAgent', 'VoiceService created with full voice system prompt and tools');
261
+ logger.info('AIAgent', `VoiceService created with ${runtimeTools.length} tools: ${runtimeTools.map(t => t.name).join(', ')}`);
255
262
  }
256
263
 
257
264
  // Create AudioOutputService if not exists
@@ -260,7 +267,11 @@ export function AIAgent({
260
267
  audioOutputRef.current = new AudioOutputService({
261
268
  onError: (err) => logger.error('AIAgent', `AudioOutput error: ${err}`),
262
269
  });
263
- audioOutputRef.current.initialize().then((ok) => {
270
+ // IMPORTANT: Must await initialize() BEFORE starting mic.
271
+ // initialize() calls setAudioSessionOptions which reconfigures the
272
+ // audio hardware. If the mic starts before this finishes, the native
273
+ // audio session change kills the recorder's device handle.
274
+ audioOutputInitPromise = audioOutputRef.current.initialize().then((ok) => {
264
275
  logger.info('AIAgent', `AudioOutputService initialized: ${ok}`);
265
276
  });
266
277
  }
@@ -271,7 +282,7 @@ export function AIAgent({
271
282
  audioInputRef.current = new AudioInputService({
272
283
  // Default 16kHz — Gemini Live API input standard
273
284
  onAudioChunk: (chunk) => {
274
- logger.debug('AIAgent', `Mic chunk: ${chunk.length} chars`);
285
+ logger.info('AIAgent', `🎤 onAudioChunk: ${chunk.length} chars, voiceService=${!!voiceServiceRef.current}, connected=${voiceServiceRef.current?.isConnected}`);
275
286
  voiceServiceRef.current?.sendAudio(chunk);
276
287
  },
277
288
  onError: (err) => logger.error('AIAgent', `AudioInput error: ${err}`),
@@ -279,63 +290,208 @@ export function AIAgent({
279
290
  });
280
291
  }
281
292
 
282
- // Connect VoiceService
293
+ // Connect VoiceService (async — SDK's ai.live.connect returns a Promise)
283
294
  logger.info('AIAgent', 'Connecting VoiceService...');
284
- voiceServiceRef.current.connect({
295
+ void voiceServiceRef.current.connect({
285
296
  onAudioResponse: (audio) => {
286
- logger.info('AIAgent', `Received audio response (${audio.length} chars)`);
297
+ logger.info('AIAgent', `🔊 Audio response: ${audio.length} chars, audioOutputRef=${!!audioOutputRef.current}`);
287
298
  setIsAISpeaking(true);
288
- audioOutputRef.current?.enqueue(audio);
299
+ if (!audioOutputRef.current) {
300
+ logger.error('AIAgent', '❌ audioOutputRef.current is NULL — cannot play audio!');
301
+ return;
302
+ }
303
+ audioOutputRef.current.enqueue(audio);
289
304
  },
290
305
  onStatusChange: (status) => {
291
306
  logger.info('AIAgent', `Voice status: ${status}`);
292
307
  const connected = status === 'connected';
293
308
  setIsVoiceConnected(connected);
294
309
  if (connected) {
295
- logger.info('AIAgent', '✅ VoiceService connected — auto-starting mic...');
296
- // Auto-start mic streaming once WebSocket is ready
297
- audioInputRef.current?.start().then((ok) => {
298
- if (ok) {
299
- setIsMicActive(true);
300
- logger.info('AIAgent', '🎙️ Mic auto-started after connection');
301
- }
310
+ logger.info('AIAgent', '✅ VoiceService connected — waiting for audio session init before starting mic...');
311
+ // Wait for audio session config to finish BEFORE starting mic.
312
+ // If mic starts while setAudioSessionOptions is in flight,
313
+ // the native audio device gets killed (AudioDeviceStop error).
314
+ audioOutputInitPromise.then(() => {
315
+ logger.info('AIAgent', ' Audio session ready — starting mic now...');
316
+ audioInputRef.current?.start().then((ok) => {
317
+ if (ok) {
318
+ setIsMicActive(true);
319
+ logger.info('AIAgent', '🎙️ Mic auto-started after connection');
320
+ }
321
+ });
302
322
  });
303
- // Send initial screen context (tree) so the model knows what's on screen
304
- const initialContext = runtime.getScreenContext();
305
- voiceServiceRef.current?.sendScreenContext(initialContext);
306
- logger.info('AIAgent', '📡 Initial screen context sent to voice model');
323
+ }
324
+
325
+ // Handle unexpected disconnection — auto-reconnect ONLY if not intentional
326
+ if (
327
+ status === 'disconnected' &&
328
+ mode === 'voice' &&
329
+ voiceServiceRef.current &&
330
+ !voiceServiceRef.current.intentionalDisconnect
331
+ ) {
332
+ // Stop mic & audio immediately so user isn't stuck
333
+ audioInputRef.current?.stop();
334
+ audioOutputRef.current?.stop();
335
+ setIsMicActive(false);
336
+ setIsAISpeaking(false);
337
+
338
+ logger.warn('AIAgent', '⚠️ Voice connection dropped unexpectedly — attempting reconnect in 2s...');
339
+
340
+ // Auto-reconnect after short delay
341
+ setTimeout(() => {
342
+ if (
343
+ mode === 'voice' &&
344
+ voiceServiceRef.current &&
345
+ !voiceServiceRef.current.intentionalDisconnect &&
346
+ voiceServiceRef.current.lastCallbacks
347
+ ) {
348
+ logger.info('AIAgent', '🔄 Reconnecting VoiceService...');
349
+ void voiceServiceRef.current.connect(voiceServiceRef.current.lastCallbacks);
350
+ }
351
+ }, 2000);
307
352
  }
308
353
  },
309
354
  onTranscript: (text, isFinal, role) => {
310
355
  logger.info('AIAgent', `Transcript [${role}] (final=${isFinal}): "${text}"`);
356
+ if (role === 'user') {
357
+ userHasSpokenRef.current = true;
358
+ }
359
+ },
360
+ onSetupComplete: () => {
361
+ logger.info('AIAgent', '✅ SDK setup complete — sending initial screen context now');
362
+ userHasSpokenRef.current = false;
363
+ const initialContext = runtime.getScreenContext();
364
+ lastScreenContextRef.current = initialContext; // Seed for screen change detection
365
+ const passiveContext = `[SYSTEM CONTEXT — THIS IS NOT A USER REQUEST. DO NOT RESPOND. DO NOT NAVIGATE. DO NOT CALL ANY TOOLS. DO NOT SPEAK. Just silently read and memorize this screen layout. You will use it when the user speaks their first voice command.]\n\n${initialContext}`;
366
+ logger.info('AIAgent', `📡 Initial screen context (${passiveContext.length} chars):\n${passiveContext}`);
367
+ voiceServiceRef.current?.sendScreenContext(passiveContext);
368
+ logger.info('AIAgent', '📡 Initial screen context sent (turnComplete=true)');
311
369
  },
312
370
  onToolCall: async (toolCall) => {
313
- logger.info('AIAgent', `Voice tool call: ${toolCall.name}(${JSON.stringify(toolCall.args)})`);
314
- // Execute the tool via AgentRuntime and send result back to Gemini
315
- const result = await runtime.executeTool(toolCall.name, toolCall.args);
316
- logger.info('AIAgent', `Voice tool result: ${result}`);
317
- voiceServiceRef.current?.sendFunctionResponse(toolCall.name, toolCall.id, { result });
318
-
319
- // After tool execution, push updated screen context
320
- // (the screen may have changed from tap/type/navigate)
321
- const updatedContext = runtime.getScreenContext();
322
- voiceServiceRef.current?.sendScreenContext(updatedContext);
323
- logger.info('AIAgent', '📡 Updated screen context sent after tool call');
371
+ logger.info('AIAgent', `🔧 Voice tool call: ${toolCall.name}(${JSON.stringify(toolCall.args)}) [id=${toolCall.id}]`);
372
+
373
+ // Code-level gate: reject tool calls before the user has spoken.
374
+ // The model sometimes auto-navigates on receiving screen context.
375
+ if (!userHasSpokenRef.current) {
376
+ logger.warn('AIAgent', `🚫 Rejected tool call ${toolCall.name} — user hasn't spoken yet`);
377
+ voiceServiceRef.current?.sendFunctionResponse(toolCall.name, toolCall.id, {
378
+ result: 'Action rejected: wait for the user to speak before performing any actions.',
379
+ });
380
+ return;
381
+ }
382
+
383
+ // CRITICAL: Gate audio input during tool execution.
384
+ // The Gemini Live API crashes (code 1008) if sendRealtimeInput
385
+ // (audio) is called while a tool call is pending. Stop the mic
386
+ // before executing the tool and resume after the response is sent.
387
+ audioInputRef.current?.stop();
388
+ logger.info('AIAgent', `🔇 Mic paused for tool execution: ${toolCall.name}`);
389
+
390
+ // One-tool-at-a-time enforcement (mirrors text mode's line 752).
391
+ if (toolLockRef.current) {
392
+ logger.warn('AIAgent', `⏳ Tool locked — waiting for previous tool to finish before executing ${toolCall.name}`);
393
+ while (toolLockRef.current) {
394
+ await new Promise(resolve => setTimeout(resolve, 50));
395
+ }
396
+ }
397
+ toolLockRef.current = true;
398
+
399
+ try {
400
+ // Execute the tool via AgentRuntime and send result back to Gemini
401
+ const result = await runtime.executeTool(toolCall.name, toolCall.args);
402
+ logger.info('AIAgent', `🔧 Tool result for ${toolCall.name}: ${result}`);
403
+
404
+ // Step delay — matches text mode's stepDelay (line 820 in AgentRuntime).
405
+ await new Promise(resolve => setTimeout(resolve, 300));
406
+
407
+ // Include updated screen context IN the tool response
408
+ const updatedContext = runtime.getScreenContext();
409
+ lastScreenContextRef.current = updatedContext; // Sync with poll tracker
410
+ logger.info('AIAgent', `📡 Updated screen context after ${toolCall.name} (${updatedContext.length} chars):\n${updatedContext}`);
411
+ const enrichedResult = `${result}\n\n<updated_screen>\n${updatedContext}\n</updated_screen>`;
412
+ logger.info('AIAgent', `📡 Enriched tool response (${enrichedResult.length} chars):\n${enrichedResult}`);
413
+
414
+ voiceServiceRef.current?.sendFunctionResponse(toolCall.name, toolCall.id, { result: enrichedResult });
415
+ logger.info('AIAgent', `📡 Tool response sent for ${toolCall.name} [id=${toolCall.id}]`);
416
+ } finally {
417
+ toolLockRef.current = false;
418
+ // Resume mic after tool response is sent
419
+ if (voiceServiceRef.current?.isConnected) {
420
+ audioInputRef.current?.start().then((ok) => {
421
+ if (ok) {
422
+ setIsMicActive(true);
423
+ logger.info('AIAgent', `🔊 Mic resumed after tool execution: ${toolCall.name}`);
424
+ }
425
+ });
426
+ }
427
+ }
324
428
  },
325
429
  onError: (err) => {
326
430
  logger.error('AIAgent', `VoiceService error: ${err}`);
431
+ // Stop mic & audio on error to prevent stale state
432
+ audioInputRef.current?.stop();
433
+ audioOutputRef.current?.stop();
434
+ setIsMicActive(false);
435
+ setIsAISpeaking(false);
327
436
  },
328
437
  onTurnComplete: () => {
329
438
  logger.info('AIAgent', 'AI turn complete');
330
439
  setIsAISpeaking(false);
440
+ // No cool-down or echo gate needed — hardware AEC handles everything.
441
+ // Mic stays active and ready for the next voice command immediately.
331
442
  },
332
443
  });
333
444
 
445
+ // ─── Screen Change Detection ───────────────────────────────
446
+ // Poll the Fiber tree every 5s and resend context if the screen meaningfully changed.
447
+ // This gives voice mode the same screen-awareness as text mode's per-step re-read.
448
+ const SCREEN_POLL_INTERVAL = 5000;
449
+ const MIN_DIFF_RATIO = 0.05; // Ignore changes smaller than 5% of total length (animation flicker)
450
+
451
+ screenPollIntervalRef.current = setInterval(() => {
452
+ if (!voiceServiceRef.current?.isConnected) return;
453
+ // Skip during tool execution — the enriched tool response handles that
454
+ if (toolLockRef.current) {
455
+ logger.debug('AIAgent', '🔄 Screen poll skipped — tool lock active');
456
+ return;
457
+ }
458
+
459
+ try {
460
+ const currentContext = runtime.getScreenContext();
461
+ if (currentContext === lastScreenContextRef.current) return; // No change
462
+
463
+ // Check if the change is meaningful (not just animation/cursor flicker)
464
+ const lastLen = lastScreenContextRef.current.length;
465
+ const diff = Math.abs(currentContext.length - lastLen);
466
+ const diffRatio = lastLen > 0 ? diff / lastLen : 1;
467
+
468
+ if (diffRatio < MIN_DIFF_RATIO) {
469
+ logger.debug('AIAgent', `🔄 Screen poll: minor change ignored (${diff} chars, ${(diffRatio * 100).toFixed(1)}% < ${MIN_DIFF_RATIO * 100}% threshold)`);
470
+ return;
471
+ }
472
+
473
+ logger.info('AIAgent', `🔄 Screen change detected (${lastLen} → ${currentContext.length} chars, ${(diffRatio * 100).toFixed(1)}% diff)`);
474
+ lastScreenContextRef.current = currentContext;
475
+ const passiveUpdate = `[SCREEN UPDATE — The UI has changed. Here is the current screen layout. This is not a user request — do not act unless the user asks.]\n\n${currentContext}`;
476
+ voiceServiceRef.current?.sendScreenContext(passiveUpdate);
477
+ logger.info('AIAgent', '🔄 Updated screen context sent to voice model');
478
+ } catch (err) {
479
+ logger.warn('AIAgent', `🔄 Screen poll error: ${err}`);
480
+ }
481
+ }, SCREEN_POLL_INTERVAL);
482
+
334
483
  // Cleanup on mode change back to text
335
484
  return () => {
336
485
  logger.info('AIAgent', `Cleaning up voice services (leaving "${mode}" mode)`);
486
+ // Stop screen change polling
487
+ if (screenPollIntervalRef.current) {
488
+ clearInterval(screenPollIntervalRef.current);
489
+ screenPollIntervalRef.current = null;
490
+ logger.info('AIAgent', '🔄 Screen poll stopped');
491
+ }
492
+ lastScreenContextRef.current = '';
337
493
  voiceServiceRef.current?.disconnect();
338
- voiceServiceRef.current = null; // Ensure fresh instance on next connect
494
+ voiceServiceRef.current = null;
339
495
  audioInputRef.current?.stop();
340
496
  setIsMicActive(false);
341
497
  setIsAISpeaking(false);
@@ -359,7 +515,7 @@ export function AIAgent({
359
515
  setIsMicActive(false);
360
516
  setIsAISpeaking(false);
361
517
  setIsVoiceConnected(false);
362
- // 5. Switch back to text mode (triggers cleanup effect naturally)
518
+ // 6. Switch back to text mode (triggers cleanup effect naturally)
363
519
  setMode('text');
364
520
  logger.info('AIAgent', '🛑 Voice session fully stopped');
365
521
  }, [runtime]);
@@ -16,6 +16,14 @@ import {
16
16
  useWindowDimensions,
17
17
  } from 'react-native';
18
18
  import type { ExecutionResult, AgentMode } from '../core/types';
19
+ import {
20
+ MicIcon,
21
+ SpeakerIcon,
22
+ SendArrowIcon,
23
+ StopIcon,
24
+ LoadingDots,
25
+ AIBadge,
26
+ } from './Icons';
19
27
 
20
28
  // ─── Props ─────────────────────────────────────────────────────
21
29
 
@@ -56,9 +64,9 @@ function ModeSelector({
56
64
  }) {
57
65
  if (modes.length <= 1) return null;
58
66
 
59
- const labels: Record<AgentMode, { icon: string; label: string }> = {
60
- text: { icon: '💬', label: 'Text' },
61
- voice: { icon: '🎙️', label: 'Live Agent' },
67
+ const labels: Record<AgentMode, { label: string }> = {
68
+ text: { label: 'Text' },
69
+ voice: { label: 'Live Agent' },
62
70
  };
63
71
 
64
72
  return (
@@ -73,7 +81,15 @@ function ModeSelector({
73
81
  onPress={() => onSelect(mode)}
74
82
  accessibilityLabel={`Switch to ${labels[mode].label} mode`}
75
83
  >
76
- <Text style={modeStyles.tabIcon}>{labels[mode].icon}</Text>
84
+ {/* Active indicator dot */}
85
+ {activeMode === mode && (
86
+ <View style={{
87
+ width: 6,
88
+ height: 6,
89
+ borderRadius: 3,
90
+ backgroundColor: mode === 'voice' ? '#34C759' : '#7B68EE',
91
+ }} />
92
+ )}
77
93
  <Text
78
94
  style={[
79
95
  modeStyles.tabLabel,
@@ -91,15 +107,13 @@ function ModeSelector({
91
107
  // ─── Audio Control Button ──────────────────────────────────────
92
108
 
93
109
  function AudioControlButton({
94
- icon,
95
- activeIcon,
110
+ children,
96
111
  isActive,
97
112
  onPress,
98
113
  label,
99
114
  size = 36,
100
115
  }: {
101
- icon: string;
102
- activeIcon: string;
116
+ children: React.ReactNode;
103
117
  isActive: boolean;
104
118
  onPress: () => void;
105
119
  label: string;
@@ -116,7 +130,7 @@ function AudioControlButton({
116
130
  accessibilityLabel={label}
117
131
  hitSlop={8}
118
132
  >
119
- <Text style={audioStyles.controlIcon}>{isActive ? activeIcon : icon}</Text>
133
+ {children}
120
134
  </Pressable>
121
135
  );
122
136
  }
@@ -207,9 +221,7 @@ function DictationButton({
207
221
  accessibilityLabel={isListening ? 'Stop dictation' : 'Start dictation'}
208
222
  hitSlop={8}
209
223
  >
210
- <Text style={styles.sendButtonText}>
211
- {isListening ? '⏹️' : '🎤'}
212
- </Text>
224
+ {isListening ? <StopIcon size={18} color="#FF3B30" /> : <MicIcon size={18} color="#fff" />}
213
225
  </Pressable>
214
226
  );
215
227
  }
@@ -253,9 +265,7 @@ function TextInputRow({
253
265
  disabled={isThinking || !text.trim()}
254
266
  accessibilityLabel="Send request to AI Agent"
255
267
  >
256
- <Text style={styles.sendButtonText}>
257
- {isThinking ? '⏳' : '🚀'}
258
- </Text>
268
+ {isThinking ? <LoadingDots size={18} color="#fff" /> : <SendArrowIcon size={18} color="#fff" />}
259
269
  </Pressable>
260
270
  </View>
261
271
  );
@@ -288,12 +298,12 @@ function VoiceControlsRow({
288
298
  <View style={styles.inputRow}>
289
299
  {/* Speaker mute/unmute */}
290
300
  <AudioControlButton
291
- icon="🔊"
292
- activeIcon="🔇"
293
301
  isActive={isSpeakerMuted}
294
302
  onPress={() => onSpeakerToggle(!isSpeakerMuted)}
295
303
  label={isSpeakerMuted ? 'Unmute speaker' : 'Mute speaker'}
296
- />
304
+ >
305
+ <SpeakerIcon size={18} color="#fff" muted={isSpeakerMuted} />
306
+ </AudioControlButton>
297
307
 
298
308
  {/* Mic button — large center */}
299
309
  <Pressable
@@ -318,9 +328,16 @@ function VoiceControlsRow({
318
328
  isMicActive ? 'Stop recording' : 'Start recording'
319
329
  }
320
330
  >
321
- <Text style={audioStyles.micIcon}>
322
- {isConnecting ? '🔄' : isAISpeaking ? '🔊' : isMicActive ? '⏹️' : '🎙️'}
323
- </Text>
331
+ <View style={audioStyles.micIconWrap}>
332
+ {isConnecting
333
+ ? <LoadingDots size={20} color="#fff" />
334
+ : isAISpeaking
335
+ ? <SpeakerIcon size={20} color="#fff" />
336
+ : isMicActive
337
+ ? <StopIcon size={20} color="#fff" />
338
+ : <MicIcon size={20} color="#fff" />
339
+ }
340
+ </View>
324
341
  <Text style={audioStyles.micLabel}>
325
342
  {isConnecting
326
343
  ? (isArabic ? 'جاري الاتصال...' : 'Connecting...')
@@ -402,7 +419,6 @@ export function AgentChatBar({
402
419
  // ─── FAB (Compressed) ──────────────────────────────────────
403
420
 
404
421
  if (!isExpanded) {
405
- const fabIcon = isThinking ? '⏳' : '🤖';
406
422
  return (
407
423
  <Animated.View
408
424
  style={[styles.fabContainer, pan.getLayout()]}
@@ -413,7 +429,7 @@ export function AgentChatBar({
413
429
  onPress={() => setIsExpanded(true)}
414
430
  accessibilityLabel="Open AI Agent Chat"
415
431
  >
416
- <Text style={styles.fabIcon}>{fabIcon}</Text>
432
+ {isThinking ? <LoadingDots size={28} color="#fff" /> : <AIBadge size={28} />}
417
433
  </Pressable>
418
434
  </Animated.View>
419
435
  );
@@ -676,8 +692,11 @@ const audioStyles = StyleSheet.create({
676
692
  micButtonSpeaking: {
677
693
  backgroundColor: 'rgba(52, 199, 89, 0.3)',
678
694
  },
679
- micIcon: {
680
- fontSize: 20,
695
+ micIconWrap: {
696
+ width: 20,
697
+ height: 20,
698
+ alignItems: 'center' as const,
699
+ justifyContent: 'center' as const,
681
700
  },
682
701
  micLabel: {
683
702
  color: '#fff',