@mobileai/react-native 0.4.6 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. package/README.md +80 -4
  2. package/lib/module/components/AIAgent.js +179 -38
  3. package/lib/module/components/AIAgent.js.map +1 -1
  4. package/lib/module/components/AgentChatBar.js +53 -29
  5. package/lib/module/components/AgentChatBar.js.map +1 -1
  6. package/lib/module/components/Icons.js +337 -0
  7. package/lib/module/components/Icons.js.map +1 -0
  8. package/lib/module/core/AgentRuntime.js +74 -3
  9. package/lib/module/core/AgentRuntime.js.map +1 -1
  10. package/lib/module/core/systemPrompt.js +66 -39
  11. package/lib/module/core/systemPrompt.js.map +1 -1
  12. package/lib/module/index.js +3 -9
  13. package/lib/module/index.js.map +1 -1
  14. package/lib/module/services/AudioInputService.js +73 -2
  15. package/lib/module/services/AudioInputService.js.map +1 -1
  16. package/lib/module/services/AudioOutputService.js +58 -5
  17. package/lib/module/services/AudioOutputService.js.map +1 -1
  18. package/lib/module/services/VoiceService.js +281 -275
  19. package/lib/module/services/VoiceService.js.map +1 -1
  20. package/lib/typescript/src/components/AIAgent.d.ts.map +1 -1
  21. package/lib/typescript/src/components/AgentChatBar.d.ts.map +1 -1
  22. package/lib/typescript/src/components/Icons.d.ts +43 -0
  23. package/lib/typescript/src/components/Icons.d.ts.map +1 -0
  24. package/lib/typescript/src/core/AgentRuntime.d.ts +12 -0
  25. package/lib/typescript/src/core/AgentRuntime.d.ts.map +1 -1
  26. package/lib/typescript/src/core/systemPrompt.d.ts.map +1 -1
  27. package/lib/typescript/src/index.d.ts +4 -0
  28. package/lib/typescript/src/index.d.ts.map +1 -1
  29. package/lib/typescript/src/services/AudioInputService.d.ts +13 -0
  30. package/lib/typescript/src/services/AudioInputService.d.ts.map +1 -1
  31. package/lib/typescript/src/services/AudioOutputService.d.ts.map +1 -1
  32. package/lib/typescript/src/services/VoiceService.d.ts +38 -29
  33. package/lib/typescript/src/services/VoiceService.d.ts.map +1 -1
  34. package/package.json +1 -1
  35. package/src/components/AIAgent.tsx +192 -39
  36. package/src/components/AgentChatBar.tsx +44 -25
  37. package/src/components/Icons.tsx +253 -0
  38. package/src/core/AgentRuntime.ts +70 -3
  39. package/src/core/systemPrompt.ts +66 -39
  40. package/src/index.ts +8 -8
  41. package/src/services/AudioInputService.ts +77 -2
  42. package/src/services/AudioOutputService.ts +59 -5
  43. package/src/services/VoiceService.ts +278 -290
@@ -148,6 +148,10 @@ export function AIAgent({
148
148
  const voiceServiceRef = useRef<VoiceService | null>(null);
149
149
  const audioInputRef = useRef<AudioInputService | null>(null);
150
150
  const audioOutputRef = useRef<AudioOutputService | null>(null);
151
+ const toolLockRef = useRef<boolean>(false);
152
+ const userHasSpokenRef = useRef<boolean>(false);
153
+ const lastScreenContextRef = useRef<string>('');
154
+ const screenPollIntervalRef = useRef<ReturnType<typeof setInterval> | null>(null);
151
155
 
152
156
  // Compute available modes from props
153
157
  const availableModes: AgentMode[] = useMemo(() => {
@@ -173,8 +177,7 @@ export function AIAgent({
173
177
  onAfterStep,
174
178
  onBeforeTask,
175
179
  onAfterTask,
176
- transformScreenContent,
177
- customTools,
180
+ customTools: mode === 'voice' ? { ...customTools, ask_user: null } : customTools,
178
181
  instructions,
179
182
  stepDelay,
180
183
  mcpServerUrl,
@@ -183,7 +186,7 @@ export function AIAgent({
183
186
  onStatusUpdate: setStatusText,
184
187
  onTokenUsage,
185
188
  // Page-agent pattern: block the agent loop until user responds
186
- onAskUser: (question: string) => {
189
+ onAskUser: mode === 'voice' ? undefined : ((question: string) => {
187
190
  return new Promise<string>((resolve) => {
188
191
  askUserResolverRef.current = resolve;
189
192
  // Show question in chat bar, allow user input
@@ -191,9 +194,9 @@ export function AIAgent({
191
194
  setIsThinking(false);
192
195
  setStatusText('');
193
196
  });
194
- },
197
+ }),
195
198
  }), [
196
- apiKey, model, language, maxSteps,
199
+ mode, apiKey, model, language, maxSteps,
197
200
  interactiveBlacklist, interactiveWhitelist,
198
201
  onBeforeStep, onAfterStep, onBeforeTask, onAfterTask,
199
202
  transformScreenContent, customTools, instructions, stepDelay,
@@ -237,6 +240,9 @@ export function AIAgent({
237
240
 
238
241
  logger.info('AIAgent', `Mode changed to "${mode}" — initializing voice services...`);
239
242
 
243
+ // Track async audio output init — mic MUST wait for this
244
+ let audioOutputInitPromise: Promise<void> = Promise.resolve();
245
+
240
246
  // Create VoiceService with runtime's built-in tools (navigate, tap, type, done, etc.)
241
247
  if (!voiceServiceRef.current) {
242
248
  logger.info('AIAgent', 'Creating VoiceService...');
@@ -245,13 +251,14 @@ export function AIAgent({
245
251
  // Use voice-adapted system prompt — same core rules as text mode
246
252
  // but without agent-loop directives that trigger autonomous actions
247
253
  const voicePrompt = buildVoiceSystemPrompt(language, instructions?.system);
254
+ logger.info('AIAgent', `📝 Voice system prompt (${voicePrompt.length} chars):\n${voicePrompt}`);
248
255
  voiceServiceRef.current = new VoiceService({
249
256
  apiKey,
250
257
  systemPrompt: voicePrompt,
251
258
  tools: runtimeTools,
252
259
  language,
253
260
  });
254
- logger.info('AIAgent', 'VoiceService created with full voice system prompt and tools');
261
+ logger.info('AIAgent', `VoiceService created with ${runtimeTools.length} tools: ${runtimeTools.map(t => t.name).join(', ')}`);
255
262
  }
256
263
 
257
264
  // Create AudioOutputService if not exists
@@ -260,7 +267,11 @@ export function AIAgent({
260
267
  audioOutputRef.current = new AudioOutputService({
261
268
  onError: (err) => logger.error('AIAgent', `AudioOutput error: ${err}`),
262
269
  });
263
- audioOutputRef.current.initialize().then((ok) => {
270
+ // IMPORTANT: Must await initialize() BEFORE starting mic.
271
+ // initialize() calls setAudioSessionOptions which reconfigures the
272
+ // audio hardware. If the mic starts before this finishes, the native
273
+ // audio session change kills the recorder's device handle.
274
+ audioOutputInitPromise = audioOutputRef.current.initialize().then((ok) => {
264
275
  logger.info('AIAgent', `AudioOutputService initialized: ${ok}`);
265
276
  });
266
277
  }
@@ -271,7 +282,7 @@ export function AIAgent({
271
282
  audioInputRef.current = new AudioInputService({
272
283
  // Default 16kHz — Gemini Live API input standard
273
284
  onAudioChunk: (chunk) => {
274
- logger.debug('AIAgent', `Mic chunk: ${chunk.length} chars`);
285
+ logger.info('AIAgent', `🎤 onAudioChunk: ${chunk.length} chars, voiceService=${!!voiceServiceRef.current}, connected=${voiceServiceRef.current?.isConnected}`);
275
286
  voiceServiceRef.current?.sendAudio(chunk);
276
287
  },
277
288
  onError: (err) => logger.error('AIAgent', `AudioInput error: ${err}`),
@@ -279,66 +290,208 @@ export function AIAgent({
279
290
  });
280
291
  }
281
292
 
282
- // Connect VoiceService
293
+ // Connect VoiceService (async — SDK's ai.live.connect returns a Promise)
283
294
  logger.info('AIAgent', 'Connecting VoiceService...');
284
- voiceServiceRef.current.connect({
295
+ void voiceServiceRef.current.connect({
285
296
  onAudioResponse: (audio) => {
286
- logger.info('AIAgent', `Received audio response (${audio.length} chars)`);
297
+ logger.info('AIAgent', `🔊 Audio response: ${audio.length} chars, audioOutputRef=${!!audioOutputRef.current}`);
287
298
  setIsAISpeaking(true);
288
- audioOutputRef.current?.enqueue(audio);
299
+ if (!audioOutputRef.current) {
300
+ logger.error('AIAgent', '❌ audioOutputRef.current is NULL — cannot play audio!');
301
+ return;
302
+ }
303
+ audioOutputRef.current.enqueue(audio);
289
304
  },
290
305
  onStatusChange: (status) => {
291
306
  logger.info('AIAgent', `Voice status: ${status}`);
292
307
  const connected = status === 'connected';
293
308
  setIsVoiceConnected(connected);
294
309
  if (connected) {
295
- logger.info('AIAgent', '✅ VoiceService connected — auto-starting mic...');
296
- // Auto-start mic streaming once WebSocket is ready
297
- audioInputRef.current?.start().then((ok) => {
298
- if (ok) {
299
- setIsMicActive(true);
300
- logger.info('AIAgent', '🎙️ Mic auto-started after connection');
301
- }
310
+ logger.info('AIAgent', '✅ VoiceService connected — waiting for audio session init before starting mic...');
311
+ // Wait for audio session config to finish BEFORE starting mic.
312
+ // If mic starts while setAudioSessionOptions is in flight,
313
+ // the native audio device gets killed (AudioDeviceStop error).
314
+ audioOutputInitPromise.then(() => {
315
+ logger.info('AIAgent', ' Audio session ready — starting mic now...');
316
+ audioInputRef.current?.start().then((ok) => {
317
+ if (ok) {
318
+ setIsMicActive(true);
319
+ logger.info('AIAgent', '🎙️ Mic auto-started after connection');
320
+ }
321
+ });
302
322
  });
303
- // Send initial screen context so the model knows what's on screen.
304
- // sendScreenContext uses turnComplete: false (passive context)
305
- // so the model should NOT act on it until the user speaks.
306
- const initialContext = runtime.getScreenContext();
307
- voiceServiceRef.current?.sendScreenContext(initialContext);
308
- logger.info('AIAgent', '📡 Initial screen context sent (passive, turnComplete=false)');
323
+ }
324
+
325
+ // Handle unexpected disconnection auto-reconnect ONLY if not intentional
326
+ if (
327
+ status === 'disconnected' &&
328
+ mode === 'voice' &&
329
+ voiceServiceRef.current &&
330
+ !voiceServiceRef.current.intentionalDisconnect
331
+ ) {
332
+ // Stop mic & audio immediately so user isn't stuck
333
+ audioInputRef.current?.stop();
334
+ audioOutputRef.current?.stop();
335
+ setIsMicActive(false);
336
+ setIsAISpeaking(false);
337
+
338
+ logger.warn('AIAgent', '⚠️ Voice connection dropped unexpectedly — attempting reconnect in 2s...');
339
+
340
+ // Auto-reconnect after short delay
341
+ setTimeout(() => {
342
+ if (
343
+ mode === 'voice' &&
344
+ voiceServiceRef.current &&
345
+ !voiceServiceRef.current.intentionalDisconnect &&
346
+ voiceServiceRef.current.lastCallbacks
347
+ ) {
348
+ logger.info('AIAgent', '🔄 Reconnecting VoiceService...');
349
+ void voiceServiceRef.current.connect(voiceServiceRef.current.lastCallbacks);
350
+ }
351
+ }, 2000);
309
352
  }
310
353
  },
311
354
  onTranscript: (text, isFinal, role) => {
312
355
  logger.info('AIAgent', `Transcript [${role}] (final=${isFinal}): "${text}"`);
356
+ if (role === 'user') {
357
+ userHasSpokenRef.current = true;
358
+ }
359
+ },
360
+ onSetupComplete: () => {
361
+ logger.info('AIAgent', '✅ SDK setup complete — sending initial screen context now');
362
+ userHasSpokenRef.current = false;
363
+ const initialContext = runtime.getScreenContext();
364
+ lastScreenContextRef.current = initialContext; // Seed for screen change detection
365
+ const passiveContext = `[SYSTEM CONTEXT — THIS IS NOT A USER REQUEST. DO NOT RESPOND. DO NOT NAVIGATE. DO NOT CALL ANY TOOLS. DO NOT SPEAK. Just silently read and memorize this screen layout. You will use it when the user speaks their first voice command.]\n\n${initialContext}`;
366
+ logger.info('AIAgent', `📡 Initial screen context (${passiveContext.length} chars):\n${passiveContext}`);
367
+ voiceServiceRef.current?.sendScreenContext(passiveContext);
368
+ logger.info('AIAgent', '📡 Initial screen context sent (turnComplete=true)');
313
369
  },
314
370
  onToolCall: async (toolCall) => {
315
- logger.info('AIAgent', `Voice tool call: ${toolCall.name}(${JSON.stringify(toolCall.args)})`);
316
- // Execute the tool via AgentRuntime and send result back to Gemini
317
- const result = await runtime.executeTool(toolCall.name, toolCall.args);
318
- logger.info('AIAgent', `Voice tool result: ${result}`);
319
-
320
- voiceServiceRef.current?.sendFunctionResponse(toolCall.name, toolCall.id, { result });
321
-
322
- // After tool execution, push updated screen context
323
- // (the screen may have changed from tap/type/navigate)
324
- const updatedContext = runtime.getScreenContext();
325
- voiceServiceRef.current?.sendScreenContext(updatedContext);
326
- logger.info('AIAgent', '📡 Updated screen context sent after tool call');
371
+ logger.info('AIAgent', `🔧 Voice tool call: ${toolCall.name}(${JSON.stringify(toolCall.args)}) [id=${toolCall.id}]`);
372
+
373
+ // Code-level gate: reject tool calls before the user has spoken.
374
+ // The model sometimes auto-navigates on receiving screen context.
375
+ if (!userHasSpokenRef.current) {
376
+ logger.warn('AIAgent', `🚫 Rejected tool call ${toolCall.name} user hasn't spoken yet`);
377
+ voiceServiceRef.current?.sendFunctionResponse(toolCall.name, toolCall.id, {
378
+ result: 'Action rejected: wait for the user to speak before performing any actions.',
379
+ });
380
+ return;
381
+ }
382
+
383
+ // CRITICAL: Gate audio input during tool execution.
384
+ // The Gemini Live API crashes (code 1008) if sendRealtimeInput
385
+ // (audio) is called while a tool call is pending. Stop the mic
386
+ // before executing the tool and resume after the response is sent.
387
+ audioInputRef.current?.stop();
388
+ logger.info('AIAgent', `🔇 Mic paused for tool execution: ${toolCall.name}`);
389
+
390
+ // One-tool-at-a-time enforcement (mirrors text mode's line 752).
391
+ if (toolLockRef.current) {
392
+ logger.warn('AIAgent', `⏳ Tool locked — waiting for previous tool to finish before executing ${toolCall.name}`);
393
+ while (toolLockRef.current) {
394
+ await new Promise(resolve => setTimeout(resolve, 50));
395
+ }
396
+ }
397
+ toolLockRef.current = true;
398
+
399
+ try {
400
+ // Execute the tool via AgentRuntime and send result back to Gemini
401
+ const result = await runtime.executeTool(toolCall.name, toolCall.args);
402
+ logger.info('AIAgent', `🔧 Tool result for ${toolCall.name}: ${result}`);
403
+
404
+ // Step delay — matches text mode's stepDelay (line 820 in AgentRuntime).
405
+ await new Promise(resolve => setTimeout(resolve, 300));
406
+
407
+ // Include updated screen context IN the tool response
408
+ const updatedContext = runtime.getScreenContext();
409
+ lastScreenContextRef.current = updatedContext; // Sync with poll tracker
410
+ logger.info('AIAgent', `📡 Updated screen context after ${toolCall.name} (${updatedContext.length} chars):\n${updatedContext}`);
411
+ const enrichedResult = `${result}\n\n<updated_screen>\n${updatedContext}\n</updated_screen>`;
412
+ logger.info('AIAgent', `📡 Enriched tool response (${enrichedResult.length} chars):\n${enrichedResult}`);
413
+
414
+ voiceServiceRef.current?.sendFunctionResponse(toolCall.name, toolCall.id, { result: enrichedResult });
415
+ logger.info('AIAgent', `📡 Tool response sent for ${toolCall.name} [id=${toolCall.id}]`);
416
+ } finally {
417
+ toolLockRef.current = false;
418
+ // Resume mic after tool response is sent
419
+ if (voiceServiceRef.current?.isConnected) {
420
+ audioInputRef.current?.start().then((ok) => {
421
+ if (ok) {
422
+ setIsMicActive(true);
423
+ logger.info('AIAgent', `🔊 Mic resumed after tool execution: ${toolCall.name}`);
424
+ }
425
+ });
426
+ }
427
+ }
327
428
  },
328
429
  onError: (err) => {
329
430
  logger.error('AIAgent', `VoiceService error: ${err}`);
431
+ // Stop mic & audio on error to prevent stale state
432
+ audioInputRef.current?.stop();
433
+ audioOutputRef.current?.stop();
434
+ setIsMicActive(false);
435
+ setIsAISpeaking(false);
330
436
  },
331
437
  onTurnComplete: () => {
332
438
  logger.info('AIAgent', 'AI turn complete');
333
439
  setIsAISpeaking(false);
440
+ // No cool-down or echo gate needed — hardware AEC handles everything.
441
+ // Mic stays active and ready for the next voice command immediately.
334
442
  },
335
443
  });
336
444
 
445
+ // ─── Screen Change Detection ───────────────────────────────
446
+ // Poll the Fiber tree every 5s and resend context if the screen meaningfully changed.
447
+ // This gives voice mode the same screen-awareness as text mode's per-step re-read.
448
+ const SCREEN_POLL_INTERVAL = 5000;
449
+ const MIN_DIFF_RATIO = 0.05; // Ignore changes smaller than 5% of total length (animation flicker)
450
+
451
+ screenPollIntervalRef.current = setInterval(() => {
452
+ if (!voiceServiceRef.current?.isConnected) return;
453
+ // Skip during tool execution — the enriched tool response handles that
454
+ if (toolLockRef.current) {
455
+ logger.debug('AIAgent', '🔄 Screen poll skipped — tool lock active');
456
+ return;
457
+ }
458
+
459
+ try {
460
+ const currentContext = runtime.getScreenContext();
461
+ if (currentContext === lastScreenContextRef.current) return; // No change
462
+
463
+ // Check if the change is meaningful (not just animation/cursor flicker)
464
+ const lastLen = lastScreenContextRef.current.length;
465
+ const diff = Math.abs(currentContext.length - lastLen);
466
+ const diffRatio = lastLen > 0 ? diff / lastLen : 1;
467
+
468
+ if (diffRatio < MIN_DIFF_RATIO) {
469
+ logger.debug('AIAgent', `🔄 Screen poll: minor change ignored (${diff} chars, ${(diffRatio * 100).toFixed(1)}% < ${MIN_DIFF_RATIO * 100}% threshold)`);
470
+ return;
471
+ }
472
+
473
+ logger.info('AIAgent', `🔄 Screen change detected (${lastLen} → ${currentContext.length} chars, ${(diffRatio * 100).toFixed(1)}% diff)`);
474
+ lastScreenContextRef.current = currentContext;
475
+ const passiveUpdate = `[SCREEN UPDATE — The UI has changed. Here is the current screen layout. This is not a user request — do not act unless the user asks.]\n\n${currentContext}`;
476
+ voiceServiceRef.current?.sendScreenContext(passiveUpdate);
477
+ logger.info('AIAgent', '🔄 Updated screen context sent to voice model');
478
+ } catch (err) {
479
+ logger.warn('AIAgent', `🔄 Screen poll error: ${err}`);
480
+ }
481
+ }, SCREEN_POLL_INTERVAL);
482
+
337
483
  // Cleanup on mode change back to text
338
484
  return () => {
339
485
  logger.info('AIAgent', `Cleaning up voice services (leaving "${mode}" mode)`);
486
+ // Stop screen change polling
487
+ if (screenPollIntervalRef.current) {
488
+ clearInterval(screenPollIntervalRef.current);
489
+ screenPollIntervalRef.current = null;
490
+ logger.info('AIAgent', '🔄 Screen poll stopped');
491
+ }
492
+ lastScreenContextRef.current = '';
340
493
  voiceServiceRef.current?.disconnect();
341
- voiceServiceRef.current = null; // Ensure fresh instance on next connect
494
+ voiceServiceRef.current = null;
342
495
  audioInputRef.current?.stop();
343
496
  setIsMicActive(false);
344
497
  setIsAISpeaking(false);
@@ -362,7 +515,7 @@ export function AIAgent({
362
515
  setIsMicActive(false);
363
516
  setIsAISpeaking(false);
364
517
  setIsVoiceConnected(false);
365
- // 5. Switch back to text mode (triggers cleanup effect naturally)
518
+ // 6. Switch back to text mode (triggers cleanup effect naturally)
366
519
  setMode('text');
367
520
  logger.info('AIAgent', '🛑 Voice session fully stopped');
368
521
  }, [runtime]);
@@ -16,6 +16,14 @@ import {
16
16
  useWindowDimensions,
17
17
  } from 'react-native';
18
18
  import type { ExecutionResult, AgentMode } from '../core/types';
19
+ import {
20
+ MicIcon,
21
+ SpeakerIcon,
22
+ SendArrowIcon,
23
+ StopIcon,
24
+ LoadingDots,
25
+ AIBadge,
26
+ } from './Icons';
19
27
 
20
28
  // ─── Props ─────────────────────────────────────────────────────
21
29
 
@@ -56,9 +64,9 @@ function ModeSelector({
56
64
  }) {
57
65
  if (modes.length <= 1) return null;
58
66
 
59
- const labels: Record<AgentMode, { icon: string; label: string }> = {
60
- text: { icon: '💬', label: 'Text' },
61
- voice: { icon: '🎙️', label: 'Live Agent' },
67
+ const labels: Record<AgentMode, { label: string }> = {
68
+ text: { label: 'Text' },
69
+ voice: { label: 'Live Agent' },
62
70
  };
63
71
 
64
72
  return (
@@ -73,7 +81,15 @@ function ModeSelector({
73
81
  onPress={() => onSelect(mode)}
74
82
  accessibilityLabel={`Switch to ${labels[mode].label} mode`}
75
83
  >
76
- <Text style={modeStyles.tabIcon}>{labels[mode].icon}</Text>
84
+ {/* Active indicator dot */}
85
+ {activeMode === mode && (
86
+ <View style={{
87
+ width: 6,
88
+ height: 6,
89
+ borderRadius: 3,
90
+ backgroundColor: mode === 'voice' ? '#34C759' : '#7B68EE',
91
+ }} />
92
+ )}
77
93
  <Text
78
94
  style={[
79
95
  modeStyles.tabLabel,
@@ -91,15 +107,13 @@ function ModeSelector({
91
107
  // ─── Audio Control Button ──────────────────────────────────────
92
108
 
93
109
  function AudioControlButton({
94
- icon,
95
- activeIcon,
110
+ children,
96
111
  isActive,
97
112
  onPress,
98
113
  label,
99
114
  size = 36,
100
115
  }: {
101
- icon: string;
102
- activeIcon: string;
116
+ children: React.ReactNode;
103
117
  isActive: boolean;
104
118
  onPress: () => void;
105
119
  label: string;
@@ -116,7 +130,7 @@ function AudioControlButton({
116
130
  accessibilityLabel={label}
117
131
  hitSlop={8}
118
132
  >
119
- <Text style={audioStyles.controlIcon}>{isActive ? activeIcon : icon}</Text>
133
+ {children}
120
134
  </Pressable>
121
135
  );
122
136
  }
@@ -207,9 +221,7 @@ function DictationButton({
207
221
  accessibilityLabel={isListening ? 'Stop dictation' : 'Start dictation'}
208
222
  hitSlop={8}
209
223
  >
210
- <Text style={styles.sendButtonText}>
211
- {isListening ? '⏹️' : '🎤'}
212
- </Text>
224
+ {isListening ? <StopIcon size={18} color="#FF3B30" /> : <MicIcon size={18} color="#fff" />}
213
225
  </Pressable>
214
226
  );
215
227
  }
@@ -253,9 +265,7 @@ function TextInputRow({
253
265
  disabled={isThinking || !text.trim()}
254
266
  accessibilityLabel="Send request to AI Agent"
255
267
  >
256
- <Text style={styles.sendButtonText}>
257
- {isThinking ? '⏳' : '🚀'}
258
- </Text>
268
+ {isThinking ? <LoadingDots size={18} color="#fff" /> : <SendArrowIcon size={18} color="#fff" />}
259
269
  </Pressable>
260
270
  </View>
261
271
  );
@@ -288,12 +298,12 @@ function VoiceControlsRow({
288
298
  <View style={styles.inputRow}>
289
299
  {/* Speaker mute/unmute */}
290
300
  <AudioControlButton
291
- icon="🔊"
292
- activeIcon="🔇"
293
301
  isActive={isSpeakerMuted}
294
302
  onPress={() => onSpeakerToggle(!isSpeakerMuted)}
295
303
  label={isSpeakerMuted ? 'Unmute speaker' : 'Mute speaker'}
296
- />
304
+ >
305
+ <SpeakerIcon size={18} color="#fff" muted={isSpeakerMuted} />
306
+ </AudioControlButton>
297
307
 
298
308
  {/* Mic button — large center */}
299
309
  <Pressable
@@ -318,9 +328,16 @@ function VoiceControlsRow({
318
328
  isMicActive ? 'Stop recording' : 'Start recording'
319
329
  }
320
330
  >
321
- <Text style={audioStyles.micIcon}>
322
- {isConnecting ? '🔄' : isAISpeaking ? '🔊' : isMicActive ? '⏹️' : '🎙️'}
323
- </Text>
331
+ <View style={audioStyles.micIconWrap}>
332
+ {isConnecting
333
+ ? <LoadingDots size={20} color="#fff" />
334
+ : isAISpeaking
335
+ ? <SpeakerIcon size={20} color="#fff" />
336
+ : isMicActive
337
+ ? <StopIcon size={20} color="#fff" />
338
+ : <MicIcon size={20} color="#fff" />
339
+ }
340
+ </View>
324
341
  <Text style={audioStyles.micLabel}>
325
342
  {isConnecting
326
343
  ? (isArabic ? 'جاري الاتصال...' : 'Connecting...')
@@ -402,7 +419,6 @@ export function AgentChatBar({
402
419
  // ─── FAB (Compressed) ──────────────────────────────────────
403
420
 
404
421
  if (!isExpanded) {
405
- const fabIcon = isThinking ? '⏳' : '🤖';
406
422
  return (
407
423
  <Animated.View
408
424
  style={[styles.fabContainer, pan.getLayout()]}
@@ -413,7 +429,7 @@ export function AgentChatBar({
413
429
  onPress={() => setIsExpanded(true)}
414
430
  accessibilityLabel="Open AI Agent Chat"
415
431
  >
416
- <Text style={styles.fabIcon}>{fabIcon}</Text>
432
+ {isThinking ? <LoadingDots size={28} color="#fff" /> : <AIBadge size={28} />}
417
433
  </Pressable>
418
434
  </Animated.View>
419
435
  );
@@ -676,8 +692,11 @@ const audioStyles = StyleSheet.create({
676
692
  micButtonSpeaking: {
677
693
  backgroundColor: 'rgba(52, 199, 89, 0.3)',
678
694
  },
679
- micIcon: {
680
- fontSize: 20,
695
+ micIconWrap: {
696
+ width: 20,
697
+ height: 20,
698
+ alignItems: 'center' as const,
699
+ justifyContent: 'center' as const,
681
700
  },
682
701
  micLabel: {
683
702
  color: '#fff',