npm - @mobileai/react-native - Versions diffs - 0.4.6 → 0.5.0 - Mend

@mobileai/react-native 0.4.6 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43)

package/README.md +80 -4
package/lib/module/components/AIAgent.js +179 -38
package/lib/module/components/AIAgent.js.map +1 -1
package/lib/module/components/AgentChatBar.js +53 -29
package/lib/module/components/AgentChatBar.js.map +1 -1
package/lib/module/components/Icons.js +337 -0
package/lib/module/components/Icons.js.map +1 -0
package/lib/module/core/AgentRuntime.js +74 -3
package/lib/module/core/AgentRuntime.js.map +1 -1
package/lib/module/core/systemPrompt.js +57 -38
package/lib/module/core/systemPrompt.js.map +1 -1
package/lib/module/index.js +3 -9
package/lib/module/index.js.map +1 -1
package/lib/module/services/AudioInputService.js +73 -2
package/lib/module/services/AudioInputService.js.map +1 -1
package/lib/module/services/AudioOutputService.js +58 -5
package/lib/module/services/AudioOutputService.js.map +1 -1
package/lib/module/services/VoiceService.js +281 -275
package/lib/module/services/VoiceService.js.map +1 -1
package/lib/typescript/src/components/AIAgent.d.ts.map +1 -1
package/lib/typescript/src/components/AgentChatBar.d.ts.map +1 -1
package/lib/typescript/src/components/Icons.d.ts +43 -0
package/lib/typescript/src/components/Icons.d.ts.map +1 -0
package/lib/typescript/src/core/AgentRuntime.d.ts +12 -0
package/lib/typescript/src/core/AgentRuntime.d.ts.map +1 -1
package/lib/typescript/src/core/systemPrompt.d.ts.map +1 -1
package/lib/typescript/src/index.d.ts +4 -0
package/lib/typescript/src/index.d.ts.map +1 -1
package/lib/typescript/src/services/AudioInputService.d.ts +13 -0
package/lib/typescript/src/services/AudioInputService.d.ts.map +1 -1
package/lib/typescript/src/services/AudioOutputService.d.ts.map +1 -1
package/lib/typescript/src/services/VoiceService.d.ts +38 -29
package/lib/typescript/src/services/VoiceService.d.ts.map +1 -1
package/package.json +1 -1
package/src/components/AIAgent.tsx +192 -39
package/src/components/AgentChatBar.tsx +44 -25
package/src/components/Icons.tsx +253 -0
package/src/core/AgentRuntime.ts +70 -3
package/src/core/systemPrompt.ts +57 -38
package/src/index.ts +8 -8
package/src/services/AudioInputService.ts +77 -2
package/src/services/AudioOutputService.ts +59 -5
package/src/services/VoiceService.ts +278 -290

package/lib/module/services/VoiceService.js (changed)

@@ -1,85 +1,151 @@
 "use strict";
 /**
- * VoiceService — WebSocket connection to Gemini Live API.
+ * VoiceService — @google/genai SDK Live API connection.
+ *
+ * Uses the official `ai.live.connect()` method instead of raw WebSocket.
+ * This fixes function calling reliability: the SDK handles protocol details
+ * (binary framing, message transforms, model name prefixes) that our
+ * previous raw WebSocket implementation missed.
  *
  * Handles bidirectional audio streaming between the app and Gemini:
  * - Sends PCM 16kHz 16-bit audio chunks (mic input)
  * - Receives PCM 24kHz 16-bit audio chunks (AI responses)
  * - Receives function calls (tap, navigate, etc.) for agentic actions
- * - Sends screen context (DOM text + optional screenshot) for live mode
- *
- * Protocol: wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent
+ * - Sends screen context (DOM text) for live mode
  */
+// Platform-specific import: Metro can't resolve '@google/genai/web' sub-path
+// export, so we use the full path to the web bundle. This is what the SDK
+// recommends ('use a platform specific import') — RN's WebSocket API is
+// browser-compatible so the web bundle works correctly.
+// @ts-ignore — TS can't find declarations for the deep path
+import { GoogleGenAI, Modality } from '@google/genai/dist/web/index.mjs';
+// @ts-ignore
 import { logger } from "../utils/logger.js";
 // ─── Types ─────────────────────────────────────────────────────
 // ─── Constants ─────────────────────────────────────────────────
-const WS_HOST = 'generativelanguage.googleapis.com';
-const WS_PATH = '/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent';
-// Use -09-2025: Google's own cookbook uses this model for Live API tool use.
-// The -12-2025 model had server-side regressions with function calling
-// and was deprecated March 19, 2026. The -09-2025 version has
-// "improved function calling and better handling of speech cut-offs."
-const DEFAULT_MODEL = 'gemini-2.5-flash-native-audio-preview-09-2025';
+const DEFAULT_MODEL = 'gemini-2.5-flash-native-audio-preview-12-2025';
 const DEFAULT_INPUT_SAMPLE_RATE = 16000;
 // ─── Service ───────────────────────────────────────────────────
 export class VoiceService {
-  ws = null;
+  session = null;
   callbacks = {};
-  setupComplete = false;
+  lastCallbacks = null;
   _status = 'disconnected';
+  intentionalDisconnect = false;
   constructor(config) {
     this.config = config;
   }
   // ─── Connection ────────────────────────────────────────────
-  connect(callbacks) {
-    if (this.ws?.readyState === WebSocket.OPEN) {
+  /**
+   * Connect to Gemini Live API via the official SDK.
+   * Now async because `ai.live.connect()` returns a Promise.
+   */
+  async connect(callbacks) {
+    if (this.session) {
       logger.info('VoiceService', 'Already connected');
       return;
     }
     this.callbacks = callbacks;
+    this.lastCallbacks = callbacks;
     this.setStatus('connecting');
+    this.intentionalDisconnect = false;
     const model = this.config.model || DEFAULT_MODEL;
-    const url = `wss://${WS_HOST}${WS_PATH}?key=${this.config.apiKey}`;
-    logger.info('VoiceService', `Connecting to Gemini Live API (model: ${model})`);
-    this.ws = new WebSocket(url);
-    this.ws.onopen = () => {
-      logger.info('VoiceService', 'WebSocket connected, sending setup...');
-      this.sendSetup();
-    };
-    this.ws.onclose = event => {
-      logger.info('VoiceService', `WebSocket closed: ${event.code} ${event.reason}`);
-      this.setStatus('disconnected');
-      this.setupComplete = false;
-    };
-    this.ws.onerror = error => {
-      logger.error('VoiceService', `WebSocket error: ${error.message || 'Unknown'}`);
+    logger.info('VoiceService', `Connecting via SDK (model: ${model})`);
+    try {
+      const ai = new GoogleGenAI({
+        apiKey: this.config.apiKey
+      });
+      const toolDeclarations = this.buildToolDeclarations();
+      // Build SDK config matching the official docs pattern
+      const sdkConfig = {
+        responseModalities: [Modality.AUDIO]
+      };
+      // Enable transcription for debugging and UX
+      sdkConfig.inputAudioTranscription = {};
+      sdkConfig.outputAudioTranscription = {};
+      logger.info('VoiceService', 'Transcription enabled');
+      if (this.config.systemPrompt) {
+        sdkConfig.systemInstruction = {
+          parts: [{
+            text: this.config.systemPrompt
+          }]
+        };
+      }
+      if (toolDeclarations.length > 0) {
+        sdkConfig.tools = [{
+          functionDeclarations: toolDeclarations
+        }];
+      }
+      // FULL CONFIG DUMP — see exactly what we send to SDK
+      const configDump = JSON.stringify({
+        ...sdkConfig,
+        systemInstruction: sdkConfig.systemInstruction ? '(present)' : '(none)',
+        tools: sdkConfig.tools ? `${toolDeclarations.length} declarations` : '(none)'
+      });
+      logger.info('VoiceService', `📋 SDK config: ${configDump}`);
+      logger.info('VoiceService', `📋 Tool names: ${toolDeclarations.map(t => t.name).join(', ')}`);
+      const session = await ai.live.connect({
+        model: model,
+        config: sdkConfig,
+        callbacks: {
+          onopen: () => {
+            logger.info('VoiceService', '✅ SDK session connected');
+            this.setStatus('connected');
+          },
+          onmessage: message => {
+            this.handleSDKMessage(message);
+          },
+          onerror: error => {
+            const errDetail = error ? JSON.stringify(error, Object.getOwnPropertyNames(error)).substring(0, 500) : 'null';
+            logger.error('VoiceService', `SDK error: ${errDetail}`);
+            this.setStatus('error');
+            this.callbacks.onError?.(error?.message || 'SDK connection error');
+          },
+          onclose: event => {
+            const closeDetail = event ? JSON.stringify(event, Object.getOwnPropertyNames(event)).substring(0, 500) : 'null';
+            if (this.intentionalDisconnect) {
+              logger.info('VoiceService', `SDK session closed (intentional)`);
+            } else {
+              logger.error('VoiceService', `SDK session closed UNEXPECTEDLY — code: ${event?.code}, reason: ${event?.reason}, detail: ${closeDetail}`);
+              this.callbacks.onError?.(`Connection lost (code: ${event?.code || 'unknown'})`);
+            }
+            this.session = null;
+            this.setStatus('disconnected');
+          }
+        }
+      });
+      this.session = session;
+      logger.info('VoiceService', 'SDK session established');
+    } catch (error) {
+      logger.error('VoiceService', `Connection failed: ${error.message}`);
       this.setStatus('error');
-      this.callbacks.onError?.(error.message || 'WebSocket connection error');
-    };
-    this.ws.onmessage = event => {
-      this.handleMessage(event);
-    };
+      this.callbacks.onError?.(error.message || 'Failed to connect');
+    }
   }
   disconnect() {
-    if (this.ws) {
-      logger.info('VoiceService', 'Disconnecting...');
-      this.ws.close();
-      this.ws = null;
-      this.setupComplete = false;
+    if (this.session) {
+      logger.info('VoiceService', 'Disconnecting (intentional)...');
+      this.intentionalDisconnect = true;
+      this.session.close();
+      this.session = null;
       this.setStatus('disconnected');
     }
   }
   get isConnected() {
-    return this.ws?.readyState === WebSocket.OPEN && this.setupComplete;
+    return this.session !== null && this._status === 'connected';
   }
   get currentStatus() {
     return this._status;
@@ -87,298 +153,246 @@ export class VoiceService {
   // ─── Send Audio ────────────────────────────────────────────
-  /** Send PCM audio chunk (base64 encoded) to Gemini */
+  /** Send PCM audio chunk (base64 encoded) via SDK's sendRealtimeInput */
   sendCount = 0;
   sendAudio(base64Audio) {
     this.sendCount++;
-    if (!this.isConnected) {
-      logger.warn('VoiceService', `sendAudio #${this.sendCount} DROPPED — not connected (ws=${this.ws?.readyState}, setup=${this.setupComplete})`);
+    if (!this.isConnected || !this.session) {
+      if (this.sendCount % 20 === 0) {
+        logger.warn('VoiceService', `sendAudio #${this.sendCount} DROPPED — not connected`);
+      }
       return;
     }
-    const message = {
-      realtimeInput: {
+    const mimeType = `audio/pcm;rate=${this.config.inputSampleRate || DEFAULT_INPUT_SAMPLE_RATE}`;
+    // DEBUG: log every send call
+    if (this.sendCount <= 5 || this.sendCount % 10 === 0) {
+      logger.info('VoiceService', `📡 sendAudio #${this.sendCount}: len=${base64Audio.length}, mime=${mimeType}, preview=${base64Audio.substring(0, 30)}...`);
+    }
+    try {
+      this.session.sendRealtimeInput({
         audio: {
-          mimeType: `audio/pcm;rate=${this.config.inputSampleRate || DEFAULT_INPUT_SAMPLE_RATE}`,
-          data: base64Audio
+          data: base64Audio,
+          mimeType
         }
+      });
+      // Log every 50th successful send to confirm data is reaching WebSocket
+      if (this.sendCount % 50 === 0) {
+        logger.info('VoiceService', `✅ sendAudio #${this.sendCount} OK — session.isOpen=${!!this.session}`);
       }
-    };
-    logger.info('VoiceService', `📤 #${this.sendCount} sending ${base64Audio.length} chars (ws=${this.ws?.readyState})`);
-    this.ws.send(JSON.stringify(message));
+    } catch (error) {
+      logger.error('VoiceService', `❌ sendAudio EXCEPTION: ${error.message}\n${error.stack?.substring(0, 300)}`);
+      this.session = null;
+      this.setStatus('disconnected');
+    }
   }
   // ─── Send Text ─────────────────────────────────────────────
-  /** Send text message via realtimeInput (same channel as audio) */
+  /** Send text message via SDK's sendClientContent */
   sendText(text) {
-    if (!this.isConnected) return;
-    const message = {
-      realtimeInput: {
-        text
-      }
-    };
-    this.ws.send(JSON.stringify(message));
+    if (!this.isConnected || !this.session) return;
+    logger.info('VoiceService', `🗣️ USER (text): "${text}"`);
+    try {
+      this.session.sendClientContent({
+        turns: [{
+          role: 'user',
+          parts: [{
+            text
+          }]
+        }],
+        turnComplete: true
+      });
+    } catch (error) {
+      logger.error('VoiceService', `sendText failed: ${error.message}`);
+    }
   }
-  /** Send DOM tree as passive context during live conversation.
-   *
-   * Uses `clientContent` with `turnComplete: false` to inject context
-   * WITHOUT triggering a model response. This is the "incremental content
-   * updates" pattern from the Gemini docs for establishing session context.
-   *
-   * Called once at connect + after each tool call (not on a timer).
-   * Screenshots are handled separately via the capture_screenshot tool.
+  /**
+   * Send DOM tree as passive context during live conversation.
+   * Uses turnComplete: false — the model receives context without responding.
    */
   sendScreenContext(domText) {
-    if (!this.isConnected) return;
-    const message = {
-      clientContent: {
+    if (!this.isConnected || !this.session) return;
+    try {
+      this.session.sendClientContent({
         turns: [{
           role: 'user',
           parts: [{
             text: domText
           }]
         }],
-        turnComplete: false // Passive context — don't trigger a response
-      }
-    };
-    logger.debug('VoiceService', `📤 Screen context sent (${domText.length} chars)`);
-    logger.debug('VoiceService', `📤 Raw Screen Context Payload: ${JSON.stringify(message).substring(0, 500)}...`);
-    this.ws.send(JSON.stringify(message));
+        turnComplete: true
+      });
+      logger.info('VoiceService', `📤 Screen context sent (${domText.length} chars)`);
+    } catch (error) {
+      logger.error('VoiceService', `sendScreenContext failed: ${error.message}`);
+    }
   }
   // ─── Send Function Response ────────────────────────────────
-  /** Send function call result back to Gemini */
+  /** Send function call result back via SDK's sendToolResponse */
   sendFunctionResponse(name, id, result) {
-    if (!this.isConnected) return;
-    const message = {
-      toolResponse: {
+    if (!this.isConnected || !this.session) return;
+    logger.info('VoiceService', `📤 Sending tool response for ${name} (id=${id})`);
+    try {
+      this.session.sendToolResponse({
         functionResponses: [{
           name,
           id,
           response: result
         }]
-      }
-    };
-    logger.info('VoiceService', `📤 Sending tool response for ${name} (id=${id})`);
-    this.ws.send(JSON.stringify(message));
+      });
+    } catch (error) {
+      logger.error('VoiceService', `sendFunctionResponse failed: ${error.message}`);
+    }
   }
-  // ─── Internal: Setup ───────────────────────────────────────
+  // ─── Internal: Tool Declarations ───────────────────────────
   /**
-   * Builds and sends the setup message, replicating text mode's agent_step
-   * compound tool so the model uses structured reasoning + actions.
-   *
-   * The agent_step tool flattens reasoning fields (previous_goal_eval,
-   * memory, plan) + action_name enum + all action parameters into a single
-   * function — matching GeminiProvider.buildAgentStepDeclaration exactly.
+   * Builds function declarations from configured tools.
+   * Converts BOOLEAN params to STRING (native audio model limitation).
    */
-  sendSetup() {
-    if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
-    const model = this.config.model || DEFAULT_MODEL;
-    const setup = {
-      model: `models/${model}`,
-      generationConfig: {
-        responseModalities: ['AUDIO']
-        // Note: Do NOT set thinkingBudget: 0 — it completely disables
-        // the model's ability to reason about when to call tools.
-        // The text thinking blocks are a trade-off for working function calling.
-      }
-    };
-    // Add system instruction if provided
-    if (this.config.systemPrompt) {
-      setup.systemInstruction = {
-        parts: [{
-          text: this.config.systemPrompt
-        }]
+  buildToolDeclarations() {
+    if (!this.config.tools?.length) return [];
+    const validTools = this.config.tools.filter(t => t.name !== 'capture_screenshot');
+    if (validTools.length === 0) return [];
+    return validTools.map(tool => {
+      const hasParams = Object.keys(tool.parameters || {}).length > 0;
+      const functionDecl = {
+        name: tool.name,
+        description: tool.description
       };
-    }
-    // Add individual tool declarations for function calling
-    // NOTE: We use individual tools (tap, type, navigate, done, ask_user)
-    // instead of the compound agent_step used in text mode.
-    // The native audio model in real-time can call simple tools but struggles
-    // with the complex agent_step schema (it speaks about calling tools
-    // instead of actually calling them).
-    if (this.config.tools?.length) {
-      const validTools = this.config.tools.filter(t => t.name !== 'capture_screenshot');
-      if (validTools.length > 0) {
-        setup.tools = [{
-          functionDeclarations: validTools.map(tool => {
-            const hasParams = Object.keys(tool.parameters || {}).length > 0;
-            const functionDecl = {
-              name: tool.name,
-              description: tool.description
-            };
-            if (hasParams) {
-              functionDecl.parameters = {
-                type: 'OBJECT',
-                properties: Object.fromEntries(Object.entries(tool.parameters).map(([key, param]) => {
-                  // Native audio model crashes with BOOLEAN/ENUM types (error 1008)
-                  // Convert to STRING as a workaround
-                  let paramType = param.type.toUpperCase();
-                  let desc = param.description;
-                  if (paramType === 'BOOLEAN') {
-                    paramType = 'STRING';
-                    desc = `${desc} (use "true" or "false")`;
-                  }
-                  return [key, {
-                    type: paramType,
-                    description: desc
-                  }];
-                })),
-                required: Object.entries(tool.parameters).filter(([, param]) => param.required).map(([key]) => key)
-              };
+      if (hasParams) {
+        functionDecl.parameters = {
+          type: 'OBJECT',
+          properties: Object.fromEntries(Object.entries(tool.parameters).map(([key, param]) => {
+            let paramType = param.type.toUpperCase();
+            let desc = param.description;
+            if (paramType === 'BOOLEAN') {
+              paramType = 'STRING';
+              desc = `${desc} (use "true" or "false")`;
             }
-            return functionDecl;
-          })
-        }];
+            return [key, {
+              type: paramType,
+              description: desc
+            }];
+          })),
+          required: Object.entries(tool.parameters).filter(([, param]) => param.required).map(([key]) => key)
+        };
       }
-    }
-    const setupMessage = {
-      setup
-    };
-    logger.info('VoiceService', `Sending setup (model: ${model}, ${this.config.tools?.length || 0} tools)`);
-    try {
-      const payload = JSON.stringify(setupMessage);
-      logger.info('VoiceService', `📤 Raw Setup Payload: ${payload}`);
-      this.ws.send(payload);
-    } catch (err) {
-      logger.error('VoiceService', `❌ Error stringifying setup message: ${err.message}`);
-    }
+      return functionDecl;
+    });
   }
   // ─── Internal: Message Handling ────────────────────────────
-  handleMessage(event) {
+  /**
+   * Handle messages from the SDK's onmessage callback.
+   * The SDK parses binary/JSON automatically — we get clean objects.
+   *
+   * Per official docs, tool calls come at the top level as
+   * `response.toolCall.functionCalls`.
+   */
+  handleSDKMessage(message) {
     try {
-      const dataType = typeof event.data;
-      const dataLen = typeof event.data === 'string' ? event.data.length : event.data?.byteLength || 'unknown';
-      logger.info('VoiceService', `📥 WS message received: type=${dataType}, length=${dataLen}`);
-      // Handle binary data (could be JSON or raw PCM)
-      if (typeof event.data !== 'string') {
-        logger.info('VoiceService', '📥 Binary message — processing...');
-        this.handleBinaryMessage(event.data);
-        return;
+      // RAW MESSAGE DUMP — full session visibility
+      const msgKeys = Object.keys(message || {}).join(', ');
+      logger.info('VoiceService', `📨 SDK message keys: [${msgKeys}]`);
+      // Full raw dump for non-audio messages (audio is too large)
+      if (!message.serverContent?.modelTurn?.parts?.some(p => p.inlineData)) {
+        const rawDump = JSON.stringify(message).substring(0, 1000);
+        logger.info('VoiceService', `📨 RAW: ${rawDump}`);
       }
-      // Handle JSON text messages
-      const message = JSON.parse(event.data);
-      logger.info('VoiceService', `📥 JSON message keys: ${Object.keys(message).join(', ')}`);
-      logger.info('VoiceService', `📥 Raw JSON Message: ${event.data.substring(0, 1000)}`);
-      this.processMessage(message);
-    } catch (error) {
-      logger.error('VoiceService', `Error handling message: ${error.message}`);
-    }
-  }
-  handleBinaryMessage(data) {
-    try {
-      // Try to decode as JSON first
-      let bytes;
-      if (data instanceof ArrayBuffer) {
-        bytes = new Uint8Array(data);
-      } else if (data instanceof Blob) {
-        // Blob handling — read as ArrayBuffer
-        const reader = new FileReader();
-        reader.onload = () => {
-          if (reader.result instanceof ArrayBuffer) {
-            this.processBinaryBytes(new Uint8Array(reader.result));
-          }
-        };
-        reader.readAsArrayBuffer(data);
-        return;
-      } else {
+      // Tool calls — top-level (per official docs)
+      if (message.toolCall?.functionCalls) {
+        this.handleToolCalls(message.toolCall.functionCalls);
         return;
       }
-      this.processBinaryBytes(bytes);
+      // Server content (audio, text, transcripts, turn events)
+      if (message.serverContent) {
+        this.handleServerContent(message.serverContent);
+      }
+      // Setup complete acknowledgment
+      if (message.setupComplete !== undefined) {
+        logger.info('VoiceService', '✅ Setup complete — ready for audio');
+        this.callbacks.onSetupComplete?.();
+      }
+      // Error messages
+      if (message.error) {
+        logger.error('VoiceService', `Server error: ${JSON.stringify(message.error)}`);
+        this.callbacks.onError?.(message.error.message || 'Server error');
+      }
     } catch (error) {
-      logger.error('VoiceService', `Error handling binary message: ${error.message}`);
+      logger.error('VoiceService', `Error handling SDK message: ${error.message}`);
     }
   }
-  processBinaryBytes(bytes) {
-    // Check if it looks like JSON (starts with '{' or '[')
-    const looksLikeJson = bytes.length > 0 && (bytes[0] === 123 || bytes[0] === 91);
-    if (looksLikeJson) {
-      try {
-        const text = new TextDecoder('utf-8').decode(bytes);
-        const message = JSON.parse(text);
-        this.processMessage(message);
-      } catch {
-        // Not JSON — treat as raw PCM audio
-        this.callbacks.onAudioResponse?.(this.arrayBufferToBase64(bytes.buffer));
-      }
-    } else {
-      // Raw PCM audio data
-      this.callbacks.onAudioResponse?.(this.arrayBufferToBase64(bytes.buffer));
+  /** Process tool calls from the model */
+  handleToolCalls(functionCalls) {
+    for (const fn of functionCalls) {
+      logger.info('VoiceService', `🎯 Tool call: ${fn.name}(${JSON.stringify(fn.args)}) [id=${fn.id}]`);
+      this.callbacks.onToolCall?.({
+        name: fn.name,
+        args: fn.args || {},
+        id: fn.id
+      });
     }
   }
-  processMessage(message) {
-    // Setup complete acknowledgment
-    if (message.setupComplete !== undefined) {
-      logger.info('VoiceService', '✅ Setup complete — ready for audio exchange');
-      this.setupComplete = true;
-      this.setStatus('connected');
-      return;
+  audioResponseCount = 0;
+  /** Process server content (audio responses, transcripts, turn events) */
+  handleServerContent(content) {
+    // Log all keys for full visibility
+    const contentKeys = Object.keys(content || {}).join(', ');
+    logger.debug('VoiceService', `📦 serverContent keys: [${contentKeys}]`);
+    // Turn complete
+    if (content.turnComplete) {
+      logger.info('VoiceService', `🏁 Turn complete (audioChunks sent: ${this.audioResponseCount})`);
+      this.audioResponseCount = 0;
+      this.callbacks.onTurnComplete?.();
     }
-    // Server content (audio response + transcripts)
-    if (message.serverContent) {
-      const content = message.serverContent;
-      logger.info('VoiceService', `📥 serverContent received — turnComplete=${content.turnComplete}, hasParts=${!!content.modelTurn?.parts}, inputTranscription=${!!content.inputTranscription}, outputTranscription=${!!content.outputTranscription}`);
-      // Check for turn complete
-      if (content.turnComplete) {
-        this.callbacks.onTurnComplete?.();
-      }
-      // Process model output parts
-      if (content.modelTurn?.parts) {
-        for (const part of content.modelTurn.parts) {
-          // Audio response
-          if (part.inlineData?.data) {
-            logger.info('VoiceService', `🔊 Audio response: ${part.inlineData.data.length} chars`);
-            this.callbacks.onAudioResponse?.(part.inlineData.data);
-          }
-          // Text response (transcript)
-          if (part.text) {
-            logger.info('VoiceService', `💬 Text response: "${part.text}"`);
-            this.callbacks.onTranscript?.(part.text, true, 'model');
+    // Model output parts (audio + optional thinking text)
+    if (content.modelTurn?.parts) {
+      for (const part of content.modelTurn.parts) {
+        if (part.inlineData?.data) {
+          this.audioResponseCount++;
+          if (this.audioResponseCount <= 3 || this.audioResponseCount % 20 === 0) {
+            logger.info('VoiceService', `🔊 Audio chunk #${this.audioResponseCount}: ${part.inlineData.data.length} b64 chars, mime=${part.inlineData.mimeType || 'unknown'}`);
           }
+          this.callbacks.onAudioResponse?.(part.inlineData.data);
+        }
+        if (part.text) {
+          logger.info('VoiceService', `🤖 MODEL: "${part.text}"`);
+          this.callbacks.onTranscript?.(part.text, true, 'model');
         }
       }
+    }
-      // Input transcription (user's speech)
-      if (content.inputTranscription?.text) {
-        this.callbacks.onTranscript?.(content.inputTranscription.text, true, 'user');
-      }
-      // Output transcription (model's speech-to-text)
-      if (content.outputTranscription?.text) {
-        this.callbacks.onTranscript?.(content.outputTranscription.text, true, 'model');
-      }
+    // Input transcription (user's speech-to-text)
+    if (content.inputTranscription?.text) {
+      logger.info('VoiceService', `🗣️ USER (voice): "${content.inputTranscription.text}"`);
+      this.callbacks.onTranscript?.(content.inputTranscription.text, true, 'user');
     }
-    // Tool calls from the model
-    if (message.toolCall?.functionCalls) {
-      for (const fn of message.toolCall.functionCalls) {
-        logger.info('VoiceService', `🎯 Tool call: ${fn.name}(${JSON.stringify(fn.args)})`);
-        this.callbacks.onToolCall?.({
-          name: fn.name,
-          args: fn.args || {},
-          id: fn.id
-        });
-      }
+    // Output transcription (model's speech-to-text)
+    if (content.outputTranscription?.text) {
+      logger.info('VoiceService', `🤖 MODEL (voice): "${content.outputTranscription.text}"`);
+      this.callbacks.onTranscript?.(content.outputTranscription.text, true, 'model');
     }
-    // Error messages
-    if (message.error) {
-      logger.error('VoiceService', `Server error: ${JSON.stringify(message.error)}`);
-      this.callbacks.onError?.(message.error.message || 'Server error');
+    // Tool calls inside serverContent (some SDK versions deliver here)
+    if (content.toolCall?.functionCalls) {
+      this.handleToolCalls(content.toolCall.functionCalls);
     }
   }
@@ -388,13 +402,5 @@ export class VoiceService {
     this._status = newStatus;
     this.callbacks.onStatusChange?.(newStatus);
   }
-  arrayBufferToBase64(buffer) {
-    const bytes = new Uint8Array(buffer);
-    let binary = '';
-    for (let i = 0; i < bytes.byteLength; i++) {
-      binary += String.fromCharCode(bytes[i]);
-    }
-    return btoa(binary);
-  }
 }
 //# sourceMappingURL=VoiceService.js.map