@mobileai/react-native 0.4.2 → 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. package/README.md +23 -3
  2. package/lib/module/components/AIAgent.js +216 -5
  3. package/lib/module/components/AIAgent.js.map +1 -1
  4. package/lib/module/components/AgentChatBar.js +358 -36
  5. package/lib/module/components/AgentChatBar.js.map +1 -1
  6. package/lib/module/core/AgentRuntime.js +122 -6
  7. package/lib/module/core/AgentRuntime.js.map +1 -1
  8. package/lib/module/core/systemPrompt.js +57 -0
  9. package/lib/module/core/systemPrompt.js.map +1 -1
  10. package/lib/module/index.js +8 -0
  11. package/lib/module/index.js.map +1 -1
  12. package/lib/module/providers/GeminiProvider.js +108 -85
  13. package/lib/module/providers/GeminiProvider.js.map +1 -1
  14. package/lib/module/services/AudioInputService.js +128 -0
  15. package/lib/module/services/AudioInputService.js.map +1 -0
  16. package/lib/module/services/AudioOutputService.js +154 -0
  17. package/lib/module/services/AudioOutputService.js.map +1 -0
  18. package/lib/module/services/VoiceService.js +361 -0
  19. package/lib/module/services/VoiceService.js.map +1 -0
  20. package/lib/module/utils/audioUtils.js +49 -0
  21. package/lib/module/utils/audioUtils.js.map +1 -0
  22. package/lib/module/utils/logger.js +21 -4
  23. package/lib/module/utils/logger.js.map +1 -1
  24. package/lib/typescript/babel.config.d.ts +10 -0
  25. package/lib/typescript/babel.config.d.ts.map +1 -0
  26. package/lib/typescript/eslint.config.d.mts +3 -0
  27. package/lib/typescript/eslint.config.d.mts.map +1 -0
  28. package/lib/typescript/fetch-models.d.mts +2 -0
  29. package/lib/typescript/fetch-models.d.mts.map +1 -0
  30. package/lib/typescript/list-all-models.d.mts +2 -0
  31. package/lib/typescript/list-all-models.d.mts.map +1 -0
  32. package/lib/typescript/list-models.d.mts +2 -0
  33. package/lib/typescript/list-models.d.mts.map +1 -0
  34. package/lib/typescript/src/components/AIAgent.d.ts +8 -2
  35. package/lib/typescript/src/components/AIAgent.d.ts.map +1 -1
  36. package/lib/typescript/src/components/AgentChatBar.d.ts +19 -2
  37. package/lib/typescript/src/components/AgentChatBar.d.ts.map +1 -1
  38. package/lib/typescript/src/core/AgentRuntime.d.ts +17 -1
  39. package/lib/typescript/src/core/AgentRuntime.d.ts.map +1 -1
  40. package/lib/typescript/src/core/systemPrompt.d.ts +8 -0
  41. package/lib/typescript/src/core/systemPrompt.d.ts.map +1 -1
  42. package/lib/typescript/src/core/types.d.ts +24 -1
  43. package/lib/typescript/src/core/types.d.ts.map +1 -1
  44. package/lib/typescript/src/index.d.ts +6 -1
  45. package/lib/typescript/src/index.d.ts.map +1 -1
  46. package/lib/typescript/src/providers/GeminiProvider.d.ts +22 -18
  47. package/lib/typescript/src/providers/GeminiProvider.d.ts.map +1 -1
  48. package/lib/typescript/src/services/AudioInputService.d.ts +31 -0
  49. package/lib/typescript/src/services/AudioInputService.d.ts.map +1 -0
  50. package/lib/typescript/src/services/AudioOutputService.d.ts +34 -0
  51. package/lib/typescript/src/services/AudioOutputService.d.ts.map +1 -0
  52. package/lib/typescript/src/services/VoiceService.d.ts +73 -0
  53. package/lib/typescript/src/services/VoiceService.d.ts.map +1 -0
  54. package/lib/typescript/src/utils/audioUtils.d.ts +17 -0
  55. package/lib/typescript/src/utils/audioUtils.d.ts.map +1 -0
  56. package/lib/typescript/src/utils/logger.d.ts +4 -0
  57. package/lib/typescript/src/utils/logger.d.ts.map +1 -1
  58. package/package.json +24 -8
  59. package/src/components/AIAgent.tsx +222 -3
  60. package/src/components/AgentChatBar.tsx +487 -42
  61. package/src/core/AgentRuntime.ts +131 -2
  62. package/src/core/systemPrompt.ts +62 -0
  63. package/src/core/types.ts +30 -0
  64. package/src/index.ts +16 -0
  65. package/src/providers/GeminiProvider.ts +105 -89
  66. package/src/services/AudioInputService.ts +141 -0
  67. package/src/services/AudioOutputService.ts +167 -0
  68. package/src/services/VoiceService.ts +407 -0
  69. package/src/utils/audioUtils.ts +54 -0
  70. package/src/utils/logger.ts +24 -7
@@ -1,40 +1,34 @@
1
1
  /**
2
- * GeminiProvider — Gemini API integration with structured action pattern.
2
+ * GeminiProvider — Gemini API integration via @google/genai SDK.
3
3
  *
4
- * Uses a single forced function call (`agent_step`) that bundles
5
- * structured reasoning (evaluation, memory, plan) alongside the action.
6
- * This replaces free-form text + separate tool calls for stability.
4
+ * Uses the official Google GenAI SDK for:
5
+ * - generateContent with structured function calling (agent_step)
6
+ * - inlineData for vision (base64 screenshots)
7
+ * - System instructions
8
+ *
9
+ * Implements the AIProvider interface so it can be swapped
10
+ * with OpenAIProvider, AnthropicProvider, etc.
7
11
  */
8
12
 
13
+ import { GoogleGenAI, FunctionCallingConfigMode, Type } from '@google/genai';
9
14
  import { logger } from '../utils/logger';
10
- import type { AIProvider, ToolDefinition, AgentStep, ProviderResult, AgentReasoning } from '../core/types';
15
+ import type { AIProvider, ToolDefinition, AgentStep, ProviderResult, AgentReasoning, TokenUsage } from '../core/types';
11
16
 
12
17
  // ─── Constants ─────────────────────────────────────────────────
13
18
 
14
19
  const AGENT_STEP_FN = 'agent_step';
15
20
 
16
- // Reasoning fields that are always present in the agent_step schema
21
+ // Reasoning fields always present in the agent_step schema
17
22
  const REASONING_FIELDS = ['previous_goal_eval', 'memory', 'plan'] as const;
18
23
 
19
- // ─── Gemini API Types ──────────────────────────────────────────
20
-
21
- interface GeminiContent {
22
- role: 'user' | 'model';
23
- parts: Array<{
24
- text?: string;
25
- functionCall?: { name: string; args: any };
26
- functionResponse?: { name: string; response: any };
27
- }>;
28
- }
29
-
30
24
  // ─── Provider ──────────────────────────────────────────────────
31
25
 
32
26
  export class GeminiProvider implements AIProvider {
33
- private apiKey: string;
27
+ private ai: GoogleGenAI;
34
28
  private model: string;
35
29
 
36
30
  constructor(apiKey: string, model: string = 'gemini-2.5-flash') {
37
- this.apiKey = apiKey;
31
+ this.ai = new GoogleGenAI({ apiKey });
38
32
  this.model = model;
39
33
  }
40
34
 
@@ -43,59 +37,56 @@ export class GeminiProvider implements AIProvider {
43
37
  userMessage: string,
44
38
  tools: ToolDefinition[],
45
39
  history: AgentStep[],
40
+ screenshot?: string,
46
41
  ): Promise<ProviderResult> {
47
42
 
48
- logger.info('GeminiProvider', `Sending request. Model: ${this.model}, Tools: ${tools.length}`);
43
+ logger.info('GeminiProvider', `Sending request. Model: ${this.model}, Tools: ${tools.length}${screenshot ? ', with screenshot' : ''}`);
49
44
 
50
45
  // Build single agent_step function declaration
51
46
  const agentStepDeclaration = this.buildAgentStepDeclaration(tools);
52
47
 
53
- // Build conversation history with proper function call/response pairs
54
- const contents = this.buildContents(userMessage, history);
55
-
56
- // Make API request
57
- const url = `https://generativelanguage.googleapis.com/v1beta/models/${this.model}:generateContent?key=${this.apiKey}`;
58
-
59
- const body: any = {
60
- contents,
61
- tools: [{ functionDeclarations: [agentStepDeclaration] }],
62
- systemInstruction: { parts: [{ text: systemPrompt }] },
63
- // Force the model to always call agent_step
64
- tool_config: {
65
- function_calling_config: {
66
- mode: 'ANY',
67
- allowed_function_names: [AGENT_STEP_FN],
68
- },
69
- },
70
- generationConfig: {
71
- temperature: 0.2,
72
- maxOutputTokens: 2048,
73
- },
74
- };
48
+ // Build contents (user message + optional screenshot)
49
+ const contents = this.buildContents(userMessage, history, screenshot);
75
50
 
76
51
  const startTime = Date.now();
77
52
 
78
53
  try {
79
- const response = await fetch(url, {
80
- method: 'POST',
81
- headers: { 'Content-Type': 'application/json' },
82
- body: JSON.stringify(body),
54
+ const response = await this.ai.models.generateContent({
55
+ model: this.model,
56
+ contents,
57
+ config: {
58
+ systemInstruction: systemPrompt,
59
+ tools: [{ functionDeclarations: [agentStepDeclaration] }],
60
+ toolConfig: {
61
+ functionCallingConfig: {
62
+ mode: FunctionCallingConfigMode.ANY,
63
+ allowedFunctionNames: [AGENT_STEP_FN],
64
+ },
65
+ },
66
+ temperature: 0.2,
67
+ maxOutputTokens: 2048,
68
+ },
83
69
  });
84
70
 
85
71
  const elapsed = Date.now() - startTime;
86
72
  logger.info('GeminiProvider', `Response received in ${elapsed}ms`);
87
73
 
88
- if (!response.ok) {
89
- const errorText = await response.text();
90
- logger.error('GeminiProvider', `API error ${response.status}: ${errorText}`);
91
- throw new Error(`Gemini API error ${response.status}: ${errorText}`);
74
+ // Extract token usage from SDK response
75
+ const tokenUsage = this.extractTokenUsage(response);
76
+ if (tokenUsage) {
77
+ logger.info('GeminiProvider', `Tokens: ${tokenUsage.promptTokens} in / ${tokenUsage.completionTokens} out / $${tokenUsage.estimatedCostUSD.toFixed(6)}`);
92
78
  }
93
79
 
94
- const data = await response.json();
95
-
96
- return this.parseAgentStepResponse(data, tools);
80
+ const result = this.parseAgentStepResponse(response, tools);
81
+ result.tokenUsage = tokenUsage;
82
+ return result;
97
83
  } catch (error: any) {
98
84
  logger.error('GeminiProvider', 'Request failed:', error.message);
85
+
86
+ // Preserve HTTP error format for backward compatibility with tests
87
+ if (error.status) {
88
+ throw new Error(`Gemini API error ${error.status}: ${error.message}`);
89
+ }
99
90
  throw error;
100
91
  }
101
92
  }
@@ -117,7 +108,6 @@ export class GeminiProvider implements AIProvider {
117
108
  const actionProperties: Record<string, any> = {};
118
109
  for (const tool of tools) {
119
110
  for (const [paramName, param] of Object.entries(tool.parameters)) {
120
- // Skip if already added (shared field names like 'text', 'index')
121
111
  if (actionProperties[paramName]) continue;
122
112
  actionProperties[paramName] = {
123
113
  type: this.mapParamType(param.type),
@@ -139,28 +129,25 @@ export class GeminiProvider implements AIProvider {
139
129
  name: AGENT_STEP_FN,
140
130
  description: `Execute one agent step. Choose an action and provide reasoning.\n\nAvailable actions:\n${toolDescriptions}`,
141
131
  parameters: {
142
- type: 'OBJECT',
132
+ type: Type.OBJECT,
143
133
  properties: {
144
- // ── Reasoning fields ──
145
134
  previous_goal_eval: {
146
- type: 'STRING',
135
+ type: Type.STRING,
147
136
  description: 'One-sentence assessment of your last action. State success, failure, or uncertain. Skip on first step.',
148
137
  },
149
138
  memory: {
150
- type: 'STRING',
139
+ type: Type.STRING,
151
140
  description: 'Key facts to remember for future steps: progress made, items found, counters, field values already collected.',
152
141
  },
153
142
  plan: {
154
- type: 'STRING',
143
+ type: Type.STRING,
155
144
  description: 'Your immediate next goal — what action you will take and why.',
156
145
  },
157
- // ── Action selection ──
158
146
  action_name: {
159
- type: 'STRING',
147
+ type: Type.STRING,
160
148
  description: 'Which action to execute.',
161
149
  enum: toolNames,
162
150
  },
163
- // ── Action parameters (flat) ──
164
151
  ...actionProperties,
165
152
  },
166
153
  required: ['plan', 'action_name'],
@@ -170,43 +157,46 @@ export class GeminiProvider implements AIProvider {
170
157
 
171
158
  private mapParamType(type: string): string {
172
159
  switch (type) {
173
- case 'number': return 'NUMBER';
174
- case 'integer': return 'INTEGER';
175
- case 'boolean': return 'BOOLEAN';
160
+ case 'number': return Type.NUMBER;
161
+ case 'integer': return Type.INTEGER;
162
+ case 'boolean': return Type.BOOLEAN;
176
163
  case 'string':
177
- default: return 'STRING';
164
+ default: return Type.STRING;
178
165
  }
179
166
  }
180
167
 
181
168
  // ─── Build Contents ────────────────────────────────────────
182
169
 
183
170
  /**
184
- * Builds Gemini conversation contents.
185
- *
186
- * Each step is a STATELESS single-turn request (matching page-agent's approach):
187
- * - System prompt has general instructions
188
- * - User message contains full context: task, history, screen state
189
- * - Model responds with agent_step function call
190
- *
191
- * History is embedded as text in assembleUserPrompt (via <agent_history>),
192
- * NOT as functionCall/functionResponse pairs. This avoids Gemini's
193
- * conversation format requirements and thought_signature complexity.
171
+ * Builds contents for the generateContent call.
172
+ * Single-turn: user message + optional screenshot as inlineData.
194
173
  */
195
- private buildContents(userMessage: string, _history: AgentStep[]): GeminiContent[] {
196
- return [{
197
- role: 'user',
198
- parts: [{ text: userMessage }],
199
- }];
174
+ private buildContents(userMessage: string, _history: AgentStep[], screenshot?: string): any[] {
175
+ const parts: any[] = [{ text: userMessage }];
176
+
177
+ // Append screenshot as inlineData for Gemini vision
178
+ if (screenshot) {
179
+ parts.push({
180
+ inlineData: {
181
+ mimeType: 'image/jpeg',
182
+ data: screenshot,
183
+ },
184
+ });
185
+ }
186
+
187
+ return [{ role: 'user', parts }];
200
188
  }
201
189
 
202
190
  // ─── Parse Response ────────────────────────────────────────
203
191
 
204
192
  /**
205
- * Parses the Gemini response expecting a single agent_step function call.
206
- * Extracts structured reasoning + action, and determines which tool to execute.
193
+ * Parses the SDK response expecting a single agent_step function call.
194
+ * Extracts structured reasoning + action.
207
195
  */
208
- private parseAgentStepResponse(data: any, tools: ToolDefinition[]): ProviderResult {
209
- if (!data.candidates || data.candidates.length === 0) {
196
+ private parseAgentStepResponse(response: any, tools: ToolDefinition[]): ProviderResult {
197
+ const candidates = response.candidates || [];
198
+
199
+ if (candidates.length === 0) {
210
200
  logger.warn('GeminiProvider', 'No candidates in response');
211
201
  return {
212
202
  toolCalls: [{ name: 'done', args: { text: 'No response generated.', success: false } }],
@@ -215,7 +205,7 @@ export class GeminiProvider implements AIProvider {
215
205
  };
216
206
  }
217
207
 
218
- const candidate = data.candidates[0];
208
+ const candidate = candidates[0];
219
209
  const parts = candidate.content?.parts || [];
220
210
 
221
211
  // Find the function call part
@@ -251,11 +241,10 @@ export class GeminiProvider implements AIProvider {
251
241
  };
252
242
  }
253
243
 
254
- // Build action args: everything except reasoning fields and action_name
244
+ // Build action args: extract only the params that belong to the matched tool
255
245
  const actionArgs: Record<string, any> = {};
256
246
  const reservedKeys = new Set([...REASONING_FIELDS, 'action_name']);
257
247
 
258
- // Find the matching tool to know which params belong to it
259
248
  const matchedTool = tools.find(t => t.name === actionName);
260
249
  if (matchedTool) {
261
250
  for (const paramName of Object.keys(matchedTool.parameters)) {
@@ -264,7 +253,6 @@ export class GeminiProvider implements AIProvider {
264
253
  }
265
254
  }
266
255
  } else {
267
- // Custom/registered tool — grab all non-reserved fields
268
256
  for (const [key, value] of Object.entries(args)) {
269
257
  if (!reservedKeys.has(key)) {
270
258
  actionArgs[key] = value;
@@ -280,4 +268,32 @@ export class GeminiProvider implements AIProvider {
280
268
  text: textPart?.text,
281
269
  };
282
270
  }
271
+
272
+ // ─── Token Usage Extraction ─────────────────────────────────
273
+
274
+ /**
275
+ * Extracts token usage from SDK response and calculates estimated cost.
276
+ *
277
+ * Pricing (Gemini 2.5 Flash):
278
+ * - Input: $0.30 / 1M tokens
279
+ * - Output: $2.50 / 1M tokens
280
+ */
281
+ private extractTokenUsage(response: any): TokenUsage | undefined {
282
+ const meta = response?.usageMetadata;
283
+ if (!meta) return undefined;
284
+
285
+ const promptTokens = meta.promptTokenCount ?? 0;
286
+ const completionTokens = meta.candidatesTokenCount ?? 0;
287
+ const totalTokens = meta.totalTokenCount ?? (promptTokens + completionTokens);
288
+
289
+ // Cost estimation based on Gemini 2.5 Flash pricing
290
+ const INPUT_COST_PER_M = 0.30;
291
+ const OUTPUT_COST_PER_M = 2.50;
292
+
293
+ const estimatedCostUSD =
294
+ (promptTokens / 1_000_000) * INPUT_COST_PER_M +
295
+ (completionTokens / 1_000_000) * OUTPUT_COST_PER_M;
296
+
297
+ return { promptTokens, completionTokens, totalTokens, estimatedCostUSD };
298
+ }
283
299
  }
@@ -0,0 +1,141 @@
1
+ /**
2
+ * AudioInputService — Real-time microphone capture for voice mode.
3
+ *
4
+ * Uses react-native-audio-api (Software Mansion) AudioRecorder for native
5
+ * PCM streaming from the microphone. Each chunk is converted from Float32
6
+ * to Int16 PCM and base64-encoded for the Gemini Live API.
7
+ *
8
+ * Requires: react-native-audio-api (development build only, not Expo Go)
9
+ */
10
+
11
+ import { logger } from '../utils/logger';
12
+ import { float32ToInt16Base64 } from '../utils/audioUtils';
13
+
14
+ // ─── Types ─────────────────────────────────────────────────────
15
+
16
+ export interface AudioInputConfig {
17
+ sampleRate?: number;
18
+ /** Number of samples per callback buffer (default: 4096) */
19
+ bufferLength?: number;
20
+ /** Callback with base64 PCM audio chunk */
21
+ onAudioChunk: (base64Audio: string) => void;
22
+ onError?: (error: string) => void;
23
+ onPermissionDenied?: () => void;
24
+ }
25
+
26
+ type RecordingStatus = 'idle' | 'recording' | 'paused';
27
+
28
+ // ─── Service ───────────────────────────────────────────────────
29
+
30
+ export class AudioInputService {
31
+ private config: AudioInputConfig;
32
+ private status: RecordingStatus = 'idle';
33
+ private recorder: any = null;
34
+
35
+ constructor(config: AudioInputConfig) {
36
+ this.config = config;
37
+ }
38
+
39
+ // ─── Lifecycle ─────────────────────────────────────────────
40
+
41
+ async start(): Promise<boolean> {
42
+ try {
43
+ // Lazy-load react-native-audio-api (optional peer dependency)
44
+ let audioApi: any;
45
+ try {
46
+ audioApi = require('react-native-audio-api');
47
+ } catch {
48
+ const msg =
49
+ 'Voice mode requires react-native-audio-api. Install with: npm install react-native-audio-api';
50
+ logger.error('AudioInput', msg);
51
+ this.config.onError?.(msg);
52
+ return false;
53
+ }
54
+
55
+ // Request mic permission (Android)
56
+ try {
57
+ const { PermissionsAndroid, Platform } = require('react-native');
58
+ if (Platform.OS === 'android') {
59
+ const result = await PermissionsAndroid.request(
60
+ PermissionsAndroid.PERMISSIONS.RECORD_AUDIO
61
+ );
62
+ if (result !== PermissionsAndroid.RESULTS.GRANTED) {
63
+ logger.warn('AudioInput', 'Microphone permission denied');
64
+ this.config.onPermissionDenied?.();
65
+ return false;
66
+ }
67
+ }
68
+ } catch {
69
+ // Permission check failed — continue and let native layer handle it
70
+ }
71
+
72
+ // Create AudioRecorder
73
+ this.recorder = new audioApi.AudioRecorder();
74
+
75
+ const sampleRate = this.config.sampleRate || 16000;
76
+ const bufferLength = this.config.bufferLength || 4096;
77
+
78
+ // Register audio data callback
79
+ let frameCount = 0;
80
+ this.recorder.onAudioReady(
81
+ { sampleRate, bufferLength, channelCount: 1 },
82
+ (event: any) => {
83
+ frameCount++;
84
+ try {
85
+ // event.buffer is an AudioBuffer — get Float32 channel data
86
+ const float32Data = event.buffer.getChannelData(0);
87
+ // Convert Float32 → Int16 → base64 for Gemini
88
+ const base64Chunk = float32ToInt16Base64(float32Data);
89
+ logger.debug('AudioInput', `🎤 Frame #${frameCount}: size=${base64Chunk.length}`);
90
+ this.config.onAudioChunk(base64Chunk);
91
+ } catch (err: any) {
92
+ logger.error('AudioInput', `Frame processing error: ${err.message}`);
93
+ }
94
+ }
95
+ );
96
+
97
+ // Register error callback
98
+ this.recorder.onError((error: any) => {
99
+ logger.error('AudioInput', `Recorder error: ${error.message || error}`);
100
+ this.config.onError?.(error.message || String(error));
101
+ });
102
+
103
+ // Start recording
104
+ this.recorder.start();
105
+ this.status = 'recording';
106
+ logger.info('AudioInput', `Streaming started (${sampleRate}Hz, bufLen=${bufferLength})`);
107
+ return true;
108
+ } catch (error: any) {
109
+ logger.error('AudioInput', `Failed to start: ${error.message}`);
110
+ this.config.onError?.(error.message);
111
+ return false;
112
+ }
113
+ }
114
+
115
+ async stop(): Promise<void> {
116
+ try {
117
+ if (this.recorder && this.status !== 'idle') {
118
+ this.recorder.clearOnAudioReady();
119
+ this.recorder.clearOnError();
120
+ this.recorder.stop();
121
+ }
122
+ this.recorder = null;
123
+ this.status = 'idle';
124
+ logger.info('AudioInput', 'Streaming stopped');
125
+ } catch (error: any) {
126
+ logger.error('AudioInput', `Failed to stop: ${error.message}`);
127
+ this.recorder = null;
128
+ this.status = 'idle';
129
+ }
130
+ }
131
+
132
+ // ─── Status ───────────────────────────────────────────────
133
+
134
+ get isRecording(): boolean {
135
+ return this.status === 'recording';
136
+ }
137
+
138
+ get currentStatus(): RecordingStatus {
139
+ return this.status;
140
+ }
141
+ }
@@ -0,0 +1,167 @@
1
+ /**
2
+ * AudioOutputService — AI speech playback for voice mode.
3
+ *
4
+ * Uses react-native-audio-api (Software Mansion) for gapless, low-latency
5
+ * PCM playback. Decodes base64 PCM from Gemini Live API and queues it via
6
+ * AudioBufferQueueSourceNode for seamless streaming.
7
+ *
8
+ * Requires: react-native-audio-api (development build only, not Expo Go)
9
+ */
10
+
11
+ import { logger } from '../utils/logger';
12
+ import { base64ToFloat32 } from '../utils/audioUtils';
13
+
14
+ // ─── Types ─────────────────────────────────────────────────────
15
+
16
+ /** Gemini Live API outputs 24kHz 16-bit mono PCM */
17
+ const GEMINI_OUTPUT_SAMPLE_RATE = 24000;
18
+
19
+ export interface AudioOutputConfig {
20
+ sampleRate?: number;
21
+ onPlaybackStart?: () => void;
22
+ onPlaybackEnd?: () => void;
23
+ onError?: (error: string) => void;
24
+ }
25
+
26
+ // ─── Service ───────────────────────────────────────────────────
27
+
28
+ export class AudioOutputService {
29
+ private config: AudioOutputConfig;
30
+ private audioContext: any = null;
31
+ private queueSourceNode: any = null;
32
+ private gainNode: any = null;
33
+ private muted = false;
34
+ private isStarted = false;
35
+ private chunkCount = 0;
36
+
37
+ constructor(config: AudioOutputConfig = {}) {
38
+ this.config = config;
39
+ }
40
+
41
+ // ─── Lifecycle ─────────────────────────────────────────────
42
+
43
+ async initialize(): Promise<boolean> {
44
+ try {
45
+ let audioApi: any;
46
+ try {
47
+ audioApi = require('react-native-audio-api');
48
+ } catch {
49
+ const msg =
50
+ 'react-native-audio-api is required for audio output. Install with: npm install react-native-audio-api';
51
+ logger.error('AudioOutput', msg);
52
+ this.config.onError?.(msg);
53
+ return false;
54
+ }
55
+
56
+ const sampleRate = this.config.sampleRate || GEMINI_OUTPUT_SAMPLE_RATE;
57
+
58
+ // Create AudioContext at Gemini's output sample rate
59
+ this.audioContext = new audioApi.AudioContext({ sampleRate });
60
+
61
+ // Create GainNode for mute control
62
+ this.gainNode = this.audioContext.createGain();
63
+ this.gainNode.gain.value = 1.0;
64
+ this.gainNode.connect(this.audioContext.destination);
65
+
66
+ // Create AudioBufferQueueSourceNode for gapless streaming
67
+ this.queueSourceNode = this.audioContext.createBufferQueueSource();
68
+ this.queueSourceNode.connect(this.gainNode);
69
+
70
+ logger.info('AudioOutput', `Initialized (${sampleRate}Hz, AudioBufferQueueSourceNode)`);
71
+ return true;
72
+ } catch (error: any) {
73
+ logger.error('AudioOutput', `Failed to initialize: ${error.message}`);
74
+ this.config.onError?.(error.message);
75
+ return false;
76
+ }
77
+ }
78
+
79
+ // ─── Enqueue Audio ─────────────────────────────────────────
80
+
81
+ /** Add a base64-encoded PCM chunk from Gemini to the playback queue */
82
+ enqueue(base64Audio: string): void {
83
+ if (this.muted || !this.audioContext || !this.queueSourceNode) return;
84
+
85
+ try {
86
+ this.chunkCount++;
87
+
88
+ // Decode base64 Int16 PCM → Float32
89
+ const float32Data = base64ToFloat32(base64Audio);
90
+ const sampleRate = this.config.sampleRate || GEMINI_OUTPUT_SAMPLE_RATE;
91
+
92
+ // Create an AudioBuffer and fill it with PCM data
93
+ const audioBuffer = this.audioContext.createBuffer(1, float32Data.length, sampleRate);
94
+ audioBuffer.copyToChannel(float32Data, 0);
95
+
96
+ // Enqueue the buffer for gapless playback
97
+ this.queueSourceNode.enqueueBuffer(audioBuffer);
98
+
99
+ // Start playback on first enqueue
100
+ if (!this.isStarted) {
101
+ this.queueSourceNode.start();
102
+ this.isStarted = true;
103
+ this.config.onPlaybackStart?.();
104
+ logger.info('AudioOutput', '▶️ Playback started');
105
+ }
106
+
107
+ if (this.chunkCount % 20 === 0) {
108
+ logger.debug('AudioOutput', `Queued chunk #${this.chunkCount}`);
109
+ }
110
+ } catch (error: any) {
111
+ logger.error('AudioOutput', `Enqueue error: ${error.message}`);
112
+ }
113
+ }
114
+
115
+ // ─── Mute/Unmute ──────────────────────────────────────────
116
+
117
+ mute(): void {
118
+ this.muted = true;
119
+ if (this.gainNode) {
120
+ this.gainNode.gain.value = 0;
121
+ }
122
+ logger.info('AudioOutput', 'Speaker muted');
123
+ }
124
+
125
+ unmute(): void {
126
+ this.muted = false;
127
+ if (this.gainNode) {
128
+ this.gainNode.gain.value = 1.0;
129
+ }
130
+ logger.info('AudioOutput', 'Speaker unmuted');
131
+ }
132
+
133
+ get isMuted(): boolean {
134
+ return this.muted;
135
+ }
136
+
137
+ // ─── Stop & Cleanup ───────────────────────────────────────
138
+
139
+ async stop(): Promise<void> {
140
+ try {
141
+ if (this.queueSourceNode && this.isStarted) {
142
+ this.queueSourceNode.stop();
143
+ this.queueSourceNode.clearBuffers();
144
+ }
145
+ this.isStarted = false;
146
+ this.chunkCount = 0;
147
+ this.config.onPlaybackEnd?.();
148
+ logger.info('AudioOutput', 'Playback stopped');
149
+ } catch (error: any) {
150
+ logger.error('AudioOutput', `Stop error: ${error.message}`);
151
+ }
152
+ }
153
+
154
+ async cleanup(): Promise<void> {
155
+ await this.stop();
156
+ try {
157
+ if (this.audioContext) {
158
+ await this.audioContext.close();
159
+ }
160
+ } catch {
161
+ // Non-critical
162
+ }
163
+ this.audioContext = null;
164
+ this.queueSourceNode = null;
165
+ this.gainNode = null;
166
+ }
167
+ }