npm - @layercode/js-sdk - Versions diffs - 2.8.2 → 2.8.4 - Mend

@layercode/js-sdk 2.8.2 → 2.8.4

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

package/README.md +15 -5
package/dist/layercode-js-sdk.esm.js +345 -242
package/dist/layercode-js-sdk.esm.js.map +1 -1
package/dist/layercode-js-sdk.min.js +345 -242
package/dist/layercode-js-sdk.min.js.map +1 -1
package/dist/types/index.d.ts +21 -3
package/dist/types/interfaces.d.ts +6 -2
package/dist/types/wavtools/lib/analysis/audio_analysis.d.ts +1 -1
package/package.json +1 -1

package/dist/layercode-js-sdk.esm.js CHANGED Viewed

@@ -5082,11 +5082,7 @@ class WavRecorder {
    * @param {{sampleRate?: number, outputToSpeakers?: boolean, debug?: boolean}} [options]
    * @returns {WavRecorder}
    */
-  constructor({
-    sampleRate = 24000,
-    outputToSpeakers = false,
-    debug = false,
-  } = {}) {
+  constructor({ sampleRate = 24000, outputToSpeakers = false, debug = false } = {}) {
     // Script source
     this.scriptSrc = AudioProcessorSrc;
     // Config
@@ -5104,6 +5100,11 @@ class WavRecorder {
     this.analyser = null;
     this.recording = false;
     this.contextSampleRate = sampleRate;
+    // Track whether we've already obtained microphone permission
+    // This avoids redundant getUserMedia calls which are expensive on iOS Safari
+    this._hasPermission = false;
+    // Promise used to dedupe concurrent requestPermission() calls
+    this._permissionPromise = null;
     // Event handling with AudioWorklet
     this._lastEventId = 0;
     this.eventReceipts = {};
@@ -5131,17 +5132,13 @@ class WavRecorder {
     let blob;
     if (audioData instanceof Blob) {
       if (fromSampleRate !== -1) {
-        throw new Error(
-          `Can not specify "fromSampleRate" when reading from Blob`,
-        );
+        throw new Error(`Can not specify "fromSampleRate" when reading from Blob`);
       }
       blob = audioData;
       arrayBuffer = await blob.arrayBuffer();
     } else if (audioData instanceof ArrayBuffer) {
       if (fromSampleRate !== -1) {
-        throw new Error(
-          `Can not specify "fromSampleRate" when reading from ArrayBuffer`,
-        );
+        throw new Error(`Can not specify "fromSampleRate" when reading from ArrayBuffer`);
       }
       arrayBuffer = audioData;
       blob = new Blob([arrayBuffer], { type: 'audio/wav' });
@@ -5159,14 +5156,10 @@ class WavRecorder {
       } else if (audioData instanceof Array) {
         float32Array = new Float32Array(audioData);
       } else {
-        throw new Error(
-          `"audioData" must be one of: Blob, Float32Arrray, Int16Array, ArrayBuffer, Array<number>`,
-        );
+        throw new Error(`"audioData" must be one of: Blob, Float32Arrray, Int16Array, ArrayBuffer, Array<number>`);
       }
       if (fromSampleRate === -1) {
-        throw new Error(
-          `Must specify "fromSampleRate" when reading from Float32Array, In16Array or Array`,
-        );
+        throw new Error(`Must specify "fromSampleRate" when reading from Float32Array, In16Array or Array`);
       } else if (fromSampleRate < 3000) {
         throw new Error(`Minimum "fromSampleRate" is 3000 (3kHz)`);
       }
@@ -5196,12 +5189,13 @@ class WavRecorder {
   /**
    * Logs data in debug mode
-   * @param {...any} arguments
+   * @param {...any} args
    * @returns {true}
    */
-  log() {
+  log(...args) {
     if (this.debug) {
-      this.log(...arguments);
+      // eslint-disable-next-line no-console
+      console.log(...args);
     }
     return true;
   }
@@ -5274,10 +5268,7 @@ class WavRecorder {
    */
   listenForDeviceChange(callback) {
     if (callback === null && this._deviceChangeCallback) {
-      navigator.mediaDevices.removeEventListener(
-        'devicechange',
-        this._deviceChangeCallback,
-      );
+      navigator.mediaDevices.removeEventListener('devicechange', this._deviceChangeCallback);
       this._deviceChangeCallback = null;
     } else if (callback !== null) {
       // Basically a debounce; we only want this called once when devices change
@@ -5309,19 +5300,39 @@ class WavRecorder {
   /**
    * Manually request permission to use the microphone
+   * Skips if permission has already been granted to avoid expensive redundant getUserMedia calls.
+   * Dedupes concurrent calls to prevent multiple getUserMedia requests.
    * @returns {Promise<true>}
    */
   async requestPermission() {
-    try {
-      console.log('ensureUserMediaAccess');
-      await navigator.mediaDevices.getUserMedia({
-        audio: true,
-      });
-    } catch (fallbackError) {
-      window.alert('You must grant microphone access to use this feature.');
-      throw fallbackError;
+    // Skip if we already have permission - each getUserMedia is expensive on iOS Safari
+    if (this._hasPermission) {
+      return true;
     }
-    return true;
+    // Dedupe concurrent calls: if a permission request is already in flight, wait for it
+    if (this._permissionPromise) {
+      return this._permissionPromise;
+    }
+    console.log('ensureUserMediaAccess');
+    this._permissionPromise = (async () => {
+      try {
+        const stream = await navigator.mediaDevices.getUserMedia({
+          audio: true,
+        });
+        // Stop the tracks immediately after getting permission
+        stream.getTracks().forEach((track) => track.stop());
+        this._hasPermission = true;
+        return true;
+      } catch (fallbackError) {
+        console.error('getUserMedia failed:', fallbackError.name, fallbackError.message);
+        throw fallbackError;
+      } finally {
+        this._permissionPromise = null;
+      }
+    })();
+    return this._permissionPromise;
   }
   /**
@@ -5329,25 +5340,18 @@ class WavRecorder {
    * @returns {Promise<Array<MediaDeviceInfo & {default: boolean}>>}
    */
   async listDevices() {
-    if (
-      !navigator.mediaDevices ||
-      !('enumerateDevices' in navigator.mediaDevices)
-    ) {
+    if (!navigator.mediaDevices || !('enumerateDevices' in navigator.mediaDevices)) {
       throw new Error('Could not request user devices');
     }
     await this.requestPermission();
     const devices = await navigator.mediaDevices.enumerateDevices();
     const audioDevices = devices.filter((device) => device.kind === 'audioinput');
-    const defaultDeviceIndex = audioDevices.findIndex(
-      (device) => device.deviceId === 'default',
-    );
+    const defaultDeviceIndex = audioDevices.findIndex((device) => device.deviceId === 'default');
     const deviceList = [];
     if (defaultDeviceIndex !== -1) {
       let defaultDevice = audioDevices.splice(defaultDeviceIndex, 1)[0];
-      let existingIndex = audioDevices.findIndex(
-        (device) => device.groupId === defaultDevice.groupId,
-      );
+      let existingIndex = audioDevices.findIndex((device) => device.groupId === defaultDevice.groupId);
       if (existingIndex !== -1) {
         defaultDevice = audioDevices.splice(existingIndex, 1)[0];
       }
@@ -5369,15 +5373,10 @@ class WavRecorder {
    */
   async begin(deviceId) {
     if (this.processor) {
-      throw new Error(
-        `Already connected: please call .end() to start a new session`,
-      );
+      throw new Error(`Already connected: please call .end() to start a new session`);
     }
-    if (
-      !navigator.mediaDevices ||
-      !('getUserMedia' in navigator.mediaDevices)
-    ) {
+    if (!navigator.mediaDevices || !('getUserMedia' in navigator.mediaDevices)) {
       throw new Error('Could not request user media');
     }
     try {
@@ -5388,14 +5387,16 @@ class WavRecorder {
           echoCancellation: true,
           autoGainControl: true,
           noiseSuppression: true,
-        }
+        },
       };
       if (deviceId) {
         config.audio.deviceId = { exact: deviceId };
       }
       this.stream = await navigator.mediaDevices.getUserMedia(config);
+      // Mark permission as granted so listDevices() won't call requestPermission() again
+      this._hasPermission = true;
     } catch (err) {
-      throw new Error('Could not start media stream');
+      throw err;
     }
     const createContext = (rate) => {
@@ -5447,10 +5448,7 @@ class WavRecorder {
             raw: WavPacker.mergeBuffers(buffer.raw, data.raw),
             mono: WavPacker.mergeBuffers(buffer.mono, data.mono),
           };
-          if (
-            this._chunkProcessorBuffer.mono.byteLength >=
-            this._chunkProcessorSize
-          ) {
+          if (this._chunkProcessorBuffer.mono.byteLength >= this._chunkProcessorSize) {
             this._chunkProcessor(this._chunkProcessorBuffer);
             this._chunkProcessorBuffer = {
               raw: new ArrayBuffer(0),
@@ -5478,11 +5476,7 @@ class WavRecorder {
     node.connect(analyser);
     if (this.outputToSpeakers) {
       // eslint-disable-next-line no-console
-      console.warn(
-        'Warning: Output to speakers may affect sound quality,\n' +
-          'especially due to system audio feedback preventative measures.\n' +
-          'use only for debugging',
-      );
+      console.warn('Warning: Output to speakers may affect sound quality,\n' + 'especially due to system audio feedback preventative measures.\n' + 'use only for debugging');
       analyser.connect(context.destination);
     }
@@ -5509,26 +5503,14 @@ class WavRecorder {
    * @param {number} [maxDecibels] default -30
    * @returns {import('./analysis/audio_analysis.js').AudioAnalysisOutputType}
    */
-  getFrequencies(
-    analysisType = 'frequency',
-    minDecibels = -100,
-    maxDecibels = -30,
-  ) {
+  getFrequencies(analysisType = 'frequency', minDecibels = -100, maxDecibels = -30) {
     if (!this.processor) {
       throw new Error('Session ended: please call .begin() first');
     }
-    return AudioAnalysis.getFrequencies(
-      this.analyser,
-      this.sampleRate,
-      null,
-      analysisType,
-      minDecibels,
-      maxDecibels,
-    );
+    return AudioAnalysis.getFrequencies(this.analyser, this.sampleRate, null, analysisType, minDecibels, maxDecibels);
   }
-    /**
+  /**
    * Gets the real-time amplitude of the audio signal
    * @returns {number} Amplitude value between 0 and 1
    */
@@ -5653,9 +5635,7 @@ class WavRecorder {
       throw new Error('Session ended: please call .begin() first');
     }
     if (!force && this.recording) {
-      throw new Error(
-        'Currently recording: please call .pause() first, or call .save(true) to force',
-      );
+      throw new Error('Currently recording: please call .pause() first, or call .save(true) to force');
     }
     this.log('Exporting ...');
     const exportData = await this._event('export');
@@ -5762,6 +5742,7 @@ function arrayBufferToBase64(arrayBuffer) {
   return btoa(binary);
 }
+//// src/index.ts
 /* eslint-env browser */
 // import { env as ortEnv } from 'onnxruntime-web';
 // @ts-ignore - VAD package does not provide TypeScript types
@@ -5769,137 +5750,40 @@ const NOOP = () => { };
 const DEFAULT_WS_URL = 'wss://api.layercode.com/v1/agents/web/websocket';
 // SDK version - updated when publishing
 const SDK_VERSION = '2.7.0';
-const MEDIA_DEVICE_CHANGE_EVENT = 'devicechange';
-const MEDIA_DEVICE_KIND_AUDIO = 'audioinput';
+const DEFAULT_RECORDER_SAMPLE_RATE = 8000;
 const hasMediaDevicesSupport = () => typeof navigator !== 'undefined' && !!navigator.mediaDevices;
-let microphonePermissionPromise = null;
-let microphonePermissionGranted = false;
-const stopStreamTracks = (stream) => {
-    if (!stream) {
-        return;
-    }
-    stream.getTracks().forEach((track) => {
-        try {
-            track.stop();
-        }
-        catch (_a) {
-            /* noop */
-        }
-    });
-};
-const ensureMicrophonePermissions = async () => {
-    if (!hasMediaDevicesSupport()) {
-        throw new Error('Media devices are not available in this environment');
-    }
-    if (microphonePermissionGranted) {
-        return;
-    }
-    if (!microphonePermissionPromise) {
-        microphonePermissionPromise = navigator.mediaDevices
-            .getUserMedia({ audio: true })
-            .then((stream) => {
-            microphonePermissionGranted = true;
-            stopStreamTracks(stream);
-        })
-            .finally(() => {
-            microphonePermissionPromise = null;
-        });
-    }
-    return microphonePermissionPromise;
-};
-const cloneAudioDevice = (device, isDefault) => {
+const toLayercodeAudioInputDevice = (device) => {
     const cloned = {
-        deviceId: device.deviceId,
-        groupId: device.groupId,
-        kind: device.kind,
+        ...device,
         label: device.label,
-        default: isDefault,
+        default: Boolean(device.default),
     };
     if (typeof device.toJSON === 'function') {
         cloned.toJSON = device.toJSON.bind(device);
     }
     return cloned;
 };
-const normalizeAudioInputDevices = (devices) => {
-    const audioDevices = devices.filter((device) => device.kind === MEDIA_DEVICE_KIND_AUDIO);
-    if (!audioDevices.length) {
-        return [];
-    }
-    const remaining = [...audioDevices];
-    const normalized = [];
-    const defaultIndex = remaining.findIndex((device) => device.deviceId === 'default');
-    if (defaultIndex !== -1) {
-        let defaultDevice = remaining.splice(defaultIndex, 1)[0];
-        const groupMatchIndex = remaining.findIndex((device) => device.groupId && defaultDevice.groupId && device.groupId === defaultDevice.groupId);
-        if (groupMatchIndex !== -1) {
-            defaultDevice = remaining.splice(groupMatchIndex, 1)[0];
-        }
-        normalized.push(cloneAudioDevice(defaultDevice, true));
-    }
-    else if (remaining.length) {
-        const fallbackDefault = remaining.shift();
-        normalized.push(cloneAudioDevice(fallbackDefault, true));
-    }
-    return normalized.concat(remaining.map((device) => cloneAudioDevice(device, false)));
-};
 const listAudioInputDevices = async () => {
     if (!hasMediaDevicesSupport()) {
         throw new Error('Media devices are not available in this environment');
     }
-    await ensureMicrophonePermissions();
-    const devices = await navigator.mediaDevices.enumerateDevices();
-    return normalizeAudioInputDevices(devices);
+    const recorder = new WavRecorder({ sampleRate: DEFAULT_RECORDER_SAMPLE_RATE });
+    const devices = (await recorder.listDevices());
+    return devices.map(toLayercodeAudioInputDevice);
 };
 const watchAudioInputDevices = (callback) => {
     if (!hasMediaDevicesSupport()) {
         return () => { };
     }
-    let disposed = false;
-    let lastSignature = null;
-    let requestId = 0;
-    const emitDevices = async () => {
-        requestId += 1;
-        const currentRequest = requestId;
-        try {
-            const devices = await listAudioInputDevices();
-            if (disposed || currentRequest !== requestId) {
-                return;
-            }
-            const signature = devices.map((device) => `${device.deviceId}:${device.label}:${device.groupId}:${device.default ? '1' : '0'}`).join('|');
-            if (signature !== lastSignature) {
-                lastSignature = signature;
-                callback(devices);
-            }
-        }
-        catch (error) {
-            if (!disposed) {
-                console.warn('Failed to refresh audio devices', error);
-            }
-        }
+    const recorder = new WavRecorder({ sampleRate: DEFAULT_RECORDER_SAMPLE_RATE });
+    const handleDevicesChange = (devices) => {
+        callback(devices.map(toLayercodeAudioInputDevice));
     };
-    const handler = () => {
-        void emitDevices();
-    };
-    const mediaDevices = navigator.mediaDevices;
-    let teardown = null;
-    if (typeof mediaDevices.addEventListener === 'function') {
-        mediaDevices.addEventListener(MEDIA_DEVICE_CHANGE_EVENT, handler);
-        teardown = () => mediaDevices.removeEventListener(MEDIA_DEVICE_CHANGE_EVENT, handler);
-    }
-    else if ('ondevicechange' in mediaDevices) {
-        const previousHandler = mediaDevices.ondevicechange;
-        mediaDevices.ondevicechange = handler;
-        teardown = () => {
-            if (mediaDevices.ondevicechange === handler) {
-                mediaDevices.ondevicechange = previousHandler || null;
-            }
-        };
-    }
-    // Always emit once on subscribe
-    void emitDevices();
+    // WavRecorder handles initial emit + deduping devicechange events
+    recorder.listenForDeviceChange(handleDevicesChange);
     return () => {
-        disposed = true;
-        teardown === null || teardown === void 0 ? void 0 : teardown();
+        recorder.listenForDeviceChange(null);
+        recorder.quit().catch(() => { });
     };
 };
 /**
@@ -5946,7 +5830,7 @@ class LayercodeClient {
         this.AMPLITUDE_MONITORING_SAMPLE_RATE = 2;
         this._websocketUrl = DEFAULT_WS_URL;
         this.audioOutputReady = null;
-        this.wavRecorder = new WavRecorder({ sampleRate: 8000 }); // TODO should be set my fetched agent config
+        this.wavRecorder = new WavRecorder({ sampleRate: DEFAULT_RECORDER_SAMPLE_RATE }); // TODO should be set by fetched agent config
         this.wavPlayer = new WavStreamPlayer({
             finishedPlayingCallback: this._clientResponseAudioReplayFinished.bind(this),
             sampleRate: 16000, // TODO should be set my fetched agent config
@@ -5966,6 +5850,7 @@ class LayercodeClient {
         this.recorderStarted = false;
         this.readySent = false;
         this.currentTurnId = null;
+        this.sentReplayFinishedForDisabledOutput = false;
         this.audioBuffer = [];
         this.vadConfig = null;
         this.activeDeviceId = null;
@@ -5977,6 +5862,7 @@ class LayercodeClient {
         this.stopRecorderAmplitude = undefined;
         this.deviceChangeListener = null;
         this.recorderRestartChain = Promise.resolve();
+        this._skipFirstDeviceCallback = false;
         this.deviceListenerReady = null;
         this.resolveDeviceListenerReady = null;
         // this.audioPauseTime = null;
@@ -5996,7 +5882,7 @@ class LayercodeClient {
     set onDevicesChanged(callback) {
         this.options.onDevicesChanged = callback !== null && callback !== void 0 ? callback : NOOP;
     }
-    _initializeVAD() {
+    async _initializeVAD() {
         var _a;
         console.log('initializing VAD', { pushToTalkEnabled: this.pushToTalkEnabled, canInterrupt: this.canInterrupt, vadConfig: this.vadConfig });
         // If we're in push to talk mode or mute mode, we don't need to use the VAD model
@@ -6080,13 +5966,13 @@ class LayercodeClient {
             vadOptions.frameSamples = 512; // Required for v5
         }
         console.log('Creating VAD with options:', vadOptions);
-        dist.MicVAD.new(vadOptions)
-            .then((vad) => {
+        try {
+            const vad = await dist.MicVAD.new(vadOptions);
             this.vad = vad;
             this.vad.start();
             console.log('VAD started successfully');
-        })
-            .catch((error) => {
+        }
+        catch (error) {
             console.warn('Error initializing VAD:', error);
             // Send a message to server indicating VAD failure
             const vadFailureMessage = {
@@ -6098,7 +5984,7 @@ class LayercodeClient {
                 ...vadFailureMessage,
                 userSpeaking: this.userIsSpeaking,
             });
-        });
+        }
     }
     /**
      * Updates the connection status and triggers the callback
@@ -6125,11 +6011,14 @@ class LayercodeClient {
         this.options.onAgentSpeakingChange(shouldReportSpeaking);
     }
     _setUserSpeaking(isSpeaking) {
-        const shouldReportSpeaking = this._shouldCaptureUserAudio() && isSpeaking;
+        const shouldCapture = this._shouldCaptureUserAudio();
+        const shouldReportSpeaking = shouldCapture && isSpeaking;
+        console.log('_setUserSpeaking called:', isSpeaking, 'shouldCapture:', shouldCapture, 'shouldReportSpeaking:', shouldReportSpeaking, 'current userIsSpeaking:', this.userIsSpeaking);
         if (this.userIsSpeaking === shouldReportSpeaking) {
             return;
         }
         this.userIsSpeaking = shouldReportSpeaking;
+        console.log('_setUserSpeaking: updated userIsSpeaking to:', this.userIsSpeaking);
         this.options.onUserIsSpeakingChange(shouldReportSpeaking);
     }
     /**
@@ -6179,6 +6068,7 @@ class LayercodeClient {
      * @param {MessageEvent} event - The WebSocket message event
      */
     async _handleWebSocketMessage(event) {
+        var _a, _b;
         try {
             const message = JSON.parse(event.data);
             if (message.type !== 'response.audio') {
@@ -6191,6 +6081,20 @@ class LayercodeClient {
                         // Start tracking new agent turn
                         console.debug('Agent turn started, will track new turn ID from audio/text');
                         this._setUserSpeaking(false);
+                        // Reset the flag for the new assistant turn
+                        this.sentReplayFinishedForDisabledOutput = false;
+                        // When assistant's turn starts but we're not playing audio,
+                        // we need to tell the server we're "done" with playback so it can
+                        // transition the turn back to user. Use a small delay to let any
+                        // response.audio/response.end messages arrive first.
+                        if (!this.audioOutput) {
+                            setTimeout(() => {
+                                if (!this.audioOutput && !this.sentReplayFinishedForDisabledOutput) {
+                                    this.sentReplayFinishedForDisabledOutput = true;
+                                    this._clientResponseAudioReplayFinished();
+                                }
+                            }, 1000);
+                        }
                     }
                     else if (message.role === 'user' && !this.pushToTalkEnabled) {
                         // Interrupt any playing agent audio if this is a turn triggered by the server (and not push to talk, which will have already called interrupt)
@@ -6210,7 +6114,25 @@ class LayercodeClient {
                     });
                     break;
                 }
+                case 'response.end': {
+                    // When audioOutput is disabled, notify server that "playback" is complete
+                    if (!this.audioOutput && !this.sentReplayFinishedForDisabledOutput) {
+                        this.sentReplayFinishedForDisabledOutput = true;
+                        this._clientResponseAudioReplayFinished();
+                    }
+                    (_b = (_a = this.options).onMessage) === null || _b === void 0 ? void 0 : _b.call(_a, message);
+                    break;
+                }
                 case 'response.audio': {
+                    // Skip audio playback if audioOutput is disabled
+                    if (!this.audioOutput) {
+                        // Send replay_finished so server knows we're "done" with playback (only once per turn)
+                        if (!this.sentReplayFinishedForDisabledOutput) {
+                            this.sentReplayFinishedForDisabledOutput = true;
+                            this._clientResponseAudioReplayFinished();
+                        }
+                        break;
+                    }
                     await this._waitForAudioOutputReady();
                     const audioBuffer = base64ToArrayBuffer(message.content);
                     const hasAudioSamples = audioBuffer.byteLength > 0;
@@ -6345,6 +6267,9 @@ class LayercodeClient {
     }
     _sendReadyIfNeeded() {
         var _a;
+        // Send client.ready when either:
+        // 1. Recorder is started (audio mode active)
+        // 2. audioInput is false (text-only mode, but server should still be ready)
         const audioReady = this.recorderStarted || !this.audioInput;
         if (audioReady && ((_a = this.ws) === null || _a === void 0 ? void 0 : _a.readyState) === WebSocket.OPEN && !this.readySent) {
             this._wsSend({ type: 'client.ready' });
@@ -6410,14 +6335,99 @@ class LayercodeClient {
     }
     async audioInputConnect() {
         // Turn mic ON
-        await this.wavRecorder.requestPermission();
+        // NOTE: On iOS Safari, each getUserMedia call is expensive (~2-3 seconds).
+        // We optimize by:
+        // 1. Starting the recorder FIRST with begin() (single getUserMedia)
+        // 2. THEN setting up device change listeners (which will skip getUserMedia since permission is cached)
+        console.log('audioInputConnect: recorderStarted =', this.recorderStarted);
+        // If the recorder hasn't spun up yet, start it first with the preferred or default device
+        // This ensures we only make ONE getUserMedia call instead of multiple sequential ones
+        if (!this.recorderStarted) {
+            // Use preferred device if set, otherwise use system default
+            const targetDeviceId = this.useSystemDefaultDevice ? undefined : this.deviceId || undefined;
+            // Mark as using system default if no specific device is set
+            if (!targetDeviceId) {
+                this.useSystemDefaultDevice = true;
+            }
+            console.log('audioInputConnect: starting recorder with device:', targetDeviceId !== null && targetDeviceId !== void 0 ? targetDeviceId : 'system default');
+            await this._startRecorderWithDevice(targetDeviceId);
+        }
+        // Now set up device change listeners - permission is already granted so listDevices() won't call getUserMedia
+        // Skip the first callback since we've already started with the correct device
+        this._skipFirstDeviceCallback = true;
+        console.log('audioInputConnect: setting up device change listener');
         await this._setupDeviceChangeListener();
-        // If the recorder hasn't spun up yet, proactively select a device.
-        if (!this.recorderStarted && this.deviceChangeListener) {
-            await this._initializeRecorderWithDefaultDevice();
+        console.log('audioInputConnect: done, recorderStarted =', this.recorderStarted);
+    }
+    /**
+     * Starts the recorder with a specific device (or default if undefined)
+     * This is the single point where getUserMedia is called during initial setup.
+     * Idempotent: returns early if recorder is already started or has a live stream.
+     */
+    async _startRecorderWithDevice(deviceId) {
+        var _a, _b;
+        // Idempotency guard: don't start again if already running
+        if (this.recorderStarted || this._hasLiveRecorderStream()) {
+            console.debug('_startRecorderWithDevice: already started, skipping');
+            return;
+        }
+        try {
+            this._stopRecorderAmplitudeMonitoring();
+            try {
+                await this.wavRecorder.end();
+            }
+            catch (_c) {
+                // Ignore cleanup errors
+            }
+            await this.wavRecorder.begin(deviceId);
+            await this.wavRecorder.record(this._handleDataAvailable, 1638);
+            // Re-setup amplitude monitoring with the new stream
+            this._setupAmplitudeMonitoring(this.wavRecorder, this.options.onUserAmplitudeChange, (amp) => (this.userAudioAmplitude = amp));
+            if (!this.options.enableAmplitudeMonitoring) {
+                this.userAudioAmplitude = 0;
+            }
+            const stream = this.wavRecorder.getStream();
+            const activeTrack = (stream === null || stream === void 0 ? void 0 : stream.getAudioTracks()[0]) || null;
+            const trackSettings = activeTrack && typeof activeTrack.getSettings === 'function' ? activeTrack.getSettings() : null;
+            const trackDeviceId = trackSettings && typeof trackSettings.deviceId === 'string' ? trackSettings.deviceId : null;
+            this.activeDeviceId = trackDeviceId !== null && trackDeviceId !== void 0 ? trackDeviceId : (this.useSystemDefaultDevice ? null : this.deviceId);
+            if (!this.recorderStarted) {
+                this.recorderStarted = true;
+                this._sendReadyIfNeeded();
+            }
+            const reportedDeviceId = (_a = this.activeDeviceId) !== null && _a !== void 0 ? _a : (this.useSystemDefaultDevice ? 'default' : ((_b = this.deviceId) !== null && _b !== void 0 ? _b : 'default'));
+            if (reportedDeviceId !== this.lastReportedDeviceId) {
+                this.lastReportedDeviceId = reportedDeviceId;
+                if (this.options.onDeviceSwitched) {
+                    this.options.onDeviceSwitched(reportedDeviceId);
+                }
+            }
+            console.debug('Recorder started successfully with device:', reportedDeviceId);
+        }
+        catch (error) {
+            const permissionDeniedError = await this._microphonePermissionDeniedError(error);
+            if (permissionDeniedError) {
+                console.error(permissionDeniedError.message);
+                this.options.onError(permissionDeniedError);
+                throw permissionDeniedError;
+            }
+            if (await this._shouldWarnAudioDevicesRequireUserGesture(error)) {
+                console.error('Cannot load audio devices before user has interacted with the page. Please move connect() to be triggered by a button, or load the SDK with "audioInput: false" to connection() on page load');
+            }
+            console.error('Error starting recorder:', error);
+            this.options.onError(error instanceof Error ? error : new Error(String(error)));
+            throw error;
         }
     }
     async audioInputDisconnect() {
+        // If we never started the recorder, avoid touching audio APIs at all.
+        if (!this.recorderStarted && !this._hasLiveRecorderStream()) {
+            this._stopRecorderAmplitudeMonitoring();
+            this.stopVad();
+            this._teardownDeviceListeners();
+            this.recorderStarted = false;
+            return;
+        }
         try {
             // stop amplitude monitoring tied to the recorder
             this._stopRecorderAmplitudeMonitoring();
@@ -6439,7 +6449,9 @@ class LayercodeClient {
             this.audioInput = state;
             this._emitAudioInput();
             if (state) {
+                this._setStatus('connecting');
                 await this.audioInputConnect();
+                this._setStatus('connected');
             }
             else {
                 await this.audioInputDisconnect();
@@ -6451,7 +6463,20 @@ class LayercodeClient {
             this.audioOutput = state;
             this._emitAudioOutput();
             if (state) {
-                this.wavPlayer.unmute();
+                // Initialize audio output if not already connected
+                // This happens when audioOutput was initially false and is now being enabled
+                if (!this.wavPlayer.context) {
+                    this._setStatus('connecting');
+                    // Store the promise so _waitForAudioOutputReady() can await it
+                    // This prevents response.audio from running before AudioContext is ready
+                    const setupPromise = this.setupAudioOutput();
+                    this.audioOutputReady = setupPromise;
+                    await setupPromise;
+                    this._setStatus('connected');
+                }
+                else {
+                    this.wavPlayer.unmute();
+                }
                 // Sync agentSpeaking state with actual playback state when enabling audio output
                 this._syncAgentSpeakingState();
             }
@@ -6532,7 +6557,19 @@ class LayercodeClient {
             await audioOutputReady;
         }
         catch (error) {
-            console.error('Error connecting to Layercode agent:', error);
+            const permissionDeniedError = await this._microphonePermissionDeniedError(error);
+            if (permissionDeniedError) {
+                console.error(permissionDeniedError.message);
+                this._setStatus('error');
+                this.options.onError(permissionDeniedError);
+                return;
+            }
+            if (await this._shouldWarnAudioDevicesRequireUserGesture(error)) {
+                console.error('Cannot load audio devices before user has interacted with the page. Please move connect() to be triggered by a button, or load the SDK with "audioInput: false" to connection() on page load');
+            }
+            else {
+                console.error('Error connecting to Layercode agent:', error);
+            }
             this._setStatus('error');
             this.options.onError(error instanceof Error ? error : new Error(String(error)));
         }
@@ -6608,6 +6645,11 @@ class LayercodeClient {
         return authorizeSessionResponseBody;
     }
     async setupAudioOutput() {
+        // Only initialize audio player if audioOutput is enabled
+        // This prevents AudioContext creation before user gesture when audio is disabled
+        if (!this.audioOutput) {
+            return;
+        }
         // Initialize audio player
         // wavRecorder will be started from the onDeviceSwitched callback,
         // which is called when the device is first initialized and also when the device is switched
@@ -6618,12 +6660,7 @@ class LayercodeClient {
         if (!this.options.enableAmplitudeMonitoring) {
             this.agentAudioAmplitude = 0;
         }
-        if (this.audioOutput) {
-            this.wavPlayer.unmute();
-        }
-        else {
-            this.wavPlayer.mute();
-        }
+        this.wavPlayer.unmute();
     }
     async connectToAudioInput() {
         if (!this.audioInput) {
@@ -6672,6 +6709,7 @@ class LayercodeClient {
      */
     async setInputDevice(deviceId) {
         var _a, _b, _c;
+        console.log('setInputDevice called with:', deviceId, 'audioInput:', this.audioInput);
         const normalizedDeviceId = !deviceId || deviceId === 'default' ? null : deviceId;
         this.useSystemDefaultDevice = normalizedDeviceId === null;
         this.deviceId = normalizedDeviceId;
@@ -6680,6 +6718,7 @@ class LayercodeClient {
             return;
         }
         try {
+            console.log('setInputDevice: calling _queueRecorderRestart');
             // Restart recording with the new device
             await this._queueRecorderRestart();
             // Reinitialize VAD with the new audio stream if VAD is enabled
@@ -6689,7 +6728,7 @@ class LayercodeClient {
                 const newStream = this.wavRecorder.getStream();
                 await this._reinitializeVAD(newStream);
             }
-            const reportedDeviceId = (_c = (_b = this.lastReportedDeviceId) !== null && _b !== void 0 ? _b : this.activeDeviceId) !== null && _c !== void 0 ? _c : (this.useSystemDefaultDevice ? 'default' : normalizedDeviceId !== null && normalizedDeviceId !== void 0 ? normalizedDeviceId : 'default');
+            const reportedDeviceId = (_c = (_b = this.lastReportedDeviceId) !== null && _b !== void 0 ? _b : this.activeDeviceId) !== null && _c !== void 0 ? _c : (this.useSystemDefaultDevice ? 'default' : (normalizedDeviceId !== null && normalizedDeviceId !== void 0 ? normalizedDeviceId : 'default'));
             console.debug(`Successfully switched to input device: ${reportedDeviceId}`);
         }
         catch (error) {
@@ -6743,7 +6782,7 @@ class LayercodeClient {
                 this.recorderStarted = true;
                 this._sendReadyIfNeeded();
             }
-            const reportedDeviceId = (_a = this.activeDeviceId) !== null && _a !== void 0 ? _a : (this.useSystemDefaultDevice ? 'default' : (_b = this.deviceId) !== null && _b !== void 0 ? _b : 'default');
+            const reportedDeviceId = (_a = this.activeDeviceId) !== null && _a !== void 0 ? _a : (this.useSystemDefaultDevice ? 'default' : ((_b = this.deviceId) !== null && _b !== void 0 ? _b : 'default'));
             if (reportedDeviceId !== previousReportedDeviceId) {
                 this.lastReportedDeviceId = reportedDeviceId;
                 if (this.options.onDeviceSwitched) {
@@ -6762,29 +6801,6 @@ class LayercodeClient {
         this.recorderRestartChain = run.catch(() => { });
         return run;
     }
-    async _initializeRecorderWithDefaultDevice() {
-        if (!this.deviceChangeListener) {
-            return;
-        }
-        try {
-            const devices = await this.wavRecorder.listDevices();
-            if (devices.length) {
-                await this.deviceChangeListener(devices);
-                return;
-            }
-            console.warn('No audio input devices available when enabling microphone');
-        }
-        catch (error) {
-            console.warn('Unable to prime audio devices from listDevices()', error);
-        }
-        try {
-            await this.setInputDevice('default');
-        }
-        catch (error) {
-            console.error('Failed to start recording with the system default device:', error);
-            throw error;
-        }
-    }
     /**
      * Disconnect VAD
      */
@@ -6803,7 +6819,7 @@ class LayercodeClient {
         this.stopVad();
         // Reinitialize with new stream only if we're actually capturing audio
         if (stream && this._shouldCaptureUserAudio()) {
-            this._initializeVAD();
+            await this._initializeVAD();
         }
     }
     /**
@@ -6825,7 +6841,8 @@ class LayercodeClient {
                 };
             });
             this.deviceChangeListener = async (devices) => {
-                var _a;
+                var _a, _b;
+                console.log('deviceChangeListener called, devices:', devices.length, 'recorderStarted:', this.recorderStarted, '_skipFirstDeviceCallback:', this._skipFirstDeviceCallback);
                 try {
                     // Notify user that devices have changed
                     this.options.onDevicesChanged(devices);
@@ -6833,7 +6850,17 @@ class LayercodeClient {
                     const usingDefaultDevice = this.useSystemDefaultDevice;
                     const previousDefaultDeviceKey = this.lastKnownSystemDefaultDeviceKey;
                     const currentDefaultDeviceKey = this._getDeviceComparisonKey(defaultDevice);
+                    // Skip switching on the first callback after starting the recorder to avoid redundant begin() calls
+                    // This is set by audioInputConnect() after _startRecorderWithDevice() completes
+                    if (this._skipFirstDeviceCallback) {
+                        console.log('deviceChangeListener: skipping first callback after recorder start');
+                        this._skipFirstDeviceCallback = false;
+                        this.lastKnownSystemDefaultDeviceKey = currentDefaultDeviceKey;
+                        (_a = this.resolveDeviceListenerReady) === null || _a === void 0 ? void 0 : _a.call(this);
+                        return;
+                    }
                     let shouldSwitch = !this.recorderStarted;
+                    console.log('deviceChangeListener: shouldSwitch initial:', shouldSwitch);
                     if (!shouldSwitch) {
                         if (usingDefaultDevice) {
                             if (!defaultDevice) {
@@ -6842,8 +6869,7 @@ class LayercodeClient {
                             else if (this.activeDeviceId && defaultDevice.deviceId !== 'default' && defaultDevice.deviceId !== this.activeDeviceId) {
                                 shouldSwitch = true;
                             }
-                            else if ((previousDefaultDeviceKey && previousDefaultDeviceKey !== currentDefaultDeviceKey) ||
-                                (!previousDefaultDeviceKey && !currentDefaultDeviceKey && this.recorderStarted)) {
+                            else if ((previousDefaultDeviceKey && previousDefaultDeviceKey !== currentDefaultDeviceKey) || (!previousDefaultDeviceKey && !currentDefaultDeviceKey && this.recorderStarted)) {
                                 shouldSwitch = true;
                             }
                         }
@@ -6853,6 +6879,7 @@ class LayercodeClient {
                         }
                     }
                     this.lastKnownSystemDefaultDeviceKey = currentDefaultDeviceKey;
+                    console.log('deviceChangeListener: final shouldSwitch:', shouldSwitch);
                     if (shouldSwitch) {
                         console.debug('Selecting audio input device after change');
                         let targetDeviceId = null;
@@ -6882,7 +6909,7 @@ class LayercodeClient {
                     this.options.onError(error instanceof Error ? error : new Error(String(error)));
                 }
                 finally {
-                    (_a = this.resolveDeviceListenerReady) === null || _a === void 0 ? void 0 : _a.call(this);
+                    (_b = this.resolveDeviceListenerReady) === null || _b === void 0 ? void 0 : _b.call(this);
                 }
             };
             this.wavRecorder.listenForDeviceChange(this.deviceChangeListener);
@@ -6906,6 +6933,7 @@ class LayercodeClient {
         this.lastKnownSystemDefaultDeviceKey = null;
         this.recorderStarted = false;
         this.readySent = false;
+        this._skipFirstDeviceCallback = false;
         this._stopAmplitudeMonitoring();
         this._teardownDeviceListeners();
         if (this.vad) {
@@ -6941,6 +6969,81 @@ class LayercodeClient {
         }
         return null;
     }
+    /**
+     * Best-effort probe of the browser's user-activation state.
+     *
+     * Reads `navigator.userActivation` first and only consults
+     * `document.userActivation` when the navigator value is missing, is not an
+     * object, or exposes neither boolean flag.
+     *
+     * @returns {boolean|null} `hasBeenActive` when it is a boolean; otherwise
+     *   `true` when only `isActive` is available and set; `null` when
+     *   `isActive` is `false`, when no activation object is found, or when
+     *   reading the globals throws.
+     */
+    _getUserActivationState() {
+        try {
+            const nav = typeof navigator !== 'undefined' ? navigator : null;
+            const act = nav === null || nav === void 0 ? void 0 : nav.userActivation;
+            if (act && typeof act === 'object') {
+                if (typeof act.hasBeenActive === 'boolean')
+                    return act.hasBeenActive;
+                // isActive === false maps to null rather than false; presumably because a page
+                // that is inactive right now may still have been activated earlier (confirm).
+                if (typeof act.isActive === 'boolean')
+                    return act.isActive ? true : null;
+            }
+            // Same checks against a `userActivation` property on `document`, if one exists.
+            const doc = typeof document !== 'undefined' ? document : null;
+            const dact = doc === null || doc === void 0 ? void 0 : doc.userActivation;
+            if (dact && typeof dact === 'object') {
+                if (typeof dact.hasBeenActive === 'boolean')
+                    return dact.hasBeenActive;
+                if (typeof dact.isActive === 'boolean')
+                    return dact.isActive ? true : null;
+            }
+        }
+        // Any exception is swallowed here; the method then returns null below.
+        catch (_a) { }
+        return null;
+    }
+    /**
+     * Asks the Permissions API whether microphone access is explicitly denied.
+     *
+     * @returns {Promise<boolean|null>} `true` when the reported state is
+     *   `'denied'`; `false` when it is `'granted'` or `'prompt'`; `null` when
+     *   `navigator.permissions.query` is unavailable, the state is any other
+     *   value, or the query throws or rejects.
+     */
+    async _isMicrophonePermissionDenied() {
+        try {
+            const nav = typeof navigator !== 'undefined' ? navigator : null;
+            const permissions = nav === null || nav === void 0 ? void 0 : nav.permissions;
+            // No Permissions API (or no query method): the answer is unknown.
+            if (!(permissions === null || permissions === void 0 ? void 0 : permissions.query))
+                return null;
+            const status = await permissions.query({ name: 'microphone' });
+            const state = status === null || status === void 0 ? void 0 : status.state;
+            if (state === 'denied')
+                return true;
+            if (state === 'granted' || state === 'prompt')
+                return false;
+        }
+        // A failing query is swallowed and reported as unknown (null) below.
+        catch (_a) { }
+        return null;
+    }
+    /**
+     * Maps an arbitrary thrown value to a "microphone permission denied" error,
+     * or to `null` when the value does not look like a denied permission.
+     *
+     * @param {*} error - The caught value; may be an Error, a string, or anything else.
+     * @returns {Promise<Error|null>} The original Error when its message is
+     *   already the permission-denied message; a new Error carrying that
+     *   message when the value is a matching string, or when a permission-like
+     *   error coincides with a denied microphone permission or a
+     *   "permission denied" message; otherwise `null`.
+     */
+    async _microphonePermissionDeniedError(error) {
+        const err = error;
+        // Use `.message` when it is a string, else the value itself when it is a string, else ''.
+        const message = typeof (err === null || err === void 0 ? void 0 : err.message) === 'string' ? err.message : typeof error === 'string' ? error : '';
+        // NOTE(review): the spelling "denined" is compared by exact match; presumably it mirrors
+        // a message thrown elsewhere in the SDK, so confirm that before correcting it here.
+        if (message === 'User has denined audio device permissions') {
+            return err instanceof Error ? err : new Error(message);
+        }
+        const name = typeof (err === null || err === void 0 ? void 0 : err.name) === 'string' ? err.name : '';
+        const isPermissionLike = name === 'NotAllowedError' || name === 'SecurityError' || name === 'PermissionDeniedError';
+        if (!isPermissionLike) {
+            return null;
+        }
+        // A permission-like error only counts as "denied" when the Permissions API says so
+        // or the message itself mentions "permission denied".
+        const micDenied = await this._isMicrophonePermissionDenied();
+        if (micDenied === true || /permission denied/i.test(message)) {
+            return new Error('User has denined audio device permissions');
+        }
+        return null;
+    }
+    /**
+     * Decides whether a caught error should be reported with the "audio devices
+     * require a user gesture" guidance.
+     *
+     * @param {*} error - The caught value; may be an Error, a string, or anything else.
+     * @returns {Promise<boolean>} `false` when the error name is not
+     *   `NotAllowedError`, `SecurityError` or `PermissionDeniedError`, or when
+     *   microphone permission is explicitly denied; `true` when the message
+     *   mentions user activation/gesture/interaction; otherwise `true` only if
+     *   `_getUserActivationState()` returns exactly `false`.
+     */
+    async _shouldWarnAudioDevicesRequireUserGesture(error) {
+        const e = error;
+        const name = typeof (e === null || e === void 0 ? void 0 : e.name) === 'string' ? e.name : '';
+        // Use `.message` when it is a string, else the value itself when it is a string, else ''.
+        const msg = typeof (e === null || e === void 0 ? void 0 : e.message) === 'string'
+            ? e.message
+            : typeof error === 'string'
+                ? error
+                : '';
+        const isPermissionLike = name === 'NotAllowedError' || name === 'SecurityError' || name === 'PermissionDeniedError';
+        if (!isPermissionLike)
+            return false;
+        // If the browser can tell us mic permission is explicitly denied, don't show the "user gesture" guidance.
+        const micDenied = await this._isMicrophonePermissionDenied();
+        if (micDenied === true)
+            return false;
+        if (/user activation|user gesture|interacte?d? with( the)? (page|document)|before user has interacted/i.test(msg)) {
+            return true;
+        }
+        // An unknown activation state (null) does not trigger the warning; only a definite false does.
+        return this._getUserActivationState() === false;
+    }
     /**
      * Mutes the microphone to stop sending audio to the server
      * The connection and recording remain active for quick unmute
@@ -6957,13 +7060,13 @@ class LayercodeClient {
     /**
      * Unmutes the microphone to resume sending audio to the server
      */
-    unmute() {
+    async unmute() {
         if (this.isMuted) {
             this.isMuted = false;
             console.log('Microphone unmuted');
             this.options.onMuteStateChange(false);
             if (this.audioInput && this.recorderStarted) {
-                this._initializeVAD();
+                await this._initializeVAD();
                 if (this.stopRecorderAmplitude === undefined) {
                     this._setupAmplitudeMonitoring(this.wavRecorder, this.options.onUserAmplitudeChange, (amp) => (this.userAudioAmplitude = amp));
                 }