npm - @layercode/js-sdk - Versions diffs - 2.8.1 → 2.8.3 - Mend

@layercode/js-sdk 2.8.1 → 2.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

package/dist/layercode-js-sdk.esm.js +116 -14
package/dist/layercode-js-sdk.esm.js.map +1 -1
package/dist/layercode-js-sdk.min.js +116 -14
package/dist/layercode-js-sdk.min.js.map +1 -1
package/dist/types/index.d.ts +14 -0
package/dist/types/interfaces.d.ts +6 -2
package/dist/types/wavtools/lib/analysis/audio_analysis.d.ts +1 -1
package/package.json +1 -1

package/dist/layercode-js-sdk.esm.js CHANGED

@@ -5312,13 +5312,15 @@ class WavRecorder {
    * @returns {Promise<true>}
    */
   async requestPermission() {
+    console.log('ensureUserMediaAccess');
     try {
-      console.log('ensureUserMediaAccess');
-      await navigator.mediaDevices.getUserMedia({
+      const stream = await navigator.mediaDevices.getUserMedia({
         audio: true,
       });
+      // Stop the tracks immediately after getting permission
+      stream.getTracks().forEach(track => track.stop());
     } catch (fallbackError) {
-      window.alert('You must grant microphone access to use this feature.');
+      console.error('getUserMedia failed:', fallbackError.name, fallbackError.message);
       throw fallbackError;
     }
     return true;
@@ -5962,9 +5964,11 @@ class LayercodeClient {
         this.canInterrupt = false;
         this.userIsSpeaking = false;
         this.agentIsSpeaking = false;
+        this.agentIsPlayingAudio = false;
         this.recorderStarted = false;
         this.readySent = false;
         this.currentTurnId = null;
+        this.sentReplayFinishedForDisabledOutput = false;
         this.audioBuffer = [];
         this.vadConfig = null;
         this.activeDeviceId = null;
@@ -6114,6 +6118,8 @@ class LayercodeClient {
         await this.audioOutputReady;
     }
     _setAgentSpeaking(isSpeaking) {
+        // Track the actual audio playback state regardless of audioOutput setting
+        this.agentIsPlayingAudio = isSpeaking;
         const shouldReportSpeaking = this.audioOutput && isSpeaking;
         if (this.agentIsSpeaking === shouldReportSpeaking) {
             return;
@@ -6122,11 +6128,14 @@ class LayercodeClient {
         this.options.onAgentSpeakingChange(shouldReportSpeaking);
     }
     _setUserSpeaking(isSpeaking) {
-        const shouldReportSpeaking = this._shouldCaptureUserAudio() && isSpeaking;
+        const shouldCapture = this._shouldCaptureUserAudio();
+        const shouldReportSpeaking = shouldCapture && isSpeaking;
+        console.log('_setUserSpeaking called:', isSpeaking, 'shouldCapture:', shouldCapture, 'shouldReportSpeaking:', shouldReportSpeaking, 'current userIsSpeaking:', this.userIsSpeaking);
         if (this.userIsSpeaking === shouldReportSpeaking) {
             return;
         }
         this.userIsSpeaking = shouldReportSpeaking;
+        console.log('_setUserSpeaking: updated userIsSpeaking to:', this.userIsSpeaking);
         this.options.onUserIsSpeakingChange(shouldReportSpeaking);
     }
     /**
@@ -6176,6 +6185,7 @@ class LayercodeClient {
      * @param {MessageEvent} event - The WebSocket message event
      */
     async _handleWebSocketMessage(event) {
+        var _a, _b;
         try {
             const message = JSON.parse(event.data);
             if (message.type !== 'response.audio') {
@@ -6188,6 +6198,20 @@ class LayercodeClient {
                         // Start tracking new agent turn
                         console.debug('Agent turn started, will track new turn ID from audio/text');
                         this._setUserSpeaking(false);
+                        // Reset the flag for the new assistant turn
+                        this.sentReplayFinishedForDisabledOutput = false;
+                        // When assistant's turn starts but we're not playing audio,
+                        // we need to tell the server we're "done" with playback so it can
+                        // transition the turn back to user. Use a small delay to let any
+                        // response.audio/response.end messages arrive first.
+                        if (!this.audioOutput) {
+                            setTimeout(() => {
+                                if (!this.audioOutput && !this.sentReplayFinishedForDisabledOutput) {
+                                    this.sentReplayFinishedForDisabledOutput = true;
+                                    this._clientResponseAudioReplayFinished();
+                                }
+                            }, 1000);
+                        }
                     }
                     else if (message.role === 'user' && !this.pushToTalkEnabled) {
                         // Interrupt any playing agent audio if this is a turn triggered by the server (and not push to talk, which will have already called interrupt)
@@ -6207,11 +6231,42 @@ class LayercodeClient {
                     });
                     break;
                 }
-                case 'response.audio':
+                case 'response.end': {
+                    // When audioOutput is disabled, notify server that "playback" is complete
+                    if (!this.audioOutput && !this.sentReplayFinishedForDisabledOutput) {
+                        this.sentReplayFinishedForDisabledOutput = true;
+                        this._clientResponseAudioReplayFinished();
+                    }
+                    (_b = (_a = this.options).onMessage) === null || _b === void 0 ? void 0 : _b.call(_a, message);
+                    break;
+                }
+                case 'response.audio': {
+                    // Skip audio playback if audioOutput is disabled
+                    if (!this.audioOutput) {
+                        // Send replay_finished so server knows we're "done" with playback (only once per turn)
+                        if (!this.sentReplayFinishedForDisabledOutput) {
+                            this.sentReplayFinishedForDisabledOutput = true;
+                            this._clientResponseAudioReplayFinished();
+                        }
+                        break;
+                    }
                     await this._waitForAudioOutputReady();
-                    this._setAgentSpeaking(true);
                     const audioBuffer = base64ToArrayBuffer(message.content);
-                    this.wavPlayer.add16BitPCM(audioBuffer, message.turn_id);
+                    const hasAudioSamples = audioBuffer.byteLength > 0;
+                    let audioEnqueued = false;
+                    if (hasAudioSamples) {
+                        try {
+                            const playbackBuffer = this.wavPlayer.add16BitPCM(audioBuffer, message.turn_id);
+                            audioEnqueued = Boolean(playbackBuffer && playbackBuffer.length > 0);
+                        }
+                        catch (error) {
+                            this._setAgentSpeaking(false);
+                            throw error;
+                        }
+                    }
+                    else {
+                        console.debug(`Skipping empty audio response for turn ${message.turn_id}`);
+                    }
                     // TODO: once we've added turn_id to the turn.start msgs sent from the server, we should move this currentTurnId switching logic to the turn.start msg case. We can then remove the currentTurnId setting logic from the response.audio and response.text cases.
                     // Set current turn ID from first audio message, or update if different turn
                     if (!this.currentTurnId || this.currentTurnId !== message.turn_id) {
@@ -6220,7 +6275,11 @@ class LayercodeClient {
                         // Clean up interrupted tracks, keeping only the current turn
                         this.wavPlayer.clearInterruptedTracks(this.currentTurnId ? [this.currentTurnId] : []);
                     }
+                    if (audioEnqueued) {
+                        this._setAgentSpeaking(true);
+                    }
                     break;
+                }
                 case 'response.text':
                     // Set turn ID from first text message if not set
                     if (!this.currentTurnId) {
@@ -6325,6 +6384,9 @@ class LayercodeClient {
     }
     _sendReadyIfNeeded() {
         var _a;
+        // Send client.ready when either:
+        // 1. Recorder is started (audio mode active)
+        // 2. audioInput is false (text-only mode, but server should still be ready)
         const audioReady = this.recorderStarted || !this.audioInput;
         if (audioReady && ((_a = this.ws) === null || _a === void 0 ? void 0 : _a.readyState) === WebSocket.OPEN && !this.readySent) {
             this._wsSend({ type: 'client.ready' });
@@ -6390,12 +6452,16 @@ class LayercodeClient {
     }
     async audioInputConnect() {
         // Turn mic ON
+        console.log('audioInputConnect: requesting permission');
         await this.wavRecorder.requestPermission();
+        console.log('audioInputConnect: setting up device change listener');
         await this._setupDeviceChangeListener();
         // If the recorder hasn't spun up yet, proactively select a device.
         if (!this.recorderStarted && this.deviceChangeListener) {
+            console.log('audioInputConnect: initializing recorder with default device');
             await this._initializeRecorderWithDefaultDevice();
         }
+        console.log('audioInputConnect: done, recorderStarted =', this.recorderStarted);
     }
     async audioInputDisconnect() {
         try {
@@ -6427,11 +6493,27 @@ class LayercodeClient {
         }
     }
     async setAudioOutput(state) {
+        console.log('setAudioOutput called with state:', state, 'current:', this.audioOutput);
         if (this.audioOutput !== state) {
             this.audioOutput = state;
             this._emitAudioOutput();
             if (state) {
-                this.wavPlayer.unmute();
+                // Initialize audio output if not already connected
+                // This happens when audioOutput was initially false and is now being enabled
+                if (!this.wavPlayer.context) {
+                    console.log('setAudioOutput: initializing audio output (no context yet)');
+                    // Store the promise so _waitForAudioOutputReady() can await it
+                    // This prevents response.audio from running before AudioContext is ready
+                    const setupPromise = this.setupAudioOutput();
+                    this.audioOutputReady = setupPromise;
+                    await setupPromise;
+                }
+                else {
+                    console.log('setAudioOutput: unmuting existing player');
+                    this.wavPlayer.unmute();
+                }
+                // Sync agentSpeaking state with actual playback state when enabling audio output
+                this._syncAgentSpeakingState();
             }
             else {
                 this.wavPlayer.mute();
@@ -6439,6 +6521,17 @@ class LayercodeClient {
             }
         }
     }
+    /**
+     * Syncs the reported agentSpeaking state with the actual audio playback state.
+     * Called when audioOutput is enabled to ensure proper state synchronization.
+     */
+    _syncAgentSpeakingState() {
+        const shouldReportSpeaking = this.audioOutput && this.agentIsPlayingAudio;
+        if (this.agentIsSpeaking !== shouldReportSpeaking) {
+            this.agentIsSpeaking = shouldReportSpeaking;
+            this.options.onAgentSpeakingChange(shouldReportSpeaking);
+        }
+    }
     /** Emitters for audio flags */
     _emitAudioInput() {
         this.options.audioInputChanged(this.audioInput);
@@ -6575,6 +6668,11 @@ class LayercodeClient {
         return authorizeSessionResponseBody;
     }
     async setupAudioOutput() {
+        // Only initialize audio player if audioOutput is enabled
+        // This prevents AudioContext creation before user gesture when audio is disabled
+        if (!this.audioOutput) {
+            return;
+        }
         // Initialize audio player
         // wavRecorder will be started from the onDeviceSwitched callback,
         // which is called when the device is first initialized and also when the device is switched
@@ -6585,12 +6683,7 @@ class LayercodeClient {
         if (!this.options.enableAmplitudeMonitoring) {
             this.agentAudioAmplitude = 0;
         }
-        if (this.audioOutput) {
-            this.wavPlayer.unmute();
-        }
-        else {
-            this.wavPlayer.mute();
-        }
+        this.wavPlayer.unmute();
     }
     async connectToAudioInput() {
         if (!this.audioInput) {
@@ -6639,6 +6732,7 @@ class LayercodeClient {
      */
     async setInputDevice(deviceId) {
         var _a, _b, _c;
+        console.log('setInputDevice called with:', deviceId, 'audioInput:', this.audioInput);
         const normalizedDeviceId = !deviceId || deviceId === 'default' ? null : deviceId;
         this.useSystemDefaultDevice = normalizedDeviceId === null;
         this.deviceId = normalizedDeviceId;
@@ -6647,6 +6741,7 @@ class LayercodeClient {
             return;
         }
         try {
+            console.log('setInputDevice: calling _queueRecorderRestart');
             // Restart recording with the new device
             await this._queueRecorderRestart();
             // Reinitialize VAD with the new audio stream if VAD is enabled
@@ -6730,12 +6825,15 @@ class LayercodeClient {
         return run;
     }
     async _initializeRecorderWithDefaultDevice() {
+        console.log('_initializeRecorderWithDefaultDevice called, deviceChangeListener:', !!this.deviceChangeListener);
         if (!this.deviceChangeListener) {
             return;
         }
         try {
             const devices = await this.wavRecorder.listDevices();
+            console.log('_initializeRecorderWithDefaultDevice: got devices:', devices.length);
             if (devices.length) {
+                console.log('_initializeRecorderWithDefaultDevice: calling deviceChangeListener');
                 await this.deviceChangeListener(devices);
                 return;
             }
@@ -6745,6 +6843,7 @@ class LayercodeClient {
             console.warn('Unable to prime audio devices from listDevices()', error);
         }
         try {
+            console.log('_initializeRecorderWithDefaultDevice: calling setInputDevice default');
             await this.setInputDevice('default');
         }
         catch (error) {
@@ -6793,6 +6892,7 @@ class LayercodeClient {
             });
             this.deviceChangeListener = async (devices) => {
                 var _a;
+                console.log('deviceChangeListener called, devices:', devices.length, 'recorderStarted:', this.recorderStarted);
                 try {
                     // Notify user that devices have changed
                     this.options.onDevicesChanged(devices);
@@ -6801,6 +6901,7 @@ class LayercodeClient {
                     const previousDefaultDeviceKey = this.lastKnownSystemDefaultDeviceKey;
                     const currentDefaultDeviceKey = this._getDeviceComparisonKey(defaultDevice);
                     let shouldSwitch = !this.recorderStarted;
+                    console.log('deviceChangeListener: shouldSwitch initial:', shouldSwitch);
                     if (!shouldSwitch) {
                         if (usingDefaultDevice) {
                             if (!defaultDevice) {
@@ -6820,6 +6921,7 @@ class LayercodeClient {
                         }
                     }
                     this.lastKnownSystemDefaultDeviceKey = currentDefaultDeviceKey;
+                    console.log('deviceChangeListener: final shouldSwitch:', shouldSwitch);
                     if (shouldSwitch) {
                         console.debug('Selecting audio input device after change');
                         let targetDeviceId = null;