npm - @layercode/js-sdk - Versions diffs - 2.8.1 → 2.8.3 - Mend

@layercode/js-sdk 2.8.1 → 2.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

package/dist/layercode-js-sdk.esm.js +116 -14
package/dist/layercode-js-sdk.esm.js.map +1 -1
package/dist/layercode-js-sdk.min.js +116 -14
package/dist/layercode-js-sdk.min.js.map +1 -1
package/dist/types/index.d.ts +14 -0
package/dist/types/interfaces.d.ts +6 -2
package/dist/types/wavtools/lib/analysis/audio_analysis.d.ts +1 -1
package/package.json +1 -1

package/dist/layercode-js-sdk.min.js (changed)

@@ -5318,13 +5318,15 @@ registerProcessor('audio_processor', AudioProcessor);
 	   * @returns {Promise<true>}
 	   */
 	  async requestPermission() {
+	    console.log('ensureUserMediaAccess');
 	    try {
-	      console.log('ensureUserMediaAccess');
-	      await navigator.mediaDevices.getUserMedia({
+	      const stream = await navigator.mediaDevices.getUserMedia({
 	        audio: true,
 	      });
+	      // Stop the tracks immediately after getting permission
+	      stream.getTracks().forEach(track => track.stop());
 	    } catch (fallbackError) {
-	      window.alert('You must grant microphone access to use this feature.');
+	      console.error('getUserMedia failed:', fallbackError.name, fallbackError.message);
 	      throw fallbackError;
 	    }
 	    return true;
@@ -5968,9 +5970,11 @@ registerProcessor('audio_processor', AudioProcessor);
 	        this.canInterrupt = false;
 	        this.userIsSpeaking = false;
 	        this.agentIsSpeaking = false;
+	        this.agentIsPlayingAudio = false;
 	        this.recorderStarted = false;
 	        this.readySent = false;
 	        this.currentTurnId = null;
+	        this.sentReplayFinishedForDisabledOutput = false;
 	        this.audioBuffer = [];
 	        this.vadConfig = null;
 	        this.activeDeviceId = null;
@@ -6120,6 +6124,8 @@ registerProcessor('audio_processor', AudioProcessor);
 	        await this.audioOutputReady;
 	    }
 	    _setAgentSpeaking(isSpeaking) {
+	        // Track the actual audio playback state regardless of audioOutput setting
+	        this.agentIsPlayingAudio = isSpeaking;
 	        const shouldReportSpeaking = this.audioOutput && isSpeaking;
 	        if (this.agentIsSpeaking === shouldReportSpeaking) {
 	            return;
@@ -6128,11 +6134,14 @@ registerProcessor('audio_processor', AudioProcessor);
 	        this.options.onAgentSpeakingChange(shouldReportSpeaking);
 	    }
 	    _setUserSpeaking(isSpeaking) {
-	        const shouldReportSpeaking = this._shouldCaptureUserAudio() && isSpeaking;
+	        const shouldCapture = this._shouldCaptureUserAudio();
+	        const shouldReportSpeaking = shouldCapture && isSpeaking;
+	        console.log('_setUserSpeaking called:', isSpeaking, 'shouldCapture:', shouldCapture, 'shouldReportSpeaking:', shouldReportSpeaking, 'current userIsSpeaking:', this.userIsSpeaking);
 	        if (this.userIsSpeaking === shouldReportSpeaking) {
 	            return;
 	        }
 	        this.userIsSpeaking = shouldReportSpeaking;
+	        console.log('_setUserSpeaking: updated userIsSpeaking to:', this.userIsSpeaking);
 	        this.options.onUserIsSpeakingChange(shouldReportSpeaking);
 	    }
 	    /**
@@ -6182,6 +6191,7 @@ registerProcessor('audio_processor', AudioProcessor);
 	     * @param {MessageEvent} event - The WebSocket message event
 	     */
 	    async _handleWebSocketMessage(event) {
+	        var _a, _b;
 	        try {
 	            const message = JSON.parse(event.data);
 	            if (message.type !== 'response.audio') {
@@ -6194,6 +6204,20 @@ registerProcessor('audio_processor', AudioProcessor);
 	                        // Start tracking new agent turn
 	                        console.debug('Agent turn started, will track new turn ID from audio/text');
 	                        this._setUserSpeaking(false);
+	                        // Reset the flag for the new assistant turn
+	                        this.sentReplayFinishedForDisabledOutput = false;
+	                        // When assistant's turn starts but we're not playing audio,
+	                        // we need to tell the server we're "done" with playback so it can
+	                        // transition the turn back to user. Use a small delay to let any
+	                        // response.audio/response.end messages arrive first.
+	                        if (!this.audioOutput) {
+	                            setTimeout(() => {
+	                                if (!this.audioOutput && !this.sentReplayFinishedForDisabledOutput) {
+	                                    this.sentReplayFinishedForDisabledOutput = true;
+	                                    this._clientResponseAudioReplayFinished();
+	                                }
+	                            }, 1000);
+	                        }
 	                    }
 	                    else if (message.role === 'user' && !this.pushToTalkEnabled) {
 	                        // Interrupt any playing agent audio if this is a turn triggered by the server (and not push to talk, which will have already called interrupt)
@@ -6213,11 +6237,42 @@ registerProcessor('audio_processor', AudioProcessor);
 	                    });
 	                    break;
 	                }
-	                case 'response.audio':
+	                case 'response.end': {
+	                    // When audioOutput is disabled, notify server that "playback" is complete
+	                    if (!this.audioOutput && !this.sentReplayFinishedForDisabledOutput) {
+	                        this.sentReplayFinishedForDisabledOutput = true;
+	                        this._clientResponseAudioReplayFinished();
+	                    }
+	                    (_b = (_a = this.options).onMessage) === null || _b === void 0 ? void 0 : _b.call(_a, message);
+	                    break;
+	                }
+	                case 'response.audio': {
+	                    // Skip audio playback if audioOutput is disabled
+	                    if (!this.audioOutput) {
+	                        // Send replay_finished so server knows we're "done" with playback (only once per turn)
+	                        if (!this.sentReplayFinishedForDisabledOutput) {
+	                            this.sentReplayFinishedForDisabledOutput = true;
+	                            this._clientResponseAudioReplayFinished();
+	                        }
+	                        break;
+	                    }
 	                    await this._waitForAudioOutputReady();
-	                    this._setAgentSpeaking(true);
 	                    const audioBuffer = base64ToArrayBuffer(message.content);
-	                    this.wavPlayer.add16BitPCM(audioBuffer, message.turn_id);
+	                    const hasAudioSamples = audioBuffer.byteLength > 0;
+	                    let audioEnqueued = false;
+	                    if (hasAudioSamples) {
+	                        try {
+	                            const playbackBuffer = this.wavPlayer.add16BitPCM(audioBuffer, message.turn_id);
+	                            audioEnqueued = Boolean(playbackBuffer && playbackBuffer.length > 0);
+	                        }
+	                        catch (error) {
+	                            this._setAgentSpeaking(false);
+	                            throw error;
+	                        }
+	                    }
+	                    else {
+	                        console.debug(`Skipping empty audio response for turn ${message.turn_id}`);
+	                    }
 	                    // TODO: once we've added turn_id to the turn.start msgs sent from teh server, we should move this currentTurnId switching logic to the turn.start msg case. We can then remove the currentTurnId setting logic from the response.audio and response.text cases.
 	                    // Set current turn ID from first audio message, or update if different turn
 	                    if (!this.currentTurnId || this.currentTurnId !== message.turn_id) {
@@ -6226,7 +6281,11 @@ registerProcessor('audio_processor', AudioProcessor);
 	                        // Clean up interrupted tracks, keeping only the current turn
 	                        this.wavPlayer.clearInterruptedTracks(this.currentTurnId ? [this.currentTurnId] : []);
 	                    }
+	                    if (audioEnqueued) {
+	                        this._setAgentSpeaking(true);
+	                    }
 	                    break;
+	                }
 	                case 'response.text':
 	                    // Set turn ID from first text message if not set
 	                    if (!this.currentTurnId) {
@@ -6331,6 +6390,9 @@ registerProcessor('audio_processor', AudioProcessor);
 	    }
 	    _sendReadyIfNeeded() {
 	        var _a;
+	        // Send client.ready when either:
+	        // 1. Recorder is started (audio mode active)
+	        // 2. audioInput is false (text-only mode, but server should still be ready)
 	        const audioReady = this.recorderStarted || !this.audioInput;
 	        if (audioReady && ((_a = this.ws) === null || _a === void 0 ? void 0 : _a.readyState) === WebSocket.OPEN && !this.readySent) {
 	            this._wsSend({ type: 'client.ready' });
@@ -6396,12 +6458,16 @@ registerProcessor('audio_processor', AudioProcessor);
 	    }
 	    async audioInputConnect() {
 	        // Turn mic ON
+	        console.log('audioInputConnect: requesting permission');
 	        await this.wavRecorder.requestPermission();
+	        console.log('audioInputConnect: setting up device change listener');
 	        await this._setupDeviceChangeListener();
 	        // If the recorder hasn't spun up yet, proactively select a device.
 	        if (!this.recorderStarted && this.deviceChangeListener) {
+	            console.log('audioInputConnect: initializing recorder with default device');
 	            await this._initializeRecorderWithDefaultDevice();
 	        }
+	        console.log('audioInputConnect: done, recorderStarted =', this.recorderStarted);
 	    }
 	    async audioInputDisconnect() {
 	        try {
@@ -6433,11 +6499,27 @@ registerProcessor('audio_processor', AudioProcessor);
 	        }
 	    }
 	    async setAudioOutput(state) {
+	        console.log('setAudioOutput called with state:', state, 'current:', this.audioOutput);
 	        if (this.audioOutput !== state) {
 	            this.audioOutput = state;
 	            this._emitAudioOutput();
 	            if (state) {
-	                this.wavPlayer.unmute();
+	                // Initialize audio output if not already connected
+	                // This happens when audioOutput was initially false and is now being enabled
+	                if (!this.wavPlayer.context) {
+	                    console.log('setAudioOutput: initializing audio output (no context yet)');
+	                    // Store the promise so _waitForAudioOutputReady() can await it
+	                    // This prevents response.audio from running before AudioContext is ready
+	                    const setupPromise = this.setupAudioOutput();
+	                    this.audioOutputReady = setupPromise;
+	                    await setupPromise;
+	                }
+	                else {
+	                    console.log('setAudioOutput: unmuting existing player');
+	                    this.wavPlayer.unmute();
+	                }
+	                // Sync agentSpeaking state with actual playback state when enabling audio output
+	                this._syncAgentSpeakingState();
 	            }
 	            else {
 	                this.wavPlayer.mute();
@@ -6445,6 +6527,17 @@ registerProcessor('audio_processor', AudioProcessor);
 	            }
 	        }
 	    }
+	    /**
+	     * Syncs the reported agentSpeaking state with the actual audio playback state.
+	     * Called when audioOutput is enabled to ensure proper state synchronization.
+	     */
+	    _syncAgentSpeakingState() {
+	        const shouldReportSpeaking = this.audioOutput && this.agentIsPlayingAudio;
+	        if (this.agentIsSpeaking !== shouldReportSpeaking) {
+	            this.agentIsSpeaking = shouldReportSpeaking;
+	            this.options.onAgentSpeakingChange(shouldReportSpeaking);
+	        }
+	    }
 	    /** Emitters for audio flags */
 	    _emitAudioInput() {
 	        this.options.audioInputChanged(this.audioInput);
@@ -6581,6 +6674,11 @@ registerProcessor('audio_processor', AudioProcessor);
 	        return authorizeSessionResponseBody;
 	    }
 	    async setupAudioOutput() {
+	        // Only initialize audio player if audioOutput is enabled
+	        // This prevents AudioContext creation before user gesture when audio is disabled
+	        if (!this.audioOutput) {
+	            return;
+	        }
 	        // Initialize audio player
 	        // wavRecorder will be started from the onDeviceSwitched callback,
 	        // which is called when the device is first initialized and also when the device is switched
@@ -6591,12 +6689,7 @@ registerProcessor('audio_processor', AudioProcessor);
 	        if (!this.options.enableAmplitudeMonitoring) {
 	            this.agentAudioAmplitude = 0;
 	        }
-	        if (this.audioOutput) {
-	            this.wavPlayer.unmute();
-	        }
-	        else {
-	            this.wavPlayer.mute();
-	        }
+	        this.wavPlayer.unmute();
 	    }
 	    async connectToAudioInput() {
 	        if (!this.audioInput) {
@@ -6645,6 +6738,7 @@ registerProcessor('audio_processor', AudioProcessor);
 	     */
 	    async setInputDevice(deviceId) {
 	        var _a, _b, _c;
+	        console.log('setInputDevice called with:', deviceId, 'audioInput:', this.audioInput);
 	        const normalizedDeviceId = !deviceId || deviceId === 'default' ? null : deviceId;
 	        this.useSystemDefaultDevice = normalizedDeviceId === null;
 	        this.deviceId = normalizedDeviceId;
@@ -6653,6 +6747,7 @@ registerProcessor('audio_processor', AudioProcessor);
 	            return;
 	        }
 	        try {
+	            console.log('setInputDevice: calling _queueRecorderRestart');
 	            // Restart recording with the new device
 	            await this._queueRecorderRestart();
 	            // Reinitialize VAD with the new audio stream if VAD is enabled
@@ -6736,12 +6831,15 @@ registerProcessor('audio_processor', AudioProcessor);
 	        return run;
 	    }
 	    async _initializeRecorderWithDefaultDevice() {
+	        console.log('_initializeRecorderWithDefaultDevice called, deviceChangeListener:', !!this.deviceChangeListener);
 	        if (!this.deviceChangeListener) {
 	            return;
 	        }
 	        try {
 	            const devices = await this.wavRecorder.listDevices();
+	            console.log('_initializeRecorderWithDefaultDevice: got devices:', devices.length);
 	            if (devices.length) {
+	                console.log('_initializeRecorderWithDefaultDevice: calling deviceChangeListener');
 	                await this.deviceChangeListener(devices);
 	                return;
 	            }
@@ -6751,6 +6849,7 @@ registerProcessor('audio_processor', AudioProcessor);
 	            console.warn('Unable to prime audio devices from listDevices()', error);
 	        }
 	        try {
+	            console.log('_initializeRecorderWithDefaultDevice: calling setInputDevice default');
 	            await this.setInputDevice('default');
 	        }
 	        catch (error) {
@@ -6799,6 +6898,7 @@ registerProcessor('audio_processor', AudioProcessor);
 	            });
 	            this.deviceChangeListener = async (devices) => {
 	                var _a;
+	                console.log('deviceChangeListener called, devices:', devices.length, 'recorderStarted:', this.recorderStarted);
 	                try {
 	                    // Notify user that devices have changed
 	                    this.options.onDevicesChanged(devices);
@@ -6807,6 +6907,7 @@ registerProcessor('audio_processor', AudioProcessor);
 	                    const previousDefaultDeviceKey = this.lastKnownSystemDefaultDeviceKey;
 	                    const currentDefaultDeviceKey = this._getDeviceComparisonKey(defaultDevice);
 	                    let shouldSwitch = !this.recorderStarted;
+	                    console.log('deviceChangeListener: shouldSwitch initial:', shouldSwitch);
 	                    if (!shouldSwitch) {
 	                        if (usingDefaultDevice) {
 	                            if (!defaultDevice) {
@@ -6826,6 +6927,7 @@ registerProcessor('audio_processor', AudioProcessor);
 	                        }
 	                    }
 	                    this.lastKnownSystemDefaultDeviceKey = currentDefaultDeviceKey;
+	                    console.log('deviceChangeListener: final shouldSwitch:', shouldSwitch);
 	                    if (shouldSwitch) {
 	                        console.debug('Selecting audio input device after change');
 	                        let targetDeviceId = null;