@layercode/js-sdk 1.0.24 → 1.0.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3525,12 +3525,11 @@ registerProcessor('audio_processor', AudioProcessor);
 this.pushToTalkEnabled = false;
 this.canInterrupt = false;
 this.userIsSpeaking = false;
-this.endUserTurn = false;
 this.recorderStarted = false;
 this.readySent = false;
 this.currentTurnId = null;
 this.audioBuffer = [];
-this.audioPauseTime = null;
+// this.audioPauseTime = null;
 // Bind event handlers
 this._handleWebSocketMessage = this._handleWebSocketMessage.bind(this);
 this._handleDataAvailable = this._handleDataAvailable.bind(this);
@@ -3539,18 +3538,13 @@ registerProcessor('audio_processor', AudioProcessor);
 let isSpeakingByAmplitude = false;
 let silenceFrames = 0;
 const AMPLITUDE_THRESHOLD = 0.01; // Adjust based on testing
-const SILENCE_FRAMES_THRESHOLD =
+const SILENCE_FRAMES_THRESHOLD = 6.4; // 6.4 * 20ms chunks = 128ms silence. Same as Silero ((frame samples: 512 / sampleRate: 16000) * 1000 * redemptionFrames: 4) = 128 ms silence
 // Monitor amplitude changes
 this.wavRecorder.startAmplitudeMonitoring((amplitude) => {
 const wasSpeaking = isSpeakingByAmplitude;
 if (amplitude > AMPLITUDE_THRESHOLD) {
 silenceFrames = 0;
 if (!wasSpeaking) {
-// Speech started - pause audio if playing and track timing for interruption calculation
-if (this.canInterrupt && this.wavPlayer.isPlaying) {
-this.audioPauseTime = Date.now();
-this.wavPlayer.pause();
-}
 isSpeakingByAmplitude = true;
 this.userIsSpeaking = true;
 this.options.onUserIsSpeakingChange(true);
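The arithmetic in the new comment checks out. As a standalone sanity check (not part of the package; the 20 ms chunk duration is taken from the comment above):

// Amplitude readings arrive once per 20 ms audio chunk:
console.log(6.4 * 20); // 128 -> ~128 ms of sustained silence ends a turn
// Matches the Silero v5 window configured later in this diff:
console.log((512 / 16000) * 1000 * 4); // (frameSamples / sampleRate) * 1000 * redemptionFrames = 128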
@@ -3563,7 +3557,6 @@ registerProcessor('audio_processor', AudioProcessor);
 else {
 silenceFrames++;
 if (wasSpeaking && silenceFrames >= SILENCE_FRAMES_THRESHOLD) {
-// Speech ended
 isSpeakingByAmplitude = false;
 this.userIsSpeaking = false;
 this.options.onUserIsSpeakingChange(false);
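Taken together, the two branches above form a simple hysteresis detector: any chunk above AMPLITUDE_THRESHOLD starts or sustains speech, and only an unbroken run of quiet chunks ends it. A minimal standalone sketch of the same pattern, assuming the SDK's thresholds (names here are hypothetical):

function createAmplitudeVAD(onSpeakingChange, threshold = 0.01, silenceFramesNeeded = 6.4) {
  let speaking = false;
  let silenceFrames = 0;
  // Call once per amplitude reading (one 20 ms chunk)
  return (amplitude) => {
    if (amplitude > threshold) {
      silenceFrames = 0; // any loud chunk resets the silence counter
      if (!speaking) { speaking = true; onSpeakingChange(true); }
    } else if (speaking && ++silenceFrames >= silenceFramesNeeded) {
      speaking = false; // ~128 ms of silence ends the turn
      onSpeakingChange(false);
    }
  };
}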
@@ -3581,7 +3574,7 @@ registerProcessor('audio_processor', AudioProcessor);
 if (this.pushToTalkEnabled) {
 return;
 }
-const timeout = setTimeout(() => {
+const vadLoadTimeout = setTimeout(() => {
 console.log('silero vad model timeout');
 console.warn('VAD model failed to load - falling back to amplitude-based detection');
 // Send a message to server indicating VAD failure
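The renamed vadLoadTimeout follows a common watchdog pattern: arm a fallback timer before the asynchronous model load, then disarm it once the load succeeds (the clearTimeout(vadLoadTimeout) in the .then() below). A generic sketch of the pattern, with loadModel and startFallback as hypothetical stand-ins:

const watchdog = setTimeout(() => startFallback(), 2000); // fires only if the model is too slow
loadModel()
  .then((model) => {
    clearTimeout(watchdog); // model arrived in time; cancel the fallback
    model.start();
  })
  .catch((error) => console.error('model load failed:', error));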
@@ -3589,134 +3582,54 @@ registerProcessor('audio_processor', AudioProcessor);
 type: 'vad_events',
 event: 'vad_model_failed',
 });
-// In automatic mode without VAD, allow the bot to speak initially
-this.userIsSpeaking = false;
-this.options.onUserIsSpeakingChange(false);
 // Set up amplitude-based fallback detection
 this._setupAmplitudeBasedVAD();
 }, 2000);
-});
-}
-else {
-dist.MicVAD.new({
-stream: this.wavRecorder.getStream() || undefined,
-model: 'v5',
-// baseAssetPath: '/', // Use if bundling model locally
-// onnxWASMBasePath: '/', // Use if bundling model locally
-positiveSpeechThreshold: 0.5,
-negativeSpeechThreshold: 0.3,
-redemptionFrames: 25, // Number of frames of silence before onVADMisfire or onSpeechEnd is called. Effectively a delay before restarting.
-minSpeechFrames: 25,
-preSpeechPadFrames: 0,
-onSpeechStart: () => {
-// Only pause agent audio if it's currently playing
-if (this.wavPlayer.isPlaying) {
-console.log('onSpeechStart: WavPlayer is playing, pausing it.');
-this.audioPauseTime = Date.now(); // Track when we paused
-this.wavPlayer.pause();
-}
-else {
-console.log('onSpeechStart: WavPlayer is not playing, VAD will not pause.');
-}
-console.log('onSpeechStart: sending vad_start');
-this._wsSend({
-type: 'vad_events',
-event: 'vad_start',
-});
-this.userIsSpeaking = true;
-this.options.onUserIsSpeakingChange(true);
-this.endUserTurn = false; // Reset endUserTurn when speech starts
-console.log('onSpeechStart: State after update - endUserTurn:', this.endUserTurn, 'userIsSpeaking:', this.userIsSpeaking);
-},
-onVADMisfire: () => {
-// If the speech detected was for less than minSpeechFrames, this is called instead of onSpeechEnd, and we should resume the assistant audio as it was a false interruption. We include a configurable delay so the assistant isn't too quick to start speaking again.
-this.userIsSpeaking = false;
-this.audioBuffer = []; // Clear buffer on misfire
-this.options.onUserIsSpeakingChange(false);
-// Add the missing delay before resuming to prevent race conditions
-setTimeout(() => {
-if (!this.wavPlayer.isPlaying) {
-console.log('onVADMisfire: Resuming after delay');
-this.audioPauseTime = null; // Clear pause time since we're resuming
-this.wavPlayer.play();
-}
-else {
-console.log('onVADMisfire: Not resuming - either no pause or user speaking again');
-this.endUserTurn = true;
-}
-}, this.options.vadResumeDelay);
-},
-onSpeechEnd: () => {
-console.log('onSpeechEnd: sending vad_end');
-this.endUserTurn = true; // Set flag to indicate that the user turn has ended
-this.audioBuffer = []; // Clear buffer on speech end
-this.userIsSpeaking = false;
-this.options.onUserIsSpeakingChange(false);
-console.log('onSpeechEnd: State after update - endUserTurn:', this.endUserTurn, 'userIsSpeaking:', this.userIsSpeaking);
-// Send vad_end immediately instead of waiting for next audio chunk
-this._wsSend({
-type: 'vad_events',
-event: 'vad_end',
-});
-this.endUserTurn = false; // Reset the flag after sending vad_end
-},
-})
-.then((vad) => {
-clearTimeout(timeout);
-this.vad = vad;
-this.vad.start();
-console.log('VAD started');
-})
-.catch((error) => {
-console.error('Error initializing VAD:', error);
-});
-}
+dist.MicVAD.new({
+stream: this.wavRecorder.getStream() || undefined,
+model: 'v5',
+positiveSpeechThreshold: 0.15,
+negativeSpeechThreshold: 0.05,
+redemptionFrames: 4,
+minSpeechFrames: 2,
+preSpeechPadFrames: 0,
+frameSamples: 512, // Required for v5 as per https://docs.vad.ricky0123.com/user-guide/algorithm/#configuration
+onSpeechStart: () => {
+console.log('onSpeechStart: sending vad_start');
+this.userIsSpeaking = true;
+this.options.onUserIsSpeakingChange(true);
+this._wsSend({
+type: 'vad_events',
+event: 'vad_start',
+});
+},
+onSpeechEnd: () => {
+console.log('onSpeechEnd: sending vad_end');
+this.userIsSpeaking = false;
+this.options.onUserIsSpeakingChange(false);
+this.audioBuffer = []; // Clear buffer on speech end
+this._wsSend({
+type: 'vad_events',
+event: 'vad_end',
+});
+},
+// onVADMisfire: () => {
+// // If the speech detected was for less than minSpeechFrames, this is called instead of onSpeechEnd.
+// },
+})
+.then((vad) => {
+clearTimeout(vadLoadTimeout);
+this.vad = vad;
+this.vad.start();
+console.log('VAD started');
+})
+.catch((error) => {
+console.error('Error initializing VAD:', error);
+});
 }
 /**
 * Updates the connection status and triggers the callback
 * @param {string} status - New status value
-* @private
 */
 _setStatus(status) {
 this.status = status;
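The retuned Silero v5 configuration is considerably more sensitive than the removed one: the speech/silence probability thresholds drop from 0.5/0.3 to 0.15/0.05, and the frame counts shrink so turns start and end much faster. The timing the new numbers imply, as a standalone sketch (v5 uses 512-sample frames at 16 kHz per the linked docs):

const frameMs = (512 / 16000) * 1000; // 32 ms per VAD frame
console.log(frameMs * 2); // minSpeechFrames: 2  -> ~64 ms of speech triggers onSpeechStart
console.log(frameMs * 4); // redemptionFrames: 4 -> ~128 ms of silence triggers onSpeechEnd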
@@ -3724,7 +3637,6 @@ registerProcessor('audio_processor', AudioProcessor);
 }
 /**
 * Handles when agent audio finishes playing
-* @private
 */
 _clientResponseAudioReplayFinished() {
 console.log('clientResponseAudioReplayFinished');
@@ -3737,17 +3649,6 @@ registerProcessor('audio_processor', AudioProcessor);
 const offsetData = await this.wavPlayer.interrupt();
 if (offsetData && this.currentTurnId) {
 let offsetMs = offsetData.currentTime * 1000;
-// Calculate accurate offset by subtracting pause time if audio was paused for VAD
-if (this.audioPauseTime) {
-const pauseDurationMs = Date.now() - this.audioPauseTime;
-const adjustedOffsetMs = Math.max(0, offsetMs - pauseDurationMs);
-console.log(`Interruption detected: Raw offset ${offsetMs}ms, pause duration ${pauseDurationMs}ms, adjusted offset ${adjustedOffsetMs}ms for turn ${this.currentTurnId}`);
-offsetMs = adjustedOffsetMs;
-this.audioPauseTime = null; // Clear the pause time
-}
-else {
-console.log(`Interruption detected: ${offsetMs}ms offset for turn ${this.currentTurnId} (no pause adjustment needed)`);
-}
 // Send interruption event with accurate playback offset in milliseconds
 this._wsSend({
 type: 'trigger.response.audio.interrupted',
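Because this version no longer pauses playback when the user starts speaking (audioPauseTime is gone), the raw player position is already the true playback offset, so the pause-duration adjustment could be deleted. The surviving logic reduces to roughly this sketch (this.wavPlayer, this.currentTurnId, and this._wsSend are the SDK's own members; the event's remaining fields are not visible in this hunk):

const offsetData = await this.wavPlayer.interrupt();
if (offsetData && this.currentTurnId) {
  const offsetMs = offsetData.currentTime * 1000; // seconds -> milliseconds; no pause adjustment needed anymore
  this._wsSend({
    type: 'trigger.response.audio.interrupted',
    // ...turn id / offset fields as sent by the SDK (not shown in this hunk)
  });
}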
@@ -3781,7 +3682,6 @@ registerProcessor('audio_processor', AudioProcessor);
 /**
 * Handles incoming WebSocket messages
 * @param {MessageEvent} event - The WebSocket message event
-* @private
 */
 async _handleWebSocketMessage(event) {
 try {
@@ -3796,12 +3696,10 @@ registerProcessor('audio_processor', AudioProcessor);
 console.log(message);
 if (message.role === 'assistant') {
 // Start tracking new assistant turn
-// Note: Don't reset currentTurnId here - let response.audio set it
-// This prevents race conditions where text arrives before audio
 console.log('Assistant turn started, will track new turn ID from audio/text');
 }
-else if (message.role === 'user' && !this.pushToTalkEnabled) {
-// Interrupt any playing assistant audio if this is a turn
+else if (message.role === 'user' && !this.pushToTalkEnabled) {
+// Interrupt any playing assistant audio if this is a turn triggered by the server (and not push to talk, which will have already called interrupt)
 console.log('interrupting assistant audio, as user turn has started and pushToTalkEnabled is false');
 await this._clientInterruptAssistantReplay();
 }
@@ -3823,7 +3721,6 @@ registerProcessor('audio_processor', AudioProcessor);
 this.currentTurnId = message.turn_id;
 console.log(`Setting current turn ID to: ${message.turn_id} from text message`);
 }
-// Note: We no longer track text content in the client - the pipeline handles interruption estimation
 break;
 }
 case 'response.data':
@@ -3843,7 +3740,6 @@ registerProcessor('audio_processor', AudioProcessor);
 /**
 * Handles available client browser microphone audio data and sends it over the WebSocket
 * @param {ArrayBuffer} data - The audio data buffer
-* @private
 */
 _handleDataAvailable(data) {
 try {
@@ -3903,7 +3799,6 @@ registerProcessor('audio_processor', AudioProcessor);
 * @param {WavRecorder | WavStreamPlayer} source - The audio source (recorder or player).
 * @param {(amplitude: number) => void} callback - The callback function to invoke on amplitude change.
 * @param {(amplitude: number) => void} updateInternalState - Function to update the internal amplitude state.
-* @private
 */
 _setupAmplitudeMonitoring(source, callback, updateInternalState) {
 // Set up amplitude monitoring only if a callback is provided