@layercode/js-sdk 1.0.22 → 1.0.23

This diff shows the changes between publicly released versions of the package as published to their public registry. It is provided for informational purposes only.
@@ -3528,13 +3528,53 @@ registerProcessor('audio_processor', AudioProcessor);
  this.endUserTurn = false;
  this.recorderStarted = false;
  this.readySent = false;
- this.currentTurnText = '';
  this.currentTurnId = null;
  this.audioBuffer = [];
+ this.audioPauseTime = null;
  // Bind event handlers
  this._handleWebSocketMessage = this._handleWebSocketMessage.bind(this);
  this._handleDataAvailable = this._handleDataAvailable.bind(this);
  }
+ _setupAmplitudeBasedVAD() {
+ let isSpeakingByAmplitude = false;
+ let silenceFrames = 0;
+ const AMPLITUDE_THRESHOLD = 0.01; // Adjust based on testing
+ const SILENCE_FRAMES_THRESHOLD = 30; // ~600ms at 20ms chunks
+ // Monitor amplitude changes
+ this.wavRecorder.startAmplitudeMonitoring((amplitude) => {
+ const wasSpeaking = isSpeakingByAmplitude;
+ if (amplitude > AMPLITUDE_THRESHOLD) {
+ silenceFrames = 0;
+ if (!wasSpeaking) {
+ // Speech started - pause audio if playing and track timing for interruption calculation
+ if (this.canInterrupt && this.wavPlayer.isPlaying) {
+ this.audioPauseTime = Date.now();
+ this.wavPlayer.pause();
+ }
+ isSpeakingByAmplitude = true;
+ this.userIsSpeaking = true;
+ this.options.onUserIsSpeakingChange(true);
+ this._wsSend({
+ type: 'vad_events',
+ event: 'vad_start',
+ });
+ }
+ }
+ else {
+ silenceFrames++;
+ if (wasSpeaking && silenceFrames >= SILENCE_FRAMES_THRESHOLD) {
+ // Speech ended
+ isSpeakingByAmplitude = false;
+ this.userIsSpeaking = false;
+ this.options.onUserIsSpeakingChange(false);
+ this._wsSend({
+ type: 'vad_events',
+ event: 'vad_end',
+ });
+ }
+ }
+ });
+ }
  _initializeVAD() {
  console.log('initializing VAD', { pushToTalkEnabled: this.pushToTalkEnabled, canInterrupt: this.canInterrupt });
  // If we're in push to talk mode, we don't need to use the VAD model
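
Note: the new `_setupAmplitudeBasedVAD` is a plain energy gate with hysteresis: speech starts on the first audio chunk whose amplitude clears a fixed floor, and ends only after 30 consecutive sub-threshold chunks (~600 ms at 20 ms per chunk), so brief dips in volume do not end the turn. A minimal standalone sketch of the same state machine, decoupled from the SDK (the function and callback names are hypothetical; the thresholds are copied from the diff):

```js
// Hypothetical helper mirroring the hysteresis in _setupAmplitudeBasedVAD.
function createAmplitudeVAD({ onSpeechStart, onSpeechEnd }) {
  const AMPLITUDE_THRESHOLD = 0.01;    // normalized amplitude floor (0..1)
  const SILENCE_FRAMES_THRESHOLD = 30; // ~600ms of silence at 20ms chunks
  let speaking = false;
  let silenceFrames = 0;
  // Feed one amplitude sample per audio chunk.
  return function onAmplitude(amplitude) {
    if (amplitude > AMPLITUDE_THRESHOLD) {
      silenceFrames = 0;            // any loud chunk resets the silence counter
      if (!speaking) {
        speaking = true;
        onSpeechStart();            // corresponds to the vad_start event above
      }
    } else if (speaking && ++silenceFrames >= SILENCE_FRAMES_THRESHOLD) {
      speaking = false;
      silenceFrames = 0;
      onSpeechEnd();                // corresponds to the vad_end event above
    }
  };
}
```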
@@ -3543,9 +3583,17 @@ registerProcessor('audio_processor', AudioProcessor);
  }
  const timeout = setTimeout(() => {
  console.log('silero vad model timeout');
- // TODO: send message to server to indicate that the vad model timed out
- this.userIsSpeaking = true; // allow audio to be sent to the server
- this.options.onUserIsSpeakingChange(true);
+ console.warn('VAD model failed to load - falling back to amplitude-based detection');
+ // Send a message to server indicating VAD failure
+ this._wsSend({
+ type: 'vad_events',
+ event: 'vad_model_failed',
+ });
+ // In automatic mode without VAD, allow the bot to speak initially
+ this.userIsSpeaking = false;
+ this.options.onUserIsSpeakingChange(false);
+ // Set up amplitude-based fallback detection
+ this._setupAmplitudeBasedVAD();
  }, 2000);
  if (!this.canInterrupt) {
  dist.MicVAD.new({
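
Note: 1.0.22 handled a Silero model timeout by permanently marking the user as speaking so microphone audio would still reach the server; 1.0.23 instead reports the failure (`vad_model_failed`) and degrades to the amplitude gate above. The general shape of that fallback, sketched with `Promise.race` (the diff itself uses a `setTimeout` guard; the helper names here are hypothetical, the 2000 ms budget is from the diff):

```js
// Sketch only: race the model load against a fixed budget, then degrade.
async function initVADWithFallback(loadSileroVAD, startAmplitudeFallback, notifyServer) {
  const timedOut = new Promise((resolve) => setTimeout(() => resolve(null), 2000));
  const vad = await Promise.race([loadSileroVAD(), timedOut]);
  if (vad) return vad; // model arrived within budget; use the real VAD
  notifyServer({ type: 'vad_events', event: 'vad_model_failed' });
  startAmplitudeFallback(); // degrade to the amplitude gate sketched above
  return null;
}
```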
@@ -3554,7 +3602,7 @@ registerProcessor('audio_processor', AudioProcessor);
  positiveSpeechThreshold: 0.3,
  negativeSpeechThreshold: 0.2,
  redemptionFrames: 25, // Number of frames of silence before onVADMisfire or onSpeechEnd is called. Effectively a delay before restarting.
- minSpeechFrames: 15,
+ minSpeechFrames: 0,
  preSpeechPadFrames: 0,
  onSpeechStart: () => {
  this.userIsSpeaking = true;
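
Note: these option names match @ricky0123/vad-web's `MicVAD` (imported as `dist.MicVAD` here), where a speech segment that ends with fewer than `minSpeechFrames` positive frames fires `onVADMisfire` instead of `onSpeechEnd`. Dropping the minimum from 15 to 0 means no segment can be "too short", which would explain why the `onVADMisfire` handler is deleted as dead code in the next hunk. The configuration, annotated for reference (interpretive comments, assuming vad-web's documented semantics):

```js
// Options object as configured in 1.0.23 for the non-interruptible branch.
const vadOptions = {
  positiveSpeechThreshold: 0.3, // model score needed to enter "speaking"
  negativeSpeechThreshold: 0.2, // score below which a frame counts as silence
  redemptionFrames: 25,         // silence frames tolerated before speech ends
  minSpeechFrames: 0,           // 1.0.23: no minimum, so segments never misfire
  preSpeechPadFrames: 0,        // no leading audio padding
};
```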
@@ -3565,27 +3613,6 @@ registerProcessor('audio_processor', AudioProcessor);
  event: 'vad_start',
  });
  },
- onVADMisfire: () => {
- console.log('onVADMisfire: Short utterance detected, resuming bot');
- this.audioBuffer = []; // Clear buffer on misfire
- this.userIsSpeaking = false;
- this.options.onUserIsSpeakingChange(false);
- // Send vad_end to indicate the short utterance is over
- this._wsSend({
- type: 'vad_events',
- event: 'vad_end',
- });
- // End the user's turn
- this._wsSend({
- type: 'trigger.turn.end',
- role: 'user',
- });
- // Resume bot audio if it was playing
- if (!this.wavPlayer.isPlaying) {
- console.log('onVADMisfire: Resuming bot audio');
- this.wavPlayer.play();
- }
- },
  onSpeechEnd: () => {
  console.log('onSpeechEnd: sending vad_end');
  this.endUserTurn = true; // Set flag to indicate that the user turn has ended
@@ -3626,6 +3653,7 @@ registerProcessor('audio_processor', AudioProcessor);
  // Only pause agent audio if it's currently playing
  if (this.wavPlayer.isPlaying) {
  console.log('onSpeechStart: WavPlayer is playing, pausing it.');
+ this.audioPauseTime = Date.now(); // Track when we paused
  this.wavPlayer.pause();
  }
  else {
@@ -3650,9 +3678,8 @@ registerProcessor('audio_processor', AudioProcessor);
  setTimeout(() => {
  if (!this.wavPlayer.isPlaying) {
  console.log('onVADMisfire: Resuming after delay');
+ this.audioPauseTime = null; // Clear pause time since we're resuming
  this.wavPlayer.play();
- this.userIsSpeaking = true;
- this.options.onUserIsSpeakingChange(true);
  }
  else {
  console.log('onVADMisfire: Not resuming - either no pause or user speaking again');
@@ -3706,33 +3733,37 @@ registerProcessor('audio_processor', AudioProcessor);
  reason: 'completed',
  });
  }
- _estimateWordsHeard(text, playbackOffsetSeconds) {
- const words = text.split(/\s+/).filter((word) => word.length > 0);
- const totalWords = words.length;
- // Rough estimation: average speaking rate is ~150 words per minute (2.5 words per second)
- const estimatedWordsPerSecond = 2.5;
- const estimatedWordsHeard = Math.min(Math.floor(playbackOffsetSeconds * estimatedWordsPerSecond), totalWords);
- const textHeard = words.slice(0, estimatedWordsHeard).join(' ');
- return { wordsHeard: estimatedWordsHeard, textHeard };
- }
  async _clientInterruptAssistantReplay() {
  const offsetData = await this.wavPlayer.interrupt();
- if (offsetData && this.currentTurnText && this.currentTurnId) {
- const { wordsHeard, textHeard } = this._estimateWordsHeard(this.currentTurnText, offsetData.currentTime);
- const totalWords = this.currentTurnText.split(/\s+/).filter((word) => word.length > 0).length;
- console.log(`Interruption detected: ${wordsHeard}/${totalWords} words heard, text: "${textHeard}"`);
- // Send interruption event with context
+ if (offsetData && this.currentTurnId) {
+ let offsetMs = offsetData.currentTime * 1000;
+ // Calculate accurate offset by subtracting pause time if audio was paused for VAD
+ if (this.audioPauseTime) {
+ const pauseDurationMs = Date.now() - this.audioPauseTime;
+ const adjustedOffsetMs = Math.max(0, offsetMs - pauseDurationMs);
+ console.log(`Interruption detected: Raw offset ${offsetMs}ms, pause duration ${pauseDurationMs}ms, adjusted offset ${adjustedOffsetMs}ms for turn ${this.currentTurnId}`);
+ offsetMs = adjustedOffsetMs;
+ this.audioPauseTime = null; // Clear the pause time
+ }
+ else {
+ console.log(`Interruption detected: ${offsetMs}ms offset for turn ${this.currentTurnId} (no pause adjustment needed)`);
+ }
+ // Send interruption event with accurate playback offset in milliseconds
  this._wsSend({
  type: 'trigger.response.audio.interrupted',
- playback_offset: offsetData.currentTime,
+ playback_offset: offsetMs,
  interruption_context: {
  turn_id: this.currentTurnId,
- estimated_words_heard: wordsHeard,
- total_words: totalWords,
- text_heard: textHeard,
+ playback_offset_ms: offsetMs,
  },
  });
  }
+ else {
+ console.warn('Interruption requested but missing required data:', {
+ hasOffsetData: !!offsetData,
+ hasTurnId: !!this.currentTurnId,
+ });
+ }
  }
  async triggerUserTurnStarted() {
  if (!this.pushToTalkActive) {
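
Note: the interruption payload changes from a word-level estimate to a raw playback offset in milliseconds. Per the patch's accounting, the offset reported by `wavPlayer.interrupt()` includes time spent paused while the user was speaking, so `audioPauseTime` is recorded at pause and the wall-clock pause duration is subtracted back out. The correction, isolated (hypothetical helper name; the arithmetic mirrors the diff):

```js
// Sketch of the offset correction in _clientInterruptAssistantReplay.
function adjustedPlaybackOffsetMs(offsetSeconds, pausedAtMs, nowMs = Date.now()) {
  const rawMs = offsetSeconds * 1000;
  if (pausedAtMs == null) return rawMs;       // playback was never paused
  const pauseDurationMs = nowMs - pausedAtMs; // time the player sat paused
  return Math.max(0, rawMs - pauseDurationMs); // clamp so it never goes negative
}

// Example: interrupt() reports 4.2s but playback was paused 1500ms ago:
// adjustedPlaybackOffsetMs(4.2, Date.now() - 1500) === 2700
```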
@@ -3781,30 +3812,20 @@ registerProcessor('audio_processor', AudioProcessor);
  // Set current turn ID from first audio message, or update if different turn
  if (!this.currentTurnId || this.currentTurnId !== message.turn_id) {
  console.log(`Setting current turn ID to: ${message.turn_id} (was: ${this.currentTurnId})`);
- const oldTurnId = this.currentTurnId;
  this.currentTurnId = message.turn_id;
- this.currentTurnText = ''; // Reset text for new turn
  // Clean up interrupted tracks, keeping only the current turn
  this.wavPlayer.clearInterruptedTracks(this.currentTurnId ? [this.currentTurnId] : []);
  }
  break;
- case 'response.text':
- // Set turn ID from first text message if not set, or accumulate if matches current turn
- if (!this.currentTurnId || message.turn_id === this.currentTurnId) {
- if (!this.currentTurnId) {
- console.log(`Setting current turn ID to: ${message.turn_id} from text message`);
- this.currentTurnId = message.turn_id;
- this.currentTurnText = '';
- }
- this.currentTurnText += message.content;
- }
- else {
- console.log(`Ignoring text for turn ${message.turn_id}, current turn is ${this.currentTurnId}`);
+ case 'response.text': {
+ // Set turn ID from first text message if not set
+ if (!this.currentTurnId) {
+ this.currentTurnId = message.turn_id;
+ console.log(`Setting current turn ID to: ${message.turn_id} from text message`);
  }
+ // Note: We no longer track text content in the client - the pipeline handles interruption estimation
  break;
- // case 'response.end':
- // console.log('received response.end');
- // break;
+ }
  case 'response.data':
  console.log('received response.data', message);
  this.options.onDataMessage(message);
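
Note: with `currentTurnText` gone, the client no longer accumulates transcript text at all; per the comment in this hunk, the pipeline now owns interruption estimation. A hypothetical server-side counterpart of the removed `_estimateWordsHeard`, driven by the `playback_offset_ms` the client now sends (same ~150 wpm heuristic as the deleted client code; this is not part of the SDK):

```js
// Hypothetical pipeline-side helper: estimate how much of a turn's text the
// user heard before interrupting, given the playback offset in milliseconds.
function estimateTextHeard(turnText, playbackOffsetMs) {
  const words = turnText.split(/\s+/).filter((w) => w.length > 0);
  const wordsPerSecond = 2.5; // ~150 words per minute speaking rate
  const wordsHeard = Math.min(
    Math.floor((playbackOffsetMs / 1000) * wordsPerSecond),
    words.length
  );
  return {
    wordsHeard,
    totalWords: words.length,
    textHeard: words.slice(0, wordsHeard).join(' '),
  };
}
```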
@@ -3908,6 +3929,8 @@ registerProcessor('audio_processor', AudioProcessor);
  async connect() {
  try {
  this._setStatus('connecting');
+ // Reset turn tracking for clean start
+ this._resetTurnTracking();
  // Get session key from server
  let authorizeSessionRequestBody = {
  pipeline_id: this.options.pipelineId,
@@ -3985,6 +4008,10 @@ registerProcessor('audio_processor', AudioProcessor);
  throw error;
  }
  }
+ _resetTurnTracking() {
+ this.currentTurnId = null;
+ console.log('Reset turn tracking state');
+ }
  async disconnect() {
  // Clean up VAD if it exists
  if (this.vad) {
@@ -3994,6 +4021,8 @@ registerProcessor('audio_processor', AudioProcessor);
  }
  this.wavRecorder.quit();
  this.wavPlayer.disconnect();
+ // Reset turn tracking
+ this._resetTurnTracking();
  // Close websocket and ensure status is updated
  if (this.ws) {
  this.ws.close();
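
Note: `_resetTurnTracking` now runs on both `connect()` and `disconnect()`, so a reconnect cannot attribute an interruption to a `turn_id` left over from an earlier session. A lifecycle sketch (the `LayercodeClient` constructor name and its options are assumed; this diff only shows the class internals):

```js
// Assumed client class; the diff shows pipeline_id coming from options.pipelineId.
const client = new LayercodeClient({ /* pipelineId, callbacks, ... */ });

await client.connect();    // 1.0.23: _resetTurnTracking() clears currentTurnId
// ...conversation runs; currentTurnId follows response.audio / response.text
await client.disconnect(); // reset again on teardown
await client.connect();    // fresh session starts with no stale turn state
```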