@layercode/js-sdk 1.0.15 → 1.0.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3506,13 +3506,58 @@ registerProcessor('audio_processor', AudioProcessor);
3506
3506
  this.vadPausedPlayer = false;
3507
3507
  this.pushToTalkEnabled = false;
3508
3508
  this.canInterrupt = false;
3509
+ this.userIsSpeaking = false;
3509
3510
  // Bind event handlers
3510
3511
  this._handleWebSocketMessage = this._handleWebSocketMessage.bind(this);
3511
3512
  this._handleDataAvailable = this._handleDataAvailable.bind(this);
3512
3513
  }
3513
3514
  _initializeVAD() {
3514
3515
  console.log('initializing VAD', { pushToTalkEnabled: this.pushToTalkEnabled, canInterrupt: this.canInterrupt });
3515
- if (!this.pushToTalkEnabled && this.canInterrupt) {
3516
+ // If we're in push to talk mode, we don't need to use the VAD model
3517
+ if (this.pushToTalkEnabled) {
3518
+ return;
3519
+ }
3520
+ const timeout = setTimeout(() => {
3521
+ console.log('silero vad model timeout');
3522
+ // TODO: send message to server to indicate that the vad model timed out
3523
+ this.userIsSpeaking = true; // allow audio to be sent to the server
3524
+ }, 2000);
3525
+ if (!this.canInterrupt) {
3526
+ dist.MicVAD.new({
3527
+ stream: this.wavRecorder.getStream() || undefined,
3528
+ model: 'v5',
3529
+ positiveSpeechThreshold: 0.3,
3530
+ negativeSpeechThreshold: 0.2,
3531
+ redemptionFrames: 25, // Number of frames of silence before onVADMisfire or onSpeechEnd is called. Effectively a delay before restarting.
3532
+ minSpeechFrames: 15,
3533
+ preSpeechPadFrames: 0,
3534
+ onSpeechStart: () => {
3535
+ if (!this.wavPlayer.isPlaying) {
3536
+ this.userIsSpeaking = true;
3537
+ }
3538
+ },
3539
+ onVADMisfire: () => {
3540
+ this.userIsSpeaking = false;
3541
+ },
3542
+ onSpeechEnd: () => {
3543
+ this.userIsSpeaking = false;
3544
+ this._wsSend({
3545
+ type: 'vad_events',
3546
+ event: 'vad_end',
3547
+ });
3548
+ },
3549
+ })
3550
+ .then((vad) => {
3551
+ clearTimeout(timeout);
3552
+ this.vad = vad;
3553
+ this.vad.start();
3554
+ console.log('VAD started');
3555
+ })
3556
+ .catch((error) => {
3557
+ console.error('Error initializing VAD:', error);
3558
+ });
3559
+ }
3560
+ else {
3516
3561
  dist.MicVAD.new({
3517
3562
  stream: this.wavRecorder.getStream() || undefined,
3518
3563
  model: 'v5',
@@ -3533,36 +3578,35 @@ registerProcessor('audio_processor', AudioProcessor);
3533
3578
  else {
3534
3579
  console.log('onSpeechStart: WavPlayer is not playing, VAD will not pause.');
3535
3580
  }
3581
+ this.userIsSpeaking = true;
3582
+ console.log('onSpeechStart: sending vad_start');
3583
+ this._wsSend({
3584
+ type: 'vad_events',
3585
+ event: 'vad_start',
3586
+ });
3536
3587
  },
3537
3588
  onVADMisfire: () => {
3538
3589
  // If the speech detected was for less than minSpeechFrames, this is called instead of onSpeechEnd, and we should resume the assistant audio as it was a false interruption. Playback is resumed immediately; no delay is applied before the assistant starts speaking again.
3590
+ this.userIsSpeaking = false;
3539
3591
  if (this.vadPausedPlayer) {
3540
3592
  console.log('onSpeechEnd: VAD paused the player, resuming');
3541
3593
  this.wavPlayer.play();
3542
3594
  this.vadPausedPlayer = false; // Reset flag
3543
- // Option to extend delay in the case where the transcriber takes longer to detect a new turn
3544
- // console.log('onVADMisfire: VAD paused the player, resuming in ' + this.options.vadResumeDelay + 'ms');
3545
- // // Add configurable delay before resuming playback
3546
- // setTimeout(() => {
3547
- // this.wavPlayer.play();
3548
- // this.vadPausedPlayer = false; // Reset flag
3549
- // }, this.options.vadResumeDelay);
3550
3595
  }
3551
3596
  else {
3552
3597
  console.log('onVADMisfire: VAD did not pause the player, no action taken to resume.');
3553
3598
  }
3554
3599
  },
3555
- // onSpeechEnd: () => {
3556
- // if (this.vadPausedPlayer) {
3557
- // console.log('onSpeechEnd: VAD paused the player, resuming');
3558
- // this.wavPlayer.play();
3559
- // this.vadPausedPlayer = false; // Reset flag
3560
- // } else {
3561
- // console.log('onSpeechEnd: VAD did not pause the player, not resuming.');
3562
- // }
3563
- // },
3600
+ onSpeechEnd: () => {
3601
+ this.userIsSpeaking = false;
3602
+ this._wsSend({
3603
+ type: 'vad_events',
3604
+ event: 'vad_end',
3605
+ });
3606
+ },
3564
3607
  })
3565
3608
  .then((vad) => {
3609
+ clearTimeout(timeout);
3566
3610
  this.vad = vad;
3567
3611
  this.vad.start();
3568
3612
  console.log('VAD started');
@@ -3670,10 +3714,13 @@ registerProcessor('audio_processor', AudioProcessor);
3670
3714
  _handleDataAvailable(data) {
3671
3715
  try {
3672
3716
  const base64 = arrayBufferToBase64(data.mono);
3673
- this._wsSend({
3674
- type: 'client.audio',
3675
- content: base64,
3676
- });
3717
+ const sendAudio = this.pushToTalkEnabled ? this.pushToTalkActive : this.userIsSpeaking;
3718
+ if (sendAudio) {
3719
+ this._wsSend({
3720
+ type: 'client.audio',
3721
+ content: base64,
3722
+ });
3723
+ }
3677
3724
  }
3678
3725
  catch (error) {
3679
3726
  console.error('Error processing audio:', error);
@@ -3778,7 +3825,7 @@ registerProcessor('audio_processor', AudioProcessor);
3778
3825
  };
3779
3826
  // Initialize microphone audio capture
3780
3827
  await this.wavRecorder.begin();
3781
- await this.wavRecorder.record(this._handleDataAvailable);
3828
+ await this.wavRecorder.record(this._handleDataAvailable, 1638);
3782
3829
  // Set up microphone amplitude monitoring
3783
3830
  this._setupAmplitudeMonitoring(this.wavRecorder, this.options.onUserAmplitudeChange, (amp) => (this.userAudioAmplitude = amp));
3784
3831
  // Initialize audio player