@layercode/js-sdk 1.0.15 → 1.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3500,13 +3500,58 @@ class LayercodeClient {
3500
3500
  this.vadPausedPlayer = false;
3501
3501
  this.pushToTalkEnabled = false;
3502
3502
  this.canInterrupt = false;
3503
+ this.userIsSpeaking = false;
3503
3504
  // Bind event handlers
3504
3505
  this._handleWebSocketMessage = this._handleWebSocketMessage.bind(this);
3505
3506
  this._handleDataAvailable = this._handleDataAvailable.bind(this);
3506
3507
  }
3507
3508
  _initializeVAD() {
3508
3509
  console.log('initializing VAD', { pushToTalkEnabled: this.pushToTalkEnabled, canInterrupt: this.canInterrupt });
3509
- if (!this.pushToTalkEnabled && this.canInterrupt) {
3510
+ // If we're in push to talk mode, we don't need to use the VAD model
3511
+ if (this.pushToTalkEnabled) {
3512
+ return;
3513
+ }
3514
+ const timeout = setTimeout(() => {
3515
+ console.log('silero vad model timeout');
3516
+ // TODO: send message to server to indicate that the vad model timed out
3517
+ this.userIsSpeaking = true; // allow audio to be sent to the server
3518
+ }, 2000);
3519
+ if (!this.canInterrupt) {
3520
+ dist.MicVAD.new({
3521
+ stream: this.wavRecorder.getStream() || undefined,
3522
+ model: 'v5',
3523
+ positiveSpeechThreshold: 0.3,
3524
+ negativeSpeechThreshold: 0.2,
3525
+ redemptionFrames: 25, // Number of frames of silence before onVADMisfire or onSpeechEnd is called. Effectively a delay before restarting.
3526
+ minSpeechFrames: 15,
3527
+ preSpeechPadFrames: 0,
3528
+ onSpeechStart: () => {
3529
+ if (!this.wavPlayer.isPlaying) {
3530
+ this.userIsSpeaking = true;
3531
+ }
3532
+ },
3533
+ onVADMisfire: () => {
3534
+ this.userIsSpeaking = false;
3535
+ },
3536
+ onSpeechEnd: () => {
3537
+ this.userIsSpeaking = false;
3538
+ this._wsSend({
3539
+ type: 'vad_events',
3540
+ event: 'vad_end',
3541
+ });
3542
+ },
3543
+ })
3544
+ .then((vad) => {
3545
+ clearTimeout(timeout);
3546
+ this.vad = vad;
3547
+ this.vad.start();
3548
+ console.log('VAD started');
3549
+ })
3550
+ .catch((error) => {
3551
+ console.error('Error initializing VAD:', error);
3552
+ });
3553
+ }
3554
+ else {
3510
3555
  dist.MicVAD.new({
3511
3556
  stream: this.wavRecorder.getStream() || undefined,
3512
3557
  model: 'v5',
@@ -3527,36 +3572,35 @@ class LayercodeClient {
3527
3572
  else {
3528
3573
  console.log('onSpeechStart: WavPlayer is not playing, VAD will not pause.');
3529
3574
  }
3575
+ this.userIsSpeaking = true;
3576
+ console.log('onSpeechStart: sending vad_start');
3577
+ this._wsSend({
3578
+ type: 'vad_events',
3579
+ event: 'vad_start',
3580
+ });
3530
3581
  },
3531
3582
  onVADMisfire: () => {
3532
3583
  // If the speech detected was for less than minSpeechFrames, this is called instead of onSpeechEnd, and we should resume the assistant audio as it was a false interruption. We include a configurable delay so the assistant isn't too quick to start speaking again.
3584
+ this.userIsSpeaking = false;
3533
3585
  if (this.vadPausedPlayer) {
3534
3586
  console.log('onSpeechEnd: VAD paused the player, resuming');
3535
3587
  this.wavPlayer.play();
3536
3588
  this.vadPausedPlayer = false; // Reset flag
3537
- // Option to extend delay in the case where the transcriber takes longer to detect a new turn
3538
- // console.log('onVADMisfire: VAD paused the player, resuming in ' + this.options.vadResumeDelay + 'ms');
3539
- // // Add configurable delay before resuming playback
3540
- // setTimeout(() => {
3541
- // this.wavPlayer.play();
3542
- // this.vadPausedPlayer = false; // Reset flag
3543
- // }, this.options.vadResumeDelay);
3544
3589
  }
3545
3590
  else {
3546
3591
  console.log('onVADMisfire: VAD did not pause the player, no action taken to resume.');
3547
3592
  }
3548
3593
  },
3549
- // onSpeechEnd: () => {
3550
- // if (this.vadPausedPlayer) {
3551
- // console.log('onSpeechEnd: VAD paused the player, resuming');
3552
- // this.wavPlayer.play();
3553
- // this.vadPausedPlayer = false; // Reset flag
3554
- // } else {
3555
- // console.log('onSpeechEnd: VAD did not pause the player, not resuming.');
3556
- // }
3557
- // },
3594
+ onSpeechEnd: () => {
3595
+ this.userIsSpeaking = false;
3596
+ this._wsSend({
3597
+ type: 'vad_events',
3598
+ event: 'vad_end',
3599
+ });
3600
+ },
3558
3601
  })
3559
3602
  .then((vad) => {
3603
+ clearTimeout(timeout);
3560
3604
  this.vad = vad;
3561
3605
  this.vad.start();
3562
3606
  console.log('VAD started');
@@ -3664,10 +3708,13 @@ class LayercodeClient {
3664
3708
  _handleDataAvailable(data) {
3665
3709
  try {
3666
3710
  const base64 = arrayBufferToBase64(data.mono);
3667
- this._wsSend({
3668
- type: 'client.audio',
3669
- content: base64,
3670
- });
3711
+ const sendAudio = this.pushToTalkEnabled ? this.pushToTalkActive : this.userIsSpeaking;
3712
+ if (sendAudio) {
3713
+ this._wsSend({
3714
+ type: 'client.audio',
3715
+ content: base64,
3716
+ });
3717
+ }
3671
3718
  }
3672
3719
  catch (error) {
3673
3720
  console.error('Error processing audio:', error);