@layercode/js-sdk 1.0.14 → 1.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3474,7 +3474,6 @@ class LayercodeClient {
3474
3474
  sessionId: options.sessionId || null,
3475
3475
  authorizeSessionEndpoint: options.authorizeSessionEndpoint,
3476
3476
  metadata: options.metadata || {},
3477
- vadEnabled: options.vadEnabled || true,
3478
3477
  vadResumeDelay: options.vadResumeDelay || 500,
3479
3478
  onConnect: options.onConnect || (() => { }),
3480
3479
  onDisconnect: options.onDisconnect || (() => { }),
@@ -3492,7 +3491,67 @@ class LayercodeClient {
3492
3491
  sampleRate: 16000, // TODO should be set by the fetched pipeline config
3493
3492
  });
3494
3493
  this.vad = null;
3495
- if (this.options.vadEnabled) {
3494
+ this.ws = null;
3495
+ this.status = 'disconnected';
3496
+ this.userAudioAmplitude = 0;
3497
+ this.agentAudioAmplitude = 0;
3498
+ this.sessionId = options.sessionId || null;
3499
+ this.pushToTalkActive = false;
3500
+ this.vadPausedPlayer = false;
3501
+ this.pushToTalkEnabled = false;
3502
+ this.canInterrupt = false;
3503
+ this.userIsSpeaking = false;
3504
+ // Bind event handlers
3505
+ this._handleWebSocketMessage = this._handleWebSocketMessage.bind(this);
3506
+ this._handleDataAvailable = this._handleDataAvailable.bind(this);
3507
+ }
3508
+ _initializeVAD() {
3509
+ console.log('initializing VAD', { pushToTalkEnabled: this.pushToTalkEnabled, canInterrupt: this.canInterrupt });
3510
+ // If we're in push to talk mode, we don't need to use the VAD model
3511
+ if (this.pushToTalkEnabled) {
3512
+ return;
3513
+ }
3514
+ const timeout = setTimeout(() => {
3515
+ console.log('silero vad model timeout');
3516
+ // TODO: send message to server to indicate that the vad model timed out
3517
+ this.userIsSpeaking = true; // allow audio to be sent to the server
3518
+ }, 2000);
3519
+ if (!this.canInterrupt) {
3520
+ dist.MicVAD.new({
3521
+ stream: this.wavRecorder.getStream() || undefined,
3522
+ model: 'v5',
3523
+ positiveSpeechThreshold: 0.3,
3524
+ negativeSpeechThreshold: 0.2,
3525
+ redemptionFrames: 25, // Number of frames of silence before onVADMisfire or onSpeechEnd is called. Effectively a delay before restarting.
3526
+ minSpeechFrames: 15,
3527
+ preSpeechPadFrames: 0,
3528
+ onSpeechStart: () => {
3529
+ if (!this.wavPlayer.isPlaying) {
3530
+ this.userIsSpeaking = true;
3531
+ }
3532
+ },
3533
+ onVADMisfire: () => {
3534
+ this.userIsSpeaking = false;
3535
+ },
3536
+ onSpeechEnd: () => {
3537
+ this.userIsSpeaking = false;
3538
+ this._wsSend({
3539
+ type: 'vad_events',
3540
+ event: 'vad_end',
3541
+ });
3542
+ },
3543
+ })
3544
+ .then((vad) => {
3545
+ clearTimeout(timeout);
3546
+ this.vad = vad;
3547
+ this.vad.start();
3548
+ console.log('VAD started');
3549
+ })
3550
+ .catch((error) => {
3551
+ console.error('Error initializing VAD:', error);
3552
+ });
3553
+ }
3554
+ else {
3496
3555
  dist.MicVAD.new({
3497
3556
  stream: this.wavRecorder.getStream() || undefined,
3498
3557
  model: 'v5',
@@ -3513,36 +3572,35 @@ class LayercodeClient {
3513
3572
  else {
3514
3573
  console.log('onSpeechStart: WavPlayer is not playing, VAD will not pause.');
3515
3574
  }
3575
+ this.userIsSpeaking = true;
3576
+ console.log('onSpeechStart: sending vad_start');
3577
+ this._wsSend({
3578
+ type: 'vad_events',
3579
+ event: 'vad_start',
3580
+ });
3516
3581
  },
3517
3582
  onVADMisfire: () => {
3518
3583
  // If the speech detected was for less than minSpeechFrames, this is called instead of onSpeechEnd, and we should resume the assistant audio as it was a false interruption. We include a configurable delay so the assistant isn't too quick to start speaking again.
3584
+ this.userIsSpeaking = false;
3519
3585
  if (this.vadPausedPlayer) {
3520
3586
  console.log('onSpeechEnd: VAD paused the player, resuming');
3521
3587
  this.wavPlayer.play();
3522
3588
  this.vadPausedPlayer = false; // Reset flag
3523
- // Option to extend delay in the case where the transcriber takes longer to detect a new turn
3524
- // console.log('onVADMisfire: VAD paused the player, resuming in ' + this.options.vadResumeDelay + 'ms');
3525
- // // Add configurable delay before resuming playback
3526
- // setTimeout(() => {
3527
- // this.wavPlayer.play();
3528
- // this.vadPausedPlayer = false; // Reset flag
3529
- // }, this.options.vadResumeDelay);
3530
3589
  }
3531
3590
  else {
3532
3591
  console.log('onVADMisfire: VAD did not pause the player, no action taken to resume.');
3533
3592
  }
3534
3593
  },
3535
- // onSpeechEnd: () => {
3536
- // if (this.vadPausedPlayer) {
3537
- // console.log('onSpeechEnd: VAD paused the player, resuming');
3538
- // this.wavPlayer.play();
3539
- // this.vadPausedPlayer = false; // Reset flag
3540
- // } else {
3541
- // console.log('onSpeechEnd: VAD did not pause the player, not resuming.');
3542
- // }
3543
- // },
3594
+ onSpeechEnd: () => {
3595
+ this.userIsSpeaking = false;
3596
+ this._wsSend({
3597
+ type: 'vad_events',
3598
+ event: 'vad_end',
3599
+ });
3600
+ },
3544
3601
  })
3545
3602
  .then((vad) => {
3603
+ clearTimeout(timeout);
3546
3604
  this.vad = vad;
3547
3605
  this.vad.start();
3548
3606
  console.log('VAD started');
@@ -3551,16 +3609,6 @@ class LayercodeClient {
3551
3609
  console.error('Error initializing VAD:', error);
3552
3610
  });
3553
3611
  }
3554
- this.ws = null;
3555
- this.status = 'disconnected';
3556
- this.userAudioAmplitude = 0;
3557
- this.agentAudioAmplitude = 0;
3558
- this.sessionId = options.sessionId || null;
3559
- this.pushToTalkActive = false;
3560
- this.vadPausedPlayer = false;
3561
- // Bind event handlers
3562
- this._handleWebSocketMessage = this._handleWebSocketMessage.bind(this);
3563
- this._handleDataAvailable = this._handleDataAvailable.bind(this);
3564
3612
  }
3565
3613
  /**
3566
3614
  * Updates the connection status and triggers the callback
@@ -3620,10 +3668,9 @@ class LayercodeClient {
3620
3668
  // Sent from the server to this client when a new user turn is detected
3621
3669
  console.log('received turn.start from server');
3622
3670
  console.log(message);
3623
- // if (message.role === 'user' && !this.pushToTalkActive) {
3624
- if (message.role === 'user') {
3671
+ if (message.role === 'user' && !this.pushToTalkEnabled && this.canInterrupt) {
3625
3672
  // Interrupt any playing assistant audio if this is a turn triggered by the server (and not push to talk, which will have already called interrupt)
3626
- console.log('interrupting assistant audio, as user turn has started and pushToTalkActive is false');
3673
+ console.log('interrupting assistant audio, as user turn has started and pushToTalkEnabled is false');
3627
3674
  await this._clientInterruptAssistantReplay();
3628
3675
  }
3629
3676
  // if (message.role === 'assistant') {
@@ -3661,10 +3708,13 @@ class LayercodeClient {
3661
3708
  _handleDataAvailable(data) {
3662
3709
  try {
3663
3710
  const base64 = arrayBufferToBase64(data.mono);
3664
- this._wsSend({
3665
- type: 'client.audio',
3666
- content: base64,
3667
- });
3711
+ const sendAudio = this.pushToTalkEnabled ? this.pushToTalkActive : this.userIsSpeaking;
3712
+ if (sendAudio) {
3713
+ this._wsSend({
3714
+ type: 'client.audio',
3715
+ content: base64,
3716
+ });
3717
+ }
3668
3718
  }
3669
3719
  catch (error) {
3670
3720
  console.error('Error processing audio:', error);
@@ -3737,6 +3787,19 @@ class LayercodeClient {
3737
3787
  this.ws = new WebSocket(`${this._websocketUrl}?${new URLSearchParams({
3738
3788
  client_session_key: authorizeSessionResponseBody.client_session_key,
3739
3789
  })}`);
3790
+ const config = authorizeSessionResponseBody.config;
3791
+ console.log('config', config);
3792
+ if (config.transcription.trigger === 'push_to_talk') {
3793
+ this.pushToTalkEnabled = true;
3794
+ }
3795
+ else if (config.transcription.trigger === 'automatic') {
3796
+ this.pushToTalkEnabled = false;
3797
+ this.canInterrupt = config.transcription.can_interrupt;
3798
+ }
3799
+ else {
3800
+ throw new Error(`Unknown trigger: ${config.transcription.trigger}`);
3801
+ }
3802
+ this._initializeVAD();
3740
3803
  // Bind the websocket message callbacks
3741
3804
  this.ws.onmessage = this._handleWebSocketMessage;
3742
3805
  this.ws.onopen = () => {