@layercode/js-sdk 1.0.15 → 1.0.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -3500,13 +3500,58 @@ class LayercodeClient {
|
|
|
3500
3500
|
this.vadPausedPlayer = false;
|
|
3501
3501
|
this.pushToTalkEnabled = false;
|
|
3502
3502
|
this.canInterrupt = false;
|
|
3503
|
+
this.userIsSpeaking = false;
|
|
3503
3504
|
// Bind event handlers
|
|
3504
3505
|
this._handleWebSocketMessage = this._handleWebSocketMessage.bind(this);
|
|
3505
3506
|
this._handleDataAvailable = this._handleDataAvailable.bind(this);
|
|
3506
3507
|
}
|
|
3507
3508
|
_initializeVAD() {
|
|
3508
3509
|
console.log('initializing VAD', { pushToTalkEnabled: this.pushToTalkEnabled, canInterrupt: this.canInterrupt });
|
|
3509
|
-
|
|
3510
|
+
// If we're in push to talk mode, we don't need to use the VAD model
|
|
3511
|
+
if (this.pushToTalkEnabled) {
|
|
3512
|
+
return;
|
|
3513
|
+
}
|
|
3514
|
+
const timeout = setTimeout(() => {
|
|
3515
|
+
console.log('silero vad model timeout');
|
|
3516
|
+
// TODO: send message to server to indicate that the vad model timed out
|
|
3517
|
+
this.userIsSpeaking = true; // allow audio to be sent to the server
|
|
3518
|
+
}, 2000);
|
|
3519
|
+
if (!this.canInterrupt) {
|
|
3520
|
+
dist.MicVAD.new({
|
|
3521
|
+
stream: this.wavRecorder.getStream() || undefined,
|
|
3522
|
+
model: 'v5',
|
|
3523
|
+
positiveSpeechThreshold: 0.3,
|
|
3524
|
+
negativeSpeechThreshold: 0.2,
|
|
3525
|
+
redemptionFrames: 25, // Number of frames of silence before onVADMisfire or onSpeechEnd is called. Effectively a delay before restarting.
|
|
3526
|
+
minSpeechFrames: 15,
|
|
3527
|
+
preSpeechPadFrames: 0,
|
|
3528
|
+
onSpeechStart: () => {
|
|
3529
|
+
if (!this.wavPlayer.isPlaying) {
|
|
3530
|
+
this.userIsSpeaking = true;
|
|
3531
|
+
}
|
|
3532
|
+
},
|
|
3533
|
+
onVADMisfire: () => {
|
|
3534
|
+
this.userIsSpeaking = false;
|
|
3535
|
+
},
|
|
3536
|
+
onSpeechEnd: () => {
|
|
3537
|
+
this.userIsSpeaking = false;
|
|
3538
|
+
this._wsSend({
|
|
3539
|
+
type: 'vad_events',
|
|
3540
|
+
event: 'vad_end',
|
|
3541
|
+
});
|
|
3542
|
+
},
|
|
3543
|
+
})
|
|
3544
|
+
.then((vad) => {
|
|
3545
|
+
clearTimeout(timeout);
|
|
3546
|
+
this.vad = vad;
|
|
3547
|
+
this.vad.start();
|
|
3548
|
+
console.log('VAD started');
|
|
3549
|
+
})
|
|
3550
|
+
.catch((error) => {
|
|
3551
|
+
console.error('Error initializing VAD:', error);
|
|
3552
|
+
});
|
|
3553
|
+
}
|
|
3554
|
+
else {
|
|
3510
3555
|
dist.MicVAD.new({
|
|
3511
3556
|
stream: this.wavRecorder.getStream() || undefined,
|
|
3512
3557
|
model: 'v5',
|
|
@@ -3527,36 +3572,35 @@ class LayercodeClient {
|
|
|
3527
3572
|
else {
|
|
3528
3573
|
console.log('onSpeechStart: WavPlayer is not playing, VAD will not pause.');
|
|
3529
3574
|
}
|
|
3575
|
+
this.userIsSpeaking = true;
|
|
3576
|
+
console.log('onSpeechStart: sending vad_start');
|
|
3577
|
+
this._wsSend({
|
|
3578
|
+
type: 'vad_events',
|
|
3579
|
+
event: 'vad_start',
|
|
3580
|
+
});
|
|
3530
3581
|
},
|
|
3531
3582
|
onVADMisfire: () => {
|
|
3532
3583
|
// If the speech detected was for less than minSpeechFrames, this is called instead of onSpeechEnd, and we should resume the assistant audio as it was a false interruption. We include a configurable delay so the assistant isn't too quick to start speaking again.
|
|
3584
|
+
this.userIsSpeaking = false;
|
|
3533
3585
|
if (this.vadPausedPlayer) {
|
|
3534
3586
|
console.log('onSpeechEnd: VAD paused the player, resuming');
|
|
3535
3587
|
this.wavPlayer.play();
|
|
3536
3588
|
this.vadPausedPlayer = false; // Reset flag
|
|
3537
|
-
// Option to extend delay in the case where the transcriber takes longer to detect a new turn
|
|
3538
|
-
// console.log('onVADMisfire: VAD paused the player, resuming in ' + this.options.vadResumeDelay + 'ms');
|
|
3539
|
-
// // Add configurable delay before resuming playback
|
|
3540
|
-
// setTimeout(() => {
|
|
3541
|
-
// this.wavPlayer.play();
|
|
3542
|
-
// this.vadPausedPlayer = false; // Reset flag
|
|
3543
|
-
// }, this.options.vadResumeDelay);
|
|
3544
3589
|
}
|
|
3545
3590
|
else {
|
|
3546
3591
|
console.log('onVADMisfire: VAD did not pause the player, no action taken to resume.');
|
|
3547
3592
|
}
|
|
3548
3593
|
},
|
|
3549
|
-
|
|
3550
|
-
|
|
3551
|
-
|
|
3552
|
-
|
|
3553
|
-
|
|
3554
|
-
|
|
3555
|
-
|
|
3556
|
-
// }
|
|
3557
|
-
// },
|
|
3594
|
+
onSpeechEnd: () => {
|
|
3595
|
+
this.userIsSpeaking = false;
|
|
3596
|
+
this._wsSend({
|
|
3597
|
+
type: 'vad_events',
|
|
3598
|
+
event: 'vad_end',
|
|
3599
|
+
});
|
|
3600
|
+
},
|
|
3558
3601
|
})
|
|
3559
3602
|
.then((vad) => {
|
|
3603
|
+
clearTimeout(timeout);
|
|
3560
3604
|
this.vad = vad;
|
|
3561
3605
|
this.vad.start();
|
|
3562
3606
|
console.log('VAD started');
|
|
@@ -3664,10 +3708,13 @@ class LayercodeClient {
|
|
|
3664
3708
|
_handleDataAvailable(data) {
|
|
3665
3709
|
try {
|
|
3666
3710
|
const base64 = arrayBufferToBase64(data.mono);
|
|
3667
|
-
this.
|
|
3668
|
-
|
|
3669
|
-
|
|
3670
|
-
|
|
3711
|
+
const sendAudio = this.pushToTalkEnabled ? this.pushToTalkActive : this.userIsSpeaking;
|
|
3712
|
+
if (sendAudio) {
|
|
3713
|
+
this._wsSend({
|
|
3714
|
+
type: 'client.audio',
|
|
3715
|
+
content: base64,
|
|
3716
|
+
});
|
|
3717
|
+
}
|
|
3671
3718
|
}
|
|
3672
3719
|
catch (error) {
|
|
3673
3720
|
console.error('Error processing audio:', error);
|
|
@@ -3772,7 +3819,7 @@ class LayercodeClient {
|
|
|
3772
3819
|
};
|
|
3773
3820
|
// Initialize microphone audio capture
|
|
3774
3821
|
await this.wavRecorder.begin();
|
|
3775
|
-
await this.wavRecorder.record(this._handleDataAvailable);
|
|
3822
|
+
await this.wavRecorder.record(this._handleDataAvailable, 1638);
|
|
3776
3823
|
// Set up microphone amplitude monitoring
|
|
3777
3824
|
this._setupAmplitudeMonitoring(this.wavRecorder, this.options.onUserAmplitudeChange, (amp) => (this.userAudioAmplitude = amp));
|
|
3778
3825
|
// Initialize audio player
|