@layercode/js-sdk 1.0.15 → 1.0.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3506,13 +3506,58 @@ registerProcessor('audio_processor', AudioProcessor);
         this.vadPausedPlayer = false;
         this.pushToTalkEnabled = false;
         this.canInterrupt = false;
+        this.userIsSpeaking = false;
         // Bind event handlers
         this._handleWebSocketMessage = this._handleWebSocketMessage.bind(this);
         this._handleDataAvailable = this._handleDataAvailable.bind(this);
     }
     _initializeVAD() {
         console.log('initializing VAD', { pushToTalkEnabled: this.pushToTalkEnabled, canInterrupt: this.canInterrupt });
-
+        // If we're in push to talk mode, we don't need to use the VAD model
+        if (this.pushToTalkEnabled) {
+            return;
+        }
+        const timeout = setTimeout(() => {
+            console.log('silero vad model timeout');
+            // TODO: send message to server to indicate that the vad model timed out
+            this.userIsSpeaking = true; // allow audio to be sent to the server
+        }, 2000);
+        if (!this.canInterrupt) {
+            dist.MicVAD.new({
+                stream: this.wavRecorder.getStream() || undefined,
+                model: 'v5',
+                positiveSpeechThreshold: 0.3,
+                negativeSpeechThreshold: 0.2,
+                redemptionFrames: 25, // Number of frames of silence before onVADMisfire or onSpeechEnd is called. Effectively a delay before restarting.
+                minSpeechFrames: 15,
+                preSpeechPadFrames: 0,
+                onSpeechStart: () => {
+                    if (!this.wavPlayer.isPlaying) {
+                        this.userIsSpeaking = true;
+                    }
+                },
+                onVADMisfire: () => {
+                    this.userIsSpeaking = false;
+                },
+                onSpeechEnd: () => {
+                    this.userIsSpeaking = false;
+                    this._wsSend({
+                        type: 'vad_events',
+                        event: 'vad_end',
+                    });
+                },
+            })
+                .then((vad) => {
+                    clearTimeout(timeout);
+                    this.vad = vad;
+                    this.vad.start();
+                    console.log('VAD started');
+                })
+                .catch((error) => {
+                    console.error('Error initializing VAD:', error);
+                });
+        }
+        else {
             dist.MicVAD.new({
                 stream: this.wavRecorder.getStream() || undefined,
                 model: 'v5',
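In this hunk the constructor gains a userIsSpeaking flag and _initializeVAD changes in three ways: it returns early when push-to-talk is enabled, it arms a 2-second fallback timer that forces userIsSpeaking to true if the Silero VAD model never finishes loading (so microphone audio is not silently dropped), and it adds a non-interruptible branch whose callbacks only toggle the flag and report vad_end. A minimal sketch of the fail-open watchdog, with createVad as a hypothetical stand-in for the dist.MicVAD.new({...}) call shown above:

    // Illustrative only: apart from the 2000 ms delay and the clearTimeout-on-success
    // flow taken from the diff, all names here are hypothetical.
    function initVadWithFallback(createVad, state) {
      const timeout = setTimeout(() => {
        state.userIsSpeaking = true; // fail open: keep sending audio to the server
      }, 2000);
      return createVad()
        .then((vad) => {
          clearTimeout(timeout); // model loaded in time, cancel the fallback
          vad.start();
          return vad;
        })
        .catch((error) => {
          console.error('Error initializing VAD:', error);
        });
    }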
@@ -3533,36 +3578,35 @@ registerProcessor('audio_processor', AudioProcessor);
                     else {
                         console.log('onSpeechStart: WavPlayer is not playing, VAD will not pause.');
                     }
+                    this.userIsSpeaking = true;
+                    console.log('onSpeechStart: sending vad_start');
+                    this._wsSend({
+                        type: 'vad_events',
+                        event: 'vad_start',
+                    });
                 },
                 onVADMisfire: () => {
                     // If the speech detected was for less than minSpeechFrames, this is called instead of onSpeechEnd, and we should resume the assistant audio as it was a false interruption. We include a configurable delay so the assistant isn't too quick to start speaking again.
+                    this.userIsSpeaking = false;
                     if (this.vadPausedPlayer) {
                         console.log('onSpeechEnd: VAD paused the player, resuming');
                         this.wavPlayer.play();
                         this.vadPausedPlayer = false; // Reset flag
-                        // Option to extend delay in the case where the transcriber takes longer to detect a new turn
-                        // console.log('onVADMisfire: VAD paused the player, resuming in ' + this.options.vadResumeDelay + 'ms');
-                        // // Add configurable delay before resuming playback
-                        // setTimeout(() => {
-                        // this.wavPlayer.play();
-                        // this.vadPausedPlayer = false; // Reset flag
-                        // }, this.options.vadResumeDelay);
                     }
                     else {
                         console.log('onVADMisfire: VAD did not pause the player, no action taken to resume.');
                     }
                 },
-
-
-
-
-
-
-
-                // }
-                // },
+                onSpeechEnd: () => {
+                    this.userIsSpeaking = false;
+                    this._wsSend({
+                        type: 'vad_events',
+                        event: 'vad_end',
+                    });
+                },
             })
                 .then((vad) => {
+                    clearTimeout(timeout);
                     this.vad = vad;
                     this.vad.start();
                     console.log('VAD started');
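In the interruptible branch (the dist.MicVAD.new call that continues from the previous hunk), speech boundaries are now also reported to the server: onSpeechStart sends a vad_start event and the new onSpeechEnd callback sends vad_end, both as vad_events messages over the existing _wsSend channel; the non-interruptible branch in the previous hunk only sends vad_end. The message shapes, exactly as constructed in the diff (how the server consumes them is outside this package):

    // Payloads passed to this._wsSend by the VAD callbacks above.
    const vadStartEvent = { type: 'vad_events', event: 'vad_start' }; // user began speaking
    const vadEndEvent = { type: 'vad_events', event: 'vad_end' };     // user finished speaking

    // Hypothetical helper showing the send pattern; `ws` is assumed to be an open
    // WebSocket, whereas the SDK goes through its internal _wsSend wrapper.
    function sendVadEvent(ws, event) {
      ws.send(JSON.stringify({ type: 'vad_events', event }));
    }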
@@ -3670,10 +3714,13 @@ registerProcessor('audio_processor', AudioProcessor);
     _handleDataAvailable(data) {
         try {
             const base64 = arrayBufferToBase64(data.mono);
-            this.
-
-
-
+            const sendAudio = this.pushToTalkEnabled ? this.pushToTalkActive : this.userIsSpeaking;
+            if (sendAudio) {
+                this._wsSend({
+                    type: 'client.audio',
+                    content: base64,
+                });
+            }
         }
         catch (error) {
             console.error('Error processing audio:', error);