@layercode/js-sdk 1.0.14 → 1.0.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -3474,7 +3474,6 @@ class LayercodeClient {
|
|
|
3474
3474
|
sessionId: options.sessionId || null,
|
|
3475
3475
|
authorizeSessionEndpoint: options.authorizeSessionEndpoint,
|
|
3476
3476
|
metadata: options.metadata || {},
|
|
3477
|
-
vadEnabled: options.vadEnabled || true,
|
|
3478
3477
|
vadResumeDelay: options.vadResumeDelay || 500,
|
|
3479
3478
|
onConnect: options.onConnect || (() => { }),
|
|
3480
3479
|
onDisconnect: options.onDisconnect || (() => { }),
|
|
@@ -3492,7 +3491,67 @@ class LayercodeClient {
|
|
|
3492
3491
|
sampleRate: 16000, // TODO should be set by fetched pipeline config
|
|
3493
3492
|
});
|
|
3494
3493
|
this.vad = null;
|
|
3495
|
-
|
|
3494
|
+
this.ws = null;
|
|
3495
|
+
this.status = 'disconnected';
|
|
3496
|
+
this.userAudioAmplitude = 0;
|
|
3497
|
+
this.agentAudioAmplitude = 0;
|
|
3498
|
+
this.sessionId = options.sessionId || null;
|
|
3499
|
+
this.pushToTalkActive = false;
|
|
3500
|
+
this.vadPausedPlayer = false;
|
|
3501
|
+
this.pushToTalkEnabled = false;
|
|
3502
|
+
this.canInterrupt = false;
|
|
3503
|
+
this.userIsSpeaking = false;
|
|
3504
|
+
// Bind event handlers
|
|
3505
|
+
this._handleWebSocketMessage = this._handleWebSocketMessage.bind(this);
|
|
3506
|
+
this._handleDataAvailable = this._handleDataAvailable.bind(this);
|
|
3507
|
+
}
|
|
3508
|
+
_initializeVAD() {
|
|
3509
|
+
console.log('initializing VAD', { pushToTalkEnabled: this.pushToTalkEnabled, canInterrupt: this.canInterrupt });
|
|
3510
|
+
// If we're in push to talk mode, we don't need to use the VAD model
|
|
3511
|
+
if (this.pushToTalkEnabled) {
|
|
3512
|
+
return;
|
|
3513
|
+
}
|
|
3514
|
+
const timeout = setTimeout(() => {
|
|
3515
|
+
console.log('silero vad model timeout');
|
|
3516
|
+
// TODO: send message to server to indicate that the vad model timed out
|
|
3517
|
+
this.userIsSpeaking = true; // allow audio to be sent to the server
|
|
3518
|
+
}, 2000);
|
|
3519
|
+
if (!this.canInterrupt) {
|
|
3520
|
+
dist.MicVAD.new({
|
|
3521
|
+
stream: this.wavRecorder.getStream() || undefined,
|
|
3522
|
+
model: 'v5',
|
|
3523
|
+
positiveSpeechThreshold: 0.3,
|
|
3524
|
+
negativeSpeechThreshold: 0.2,
|
|
3525
|
+
redemptionFrames: 25, // Number of frames of silence before onVADMisfire or onSpeechEnd is called. Effectively a delay before restarting.
|
|
3526
|
+
minSpeechFrames: 15,
|
|
3527
|
+
preSpeechPadFrames: 0,
|
|
3528
|
+
onSpeechStart: () => {
|
|
3529
|
+
if (!this.wavPlayer.isPlaying) {
|
|
3530
|
+
this.userIsSpeaking = true;
|
|
3531
|
+
}
|
|
3532
|
+
},
|
|
3533
|
+
onVADMisfire: () => {
|
|
3534
|
+
this.userIsSpeaking = false;
|
|
3535
|
+
},
|
|
3536
|
+
onSpeechEnd: () => {
|
|
3537
|
+
this.userIsSpeaking = false;
|
|
3538
|
+
this._wsSend({
|
|
3539
|
+
type: 'vad_events',
|
|
3540
|
+
event: 'vad_end',
|
|
3541
|
+
});
|
|
3542
|
+
},
|
|
3543
|
+
})
|
|
3544
|
+
.then((vad) => {
|
|
3545
|
+
clearTimeout(timeout);
|
|
3546
|
+
this.vad = vad;
|
|
3547
|
+
this.vad.start();
|
|
3548
|
+
console.log('VAD started');
|
|
3549
|
+
})
|
|
3550
|
+
.catch((error) => {
|
|
3551
|
+
console.error('Error initializing VAD:', error);
|
|
3552
|
+
});
|
|
3553
|
+
}
|
|
3554
|
+
else {
|
|
3496
3555
|
dist.MicVAD.new({
|
|
3497
3556
|
stream: this.wavRecorder.getStream() || undefined,
|
|
3498
3557
|
model: 'v5',
|
|
@@ -3513,36 +3572,35 @@ class LayercodeClient {
|
|
|
3513
3572
|
else {
|
|
3514
3573
|
console.log('onSpeechStart: WavPlayer is not playing, VAD will not pause.');
|
|
3515
3574
|
}
|
|
3575
|
+
this.userIsSpeaking = true;
|
|
3576
|
+
console.log('onSpeechStart: sending vad_start');
|
|
3577
|
+
this._wsSend({
|
|
3578
|
+
type: 'vad_events',
|
|
3579
|
+
event: 'vad_start',
|
|
3580
|
+
});
|
|
3516
3581
|
},
|
|
3517
3582
|
onVADMisfire: () => {
|
|
3518
3583
|
// If the speech detected was for less than minSpeechFrames, this is called instead of onSpeechEnd, and we should resume the assistant audio as it was a false interruption. We include a configurable delay so the assistant isn't too quick to start speaking again.
|
|
3584
|
+
this.userIsSpeaking = false;
|
|
3519
3585
|
if (this.vadPausedPlayer) {
|
|
3520
3586
|
console.log('onSpeechEnd: VAD paused the player, resuming');
|
|
3521
3587
|
this.wavPlayer.play();
|
|
3522
3588
|
this.vadPausedPlayer = false; // Reset flag
|
|
3523
|
-
// Option to extend delay in the case where the transcriber takes longer to detect a new turn
|
|
3524
|
-
// console.log('onVADMisfire: VAD paused the player, resuming in ' + this.options.vadResumeDelay + 'ms');
|
|
3525
|
-
// // Add configurable delay before resuming playback
|
|
3526
|
-
// setTimeout(() => {
|
|
3527
|
-
// this.wavPlayer.play();
|
|
3528
|
-
// this.vadPausedPlayer = false; // Reset flag
|
|
3529
|
-
// }, this.options.vadResumeDelay);
|
|
3530
3589
|
}
|
|
3531
3590
|
else {
|
|
3532
3591
|
console.log('onVADMisfire: VAD did not pause the player, no action taken to resume.');
|
|
3533
3592
|
}
|
|
3534
3593
|
},
|
|
3535
|
-
|
|
3536
|
-
|
|
3537
|
-
|
|
3538
|
-
|
|
3539
|
-
|
|
3540
|
-
|
|
3541
|
-
|
|
3542
|
-
// }
|
|
3543
|
-
// },
|
|
3594
|
+
onSpeechEnd: () => {
|
|
3595
|
+
this.userIsSpeaking = false;
|
|
3596
|
+
this._wsSend({
|
|
3597
|
+
type: 'vad_events',
|
|
3598
|
+
event: 'vad_end',
|
|
3599
|
+
});
|
|
3600
|
+
},
|
|
3544
3601
|
})
|
|
3545
3602
|
.then((vad) => {
|
|
3603
|
+
clearTimeout(timeout);
|
|
3546
3604
|
this.vad = vad;
|
|
3547
3605
|
this.vad.start();
|
|
3548
3606
|
console.log('VAD started');
|
|
@@ -3551,16 +3609,6 @@ class LayercodeClient {
|
|
|
3551
3609
|
console.error('Error initializing VAD:', error);
|
|
3552
3610
|
});
|
|
3553
3611
|
}
|
|
3554
|
-
this.ws = null;
|
|
3555
|
-
this.status = 'disconnected';
|
|
3556
|
-
this.userAudioAmplitude = 0;
|
|
3557
|
-
this.agentAudioAmplitude = 0;
|
|
3558
|
-
this.sessionId = options.sessionId || null;
|
|
3559
|
-
this.pushToTalkActive = false;
|
|
3560
|
-
this.vadPausedPlayer = false;
|
|
3561
|
-
// Bind event handlers
|
|
3562
|
-
this._handleWebSocketMessage = this._handleWebSocketMessage.bind(this);
|
|
3563
|
-
this._handleDataAvailable = this._handleDataAvailable.bind(this);
|
|
3564
3612
|
}
|
|
3565
3613
|
/**
|
|
3566
3614
|
* Updates the connection status and triggers the callback
|
|
@@ -3620,10 +3668,9 @@ class LayercodeClient {
|
|
|
3620
3668
|
// Sent from the server to this client when a new user turn is detected
|
|
3621
3669
|
console.log('received turn.start from server');
|
|
3622
3670
|
console.log(message);
|
|
3623
|
-
|
|
3624
|
-
if (message.role === 'user') {
|
|
3671
|
+
if (message.role === 'user' && !this.pushToTalkEnabled && this.canInterrupt) {
|
|
3625
3672
|
// Interrupt any playing assistant audio if this is a turn triggered by the server (and not push to talk, which will have already called interrupt)
|
|
3626
|
-
console.log('interrupting assistant audio, as user turn has started and
|
|
3673
|
+
console.log('interrupting assistant audio, as user turn has started and pushToTalkEnabled is false');
|
|
3627
3674
|
await this._clientInterruptAssistantReplay();
|
|
3628
3675
|
}
|
|
3629
3676
|
// if (message.role === 'assistant') {
|
|
@@ -3661,10 +3708,13 @@ class LayercodeClient {
|
|
|
3661
3708
|
_handleDataAvailable(data) {
|
|
3662
3709
|
try {
|
|
3663
3710
|
const base64 = arrayBufferToBase64(data.mono);
|
|
3664
|
-
this.
|
|
3665
|
-
|
|
3666
|
-
|
|
3667
|
-
|
|
3711
|
+
const sendAudio = this.pushToTalkEnabled ? this.pushToTalkActive : this.userIsSpeaking;
|
|
3712
|
+
if (sendAudio) {
|
|
3713
|
+
this._wsSend({
|
|
3714
|
+
type: 'client.audio',
|
|
3715
|
+
content: base64,
|
|
3716
|
+
});
|
|
3717
|
+
}
|
|
3668
3718
|
}
|
|
3669
3719
|
catch (error) {
|
|
3670
3720
|
console.error('Error processing audio:', error);
|
|
@@ -3737,6 +3787,19 @@ class LayercodeClient {
|
|
|
3737
3787
|
this.ws = new WebSocket(`${this._websocketUrl}?${new URLSearchParams({
|
|
3738
3788
|
client_session_key: authorizeSessionResponseBody.client_session_key,
|
|
3739
3789
|
})}`);
|
|
3790
|
+
const config = authorizeSessionResponseBody.config;
|
|
3791
|
+
console.log('config', config);
|
|
3792
|
+
if (config.transcription.trigger === 'push_to_talk') {
|
|
3793
|
+
this.pushToTalkEnabled = true;
|
|
3794
|
+
}
|
|
3795
|
+
else if (config.transcription.trigger === 'automatic') {
|
|
3796
|
+
this.pushToTalkEnabled = false;
|
|
3797
|
+
this.canInterrupt = config.transcription.can_interrupt;
|
|
3798
|
+
}
|
|
3799
|
+
else {
|
|
3800
|
+
throw new Error(`Unknown trigger: ${config.transcription.trigger}`);
|
|
3801
|
+
}
|
|
3802
|
+
this._initializeVAD();
|
|
3740
3803
|
// Bind the websocket message callbacks
|
|
3741
3804
|
this.ws.onmessage = this._handleWebSocketMessage;
|
|
3742
3805
|
this.ws.onopen = () => {
|