@layercode/js-sdk 1.0.13 → 1.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -3474,7 +3474,7 @@ class LayercodeClient {
|
|
|
3474
3474
|
sessionId: options.sessionId || null,
|
|
3475
3475
|
authorizeSessionEndpoint: options.authorizeSessionEndpoint,
|
|
3476
3476
|
metadata: options.metadata || {},
|
|
3477
|
-
|
|
3477
|
+
vadResumeDelay: options.vadResumeDelay || 500,
|
|
3478
3478
|
onConnect: options.onConnect || (() => { }),
|
|
3479
3479
|
onDisconnect: options.onDisconnect || (() => { }),
|
|
3480
3480
|
onError: options.onError || (() => { }),
|
|
@@ -3491,16 +3491,31 @@ class LayercodeClient {
|
|
|
3491
3491
|
sampleRate: 16000, // TODO should be set by the fetched pipeline config
|
|
3492
3492
|
});
|
|
3493
3493
|
this.vad = null;
|
|
3494
|
-
|
|
3494
|
+
this.ws = null;
|
|
3495
|
+
this.status = 'disconnected';
|
|
3496
|
+
this.userAudioAmplitude = 0;
|
|
3497
|
+
this.agentAudioAmplitude = 0;
|
|
3498
|
+
this.sessionId = options.sessionId || null;
|
|
3499
|
+
this.pushToTalkActive = false;
|
|
3500
|
+
this.vadPausedPlayer = false;
|
|
3501
|
+
this.pushToTalkEnabled = false;
|
|
3502
|
+
this.canInterrupt = false;
|
|
3503
|
+
// Bind event handlers
|
|
3504
|
+
this._handleWebSocketMessage = this._handleWebSocketMessage.bind(this);
|
|
3505
|
+
this._handleDataAvailable = this._handleDataAvailable.bind(this);
|
|
3506
|
+
}
|
|
3507
|
+
_initializeVAD() {
|
|
3508
|
+
console.log('initializing VAD', { pushToTalkEnabled: this.pushToTalkEnabled, canInterrupt: this.canInterrupt });
|
|
3509
|
+
if (!this.pushToTalkEnabled && this.canInterrupt) {
|
|
3495
3510
|
dist.MicVAD.new({
|
|
3496
3511
|
stream: this.wavRecorder.getStream() || undefined,
|
|
3497
3512
|
model: 'v5',
|
|
3498
3513
|
// baseAssetPath: '/', // Use if bundling model locally
|
|
3499
3514
|
// onnxWASMBasePath: '/', // Use if bundling model locally
|
|
3500
|
-
positiveSpeechThreshold: 0.
|
|
3501
|
-
negativeSpeechThreshold: 0.
|
|
3502
|
-
redemptionFrames:
|
|
3503
|
-
minSpeechFrames:
|
|
3515
|
+
positiveSpeechThreshold: 0.3,
|
|
3516
|
+
negativeSpeechThreshold: 0.2,
|
|
3517
|
+
redemptionFrames: 25, // Number of frames of silence before onVADMisfire or onSpeechEnd is called. Effectively a delay before restarting.
|
|
3518
|
+
minSpeechFrames: 15,
|
|
3504
3519
|
preSpeechPadFrames: 0,
|
|
3505
3520
|
onSpeechStart: () => {
|
|
3506
3521
|
// Only pause agent audio if it's currently playing
|
|
@@ -3511,23 +3526,35 @@ class LayercodeClient {
|
|
|
3511
3526
|
}
|
|
3512
3527
|
else {
|
|
3513
3528
|
console.log('onSpeechStart: WavPlayer is not playing, VAD will not pause.');
|
|
3514
|
-
this.vadPausedPlayer = false;
|
|
3515
3529
|
}
|
|
3516
3530
|
},
|
|
3517
3531
|
onVADMisfire: () => {
|
|
3518
|
-
// If the speech detected was for less than minSpeechFrames, this is called instead of onSpeechEnd, and we should resume the assistant audio as it was a false interruption
|
|
3532
|
+
// If the speech detected was for less than minSpeechFrames, this is called instead of onSpeechEnd, and we should resume the assistant audio as it was a false interruption. We include a configurable delay so the assistant isn't too quick to start speaking again.
|
|
3519
3533
|
if (this.vadPausedPlayer) {
|
|
3520
|
-
console.log('
|
|
3534
|
+
console.log('onSpeechEnd: VAD paused the player, resuming');
|
|
3521
3535
|
this.wavPlayer.play();
|
|
3522
3536
|
this.vadPausedPlayer = false; // Reset flag
|
|
3537
|
+
// Option to extend delay in the case where the transcriber takes longer to detect a new turn
|
|
3538
|
+
// console.log('onVADMisfire: VAD paused the player, resuming in ' + this.options.vadResumeDelay + 'ms');
|
|
3539
|
+
// // Add configurable delay before resuming playback
|
|
3540
|
+
// setTimeout(() => {
|
|
3541
|
+
// this.wavPlayer.play();
|
|
3542
|
+
// this.vadPausedPlayer = false; // Reset flag
|
|
3543
|
+
// }, this.options.vadResumeDelay);
|
|
3523
3544
|
}
|
|
3524
3545
|
else {
|
|
3525
3546
|
console.log('onVADMisfire: VAD did not pause the player, no action taken to resume.');
|
|
3526
3547
|
}
|
|
3527
3548
|
},
|
|
3528
|
-
onSpeechEnd: () => {
|
|
3529
|
-
|
|
3530
|
-
|
|
3549
|
+
// onSpeechEnd: () => {
|
|
3550
|
+
// if (this.vadPausedPlayer) {
|
|
3551
|
+
// console.log('onSpeechEnd: VAD paused the player, resuming');
|
|
3552
|
+
// this.wavPlayer.play();
|
|
3553
|
+
// this.vadPausedPlayer = false; // Reset flag
|
|
3554
|
+
// } else {
|
|
3555
|
+
// console.log('onSpeechEnd: VAD did not pause the player, not resuming.');
|
|
3556
|
+
// }
|
|
3557
|
+
// },
|
|
3531
3558
|
})
|
|
3532
3559
|
.then((vad) => {
|
|
3533
3560
|
this.vad = vad;
|
|
@@ -3538,16 +3565,6 @@ class LayercodeClient {
|
|
|
3538
3565
|
console.error('Error initializing VAD:', error);
|
|
3539
3566
|
});
|
|
3540
3567
|
}
|
|
3541
|
-
this.ws = null;
|
|
3542
|
-
this.status = 'disconnected';
|
|
3543
|
-
this.userAudioAmplitude = 0;
|
|
3544
|
-
this.agentAudioAmplitude = 0;
|
|
3545
|
-
this.sessionId = options.sessionId || null;
|
|
3546
|
-
this.pushToTalkActive = false;
|
|
3547
|
-
this.vadPausedPlayer = false;
|
|
3548
|
-
// Bind event handlers
|
|
3549
|
-
this._handleWebSocketMessage = this._handleWebSocketMessage.bind(this);
|
|
3550
|
-
this._handleDataAvailable = this._handleDataAvailable.bind(this);
|
|
3551
3568
|
}
|
|
3552
3569
|
/**
|
|
3553
3570
|
* Updates the connection status and triggers the callback
|
|
@@ -3607,10 +3624,9 @@ class LayercodeClient {
|
|
|
3607
3624
|
// Sent from the server to this client when a new user turn is detected
|
|
3608
3625
|
console.log('received turn.start from server');
|
|
3609
3626
|
console.log(message);
|
|
3610
|
-
|
|
3611
|
-
if (message.role === 'user') {
|
|
3627
|
+
if (message.role === 'user' && !this.pushToTalkEnabled && this.canInterrupt) {
|
|
3612
3628
|
// Interrupt any playing assistant audio if this is a turn triggered by the server (and not push to talk, which will have already called interrupt)
|
|
3613
|
-
console.log('interrupting assistant audio, as user turn has started and
|
|
3629
|
+
console.log('interrupting assistant audio, as user turn has started and pushToTalkEnabled is false');
|
|
3614
3630
|
await this._clientInterruptAssistantReplay();
|
|
3615
3631
|
}
|
|
3616
3632
|
// if (message.role === 'assistant') {
|
|
@@ -3724,6 +3740,19 @@ class LayercodeClient {
|
|
|
3724
3740
|
this.ws = new WebSocket(`${this._websocketUrl}?${new URLSearchParams({
|
|
3725
3741
|
client_session_key: authorizeSessionResponseBody.client_session_key,
|
|
3726
3742
|
})}`);
|
|
3743
|
+
const config = authorizeSessionResponseBody.config;
|
|
3744
|
+
console.log('config', config);
|
|
3745
|
+
if (config.transcription.trigger === 'push_to_talk') {
|
|
3746
|
+
this.pushToTalkEnabled = true;
|
|
3747
|
+
}
|
|
3748
|
+
else if (config.transcription.trigger === 'automatic') {
|
|
3749
|
+
this.pushToTalkEnabled = false;
|
|
3750
|
+
this.canInterrupt = config.transcription.can_interrupt;
|
|
3751
|
+
}
|
|
3752
|
+
else {
|
|
3753
|
+
throw new Error(`Unknown trigger: ${config.transcription.trigger}`);
|
|
3754
|
+
}
|
|
3755
|
+
this._initializeVAD();
|
|
3727
3756
|
// Bind the websocket message callbacks
|
|
3728
3757
|
this.ws.onmessage = this._handleWebSocketMessage;
|
|
3729
3758
|
this.ws.onopen = () => {
|