@layercode/js-sdk 1.0.13 → 1.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3480,7 +3480,7 @@ registerProcessor('audio_processor', AudioProcessor);
 sessionId: options.sessionId || null,
 authorizeSessionEndpoint: options.authorizeSessionEndpoint,
 metadata: options.metadata || {},
-
+vadResumeDelay: options.vadResumeDelay || 500,
 onConnect: options.onConnect || (() => { }),
 onDisconnect: options.onDisconnect || (() => { }),
 onError: options.onError || (() => { }),
@@ -3497,16 +3497,31 @@ registerProcessor('audio_processor', AudioProcessor);
 sampleRate: 16000, // TODO should be set my fetched pipeline config
 });
 this.vad = null;
-
+this.ws = null;
+this.status = 'disconnected';
+this.userAudioAmplitude = 0;
+this.agentAudioAmplitude = 0;
+this.sessionId = options.sessionId || null;
+this.pushToTalkActive = false;
+this.vadPausedPlayer = false;
+this.pushToTalkEnabled = false;
+this.canInterrupt = false;
+// Bind event handlers
+this._handleWebSocketMessage = this._handleWebSocketMessage.bind(this);
+this._handleDataAvailable = this._handleDataAvailable.bind(this);
+}
+_initializeVAD() {
+console.log('initializing VAD', { pushToTalkEnabled: this.pushToTalkEnabled, canInterrupt: this.canInterrupt });
+if (!this.pushToTalkEnabled && this.canInterrupt) {
 dist.MicVAD.new({
 stream: this.wavRecorder.getStream() || undefined,
 model: 'v5',
 // baseAssetPath: '/', // Use if bundling model locally
 // onnxWASMBasePath: '/', // Use if bundling model locally
-positiveSpeechThreshold: 0.
-negativeSpeechThreshold: 0.
-redemptionFrames:
-minSpeechFrames:
+positiveSpeechThreshold: 0.3,
+negativeSpeechThreshold: 0.2,
+redemptionFrames: 25, // Number of frames of silence before onVADMisfire or onSpeechEnd is called. Effectively a delay before restarting.
+minSpeechFrames: 15,
 preSpeechPadFrames: 0,
 onSpeechStart: () => {
 // Only pause agent audio if it's currently playing
@@ -3517,23 +3532,35 @@ registerProcessor('audio_processor', AudioProcessor);
 }
 else {
 console.log('onSpeechStart: WavPlayer is not playing, VAD will not pause.');
-this.vadPausedPlayer = false;
 }
 },
 onVADMisfire: () => {
-// If the speech detected was for less than minSpeechFrames, this is called instead of onSpeechEnd, and we should resume the assistant audio as it was a false interruption
+// If the speech detected was for less than minSpeechFrames, this is called instead of onSpeechEnd, and we should resume the assistant audio as it was a false interruption. We include a configurable delay so the assistant isn't too quick to start speaking again.
 if (this.vadPausedPlayer) {
-console.log('
+console.log('onSpeechEnd: VAD paused the player, resuming');
 this.wavPlayer.play();
 this.vadPausedPlayer = false; // Reset flag
+// Option to extend delay in the case where the transcriber takes longer to detect a new turn
+// console.log('onVADMisfire: VAD paused the player, resuming in ' + this.options.vadResumeDelay + 'ms');
+// // Add configurable delay before resuming playback
+// setTimeout(() => {
+// this.wavPlayer.play();
+// this.vadPausedPlayer = false; // Reset flag
+// }, this.options.vadResumeDelay);
 }
 else {
 console.log('onVADMisfire: VAD did not pause the player, no action taken to resume.');
 }
 },
-onSpeechEnd: () => {
-
-
+// onSpeechEnd: () => {
+// if (this.vadPausedPlayer) {
+// console.log('onSpeechEnd: VAD paused the player, resuming');
+// this.wavPlayer.play();
+// this.vadPausedPlayer = false; // Reset flag
+// } else {
+// console.log('onSpeechEnd: VAD did not pause the player, not resuming.');
+// }
+// },
 })
 .then((vad) => {
 this.vad = vad;
@@ -3544,16 +3571,6 @@ registerProcessor('audio_processor', AudioProcessor);
 console.error('Error initializing VAD:', error);
 });
 }
-this.ws = null;
-this.status = 'disconnected';
-this.userAudioAmplitude = 0;
-this.agentAudioAmplitude = 0;
-this.sessionId = options.sessionId || null;
-this.pushToTalkActive = false;
-this.vadPausedPlayer = false;
-// Bind event handlers
-this._handleWebSocketMessage = this._handleWebSocketMessage.bind(this);
-this._handleDataAvailable = this._handleDataAvailable.bind(this);
 }
 /**
 * Updates the connection status and triggers the callback
@@ -3613,10 +3630,9 @@ registerProcessor('audio_processor', AudioProcessor);
 // Sent from the server to this client when a new user turn is detected
 console.log('received turn.start from server');
 console.log(message);
-
-if (message.role === 'user') {
+if (message.role === 'user' && !this.pushToTalkEnabled && this.canInterrupt) {
 // Interrupt any playing assistant audio if this is a turn trigged by the server (and not push to talk, which will have already called interrupt)
-console.log('interrupting assistant audio, as user turn has started and
+console.log('interrupting assistant audio, as user turn has started and pushToTalkEnabled is false');
 await this._clientInterruptAssistantReplay();
 }
 // if (message.role === 'assistant') {
@@ -3730,6 +3746,19 @@ registerProcessor('audio_processor', AudioProcessor);
 this.ws = new WebSocket(`${this._websocketUrl}?${new URLSearchParams({
 client_session_key: authorizeSessionResponseBody.client_session_key,
 })}`);
+const config = authorizeSessionResponseBody.config;
+console.log('config', config);
+if (config.transcription.trigger === 'push_to_talk') {
+this.pushToTalkEnabled = true;
+}
+else if (config.transcription.trigger === 'automatic') {
+this.pushToTalkEnabled = false;
+this.canInterrupt = config.transcription.can_interrupt;
+}
+else {
+throw new Error(`Unknown trigger: ${config.transcription.trigger}`);
+}
+this._initializeVAD();
 // Bind the websocket message callbacks
 this.ws.onmessage = this._handleWebSocketMessage;
 this.ws.onopen = () => {