@layercode/js-sdk 1.0.25 → 1.0.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -3477,6 +3477,8 @@ function arrayBufferToBase64(arrayBuffer) {
|
|
|
3477
3477
|
}
|
|
3478
3478
|
|
|
3479
3479
|
/* eslint-env browser */
|
|
3480
|
+
// SDK version - updated when publishing
|
|
3481
|
+
const SDK_VERSION = '1.0.27';
|
|
3480
3482
|
/**
|
|
3481
3483
|
* @class LayercodeClient
|
|
3482
3484
|
* @classdesc Core client for Layercode audio pipeline that manages audio recording, WebSocket communication, and speech processing.
|
|
@@ -3487,6 +3489,7 @@ class LayercodeClient {
|
|
|
3487
3489
|
* @param {Object} options - Configuration options
|
|
3488
3490
|
*/
|
|
3489
3491
|
constructor(options) {
|
|
3492
|
+
this.deviceId = null;
|
|
3490
3493
|
this.options = {
|
|
3491
3494
|
pipelineId: options.pipelineId,
|
|
3492
3495
|
sessionId: options.sessionId || null,
|
|
@@ -3496,6 +3499,7 @@ class LayercodeClient {
|
|
|
3496
3499
|
onConnect: options.onConnect || (() => { }),
|
|
3497
3500
|
onDisconnect: options.onDisconnect || (() => { }),
|
|
3498
3501
|
onError: options.onError || (() => { }),
|
|
3502
|
+
onDeviceSwitched: options.onDeviceSwitched || (() => { }),
|
|
3499
3503
|
onDataMessage: options.onDataMessage || (() => { }),
|
|
3500
3504
|
onUserAmplitudeChange: options.onUserAmplitudeChange || (() => { }),
|
|
3501
3505
|
onAgentAmplitudeChange: options.onAgentAmplitudeChange || (() => { }),
|
|
@@ -3519,198 +3523,99 @@ class LayercodeClient {
|
|
|
3519
3523
|
this.pushToTalkEnabled = false;
|
|
3520
3524
|
this.canInterrupt = false;
|
|
3521
3525
|
this.userIsSpeaking = false;
|
|
3522
|
-
this.endUserTurn = false;
|
|
3523
3526
|
this.recorderStarted = false;
|
|
3524
3527
|
this.readySent = false;
|
|
3525
3528
|
this.currentTurnId = null;
|
|
3526
3529
|
this.audioBuffer = [];
|
|
3527
|
-
this.
|
|
3530
|
+
this.vadConfig = null;
|
|
3531
|
+
// this.audioPauseTime = null;
|
|
3528
3532
|
// Bind event handlers
|
|
3529
3533
|
this._handleWebSocketMessage = this._handleWebSocketMessage.bind(this);
|
|
3530
3534
|
this._handleDataAvailable = this._handleDataAvailable.bind(this);
|
|
3531
|
-
|
|
3532
|
-
_setupAmplitudeBasedVAD() {
|
|
3533
|
-
let isSpeakingByAmplitude = false;
|
|
3534
|
-
let silenceFrames = 0;
|
|
3535
|
-
const AMPLITUDE_THRESHOLD = 0.01; // Adjust based on testing
|
|
3536
|
-
const SILENCE_FRAMES_THRESHOLD = 30; // ~600ms at 20ms chunks
|
|
3537
|
-
// Monitor amplitude changes
|
|
3538
|
-
this.wavRecorder.startAmplitudeMonitoring((amplitude) => {
|
|
3539
|
-
const wasSpeaking = isSpeakingByAmplitude;
|
|
3540
|
-
if (amplitude > AMPLITUDE_THRESHOLD) {
|
|
3541
|
-
silenceFrames = 0;
|
|
3542
|
-
if (!wasSpeaking) {
|
|
3543
|
-
// Speech started - pause audio if playing and track timing for interruption calculation
|
|
3544
|
-
if (this.canInterrupt && this.wavPlayer.isPlaying) {
|
|
3545
|
-
this.audioPauseTime = Date.now();
|
|
3546
|
-
this.wavPlayer.pause();
|
|
3547
|
-
}
|
|
3548
|
-
isSpeakingByAmplitude = true;
|
|
3549
|
-
this.userIsSpeaking = true;
|
|
3550
|
-
this.options.onUserIsSpeakingChange(true);
|
|
3551
|
-
this._wsSend({
|
|
3552
|
-
type: 'vad_events',
|
|
3553
|
-
event: 'vad_start',
|
|
3554
|
-
});
|
|
3555
|
-
}
|
|
3556
|
-
}
|
|
3557
|
-
else {
|
|
3558
|
-
silenceFrames++;
|
|
3559
|
-
if (wasSpeaking && silenceFrames >= SILENCE_FRAMES_THRESHOLD) {
|
|
3560
|
-
// Speech ended
|
|
3561
|
-
isSpeakingByAmplitude = false;
|
|
3562
|
-
this.userIsSpeaking = false;
|
|
3563
|
-
this.options.onUserIsSpeakingChange(false);
|
|
3564
|
-
this._wsSend({
|
|
3565
|
-
type: 'vad_events',
|
|
3566
|
-
event: 'vad_end',
|
|
3567
|
-
});
|
|
3568
|
-
}
|
|
3569
|
-
}
|
|
3570
|
-
});
|
|
3535
|
+
this._setupDeviceChangeListener();
|
|
3571
3536
|
}
|
|
3572
3537
|
_initializeVAD() {
|
|
3573
|
-
|
|
3538
|
+
var _a;
|
|
3539
|
+
console.log('initializing VAD', { pushToTalkEnabled: this.pushToTalkEnabled, canInterrupt: this.canInterrupt, vadConfig: this.vadConfig });
|
|
3574
3540
|
// If we're in push to talk mode, we don't need to use the VAD model
|
|
3575
3541
|
if (this.pushToTalkEnabled) {
|
|
3576
3542
|
return;
|
|
3577
3543
|
}
|
|
3578
|
-
|
|
3579
|
-
|
|
3580
|
-
console.
|
|
3544
|
+
// Check if VAD is disabled
|
|
3545
|
+
if (((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.enabled) === false) {
|
|
3546
|
+
console.log('VAD is disabled by backend configuration');
|
|
3547
|
+
return;
|
|
3548
|
+
}
|
|
3549
|
+
// Build VAD configuration object, only including keys that are defined
|
|
3550
|
+
const vadOptions = {
|
|
3551
|
+
stream: this.wavRecorder.getStream() || undefined,
|
|
3552
|
+
onSpeechStart: () => {
|
|
3553
|
+
console.log('onSpeechStart: sending vad_start');
|
|
3554
|
+
this.userIsSpeaking = true;
|
|
3555
|
+
this.options.onUserIsSpeakingChange(true);
|
|
3556
|
+
this._wsSend({
|
|
3557
|
+
type: 'vad_events',
|
|
3558
|
+
event: 'vad_start',
|
|
3559
|
+
});
|
|
3560
|
+
},
|
|
3561
|
+
onSpeechEnd: () => {
|
|
3562
|
+
console.log('onSpeechEnd: sending vad_end');
|
|
3563
|
+
this.userIsSpeaking = false;
|
|
3564
|
+
this.options.onUserIsSpeakingChange(false);
|
|
3565
|
+
this.audioBuffer = []; // Clear buffer on speech end
|
|
3566
|
+
this._wsSend({
|
|
3567
|
+
type: 'vad_events',
|
|
3568
|
+
event: 'vad_end',
|
|
3569
|
+
});
|
|
3570
|
+
},
|
|
3571
|
+
};
|
|
3572
|
+
// Apply VAD configuration from backend if available
|
|
3573
|
+
if (this.vadConfig) {
|
|
3574
|
+
// Only add keys that are explicitly defined (not undefined)
|
|
3575
|
+
if (this.vadConfig.model !== undefined)
|
|
3576
|
+
vadOptions.model = this.vadConfig.model;
|
|
3577
|
+
if (this.vadConfig.positive_speech_threshold !== undefined)
|
|
3578
|
+
vadOptions.positiveSpeechThreshold = this.vadConfig.positive_speech_threshold;
|
|
3579
|
+
if (this.vadConfig.negative_speech_threshold !== undefined)
|
|
3580
|
+
vadOptions.negativeSpeechThreshold = this.vadConfig.negative_speech_threshold;
|
|
3581
|
+
if (this.vadConfig.redemption_frames !== undefined)
|
|
3582
|
+
vadOptions.redemptionFrames = this.vadConfig.redemption_frames;
|
|
3583
|
+
if (this.vadConfig.min_speech_frames !== undefined)
|
|
3584
|
+
vadOptions.minSpeechFrames = this.vadConfig.min_speech_frames;
|
|
3585
|
+
if (this.vadConfig.pre_speech_pad_frames !== undefined)
|
|
3586
|
+
vadOptions.preSpeechPadFrames = this.vadConfig.pre_speech_pad_frames;
|
|
3587
|
+
if (this.vadConfig.frame_samples !== undefined)
|
|
3588
|
+
vadOptions.frameSamples = this.vadConfig.frame_samples;
|
|
3589
|
+
}
|
|
3590
|
+
else {
|
|
3591
|
+
// Default values if no config from backend
|
|
3592
|
+
vadOptions.model = 'v5';
|
|
3593
|
+
vadOptions.positiveSpeechThreshold = 0.15;
|
|
3594
|
+
vadOptions.negativeSpeechThreshold = 0.05;
|
|
3595
|
+
vadOptions.redemptionFrames = 4;
|
|
3596
|
+
vadOptions.minSpeechFrames = 2;
|
|
3597
|
+
vadOptions.preSpeechPadFrames = 0;
|
|
3598
|
+
vadOptions.frameSamples = 512; // Required for v5
|
|
3599
|
+
}
|
|
3600
|
+
console.log('Creating VAD with options:', vadOptions);
|
|
3601
|
+
dist.MicVAD.new(vadOptions)
|
|
3602
|
+
.then((vad) => {
|
|
3603
|
+
this.vad = vad;
|
|
3604
|
+
this.vad.start();
|
|
3605
|
+
console.log('VAD started successfully');
|
|
3606
|
+
})
|
|
3607
|
+
.catch((error) => {
|
|
3608
|
+
console.warn('Error initializing VAD:', error);
|
|
3581
3609
|
// Send a message to server indicating VAD failure
|
|
3582
3610
|
this._wsSend({
|
|
3583
3611
|
type: 'vad_events',
|
|
3584
3612
|
event: 'vad_model_failed',
|
|
3585
3613
|
});
|
|
3586
|
-
|
|
3587
|
-
this.userIsSpeaking = false;
|
|
3588
|
-
this.options.onUserIsSpeakingChange(false);
|
|
3589
|
-
// Set up amplitude-based fallback detection
|
|
3590
|
-
this._setupAmplitudeBasedVAD();
|
|
3591
|
-
}, 2000);
|
|
3592
|
-
if (!this.canInterrupt) {
|
|
3593
|
-
dist.MicVAD.new({
|
|
3594
|
-
stream: this.wavRecorder.getStream() || undefined,
|
|
3595
|
-
model: 'v5',
|
|
3596
|
-
positiveSpeechThreshold: 0.7,
|
|
3597
|
-
negativeSpeechThreshold: 0.55,
|
|
3598
|
-
redemptionFrames: 25, // Number of frames of silence before onVADMisfire or onSpeechEnd is called. Effectively a delay before restarting.
|
|
3599
|
-
minSpeechFrames: 0,
|
|
3600
|
-
preSpeechPadFrames: 0,
|
|
3601
|
-
onSpeechStart: () => {
|
|
3602
|
-
this.userIsSpeaking = true;
|
|
3603
|
-
this.options.onUserIsSpeakingChange(true);
|
|
3604
|
-
console.log('onSpeechStart: sending vad_start');
|
|
3605
|
-
this._wsSend({
|
|
3606
|
-
type: 'vad_events',
|
|
3607
|
-
event: 'vad_start',
|
|
3608
|
-
});
|
|
3609
|
-
},
|
|
3610
|
-
onSpeechEnd: () => {
|
|
3611
|
-
console.log('onSpeechEnd: sending vad_end');
|
|
3612
|
-
this.endUserTurn = true; // Set flag to indicate that the user turn has ended
|
|
3613
|
-
this.audioBuffer = []; // Clear buffer on speech end
|
|
3614
|
-
this.userIsSpeaking = false;
|
|
3615
|
-
this.options.onUserIsSpeakingChange(false);
|
|
3616
|
-
console.log('onSpeechEnd: State after update - endUserTurn:', this.endUserTurn, 'userIsSpeaking:', this.userIsSpeaking);
|
|
3617
|
-
// Send vad_end immediately instead of waiting for next audio chunk
|
|
3618
|
-
this._wsSend({
|
|
3619
|
-
type: 'vad_events',
|
|
3620
|
-
event: 'vad_end',
|
|
3621
|
-
});
|
|
3622
|
-
this.endUserTurn = false; // Reset the flag after sending vad_end
|
|
3623
|
-
},
|
|
3624
|
-
})
|
|
3625
|
-
.then((vad) => {
|
|
3626
|
-
clearTimeout(timeout);
|
|
3627
|
-
this.vad = vad;
|
|
3628
|
-
this.vad.start();
|
|
3629
|
-
console.log('VAD started');
|
|
3630
|
-
})
|
|
3631
|
-
.catch((error) => {
|
|
3632
|
-
console.error('Error initializing VAD:', error);
|
|
3633
|
-
});
|
|
3634
|
-
}
|
|
3635
|
-
else {
|
|
3636
|
-
dist.MicVAD.new({
|
|
3637
|
-
stream: this.wavRecorder.getStream() || undefined,
|
|
3638
|
-
model: 'v5',
|
|
3639
|
-
// baseAssetPath: '/', // Use if bundling model locally
|
|
3640
|
-
// onnxWASMBasePath: '/', // Use if bundling model locally
|
|
3641
|
-
positiveSpeechThreshold: 0.7,
|
|
3642
|
-
negativeSpeechThreshold: 0.55,
|
|
3643
|
-
redemptionFrames: 25, // Number of frames of silence before onVADMisfire or onSpeechEnd is called. Effectively a delay before restarting.
|
|
3644
|
-
minSpeechFrames: 25,
|
|
3645
|
-
preSpeechPadFrames: 0,
|
|
3646
|
-
onSpeechStart: () => {
|
|
3647
|
-
// Only pause agent audio if it's currently playing
|
|
3648
|
-
if (this.wavPlayer.isPlaying) {
|
|
3649
|
-
console.log('onSpeechStart: WavPlayer is playing, pausing it.');
|
|
3650
|
-
this.audioPauseTime = Date.now(); // Track when we paused
|
|
3651
|
-
this.wavPlayer.pause();
|
|
3652
|
-
}
|
|
3653
|
-
else {
|
|
3654
|
-
console.log('onSpeechStart: WavPlayer is not playing, VAD will not pause.');
|
|
3655
|
-
}
|
|
3656
|
-
console.log('onSpeechStart: sending vad_start');
|
|
3657
|
-
this._wsSend({
|
|
3658
|
-
type: 'vad_events',
|
|
3659
|
-
event: 'vad_start',
|
|
3660
|
-
});
|
|
3661
|
-
this.userIsSpeaking = true;
|
|
3662
|
-
this.options.onUserIsSpeakingChange(true);
|
|
3663
|
-
this.endUserTurn = false; // Reset endUserTurn when speech starts
|
|
3664
|
-
console.log('onSpeechStart: State after update - endUserTurn:', this.endUserTurn, 'userIsSpeaking:', this.userIsSpeaking);
|
|
3665
|
-
},
|
|
3666
|
-
onVADMisfire: () => {
|
|
3667
|
-
// If the speech detected was for less than minSpeechFrames, this is called instead of onSpeechEnd, and we should resume the assistant audio as it was a false interruption. We include a configurable delay so the assistant isn't too quick to start speaking again.
|
|
3668
|
-
this.userIsSpeaking = false;
|
|
3669
|
-
this.audioBuffer = []; // Clear buffer on misfire
|
|
3670
|
-
this.options.onUserIsSpeakingChange(false);
|
|
3671
|
-
// Add the missing delay before resuming to prevent race conditions
|
|
3672
|
-
setTimeout(() => {
|
|
3673
|
-
if (!this.wavPlayer.isPlaying) {
|
|
3674
|
-
console.log('onVADMisfire: Resuming after delay');
|
|
3675
|
-
this.audioPauseTime = null; // Clear pause time since we're resuming
|
|
3676
|
-
this.wavPlayer.play();
|
|
3677
|
-
}
|
|
3678
|
-
else {
|
|
3679
|
-
console.log('onVADMisfire: Not resuming - either no pause or user speaking again');
|
|
3680
|
-
this.endUserTurn = true;
|
|
3681
|
-
}
|
|
3682
|
-
}, this.options.vadResumeDelay);
|
|
3683
|
-
},
|
|
3684
|
-
onSpeechEnd: () => {
|
|
3685
|
-
console.log('onSpeechEnd: sending vad_end');
|
|
3686
|
-
this.endUserTurn = true; // Set flag to indicate that the user turn has ended
|
|
3687
|
-
this.audioBuffer = []; // Clear buffer on speech end
|
|
3688
|
-
this.userIsSpeaking = false;
|
|
3689
|
-
this.options.onUserIsSpeakingChange(false);
|
|
3690
|
-
console.log('onSpeechEnd: State after update - endUserTurn:', this.endUserTurn, 'userIsSpeaking:', this.userIsSpeaking);
|
|
3691
|
-
// Send vad_end immediately instead of waiting for next audio chunk
|
|
3692
|
-
this._wsSend({
|
|
3693
|
-
type: 'vad_events',
|
|
3694
|
-
event: 'vad_end',
|
|
3695
|
-
});
|
|
3696
|
-
this.endUserTurn = false; // Reset the flag after sending vad_end
|
|
3697
|
-
},
|
|
3698
|
-
})
|
|
3699
|
-
.then((vad) => {
|
|
3700
|
-
clearTimeout(timeout);
|
|
3701
|
-
this.vad = vad;
|
|
3702
|
-
this.vad.start();
|
|
3703
|
-
console.log('VAD started');
|
|
3704
|
-
})
|
|
3705
|
-
.catch((error) => {
|
|
3706
|
-
console.error('Error initializing VAD:', error);
|
|
3707
|
-
});
|
|
3708
|
-
}
|
|
3614
|
+
});
|
|
3709
3615
|
}
|
|
3710
3616
|
/**
|
|
3711
3617
|
* Updates the connection status and triggers the callback
|
|
3712
3618
|
* @param {string} status - New status value
|
|
3713
|
-
* @private
|
|
3714
3619
|
*/
|
|
3715
3620
|
_setStatus(status) {
|
|
3716
3621
|
this.status = status;
|
|
@@ -3718,7 +3623,6 @@ class LayercodeClient {
|
|
|
3718
3623
|
}
|
|
3719
3624
|
/**
|
|
3720
3625
|
* Handles when agent audio finishes playing
|
|
3721
|
-
* @private
|
|
3722
3626
|
*/
|
|
3723
3627
|
_clientResponseAudioReplayFinished() {
|
|
3724
3628
|
console.log('clientResponseAudioReplayFinished');
|
|
@@ -3731,17 +3635,6 @@ class LayercodeClient {
|
|
|
3731
3635
|
const offsetData = await this.wavPlayer.interrupt();
|
|
3732
3636
|
if (offsetData && this.currentTurnId) {
|
|
3733
3637
|
let offsetMs = offsetData.currentTime * 1000;
|
|
3734
|
-
// Calculate accurate offset by subtracting pause time if audio was paused for VAD
|
|
3735
|
-
if (this.audioPauseTime) {
|
|
3736
|
-
const pauseDurationMs = Date.now() - this.audioPauseTime;
|
|
3737
|
-
const adjustedOffsetMs = Math.max(0, offsetMs - pauseDurationMs);
|
|
3738
|
-
console.log(`Interruption detected: Raw offset ${offsetMs}ms, pause duration ${pauseDurationMs}ms, adjusted offset ${adjustedOffsetMs}ms for turn ${this.currentTurnId}`);
|
|
3739
|
-
offsetMs = adjustedOffsetMs;
|
|
3740
|
-
this.audioPauseTime = null; // Clear the pause time
|
|
3741
|
-
}
|
|
3742
|
-
else {
|
|
3743
|
-
console.log(`Interruption detected: ${offsetMs}ms offset for turn ${this.currentTurnId} (no pause adjustment needed)`);
|
|
3744
|
-
}
|
|
3745
3638
|
// Send interruption event with accurate playback offset in milliseconds
|
|
3746
3639
|
this._wsSend({
|
|
3747
3640
|
type: 'trigger.response.audio.interrupted',
|
|
@@ -3775,7 +3668,6 @@ class LayercodeClient {
|
|
|
3775
3668
|
/**
|
|
3776
3669
|
* Handles incoming WebSocket messages
|
|
3777
3670
|
* @param {MessageEvent} event - The WebSocket message event
|
|
3778
|
-
* @private
|
|
3779
3671
|
*/
|
|
3780
3672
|
async _handleWebSocketMessage(event) {
|
|
3781
3673
|
try {
|
|
@@ -3790,12 +3682,10 @@ class LayercodeClient {
|
|
|
3790
3682
|
console.log(message);
|
|
3791
3683
|
if (message.role === 'assistant') {
|
|
3792
3684
|
// Start tracking new assistant turn
|
|
3793
|
-
// Note: Don't reset currentTurnId here - let response.audio set it
|
|
3794
|
-
// This prevents race conditions where text arrives before audio
|
|
3795
3685
|
console.log('Assistant turn started, will track new turn ID from audio/text');
|
|
3796
3686
|
}
|
|
3797
|
-
else if (message.role === 'user' && !this.pushToTalkEnabled
|
|
3798
|
-
// Interrupt any playing assistant audio if this is a turn
|
|
3687
|
+
else if (message.role === 'user' && !this.pushToTalkEnabled) {
|
|
3688
|
+
// Interrupt any playing assistant audio if this is a turn triggered by the server (and not push to talk, which will have already called interrupt)
|
|
3799
3689
|
console.log('interrupting assistant audio, as user turn has started and pushToTalkEnabled is false');
|
|
3800
3690
|
await this._clientInterruptAssistantReplay();
|
|
3801
3691
|
}
|
|
@@ -3817,7 +3707,6 @@ class LayercodeClient {
|
|
|
3817
3707
|
this.currentTurnId = message.turn_id;
|
|
3818
3708
|
console.log(`Setting current turn ID to: ${message.turn_id} from text message`);
|
|
3819
3709
|
}
|
|
3820
|
-
// Note: We no longer track text content in the client - the pipeline handles interruption estimation
|
|
3821
3710
|
break;
|
|
3822
3711
|
}
|
|
3823
3712
|
case 'response.data':
|
|
@@ -3825,7 +3714,7 @@ class LayercodeClient {
|
|
|
3825
3714
|
this.options.onDataMessage(message);
|
|
3826
3715
|
break;
|
|
3827
3716
|
default:
|
|
3828
|
-
console.
|
|
3717
|
+
console.warn('Unknown message type received:', message);
|
|
3829
3718
|
break;
|
|
3830
3719
|
}
|
|
3831
3720
|
}
|
|
@@ -3837,15 +3726,28 @@ class LayercodeClient {
|
|
|
3837
3726
|
/**
|
|
3838
3727
|
* Handles available client browser microphone audio data and sends it over the WebSocket
|
|
3839
3728
|
* @param {ArrayBuffer} data - The audio data buffer
|
|
3840
|
-
* @private
|
|
3841
3729
|
*/
|
|
3842
3730
|
_handleDataAvailable(data) {
|
|
3731
|
+
var _a, _b, _c;
|
|
3843
3732
|
try {
|
|
3844
3733
|
const base64 = arrayBufferToBase64(data.mono);
|
|
3845
|
-
|
|
3734
|
+
// Determine if we should gate audio based on VAD configuration
|
|
3735
|
+
const shouldGateAudio = ((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.gate_audio) !== false; // Default to true if not specified
|
|
3736
|
+
const bufferFrames = (_c = (_b = this.vadConfig) === null || _b === void 0 ? void 0 : _b.buffer_frames) !== null && _c !== void 0 ? _c : 10; // Default to 10 if not specified
|
|
3737
|
+
let sendAudio;
|
|
3738
|
+
if (this.pushToTalkEnabled) {
|
|
3739
|
+
sendAudio = this.pushToTalkActive;
|
|
3740
|
+
}
|
|
3741
|
+
else if (shouldGateAudio) {
|
|
3742
|
+
sendAudio = this.userIsSpeaking;
|
|
3743
|
+
}
|
|
3744
|
+
else {
|
|
3745
|
+
// If gate_audio is false, always send audio
|
|
3746
|
+
sendAudio = true;
|
|
3747
|
+
}
|
|
3846
3748
|
if (sendAudio) {
|
|
3847
|
-
// If we have buffered audio, send it first
|
|
3848
|
-
if (this.audioBuffer.length > 0) {
|
|
3749
|
+
// If we have buffered audio and we're gating, send it first
|
|
3750
|
+
if (shouldGateAudio && this.audioBuffer.length > 0) {
|
|
3849
3751
|
console.log(`Sending ${this.audioBuffer.length} buffered audio chunks`);
|
|
3850
3752
|
for (const bufferedAudio of this.audioBuffer) {
|
|
3851
3753
|
this._wsSend({
|
|
@@ -3864,8 +3766,8 @@ class LayercodeClient {
|
|
|
3864
3766
|
else {
|
|
3865
3767
|
// Buffer audio when not sending (to catch audio just before VAD triggers)
|
|
3866
3768
|
this.audioBuffer.push(base64);
|
|
3867
|
-
// Keep buffer size
|
|
3868
|
-
if (this.audioBuffer.length >
|
|
3769
|
+
// Keep buffer size based on configuration
|
|
3770
|
+
if (this.audioBuffer.length > bufferFrames) {
|
|
3869
3771
|
this.audioBuffer.shift(); // Remove oldest chunk
|
|
3870
3772
|
}
|
|
3871
3773
|
}
|
|
@@ -3897,7 +3799,6 @@ class LayercodeClient {
|
|
|
3897
3799
|
* @param {WavRecorder | WavStreamPlayer} source - The audio source (recorder or player).
|
|
3898
3800
|
* @param {(amplitude: number) => void} callback - The callback function to invoke on amplitude change.
|
|
3899
3801
|
* @param {(amplitude: number) => void} updateInternalState - Function to update the internal amplitude state.
|
|
3900
|
-
* @private
|
|
3901
3802
|
*/
|
|
3902
3803
|
_setupAmplitudeMonitoring(source, callback, updateInternalState) {
|
|
3903
3804
|
// Set up amplitude monitoring only if a callback is provided
|
|
@@ -3929,6 +3830,7 @@ class LayercodeClient {
|
|
|
3929
3830
|
let authorizeSessionRequestBody = {
|
|
3930
3831
|
pipeline_id: this.options.pipelineId,
|
|
3931
3832
|
metadata: this.options.metadata,
|
|
3833
|
+
sdk_version: SDK_VERSION,
|
|
3932
3834
|
};
|
|
3933
3835
|
// If we're reconnecting to a previous session, we need to include the session_id in the request. Otherwise we don't send session_id, and a new session will be created and the session_id will be returned in the response.
|
|
3934
3836
|
if (this.options.sessionId) {
|
|
@@ -3952,6 +3854,8 @@ class LayercodeClient {
|
|
|
3952
3854
|
})}`);
|
|
3953
3855
|
const config = authorizeSessionResponseBody.config;
|
|
3954
3856
|
console.log('config', config);
|
|
3857
|
+
// Store VAD configuration
|
|
3858
|
+
this.vadConfig = config.vad || null;
|
|
3955
3859
|
if (config.transcription.trigger === 'push_to_talk') {
|
|
3956
3860
|
this.pushToTalkEnabled = true;
|
|
3957
3861
|
}
|
|
@@ -3962,7 +3866,6 @@ class LayercodeClient {
|
|
|
3962
3866
|
else {
|
|
3963
3867
|
throw new Error(`Unknown trigger: ${config.transcription.trigger}`);
|
|
3964
3868
|
}
|
|
3965
|
-
this._initializeVAD();
|
|
3966
3869
|
// Bind the websocket message callbacks
|
|
3967
3870
|
this.ws.onmessage = this._handleWebSocketMessage;
|
|
3968
3871
|
this.ws.onopen = () => {
|
|
@@ -3982,18 +3885,13 @@ class LayercodeClient {
|
|
|
3982
3885
|
this._setStatus('error');
|
|
3983
3886
|
this.options.onError(new Error('WebSocket connection error'));
|
|
3984
3887
|
};
|
|
3985
|
-
// Initialize microphone audio capture
|
|
3986
|
-
await this.wavRecorder.begin();
|
|
3987
|
-
await this.wavRecorder.record(this._handleDataAvailable, 1638);
|
|
3988
|
-
// Set up microphone amplitude monitoring
|
|
3989
|
-
this._setupAmplitudeMonitoring(this.wavRecorder, this.options.onUserAmplitudeChange, (amp) => (this.userAudioAmplitude = amp));
|
|
3990
3888
|
// Initialize audio player
|
|
3991
3889
|
await this.wavPlayer.connect();
|
|
3992
3890
|
// Set up audio player amplitude monitoring
|
|
3993
3891
|
this._setupAmplitudeMonitoring(this.wavPlayer, this.options.onAgentAmplitudeChange, (amp) => (this.agentAudioAmplitude = amp));
|
|
3994
|
-
//
|
|
3995
|
-
|
|
3996
|
-
this
|
|
3892
|
+
// wavRecorder will be started from the onDeviceSwitched callback,
|
|
3893
|
+
// which is called when the device is first initialized and also when the device is switched
|
|
3894
|
+
// this is to ensure that the device is initialized before the recorder is started
|
|
3997
3895
|
}
|
|
3998
3896
|
catch (error) {
|
|
3999
3897
|
console.error('Error connecting to Layercode pipeline:', error);
|
|
@@ -4013,6 +3911,7 @@ class LayercodeClient {
|
|
|
4013
3911
|
this.vad.destroy();
|
|
4014
3912
|
this.vad = null;
|
|
4015
3913
|
}
|
|
3914
|
+
this.wavRecorder.listenForDeviceChange(null);
|
|
4016
3915
|
this.wavRecorder.quit();
|
|
4017
3916
|
this.wavPlayer.disconnect();
|
|
4018
3917
|
// Reset turn tracking
|
|
@@ -4036,19 +3935,101 @@ class LayercodeClient {
|
|
|
4036
3935
|
* @param {string} deviceId - The deviceId of the new microphone
|
|
4037
3936
|
*/
|
|
4038
3937
|
async setInputDevice(deviceId) {
|
|
4039
|
-
|
|
3938
|
+
var _a;
|
|
3939
|
+
try {
|
|
3940
|
+
this.deviceId = deviceId;
|
|
3941
|
+
// Restart recording with the new device
|
|
3942
|
+
await this._restartAudioRecording();
|
|
3943
|
+
// Reinitialize VAD with the new audio stream if VAD is enabled
|
|
3944
|
+
const shouldUseVAD = !this.pushToTalkEnabled && ((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.enabled) !== false;
|
|
3945
|
+
if (shouldUseVAD) {
|
|
3946
|
+
console.log('Reinitializing VAD with new audio stream');
|
|
3947
|
+
const newStream = this.wavRecorder.getStream();
|
|
3948
|
+
await this._reinitializeVAD(newStream);
|
|
3949
|
+
}
|
|
3950
|
+
console.log(`Successfully switched to input device: ${deviceId}`);
|
|
3951
|
+
}
|
|
3952
|
+
catch (error) {
|
|
3953
|
+
console.error(`Failed to switch to input device ${deviceId}:`, error);
|
|
3954
|
+
throw new Error(`Failed to switch to input device: ${error instanceof Error ? error.message : String(error)}`);
|
|
3955
|
+
}
|
|
3956
|
+
}
|
|
3957
|
+
/**
|
|
3958
|
+
* Restarts audio recording after a device switch to ensure audio is captured from the new device
|
|
3959
|
+
*/
|
|
3960
|
+
async _restartAudioRecording() {
|
|
3961
|
+
try {
|
|
3962
|
+
console.log('Restarting audio recording after device switch...');
|
|
4040
3963
|
try {
|
|
4041
3964
|
await this.wavRecorder.end();
|
|
4042
3965
|
}
|
|
4043
|
-
catch (
|
|
4044
|
-
|
|
4045
|
-
await this.wavRecorder.quit();
|
|
3966
|
+
catch (_a) {
|
|
3967
|
+
// Ignore cleanup errors
|
|
4046
3968
|
}
|
|
4047
|
-
|
|
3969
|
+
// Start with new device
|
|
3970
|
+
await this.wavRecorder.begin(this.deviceId || undefined);
|
|
3971
|
+
await this.wavRecorder.record(this._handleDataAvailable, 1638);
|
|
3972
|
+
// Re-setup amplitude monitoring with the new stream
|
|
3973
|
+
this._setupAmplitudeMonitoring(this.wavRecorder, this.options.onUserAmplitudeChange, (amp) => (this.userAudioAmplitude = amp));
|
|
3974
|
+
console.log('Audio recording restart completed successfully');
|
|
3975
|
+
}
|
|
3976
|
+
catch (error) {
|
|
3977
|
+
console.error('Error restarting audio recording after device switch:', error);
|
|
3978
|
+
this.options.onError(error instanceof Error ? error : new Error(String(error)));
|
|
4048
3979
|
}
|
|
4049
|
-
|
|
4050
|
-
|
|
4051
|
-
|
|
3980
|
+
}
|
|
3981
|
+
/**
|
|
3982
|
+
* Reinitializes VAD with a new stream (used after device switching)
|
|
3983
|
+
*/
|
|
3984
|
+
async _reinitializeVAD(stream) {
|
|
3985
|
+
// Clean up existing VAD
|
|
3986
|
+
if (this.vad) {
|
|
3987
|
+
this.vad.pause();
|
|
3988
|
+
this.vad.destroy();
|
|
3989
|
+
this.vad = null;
|
|
3990
|
+
}
|
|
3991
|
+
// Reinitialize with new stream
|
|
3992
|
+
if (stream) {
|
|
3993
|
+
this._initializeVAD();
|
|
3994
|
+
}
|
|
3995
|
+
}
|
|
3996
|
+
/**
|
|
3997
|
+
* Sets up the device change event listener
|
|
3998
|
+
*/
|
|
3999
|
+
_setupDeviceChangeListener() {
|
|
4000
|
+
this.wavRecorder.listenForDeviceChange(async (devices) => {
|
|
4001
|
+
try {
|
|
4002
|
+
const currentDeviceExists = devices.some((device) => device.deviceId === this.deviceId);
|
|
4003
|
+
if (!currentDeviceExists) {
|
|
4004
|
+
console.log('Current device disconnected, switching to next available device');
|
|
4005
|
+
try {
|
|
4006
|
+
const nextDevice = devices.find((d) => d.default);
|
|
4007
|
+
if (nextDevice) {
|
|
4008
|
+
await this.setInputDevice(nextDevice.deviceId);
|
|
4009
|
+
// Mark recorder as started and attempt to notify server
|
|
4010
|
+
if (!this.recorderStarted) {
|
|
4011
|
+
this.recorderStarted = true;
|
|
4012
|
+
this._sendReadyIfNeeded();
|
|
4013
|
+
}
|
|
4014
|
+
// Notify about device switch
|
|
4015
|
+
if (this.options.onDeviceSwitched) {
|
|
4016
|
+
this.options.onDeviceSwitched(nextDevice.deviceId);
|
|
4017
|
+
}
|
|
4018
|
+
}
|
|
4019
|
+
else {
|
|
4020
|
+
console.warn('No alternative audio device found');
|
|
4021
|
+
}
|
|
4022
|
+
}
|
|
4023
|
+
catch (error) {
|
|
4024
|
+
console.error('Error switching to next device:', error);
|
|
4025
|
+
throw error;
|
|
4026
|
+
}
|
|
4027
|
+
}
|
|
4028
|
+
}
|
|
4029
|
+
catch (error) {
|
|
4030
|
+
this.options.onError(error instanceof Error ? error : new Error(String(error)));
|
|
4031
|
+
}
|
|
4032
|
+
});
|
|
4052
4033
|
}
|
|
4053
4034
|
}
|
|
4054
4035
|
|