@layercode/js-sdk 1.0.25 → 1.0.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -3483,6 +3483,8 @@ registerProcessor('audio_processor', AudioProcessor);
|
|
|
3483
3483
|
}
|
|
3484
3484
|
|
|
3485
3485
|
/* eslint-env browser */
|
|
3486
|
+
// SDK version - updated when publishing
|
|
3487
|
+
const SDK_VERSION = '1.0.27';
|
|
3486
3488
|
/**
|
|
3487
3489
|
* @class LayercodeClient
|
|
3488
3490
|
* @classdesc Core client for Layercode audio pipeline that manages audio recording, WebSocket communication, and speech processing.
|
|
@@ -3493,6 +3495,7 @@ registerProcessor('audio_processor', AudioProcessor);
|
|
|
3493
3495
|
* @param {Object} options - Configuration options
|
|
3494
3496
|
*/
|
|
3495
3497
|
constructor(options) {
|
|
3498
|
+
this.deviceId = null;
|
|
3496
3499
|
this.options = {
|
|
3497
3500
|
pipelineId: options.pipelineId,
|
|
3498
3501
|
sessionId: options.sessionId || null,
|
|
@@ -3502,6 +3505,7 @@ registerProcessor('audio_processor', AudioProcessor);
|
|
|
3502
3505
|
onConnect: options.onConnect || (() => { }),
|
|
3503
3506
|
onDisconnect: options.onDisconnect || (() => { }),
|
|
3504
3507
|
onError: options.onError || (() => { }),
|
|
3508
|
+
onDeviceSwitched: options.onDeviceSwitched || (() => { }),
|
|
3505
3509
|
onDataMessage: options.onDataMessage || (() => { }),
|
|
3506
3510
|
onUserAmplitudeChange: options.onUserAmplitudeChange || (() => { }),
|
|
3507
3511
|
onAgentAmplitudeChange: options.onAgentAmplitudeChange || (() => { }),
|
|
@@ -3525,198 +3529,99 @@ registerProcessor('audio_processor', AudioProcessor);
|
|
|
3525
3529
|
this.pushToTalkEnabled = false;
|
|
3526
3530
|
this.canInterrupt = false;
|
|
3527
3531
|
this.userIsSpeaking = false;
|
|
3528
|
-
this.endUserTurn = false;
|
|
3529
3532
|
this.recorderStarted = false;
|
|
3530
3533
|
this.readySent = false;
|
|
3531
3534
|
this.currentTurnId = null;
|
|
3532
3535
|
this.audioBuffer = [];
|
|
3533
|
-
this.
|
|
3536
|
+
this.vadConfig = null;
|
|
3537
|
+
// this.audioPauseTime = null;
|
|
3534
3538
|
// Bind event handlers
|
|
3535
3539
|
this._handleWebSocketMessage = this._handleWebSocketMessage.bind(this);
|
|
3536
3540
|
this._handleDataAvailable = this._handleDataAvailable.bind(this);
|
|
3537
|
-
|
|
3538
|
-
_setupAmplitudeBasedVAD() {
|
|
3539
|
-
let isSpeakingByAmplitude = false;
|
|
3540
|
-
let silenceFrames = 0;
|
|
3541
|
-
const AMPLITUDE_THRESHOLD = 0.01; // Adjust based on testing
|
|
3542
|
-
const SILENCE_FRAMES_THRESHOLD = 30; // ~600ms at 20ms chunks
|
|
3543
|
-
// Monitor amplitude changes
|
|
3544
|
-
this.wavRecorder.startAmplitudeMonitoring((amplitude) => {
|
|
3545
|
-
const wasSpeaking = isSpeakingByAmplitude;
|
|
3546
|
-
if (amplitude > AMPLITUDE_THRESHOLD) {
|
|
3547
|
-
silenceFrames = 0;
|
|
3548
|
-
if (!wasSpeaking) {
|
|
3549
|
-
// Speech started - pause audio if playing and track timing for interruption calculation
|
|
3550
|
-
if (this.canInterrupt && this.wavPlayer.isPlaying) {
|
|
3551
|
-
this.audioPauseTime = Date.now();
|
|
3552
|
-
this.wavPlayer.pause();
|
|
3553
|
-
}
|
|
3554
|
-
isSpeakingByAmplitude = true;
|
|
3555
|
-
this.userIsSpeaking = true;
|
|
3556
|
-
this.options.onUserIsSpeakingChange(true);
|
|
3557
|
-
this._wsSend({
|
|
3558
|
-
type: 'vad_events',
|
|
3559
|
-
event: 'vad_start',
|
|
3560
|
-
});
|
|
3561
|
-
}
|
|
3562
|
-
}
|
|
3563
|
-
else {
|
|
3564
|
-
silenceFrames++;
|
|
3565
|
-
if (wasSpeaking && silenceFrames >= SILENCE_FRAMES_THRESHOLD) {
|
|
3566
|
-
// Speech ended
|
|
3567
|
-
isSpeakingByAmplitude = false;
|
|
3568
|
-
this.userIsSpeaking = false;
|
|
3569
|
-
this.options.onUserIsSpeakingChange(false);
|
|
3570
|
-
this._wsSend({
|
|
3571
|
-
type: 'vad_events',
|
|
3572
|
-
event: 'vad_end',
|
|
3573
|
-
});
|
|
3574
|
-
}
|
|
3575
|
-
}
|
|
3576
|
-
});
|
|
3541
|
+
this._setupDeviceChangeListener();
|
|
3577
3542
|
}
|
|
3578
3543
|
_initializeVAD() {
|
|
3579
|
-
|
|
3544
|
+
var _a;
|
|
3545
|
+
console.log('initializing VAD', { pushToTalkEnabled: this.pushToTalkEnabled, canInterrupt: this.canInterrupt, vadConfig: this.vadConfig });
|
|
3580
3546
|
// If we're in push to talk mode, we don't need to use the VAD model
|
|
3581
3547
|
if (this.pushToTalkEnabled) {
|
|
3582
3548
|
return;
|
|
3583
3549
|
}
|
|
3584
|
-
|
|
3585
|
-
|
|
3586
|
-
console.
|
|
3550
|
+
// Check if VAD is disabled
|
|
3551
|
+
if (((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.enabled) === false) {
|
|
3552
|
+
console.log('VAD is disabled by backend configuration');
|
|
3553
|
+
return;
|
|
3554
|
+
}
|
|
3555
|
+
// Build VAD configuration object, only including keys that are defined
|
|
3556
|
+
const vadOptions = {
|
|
3557
|
+
stream: this.wavRecorder.getStream() || undefined,
|
|
3558
|
+
onSpeechStart: () => {
|
|
3559
|
+
console.log('onSpeechStart: sending vad_start');
|
|
3560
|
+
this.userIsSpeaking = true;
|
|
3561
|
+
this.options.onUserIsSpeakingChange(true);
|
|
3562
|
+
this._wsSend({
|
|
3563
|
+
type: 'vad_events',
|
|
3564
|
+
event: 'vad_start',
|
|
3565
|
+
});
|
|
3566
|
+
},
|
|
3567
|
+
onSpeechEnd: () => {
|
|
3568
|
+
console.log('onSpeechEnd: sending vad_end');
|
|
3569
|
+
this.userIsSpeaking = false;
|
|
3570
|
+
this.options.onUserIsSpeakingChange(false);
|
|
3571
|
+
this.audioBuffer = []; // Clear buffer on speech end
|
|
3572
|
+
this._wsSend({
|
|
3573
|
+
type: 'vad_events',
|
|
3574
|
+
event: 'vad_end',
|
|
3575
|
+
});
|
|
3576
|
+
},
|
|
3577
|
+
};
|
|
3578
|
+
// Apply VAD configuration from backend if available
|
|
3579
|
+
if (this.vadConfig) {
|
|
3580
|
+
// Only add keys that are explicitly defined (not undefined)
|
|
3581
|
+
if (this.vadConfig.model !== undefined)
|
|
3582
|
+
vadOptions.model = this.vadConfig.model;
|
|
3583
|
+
if (this.vadConfig.positive_speech_threshold !== undefined)
|
|
3584
|
+
vadOptions.positiveSpeechThreshold = this.vadConfig.positive_speech_threshold;
|
|
3585
|
+
if (this.vadConfig.negative_speech_threshold !== undefined)
|
|
3586
|
+
vadOptions.negativeSpeechThreshold = this.vadConfig.negative_speech_threshold;
|
|
3587
|
+
if (this.vadConfig.redemption_frames !== undefined)
|
|
3588
|
+
vadOptions.redemptionFrames = this.vadConfig.redemption_frames;
|
|
3589
|
+
if (this.vadConfig.min_speech_frames !== undefined)
|
|
3590
|
+
vadOptions.minSpeechFrames = this.vadConfig.min_speech_frames;
|
|
3591
|
+
if (this.vadConfig.pre_speech_pad_frames !== undefined)
|
|
3592
|
+
vadOptions.preSpeechPadFrames = this.vadConfig.pre_speech_pad_frames;
|
|
3593
|
+
if (this.vadConfig.frame_samples !== undefined)
|
|
3594
|
+
vadOptions.frameSamples = this.vadConfig.frame_samples;
|
|
3595
|
+
}
|
|
3596
|
+
else {
|
|
3597
|
+
// Default values if no config from backend
|
|
3598
|
+
vadOptions.model = 'v5';
|
|
3599
|
+
vadOptions.positiveSpeechThreshold = 0.15;
|
|
3600
|
+
vadOptions.negativeSpeechThreshold = 0.05;
|
|
3601
|
+
vadOptions.redemptionFrames = 4;
|
|
3602
|
+
vadOptions.minSpeechFrames = 2;
|
|
3603
|
+
vadOptions.preSpeechPadFrames = 0;
|
|
3604
|
+
vadOptions.frameSamples = 512; // Required for v5
|
|
3605
|
+
}
|
|
3606
|
+
console.log('Creating VAD with options:', vadOptions);
|
|
3607
|
+
dist.MicVAD.new(vadOptions)
|
|
3608
|
+
.then((vad) => {
|
|
3609
|
+
this.vad = vad;
|
|
3610
|
+
this.vad.start();
|
|
3611
|
+
console.log('VAD started successfully');
|
|
3612
|
+
})
|
|
3613
|
+
.catch((error) => {
|
|
3614
|
+
console.warn('Error initializing VAD:', error);
|
|
3587
3615
|
// Send a message to server indicating VAD failure
|
|
3588
3616
|
this._wsSend({
|
|
3589
3617
|
type: 'vad_events',
|
|
3590
3618
|
event: 'vad_model_failed',
|
|
3591
3619
|
});
|
|
3592
|
-
|
|
3593
|
-
this.userIsSpeaking = false;
|
|
3594
|
-
this.options.onUserIsSpeakingChange(false);
|
|
3595
|
-
// Set up amplitude-based fallback detection
|
|
3596
|
-
this._setupAmplitudeBasedVAD();
|
|
3597
|
-
}, 2000);
|
|
3598
|
-
if (!this.canInterrupt) {
|
|
3599
|
-
dist.MicVAD.new({
|
|
3600
|
-
stream: this.wavRecorder.getStream() || undefined,
|
|
3601
|
-
model: 'v5',
|
|
3602
|
-
positiveSpeechThreshold: 0.7,
|
|
3603
|
-
negativeSpeechThreshold: 0.55,
|
|
3604
|
-
redemptionFrames: 25, // Number of frames of silence before onVADMisfire or onSpeechEnd is called. Effectively a delay before restarting.
|
|
3605
|
-
minSpeechFrames: 0,
|
|
3606
|
-
preSpeechPadFrames: 0,
|
|
3607
|
-
onSpeechStart: () => {
|
|
3608
|
-
this.userIsSpeaking = true;
|
|
3609
|
-
this.options.onUserIsSpeakingChange(true);
|
|
3610
|
-
console.log('onSpeechStart: sending vad_start');
|
|
3611
|
-
this._wsSend({
|
|
3612
|
-
type: 'vad_events',
|
|
3613
|
-
event: 'vad_start',
|
|
3614
|
-
});
|
|
3615
|
-
},
|
|
3616
|
-
onSpeechEnd: () => {
|
|
3617
|
-
console.log('onSpeechEnd: sending vad_end');
|
|
3618
|
-
this.endUserTurn = true; // Set flag to indicate that the user turn has ended
|
|
3619
|
-
this.audioBuffer = []; // Clear buffer on speech end
|
|
3620
|
-
this.userIsSpeaking = false;
|
|
3621
|
-
this.options.onUserIsSpeakingChange(false);
|
|
3622
|
-
console.log('onSpeechEnd: State after update - endUserTurn:', this.endUserTurn, 'userIsSpeaking:', this.userIsSpeaking);
|
|
3623
|
-
// Send vad_end immediately instead of waiting for next audio chunk
|
|
3624
|
-
this._wsSend({
|
|
3625
|
-
type: 'vad_events',
|
|
3626
|
-
event: 'vad_end',
|
|
3627
|
-
});
|
|
3628
|
-
this.endUserTurn = false; // Reset the flag after sending vad_end
|
|
3629
|
-
},
|
|
3630
|
-
})
|
|
3631
|
-
.then((vad) => {
|
|
3632
|
-
clearTimeout(timeout);
|
|
3633
|
-
this.vad = vad;
|
|
3634
|
-
this.vad.start();
|
|
3635
|
-
console.log('VAD started');
|
|
3636
|
-
})
|
|
3637
|
-
.catch((error) => {
|
|
3638
|
-
console.error('Error initializing VAD:', error);
|
|
3639
|
-
});
|
|
3640
|
-
}
|
|
3641
|
-
else {
|
|
3642
|
-
dist.MicVAD.new({
|
|
3643
|
-
stream: this.wavRecorder.getStream() || undefined,
|
|
3644
|
-
model: 'v5',
|
|
3645
|
-
// baseAssetPath: '/', // Use if bundling model locally
|
|
3646
|
-
// onnxWASMBasePath: '/', // Use if bundling model locally
|
|
3647
|
-
positiveSpeechThreshold: 0.7,
|
|
3648
|
-
negativeSpeechThreshold: 0.55,
|
|
3649
|
-
redemptionFrames: 25, // Number of frames of silence before onVADMisfire or onSpeechEnd is called. Effectively a delay before restarting.
|
|
3650
|
-
minSpeechFrames: 25,
|
|
3651
|
-
preSpeechPadFrames: 0,
|
|
3652
|
-
onSpeechStart: () => {
|
|
3653
|
-
// Only pause agent audio if it's currently playing
|
|
3654
|
-
if (this.wavPlayer.isPlaying) {
|
|
3655
|
-
console.log('onSpeechStart: WavPlayer is playing, pausing it.');
|
|
3656
|
-
this.audioPauseTime = Date.now(); // Track when we paused
|
|
3657
|
-
this.wavPlayer.pause();
|
|
3658
|
-
}
|
|
3659
|
-
else {
|
|
3660
|
-
console.log('onSpeechStart: WavPlayer is not playing, VAD will not pause.');
|
|
3661
|
-
}
|
|
3662
|
-
console.log('onSpeechStart: sending vad_start');
|
|
3663
|
-
this._wsSend({
|
|
3664
|
-
type: 'vad_events',
|
|
3665
|
-
event: 'vad_start',
|
|
3666
|
-
});
|
|
3667
|
-
this.userIsSpeaking = true;
|
|
3668
|
-
this.options.onUserIsSpeakingChange(true);
|
|
3669
|
-
this.endUserTurn = false; // Reset endUserTurn when speech starts
|
|
3670
|
-
console.log('onSpeechStart: State after update - endUserTurn:', this.endUserTurn, 'userIsSpeaking:', this.userIsSpeaking);
|
|
3671
|
-
},
|
|
3672
|
-
onVADMisfire: () => {
|
|
3673
|
-
// If the speech detected was for less than minSpeechFrames, this is called instead of onSpeechEnd, and we should resume the assistant audio as it was a false interruption. We include a configurable delay so the assistant isn't too quick to start speaking again.
|
|
3674
|
-
this.userIsSpeaking = false;
|
|
3675
|
-
this.audioBuffer = []; // Clear buffer on misfire
|
|
3676
|
-
this.options.onUserIsSpeakingChange(false);
|
|
3677
|
-
// Add the missing delay before resuming to prevent race conditions
|
|
3678
|
-
setTimeout(() => {
|
|
3679
|
-
if (!this.wavPlayer.isPlaying) {
|
|
3680
|
-
console.log('onVADMisfire: Resuming after delay');
|
|
3681
|
-
this.audioPauseTime = null; // Clear pause time since we're resuming
|
|
3682
|
-
this.wavPlayer.play();
|
|
3683
|
-
}
|
|
3684
|
-
else {
|
|
3685
|
-
console.log('onVADMisfire: Not resuming - either no pause or user speaking again');
|
|
3686
|
-
this.endUserTurn = true;
|
|
3687
|
-
}
|
|
3688
|
-
}, this.options.vadResumeDelay);
|
|
3689
|
-
},
|
|
3690
|
-
onSpeechEnd: () => {
|
|
3691
|
-
console.log('onSpeechEnd: sending vad_end');
|
|
3692
|
-
this.endUserTurn = true; // Set flag to indicate that the user turn has ended
|
|
3693
|
-
this.audioBuffer = []; // Clear buffer on speech end
|
|
3694
|
-
this.userIsSpeaking = false;
|
|
3695
|
-
this.options.onUserIsSpeakingChange(false);
|
|
3696
|
-
console.log('onSpeechEnd: State after update - endUserTurn:', this.endUserTurn, 'userIsSpeaking:', this.userIsSpeaking);
|
|
3697
|
-
// Send vad_end immediately instead of waiting for next audio chunk
|
|
3698
|
-
this._wsSend({
|
|
3699
|
-
type: 'vad_events',
|
|
3700
|
-
event: 'vad_end',
|
|
3701
|
-
});
|
|
3702
|
-
this.endUserTurn = false; // Reset the flag after sending vad_end
|
|
3703
|
-
},
|
|
3704
|
-
})
|
|
3705
|
-
.then((vad) => {
|
|
3706
|
-
clearTimeout(timeout);
|
|
3707
|
-
this.vad = vad;
|
|
3708
|
-
this.vad.start();
|
|
3709
|
-
console.log('VAD started');
|
|
3710
|
-
})
|
|
3711
|
-
.catch((error) => {
|
|
3712
|
-
console.error('Error initializing VAD:', error);
|
|
3713
|
-
});
|
|
3714
|
-
}
|
|
3620
|
+
});
|
|
3715
3621
|
}
|
|
3716
3622
|
/**
|
|
3717
3623
|
* Updates the connection status and triggers the callback
|
|
3718
3624
|
* @param {string} status - New status value
|
|
3719
|
-
* @private
|
|
3720
3625
|
*/
|
|
3721
3626
|
_setStatus(status) {
|
|
3722
3627
|
this.status = status;
|
|
@@ -3724,7 +3629,6 @@ registerProcessor('audio_processor', AudioProcessor);
|
|
|
3724
3629
|
}
|
|
3725
3630
|
/**
|
|
3726
3631
|
* Handles when agent audio finishes playing
|
|
3727
|
-
* @private
|
|
3728
3632
|
*/
|
|
3729
3633
|
_clientResponseAudioReplayFinished() {
|
|
3730
3634
|
console.log('clientResponseAudioReplayFinished');
|
|
@@ -3737,17 +3641,6 @@ registerProcessor('audio_processor', AudioProcessor);
|
|
|
3737
3641
|
const offsetData = await this.wavPlayer.interrupt();
|
|
3738
3642
|
if (offsetData && this.currentTurnId) {
|
|
3739
3643
|
let offsetMs = offsetData.currentTime * 1000;
|
|
3740
|
-
// Calculate accurate offset by subtracting pause time if audio was paused for VAD
|
|
3741
|
-
if (this.audioPauseTime) {
|
|
3742
|
-
const pauseDurationMs = Date.now() - this.audioPauseTime;
|
|
3743
|
-
const adjustedOffsetMs = Math.max(0, offsetMs - pauseDurationMs);
|
|
3744
|
-
console.log(`Interruption detected: Raw offset ${offsetMs}ms, pause duration ${pauseDurationMs}ms, adjusted offset ${adjustedOffsetMs}ms for turn ${this.currentTurnId}`);
|
|
3745
|
-
offsetMs = adjustedOffsetMs;
|
|
3746
|
-
this.audioPauseTime = null; // Clear the pause time
|
|
3747
|
-
}
|
|
3748
|
-
else {
|
|
3749
|
-
console.log(`Interruption detected: ${offsetMs}ms offset for turn ${this.currentTurnId} (no pause adjustment needed)`);
|
|
3750
|
-
}
|
|
3751
3644
|
// Send interruption event with accurate playback offset in milliseconds
|
|
3752
3645
|
this._wsSend({
|
|
3753
3646
|
type: 'trigger.response.audio.interrupted',
|
|
@@ -3781,7 +3674,6 @@ registerProcessor('audio_processor', AudioProcessor);
|
|
|
3781
3674
|
/**
|
|
3782
3675
|
* Handles incoming WebSocket messages
|
|
3783
3676
|
* @param {MessageEvent} event - The WebSocket message event
|
|
3784
|
-
* @private
|
|
3785
3677
|
*/
|
|
3786
3678
|
async _handleWebSocketMessage(event) {
|
|
3787
3679
|
try {
|
|
@@ -3796,12 +3688,10 @@ registerProcessor('audio_processor', AudioProcessor);
|
|
|
3796
3688
|
console.log(message);
|
|
3797
3689
|
if (message.role === 'assistant') {
|
|
3798
3690
|
// Start tracking new assistant turn
|
|
3799
|
-
// Note: Don't reset currentTurnId here - let response.audio set it
|
|
3800
|
-
// This prevents race conditions where text arrives before audio
|
|
3801
3691
|
console.log('Assistant turn started, will track new turn ID from audio/text');
|
|
3802
3692
|
}
|
|
3803
|
-
else if (message.role === 'user' && !this.pushToTalkEnabled
|
|
3804
|
-
// Interrupt any playing assistant audio if this is a turn
|
|
3693
|
+
else if (message.role === 'user' && !this.pushToTalkEnabled) {
|
|
3694
|
+
// Interrupt any playing assistant audio if this is a turn triggered by the server (and not push to talk, which will have already called interrupt)
|
|
3805
3695
|
console.log('interrupting assistant audio, as user turn has started and pushToTalkEnabled is false');
|
|
3806
3696
|
await this._clientInterruptAssistantReplay();
|
|
3807
3697
|
}
|
|
@@ -3823,7 +3713,6 @@ registerProcessor('audio_processor', AudioProcessor);
|
|
|
3823
3713
|
this.currentTurnId = message.turn_id;
|
|
3824
3714
|
console.log(`Setting current turn ID to: ${message.turn_id} from text message`);
|
|
3825
3715
|
}
|
|
3826
|
-
// Note: We no longer track text content in the client - the pipeline handles interruption estimation
|
|
3827
3716
|
break;
|
|
3828
3717
|
}
|
|
3829
3718
|
case 'response.data':
|
|
@@ -3831,7 +3720,7 @@ registerProcessor('audio_processor', AudioProcessor);
|
|
|
3831
3720
|
this.options.onDataMessage(message);
|
|
3832
3721
|
break;
|
|
3833
3722
|
default:
|
|
3834
|
-
console.
|
|
3723
|
+
console.warn('Unknown message type received:', message);
|
|
3835
3724
|
break;
|
|
3836
3725
|
}
|
|
3837
3726
|
}
|
|
@@ -3843,15 +3732,28 @@ registerProcessor('audio_processor', AudioProcessor);
|
|
|
3843
3732
|
/**
|
|
3844
3733
|
* Handles available client browser microphone audio data and sends it over the WebSocket
|
|
3845
3734
|
* @param {ArrayBuffer} data - The audio data buffer
|
|
3846
|
-
* @private
|
|
3847
3735
|
*/
|
|
3848
3736
|
_handleDataAvailable(data) {
|
|
3737
|
+
var _a, _b, _c;
|
|
3849
3738
|
try {
|
|
3850
3739
|
const base64 = arrayBufferToBase64(data.mono);
|
|
3851
|
-
|
|
3740
|
+
// Determine if we should gate audio based on VAD configuration
|
|
3741
|
+
const shouldGateAudio = ((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.gate_audio) !== false; // Default to true if not specified
|
|
3742
|
+
const bufferFrames = (_c = (_b = this.vadConfig) === null || _b === void 0 ? void 0 : _b.buffer_frames) !== null && _c !== void 0 ? _c : 10; // Default to 10 if not specified
|
|
3743
|
+
let sendAudio;
|
|
3744
|
+
if (this.pushToTalkEnabled) {
|
|
3745
|
+
sendAudio = this.pushToTalkActive;
|
|
3746
|
+
}
|
|
3747
|
+
else if (shouldGateAudio) {
|
|
3748
|
+
sendAudio = this.userIsSpeaking;
|
|
3749
|
+
}
|
|
3750
|
+
else {
|
|
3751
|
+
// If gate_audio is false, always send audio
|
|
3752
|
+
sendAudio = true;
|
|
3753
|
+
}
|
|
3852
3754
|
if (sendAudio) {
|
|
3853
|
-
// If we have buffered audio, send it first
|
|
3854
|
-
if (this.audioBuffer.length > 0) {
|
|
3755
|
+
// If we have buffered audio and we're gating, send it first
|
|
3756
|
+
if (shouldGateAudio && this.audioBuffer.length > 0) {
|
|
3855
3757
|
console.log(`Sending ${this.audioBuffer.length} buffered audio chunks`);
|
|
3856
3758
|
for (const bufferedAudio of this.audioBuffer) {
|
|
3857
3759
|
this._wsSend({
|
|
@@ -3870,8 +3772,8 @@ registerProcessor('audio_processor', AudioProcessor);
|
|
|
3870
3772
|
else {
|
|
3871
3773
|
// Buffer audio when not sending (to catch audio just before VAD triggers)
|
|
3872
3774
|
this.audioBuffer.push(base64);
|
|
3873
|
-
// Keep buffer size
|
|
3874
|
-
if (this.audioBuffer.length >
|
|
3775
|
+
// Keep buffer size based on configuration
|
|
3776
|
+
if (this.audioBuffer.length > bufferFrames) {
|
|
3875
3777
|
this.audioBuffer.shift(); // Remove oldest chunk
|
|
3876
3778
|
}
|
|
3877
3779
|
}
|
|
@@ -3903,7 +3805,6 @@ registerProcessor('audio_processor', AudioProcessor);
|
|
|
3903
3805
|
* @param {WavRecorder | WavStreamPlayer} source - The audio source (recorder or player).
|
|
3904
3806
|
* @param {(amplitude: number) => void} callback - The callback function to invoke on amplitude change.
|
|
3905
3807
|
* @param {(amplitude: number) => void} updateInternalState - Function to update the internal amplitude state.
|
|
3906
|
-
* @private
|
|
3907
3808
|
*/
|
|
3908
3809
|
_setupAmplitudeMonitoring(source, callback, updateInternalState) {
|
|
3909
3810
|
// Set up amplitude monitoring only if a callback is provided
|
|
@@ -3935,6 +3836,7 @@ registerProcessor('audio_processor', AudioProcessor);
|
|
|
3935
3836
|
let authorizeSessionRequestBody = {
|
|
3936
3837
|
pipeline_id: this.options.pipelineId,
|
|
3937
3838
|
metadata: this.options.metadata,
|
|
3839
|
+
sdk_version: SDK_VERSION,
|
|
3938
3840
|
};
|
|
3939
3841
|
// If we're reconnecting to a previous session, we need to include the session_id in the request. Otherwise we don't send session_id, and a new session will be created and the session_id will be returned in the response.
|
|
3940
3842
|
if (this.options.sessionId) {
|
|
@@ -3958,6 +3860,8 @@ registerProcessor('audio_processor', AudioProcessor);
|
|
|
3958
3860
|
})}`);
|
|
3959
3861
|
const config = authorizeSessionResponseBody.config;
|
|
3960
3862
|
console.log('config', config);
|
|
3863
|
+
// Store VAD configuration
|
|
3864
|
+
this.vadConfig = config.vad || null;
|
|
3961
3865
|
if (config.transcription.trigger === 'push_to_talk') {
|
|
3962
3866
|
this.pushToTalkEnabled = true;
|
|
3963
3867
|
}
|
|
@@ -3968,7 +3872,6 @@ registerProcessor('audio_processor', AudioProcessor);
|
|
|
3968
3872
|
else {
|
|
3969
3873
|
throw new Error(`Unknown trigger: ${config.transcription.trigger}`);
|
|
3970
3874
|
}
|
|
3971
|
-
this._initializeVAD();
|
|
3972
3875
|
// Bind the websocket message callbacks
|
|
3973
3876
|
this.ws.onmessage = this._handleWebSocketMessage;
|
|
3974
3877
|
this.ws.onopen = () => {
|
|
@@ -3988,18 +3891,13 @@ registerProcessor('audio_processor', AudioProcessor);
|
|
|
3988
3891
|
this._setStatus('error');
|
|
3989
3892
|
this.options.onError(new Error('WebSocket connection error'));
|
|
3990
3893
|
};
|
|
3991
|
-
// Initialize microphone audio capture
|
|
3992
|
-
await this.wavRecorder.begin();
|
|
3993
|
-
await this.wavRecorder.record(this._handleDataAvailable, 1638);
|
|
3994
|
-
// Set up microphone amplitude monitoring
|
|
3995
|
-
this._setupAmplitudeMonitoring(this.wavRecorder, this.options.onUserAmplitudeChange, (amp) => (this.userAudioAmplitude = amp));
|
|
3996
3894
|
// Initialize audio player
|
|
3997
3895
|
await this.wavPlayer.connect();
|
|
3998
3896
|
// Set up audio player amplitude monitoring
|
|
3999
3897
|
this._setupAmplitudeMonitoring(this.wavPlayer, this.options.onAgentAmplitudeChange, (amp) => (this.agentAudioAmplitude = amp));
|
|
4000
|
-
//
|
|
4001
|
-
|
|
4002
|
-
this
|
|
3898
|
+
// wavRecorder will be started from the onDeviceSwitched callback,
|
|
3899
|
+
// which is called when the device is first initialized and also when the device is switched
|
|
3900
|
+
// this is to ensure that the device is initialized before the recorder is started
|
|
4003
3901
|
}
|
|
4004
3902
|
catch (error) {
|
|
4005
3903
|
console.error('Error connecting to Layercode pipeline:', error);
|
|
@@ -4019,6 +3917,7 @@ registerProcessor('audio_processor', AudioProcessor);
|
|
|
4019
3917
|
this.vad.destroy();
|
|
4020
3918
|
this.vad = null;
|
|
4021
3919
|
}
|
|
3920
|
+
this.wavRecorder.listenForDeviceChange(null);
|
|
4022
3921
|
this.wavRecorder.quit();
|
|
4023
3922
|
this.wavPlayer.disconnect();
|
|
4024
3923
|
// Reset turn tracking
|
|
@@ -4042,19 +3941,101 @@ registerProcessor('audio_processor', AudioProcessor);
|
|
|
4042
3941
|
* @param {string} deviceId - The deviceId of the new microphone
|
|
4043
3942
|
*/
|
|
4044
3943
|
async setInputDevice(deviceId) {
|
|
4045
|
-
|
|
3944
|
+
var _a;
|
|
3945
|
+
try {
|
|
3946
|
+
this.deviceId = deviceId;
|
|
3947
|
+
// Restart recording with the new device
|
|
3948
|
+
await this._restartAudioRecording();
|
|
3949
|
+
// Reinitialize VAD with the new audio stream if VAD is enabled
|
|
3950
|
+
const shouldUseVAD = !this.pushToTalkEnabled && ((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.enabled) !== false;
|
|
3951
|
+
if (shouldUseVAD) {
|
|
3952
|
+
console.log('Reinitializing VAD with new audio stream');
|
|
3953
|
+
const newStream = this.wavRecorder.getStream();
|
|
3954
|
+
await this._reinitializeVAD(newStream);
|
|
3955
|
+
}
|
|
3956
|
+
console.log(`Successfully switched to input device: ${deviceId}`);
|
|
3957
|
+
}
|
|
3958
|
+
catch (error) {
|
|
3959
|
+
console.error(`Failed to switch to input device ${deviceId}:`, error);
|
|
3960
|
+
throw new Error(`Failed to switch to input device: ${error instanceof Error ? error.message : String(error)}`);
|
|
3961
|
+
}
|
|
3962
|
+
}
|
|
3963
|
+
/**
|
|
3964
|
+
* Restarts audio recording after a device switch to ensure audio is captured from the new device
|
|
3965
|
+
*/
|
|
3966
|
+
async _restartAudioRecording() {
|
|
3967
|
+
try {
|
|
3968
|
+
console.log('Restarting audio recording after device switch...');
|
|
4046
3969
|
try {
|
|
4047
3970
|
await this.wavRecorder.end();
|
|
4048
3971
|
}
|
|
4049
|
-
catch (
|
|
4050
|
-
|
|
4051
|
-
await this.wavRecorder.quit();
|
|
3972
|
+
catch (_a) {
|
|
3973
|
+
// Ignore cleanup errors
|
|
4052
3974
|
}
|
|
4053
|
-
|
|
3975
|
+
// Start with new device
|
|
3976
|
+
await this.wavRecorder.begin(this.deviceId || undefined);
|
|
3977
|
+
await this.wavRecorder.record(this._handleDataAvailable, 1638);
|
|
3978
|
+
// Re-setup amplitude monitoring with the new stream
|
|
3979
|
+
this._setupAmplitudeMonitoring(this.wavRecorder, this.options.onUserAmplitudeChange, (amp) => (this.userAudioAmplitude = amp));
|
|
3980
|
+
console.log('Audio recording restart completed successfully');
|
|
3981
|
+
}
|
|
3982
|
+
catch (error) {
|
|
3983
|
+
console.error('Error restarting audio recording after device switch:', error);
|
|
3984
|
+
this.options.onError(error instanceof Error ? error : new Error(String(error)));
|
|
4054
3985
|
}
|
|
4055
|
-
|
|
4056
|
-
|
|
4057
|
-
|
|
3986
|
+
}
|
|
3987
|
+
/**
|
|
3988
|
+
* Reinitializes VAD with a new stream (used after device switching)
|
|
3989
|
+
*/
|
|
3990
|
+
async _reinitializeVAD(stream) {
|
|
3991
|
+
// Clean up existing VAD
|
|
3992
|
+
if (this.vad) {
|
|
3993
|
+
this.vad.pause();
|
|
3994
|
+
this.vad.destroy();
|
|
3995
|
+
this.vad = null;
|
|
3996
|
+
}
|
|
3997
|
+
// Reinitialize with new stream
|
|
3998
|
+
if (stream) {
|
|
3999
|
+
this._initializeVAD();
|
|
4000
|
+
}
|
|
4001
|
+
}
|
|
4002
|
+
/**
|
|
4003
|
+
* Sets up the device change event listener
|
|
4004
|
+
*/
|
|
4005
|
+
_setupDeviceChangeListener() {
|
|
4006
|
+
this.wavRecorder.listenForDeviceChange(async (devices) => {
|
|
4007
|
+
try {
|
|
4008
|
+
const currentDeviceExists = devices.some((device) => device.deviceId === this.deviceId);
|
|
4009
|
+
if (!currentDeviceExists) {
|
|
4010
|
+
console.log('Current device disconnected, switching to next available device');
|
|
4011
|
+
try {
|
|
4012
|
+
const nextDevice = devices.find((d) => d.default);
|
|
4013
|
+
if (nextDevice) {
|
|
4014
|
+
await this.setInputDevice(nextDevice.deviceId);
|
|
4015
|
+
// Mark recorder as started and attempt to notify server
|
|
4016
|
+
if (!this.recorderStarted) {
|
|
4017
|
+
this.recorderStarted = true;
|
|
4018
|
+
this._sendReadyIfNeeded();
|
|
4019
|
+
}
|
|
4020
|
+
// Notify about device switch
|
|
4021
|
+
if (this.options.onDeviceSwitched) {
|
|
4022
|
+
this.options.onDeviceSwitched(nextDevice.deviceId);
|
|
4023
|
+
}
|
|
4024
|
+
}
|
|
4025
|
+
else {
|
|
4026
|
+
console.warn('No alternative audio device found');
|
|
4027
|
+
}
|
|
4028
|
+
}
|
|
4029
|
+
catch (error) {
|
|
4030
|
+
console.error('Error switching to next device:', error);
|
|
4031
|
+
throw error;
|
|
4032
|
+
}
|
|
4033
|
+
}
|
|
4034
|
+
}
|
|
4035
|
+
catch (error) {
|
|
4036
|
+
this.options.onError(error instanceof Error ? error : new Error(String(error)));
|
|
4037
|
+
}
|
|
4038
|
+
});
|
|
4058
4039
|
}
|
|
4059
4040
|
}
|
|
4060
4041
|
|