@layercode/js-sdk 1.0.26 → 1.0.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -3483,6 +3483,8 @@ registerProcessor('audio_processor', AudioProcessor);
|
|
|
3483
3483
|
}
|
|
3484
3484
|
|
|
3485
3485
|
/* eslint-env browser */
|
|
3486
|
+
// SDK version - updated when publishing
|
|
3487
|
+
const SDK_VERSION = '1.0.27';
|
|
3486
3488
|
/**
|
|
3487
3489
|
* @class LayercodeClient
|
|
3488
3490
|
* @classdesc Core client for Layercode audio pipeline that manages audio recording, WebSocket communication, and speech processing.
|
|
@@ -3493,6 +3495,7 @@ registerProcessor('audio_processor', AudioProcessor);
|
|
|
3493
3495
|
* @param {Object} options - Configuration options
|
|
3494
3496
|
*/
|
|
3495
3497
|
constructor(options) {
|
|
3498
|
+
this.deviceId = null;
|
|
3496
3499
|
this.options = {
|
|
3497
3500
|
pipelineId: options.pipelineId,
|
|
3498
3501
|
sessionId: options.sessionId || null,
|
|
@@ -3502,6 +3505,7 @@ registerProcessor('audio_processor', AudioProcessor);
|
|
|
3502
3505
|
onConnect: options.onConnect || (() => { }),
|
|
3503
3506
|
onDisconnect: options.onDisconnect || (() => { }),
|
|
3504
3507
|
onError: options.onError || (() => { }),
|
|
3508
|
+
onDeviceSwitched: options.onDeviceSwitched || (() => { }),
|
|
3505
3509
|
onDataMessage: options.onDataMessage || (() => { }),
|
|
3506
3510
|
onUserAmplitudeChange: options.onUserAmplitudeChange || (() => { }),
|
|
3507
3511
|
onAgentAmplitudeChange: options.onAgentAmplitudeChange || (() => { }),
|
|
@@ -3529,71 +3533,28 @@ registerProcessor('audio_processor', AudioProcessor);
|
|
|
3529
3533
|
this.readySent = false;
|
|
3530
3534
|
this.currentTurnId = null;
|
|
3531
3535
|
this.audioBuffer = [];
|
|
3536
|
+
this.vadConfig = null;
|
|
3532
3537
|
// this.audioPauseTime = null;
|
|
3533
3538
|
// Bind event handlers
|
|
3534
3539
|
this._handleWebSocketMessage = this._handleWebSocketMessage.bind(this);
|
|
3535
3540
|
this._handleDataAvailable = this._handleDataAvailable.bind(this);
|
|
3536
|
-
|
|
3537
|
-
_setupAmplitudeBasedVAD() {
|
|
3538
|
-
let isSpeakingByAmplitude = false;
|
|
3539
|
-
let silenceFrames = 0;
|
|
3540
|
-
const AMPLITUDE_THRESHOLD = 0.01; // Adjust based on testing
|
|
3541
|
-
const SILENCE_FRAMES_THRESHOLD = 6.4; // 6.4 * 20ms chunks = 128ms silence. Same as Silero ((frame samples: 512 / sampleRate: 16000) * 1000 * redemptionFrames: 4) = 128 ms silence
|
|
3542
|
-
// Monitor amplitude changes
|
|
3543
|
-
this.wavRecorder.startAmplitudeMonitoring((amplitude) => {
|
|
3544
|
-
const wasSpeaking = isSpeakingByAmplitude;
|
|
3545
|
-
if (amplitude > AMPLITUDE_THRESHOLD) {
|
|
3546
|
-
silenceFrames = 0;
|
|
3547
|
-
if (!wasSpeaking) {
|
|
3548
|
-
isSpeakingByAmplitude = true;
|
|
3549
|
-
this.userIsSpeaking = true;
|
|
3550
|
-
this.options.onUserIsSpeakingChange(true);
|
|
3551
|
-
this._wsSend({
|
|
3552
|
-
type: 'vad_events',
|
|
3553
|
-
event: 'vad_start',
|
|
3554
|
-
});
|
|
3555
|
-
}
|
|
3556
|
-
}
|
|
3557
|
-
else {
|
|
3558
|
-
silenceFrames++;
|
|
3559
|
-
if (wasSpeaking && silenceFrames >= SILENCE_FRAMES_THRESHOLD) {
|
|
3560
|
-
isSpeakingByAmplitude = false;
|
|
3561
|
-
this.userIsSpeaking = false;
|
|
3562
|
-
this.options.onUserIsSpeakingChange(false);
|
|
3563
|
-
this._wsSend({
|
|
3564
|
-
type: 'vad_events',
|
|
3565
|
-
event: 'vad_end',
|
|
3566
|
-
});
|
|
3567
|
-
}
|
|
3568
|
-
}
|
|
3569
|
-
});
|
|
3541
|
+
this._setupDeviceChangeListener();
|
|
3570
3542
|
}
|
|
3571
3543
|
_initializeVAD() {
|
|
3572
|
-
|
|
3544
|
+
var _a;
|
|
3545
|
+
console.log('initializing VAD', { pushToTalkEnabled: this.pushToTalkEnabled, canInterrupt: this.canInterrupt, vadConfig: this.vadConfig });
|
|
3573
3546
|
// If we're in push to talk mode, we don't need to use the VAD model
|
|
3574
3547
|
if (this.pushToTalkEnabled) {
|
|
3575
3548
|
return;
|
|
3576
3549
|
}
|
|
3577
|
-
|
|
3578
|
-
|
|
3579
|
-
console.
|
|
3580
|
-
|
|
3581
|
-
|
|
3582
|
-
|
|
3583
|
-
|
|
3584
|
-
});
|
|
3585
|
-
// Set up amplitude-based fallback detection
|
|
3586
|
-
this._setupAmplitudeBasedVAD();
|
|
3587
|
-
}, 2000);
|
|
3588
|
-
dist.MicVAD.new({
|
|
3550
|
+
// Check if VAD is disabled
|
|
3551
|
+
if (((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.enabled) === false) {
|
|
3552
|
+
console.log('VAD is disabled by backend configuration');
|
|
3553
|
+
return;
|
|
3554
|
+
}
|
|
3555
|
+
// Build VAD configuration object, only including keys that are defined
|
|
3556
|
+
const vadOptions = {
|
|
3589
3557
|
stream: this.wavRecorder.getStream() || undefined,
|
|
3590
|
-
model: 'v5',
|
|
3591
|
-
positiveSpeechThreshold: 0.15,
|
|
3592
|
-
negativeSpeechThreshold: 0.05,
|
|
3593
|
-
redemptionFrames: 4,
|
|
3594
|
-
minSpeechFrames: 2,
|
|
3595
|
-
preSpeechPadFrames: 0,
|
|
3596
|
-
frameSamples: 512, // Required for v5 as per https://docs.vad.ricky0123.com/user-guide/algorithm/#configuration
|
|
3597
3558
|
onSpeechStart: () => {
|
|
3598
3559
|
console.log('onSpeechStart: sending vad_start');
|
|
3599
3560
|
this.userIsSpeaking = true;
|
|
@@ -3613,18 +3574,49 @@ registerProcessor('audio_processor', AudioProcessor);
|
|
|
3613
3574
|
event: 'vad_end',
|
|
3614
3575
|
});
|
|
3615
3576
|
},
|
|
3616
|
-
|
|
3617
|
-
|
|
3618
|
-
|
|
3619
|
-
|
|
3577
|
+
};
|
|
3578
|
+
// Apply VAD configuration from backend if available
|
|
3579
|
+
if (this.vadConfig) {
|
|
3580
|
+
// Only add keys that are explicitly defined (not undefined)
|
|
3581
|
+
if (this.vadConfig.model !== undefined)
|
|
3582
|
+
vadOptions.model = this.vadConfig.model;
|
|
3583
|
+
if (this.vadConfig.positive_speech_threshold !== undefined)
|
|
3584
|
+
vadOptions.positiveSpeechThreshold = this.vadConfig.positive_speech_threshold;
|
|
3585
|
+
if (this.vadConfig.negative_speech_threshold !== undefined)
|
|
3586
|
+
vadOptions.negativeSpeechThreshold = this.vadConfig.negative_speech_threshold;
|
|
3587
|
+
if (this.vadConfig.redemption_frames !== undefined)
|
|
3588
|
+
vadOptions.redemptionFrames = this.vadConfig.redemption_frames;
|
|
3589
|
+
if (this.vadConfig.min_speech_frames !== undefined)
|
|
3590
|
+
vadOptions.minSpeechFrames = this.vadConfig.min_speech_frames;
|
|
3591
|
+
if (this.vadConfig.pre_speech_pad_frames !== undefined)
|
|
3592
|
+
vadOptions.preSpeechPadFrames = this.vadConfig.pre_speech_pad_frames;
|
|
3593
|
+
if (this.vadConfig.frame_samples !== undefined)
|
|
3594
|
+
vadOptions.frameSamples = this.vadConfig.frame_samples;
|
|
3595
|
+
}
|
|
3596
|
+
else {
|
|
3597
|
+
// Default values if no config from backend
|
|
3598
|
+
vadOptions.model = 'v5';
|
|
3599
|
+
vadOptions.positiveSpeechThreshold = 0.15;
|
|
3600
|
+
vadOptions.negativeSpeechThreshold = 0.05;
|
|
3601
|
+
vadOptions.redemptionFrames = 4;
|
|
3602
|
+
vadOptions.minSpeechFrames = 2;
|
|
3603
|
+
vadOptions.preSpeechPadFrames = 0;
|
|
3604
|
+
vadOptions.frameSamples = 512; // Required for v5
|
|
3605
|
+
}
|
|
3606
|
+
console.log('Creating VAD with options:', vadOptions);
|
|
3607
|
+
dist.MicVAD.new(vadOptions)
|
|
3620
3608
|
.then((vad) => {
|
|
3621
|
-
clearTimeout(vadLoadTimeout);
|
|
3622
3609
|
this.vad = vad;
|
|
3623
3610
|
this.vad.start();
|
|
3624
|
-
console.log('VAD started');
|
|
3611
|
+
console.log('VAD started successfully');
|
|
3625
3612
|
})
|
|
3626
3613
|
.catch((error) => {
|
|
3627
|
-
console.
|
|
3614
|
+
console.warn('Error initializing VAD:', error);
|
|
3615
|
+
// Send a message to server indicating VAD failure
|
|
3616
|
+
this._wsSend({
|
|
3617
|
+
type: 'vad_events',
|
|
3618
|
+
event: 'vad_model_failed',
|
|
3619
|
+
});
|
|
3628
3620
|
});
|
|
3629
3621
|
}
|
|
3630
3622
|
/**
|
|
@@ -3728,7 +3720,7 @@ registerProcessor('audio_processor', AudioProcessor);
|
|
|
3728
3720
|
this.options.onDataMessage(message);
|
|
3729
3721
|
break;
|
|
3730
3722
|
default:
|
|
3731
|
-
console.
|
|
3723
|
+
console.warn('Unknown message type received:', message);
|
|
3732
3724
|
break;
|
|
3733
3725
|
}
|
|
3734
3726
|
}
|
|
@@ -3742,12 +3734,26 @@ registerProcessor('audio_processor', AudioProcessor);
|
|
|
3742
3734
|
* @param {ArrayBuffer} data - The audio data buffer
|
|
3743
3735
|
*/
|
|
3744
3736
|
_handleDataAvailable(data) {
|
|
3737
|
+
var _a, _b, _c;
|
|
3745
3738
|
try {
|
|
3746
3739
|
const base64 = arrayBufferToBase64(data.mono);
|
|
3747
|
-
|
|
3740
|
+
// Determine if we should gate audio based on VAD configuration
|
|
3741
|
+
const shouldGateAudio = ((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.gate_audio) !== false; // Default to true if not specified
|
|
3742
|
+
const bufferFrames = (_c = (_b = this.vadConfig) === null || _b === void 0 ? void 0 : _b.buffer_frames) !== null && _c !== void 0 ? _c : 10; // Default to 10 if not specified
|
|
3743
|
+
let sendAudio;
|
|
3744
|
+
if (this.pushToTalkEnabled) {
|
|
3745
|
+
sendAudio = this.pushToTalkActive;
|
|
3746
|
+
}
|
|
3747
|
+
else if (shouldGateAudio) {
|
|
3748
|
+
sendAudio = this.userIsSpeaking;
|
|
3749
|
+
}
|
|
3750
|
+
else {
|
|
3751
|
+
// If gate_audio is false, always send audio
|
|
3752
|
+
sendAudio = true;
|
|
3753
|
+
}
|
|
3748
3754
|
if (sendAudio) {
|
|
3749
|
-
// If we have buffered audio, send it first
|
|
3750
|
-
if (this.audioBuffer.length > 0) {
|
|
3755
|
+
// If we have buffered audio and we're gating, send it first
|
|
3756
|
+
if (shouldGateAudio && this.audioBuffer.length > 0) {
|
|
3751
3757
|
console.log(`Sending ${this.audioBuffer.length} buffered audio chunks`);
|
|
3752
3758
|
for (const bufferedAudio of this.audioBuffer) {
|
|
3753
3759
|
this._wsSend({
|
|
@@ -3766,8 +3772,8 @@ registerProcessor('audio_processor', AudioProcessor);
|
|
|
3766
3772
|
else {
|
|
3767
3773
|
// Buffer audio when not sending (to catch audio just before VAD triggers)
|
|
3768
3774
|
this.audioBuffer.push(base64);
|
|
3769
|
-
// Keep buffer size
|
|
3770
|
-
if (this.audioBuffer.length >
|
|
3775
|
+
// Cap the buffer at the configured number of frames (bufferFrames)
|
|
3776
|
+
if (this.audioBuffer.length > bufferFrames) {
|
|
3771
3777
|
this.audioBuffer.shift(); // Remove oldest chunk
|
|
3772
3778
|
}
|
|
3773
3779
|
}
|
|
@@ -3830,6 +3836,7 @@ registerProcessor('audio_processor', AudioProcessor);
|
|
|
3830
3836
|
let authorizeSessionRequestBody = {
|
|
3831
3837
|
pipeline_id: this.options.pipelineId,
|
|
3832
3838
|
metadata: this.options.metadata,
|
|
3839
|
+
sdk_version: SDK_VERSION,
|
|
3833
3840
|
};
|
|
3834
3841
|
// If we're reconnecting to a previous session, we need to include the session_id in the request. Otherwise we don't send session_id, and a new session will be created and the session_id will be returned in the response.
|
|
3835
3842
|
if (this.options.sessionId) {
|
|
@@ -3853,6 +3860,8 @@ registerProcessor('audio_processor', AudioProcessor);
|
|
|
3853
3860
|
})}`);
|
|
3854
3861
|
const config = authorizeSessionResponseBody.config;
|
|
3855
3862
|
console.log('config', config);
|
|
3863
|
+
// Store VAD configuration
|
|
3864
|
+
this.vadConfig = config.vad || null;
|
|
3856
3865
|
if (config.transcription.trigger === 'push_to_talk') {
|
|
3857
3866
|
this.pushToTalkEnabled = true;
|
|
3858
3867
|
}
|
|
@@ -3863,7 +3872,6 @@ registerProcessor('audio_processor', AudioProcessor);
|
|
|
3863
3872
|
else {
|
|
3864
3873
|
throw new Error(`Unknown trigger: ${config.transcription.trigger}`);
|
|
3865
3874
|
}
|
|
3866
|
-
this._initializeVAD();
|
|
3867
3875
|
// Bind the websocket message callbacks
|
|
3868
3876
|
this.ws.onmessage = this._handleWebSocketMessage;
|
|
3869
3877
|
this.ws.onopen = () => {
|
|
@@ -3883,18 +3891,13 @@ registerProcessor('audio_processor', AudioProcessor);
|
|
|
3883
3891
|
this._setStatus('error');
|
|
3884
3892
|
this.options.onError(new Error('WebSocket connection error'));
|
|
3885
3893
|
};
|
|
3886
|
-
// Initialize microphone audio capture
|
|
3887
|
-
await this.wavRecorder.begin();
|
|
3888
|
-
await this.wavRecorder.record(this._handleDataAvailable, 1638);
|
|
3889
|
-
// Set up microphone amplitude monitoring
|
|
3890
|
-
this._setupAmplitudeMonitoring(this.wavRecorder, this.options.onUserAmplitudeChange, (amp) => (this.userAudioAmplitude = amp));
|
|
3891
3894
|
// Initialize audio player
|
|
3892
3895
|
await this.wavPlayer.connect();
|
|
3893
3896
|
// Set up audio player amplitude monitoring
|
|
3894
3897
|
this._setupAmplitudeMonitoring(this.wavPlayer, this.options.onAgentAmplitudeChange, (amp) => (this.agentAudioAmplitude = amp));
|
|
3895
|
-
//
|
|
3896
|
-
|
|
3897
|
-
this
|
|
3898
|
+
// The wavRecorder is not started here; it is started by the device change listener (via setInputDevice),
|
|
3899
|
+
// which is called when the device is first initialized and also whenever the device is switched.
|
|
3900
|
+
// This ensures that the device is initialized before the recorder is started.
|
|
3898
3901
|
}
|
|
3899
3902
|
catch (error) {
|
|
3900
3903
|
console.error('Error connecting to Layercode pipeline:', error);
|
|
@@ -3914,6 +3917,7 @@ registerProcessor('audio_processor', AudioProcessor);
|
|
|
3914
3917
|
this.vad.destroy();
|
|
3915
3918
|
this.vad = null;
|
|
3916
3919
|
}
|
|
3920
|
+
this.wavRecorder.listenForDeviceChange(null);
|
|
3917
3921
|
this.wavRecorder.quit();
|
|
3918
3922
|
this.wavPlayer.disconnect();
|
|
3919
3923
|
// Reset turn tracking
|
|
@@ -3937,19 +3941,101 @@ registerProcessor('audio_processor', AudioProcessor);
|
|
|
3937
3941
|
* @param {string} deviceId - The deviceId of the new microphone
|
|
3938
3942
|
*/
|
|
3939
3943
|
async setInputDevice(deviceId) {
|
|
3940
|
-
|
|
3944
|
+
var _a;
|
|
3945
|
+
try {
|
|
3946
|
+
this.deviceId = deviceId;
|
|
3947
|
+
// Restart recording with the new device
|
|
3948
|
+
await this._restartAudioRecording();
|
|
3949
|
+
// Reinitialize VAD with the new audio stream if VAD is enabled
|
|
3950
|
+
const shouldUseVAD = !this.pushToTalkEnabled && ((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.enabled) !== false;
|
|
3951
|
+
if (shouldUseVAD) {
|
|
3952
|
+
console.log('Reinitializing VAD with new audio stream');
|
|
3953
|
+
const newStream = this.wavRecorder.getStream();
|
|
3954
|
+
await this._reinitializeVAD(newStream);
|
|
3955
|
+
}
|
|
3956
|
+
console.log(`Successfully switched to input device: ${deviceId}`);
|
|
3957
|
+
}
|
|
3958
|
+
catch (error) {
|
|
3959
|
+
console.error(`Failed to switch to input device ${deviceId}:`, error);
|
|
3960
|
+
throw new Error(`Failed to switch to input device: ${error instanceof Error ? error.message : String(error)}`);
|
|
3961
|
+
}
|
|
3962
|
+
}
|
|
3963
|
+
/**
|
|
3964
|
+
* Restarts audio recording after a device switch to ensure audio is captured from the new device
|
|
3965
|
+
*/
|
|
3966
|
+
async _restartAudioRecording() {
|
|
3967
|
+
try {
|
|
3968
|
+
console.log('Restarting audio recording after device switch...');
|
|
3941
3969
|
try {
|
|
3942
3970
|
await this.wavRecorder.end();
|
|
3943
3971
|
}
|
|
3944
|
-
catch (
|
|
3945
|
-
|
|
3946
|
-
await this.wavRecorder.quit();
|
|
3972
|
+
catch (_a) {
|
|
3973
|
+
// Ignore cleanup errors
|
|
3947
3974
|
}
|
|
3948
|
-
|
|
3975
|
+
// Start with new device
|
|
3976
|
+
await this.wavRecorder.begin(this.deviceId || undefined);
|
|
3977
|
+
await this.wavRecorder.record(this._handleDataAvailable, 1638);
|
|
3978
|
+
// Re-setup amplitude monitoring with the new stream
|
|
3979
|
+
this._setupAmplitudeMonitoring(this.wavRecorder, this.options.onUserAmplitudeChange, (amp) => (this.userAudioAmplitude = amp));
|
|
3980
|
+
console.log('Audio recording restart completed successfully');
|
|
3981
|
+
}
|
|
3982
|
+
catch (error) {
|
|
3983
|
+
console.error('Error restarting audio recording after device switch:', error);
|
|
3984
|
+
this.options.onError(error instanceof Error ? error : new Error(String(error)));
|
|
3985
|
+
}
|
|
3986
|
+
}
|
|
3987
|
+
/**
|
|
3988
|
+
* Destroys any existing VAD instance and, if a stream is provided, initializes a new one (used after device switching)
|
|
3989
|
+
*/
|
|
3990
|
+
async _reinitializeVAD(stream) {
|
|
3991
|
+
// Clean up existing VAD
|
|
3992
|
+
if (this.vad) {
|
|
3993
|
+
this.vad.pause();
|
|
3994
|
+
this.vad.destroy();
|
|
3995
|
+
this.vad = null;
|
|
3949
3996
|
}
|
|
3950
|
-
|
|
3951
|
-
|
|
3952
|
-
|
|
3997
|
+
// Reinitialize with new stream
|
|
3998
|
+
if (stream) {
|
|
3999
|
+
this._initializeVAD();
|
|
4000
|
+
}
|
|
4001
|
+
}
|
|
4002
|
+
/**
|
|
4003
|
+
* Sets up the device change event listener, which switches to the default input device when the current device is no longer available
|
|
4004
|
+
*/
|
|
4005
|
+
_setupDeviceChangeListener() {
|
|
4006
|
+
this.wavRecorder.listenForDeviceChange(async (devices) => {
|
|
4007
|
+
try {
|
|
4008
|
+
const currentDeviceExists = devices.some((device) => device.deviceId === this.deviceId);
|
|
4009
|
+
if (!currentDeviceExists) {
|
|
4010
|
+
console.log('Current device disconnected, switching to next available device');
|
|
4011
|
+
try {
|
|
4012
|
+
const nextDevice = devices.find((d) => d.default);
|
|
4013
|
+
if (nextDevice) {
|
|
4014
|
+
await this.setInputDevice(nextDevice.deviceId);
|
|
4015
|
+
// Mark recorder as started and attempt to notify server
|
|
4016
|
+
if (!this.recorderStarted) {
|
|
4017
|
+
this.recorderStarted = true;
|
|
4018
|
+
this._sendReadyIfNeeded();
|
|
4019
|
+
}
|
|
4020
|
+
// Notify about device switch
|
|
4021
|
+
if (this.options.onDeviceSwitched) {
|
|
4022
|
+
this.options.onDeviceSwitched(nextDevice.deviceId);
|
|
4023
|
+
}
|
|
4024
|
+
}
|
|
4025
|
+
else {
|
|
4026
|
+
console.warn('No alternative audio device found');
|
|
4027
|
+
}
|
|
4028
|
+
}
|
|
4029
|
+
catch (error) {
|
|
4030
|
+
console.error('Error switching to next device:', error);
|
|
4031
|
+
throw error;
|
|
4032
|
+
}
|
|
4033
|
+
}
|
|
4034
|
+
}
|
|
4035
|
+
catch (error) {
|
|
4036
|
+
this.options.onError(error instanceof Error ? error : new Error(String(error)));
|
|
4037
|
+
}
|
|
4038
|
+
});
|
|
3953
4039
|
}
|
|
3954
4040
|
}
|
|
3955
4041
|
|