@layercode/js-sdk 1.0.26 → 1.0.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -3477,6 +3477,8 @@ function arrayBufferToBase64(arrayBuffer) {
|
|
|
3477
3477
|
}
|
|
3478
3478
|
|
|
3479
3479
|
/* eslint-env browser */
|
|
3480
|
+
// SDK version - updated when publishing
|
|
3481
|
+
const SDK_VERSION = '1.0.27';
|
|
3480
3482
|
/**
|
|
3481
3483
|
* @class LayercodeClient
|
|
3482
3484
|
* @classdesc Core client for Layercode audio pipeline that manages audio recording, WebSocket communication, and speech processing.
|
|
@@ -3487,6 +3489,7 @@ class LayercodeClient {
|
|
|
3487
3489
|
* @param {Object} options - Configuration options
|
|
3488
3490
|
*/
|
|
3489
3491
|
constructor(options) {
|
|
3492
|
+
this.deviceId = null;
|
|
3490
3493
|
this.options = {
|
|
3491
3494
|
pipelineId: options.pipelineId,
|
|
3492
3495
|
sessionId: options.sessionId || null,
|
|
@@ -3496,6 +3499,7 @@ class LayercodeClient {
|
|
|
3496
3499
|
onConnect: options.onConnect || (() => { }),
|
|
3497
3500
|
onDisconnect: options.onDisconnect || (() => { }),
|
|
3498
3501
|
onError: options.onError || (() => { }),
|
|
3502
|
+
onDeviceSwitched: options.onDeviceSwitched || (() => { }),
|
|
3499
3503
|
onDataMessage: options.onDataMessage || (() => { }),
|
|
3500
3504
|
onUserAmplitudeChange: options.onUserAmplitudeChange || (() => { }),
|
|
3501
3505
|
onAgentAmplitudeChange: options.onAgentAmplitudeChange || (() => { }),
|
|
@@ -3523,71 +3527,28 @@ class LayercodeClient {
|
|
|
3523
3527
|
this.readySent = false;
|
|
3524
3528
|
this.currentTurnId = null;
|
|
3525
3529
|
this.audioBuffer = [];
|
|
3530
|
+
this.vadConfig = null;
|
|
3526
3531
|
// this.audioPauseTime = null;
|
|
3527
3532
|
// Bind event handlers
|
|
3528
3533
|
this._handleWebSocketMessage = this._handleWebSocketMessage.bind(this);
|
|
3529
3534
|
this._handleDataAvailable = this._handleDataAvailable.bind(this);
|
|
3530
|
-
|
|
3531
|
-
_setupAmplitudeBasedVAD() {
|
|
3532
|
-
let isSpeakingByAmplitude = false;
|
|
3533
|
-
let silenceFrames = 0;
|
|
3534
|
-
const AMPLITUDE_THRESHOLD = 0.01; // Adjust based on testing
|
|
3535
|
-
const SILENCE_FRAMES_THRESHOLD = 6.4; // 6.4 * 20ms chunks = 128ms silence. Same as Silero ((frame samples: 512 / sampleRate: 16000) * 1000 * redemptionFrames: 4) = 128 ms silence
|
|
3536
|
-
// Monitor amplitude changes
|
|
3537
|
-
this.wavRecorder.startAmplitudeMonitoring((amplitude) => {
|
|
3538
|
-
const wasSpeaking = isSpeakingByAmplitude;
|
|
3539
|
-
if (amplitude > AMPLITUDE_THRESHOLD) {
|
|
3540
|
-
silenceFrames = 0;
|
|
3541
|
-
if (!wasSpeaking) {
|
|
3542
|
-
isSpeakingByAmplitude = true;
|
|
3543
|
-
this.userIsSpeaking = true;
|
|
3544
|
-
this.options.onUserIsSpeakingChange(true);
|
|
3545
|
-
this._wsSend({
|
|
3546
|
-
type: 'vad_events',
|
|
3547
|
-
event: 'vad_start',
|
|
3548
|
-
});
|
|
3549
|
-
}
|
|
3550
|
-
}
|
|
3551
|
-
else {
|
|
3552
|
-
silenceFrames++;
|
|
3553
|
-
if (wasSpeaking && silenceFrames >= SILENCE_FRAMES_THRESHOLD) {
|
|
3554
|
-
isSpeakingByAmplitude = false;
|
|
3555
|
-
this.userIsSpeaking = false;
|
|
3556
|
-
this.options.onUserIsSpeakingChange(false);
|
|
3557
|
-
this._wsSend({
|
|
3558
|
-
type: 'vad_events',
|
|
3559
|
-
event: 'vad_end',
|
|
3560
|
-
});
|
|
3561
|
-
}
|
|
3562
|
-
}
|
|
3563
|
-
});
|
|
3535
|
+
this._setupDeviceChangeListener();
|
|
3564
3536
|
}
|
|
3565
3537
|
_initializeVAD() {
|
|
3566
|
-
|
|
3538
|
+
var _a;
|
|
3539
|
+
console.log('initializing VAD', { pushToTalkEnabled: this.pushToTalkEnabled, canInterrupt: this.canInterrupt, vadConfig: this.vadConfig });
|
|
3567
3540
|
// If we're in push to talk mode, we don't need to use the VAD model
|
|
3568
3541
|
if (this.pushToTalkEnabled) {
|
|
3569
3542
|
return;
|
|
3570
3543
|
}
|
|
3571
|
-
|
|
3572
|
-
|
|
3573
|
-
console.
|
|
3574
|
-
|
|
3575
|
-
|
|
3576
|
-
|
|
3577
|
-
|
|
3578
|
-
});
|
|
3579
|
-
// Set up amplitude-based fallback detection
|
|
3580
|
-
this._setupAmplitudeBasedVAD();
|
|
3581
|
-
}, 2000);
|
|
3582
|
-
dist.MicVAD.new({
|
|
3544
|
+
// Check if VAD is disabled
|
|
3545
|
+
if (((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.enabled) === false) {
|
|
3546
|
+
console.log('VAD is disabled by backend configuration');
|
|
3547
|
+
return;
|
|
3548
|
+
}
|
|
3549
|
+
// Build VAD configuration object, only including keys that are defined
|
|
3550
|
+
const vadOptions = {
|
|
3583
3551
|
stream: this.wavRecorder.getStream() || undefined,
|
|
3584
|
-
model: 'v5',
|
|
3585
|
-
positiveSpeechThreshold: 0.15,
|
|
3586
|
-
negativeSpeechThreshold: 0.05,
|
|
3587
|
-
redemptionFrames: 4,
|
|
3588
|
-
minSpeechFrames: 2,
|
|
3589
|
-
preSpeechPadFrames: 0,
|
|
3590
|
-
frameSamples: 512, // Required for v5 as per https://docs.vad.ricky0123.com/user-guide/algorithm/#configuration
|
|
3591
3552
|
onSpeechStart: () => {
|
|
3592
3553
|
console.log('onSpeechStart: sending vad_start');
|
|
3593
3554
|
this.userIsSpeaking = true;
|
|
@@ -3607,18 +3568,49 @@ class LayercodeClient {
|
|
|
3607
3568
|
event: 'vad_end',
|
|
3608
3569
|
});
|
|
3609
3570
|
},
|
|
3610
|
-
|
|
3611
|
-
|
|
3612
|
-
|
|
3613
|
-
|
|
3571
|
+
};
|
|
3572
|
+
// Apply VAD configuration from backend if available
|
|
3573
|
+
if (this.vadConfig) {
|
|
3574
|
+
// Only add keys that are explicitly defined (not undefined)
|
|
3575
|
+
if (this.vadConfig.model !== undefined)
|
|
3576
|
+
vadOptions.model = this.vadConfig.model;
|
|
3577
|
+
if (this.vadConfig.positive_speech_threshold !== undefined)
|
|
3578
|
+
vadOptions.positiveSpeechThreshold = this.vadConfig.positive_speech_threshold;
|
|
3579
|
+
if (this.vadConfig.negative_speech_threshold !== undefined)
|
|
3580
|
+
vadOptions.negativeSpeechThreshold = this.vadConfig.negative_speech_threshold;
|
|
3581
|
+
if (this.vadConfig.redemption_frames !== undefined)
|
|
3582
|
+
vadOptions.redemptionFrames = this.vadConfig.redemption_frames;
|
|
3583
|
+
if (this.vadConfig.min_speech_frames !== undefined)
|
|
3584
|
+
vadOptions.minSpeechFrames = this.vadConfig.min_speech_frames;
|
|
3585
|
+
if (this.vadConfig.pre_speech_pad_frames !== undefined)
|
|
3586
|
+
vadOptions.preSpeechPadFrames = this.vadConfig.pre_speech_pad_frames;
|
|
3587
|
+
if (this.vadConfig.frame_samples !== undefined)
|
|
3588
|
+
vadOptions.frameSamples = this.vadConfig.frame_samples;
|
|
3589
|
+
}
|
|
3590
|
+
else {
|
|
3591
|
+
// Default values if no config from backend
|
|
3592
|
+
vadOptions.model = 'v5';
|
|
3593
|
+
vadOptions.positiveSpeechThreshold = 0.15;
|
|
3594
|
+
vadOptions.negativeSpeechThreshold = 0.05;
|
|
3595
|
+
vadOptions.redemptionFrames = 4;
|
|
3596
|
+
vadOptions.minSpeechFrames = 2;
|
|
3597
|
+
vadOptions.preSpeechPadFrames = 0;
|
|
3598
|
+
vadOptions.frameSamples = 512; // Required for v5
|
|
3599
|
+
}
|
|
3600
|
+
console.log('Creating VAD with options:', vadOptions);
|
|
3601
|
+
dist.MicVAD.new(vadOptions)
|
|
3614
3602
|
.then((vad) => {
|
|
3615
|
-
clearTimeout(vadLoadTimeout);
|
|
3616
3603
|
this.vad = vad;
|
|
3617
3604
|
this.vad.start();
|
|
3618
|
-
console.log('VAD started');
|
|
3605
|
+
console.log('VAD started successfully');
|
|
3619
3606
|
})
|
|
3620
3607
|
.catch((error) => {
|
|
3621
|
-
console.
|
|
3608
|
+
console.warn('Error initializing VAD:', error);
|
|
3609
|
+
// Send a message to server indicating VAD failure
|
|
3610
|
+
this._wsSend({
|
|
3611
|
+
type: 'vad_events',
|
|
3612
|
+
event: 'vad_model_failed',
|
|
3613
|
+
});
|
|
3622
3614
|
});
|
|
3623
3615
|
}
|
|
3624
3616
|
/**
|
|
@@ -3722,7 +3714,7 @@ class LayercodeClient {
|
|
|
3722
3714
|
this.options.onDataMessage(message);
|
|
3723
3715
|
break;
|
|
3724
3716
|
default:
|
|
3725
|
-
console.
|
|
3717
|
+
console.warn('Unknown message type received:', message);
|
|
3726
3718
|
break;
|
|
3727
3719
|
}
|
|
3728
3720
|
}
|
|
@@ -3736,12 +3728,26 @@ class LayercodeClient {
|
|
|
3736
3728
|
* @param {ArrayBuffer} data - The audio data buffer
|
|
3737
3729
|
*/
|
|
3738
3730
|
_handleDataAvailable(data) {
|
|
3731
|
+
var _a, _b, _c;
|
|
3739
3732
|
try {
|
|
3740
3733
|
const base64 = arrayBufferToBase64(data.mono);
|
|
3741
|
-
|
|
3734
|
+
// Determine if we should gate audio based on VAD configuration
|
|
3735
|
+
const shouldGateAudio = ((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.gate_audio) !== false; // Default to true if not specified
|
|
3736
|
+
const bufferFrames = (_c = (_b = this.vadConfig) === null || _b === void 0 ? void 0 : _b.buffer_frames) !== null && _c !== void 0 ? _c : 10; // Default to 10 if not specified
|
|
3737
|
+
let sendAudio;
|
|
3738
|
+
if (this.pushToTalkEnabled) {
|
|
3739
|
+
sendAudio = this.pushToTalkActive;
|
|
3740
|
+
}
|
|
3741
|
+
else if (shouldGateAudio) {
|
|
3742
|
+
sendAudio = this.userIsSpeaking;
|
|
3743
|
+
}
|
|
3744
|
+
else {
|
|
3745
|
+
// If gate_audio is false, always send audio
|
|
3746
|
+
sendAudio = true;
|
|
3747
|
+
}
|
|
3742
3748
|
if (sendAudio) {
|
|
3743
|
-
// If we have buffered audio, send it first
|
|
3744
|
-
if (this.audioBuffer.length > 0) {
|
|
3749
|
+
// If we have buffered audio and we're gating, send it first
|
|
3750
|
+
if (shouldGateAudio && this.audioBuffer.length > 0) {
|
|
3745
3751
|
console.log(`Sending ${this.audioBuffer.length} buffered audio chunks`);
|
|
3746
3752
|
for (const bufferedAudio of this.audioBuffer) {
|
|
3747
3753
|
this._wsSend({
|
|
@@ -3760,8 +3766,8 @@ class LayercodeClient {
|
|
|
3760
3766
|
else {
|
|
3761
3767
|
// Buffer audio when not sending (to catch audio just before VAD triggers)
|
|
3762
3768
|
this.audioBuffer.push(base64);
|
|
3763
|
-
// Keep buffer size
|
|
3764
|
-
if (this.audioBuffer.length >
|
|
3769
|
+
// Keep buffer size based on configuration
|
|
3770
|
+
if (this.audioBuffer.length > bufferFrames) {
|
|
3765
3771
|
this.audioBuffer.shift(); // Remove oldest chunk
|
|
3766
3772
|
}
|
|
3767
3773
|
}
|
|
@@ -3824,6 +3830,7 @@ class LayercodeClient {
|
|
|
3824
3830
|
let authorizeSessionRequestBody = {
|
|
3825
3831
|
pipeline_id: this.options.pipelineId,
|
|
3826
3832
|
metadata: this.options.metadata,
|
|
3833
|
+
sdk_version: SDK_VERSION,
|
|
3827
3834
|
};
|
|
3828
3835
|
// If we're reconnecting to a previous session, we need to include the session_id in the request. Otherwise we don't send session_id, and a new session will be created and the session_id will be returned in the response.
|
|
3829
3836
|
if (this.options.sessionId) {
|
|
@@ -3847,6 +3854,8 @@ class LayercodeClient {
|
|
|
3847
3854
|
})}`);
|
|
3848
3855
|
const config = authorizeSessionResponseBody.config;
|
|
3849
3856
|
console.log('config', config);
|
|
3857
|
+
// Store VAD configuration
|
|
3858
|
+
this.vadConfig = config.vad || null;
|
|
3850
3859
|
if (config.transcription.trigger === 'push_to_talk') {
|
|
3851
3860
|
this.pushToTalkEnabled = true;
|
|
3852
3861
|
}
|
|
@@ -3857,7 +3866,6 @@ class LayercodeClient {
|
|
|
3857
3866
|
else {
|
|
3858
3867
|
throw new Error(`Unknown trigger: ${config.transcription.trigger}`);
|
|
3859
3868
|
}
|
|
3860
|
-
this._initializeVAD();
|
|
3861
3869
|
// Bind the websocket message callbacks
|
|
3862
3870
|
this.ws.onmessage = this._handleWebSocketMessage;
|
|
3863
3871
|
this.ws.onopen = () => {
|
|
@@ -3877,18 +3885,13 @@ class LayercodeClient {
|
|
|
3877
3885
|
this._setStatus('error');
|
|
3878
3886
|
this.options.onError(new Error('WebSocket connection error'));
|
|
3879
3887
|
};
|
|
3880
|
-
// Initialize microphone audio capture
|
|
3881
|
-
await this.wavRecorder.begin();
|
|
3882
|
-
await this.wavRecorder.record(this._handleDataAvailable, 1638);
|
|
3883
|
-
// Set up microphone amplitude monitoring
|
|
3884
|
-
this._setupAmplitudeMonitoring(this.wavRecorder, this.options.onUserAmplitudeChange, (amp) => (this.userAudioAmplitude = amp));
|
|
3885
3888
|
// Initialize audio player
|
|
3886
3889
|
await this.wavPlayer.connect();
|
|
3887
3890
|
// Set up audio player amplitude monitoring
|
|
3888
3891
|
this._setupAmplitudeMonitoring(this.wavPlayer, this.options.onAgentAmplitudeChange, (amp) => (this.agentAudioAmplitude = amp));
|
|
3889
|
-
//
|
|
3890
|
-
|
|
3891
|
-
this
|
|
3892
|
+
// wavRecorder will be started from the onDeviceSwitched callback,
|
|
3893
|
+
// which is called when the device is first initialized and also when the device is switched
|
|
3894
|
+
// this is to ensure that the device is initialized before the recorder is started
|
|
3892
3895
|
}
|
|
3893
3896
|
catch (error) {
|
|
3894
3897
|
console.error('Error connecting to Layercode pipeline:', error);
|
|
@@ -3908,6 +3911,7 @@ class LayercodeClient {
|
|
|
3908
3911
|
this.vad.destroy();
|
|
3909
3912
|
this.vad = null;
|
|
3910
3913
|
}
|
|
3914
|
+
this.wavRecorder.listenForDeviceChange(null);
|
|
3911
3915
|
this.wavRecorder.quit();
|
|
3912
3916
|
this.wavPlayer.disconnect();
|
|
3913
3917
|
// Reset turn tracking
|
|
@@ -3931,19 +3935,101 @@ class LayercodeClient {
|
|
|
3931
3935
|
* @param {string} deviceId - The deviceId of the new microphone
|
|
3932
3936
|
*/
|
|
3933
3937
|
async setInputDevice(deviceId) {
|
|
3934
|
-
|
|
3938
|
+
var _a;
|
|
3939
|
+
try {
|
|
3940
|
+
this.deviceId = deviceId;
|
|
3941
|
+
// Restart recording with the new device
|
|
3942
|
+
await this._restartAudioRecording();
|
|
3943
|
+
// Reinitialize VAD with the new audio stream if VAD is enabled
|
|
3944
|
+
const shouldUseVAD = !this.pushToTalkEnabled && ((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.enabled) !== false;
|
|
3945
|
+
if (shouldUseVAD) {
|
|
3946
|
+
console.log('Reinitializing VAD with new audio stream');
|
|
3947
|
+
const newStream = this.wavRecorder.getStream();
|
|
3948
|
+
await this._reinitializeVAD(newStream);
|
|
3949
|
+
}
|
|
3950
|
+
console.log(`Successfully switched to input device: ${deviceId}`);
|
|
3951
|
+
}
|
|
3952
|
+
catch (error) {
|
|
3953
|
+
console.error(`Failed to switch to input device ${deviceId}:`, error);
|
|
3954
|
+
throw new Error(`Failed to switch to input device: ${error instanceof Error ? error.message : String(error)}`);
|
|
3955
|
+
}
|
|
3956
|
+
}
|
|
3957
|
+
/**
|
|
3958
|
+
* Restarts audio recording after a device switch to ensure audio is captured from the new device
|
|
3959
|
+
*/
|
|
3960
|
+
async _restartAudioRecording() {
|
|
3961
|
+
try {
|
|
3962
|
+
console.log('Restarting audio recording after device switch...');
|
|
3935
3963
|
try {
|
|
3936
3964
|
await this.wavRecorder.end();
|
|
3937
3965
|
}
|
|
3938
|
-
catch (
|
|
3939
|
-
|
|
3940
|
-
await this.wavRecorder.quit();
|
|
3966
|
+
catch (_a) {
|
|
3967
|
+
// Ignore cleanup errors
|
|
3941
3968
|
}
|
|
3942
|
-
|
|
3969
|
+
// Start with new device
|
|
3970
|
+
await this.wavRecorder.begin(this.deviceId || undefined);
|
|
3971
|
+
await this.wavRecorder.record(this._handleDataAvailable, 1638);
|
|
3972
|
+
// Re-setup amplitude monitoring with the new stream
|
|
3973
|
+
this._setupAmplitudeMonitoring(this.wavRecorder, this.options.onUserAmplitudeChange, (amp) => (this.userAudioAmplitude = amp));
|
|
3974
|
+
console.log('Audio recording restart completed successfully');
|
|
3975
|
+
}
|
|
3976
|
+
catch (error) {
|
|
3977
|
+
console.error('Error restarting audio recording after device switch:', error);
|
|
3978
|
+
this.options.onError(error instanceof Error ? error : new Error(String(error)));
|
|
3979
|
+
}
|
|
3980
|
+
}
|
|
3981
|
+
/**
|
|
3982
|
+
* Reinitializes VAD with a new stream (used after device switching)
|
|
3983
|
+
*/
|
|
3984
|
+
async _reinitializeVAD(stream) {
|
|
3985
|
+
// Clean up existing VAD
|
|
3986
|
+
if (this.vad) {
|
|
3987
|
+
this.vad.pause();
|
|
3988
|
+
this.vad.destroy();
|
|
3989
|
+
this.vad = null;
|
|
3943
3990
|
}
|
|
3944
|
-
|
|
3945
|
-
|
|
3946
|
-
|
|
3991
|
+
// Reinitialize with new stream
|
|
3992
|
+
if (stream) {
|
|
3993
|
+
this._initializeVAD();
|
|
3994
|
+
}
|
|
3995
|
+
}
|
|
3996
|
+
/**
|
|
3997
|
+
* Sets up the device change event listener
|
|
3998
|
+
*/
|
|
3999
|
+
_setupDeviceChangeListener() {
|
|
4000
|
+
this.wavRecorder.listenForDeviceChange(async (devices) => {
|
|
4001
|
+
try {
|
|
4002
|
+
const currentDeviceExists = devices.some((device) => device.deviceId === this.deviceId);
|
|
4003
|
+
if (!currentDeviceExists) {
|
|
4004
|
+
console.log('Current device disconnected, switching to next available device');
|
|
4005
|
+
try {
|
|
4006
|
+
const nextDevice = devices.find((d) => d.default);
|
|
4007
|
+
if (nextDevice) {
|
|
4008
|
+
await this.setInputDevice(nextDevice.deviceId);
|
|
4009
|
+
// Mark recorder as started and attempt to notify server
|
|
4010
|
+
if (!this.recorderStarted) {
|
|
4011
|
+
this.recorderStarted = true;
|
|
4012
|
+
this._sendReadyIfNeeded();
|
|
4013
|
+
}
|
|
4014
|
+
// Notify about device switch
|
|
4015
|
+
if (this.options.onDeviceSwitched) {
|
|
4016
|
+
this.options.onDeviceSwitched(nextDevice.deviceId);
|
|
4017
|
+
}
|
|
4018
|
+
}
|
|
4019
|
+
else {
|
|
4020
|
+
console.warn('No alternative audio device found');
|
|
4021
|
+
}
|
|
4022
|
+
}
|
|
4023
|
+
catch (error) {
|
|
4024
|
+
console.error('Error switching to next device:', error);
|
|
4025
|
+
throw error;
|
|
4026
|
+
}
|
|
4027
|
+
}
|
|
4028
|
+
}
|
|
4029
|
+
catch (error) {
|
|
4030
|
+
this.options.onError(error instanceof Error ? error : new Error(String(error)));
|
|
4031
|
+
}
|
|
4032
|
+
});
|
|
3947
4033
|
}
|
|
3948
4034
|
}
|
|
3949
4035
|
|