@layercode/js-sdk 2.0.1 → 2.0.3
This diff compares publicly released versions of the package as they appear in their respective registries. It is provided for informational purposes only.
@@ -3484,7 +3484,7 @@ registerProcessor('audio_processor', AudioProcessor);

 /* eslint-env browser */
 // SDK version - updated when publishing
-const SDK_VERSION = '2.0.
+const SDK_VERSION = '2.0.2';
 /**
  * @class LayercodeClient
  * @classdesc Core client for Layercode audio agent that manages audio recording, WebSocket communication, and speech processing.
@@ -3507,13 +3507,15 @@ registerProcessor('audio_processor', AudioProcessor);
             onError: options.onError || (() => { }),
             onDeviceSwitched: options.onDeviceSwitched || (() => { }),
             onDataMessage: options.onDataMessage || (() => { }),
+            onMessage: options.onMessage || (() => { }),
             onUserAmplitudeChange: options.onUserAmplitudeChange || (() => { }),
             onAgentAmplitudeChange: options.onAgentAmplitudeChange || (() => { }),
             onStatusChange: options.onStatusChange || (() => { }),
             onUserIsSpeakingChange: options.onUserIsSpeakingChange || (() => { }),
+            onMuteStateChange: options.onMuteStateChange || (() => { }),
         };
-        this.AMPLITUDE_MONITORING_SAMPLE_RATE =
-        this._websocketUrl = 'wss://api.layercode.com/v1/agents/websocket';
+        this.AMPLITUDE_MONITORING_SAMPLE_RATE = 2;
+        this._websocketUrl = 'wss://api.layercode.com/v1/agents/web/websocket';
         this.wavRecorder = new WavRecorder({ sampleRate: 8000 }); // TODO should be set my fetched agent config
         this.wavPlayer = new WavStreamPlayer({
             finishedPlayingCallback: this._clientResponseAudioReplayFinished.bind(this),
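
This hunk adds two constructor callbacks, onMessage and onMuteStateChange, and moves the client to the new /v1/agents/web/websocket endpoint. A minimal consumer-side sketch, assuming the package's default export is the LayercodeClient class and omitting the connection/session options (see the SDK docs for those):

    import LayercodeClient from '@layercode/js-sdk'; // default export name assumed

    const client = new LayercodeClient({
        // ...connection/session options omitted
        onStatusChange: (status) => console.log('status:', status),
        // New in this release: catch-all callback for server and VAD messages
        // (turn.start, response.text, user.transcript[.delta], vad_events)
        onMessage: (message) => console.log('message:', message.type),
        // New in this release: fired by mute()/unmute() with the current state
        onMuteStateChange: (isMuted) => console.log('muted:', isMuted),
    });
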
@@ -3534,6 +3536,7 @@ registerProcessor('audio_processor', AudioProcessor);
         this.currentTurnId = null;
         this.audioBuffer = [];
         this.vadConfig = null;
+        this.isMuted = false;
         // this.audioPauseTime = null;
         // Bind event handlers
         this._handleWebSocketMessage = this._handleWebSocketMessage.bind(this);
@@ -3556,16 +3559,20 @@ registerProcessor('audio_processor', AudioProcessor);
         const vadOptions = {
             stream: this.wavRecorder.getStream() || undefined,
             onSpeechStart: () => {
-                console.
+                console.debug('onSpeechStart: sending vad_start');
                 this.userIsSpeaking = true;
                 this.options.onUserIsSpeakingChange(true);
                 this._wsSend({
                     type: 'vad_events',
                     event: 'vad_start',
                 });
+                this.options.onMessage({
+                    type: 'vad_events',
+                    event: 'vad_start',
+                });
             },
             onSpeechEnd: () => {
-                console.
+                console.debug('onSpeechEnd: sending vad_end');
                 this.userIsSpeaking = false;
                 this.options.onUserIsSpeakingChange(false);
                 this.audioBuffer = []; // Clear buffer on speech end
@@ -3573,6 +3580,10 @@ registerProcessor('audio_processor', AudioProcessor);
                     type: 'vad_events',
                     event: 'vad_end',
                 });
+                this.options.onMessage({
+                    type: 'vad_events',
+                    event: 'vad_end',
+                });
             },
         };
         // Apply VAD configuration from backend if available
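
With the two hunks above, local VAD transitions are now surfaced to the application through onMessage as well as being sent to the server. The payload is exactly the object shown, so a handler can key off it directly; a sketch, where updateMicIndicator is a hypothetical UI helper:

    // Pass as the onMessage option when constructing the client
    function handleVadEvents(message) {
        if (message.type === 'vad_events') {
            // message.event is 'vad_start' or 'vad_end'
            updateMicIndicator(message.event === 'vad_start');
        }
    }
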
@@ -3631,32 +3642,14 @@ registerProcessor('audio_processor', AudioProcessor);
      * Handles when agent audio finishes playing
      */
     _clientResponseAudioReplayFinished() {
-        console.
+        console.debug('clientResponseAudioReplayFinished');
         this._wsSend({
             type: 'trigger.response.audio.replay_finished',
             reason: 'completed',
         });
     }
     async _clientInterruptAssistantReplay() {
-
-        if (offsetData && this.currentTurnId) {
-            let offsetMs = offsetData.currentTime * 1000;
-            // Send interruption event with accurate playback offset in milliseconds
-            this._wsSend({
-                type: 'trigger.response.audio.interrupted',
-                playback_offset: offsetMs,
-                interruption_context: {
-                    turn_id: this.currentTurnId,
-                    playback_offset_ms: offsetMs,
-                },
-            });
-        }
-        else {
-            console.warn('Interruption requested but missing required data:', {
-                hasOffsetData: !!offsetData,
-                hasTurnId: !!this.currentTurnId,
-            });
-        }
+        await this.wavPlayer.interrupt();
     }
     async triggerUserTurnStarted() {
         if (!this.pushToTalkActive) {
@@ -3679,49 +3672,51 @@ registerProcessor('audio_processor', AudioProcessor);
         try {
             const message = JSON.parse(event.data);
             if (message.type !== 'response.audio') {
-                console.
+                console.debug('msg:', message);
             }
             switch (message.type) {
                 case 'turn.start':
                     // Sent from the server to this client when a new user turn is detected
-                    console.log('received turn.start from server');
-                    console.log(message);
                     if (message.role === 'assistant') {
                         // Start tracking new assistant turn
-                        console.
+                        console.debug('Assistant turn started, will track new turn ID from audio/text');
                     }
                     else if (message.role === 'user' && !this.pushToTalkEnabled) {
                         // Interrupt any playing assistant audio if this is a turn triggered by the server (and not push to talk, which will have already called interrupt)
-                        console.
+                        console.debug('interrupting assistant audio, as user turn has started and pushToTalkEnabled is false');
                         await this._clientInterruptAssistantReplay();
                     }
+                    this.options.onMessage(message);
                     break;
                 case 'response.audio':
                     const audioBuffer = base64ToArrayBuffer(message.content);
                     this.wavPlayer.add16BitPCM(audioBuffer, message.turn_id);
+                    // TODO: once we've added turn_id to the turn.start msgs sent from teh server, we should move this currentTurnId switching logic to the turn.start msg case. We can then remove the currentTurnId setting logic from the response.audio and response.text cases.
                    // Set current turn ID from first audio message, or update if different turn
                     if (!this.currentTurnId || this.currentTurnId !== message.turn_id) {
-                        console.
+                        console.debug(`Setting current turn ID to: ${message.turn_id} (was: ${this.currentTurnId})`);
                         this.currentTurnId = message.turn_id;
                         // Clean up interrupted tracks, keeping only the current turn
                         this.wavPlayer.clearInterruptedTracks(this.currentTurnId ? [this.currentTurnId] : []);
                     }
                     break;
-                case 'response.text':
+                case 'response.text':
                     // Set turn ID from first text message if not set
                     if (!this.currentTurnId) {
                         this.currentTurnId = message.turn_id;
-                        console.
+                        console.debug(`Setting current turn ID to: ${message.turn_id} from text message`);
                     }
+                    this.options.onMessage(message);
                     break;
-                }
                 case 'response.data':
-                    console.log('received response.data', message);
                     this.options.onDataMessage(message);
                     break;
+                case 'user.transcript':
+                case 'user.transcript.delta':
+                    this.options.onMessage(message);
+                    break;
                 default:
                     console.warn('Unknown message type received:', message);
-                    break;
             }
         }
         catch (error) {
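
Taken together, this hunk forwards turn.start, response.text, user.transcript, and user.transcript.delta frames to the new onMessage callback, while response.data still goes exclusively to onDataMessage. A dispatch sketch for the application side; the render helpers are hypothetical:

    // Pass as the onMessage option when constructing the client
    function handleMessage(message) {
        switch (message.type) {
            case 'turn.start':
                console.log('turn started, role:', message.role);
                break;
            case 'response.text':
                appendAgentText(message); // hypothetical render helper
                break;
            case 'user.transcript':
            case 'user.transcript.delta':
                renderUserTranscript(message); // hypothetical render helper
                break;
            case 'vad_events':
                // 'vad_start' or 'vad_end', emitted by the local VAD hooks above
                break;
        }
    }
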
@@ -3737,6 +3732,10 @@ registerProcessor('audio_processor', AudioProcessor);
         var _a, _b, _c;
         try {
             const base64 = arrayBufferToBase64(data.mono);
+            // Don't send audio if muted
+            if (this.isMuted) {
+                return;
+            }
             // Determine if we should gate audio based on VAD configuration
             const shouldGateAudio = ((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.gate_audio) !== false; // Default to true if not specified
             const bufferFrames = (_c = (_b = this.vadConfig) === null || _b === void 0 ? void 0 : _b.buffer_frames) !== null && _c !== void 0 ? _c : 10; // Default to 10 if not specified
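
The transpiled null-checks above implement two defaults from the backend-supplied VAD config; in modern syntax they compute simply:

    // Equivalent of the transpiled checks above
    const shouldGateAudio = this.vadConfig?.gate_audio !== false; // default: true when unspecified
    const bufferFrames = this.vadConfig?.buffer_frames ?? 10;     // default: 10 when unspecified
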
@@ -3754,7 +3753,7 @@ registerProcessor('audio_processor', AudioProcessor);
             if (sendAudio) {
                 // If we have buffered audio and we're gating, send it first
                 if (shouldGateAudio && this.audioBuffer.length > 0) {
-                    console.
+                    console.debug(`Sending ${this.audioBuffer.length} buffered audio chunks`);
                     for (const bufferedAudio of this.audioBuffer) {
                         this._wsSend({
                             type: 'client.audio',
@@ -3786,7 +3785,7 @@ registerProcessor('audio_processor', AudioProcessor);
     _wsSend(message) {
         var _a;
         if (message.type !== 'client.audio') {
-            console.
+            console.debug('sent_msg:', message);
         }
         const messageString = JSON.stringify(message);
         if (((_a = this.ws) === null || _a === void 0 ? void 0 : _a.readyState) === WebSocket.OPEN) {
@@ -3859,7 +3858,7 @@ registerProcessor('audio_processor', AudioProcessor);
             client_session_key: authorizeSessionResponseBody.client_session_key,
         })}`);
         const config = authorizeSessionResponseBody.config;
-        console.log('
+        console.log('AgentConfig', config);
         // Store VAD configuration
         this.vadConfig = config.vad || null;
         if (config.transcription.trigger === 'push_to_talk') {
@@ -3908,7 +3907,7 @@ registerProcessor('audio_processor', AudioProcessor);
     }
     _resetTurnTracking() {
         this.currentTurnId = null;
-        console.
+        console.debug('Reset turn tracking state');
     }
     async disconnect() {
         // Clean up VAD if it exists
@@ -3949,11 +3948,11 @@ registerProcessor('audio_processor', AudioProcessor);
             // Reinitialize VAD with the new audio stream if VAD is enabled
             const shouldUseVAD = !this.pushToTalkEnabled && ((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.enabled) !== false;
             if (shouldUseVAD) {
-                console.
+                console.debug('Reinitializing VAD with new audio stream');
                 const newStream = this.wavRecorder.getStream();
                 await this._reinitializeVAD(newStream);
             }
-            console.
+            console.debug(`Successfully switched to input device: ${deviceId}`);
         }
         catch (error) {
             console.error(`Failed to switch to input device ${deviceId}:`, error);
@@ -3965,7 +3964,7 @@ registerProcessor('audio_processor', AudioProcessor);
      */
     async _restartAudioRecording() {
         try {
-            console.
+            console.debug('Restarting audio recording after device switch...');
             try {
                 await this.wavRecorder.end();
             }
@@ -3977,7 +3976,7 @@ registerProcessor('audio_processor', AudioProcessor);
             await this.wavRecorder.record(this._handleDataAvailable, 1638);
             // Re-setup amplitude monitoring with the new stream
             this._setupAmplitudeMonitoring(this.wavRecorder, this.options.onUserAmplitudeChange, (amp) => (this.userAudioAmplitude = amp));
-            console.
+            console.debug('Audio recording restart completed successfully');
         }
         catch (error) {
             console.error('Error restarting audio recording after device switch:', error);
@@ -4007,7 +4006,7 @@ registerProcessor('audio_processor', AudioProcessor);
         try {
             const currentDeviceExists = devices.some((device) => device.deviceId === this.deviceId);
             if (!currentDeviceExists) {
-                console.
+                console.debug('Current device disconnected, switching to next available device');
                 try {
                     const nextDevice = devices.find((d) => d.default);
                     if (nextDevice) {
@@ -4037,6 +4036,27 @@ registerProcessor('audio_processor', AudioProcessor);
             }
         });
     }
+    /**
+     * Mutes the microphone to stop sending audio to the server
+     * The connection and recording remain active for quick unmute
+     */
+    mute() {
+        if (!this.isMuted) {
+            this.isMuted = true;
+            console.log('Microphone muted');
+            this.options.onMuteStateChange(true);
+        }
+    }
+    /**
+     * Unmutes the microphone to resume sending audio to the server
+     */
+    unmute() {
+        if (this.isMuted) {
+            this.isMuted = false;
+            console.log('Microphone unmuted');
+            this.options.onMuteStateChange(false);
+        }
+    }
 }

 return LayercodeClient;
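
The new mute()/unmute() pair flips the isMuted flag that _handleDataAvailable checks before sending audio, so the recorder and websocket stay live and unmuting is instantaneous; onMuteStateChange reports each transition. A minimal toggle sketch, where the button element and a connected client instance are assumed:

    const button = document.querySelector('#mute-toggle'); // hypothetical element
    button.addEventListener('click', () => {
        if (client.isMuted) {
            client.unmute();
        }
        else {
            client.mute();
        }
    });
    // Keep the UI in sync via the constructor option rather than polling isMuted:
    // onMuteStateChange: (isMuted) => { button.textContent = isMuted ? 'Unmute' : 'Mute'; }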