@layercode/js-sdk 2.8.0 → 2.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -5945,6 +5945,7 @@ class LayercodeClient {
|
|
|
5945
5945
|
this._emitAudioInput();
|
|
5946
5946
|
this.AMPLITUDE_MONITORING_SAMPLE_RATE = 2;
|
|
5947
5947
|
this._websocketUrl = DEFAULT_WS_URL;
|
|
5948
|
+
this.audioOutputReady = null;
|
|
5948
5949
|
this.wavRecorder = new WavRecorder({ sampleRate: 8000 }); // TODO should be set by fetched agent config
|
|
5949
5950
|
this.wavPlayer = new WavStreamPlayer({
|
|
5950
5951
|
finishedPlayingCallback: this._clientResponseAudioReplayFinished.bind(this),
|
|
@@ -5961,6 +5962,7 @@ class LayercodeClient {
|
|
|
5961
5962
|
this.canInterrupt = false;
|
|
5962
5963
|
this.userIsSpeaking = false;
|
|
5963
5964
|
this.agentIsSpeaking = false;
|
|
5965
|
+
this.agentIsPlayingAudio = false;
|
|
5964
5966
|
this.recorderStarted = false;
|
|
5965
5967
|
this.readySent = false;
|
|
5966
5968
|
this.currentTurnId = null;
|
|
@@ -6106,12 +6108,21 @@ class LayercodeClient {
|
|
|
6106
6108
|
this.status = status;
|
|
6107
6109
|
this.options.onStatusChange(status);
|
|
6108
6110
|
}
|
|
6111
|
+
async _waitForAudioOutputReady() {
|
|
6112
|
+
if (!this.audioOutputReady) {
|
|
6113
|
+
return;
|
|
6114
|
+
}
|
|
6115
|
+
await this.audioOutputReady;
|
|
6116
|
+
}
|
|
6109
6117
|
_setAgentSpeaking(isSpeaking) {
|
|
6110
|
-
|
|
6118
|
+
// Track the actual audio playback state regardless of audioOutput setting
|
|
6119
|
+
this.agentIsPlayingAudio = isSpeaking;
|
|
6120
|
+
const shouldReportSpeaking = this.audioOutput && isSpeaking;
|
|
6121
|
+
if (this.agentIsSpeaking === shouldReportSpeaking) {
|
|
6111
6122
|
return;
|
|
6112
6123
|
}
|
|
6113
|
-
this.agentIsSpeaking =
|
|
6114
|
-
this.options.onAgentSpeakingChange(
|
|
6124
|
+
this.agentIsSpeaking = shouldReportSpeaking;
|
|
6125
|
+
this.options.onAgentSpeakingChange(shouldReportSpeaking);
|
|
6115
6126
|
}
|
|
6116
6127
|
_setUserSpeaking(isSpeaking) {
|
|
6117
6128
|
const shouldReportSpeaking = this._shouldCaptureUserAudio() && isSpeaking;
|
|
@@ -6179,7 +6190,6 @@ class LayercodeClient {
|
|
|
6179
6190
|
if (message.role === 'assistant') {
|
|
6180
6191
|
// Start tracking new agent turn
|
|
6181
6192
|
console.debug('Agent turn started, will track new turn ID from audio/text');
|
|
6182
|
-
this._setAgentSpeaking(true);
|
|
6183
6193
|
this._setUserSpeaking(false);
|
|
6184
6194
|
}
|
|
6185
6195
|
else if (message.role === 'user' && !this.pushToTalkEnabled) {
|
|
@@ -6200,10 +6210,24 @@ class LayercodeClient {
|
|
|
6200
6210
|
});
|
|
6201
6211
|
break;
|
|
6202
6212
|
}
|
|
6203
|
-
case 'response.audio':
|
|
6204
|
-
this.
|
|
6213
|
+
case 'response.audio': {
|
|
6214
|
+
await this._waitForAudioOutputReady();
|
|
6205
6215
|
const audioBuffer = base64ToArrayBuffer(message.content);
|
|
6206
|
-
|
|
6216
|
+
const hasAudioSamples = audioBuffer.byteLength > 0;
|
|
6217
|
+
let audioEnqueued = false;
|
|
6218
|
+
if (hasAudioSamples) {
|
|
6219
|
+
try {
|
|
6220
|
+
const playbackBuffer = this.wavPlayer.add16BitPCM(audioBuffer, message.turn_id);
|
|
6221
|
+
audioEnqueued = Boolean(playbackBuffer && playbackBuffer.length > 0);
|
|
6222
|
+
}
|
|
6223
|
+
catch (error) {
|
|
6224
|
+
this._setAgentSpeaking(false);
|
|
6225
|
+
throw error;
|
|
6226
|
+
}
|
|
6227
|
+
}
|
|
6228
|
+
else {
|
|
6229
|
+
console.debug(`Skipping empty audio response for turn ${message.turn_id}`);
|
|
6230
|
+
}
|
|
6207
6231
|
// TODO: once we've added turn_id to the turn.start msgs sent from the server, we should move this currentTurnId switching logic to the turn.start msg case. We can then remove the currentTurnId setting logic from the response.audio and response.text cases.
|
|
6208
6232
|
// Set current turn ID from first audio message, or update if different turn
|
|
6209
6233
|
if (!this.currentTurnId || this.currentTurnId !== message.turn_id) {
|
|
@@ -6212,7 +6236,11 @@ class LayercodeClient {
|
|
|
6212
6236
|
// Clean up interrupted tracks, keeping only the current turn
|
|
6213
6237
|
this.wavPlayer.clearInterruptedTracks(this.currentTurnId ? [this.currentTurnId] : []);
|
|
6214
6238
|
}
|
|
6239
|
+
if (audioEnqueued) {
|
|
6240
|
+
this._setAgentSpeaking(true);
|
|
6241
|
+
}
|
|
6215
6242
|
break;
|
|
6243
|
+
}
|
|
6216
6244
|
case 'response.text':
|
|
6217
6245
|
// Set turn ID from first text message if not set
|
|
6218
6246
|
if (!this.currentTurnId) {
|
|
@@ -6424,12 +6452,26 @@ class LayercodeClient {
|
|
|
6424
6452
|
this._emitAudioOutput();
|
|
6425
6453
|
if (state) {
|
|
6426
6454
|
this.wavPlayer.unmute();
|
|
6455
|
+
// Sync agentSpeaking state with actual playback state when enabling audio output
|
|
6456
|
+
this._syncAgentSpeakingState();
|
|
6427
6457
|
}
|
|
6428
6458
|
else {
|
|
6429
6459
|
this.wavPlayer.mute();
|
|
6460
|
+
this._setAgentSpeaking(false);
|
|
6430
6461
|
}
|
|
6431
6462
|
}
|
|
6432
6463
|
}
|
|
6464
|
+
/**
|
|
6465
|
+
* Syncs the reported agentSpeaking state with the actual audio playback state.
|
|
6466
|
+
* Called when audioOutput is enabled to ensure proper state synchronization.
|
|
6467
|
+
*/
|
|
6468
|
+
_syncAgentSpeakingState() {
|
|
6469
|
+
const shouldReportSpeaking = this.audioOutput && this.agentIsPlayingAudio;
|
|
6470
|
+
if (this.agentIsSpeaking !== shouldReportSpeaking) {
|
|
6471
|
+
this.agentIsSpeaking = shouldReportSpeaking;
|
|
6472
|
+
this.options.onAgentSpeakingChange(shouldReportSpeaking);
|
|
6473
|
+
}
|
|
6474
|
+
}
|
|
6433
6475
|
/** Emitters for audio flags */
|
|
6434
6476
|
_emitAudioInput() {
|
|
6435
6477
|
this.options.audioInputChanged(this.audioInput);
|
|
@@ -6485,7 +6527,9 @@ class LayercodeClient {
|
|
|
6485
6527
|
this.setupVadConfig(config);
|
|
6486
6528
|
// Bind the websocket message callbacks
|
|
6487
6529
|
this.bindWebsocketMessageCallbacks(ws, config);
|
|
6488
|
-
|
|
6530
|
+
const audioOutputReady = this.setupAudioOutput();
|
|
6531
|
+
this.audioOutputReady = audioOutputReady;
|
|
6532
|
+
await audioOutputReady;
|
|
6489
6533
|
}
|
|
6490
6534
|
catch (error) {
|
|
6491
6535
|
console.error('Error connecting to Layercode agent:', error);
|