@layercode/js-sdk 2.8.0 → 2.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -5951,6 +5951,7 @@ registerProcessor('audio_processor', AudioProcessor);
|
|
|
5951
5951
|
this._emitAudioInput();
|
|
5952
5952
|
this.AMPLITUDE_MONITORING_SAMPLE_RATE = 2;
|
|
5953
5953
|
this._websocketUrl = DEFAULT_WS_URL;
|
|
5954
|
+
this.audioOutputReady = null;
|
|
5954
5955
|
this.wavRecorder = new WavRecorder({ sampleRate: 8000 }); // TODO should be set by fetched agent config
|
|
5955
5956
|
this.wavPlayer = new WavStreamPlayer({
|
|
5956
5957
|
finishedPlayingCallback: this._clientResponseAudioReplayFinished.bind(this),
|
|
@@ -5967,6 +5968,7 @@ registerProcessor('audio_processor', AudioProcessor);
|
|
|
5967
5968
|
this.canInterrupt = false;
|
|
5968
5969
|
this.userIsSpeaking = false;
|
|
5969
5970
|
this.agentIsSpeaking = false;
|
|
5971
|
+
this.agentIsPlayingAudio = false;
|
|
5970
5972
|
this.recorderStarted = false;
|
|
5971
5973
|
this.readySent = false;
|
|
5972
5974
|
this.currentTurnId = null;
|
|
@@ -6112,12 +6114,21 @@ registerProcessor('audio_processor', AudioProcessor);
|
|
|
6112
6114
|
this.status = status;
|
|
6113
6115
|
this.options.onStatusChange(status);
|
|
6114
6116
|
}
|
|
6117
|
+
async _waitForAudioOutputReady() {
|
|
6118
|
+
if (!this.audioOutputReady) {
|
|
6119
|
+
return;
|
|
6120
|
+
}
|
|
6121
|
+
await this.audioOutputReady;
|
|
6122
|
+
}
|
|
6115
6123
|
_setAgentSpeaking(isSpeaking) {
|
|
6116
|
-
|
|
6124
|
+
// Track the actual audio playback state regardless of audioOutput setting
|
|
6125
|
+
this.agentIsPlayingAudio = isSpeaking;
|
|
6126
|
+
const shouldReportSpeaking = this.audioOutput && isSpeaking;
|
|
6127
|
+
if (this.agentIsSpeaking === shouldReportSpeaking) {
|
|
6117
6128
|
return;
|
|
6118
6129
|
}
|
|
6119
|
-
this.agentIsSpeaking =
|
|
6120
|
-
this.options.onAgentSpeakingChange(
|
|
6130
|
+
this.agentIsSpeaking = shouldReportSpeaking;
|
|
6131
|
+
this.options.onAgentSpeakingChange(shouldReportSpeaking);
|
|
6121
6132
|
}
|
|
6122
6133
|
_setUserSpeaking(isSpeaking) {
|
|
6123
6134
|
const shouldReportSpeaking = this._shouldCaptureUserAudio() && isSpeaking;
|
|
@@ -6185,7 +6196,6 @@ registerProcessor('audio_processor', AudioProcessor);
|
|
|
6185
6196
|
if (message.role === 'assistant') {
|
|
6186
6197
|
// Start tracking new agent turn
|
|
6187
6198
|
console.debug('Agent turn started, will track new turn ID from audio/text');
|
|
6188
|
-
this._setAgentSpeaking(true);
|
|
6189
6199
|
this._setUserSpeaking(false);
|
|
6190
6200
|
}
|
|
6191
6201
|
else if (message.role === 'user' && !this.pushToTalkEnabled) {
|
|
@@ -6206,10 +6216,24 @@ registerProcessor('audio_processor', AudioProcessor);
|
|
|
6206
6216
|
});
|
|
6207
6217
|
break;
|
|
6208
6218
|
}
|
|
6209
|
-
case 'response.audio':
|
|
6210
|
-
this.
|
|
6219
|
+
case 'response.audio': {
|
|
6220
|
+
await this._waitForAudioOutputReady();
|
|
6211
6221
|
const audioBuffer = base64ToArrayBuffer(message.content);
|
|
6212
|
-
|
|
6222
|
+
const hasAudioSamples = audioBuffer.byteLength > 0;
|
|
6223
|
+
let audioEnqueued = false;
|
|
6224
|
+
if (hasAudioSamples) {
|
|
6225
|
+
try {
|
|
6226
|
+
const playbackBuffer = this.wavPlayer.add16BitPCM(audioBuffer, message.turn_id);
|
|
6227
|
+
audioEnqueued = Boolean(playbackBuffer && playbackBuffer.length > 0);
|
|
6228
|
+
}
|
|
6229
|
+
catch (error) {
|
|
6230
|
+
this._setAgentSpeaking(false);
|
|
6231
|
+
throw error;
|
|
6232
|
+
}
|
|
6233
|
+
}
|
|
6234
|
+
else {
|
|
6235
|
+
console.debug(`Skipping empty audio response for turn ${message.turn_id}`);
|
|
6236
|
+
}
|
|
6213
6237
|
// TODO: once we've added turn_id to the turn.start msgs sent from the server, we should move this currentTurnId switching logic to the turn.start msg case. We can then remove the currentTurnId setting logic from the response.audio and response.text cases.
|
|
6214
6238
|
// Set current turn ID from first audio message, or update if different turn
|
|
6215
6239
|
if (!this.currentTurnId || this.currentTurnId !== message.turn_id) {
|
|
@@ -6218,7 +6242,11 @@ registerProcessor('audio_processor', AudioProcessor);
|
|
|
6218
6242
|
// Clean up interrupted tracks, keeping only the current turn
|
|
6219
6243
|
this.wavPlayer.clearInterruptedTracks(this.currentTurnId ? [this.currentTurnId] : []);
|
|
6220
6244
|
}
|
|
6245
|
+
if (audioEnqueued) {
|
|
6246
|
+
this._setAgentSpeaking(true);
|
|
6247
|
+
}
|
|
6221
6248
|
break;
|
|
6249
|
+
}
|
|
6222
6250
|
case 'response.text':
|
|
6223
6251
|
// Set turn ID from first text message if not set
|
|
6224
6252
|
if (!this.currentTurnId) {
|
|
@@ -6430,12 +6458,26 @@ registerProcessor('audio_processor', AudioProcessor);
|
|
|
6430
6458
|
this._emitAudioOutput();
|
|
6431
6459
|
if (state) {
|
|
6432
6460
|
this.wavPlayer.unmute();
|
|
6461
|
+
// Sync agentSpeaking state with actual playback state when enabling audio output
|
|
6462
|
+
this._syncAgentSpeakingState();
|
|
6433
6463
|
}
|
|
6434
6464
|
else {
|
|
6435
6465
|
this.wavPlayer.mute();
|
|
6466
|
+
this._setAgentSpeaking(false);
|
|
6436
6467
|
}
|
|
6437
6468
|
}
|
|
6438
6469
|
}
|
|
6470
|
+
/**
|
|
6471
|
+
* Syncs the reported agentSpeaking state with the actual audio playback state.
|
|
6472
|
+
* Called when audioOutput is enabled to ensure proper state synchronization.
|
|
6473
|
+
*/
|
|
6474
|
+
_syncAgentSpeakingState() {
|
|
6475
|
+
const shouldReportSpeaking = this.audioOutput && this.agentIsPlayingAudio;
|
|
6476
|
+
if (this.agentIsSpeaking !== shouldReportSpeaking) {
|
|
6477
|
+
this.agentIsSpeaking = shouldReportSpeaking;
|
|
6478
|
+
this.options.onAgentSpeakingChange(shouldReportSpeaking);
|
|
6479
|
+
}
|
|
6480
|
+
}
|
|
6439
6481
|
/** Emitters for audio flags */
|
|
6440
6482
|
_emitAudioInput() {
|
|
6441
6483
|
this.options.audioInputChanged(this.audioInput);
|
|
@@ -6491,7 +6533,9 @@ registerProcessor('audio_processor', AudioProcessor);
|
|
|
6491
6533
|
this.setupVadConfig(config);
|
|
6492
6534
|
// Bind the websocket message callbacks
|
|
6493
6535
|
this.bindWebsocketMessageCallbacks(ws, config);
|
|
6494
|
-
|
|
6536
|
+
const audioOutputReady = this.setupAudioOutput();
|
|
6537
|
+
this.audioOutputReady = audioOutputReady;
|
|
6538
|
+
await audioOutputReady;
|
|
6495
6539
|
}
|
|
6496
6540
|
catch (error) {
|
|
6497
6541
|
console.error('Error connecting to Layercode agent:', error);
|