@layercode/js-sdk 2.8.1 → 2.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5962,6 +5962,7 @@ class LayercodeClient {
5962
5962
  this.canInterrupt = false;
5963
5963
  this.userIsSpeaking = false;
5964
5964
  this.agentIsSpeaking = false;
5965
+ this.agentIsPlayingAudio = false;
5965
5966
  this.recorderStarted = false;
5966
5967
  this.readySent = false;
5967
5968
  this.currentTurnId = null;
@@ -6114,6 +6115,8 @@ class LayercodeClient {
6114
6115
  await this.audioOutputReady;
6115
6116
  }
6116
6117
  _setAgentSpeaking(isSpeaking) {
6118
+ // Track the actual audio playback state regardless of audioOutput setting
6119
+ this.agentIsPlayingAudio = isSpeaking;
6117
6120
  const shouldReportSpeaking = this.audioOutput && isSpeaking;
6118
6121
  if (this.agentIsSpeaking === shouldReportSpeaking) {
6119
6122
  return;
@@ -6207,11 +6210,24 @@ class LayercodeClient {
6207
6210
  });
6208
6211
  break;
6209
6212
  }
6210
- case 'response.audio':
6213
+ case 'response.audio': {
6211
6214
  await this._waitForAudioOutputReady();
6212
- this._setAgentSpeaking(true);
6213
6215
  const audioBuffer = base64ToArrayBuffer(message.content);
6214
- this.wavPlayer.add16BitPCM(audioBuffer, message.turn_id);
6216
+ const hasAudioSamples = audioBuffer.byteLength > 0;
6217
+ let audioEnqueued = false;
6218
+ if (hasAudioSamples) {
6219
+ try {
6220
+ const playbackBuffer = this.wavPlayer.add16BitPCM(audioBuffer, message.turn_id);
6221
+ audioEnqueued = Boolean(playbackBuffer && playbackBuffer.length > 0);
6222
+ }
6223
+ catch (error) {
6224
+ this._setAgentSpeaking(false);
6225
+ throw error;
6226
+ }
6227
+ }
6228
+ else {
6229
+ console.debug(`Skipping empty audio response for turn ${message.turn_id}`);
6230
+ }
6215
6231
  // TODO: once we've added turn_id to the turn.start msgs sent from the server, we should move this currentTurnId switching logic to the turn.start msg case. We can then remove the currentTurnId setting logic from the response.audio and response.text cases.
6216
6232
  // Set current turn ID from first audio message, or update if different turn
6217
6233
  if (!this.currentTurnId || this.currentTurnId !== message.turn_id) {
@@ -6220,7 +6236,11 @@ class LayercodeClient {
6220
6236
  // Clean up interrupted tracks, keeping only the current turn
6221
6237
  this.wavPlayer.clearInterruptedTracks(this.currentTurnId ? [this.currentTurnId] : []);
6222
6238
  }
6239
+ if (audioEnqueued) {
6240
+ this._setAgentSpeaking(true);
6241
+ }
6223
6242
  break;
6243
+ }
6224
6244
  case 'response.text':
6225
6245
  // Set turn ID from first text message if not set
6226
6246
  if (!this.currentTurnId) {
@@ -6432,6 +6452,8 @@ class LayercodeClient {
6432
6452
  this._emitAudioOutput();
6433
6453
  if (state) {
6434
6454
  this.wavPlayer.unmute();
6455
+ // Sync agentSpeaking state with actual playback state when enabling audio output
6456
+ this._syncAgentSpeakingState();
6435
6457
  }
6436
6458
  else {
6437
6459
  this.wavPlayer.mute();
@@ -6439,6 +6461,17 @@ class LayercodeClient {
6439
6461
  }
6440
6462
  }
6441
6463
  }
6464
+ /**
6465
+ * Syncs the reported agentSpeaking state with the actual audio playback state: the reported value is `audioOutput && agentIsPlayingAudio`, so the agent only counts as speaking while audio output is enabled.
6466
+ * Called when audioOutput is enabled to ensure proper state synchronization. Updates `agentIsSpeaking` and calls `options.onAgentSpeakingChange` with the new value only when it differs from the current one; otherwise does nothing. Takes no arguments and returns nothing.
6467
+ */
6468
+ _syncAgentSpeakingState() {
6469
+ const shouldReportSpeaking = this.audioOutput && this.agentIsPlayingAudio;
6470
+ if (this.agentIsSpeaking !== shouldReportSpeaking) {
6471
+ this.agentIsSpeaking = shouldReportSpeaking;
6472
+ this.options.onAgentSpeakingChange(shouldReportSpeaking);
6473
+ }
6474
+ }
6442
6475
  /** Emitters for audio flags */
6443
6476
  _emitAudioInput() {
6444
6477
  this.options.audioInputChanged(this.audioInput);