@layercode/js-sdk 2.8.0 → 2.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5945,6 +5945,7 @@ class LayercodeClient {
5945
5945
  this._emitAudioInput();
5946
5946
  this.AMPLITUDE_MONITORING_SAMPLE_RATE = 2;
5947
5947
  this._websocketUrl = DEFAULT_WS_URL;
5948
+ this.audioOutputReady = null;
5948
5949
  this.wavRecorder = new WavRecorder({ sampleRate: 8000 }); // TODO should be set my fetched agent config
5949
5950
  this.wavPlayer = new WavStreamPlayer({
5950
5951
  finishedPlayingCallback: this._clientResponseAudioReplayFinished.bind(this),
@@ -5961,6 +5962,7 @@ class LayercodeClient {
5961
5962
  this.canInterrupt = false;
5962
5963
  this.userIsSpeaking = false;
5963
5964
  this.agentIsSpeaking = false;
5965
+ this.agentIsPlayingAudio = false;
5964
5966
  this.recorderStarted = false;
5965
5967
  this.readySent = false;
5966
5968
  this.currentTurnId = null;
@@ -6106,12 +6108,21 @@ class LayercodeClient {
6106
6108
  this.status = status;
6107
6109
  this.options.onStatusChange(status);
6108
6110
  }
6111
+ async _waitForAudioOutputReady() {
6112
+ if (!this.audioOutputReady) {
6113
+ return;
6114
+ }
6115
+ await this.audioOutputReady;
6116
+ }
6109
6117
  _setAgentSpeaking(isSpeaking) {
6110
- if (this.agentIsSpeaking === isSpeaking) {
6118
+ // Track the actual audio playback state regardless of audioOutput setting
6119
+ this.agentIsPlayingAudio = isSpeaking;
6120
+ const shouldReportSpeaking = this.audioOutput && isSpeaking;
6121
+ if (this.agentIsSpeaking === shouldReportSpeaking) {
6111
6122
  return;
6112
6123
  }
6113
- this.agentIsSpeaking = isSpeaking;
6114
- this.options.onAgentSpeakingChange(isSpeaking);
6124
+ this.agentIsSpeaking = shouldReportSpeaking;
6125
+ this.options.onAgentSpeakingChange(shouldReportSpeaking);
6115
6126
  }
6116
6127
  _setUserSpeaking(isSpeaking) {
6117
6128
  const shouldReportSpeaking = this._shouldCaptureUserAudio() && isSpeaking;
@@ -6179,7 +6190,6 @@ class LayercodeClient {
6179
6190
  if (message.role === 'assistant') {
6180
6191
  // Start tracking new agent turn
6181
6192
  console.debug('Agent turn started, will track new turn ID from audio/text');
6182
- this._setAgentSpeaking(true);
6183
6193
  this._setUserSpeaking(false);
6184
6194
  }
6185
6195
  else if (message.role === 'user' && !this.pushToTalkEnabled) {
@@ -6200,10 +6210,24 @@ class LayercodeClient {
6200
6210
  });
6201
6211
  break;
6202
6212
  }
6203
- case 'response.audio':
6204
- this._setAgentSpeaking(true);
6213
+ case 'response.audio': {
6214
+ await this._waitForAudioOutputReady();
6205
6215
  const audioBuffer = base64ToArrayBuffer(message.content);
6206
- this.wavPlayer.add16BitPCM(audioBuffer, message.turn_id);
6216
+ const hasAudioSamples = audioBuffer.byteLength > 0;
6217
+ let audioEnqueued = false;
6218
+ if (hasAudioSamples) {
6219
+ try {
6220
+ const playbackBuffer = this.wavPlayer.add16BitPCM(audioBuffer, message.turn_id);
6221
+ audioEnqueued = Boolean(playbackBuffer && playbackBuffer.length > 0);
6222
+ }
6223
+ catch (error) {
6224
+ this._setAgentSpeaking(false);
6225
+ throw error;
6226
+ }
6227
+ }
6228
+ else {
6229
+ console.debug(`Skipping empty audio response for turn ${message.turn_id}`);
6230
+ }
6207
6231
  // TODO: once we've added turn_id to the turn.start msgs sent from the server, we should move this currentTurnId switching logic to the turn.start msg case. We can then remove the currentTurnId setting logic from the response.audio and response.text cases.
6208
6232
  // Set current turn ID from first audio message, or update if different turn
6209
6233
  if (!this.currentTurnId || this.currentTurnId !== message.turn_id) {
@@ -6212,7 +6236,11 @@ class LayercodeClient {
6212
6236
  // Clean up interrupted tracks, keeping only the current turn
6213
6237
  this.wavPlayer.clearInterruptedTracks(this.currentTurnId ? [this.currentTurnId] : []);
6214
6238
  }
6239
+ if (audioEnqueued) {
6240
+ this._setAgentSpeaking(true);
6241
+ }
6215
6242
  break;
6243
+ }
6216
6244
  case 'response.text':
6217
6245
  // Set turn ID from first text message if not set
6218
6246
  if (!this.currentTurnId) {
@@ -6424,12 +6452,26 @@ class LayercodeClient {
6424
6452
  this._emitAudioOutput();
6425
6453
  if (state) {
6426
6454
  this.wavPlayer.unmute();
6455
+ // Sync agentSpeaking state with actual playback state when enabling audio output
6456
+ this._syncAgentSpeakingState();
6427
6457
  }
6428
6458
  else {
6429
6459
  this.wavPlayer.mute();
6460
+ this._setAgentSpeaking(false);
6430
6461
  }
6431
6462
  }
6432
6463
  }
6464
+ /**
6465
+ * Syncs the reported agentSpeaking state with the actual audio playback state.
6466
+ * Called when audioOutput is enabled to ensure proper state synchronization.
6467
+ */
6468
+ _syncAgentSpeakingState() {
6469
+ const shouldReportSpeaking = this.audioOutput && this.agentIsPlayingAudio;
6470
+ if (this.agentIsSpeaking !== shouldReportSpeaking) {
6471
+ this.agentIsSpeaking = shouldReportSpeaking;
6472
+ this.options.onAgentSpeakingChange(shouldReportSpeaking);
6473
+ }
6474
+ }
6433
6475
  /** Emitters for audio flags */
6434
6476
  _emitAudioInput() {
6435
6477
  this.options.audioInputChanged(this.audioInput);
@@ -6485,7 +6527,9 @@ class LayercodeClient {
6485
6527
  this.setupVadConfig(config);
6486
6528
  // Bind the websocket message callbacks
6487
6529
  this.bindWebsocketMessageCallbacks(ws, config);
6488
- await this.setupAudioOutput();
6530
+ const audioOutputReady = this.setupAudioOutput();
6531
+ this.audioOutputReady = audioOutputReady;
6532
+ await audioOutputReady;
6489
6533
  }
6490
6534
  catch (error) {
6491
6535
  console.error('Error connecting to Layercode agent:', error);