@layercode/js-sdk 2.8.0 → 2.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5951,6 +5951,7 @@ registerProcessor('audio_processor', AudioProcessor);
5951
5951
  this._emitAudioInput();
5952
5952
  this.AMPLITUDE_MONITORING_SAMPLE_RATE = 2;
5953
5953
  this._websocketUrl = DEFAULT_WS_URL;
5954
+ this.audioOutputReady = null;
5954
5955
  this.wavRecorder = new WavRecorder({ sampleRate: 8000 }); // TODO should be set by fetched agent config
5955
5956
  this.wavPlayer = new WavStreamPlayer({
5956
5957
  finishedPlayingCallback: this._clientResponseAudioReplayFinished.bind(this),
@@ -5967,6 +5968,7 @@ registerProcessor('audio_processor', AudioProcessor);
5967
5968
  this.canInterrupt = false;
5968
5969
  this.userIsSpeaking = false;
5969
5970
  this.agentIsSpeaking = false;
5971
+ this.agentIsPlayingAudio = false;
5970
5972
  this.recorderStarted = false;
5971
5973
  this.readySent = false;
5972
5974
  this.currentTurnId = null;
@@ -6112,12 +6114,21 @@ registerProcessor('audio_processor', AudioProcessor);
6112
6114
  this.status = status;
6113
6115
  this.options.onStatusChange(status);
6114
6116
  }
6117
+ async _waitForAudioOutputReady() {
6118
+ if (!this.audioOutputReady) {
6119
+ return;
6120
+ }
6121
+ await this.audioOutputReady;
6122
+ }
6115
6123
  _setAgentSpeaking(isSpeaking) {
6116
- if (this.agentIsSpeaking === isSpeaking) {
6124
+ // Track the actual audio playback state regardless of audioOutput setting
6125
+ this.agentIsPlayingAudio = isSpeaking;
6126
+ const shouldReportSpeaking = this.audioOutput && isSpeaking;
6127
+ if (this.agentIsSpeaking === shouldReportSpeaking) {
6117
6128
  return;
6118
6129
  }
6119
- this.agentIsSpeaking = isSpeaking;
6120
- this.options.onAgentSpeakingChange(isSpeaking);
6130
+ this.agentIsSpeaking = shouldReportSpeaking;
6131
+ this.options.onAgentSpeakingChange(shouldReportSpeaking);
6121
6132
  }
6122
6133
  _setUserSpeaking(isSpeaking) {
6123
6134
  const shouldReportSpeaking = this._shouldCaptureUserAudio() && isSpeaking;
@@ -6185,7 +6196,6 @@ registerProcessor('audio_processor', AudioProcessor);
6185
6196
  if (message.role === 'assistant') {
6186
6197
  // Start tracking new agent turn
6187
6198
  console.debug('Agent turn started, will track new turn ID from audio/text');
6188
- this._setAgentSpeaking(true);
6189
6199
  this._setUserSpeaking(false);
6190
6200
  }
6191
6201
  else if (message.role === 'user' && !this.pushToTalkEnabled) {
@@ -6206,10 +6216,24 @@ registerProcessor('audio_processor', AudioProcessor);
6206
6216
  });
6207
6217
  break;
6208
6218
  }
6209
- case 'response.audio':
6210
- this._setAgentSpeaking(true);
6219
+ case 'response.audio': {
6220
+ await this._waitForAudioOutputReady();
6211
6221
  const audioBuffer = base64ToArrayBuffer(message.content);
6212
- this.wavPlayer.add16BitPCM(audioBuffer, message.turn_id);
6222
+ const hasAudioSamples = audioBuffer.byteLength > 0;
6223
+ let audioEnqueued = false;
6224
+ if (hasAudioSamples) {
6225
+ try {
6226
+ const playbackBuffer = this.wavPlayer.add16BitPCM(audioBuffer, message.turn_id);
6227
+ audioEnqueued = Boolean(playbackBuffer && playbackBuffer.length > 0);
6228
+ }
6229
+ catch (error) {
6230
+ this._setAgentSpeaking(false);
6231
+ throw error;
6232
+ }
6233
+ }
6234
+ else {
6235
+ console.debug(`Skipping empty audio response for turn ${message.turn_id}`);
6236
+ }
6213
6237
  // TODO: once we've added turn_id to the turn.start msgs sent from the server, we should move this currentTurnId switching logic to the turn.start msg case. We can then remove the currentTurnId setting logic from the response.audio and response.text cases.
6214
6238
  // Set current turn ID from first audio message, or update if different turn
6215
6239
  if (!this.currentTurnId || this.currentTurnId !== message.turn_id) {
@@ -6218,7 +6242,11 @@ registerProcessor('audio_processor', AudioProcessor);
6218
6242
  // Clean up interrupted tracks, keeping only the current turn
6219
6243
  this.wavPlayer.clearInterruptedTracks(this.currentTurnId ? [this.currentTurnId] : []);
6220
6244
  }
6245
+ if (audioEnqueued) {
6246
+ this._setAgentSpeaking(true);
6247
+ }
6221
6248
  break;
6249
+ }
6222
6250
  case 'response.text':
6223
6251
  // Set turn ID from first text message if not set
6224
6252
  if (!this.currentTurnId) {
@@ -6430,12 +6458,26 @@ registerProcessor('audio_processor', AudioProcessor);
6430
6458
  this._emitAudioOutput();
6431
6459
  if (state) {
6432
6460
  this.wavPlayer.unmute();
6461
+ // Sync agentSpeaking state with actual playback state when enabling audio output
6462
+ this._syncAgentSpeakingState();
6433
6463
  }
6434
6464
  else {
6435
6465
  this.wavPlayer.mute();
6466
+ this._setAgentSpeaking(false);
6436
6467
  }
6437
6468
  }
6438
6469
  }
6470
+ /**
6471
+ * Syncs the reported agentSpeaking state with the actual audio playback state.
6472
+ * Called when audioOutput is enabled to ensure proper state synchronization.
6473
+ */
6474
+ _syncAgentSpeakingState() {
6475
+ const shouldReportSpeaking = this.audioOutput && this.agentIsPlayingAudio;
6476
+ if (this.agentIsSpeaking !== shouldReportSpeaking) {
6477
+ this.agentIsSpeaking = shouldReportSpeaking;
6478
+ this.options.onAgentSpeakingChange(shouldReportSpeaking);
6479
+ }
6480
+ }
6439
6481
  /** Emitters for audio flags */
6440
6482
  _emitAudioInput() {
6441
6483
  this.options.audioInputChanged(this.audioInput);
@@ -6491,7 +6533,9 @@ registerProcessor('audio_processor', AudioProcessor);
6491
6533
  this.setupVadConfig(config);
6492
6534
  // Bind the websocket message callbacks
6493
6535
  this.bindWebsocketMessageCallbacks(ws, config);
6494
- await this.setupAudioOutput();
6536
+ const audioOutputReady = this.setupAudioOutput();
6537
+ this.audioOutputReady = audioOutputReady;
6538
+ await audioOutputReady;
6495
6539
  }
6496
6540
  catch (error) {
6497
6541
  console.error('Error connecting to Layercode agent:', error);