@layercode/js-sdk 2.8.1 → 2.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5318,13 +5318,15 @@ registerProcessor('audio_processor', AudioProcessor);
5318
5318
  * @returns {Promise<true>}
5319
5319
  */
5320
5320
  async requestPermission() {
5321
+ console.log('ensureUserMediaAccess');
5321
5322
  try {
5322
- console.log('ensureUserMediaAccess');
5323
- await navigator.mediaDevices.getUserMedia({
5323
+ const stream = await navigator.mediaDevices.getUserMedia({
5324
5324
  audio: true,
5325
5325
  });
5326
+ // Stop the tracks immediately after getting permission
5327
+ stream.getTracks().forEach(track => track.stop());
5326
5328
  } catch (fallbackError) {
5327
- window.alert('You must grant microphone access to use this feature.');
5329
+ console.error('getUserMedia failed:', fallbackError.name, fallbackError.message);
5328
5330
  throw fallbackError;
5329
5331
  }
5330
5332
  return true;
@@ -5968,9 +5970,11 @@ registerProcessor('audio_processor', AudioProcessor);
5968
5970
  this.canInterrupt = false;
5969
5971
  this.userIsSpeaking = false;
5970
5972
  this.agentIsSpeaking = false;
5973
+ this.agentIsPlayingAudio = false;
5971
5974
  this.recorderStarted = false;
5972
5975
  this.readySent = false;
5973
5976
  this.currentTurnId = null;
5977
+ this.sentReplayFinishedForDisabledOutput = false;
5974
5978
  this.audioBuffer = [];
5975
5979
  this.vadConfig = null;
5976
5980
  this.activeDeviceId = null;
@@ -6120,6 +6124,8 @@ registerProcessor('audio_processor', AudioProcessor);
6120
6124
  await this.audioOutputReady;
6121
6125
  }
6122
6126
  _setAgentSpeaking(isSpeaking) {
6127
+ // Track the actual audio playback state regardless of audioOutput setting
6128
+ this.agentIsPlayingAudio = isSpeaking;
6123
6129
  const shouldReportSpeaking = this.audioOutput && isSpeaking;
6124
6130
  if (this.agentIsSpeaking === shouldReportSpeaking) {
6125
6131
  return;
@@ -6128,11 +6134,14 @@ registerProcessor('audio_processor', AudioProcessor);
6128
6134
  this.options.onAgentSpeakingChange(shouldReportSpeaking);
6129
6135
  }
6130
6136
  _setUserSpeaking(isSpeaking) {
6131
- const shouldReportSpeaking = this._shouldCaptureUserAudio() && isSpeaking;
6137
+ const shouldCapture = this._shouldCaptureUserAudio();
6138
+ const shouldReportSpeaking = shouldCapture && isSpeaking;
6139
+ console.log('_setUserSpeaking called:', isSpeaking, 'shouldCapture:', shouldCapture, 'shouldReportSpeaking:', shouldReportSpeaking, 'current userIsSpeaking:', this.userIsSpeaking);
6132
6140
  if (this.userIsSpeaking === shouldReportSpeaking) {
6133
6141
  return;
6134
6142
  }
6135
6143
  this.userIsSpeaking = shouldReportSpeaking;
6144
+ console.log('_setUserSpeaking: updated userIsSpeaking to:', this.userIsSpeaking);
6136
6145
  this.options.onUserIsSpeakingChange(shouldReportSpeaking);
6137
6146
  }
6138
6147
  /**
@@ -6182,6 +6191,7 @@ registerProcessor('audio_processor', AudioProcessor);
6182
6191
  * @param {MessageEvent} event - The WebSocket message event
6183
6192
  */
6184
6193
  async _handleWebSocketMessage(event) {
6194
+ var _a, _b;
6185
6195
  try {
6186
6196
  const message = JSON.parse(event.data);
6187
6197
  if (message.type !== 'response.audio') {
@@ -6194,6 +6204,20 @@ registerProcessor('audio_processor', AudioProcessor);
6194
6204
  // Start tracking new agent turn
6195
6205
  console.debug('Agent turn started, will track new turn ID from audio/text');
6196
6206
  this._setUserSpeaking(false);
6207
+ // Reset the flag for the new assistant turn
6208
+ this.sentReplayFinishedForDisabledOutput = false;
6209
+ // When assistant's turn starts but we're not playing audio,
6210
+ // we need to tell the server we're "done" with playback so it can
6211
+ // transition the turn back to user. Use a small delay to let any
6212
+ // response.audio/response.end messages arrive first.
6213
+ if (!this.audioOutput) {
6214
+ setTimeout(() => {
6215
+ if (!this.audioOutput && !this.sentReplayFinishedForDisabledOutput) {
6216
+ this.sentReplayFinishedForDisabledOutput = true;
6217
+ this._clientResponseAudioReplayFinished();
6218
+ }
6219
+ }, 1000);
6220
+ }
6197
6221
  }
6198
6222
  else if (message.role === 'user' && !this.pushToTalkEnabled) {
6199
6223
  // Interrupt any playing agent audio if this is a turn triggered by the server (and not push to talk, which will have already called interrupt)
@@ -6213,11 +6237,42 @@ registerProcessor('audio_processor', AudioProcessor);
6213
6237
  });
6214
6238
  break;
6215
6239
  }
6216
- case 'response.audio':
6240
+ case 'response.end': {
6241
+ // When audioOutput is disabled, notify server that "playback" is complete
6242
+ if (!this.audioOutput && !this.sentReplayFinishedForDisabledOutput) {
6243
+ this.sentReplayFinishedForDisabledOutput = true;
6244
+ this._clientResponseAudioReplayFinished();
6245
+ }
6246
+ (_b = (_a = this.options).onMessage) === null || _b === void 0 ? void 0 : _b.call(_a, message);
6247
+ break;
6248
+ }
6249
+ case 'response.audio': {
6250
+ // Skip audio playback if audioOutput is disabled
6251
+ if (!this.audioOutput) {
6252
+ // Send replay_finished so server knows we're "done" with playback (only once per turn)
6253
+ if (!this.sentReplayFinishedForDisabledOutput) {
6254
+ this.sentReplayFinishedForDisabledOutput = true;
6255
+ this._clientResponseAudioReplayFinished();
6256
+ }
6257
+ break;
6258
+ }
6217
6259
  await this._waitForAudioOutputReady();
6218
- this._setAgentSpeaking(true);
6219
6260
  const audioBuffer = base64ToArrayBuffer(message.content);
6220
- this.wavPlayer.add16BitPCM(audioBuffer, message.turn_id);
6261
+ const hasAudioSamples = audioBuffer.byteLength > 0;
6262
+ let audioEnqueued = false;
6263
+ if (hasAudioSamples) {
6264
+ try {
6265
+ const playbackBuffer = this.wavPlayer.add16BitPCM(audioBuffer, message.turn_id);
6266
+ audioEnqueued = Boolean(playbackBuffer && playbackBuffer.length > 0);
6267
+ }
6268
+ catch (error) {
6269
+ this._setAgentSpeaking(false);
6270
+ throw error;
6271
+ }
6272
+ }
6273
+ else {
6274
+ console.debug(`Skipping empty audio response for turn ${message.turn_id}`);
6275
+ }
6221
6276
  // TODO: once we've added turn_id to the turn.start msgs sent from teh server, we should move this currentTurnId switching logic to the turn.start msg case. We can then remove the currentTurnId setting logic from the response.audio and response.text cases.
6222
6277
  // Set current turn ID from first audio message, or update if different turn
6223
6278
  if (!this.currentTurnId || this.currentTurnId !== message.turn_id) {
@@ -6226,7 +6281,11 @@ registerProcessor('audio_processor', AudioProcessor);
6226
6281
  // Clean up interrupted tracks, keeping only the current turn
6227
6282
  this.wavPlayer.clearInterruptedTracks(this.currentTurnId ? [this.currentTurnId] : []);
6228
6283
  }
6284
+ if (audioEnqueued) {
6285
+ this._setAgentSpeaking(true);
6286
+ }
6229
6287
  break;
6288
+ }
6230
6289
  case 'response.text':
6231
6290
  // Set turn ID from first text message if not set
6232
6291
  if (!this.currentTurnId) {
@@ -6331,6 +6390,9 @@ registerProcessor('audio_processor', AudioProcessor);
6331
6390
  }
6332
6391
  _sendReadyIfNeeded() {
6333
6392
  var _a;
6393
+ // Send client.ready when either:
6394
+ // 1. Recorder is started (audio mode active)
6395
+ // 2. audioInput is false (text-only mode, but server should still be ready)
6334
6396
  const audioReady = this.recorderStarted || !this.audioInput;
6335
6397
  if (audioReady && ((_a = this.ws) === null || _a === void 0 ? void 0 : _a.readyState) === WebSocket.OPEN && !this.readySent) {
6336
6398
  this._wsSend({ type: 'client.ready' });
@@ -6396,12 +6458,16 @@ registerProcessor('audio_processor', AudioProcessor);
6396
6458
  }
6397
6459
  async audioInputConnect() {
6398
6460
  // Turn mic ON
6461
+ console.log('audioInputConnect: requesting permission');
6399
6462
  await this.wavRecorder.requestPermission();
6463
+ console.log('audioInputConnect: setting up device change listener');
6400
6464
  await this._setupDeviceChangeListener();
6401
6465
  // If the recorder hasn't spun up yet, proactively select a device.
6402
6466
  if (!this.recorderStarted && this.deviceChangeListener) {
6467
+ console.log('audioInputConnect: initializing recorder with default device');
6403
6468
  await this._initializeRecorderWithDefaultDevice();
6404
6469
  }
6470
+ console.log('audioInputConnect: done, recorderStarted =', this.recorderStarted);
6405
6471
  }
6406
6472
  async audioInputDisconnect() {
6407
6473
  try {
@@ -6433,11 +6499,27 @@ registerProcessor('audio_processor', AudioProcessor);
6433
6499
  }
6434
6500
  }
6435
6501
  async setAudioOutput(state) {
6502
+ console.log('setAudioOutput called with state:', state, 'current:', this.audioOutput);
6436
6503
  if (this.audioOutput !== state) {
6437
6504
  this.audioOutput = state;
6438
6505
  this._emitAudioOutput();
6439
6506
  if (state) {
6440
- this.wavPlayer.unmute();
6507
+ // Initialize audio output if not already connected
6508
+ // This happens when audioOutput was initially false and is now being enabled
6509
+ if (!this.wavPlayer.context) {
6510
+ console.log('setAudioOutput: initializing audio output (no context yet)');
6511
+ // Store the promise so _waitForAudioOutputReady() can await it
6512
+ // This prevents response.audio from running before AudioContext is ready
6513
+ const setupPromise = this.setupAudioOutput();
6514
+ this.audioOutputReady = setupPromise;
6515
+ await setupPromise;
6516
+ }
6517
+ else {
6518
+ console.log('setAudioOutput: unmuting existing player');
6519
+ this.wavPlayer.unmute();
6520
+ }
6521
+ // Sync agentSpeaking state with actual playback state when enabling audio output
6522
+ this._syncAgentSpeakingState();
6441
6523
  }
6442
6524
  else {
6443
6525
  this.wavPlayer.mute();
@@ -6445,6 +6527,17 @@ registerProcessor('audio_processor', AudioProcessor);
6445
6527
  }
6446
6528
  }
6447
6529
  }
6530
+ /**
6531
+ * Syncs the reported agentSpeaking state with the actual audio playback state.
6532
+ * Called when audioOutput is enabled to ensure proper state synchronization.
6533
+ */
6534
+ _syncAgentSpeakingState() {
6535
+ const shouldReportSpeaking = this.audioOutput && this.agentIsPlayingAudio;
6536
+ if (this.agentIsSpeaking !== shouldReportSpeaking) {
6537
+ this.agentIsSpeaking = shouldReportSpeaking;
6538
+ this.options.onAgentSpeakingChange(shouldReportSpeaking);
6539
+ }
6540
+ }
6448
6541
  /** Emitters for audio flags */
6449
6542
  _emitAudioInput() {
6450
6543
  this.options.audioInputChanged(this.audioInput);
@@ -6581,6 +6674,11 @@ registerProcessor('audio_processor', AudioProcessor);
6581
6674
  return authorizeSessionResponseBody;
6582
6675
  }
6583
6676
  async setupAudioOutput() {
6677
+ // Only initialize audio player if audioOutput is enabled
6678
+ // This prevents AudioContext creation before user gesture when audio is disabled
6679
+ if (!this.audioOutput) {
6680
+ return;
6681
+ }
6584
6682
  // Initialize audio player
6585
6683
  // wavRecorder will be started from the onDeviceSwitched callback,
6586
6684
  // which is called when the device is first initialized and also when the device is switched
@@ -6591,12 +6689,7 @@ registerProcessor('audio_processor', AudioProcessor);
6591
6689
  if (!this.options.enableAmplitudeMonitoring) {
6592
6690
  this.agentAudioAmplitude = 0;
6593
6691
  }
6594
- if (this.audioOutput) {
6595
- this.wavPlayer.unmute();
6596
- }
6597
- else {
6598
- this.wavPlayer.mute();
6599
- }
6692
+ this.wavPlayer.unmute();
6600
6693
  }
6601
6694
  async connectToAudioInput() {
6602
6695
  if (!this.audioInput) {
@@ -6645,6 +6738,7 @@ registerProcessor('audio_processor', AudioProcessor);
6645
6738
  */
6646
6739
  async setInputDevice(deviceId) {
6647
6740
  var _a, _b, _c;
6741
+ console.log('setInputDevice called with:', deviceId, 'audioInput:', this.audioInput);
6648
6742
  const normalizedDeviceId = !deviceId || deviceId === 'default' ? null : deviceId;
6649
6743
  this.useSystemDefaultDevice = normalizedDeviceId === null;
6650
6744
  this.deviceId = normalizedDeviceId;
@@ -6653,6 +6747,7 @@ registerProcessor('audio_processor', AudioProcessor);
6653
6747
  return;
6654
6748
  }
6655
6749
  try {
6750
+ console.log('setInputDevice: calling _queueRecorderRestart');
6656
6751
  // Restart recording with the new device
6657
6752
  await this._queueRecorderRestart();
6658
6753
  // Reinitialize VAD with the new audio stream if VAD is enabled
@@ -6736,12 +6831,15 @@ registerProcessor('audio_processor', AudioProcessor);
6736
6831
  return run;
6737
6832
  }
6738
6833
  async _initializeRecorderWithDefaultDevice() {
6834
+ console.log('_initializeRecorderWithDefaultDevice called, deviceChangeListener:', !!this.deviceChangeListener);
6739
6835
  if (!this.deviceChangeListener) {
6740
6836
  return;
6741
6837
  }
6742
6838
  try {
6743
6839
  const devices = await this.wavRecorder.listDevices();
6840
+ console.log('_initializeRecorderWithDefaultDevice: got devices:', devices.length);
6744
6841
  if (devices.length) {
6842
+ console.log('_initializeRecorderWithDefaultDevice: calling deviceChangeListener');
6745
6843
  await this.deviceChangeListener(devices);
6746
6844
  return;
6747
6845
  }
@@ -6751,6 +6849,7 @@ registerProcessor('audio_processor', AudioProcessor);
6751
6849
  console.warn('Unable to prime audio devices from listDevices()', error);
6752
6850
  }
6753
6851
  try {
6852
+ console.log('_initializeRecorderWithDefaultDevice: calling setInputDevice default');
6754
6853
  await this.setInputDevice('default');
6755
6854
  }
6756
6855
  catch (error) {
@@ -6799,6 +6898,7 @@ registerProcessor('audio_processor', AudioProcessor);
6799
6898
  });
6800
6899
  this.deviceChangeListener = async (devices) => {
6801
6900
  var _a;
6901
+ console.log('deviceChangeListener called, devices:', devices.length, 'recorderStarted:', this.recorderStarted);
6802
6902
  try {
6803
6903
  // Notify user that devices have changed
6804
6904
  this.options.onDevicesChanged(devices);
@@ -6807,6 +6907,7 @@ registerProcessor('audio_processor', AudioProcessor);
6807
6907
  const previousDefaultDeviceKey = this.lastKnownSystemDefaultDeviceKey;
6808
6908
  const currentDefaultDeviceKey = this._getDeviceComparisonKey(defaultDevice);
6809
6909
  let shouldSwitch = !this.recorderStarted;
6910
+ console.log('deviceChangeListener: shouldSwitch initial:', shouldSwitch);
6810
6911
  if (!shouldSwitch) {
6811
6912
  if (usingDefaultDevice) {
6812
6913
  if (!defaultDevice) {
@@ -6826,6 +6927,7 @@ registerProcessor('audio_processor', AudioProcessor);
6826
6927
  }
6827
6928
  }
6828
6929
  this.lastKnownSystemDefaultDeviceKey = currentDefaultDeviceKey;
6930
+ console.log('deviceChangeListener: final shouldSwitch:', shouldSwitch);
6829
6931
  if (shouldSwitch) {
6830
6932
  console.debug('Selecting audio input device after change');
6831
6933
  let targetDeviceId = null;