@layercode/js-sdk 2.8.1 → 2.8.3

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -5312,13 +5312,15 @@ class WavRecorder {
5312
5312
  * @returns {Promise<true>}
5313
5313
  */
5314
5314
  async requestPermission() {
5315
+ console.log('ensureUserMediaAccess');
5315
5316
  try {
5316
- console.log('ensureUserMediaAccess');
5317
- await navigator.mediaDevices.getUserMedia({
5317
+ const stream = await navigator.mediaDevices.getUserMedia({
5318
5318
  audio: true,
5319
5319
  });
5320
+ // Stop the tracks immediately after getting permission
5321
+ stream.getTracks().forEach(track => track.stop());
5320
5322
  } catch (fallbackError) {
5321
- window.alert('You must grant microphone access to use this feature.');
5323
+ console.error('getUserMedia failed:', fallbackError.name, fallbackError.message);
5322
5324
  throw fallbackError;
5323
5325
  }
5324
5326
  return true;
@@ -5962,9 +5964,11 @@ class LayercodeClient {
5962
5964
  this.canInterrupt = false;
5963
5965
  this.userIsSpeaking = false;
5964
5966
  this.agentIsSpeaking = false;
5967
+ this.agentIsPlayingAudio = false;
5965
5968
  this.recorderStarted = false;
5966
5969
  this.readySent = false;
5967
5970
  this.currentTurnId = null;
5971
+ this.sentReplayFinishedForDisabledOutput = false;
5968
5972
  this.audioBuffer = [];
5969
5973
  this.vadConfig = null;
5970
5974
  this.activeDeviceId = null;
@@ -6114,6 +6118,8 @@ class LayercodeClient {
6114
6118
  await this.audioOutputReady;
6115
6119
  }
6116
6120
  _setAgentSpeaking(isSpeaking) {
6121
+ // Track the actual audio playback state regardless of audioOutput setting
6122
+ this.agentIsPlayingAudio = isSpeaking;
6117
6123
  const shouldReportSpeaking = this.audioOutput && isSpeaking;
6118
6124
  if (this.agentIsSpeaking === shouldReportSpeaking) {
6119
6125
  return;
@@ -6122,11 +6128,14 @@ class LayercodeClient {
6122
6128
  this.options.onAgentSpeakingChange(shouldReportSpeaking);
6123
6129
  }
6124
6130
  _setUserSpeaking(isSpeaking) {
6125
- const shouldReportSpeaking = this._shouldCaptureUserAudio() && isSpeaking;
6131
+ const shouldCapture = this._shouldCaptureUserAudio();
6132
+ const shouldReportSpeaking = shouldCapture && isSpeaking;
6133
+ console.log('_setUserSpeaking called:', isSpeaking, 'shouldCapture:', shouldCapture, 'shouldReportSpeaking:', shouldReportSpeaking, 'current userIsSpeaking:', this.userIsSpeaking);
6126
6134
  if (this.userIsSpeaking === shouldReportSpeaking) {
6127
6135
  return;
6128
6136
  }
6129
6137
  this.userIsSpeaking = shouldReportSpeaking;
6138
+ console.log('_setUserSpeaking: updated userIsSpeaking to:', this.userIsSpeaking);
6130
6139
  this.options.onUserIsSpeakingChange(shouldReportSpeaking);
6131
6140
  }
6132
6141
  /**
@@ -6176,6 +6185,7 @@ class LayercodeClient {
6176
6185
  * @param {MessageEvent} event - The WebSocket message event
6177
6186
  */
6178
6187
  async _handleWebSocketMessage(event) {
6188
+ var _a, _b;
6179
6189
  try {
6180
6190
  const message = JSON.parse(event.data);
6181
6191
  if (message.type !== 'response.audio') {
@@ -6188,6 +6198,20 @@ class LayercodeClient {
6188
6198
  // Start tracking new agent turn
6189
6199
  console.debug('Agent turn started, will track new turn ID from audio/text');
6190
6200
  this._setUserSpeaking(false);
6201
+ // Reset the flag for the new assistant turn
6202
+ this.sentReplayFinishedForDisabledOutput = false;
6203
+ // When assistant's turn starts but we're not playing audio,
6204
+ // we need to tell the server we're "done" with playback so it can
6205
+ // transition the turn back to user. Use a small delay to let any
6206
+ // response.audio/response.end messages arrive first.
6207
+ if (!this.audioOutput) {
6208
+ setTimeout(() => {
6209
+ if (!this.audioOutput && !this.sentReplayFinishedForDisabledOutput) {
6210
+ this.sentReplayFinishedForDisabledOutput = true;
6211
+ this._clientResponseAudioReplayFinished();
6212
+ }
6213
+ }, 1000);
6214
+ }
6191
6215
  }
6192
6216
  else if (message.role === 'user' && !this.pushToTalkEnabled) {
6193
6217
  // Interrupt any playing agent audio if this is a turn triggered by the server (and not push to talk, which will have already called interrupt)
@@ -6207,11 +6231,42 @@ class LayercodeClient {
6207
6231
  });
6208
6232
  break;
6209
6233
  }
6210
- case 'response.audio':
6234
+ case 'response.end': {
6235
+ // When audioOutput is disabled, notify server that "playback" is complete
6236
+ if (!this.audioOutput && !this.sentReplayFinishedForDisabledOutput) {
6237
+ this.sentReplayFinishedForDisabledOutput = true;
6238
+ this._clientResponseAudioReplayFinished();
6239
+ }
6240
+ (_b = (_a = this.options).onMessage) === null || _b === void 0 ? void 0 : _b.call(_a, message);
6241
+ break;
6242
+ }
6243
+ case 'response.audio': {
6244
+ // Skip audio playback if audioOutput is disabled
6245
+ if (!this.audioOutput) {
6246
+ // Send replay_finished so server knows we're "done" with playback (only once per turn)
6247
+ if (!this.sentReplayFinishedForDisabledOutput) {
6248
+ this.sentReplayFinishedForDisabledOutput = true;
6249
+ this._clientResponseAudioReplayFinished();
6250
+ }
6251
+ break;
6252
+ }
6211
6253
  await this._waitForAudioOutputReady();
6212
- this._setAgentSpeaking(true);
6213
6254
  const audioBuffer = base64ToArrayBuffer(message.content);
6214
- this.wavPlayer.add16BitPCM(audioBuffer, message.turn_id);
6255
+ const hasAudioSamples = audioBuffer.byteLength > 0;
6256
+ let audioEnqueued = false;
6257
+ if (hasAudioSamples) {
6258
+ try {
6259
+ const playbackBuffer = this.wavPlayer.add16BitPCM(audioBuffer, message.turn_id);
6260
+ audioEnqueued = Boolean(playbackBuffer && playbackBuffer.length > 0);
6261
+ }
6262
+ catch (error) {
6263
+ this._setAgentSpeaking(false);
6264
+ throw error;
6265
+ }
6266
+ }
6267
+ else {
6268
+ console.debug(`Skipping empty audio response for turn ${message.turn_id}`);
6269
+ }
6215
6270
  // TODO: once we've added turn_id to the turn.start msgs sent from teh server, we should move this currentTurnId switching logic to the turn.start msg case. We can then remove the currentTurnId setting logic from the response.audio and response.text cases.
6216
6271
  // Set current turn ID from first audio message, or update if different turn
6217
6272
  if (!this.currentTurnId || this.currentTurnId !== message.turn_id) {
@@ -6220,7 +6275,11 @@ class LayercodeClient {
6220
6275
  // Clean up interrupted tracks, keeping only the current turn
6221
6276
  this.wavPlayer.clearInterruptedTracks(this.currentTurnId ? [this.currentTurnId] : []);
6222
6277
  }
6278
+ if (audioEnqueued) {
6279
+ this._setAgentSpeaking(true);
6280
+ }
6223
6281
  break;
6282
+ }
6224
6283
  case 'response.text':
6225
6284
  // Set turn ID from first text message if not set
6226
6285
  if (!this.currentTurnId) {
@@ -6325,6 +6384,9 @@ class LayercodeClient {
6325
6384
  }
6326
6385
  _sendReadyIfNeeded() {
6327
6386
  var _a;
6387
+ // Send client.ready when either:
6388
+ // 1. Recorder is started (audio mode active)
6389
+ // 2. audioInput is false (text-only mode, but server should still be ready)
6328
6390
  const audioReady = this.recorderStarted || !this.audioInput;
6329
6391
  if (audioReady && ((_a = this.ws) === null || _a === void 0 ? void 0 : _a.readyState) === WebSocket.OPEN && !this.readySent) {
6330
6392
  this._wsSend({ type: 'client.ready' });
@@ -6390,12 +6452,16 @@ class LayercodeClient {
6390
6452
  }
6391
6453
  async audioInputConnect() {
6392
6454
  // Turn mic ON
6455
+ console.log('audioInputConnect: requesting permission');
6393
6456
  await this.wavRecorder.requestPermission();
6457
+ console.log('audioInputConnect: setting up device change listener');
6394
6458
  await this._setupDeviceChangeListener();
6395
6459
  // If the recorder hasn't spun up yet, proactively select a device.
6396
6460
  if (!this.recorderStarted && this.deviceChangeListener) {
6461
+ console.log('audioInputConnect: initializing recorder with default device');
6397
6462
  await this._initializeRecorderWithDefaultDevice();
6398
6463
  }
6464
+ console.log('audioInputConnect: done, recorderStarted =', this.recorderStarted);
6399
6465
  }
6400
6466
  async audioInputDisconnect() {
6401
6467
  try {
@@ -6427,11 +6493,27 @@ class LayercodeClient {
6427
6493
  }
6428
6494
  }
6429
6495
  async setAudioOutput(state) {
6496
+ console.log('setAudioOutput called with state:', state, 'current:', this.audioOutput);
6430
6497
  if (this.audioOutput !== state) {
6431
6498
  this.audioOutput = state;
6432
6499
  this._emitAudioOutput();
6433
6500
  if (state) {
6434
- this.wavPlayer.unmute();
6501
+ // Initialize audio output if not already connected
6502
+ // This happens when audioOutput was initially false and is now being enabled
6503
+ if (!this.wavPlayer.context) {
6504
+ console.log('setAudioOutput: initializing audio output (no context yet)');
6505
+ // Store the promise so _waitForAudioOutputReady() can await it
6506
+ // This prevents response.audio from running before AudioContext is ready
6507
+ const setupPromise = this.setupAudioOutput();
6508
+ this.audioOutputReady = setupPromise;
6509
+ await setupPromise;
6510
+ }
6511
+ else {
6512
+ console.log('setAudioOutput: unmuting existing player');
6513
+ this.wavPlayer.unmute();
6514
+ }
6515
+ // Sync agentSpeaking state with actual playback state when enabling audio output
6516
+ this._syncAgentSpeakingState();
6435
6517
  }
6436
6518
  else {
6437
6519
  this.wavPlayer.mute();
@@ -6439,6 +6521,17 @@ class LayercodeClient {
6439
6521
  }
6440
6522
  }
6441
6523
  }
6524
+ /**
6525
+ * Syncs the reported agentSpeaking state with the actual audio playback state.
6526
+ * Called when audioOutput is enabled to ensure proper state synchronization.
6527
+ */
6528
+ _syncAgentSpeakingState() {
6529
+ const shouldReportSpeaking = this.audioOutput && this.agentIsPlayingAudio;
6530
+ if (this.agentIsSpeaking !== shouldReportSpeaking) {
6531
+ this.agentIsSpeaking = shouldReportSpeaking;
6532
+ this.options.onAgentSpeakingChange(shouldReportSpeaking);
6533
+ }
6534
+ }
6442
6535
  /** Emitters for audio flags */
6443
6536
  _emitAudioInput() {
6444
6537
  this.options.audioInputChanged(this.audioInput);
@@ -6575,6 +6668,11 @@ class LayercodeClient {
6575
6668
  return authorizeSessionResponseBody;
6576
6669
  }
6577
6670
  async setupAudioOutput() {
6671
+ // Only initialize audio player if audioOutput is enabled
6672
+ // This prevents AudioContext creation before user gesture when audio is disabled
6673
+ if (!this.audioOutput) {
6674
+ return;
6675
+ }
6578
6676
  // Initialize audio player
6579
6677
  // wavRecorder will be started from the onDeviceSwitched callback,
6580
6678
  // which is called when the device is first initialized and also when the device is switched
@@ -6585,12 +6683,7 @@ class LayercodeClient {
6585
6683
  if (!this.options.enableAmplitudeMonitoring) {
6586
6684
  this.agentAudioAmplitude = 0;
6587
6685
  }
6588
- if (this.audioOutput) {
6589
- this.wavPlayer.unmute();
6590
- }
6591
- else {
6592
- this.wavPlayer.mute();
6593
- }
6686
+ this.wavPlayer.unmute();
6594
6687
  }
6595
6688
  async connectToAudioInput() {
6596
6689
  if (!this.audioInput) {
@@ -6639,6 +6732,7 @@ class LayercodeClient {
6639
6732
  */
6640
6733
  async setInputDevice(deviceId) {
6641
6734
  var _a, _b, _c;
6735
+ console.log('setInputDevice called with:', deviceId, 'audioInput:', this.audioInput);
6642
6736
  const normalizedDeviceId = !deviceId || deviceId === 'default' ? null : deviceId;
6643
6737
  this.useSystemDefaultDevice = normalizedDeviceId === null;
6644
6738
  this.deviceId = normalizedDeviceId;
@@ -6647,6 +6741,7 @@ class LayercodeClient {
6647
6741
  return;
6648
6742
  }
6649
6743
  try {
6744
+ console.log('setInputDevice: calling _queueRecorderRestart');
6650
6745
  // Restart recording with the new device
6651
6746
  await this._queueRecorderRestart();
6652
6747
  // Reinitialize VAD with the new audio stream if VAD is enabled
@@ -6730,12 +6825,15 @@ class LayercodeClient {
6730
6825
  return run;
6731
6826
  }
6732
6827
  async _initializeRecorderWithDefaultDevice() {
6828
+ console.log('_initializeRecorderWithDefaultDevice called, deviceChangeListener:', !!this.deviceChangeListener);
6733
6829
  if (!this.deviceChangeListener) {
6734
6830
  return;
6735
6831
  }
6736
6832
  try {
6737
6833
  const devices = await this.wavRecorder.listDevices();
6834
+ console.log('_initializeRecorderWithDefaultDevice: got devices:', devices.length);
6738
6835
  if (devices.length) {
6836
+ console.log('_initializeRecorderWithDefaultDevice: calling deviceChangeListener');
6739
6837
  await this.deviceChangeListener(devices);
6740
6838
  return;
6741
6839
  }
@@ -6745,6 +6843,7 @@ class LayercodeClient {
6745
6843
  console.warn('Unable to prime audio devices from listDevices()', error);
6746
6844
  }
6747
6845
  try {
6846
+ console.log('_initializeRecorderWithDefaultDevice: calling setInputDevice default');
6748
6847
  await this.setInputDevice('default');
6749
6848
  }
6750
6849
  catch (error) {
@@ -6793,6 +6892,7 @@ class LayercodeClient {
6793
6892
  });
6794
6893
  this.deviceChangeListener = async (devices) => {
6795
6894
  var _a;
6895
+ console.log('deviceChangeListener called, devices:', devices.length, 'recorderStarted:', this.recorderStarted);
6796
6896
  try {
6797
6897
  // Notify user that devices have changed
6798
6898
  this.options.onDevicesChanged(devices);
@@ -6801,6 +6901,7 @@ class LayercodeClient {
6801
6901
  const previousDefaultDeviceKey = this.lastKnownSystemDefaultDeviceKey;
6802
6902
  const currentDefaultDeviceKey = this._getDeviceComparisonKey(defaultDevice);
6803
6903
  let shouldSwitch = !this.recorderStarted;
6904
+ console.log('deviceChangeListener: shouldSwitch initial:', shouldSwitch);
6804
6905
  if (!shouldSwitch) {
6805
6906
  if (usingDefaultDevice) {
6806
6907
  if (!defaultDevice) {
@@ -6820,6 +6921,7 @@ class LayercodeClient {
6820
6921
  }
6821
6922
  }
6822
6923
  this.lastKnownSystemDefaultDeviceKey = currentDefaultDeviceKey;
6924
+ console.log('deviceChangeListener: final shouldSwitch:', shouldSwitch);
6823
6925
  if (shouldSwitch) {
6824
6926
  console.debug('Selecting audio input device after change');
6825
6927
  let targetDeviceId = null;