@layercode/js-sdk 2.8.2 → 2.8.3

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -5312,13 +5312,15 @@ class WavRecorder {
5312
5312
  * @returns {Promise<true>}
5313
5313
  */
5314
5314
  async requestPermission() {
5315
+ console.log('ensureUserMediaAccess');
5315
5316
  try {
5316
- console.log('ensureUserMediaAccess');
5317
- await navigator.mediaDevices.getUserMedia({
5317
+ const stream = await navigator.mediaDevices.getUserMedia({
5318
5318
  audio: true,
5319
5319
  });
5320
+ // Stop the tracks immediately after getting permission
5321
+ stream.getTracks().forEach(track => track.stop());
5320
5322
  } catch (fallbackError) {
5321
- window.alert('You must grant microphone access to use this feature.');
5323
+ console.error('getUserMedia failed:', fallbackError.name, fallbackError.message);
5322
5324
  throw fallbackError;
5323
5325
  }
5324
5326
  return true;
@@ -5966,6 +5968,7 @@ class LayercodeClient {
5966
5968
  this.recorderStarted = false;
5967
5969
  this.readySent = false;
5968
5970
  this.currentTurnId = null;
5971
+ this.sentReplayFinishedForDisabledOutput = false;
5969
5972
  this.audioBuffer = [];
5970
5973
  this.vadConfig = null;
5971
5974
  this.activeDeviceId = null;
@@ -6125,11 +6128,14 @@ class LayercodeClient {
6125
6128
  this.options.onAgentSpeakingChange(shouldReportSpeaking);
6126
6129
  }
6127
6130
  _setUserSpeaking(isSpeaking) {
6128
- const shouldReportSpeaking = this._shouldCaptureUserAudio() && isSpeaking;
6131
+ const shouldCapture = this._shouldCaptureUserAudio();
6132
+ const shouldReportSpeaking = shouldCapture && isSpeaking;
6133
+ console.log('_setUserSpeaking called:', isSpeaking, 'shouldCapture:', shouldCapture, 'shouldReportSpeaking:', shouldReportSpeaking, 'current userIsSpeaking:', this.userIsSpeaking);
6129
6134
  if (this.userIsSpeaking === shouldReportSpeaking) {
6130
6135
  return;
6131
6136
  }
6132
6137
  this.userIsSpeaking = shouldReportSpeaking;
6138
+ console.log('_setUserSpeaking: updated userIsSpeaking to:', this.userIsSpeaking);
6133
6139
  this.options.onUserIsSpeakingChange(shouldReportSpeaking);
6134
6140
  }
6135
6141
  /**
@@ -6179,6 +6185,7 @@ class LayercodeClient {
6179
6185
  * @param {MessageEvent} event - The WebSocket message event
6180
6186
  */
6181
6187
  async _handleWebSocketMessage(event) {
6188
+ var _a, _b;
6182
6189
  try {
6183
6190
  const message = JSON.parse(event.data);
6184
6191
  if (message.type !== 'response.audio') {
@@ -6191,6 +6198,20 @@ class LayercodeClient {
6191
6198
  // Start tracking new agent turn
6192
6199
  console.debug('Agent turn started, will track new turn ID from audio/text');
6193
6200
  this._setUserSpeaking(false);
6201
+ // Reset the flag for the new assistant turn
6202
+ this.sentReplayFinishedForDisabledOutput = false;
6203
+ // When assistant's turn starts but we're not playing audio,
6204
+ // we need to tell the server we're "done" with playback so it can
6205
+ // transition the turn back to user. Use a small delay to let any
6206
+ // response.audio/response.end messages arrive first.
6207
+ if (!this.audioOutput) {
6208
+ setTimeout(() => {
6209
+ if (!this.audioOutput && !this.sentReplayFinishedForDisabledOutput) {
6210
+ this.sentReplayFinishedForDisabledOutput = true;
6211
+ this._clientResponseAudioReplayFinished();
6212
+ }
6213
+ }, 1000);
6214
+ }
6194
6215
  }
6195
6216
  else if (message.role === 'user' && !this.pushToTalkEnabled) {
6196
6217
  // Interrupt any playing agent audio if this is a turn triggered by the server (and not push to talk, which will have already called interrupt)
@@ -6210,7 +6231,25 @@ class LayercodeClient {
6210
6231
  });
6211
6232
  break;
6212
6233
  }
6234
+ case 'response.end': {
6235
+ // When audioOutput is disabled, notify server that "playback" is complete
6236
+ if (!this.audioOutput && !this.sentReplayFinishedForDisabledOutput) {
6237
+ this.sentReplayFinishedForDisabledOutput = true;
6238
+ this._clientResponseAudioReplayFinished();
6239
+ }
6240
+ (_b = (_a = this.options).onMessage) === null || _b === void 0 ? void 0 : _b.call(_a, message);
6241
+ break;
6242
+ }
6213
6243
  case 'response.audio': {
6244
+ // Skip audio playback if audioOutput is disabled
6245
+ if (!this.audioOutput) {
6246
+ // Send replay_finished so server knows we're "done" with playback (only once per turn)
6247
+ if (!this.sentReplayFinishedForDisabledOutput) {
6248
+ this.sentReplayFinishedForDisabledOutput = true;
6249
+ this._clientResponseAudioReplayFinished();
6250
+ }
6251
+ break;
6252
+ }
6214
6253
  await this._waitForAudioOutputReady();
6215
6254
  const audioBuffer = base64ToArrayBuffer(message.content);
6216
6255
  const hasAudioSamples = audioBuffer.byteLength > 0;
@@ -6345,6 +6384,9 @@ class LayercodeClient {
6345
6384
  }
6346
6385
  _sendReadyIfNeeded() {
6347
6386
  var _a;
6387
+ // Send client.ready when either:
6388
+ // 1. Recorder is started (audio mode active)
6389
+ // 2. audioInput is false (text-only mode, but server should still be ready)
6348
6390
  const audioReady = this.recorderStarted || !this.audioInput;
6349
6391
  if (audioReady && ((_a = this.ws) === null || _a === void 0 ? void 0 : _a.readyState) === WebSocket.OPEN && !this.readySent) {
6350
6392
  this._wsSend({ type: 'client.ready' });
@@ -6410,12 +6452,16 @@ class LayercodeClient {
6410
6452
  }
6411
6453
  async audioInputConnect() {
6412
6454
  // Turn mic ON
6455
+ console.log('audioInputConnect: requesting permission');
6413
6456
  await this.wavRecorder.requestPermission();
6457
+ console.log('audioInputConnect: setting up device change listener');
6414
6458
  await this._setupDeviceChangeListener();
6415
6459
  // If the recorder hasn't spun up yet, proactively select a device.
6416
6460
  if (!this.recorderStarted && this.deviceChangeListener) {
6461
+ console.log('audioInputConnect: initializing recorder with default device');
6417
6462
  await this._initializeRecorderWithDefaultDevice();
6418
6463
  }
6464
+ console.log('audioInputConnect: done, recorderStarted =', this.recorderStarted);
6419
6465
  }
6420
6466
  async audioInputDisconnect() {
6421
6467
  try {
@@ -6447,11 +6493,25 @@ class LayercodeClient {
6447
6493
  }
6448
6494
  }
6449
6495
  async setAudioOutput(state) {
6496
+ console.log('setAudioOutput called with state:', state, 'current:', this.audioOutput);
6450
6497
  if (this.audioOutput !== state) {
6451
6498
  this.audioOutput = state;
6452
6499
  this._emitAudioOutput();
6453
6500
  if (state) {
6454
- this.wavPlayer.unmute();
6501
+ // Initialize audio output if not already connected
6502
+ // This happens when audioOutput was initially false and is now being enabled
6503
+ if (!this.wavPlayer.context) {
6504
+ console.log('setAudioOutput: initializing audio output (no context yet)');
6505
+ // Store the promise so _waitForAudioOutputReady() can await it
6506
+ // This prevents response.audio from running before AudioContext is ready
6507
+ const setupPromise = this.setupAudioOutput();
6508
+ this.audioOutputReady = setupPromise;
6509
+ await setupPromise;
6510
+ }
6511
+ else {
6512
+ console.log('setAudioOutput: unmuting existing player');
6513
+ this.wavPlayer.unmute();
6514
+ }
6455
6515
  // Sync agentSpeaking state with actual playback state when enabling audio output
6456
6516
  this._syncAgentSpeakingState();
6457
6517
  }
@@ -6608,6 +6668,11 @@ class LayercodeClient {
6608
6668
  return authorizeSessionResponseBody;
6609
6669
  }
6610
6670
  async setupAudioOutput() {
6671
+ // Only initialize audio player if audioOutput is enabled
6672
+ // This prevents AudioContext creation before user gesture when audio is disabled
6673
+ if (!this.audioOutput) {
6674
+ return;
6675
+ }
6611
6676
  // Initialize audio player
6612
6677
  // wavRecorder will be started from the onDeviceSwitched callback,
6613
6678
  // which is called when the device is first initialized and also when the device is switched
@@ -6618,12 +6683,7 @@ class LayercodeClient {
6618
6683
  if (!this.options.enableAmplitudeMonitoring) {
6619
6684
  this.agentAudioAmplitude = 0;
6620
6685
  }
6621
- if (this.audioOutput) {
6622
- this.wavPlayer.unmute();
6623
- }
6624
- else {
6625
- this.wavPlayer.mute();
6626
- }
6686
+ this.wavPlayer.unmute();
6627
6687
  }
6628
6688
  async connectToAudioInput() {
6629
6689
  if (!this.audioInput) {
@@ -6672,6 +6732,7 @@ class LayercodeClient {
6672
6732
  */
6673
6733
  async setInputDevice(deviceId) {
6674
6734
  var _a, _b, _c;
6735
+ console.log('setInputDevice called with:', deviceId, 'audioInput:', this.audioInput);
6675
6736
  const normalizedDeviceId = !deviceId || deviceId === 'default' ? null : deviceId;
6676
6737
  this.useSystemDefaultDevice = normalizedDeviceId === null;
6677
6738
  this.deviceId = normalizedDeviceId;
@@ -6680,6 +6741,7 @@ class LayercodeClient {
6680
6741
  return;
6681
6742
  }
6682
6743
  try {
6744
+ console.log('setInputDevice: calling _queueRecorderRestart');
6683
6745
  // Restart recording with the new device
6684
6746
  await this._queueRecorderRestart();
6685
6747
  // Reinitialize VAD with the new audio stream if VAD is enabled
@@ -6763,12 +6825,15 @@ class LayercodeClient {
6763
6825
  return run;
6764
6826
  }
6765
6827
  async _initializeRecorderWithDefaultDevice() {
6828
+ console.log('_initializeRecorderWithDefaultDevice called, deviceChangeListener:', !!this.deviceChangeListener);
6766
6829
  if (!this.deviceChangeListener) {
6767
6830
  return;
6768
6831
  }
6769
6832
  try {
6770
6833
  const devices = await this.wavRecorder.listDevices();
6834
+ console.log('_initializeRecorderWithDefaultDevice: got devices:', devices.length);
6771
6835
  if (devices.length) {
6836
+ console.log('_initializeRecorderWithDefaultDevice: calling deviceChangeListener');
6772
6837
  await this.deviceChangeListener(devices);
6773
6838
  return;
6774
6839
  }
@@ -6778,6 +6843,7 @@ class LayercodeClient {
6778
6843
  console.warn('Unable to prime audio devices from listDevices()', error);
6779
6844
  }
6780
6845
  try {
6846
+ console.log('_initializeRecorderWithDefaultDevice: calling setInputDevice default');
6781
6847
  await this.setInputDevice('default');
6782
6848
  }
6783
6849
  catch (error) {
@@ -6826,6 +6892,7 @@ class LayercodeClient {
6826
6892
  });
6827
6893
  this.deviceChangeListener = async (devices) => {
6828
6894
  var _a;
6895
+ console.log('deviceChangeListener called, devices:', devices.length, 'recorderStarted:', this.recorderStarted);
6829
6896
  try {
6830
6897
  // Notify user that devices have changed
6831
6898
  this.options.onDevicesChanged(devices);
@@ -6834,6 +6901,7 @@ class LayercodeClient {
6834
6901
  const previousDefaultDeviceKey = this.lastKnownSystemDefaultDeviceKey;
6835
6902
  const currentDefaultDeviceKey = this._getDeviceComparisonKey(defaultDevice);
6836
6903
  let shouldSwitch = !this.recorderStarted;
6904
+ console.log('deviceChangeListener: shouldSwitch initial:', shouldSwitch);
6837
6905
  if (!shouldSwitch) {
6838
6906
  if (usingDefaultDevice) {
6839
6907
  if (!defaultDevice) {
@@ -6853,6 +6921,7 @@ class LayercodeClient {
6853
6921
  }
6854
6922
  }
6855
6923
  this.lastKnownSystemDefaultDeviceKey = currentDefaultDeviceKey;
6924
+ console.log('deviceChangeListener: final shouldSwitch:', shouldSwitch);
6856
6925
  if (shouldSwitch) {
6857
6926
  console.debug('Selecting audio input device after change');
6858
6927
  let targetDeviceId = null;