@layercode/js-sdk 2.8.2 → 2.8.3

@@ -5318,13 +5318,15 @@ registerProcessor('audio_processor', AudioProcessor);
  * @returns {Promise<true>}
  */
  async requestPermission() {
+ console.log('ensureUserMediaAccess');
  try {
- console.log('ensureUserMediaAccess');
- await navigator.mediaDevices.getUserMedia({
+ const stream = await navigator.mediaDevices.getUserMedia({
  audio: true,
  });
+ // Stop the tracks immediately after getting permission
+ stream.getTracks().forEach(track => track.stop());
  } catch (fallbackError) {
- window.alert('You must grant microphone access to use this feature.');
+ console.error('getUserMedia failed:', fallbackError.name, fallbackError.message);
  throw fallbackError;
  }
  return true;
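
The new requestPermission body opens a microphone stream only to confirm permission and then releases it, so the browser's recording indicator does not stay lit after the check. A minimal standalone sketch of the same probe-and-release pattern, using only standard Web APIs (the helper name is illustrative, not part of the SDK):

    // Request microphone access purely to trigger the permission prompt,
    // then stop every track so the device is released immediately.
    async function probeMicrophonePermission() {
      try {
        const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
        stream.getTracks().forEach(track => track.stop());
        return true;
      } catch (error) {
        // Mirrors the change above: log instead of window.alert, then rethrow.
        console.error('getUserMedia failed:', error.name, error.message);
        throw error;
      }
    }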
@@ -5972,6 +5974,7 @@ registerProcessor('audio_processor', AudioProcessor);
  this.recorderStarted = false;
  this.readySent = false;
  this.currentTurnId = null;
+ this.sentReplayFinishedForDisabledOutput = false;
  this.audioBuffer = [];
  this.vadConfig = null;
  this.activeDeviceId = null;
@@ -6131,11 +6134,14 @@ registerProcessor('audio_processor', AudioProcessor);
  this.options.onAgentSpeakingChange(shouldReportSpeaking);
  }
  _setUserSpeaking(isSpeaking) {
- const shouldReportSpeaking = this._shouldCaptureUserAudio() && isSpeaking;
+ const shouldCapture = this._shouldCaptureUserAudio();
+ const shouldReportSpeaking = shouldCapture && isSpeaking;
+ console.log('_setUserSpeaking called:', isSpeaking, 'shouldCapture:', shouldCapture, 'shouldReportSpeaking:', shouldReportSpeaking, 'current userIsSpeaking:', this.userIsSpeaking);
  if (this.userIsSpeaking === shouldReportSpeaking) {
  return;
  }
  this.userIsSpeaking = shouldReportSpeaking;
+ console.log('_setUserSpeaking: updated userIsSpeaking to:', this.userIsSpeaking);
  this.options.onUserIsSpeakingChange(shouldReportSpeaking);
  }
  /**
@@ -6185,6 +6191,7 @@ registerProcessor('audio_processor', AudioProcessor);
  * @param {MessageEvent} event - The WebSocket message event
  */
  async _handleWebSocketMessage(event) {
+ var _a, _b;
  try {
  const message = JSON.parse(event.data);
  if (message.type !== 'response.audio') {
@@ -6197,6 +6204,20 @@ registerProcessor('audio_processor', AudioProcessor);
  // Start tracking new agent turn
  console.debug('Agent turn started, will track new turn ID from audio/text');
  this._setUserSpeaking(false);
+ // Reset the flag for the new assistant turn
+ this.sentReplayFinishedForDisabledOutput = false;
+ // When assistant's turn starts but we're not playing audio,
+ // we need to tell the server we're "done" with playback so it can
+ // transition the turn back to user. Use a small delay to let any
+ // response.audio/response.end messages arrive first.
+ if (!this.audioOutput) {
+ setTimeout(() => {
+ if (!this.audioOutput && !this.sentReplayFinishedForDisabledOutput) {
+ this.sentReplayFinishedForDisabledOutput = true;
+ this._clientResponseAudioReplayFinished();
+ }
+ }, 1000);
+ }
  }
  else if (message.role === 'user' && !this.pushToTalkEnabled) {
  // Interrupt any playing agent audio if this is a turn triggered by the server (and not push to talk, which will have already called interrupt)
@@ -6216,7 +6237,25 @@ registerProcessor('audio_processor', AudioProcessor);
  });
  break;
  }
+ case 'response.end': {
+ // When audioOutput is disabled, notify server that "playback" is complete
+ if (!this.audioOutput && !this.sentReplayFinishedForDisabledOutput) {
+ this.sentReplayFinishedForDisabledOutput = true;
+ this._clientResponseAudioReplayFinished();
+ }
+ (_b = (_a = this.options).onMessage) === null || _b === void 0 ? void 0 : _b.call(_a, message);
+ break;
+ }
  case 'response.audio': {
+ // Skip audio playback if audioOutput is disabled
+ if (!this.audioOutput) {
+ // Send replay_finished so server knows we're "done" with playback (only once per turn)
+ if (!this.sentReplayFinishedForDisabledOutput) {
+ this.sentReplayFinishedForDisabledOutput = true;
+ this._clientResponseAudioReplayFinished();
+ }
+ break;
+ }
  await this._waitForAudioOutputReady();
  const audioBuffer = base64ToArrayBuffer(message.content);
  const hasAudioSamples = audioBuffer.byteLength > 0;
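
With audioOutput disabled the client never plays response.audio, yet it still has to tell the server that "playback" finished or the turn would never hand back to the user. The diff does this from three places: the one-second fallback timer when the agent turn starts, the new response.end case, and the response.audio case, all guarded by the per-turn sentReplayFinishedForDisabledOutput flag. A minimal sketch of that once-per-turn latch in isolation (class and callback names are illustrative; only the flag semantics come from the diff):

    // Whichever code path fires first sends the notification; later paths see
    // the flag and do nothing. The flag is reset when a new agent turn begins.
    class ReplayFinishedLatch {
      constructor(sendReplayFinished) {
        this.sendReplayFinished = sendReplayFinished; // e.g. wraps _clientResponseAudioReplayFinished()
        this.sent = false;
      }
      resetForNewTurn() {
        this.sent = false;
      }
      fire() {
        if (this.sent) return;
        this.sent = true;
        this.sendReplayFinished();
      }
    }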
@@ -6351,6 +6390,9 @@ registerProcessor('audio_processor', AudioProcessor);
  }
  _sendReadyIfNeeded() {
  var _a;
+ // Send client.ready when either:
+ // 1. Recorder is started (audio mode active)
+ // 2. audioInput is false (text-only mode, but server should still be ready)
  const audioReady = this.recorderStarted || !this.audioInput;
  if (audioReady && ((_a = this.ws) === null || _a === void 0 ? void 0 : _a.readyState) === WebSocket.OPEN && !this.readySent) {
  this._wsSend({ type: 'client.ready' });
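
The new comment spells out when client.ready may be sent; restated as a standalone predicate for clarity (a sketch only, the SDK keeps this check inline):

    // Ready once the socket is open, client.ready has not been sent yet, and
    // either the recorder is running or audio input is disabled (text-only mode).
    function shouldSendClientReady({ recorderStarted, audioInput, ws, readySent }) {
      const audioReady = recorderStarted || !audioInput;
      return audioReady && ws?.readyState === WebSocket.OPEN && !readySent;
    }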
@@ -6416,12 +6458,16 @@ registerProcessor('audio_processor', AudioProcessor);
  }
  async audioInputConnect() {
  // Turn mic ON
+ console.log('audioInputConnect: requesting permission');
  await this.wavRecorder.requestPermission();
+ console.log('audioInputConnect: setting up device change listener');
  await this._setupDeviceChangeListener();
  // If the recorder hasn't spun up yet, proactively select a device.
  if (!this.recorderStarted && this.deviceChangeListener) {
+ console.log('audioInputConnect: initializing recorder with default device');
  await this._initializeRecorderWithDefaultDevice();
  }
+ console.log('audioInputConnect: done, recorderStarted =', this.recorderStarted);
  }
  async audioInputDisconnect() {
  try {
@@ -6453,11 +6499,25 @@ registerProcessor('audio_processor', AudioProcessor);
  }
  }
  async setAudioOutput(state) {
+ console.log('setAudioOutput called with state:', state, 'current:', this.audioOutput);
  if (this.audioOutput !== state) {
  this.audioOutput = state;
  this._emitAudioOutput();
  if (state) {
- this.wavPlayer.unmute();
+ // Initialize audio output if not already connected
+ // This happens when audioOutput was initially false and is now being enabled
+ if (!this.wavPlayer.context) {
+ console.log('setAudioOutput: initializing audio output (no context yet)');
+ // Store the promise so _waitForAudioOutputReady() can await it
+ // This prevents response.audio from running before AudioContext is ready
+ const setupPromise = this.setupAudioOutput();
+ this.audioOutputReady = setupPromise;
+ await setupPromise;
+ }
+ else {
+ console.log('setAudioOutput: unmuting existing player');
+ this.wavPlayer.unmute();
+ }
  // Sync agentSpeaking state with actual playback state when enabling audio output
  this._syncAgentSpeakingState();
  }
@@ -6614,6 +6674,11 @@ registerProcessor('audio_processor', AudioProcessor);
  return authorizeSessionResponseBody;
  }
  async setupAudioOutput() {
+ // Only initialize audio player if audioOutput is enabled
+ // This prevents AudioContext creation before user gesture when audio is disabled
+ if (!this.audioOutput) {
+ return;
+ }
  // Initialize audio player
  // wavRecorder will be started from the onDeviceSwitched callback,
  // which is called when the device is first initialized and also when the device is switched
@@ -6624,12 +6689,7 @@ registerProcessor('audio_processor', AudioProcessor);
  if (!this.options.enableAmplitudeMonitoring) {
  this.agentAudioAmplitude = 0;
  }
- if (this.audioOutput) {
- this.wavPlayer.unmute();
- }
- else {
- this.wavPlayer.mute();
- }
+ this.wavPlayer.unmute();
  }
  async connectToAudioInput() {
  if (!this.audioInput) {
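
Taken together, the setAudioOutput and setupAudioOutput changes defer AudioContext creation until audio output is first enabled, which callers typically do from a user gesture, and keep the setup promise around so response.audio handling can await it instead of racing initialization. A minimal sketch of the same lazy-init-with-readiness-promise pattern against the plain Web Audio API (class and field names are illustrative, not the SDK's):

    // No AudioContext until the first enable(); play() awaits the readiness
    // promise, mirroring _waitForAudioOutputReady() in the diff above.
    class LazyAudioOutput {
      constructor() {
        this.context = null;
        this.ready = Promise.resolve();
      }
      enable() {
        if (!this.context) {
          // Created inside enable(), which the app calls from a user gesture,
          // so autoplay policies allow the context to start.
          this.ready = (async () => {
            this.context = new AudioContext();
            await this.context.resume();
          })();
        }
        return this.ready;
      }
      async play(audioBuffer) {
        await this.ready;
        const source = this.context.createBufferSource();
        source.buffer = audioBuffer;
        source.connect(this.context.destination);
        source.start();
      }
    }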
@@ -6678,6 +6738,7 @@ registerProcessor('audio_processor', AudioProcessor);
  */
  async setInputDevice(deviceId) {
  var _a, _b, _c;
+ console.log('setInputDevice called with:', deviceId, 'audioInput:', this.audioInput);
  const normalizedDeviceId = !deviceId || deviceId === 'default' ? null : deviceId;
  this.useSystemDefaultDevice = normalizedDeviceId === null;
  this.deviceId = normalizedDeviceId;
@@ -6686,6 +6747,7 @@ registerProcessor('audio_processor', AudioProcessor);
  return;
  }
  try {
+ console.log('setInputDevice: calling _queueRecorderRestart');
  // Restart recording with the new device
  await this._queueRecorderRestart();
  // Reinitialize VAD with the new audio stream if VAD is enabled
@@ -6769,12 +6831,15 @@ registerProcessor('audio_processor', AudioProcessor);
  return run;
  }
  async _initializeRecorderWithDefaultDevice() {
+ console.log('_initializeRecorderWithDefaultDevice called, deviceChangeListener:', !!this.deviceChangeListener);
  if (!this.deviceChangeListener) {
  return;
  }
  try {
  const devices = await this.wavRecorder.listDevices();
+ console.log('_initializeRecorderWithDefaultDevice: got devices:', devices.length);
  if (devices.length) {
+ console.log('_initializeRecorderWithDefaultDevice: calling deviceChangeListener');
  await this.deviceChangeListener(devices);
  return;
  }
@@ -6784,6 +6849,7 @@ registerProcessor('audio_processor', AudioProcessor);
  console.warn('Unable to prime audio devices from listDevices()', error);
  }
  try {
+ console.log('_initializeRecorderWithDefaultDevice: calling setInputDevice default');
  await this.setInputDevice('default');
  }
  catch (error) {
@@ -6832,6 +6898,7 @@ registerProcessor('audio_processor', AudioProcessor);
  });
  this.deviceChangeListener = async (devices) => {
  var _a;
+ console.log('deviceChangeListener called, devices:', devices.length, 'recorderStarted:', this.recorderStarted);
  try {
  // Notify user that devices have changed
  this.options.onDevicesChanged(devices);
@@ -6840,6 +6907,7 @@ registerProcessor('audio_processor', AudioProcessor);
  const previousDefaultDeviceKey = this.lastKnownSystemDefaultDeviceKey;
  const currentDefaultDeviceKey = this._getDeviceComparisonKey(defaultDevice);
  let shouldSwitch = !this.recorderStarted;
+ console.log('deviceChangeListener: shouldSwitch initial:', shouldSwitch);
  if (!shouldSwitch) {
  if (usingDefaultDevice) {
  if (!defaultDevice) {
@@ -6859,6 +6927,7 @@ registerProcessor('audio_processor', AudioProcessor);
  }
  }
  this.lastKnownSystemDefaultDeviceKey = currentDefaultDeviceKey;
+ console.log('deviceChangeListener: final shouldSwitch:', shouldSwitch);
  if (shouldSwitch) {
  console.debug('Selecting audio input device after change');
  let targetDeviceId = null;