@layercode/js-sdk 2.8.3 → 2.8.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5082,11 +5082,7 @@ class WavRecorder {
5082
5082
  * @param {{sampleRate?: number, outputToSpeakers?: boolean, debug?: boolean}} [options]
5083
5083
  * @returns {WavRecorder}
5084
5084
  */
5085
- constructor({
5086
- sampleRate = 24000,
5087
- outputToSpeakers = false,
5088
- debug = false,
5089
- } = {}) {
5085
+ constructor({ sampleRate = 24000, outputToSpeakers = false, debug = false } = {}) {
5090
5086
  // Script source
5091
5087
  this.scriptSrc = AudioProcessorSrc;
5092
5088
  // Config
@@ -5104,6 +5100,11 @@ class WavRecorder {
5104
5100
  this.analyser = null;
5105
5101
  this.recording = false;
5106
5102
  this.contextSampleRate = sampleRate;
5103
+ // Track whether we've already obtained microphone permission
5104
+ // This avoids redundant getUserMedia calls which are expensive on iOS Safari
5105
+ this._hasPermission = false;
5106
+ // Promise used to dedupe concurrent requestPermission() calls
5107
+ this._permissionPromise = null;
5107
5108
  // Event handling with AudioWorklet
5108
5109
  this._lastEventId = 0;
5109
5110
  this.eventReceipts = {};
@@ -5131,17 +5132,13 @@ class WavRecorder {
5131
5132
  let blob;
5132
5133
  if (audioData instanceof Blob) {
5133
5134
  if (fromSampleRate !== -1) {
5134
- throw new Error(
5135
- `Can not specify "fromSampleRate" when reading from Blob`,
5136
- );
5135
+ throw new Error(`Can not specify "fromSampleRate" when reading from Blob`);
5137
5136
  }
5138
5137
  blob = audioData;
5139
5138
  arrayBuffer = await blob.arrayBuffer();
5140
5139
  } else if (audioData instanceof ArrayBuffer) {
5141
5140
  if (fromSampleRate !== -1) {
5142
- throw new Error(
5143
- `Can not specify "fromSampleRate" when reading from ArrayBuffer`,
5144
- );
5141
+ throw new Error(`Can not specify "fromSampleRate" when reading from ArrayBuffer`);
5145
5142
  }
5146
5143
  arrayBuffer = audioData;
5147
5144
  blob = new Blob([arrayBuffer], { type: 'audio/wav' });
@@ -5159,14 +5156,10 @@ class WavRecorder {
5159
5156
  } else if (audioData instanceof Array) {
5160
5157
  float32Array = new Float32Array(audioData);
5161
5158
  } else {
5162
- throw new Error(
5163
- `"audioData" must be one of: Blob, Float32Arrray, Int16Array, ArrayBuffer, Array<number>`,
5164
- );
5159
+ throw new Error(`"audioData" must be one of: Blob, Float32Arrray, Int16Array, ArrayBuffer, Array<number>`);
5165
5160
  }
5166
5161
  if (fromSampleRate === -1) {
5167
- throw new Error(
5168
- `Must specify "fromSampleRate" when reading from Float32Array, In16Array or Array`,
5169
- );
5162
+ throw new Error(`Must specify "fromSampleRate" when reading from Float32Array, Int16Array or Array`);
5170
5163
  } else if (fromSampleRate < 3000) {
5171
5164
  throw new Error(`Minimum "fromSampleRate" is 3000 (3kHz)`);
5172
5165
  }
@@ -5196,12 +5189,13 @@ class WavRecorder {
5196
5189
 
5197
5190
  /**
5198
5191
  * Logs data in debug mode
5199
- * @param {...any} arguments
5192
+ * @param {...any} args
5200
5193
  * @returns {true}
5201
5194
  */
5202
- log() {
5195
+ log(...args) {
5203
5196
  if (this.debug) {
5204
- this.log(...arguments);
5197
+ // eslint-disable-next-line no-console
5198
+ console.log(...args);
5205
5199
  }
5206
5200
  return true;
5207
5201
  }
@@ -5274,10 +5268,7 @@ class WavRecorder {
5274
5268
  */
5275
5269
  listenForDeviceChange(callback) {
5276
5270
  if (callback === null && this._deviceChangeCallback) {
5277
- navigator.mediaDevices.removeEventListener(
5278
- 'devicechange',
5279
- this._deviceChangeCallback,
5280
- );
5271
+ navigator.mediaDevices.removeEventListener('devicechange', this._deviceChangeCallback);
5281
5272
  this._deviceChangeCallback = null;
5282
5273
  } else if (callback !== null) {
5283
5274
  // Basically a debounce; we only want this called once when devices change
@@ -5309,21 +5300,39 @@ class WavRecorder {
5309
5300
 
5310
5301
  /**
5311
5302
  * Manually request permission to use the microphone
5303
+ * Skips if permission has already been granted to avoid expensive redundant getUserMedia calls.
5304
+ * Dedupes concurrent calls to prevent multiple getUserMedia requests.
5312
5305
  * @returns {Promise<true>}
5313
5306
  */
5314
5307
  async requestPermission() {
5315
- console.log('ensureUserMediaAccess');
5316
- try {
5317
- const stream = await navigator.mediaDevices.getUserMedia({
5318
- audio: true,
5319
- });
5320
- // Stop the tracks immediately after getting permission
5321
- stream.getTracks().forEach(track => track.stop());
5322
- } catch (fallbackError) {
5323
- console.error('getUserMedia failed:', fallbackError.name, fallbackError.message);
5324
- throw fallbackError;
5308
+ // Skip if we already have permission - each getUserMedia is expensive on iOS Safari
5309
+ if (this._hasPermission) {
5310
+ return true;
5325
5311
  }
5326
- return true;
5312
+ // Dedupe concurrent calls: if a permission request is already in flight, wait for it
5313
+ if (this._permissionPromise) {
5314
+ return this._permissionPromise;
5315
+ }
5316
+
5317
+ console.log('ensureUserMediaAccess');
5318
+ this._permissionPromise = (async () => {
5319
+ try {
5320
+ const stream = await navigator.mediaDevices.getUserMedia({
5321
+ audio: true,
5322
+ });
5323
+ // Stop the tracks immediately after getting permission
5324
+ stream.getTracks().forEach((track) => track.stop());
5325
+ this._hasPermission = true;
5326
+ return true;
5327
+ } catch (fallbackError) {
5328
+ console.error('getUserMedia failed:', fallbackError.name, fallbackError.message);
5329
+ throw fallbackError;
5330
+ } finally {
5331
+ this._permissionPromise = null;
5332
+ }
5333
+ })();
5334
+
5335
+ return this._permissionPromise;
5327
5336
  }
5328
5337
 
5329
5338
  /**
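A minimal sketch (not part of the package) of how the caching and dedup above are expected to behave from calling code; it assumes only the WavRecorder class shown in this diff and runs inside an async context:

    const recorder = new WavRecorder({ sampleRate: 24000 });

    // Two overlapping calls share one in-flight getUserMedia request (deduped via _permissionPromise),
    // so the browser shows at most one permission prompt.
    const [first, second] = await Promise.all([
      recorder.requestPermission(),
      recorder.requestPermission(),
    ]);
    console.log(first, second); // true true

    // Later calls short-circuit on the cached _hasPermission flag and never call getUserMedia again.
    await recorder.requestPermission();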
@@ -5331,25 +5340,18 @@ class WavRecorder {
5331
5340
  * @returns {Promise<Array<MediaDeviceInfo & {default: boolean}>>}
5332
5341
  */
5333
5342
  async listDevices() {
5334
- if (
5335
- !navigator.mediaDevices ||
5336
- !('enumerateDevices' in navigator.mediaDevices)
5337
- ) {
5343
+ if (!navigator.mediaDevices || !('enumerateDevices' in navigator.mediaDevices)) {
5338
5344
  throw new Error('Could not request user devices');
5339
5345
  }
5340
5346
  await this.requestPermission();
5341
5347
 
5342
5348
  const devices = await navigator.mediaDevices.enumerateDevices();
5343
5349
  const audioDevices = devices.filter((device) => device.kind === 'audioinput');
5344
- const defaultDeviceIndex = audioDevices.findIndex(
5345
- (device) => device.deviceId === 'default',
5346
- );
5350
+ const defaultDeviceIndex = audioDevices.findIndex((device) => device.deviceId === 'default');
5347
5351
  const deviceList = [];
5348
5352
  if (defaultDeviceIndex !== -1) {
5349
5353
  let defaultDevice = audioDevices.splice(defaultDeviceIndex, 1)[0];
5350
- let existingIndex = audioDevices.findIndex(
5351
- (device) => device.groupId === defaultDevice.groupId,
5352
- );
5354
+ let existingIndex = audioDevices.findIndex((device) => device.groupId === defaultDevice.groupId);
5353
5355
  if (existingIndex !== -1) {
5354
5356
  defaultDevice = audioDevices.splice(existingIndex, 1)[0];
5355
5357
  }
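A hypothetical sketch of the listDevices() output shaped above: the system default input is surfaced with default: true; the deviceId, groupId, and label values here are invented:

    const devices = await recorder.listDevices();
    // e.g. [{ deviceId: 'abc123', groupId: 'g1', kind: 'audioinput', label: 'Built-in Microphone', default: true }, ...]
    const defaultInput = devices.find((device) => device.default) || devices[0];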
@@ -5371,15 +5373,10 @@ class WavRecorder {
5371
5373
  */
5372
5374
  async begin(deviceId) {
5373
5375
  if (this.processor) {
5374
- throw new Error(
5375
- `Already connected: please call .end() to start a new session`,
5376
- );
5376
+ throw new Error(`Already connected: please call .end() to start a new session`);
5377
5377
  }
5378
5378
 
5379
- if (
5380
- !navigator.mediaDevices ||
5381
- !('getUserMedia' in navigator.mediaDevices)
5382
- ) {
5379
+ if (!navigator.mediaDevices || !('getUserMedia' in navigator.mediaDevices)) {
5383
5380
  throw new Error('Could not request user media');
5384
5381
  }
5385
5382
  try {
@@ -5390,14 +5387,16 @@ class WavRecorder {
5390
5387
  echoCancellation: true,
5391
5388
  autoGainControl: true,
5392
5389
  noiseSuppression: true,
5393
- }
5390
+ },
5394
5391
  };
5395
5392
  if (deviceId) {
5396
5393
  config.audio.deviceId = { exact: deviceId };
5397
5394
  }
5398
5395
  this.stream = await navigator.mediaDevices.getUserMedia(config);
5396
+ // Mark permission as granted so listDevices() won't call requestPermission() again
5397
+ this._hasPermission = true;
5399
5398
  } catch (err) {
5400
- throw new Error('Could not start media stream');
5399
+ throw err;
5401
5400
  }
5402
5401
 
5403
5402
  const createContext = (rate) => {
@@ -5449,10 +5448,7 @@ class WavRecorder {
5449
5448
  raw: WavPacker.mergeBuffers(buffer.raw, data.raw),
5450
5449
  mono: WavPacker.mergeBuffers(buffer.mono, data.mono),
5451
5450
  };
5452
- if (
5453
- this._chunkProcessorBuffer.mono.byteLength >=
5454
- this._chunkProcessorSize
5455
- ) {
5451
+ if (this._chunkProcessorBuffer.mono.byteLength >= this._chunkProcessorSize) {
5456
5452
  this._chunkProcessor(this._chunkProcessorBuffer);
5457
5453
  this._chunkProcessorBuffer = {
5458
5454
  raw: new ArrayBuffer(0),
@@ -5480,11 +5476,7 @@ class WavRecorder {
5480
5476
  node.connect(analyser);
5481
5477
  if (this.outputToSpeakers) {
5482
5478
  // eslint-disable-next-line no-console
5483
- console.warn(
5484
- 'Warning: Output to speakers may affect sound quality,\n' +
5485
- 'especially due to system audio feedback preventative measures.\n' +
5486
- 'use only for debugging',
5487
- );
5479
+ console.warn('Warning: Output to speakers may affect sound quality,\n' + 'especially due to system audio feedback preventative measures.\n' + 'use only for debugging');
5488
5480
  analyser.connect(context.destination);
5489
5481
  }
5490
5482
 
@@ -5511,26 +5503,14 @@ class WavRecorder {
5511
5503
  * @param {number} [maxDecibels] default -30
5512
5504
  * @returns {import('./analysis/audio_analysis.js').AudioAnalysisOutputType}
5513
5505
  */
5514
- getFrequencies(
5515
- analysisType = 'frequency',
5516
- minDecibels = -100,
5517
- maxDecibels = -30,
5518
- ) {
5506
+ getFrequencies(analysisType = 'frequency', minDecibels = -100, maxDecibels = -30) {
5519
5507
  if (!this.processor) {
5520
5508
  throw new Error('Session ended: please call .begin() first');
5521
5509
  }
5522
- return AudioAnalysis.getFrequencies(
5523
- this.analyser,
5524
- this.sampleRate,
5525
- null,
5526
- analysisType,
5527
- minDecibels,
5528
- maxDecibels,
5529
- );
5510
+ return AudioAnalysis.getFrequencies(this.analyser, this.sampleRate, null, analysisType, minDecibels, maxDecibels);
5530
5511
  }
5531
5512
 
5532
-
5533
- /**
5513
+ /**
5534
5514
  * Gets the real-time amplitude of the audio signal
5535
5515
  * @returns {number} Amplitude value between 0 and 1
5536
5516
  */
@@ -5655,9 +5635,7 @@ class WavRecorder {
5655
5635
  throw new Error('Session ended: please call .begin() first');
5656
5636
  }
5657
5637
  if (!force && this.recording) {
5658
- throw new Error(
5659
- 'Currently recording: please call .pause() first, or call .save(true) to force',
5660
- );
5638
+ throw new Error('Currently recording: please call .pause() first, or call .save(true) to force');
5661
5639
  }
5662
5640
  this.log('Exporting ...');
5663
5641
  const exportData = await this._event('export');
@@ -5764,6 +5742,7 @@ function arrayBufferToBase64(arrayBuffer) {
5764
5742
  return btoa(binary);
5765
5743
  }
5766
5744
 
5745
+ //// src/index.ts
5767
5746
  /* eslint-env browser */
5768
5747
  // import { env as ortEnv } from 'onnxruntime-web';
5769
5748
  // @ts-ignore - VAD package does not provide TypeScript types
@@ -5771,137 +5750,40 @@ const NOOP = () => { };
5771
5750
  const DEFAULT_WS_URL = 'wss://api.layercode.com/v1/agents/web/websocket';
5772
5751
  // SDK version - updated when publishing
5773
5752
  const SDK_VERSION = '2.7.0';
5774
- const MEDIA_DEVICE_CHANGE_EVENT = 'devicechange';
5775
- const MEDIA_DEVICE_KIND_AUDIO = 'audioinput';
5753
+ const DEFAULT_RECORDER_SAMPLE_RATE = 8000;
5776
5754
  const hasMediaDevicesSupport = () => typeof navigator !== 'undefined' && !!navigator.mediaDevices;
5777
- let microphonePermissionPromise = null;
5778
- let microphonePermissionGranted = false;
5779
- const stopStreamTracks = (stream) => {
5780
- if (!stream) {
5781
- return;
5782
- }
5783
- stream.getTracks().forEach((track) => {
5784
- try {
5785
- track.stop();
5786
- }
5787
- catch (_a) {
5788
- /* noop */
5789
- }
5790
- });
5791
- };
5792
- const ensureMicrophonePermissions = async () => {
5793
- if (!hasMediaDevicesSupport()) {
5794
- throw new Error('Media devices are not available in this environment');
5795
- }
5796
- if (microphonePermissionGranted) {
5797
- return;
5798
- }
5799
- if (!microphonePermissionPromise) {
5800
- microphonePermissionPromise = navigator.mediaDevices
5801
- .getUserMedia({ audio: true })
5802
- .then((stream) => {
5803
- microphonePermissionGranted = true;
5804
- stopStreamTracks(stream);
5805
- })
5806
- .finally(() => {
5807
- microphonePermissionPromise = null;
5808
- });
5809
- }
5810
- return microphonePermissionPromise;
5811
- };
5812
- const cloneAudioDevice = (device, isDefault) => {
5755
+ const toLayercodeAudioInputDevice = (device) => {
5813
5756
  const cloned = {
5814
- deviceId: device.deviceId,
5815
- groupId: device.groupId,
5816
- kind: device.kind,
5757
+ ...device,
5817
5758
  label: device.label,
5818
- default: isDefault,
5759
+ default: Boolean(device.default),
5819
5760
  };
5820
5761
  if (typeof device.toJSON === 'function') {
5821
5762
  cloned.toJSON = device.toJSON.bind(device);
5822
5763
  }
5823
5764
  return cloned;
5824
5765
  };
5825
- const normalizeAudioInputDevices = (devices) => {
5826
- const audioDevices = devices.filter((device) => device.kind === MEDIA_DEVICE_KIND_AUDIO);
5827
- if (!audioDevices.length) {
5828
- return [];
5829
- }
5830
- const remaining = [...audioDevices];
5831
- const normalized = [];
5832
- const defaultIndex = remaining.findIndex((device) => device.deviceId === 'default');
5833
- if (defaultIndex !== -1) {
5834
- let defaultDevice = remaining.splice(defaultIndex, 1)[0];
5835
- const groupMatchIndex = remaining.findIndex((device) => device.groupId && defaultDevice.groupId && device.groupId === defaultDevice.groupId);
5836
- if (groupMatchIndex !== -1) {
5837
- defaultDevice = remaining.splice(groupMatchIndex, 1)[0];
5838
- }
5839
- normalized.push(cloneAudioDevice(defaultDevice, true));
5840
- }
5841
- else if (remaining.length) {
5842
- const fallbackDefault = remaining.shift();
5843
- normalized.push(cloneAudioDevice(fallbackDefault, true));
5844
- }
5845
- return normalized.concat(remaining.map((device) => cloneAudioDevice(device, false)));
5846
- };
5847
5766
  const listAudioInputDevices = async () => {
5848
5767
  if (!hasMediaDevicesSupport()) {
5849
5768
  throw new Error('Media devices are not available in this environment');
5850
5769
  }
5851
- await ensureMicrophonePermissions();
5852
- const devices = await navigator.mediaDevices.enumerateDevices();
5853
- return normalizeAudioInputDevices(devices);
5770
+ const recorder = new WavRecorder({ sampleRate: DEFAULT_RECORDER_SAMPLE_RATE });
5771
+ const devices = (await recorder.listDevices());
5772
+ return devices.map(toLayercodeAudioInputDevice);
5854
5773
  };
5855
5774
  const watchAudioInputDevices = (callback) => {
5856
5775
  if (!hasMediaDevicesSupport()) {
5857
5776
  return () => { };
5858
5777
  }
5859
- let disposed = false;
5860
- let lastSignature = null;
5861
- let requestId = 0;
5862
- const emitDevices = async () => {
5863
- requestId += 1;
5864
- const currentRequest = requestId;
5865
- try {
5866
- const devices = await listAudioInputDevices();
5867
- if (disposed || currentRequest !== requestId) {
5868
- return;
5869
- }
5870
- const signature = devices.map((device) => `${device.deviceId}:${device.label}:${device.groupId}:${device.default ? '1' : '0'}`).join('|');
5871
- if (signature !== lastSignature) {
5872
- lastSignature = signature;
5873
- callback(devices);
5874
- }
5875
- }
5876
- catch (error) {
5877
- if (!disposed) {
5878
- console.warn('Failed to refresh audio devices', error);
5879
- }
5880
- }
5881
- };
5882
- const handler = () => {
5883
- void emitDevices();
5778
+ const recorder = new WavRecorder({ sampleRate: DEFAULT_RECORDER_SAMPLE_RATE });
5779
+ const handleDevicesChange = (devices) => {
5780
+ callback(devices.map(toLayercodeAudioInputDevice));
5884
5781
  };
5885
- const mediaDevices = navigator.mediaDevices;
5886
- let teardown = null;
5887
- if (typeof mediaDevices.addEventListener === 'function') {
5888
- mediaDevices.addEventListener(MEDIA_DEVICE_CHANGE_EVENT, handler);
5889
- teardown = () => mediaDevices.removeEventListener(MEDIA_DEVICE_CHANGE_EVENT, handler);
5890
- }
5891
- else if ('ondevicechange' in mediaDevices) {
5892
- const previousHandler = mediaDevices.ondevicechange;
5893
- mediaDevices.ondevicechange = handler;
5894
- teardown = () => {
5895
- if (mediaDevices.ondevicechange === handler) {
5896
- mediaDevices.ondevicechange = previousHandler || null;
5897
- }
5898
- };
5899
- }
5900
- // Always emit once on subscribe
5901
- void emitDevices();
5782
+ // WavRecorder handles initial emit + deduping devicechange events
5783
+ recorder.listenForDeviceChange(handleDevicesChange);
5902
5784
  return () => {
5903
- disposed = true;
5904
- teardown === null || teardown === void 0 ? void 0 : teardown();
5785
+ recorder.listenForDeviceChange(null);
5786
+ recorder.quit().catch(() => { });
5905
5787
  };
5906
5788
  };
5907
5789
  /**
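With the rewrite above, both helpers delegate to a throwaway WavRecorder rather than reimplementing permission caching and devicechange wiring. A hypothetical consumer-side sketch (the callback body is invented; the function names are the ones defined above):

    const unsubscribe = watchAudioInputDevices((devices) => {
      console.log('audio inputs:', devices.map((device) => device.label));
    });

    // Tearing down removes the devicechange listener and releases the helper recorder via quit().
    unsubscribe();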
@@ -5948,7 +5830,7 @@ class LayercodeClient {
5948
5830
  this.AMPLITUDE_MONITORING_SAMPLE_RATE = 2;
5949
5831
  this._websocketUrl = DEFAULT_WS_URL;
5950
5832
  this.audioOutputReady = null;
5951
- this.wavRecorder = new WavRecorder({ sampleRate: 8000 }); // TODO should be set my fetched agent config
5833
+ this.wavRecorder = new WavRecorder({ sampleRate: DEFAULT_RECORDER_SAMPLE_RATE }); // TODO should be set by fetched agent config
5952
5834
  this.wavPlayer = new WavStreamPlayer({
5953
5835
  finishedPlayingCallback: this._clientResponseAudioReplayFinished.bind(this),
5954
5836
  sampleRate: 16000, // TODO should be set by fetched agent config
@@ -5980,6 +5862,7 @@ class LayercodeClient {
5980
5862
  this.stopRecorderAmplitude = undefined;
5981
5863
  this.deviceChangeListener = null;
5982
5864
  this.recorderRestartChain = Promise.resolve();
5865
+ this._skipFirstDeviceCallback = false;
5983
5866
  this.deviceListenerReady = null;
5984
5867
  this.resolveDeviceListenerReady = null;
5985
5868
  // this.audioPauseTime = null;
@@ -5999,7 +5882,7 @@ class LayercodeClient {
5999
5882
  set onDevicesChanged(callback) {
6000
5883
  this.options.onDevicesChanged = callback !== null && callback !== void 0 ? callback : NOOP;
6001
5884
  }
6002
- _initializeVAD() {
5885
+ async _initializeVAD() {
6003
5886
  var _a;
6004
5887
  console.log('initializing VAD', { pushToTalkEnabled: this.pushToTalkEnabled, canInterrupt: this.canInterrupt, vadConfig: this.vadConfig });
6005
5888
  // If we're in push to talk mode or mute mode, we don't need to use the VAD model
@@ -6083,13 +5966,13 @@ class LayercodeClient {
6083
5966
  vadOptions.frameSamples = 512; // Required for v5
6084
5967
  }
6085
5968
  console.log('Creating VAD with options:', vadOptions);
6086
- dist.MicVAD.new(vadOptions)
6087
- .then((vad) => {
5969
+ try {
5970
+ const vad = await dist.MicVAD.new(vadOptions);
6088
5971
  this.vad = vad;
6089
5972
  this.vad.start();
6090
5973
  console.log('VAD started successfully');
6091
- })
6092
- .catch((error) => {
5974
+ }
5975
+ catch (error) {
6093
5976
  console.warn('Error initializing VAD:', error);
6094
5977
  // Send a message to server indicating VAD failure
6095
5978
  const vadFailureMessage = {
@@ -6101,7 +5984,7 @@ class LayercodeClient {
6101
5984
  ...vadFailureMessage,
6102
5985
  userSpeaking: this.userIsSpeaking,
6103
5986
  });
6104
- });
5987
+ }
6105
5988
  }
6106
5989
  /**
6107
5990
  * Updates the connection status and triggers the callback
@@ -6452,18 +6335,99 @@ class LayercodeClient {
6452
6335
  }
6453
6336
  async audioInputConnect() {
6454
6337
  // Turn mic ON
6455
- console.log('audioInputConnect: requesting permission');
6456
- await this.wavRecorder.requestPermission();
6338
+ // NOTE: On iOS Safari, each getUserMedia call is expensive (~2-3 seconds).
6339
+ // We optimize by:
6340
+ // 1. Starting the recorder FIRST with begin() (single getUserMedia)
6341
+ // 2. THEN setting up device change listeners (which will skip getUserMedia since permission is cached)
6342
+ console.log('audioInputConnect: recorderStarted =', this.recorderStarted);
6343
+ // If the recorder hasn't spun up yet, start it first with the preferred or default device
6344
+ // This ensures we only make ONE getUserMedia call instead of multiple sequential ones
6345
+ if (!this.recorderStarted) {
6346
+ // Use preferred device if set, otherwise use system default
6347
+ const targetDeviceId = this.useSystemDefaultDevice ? undefined : this.deviceId || undefined;
6348
+ // Mark as using system default if no specific device is set
6349
+ if (!targetDeviceId) {
6350
+ this.useSystemDefaultDevice = true;
6351
+ }
6352
+ console.log('audioInputConnect: starting recorder with device:', targetDeviceId !== null && targetDeviceId !== void 0 ? targetDeviceId : 'system default');
6353
+ await this._startRecorderWithDevice(targetDeviceId);
6354
+ }
6355
+ // Now set up device change listeners - permission is already granted so listDevices() won't call getUserMedia
6356
+ // Skip the first callback since we've already started with the correct device
6357
+ this._skipFirstDeviceCallback = true;
6457
6358
  console.log('audioInputConnect: setting up device change listener');
6458
6359
  await this._setupDeviceChangeListener();
6459
- // If the recorder hasn't spun up yet, proactively select a device.
6460
- if (!this.recorderStarted && this.deviceChangeListener) {
6461
- console.log('audioInputConnect: initializing recorder with default device');
6462
- await this._initializeRecorderWithDefaultDevice();
6463
- }
6464
6360
  console.log('audioInputConnect: done, recorderStarted =', this.recorderStarted);
6465
6361
  }
6362
+ /**
6363
+ * Starts the recorder with a specific device (or default if undefined)
6364
+ * This is the single point where getUserMedia is called during initial setup.
6365
+ * Idempotent: returns early if recorder is already started or has a live stream.
6366
+ */
6367
+ async _startRecorderWithDevice(deviceId) {
6368
+ var _a, _b;
6369
+ // Idempotency guard: don't start again if already running
6370
+ if (this.recorderStarted || this._hasLiveRecorderStream()) {
6371
+ console.debug('_startRecorderWithDevice: already started, skipping');
6372
+ return;
6373
+ }
6374
+ try {
6375
+ this._stopRecorderAmplitudeMonitoring();
6376
+ try {
6377
+ await this.wavRecorder.end();
6378
+ }
6379
+ catch (_c) {
6380
+ // Ignore cleanup errors
6381
+ }
6382
+ await this.wavRecorder.begin(deviceId);
6383
+ await this.wavRecorder.record(this._handleDataAvailable, 1638);
6384
+ // Re-setup amplitude monitoring with the new stream
6385
+ this._setupAmplitudeMonitoring(this.wavRecorder, this.options.onUserAmplitudeChange, (amp) => (this.userAudioAmplitude = amp));
6386
+ if (!this.options.enableAmplitudeMonitoring) {
6387
+ this.userAudioAmplitude = 0;
6388
+ }
6389
+ const stream = this.wavRecorder.getStream();
6390
+ const activeTrack = (stream === null || stream === void 0 ? void 0 : stream.getAudioTracks()[0]) || null;
6391
+ const trackSettings = activeTrack && typeof activeTrack.getSettings === 'function' ? activeTrack.getSettings() : null;
6392
+ const trackDeviceId = trackSettings && typeof trackSettings.deviceId === 'string' ? trackSettings.deviceId : null;
6393
+ this.activeDeviceId = trackDeviceId !== null && trackDeviceId !== void 0 ? trackDeviceId : (this.useSystemDefaultDevice ? null : this.deviceId);
6394
+ if (!this.recorderStarted) {
6395
+ this.recorderStarted = true;
6396
+ this._sendReadyIfNeeded();
6397
+ }
6398
+ const reportedDeviceId = (_a = this.activeDeviceId) !== null && _a !== void 0 ? _a : (this.useSystemDefaultDevice ? 'default' : ((_b = this.deviceId) !== null && _b !== void 0 ? _b : 'default'));
6399
+ if (reportedDeviceId !== this.lastReportedDeviceId) {
6400
+ this.lastReportedDeviceId = reportedDeviceId;
6401
+ if (this.options.onDeviceSwitched) {
6402
+ this.options.onDeviceSwitched(reportedDeviceId);
6403
+ }
6404
+ }
6405
+ console.debug('Recorder started successfully with device:', reportedDeviceId);
6406
+ }
6407
+ catch (error) {
6408
+ const permissionDeniedError = await this._microphonePermissionDeniedError(error);
6409
+ if (permissionDeniedError) {
6410
+ console.error(permissionDeniedError.message);
6411
+ this.options.onError(permissionDeniedError);
6412
+ throw permissionDeniedError;
6413
+ }
6414
+ if (await this._shouldWarnAudioDevicesRequireUserGesture(error)) {
6415
+ console.error('Cannot load audio devices before user has interacted with the page. Please move connect() to be triggered by a button, or load the SDK with "audioInput: false" to connect on page load');
6416
+ }
6417
+ console.error('Error starting recorder:', error);
6418
+ this.options.onError(error instanceof Error ? error : new Error(String(error)));
6419
+ throw error;
6420
+ }
6421
+ }
6466
6422
  async audioInputDisconnect() {
6423
+ // If we never started the recorder, avoid touching audio APIs at all.
6424
+ if (!this.recorderStarted && !this._hasLiveRecorderStream()) {
6425
+ this._stopRecorderAmplitudeMonitoring();
6426
+ this.stopVad();
6427
+ this._teardownDeviceListeners();
6428
+ this.recorderStarted = false;
6429
+ return;
6430
+ }
6467
6431
  try {
6468
6432
  // stop amplitude monitoring tied to the recorder
6469
6433
  this._stopRecorderAmplitudeMonitoring();
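The net effect of the reordering above is that a fresh audio-input connect performs a single getUserMedia call. A rough sketch of the intended sequence, using the private fields and methods named in the code above (illustrative only, not a supported API surface):

    // 1. begin() opens the stream once and sets _hasPermission on the recorder.
    await client._startRecorderWithDevice(client.deviceId || undefined);
    // 2. Device listeners attach afterwards; listDevices() inside them reuses the cached permission,
    //    and the first callback is skipped because the correct device is already active.
    client._skipFirstDeviceCallback = true;
    await client._setupDeviceChangeListener();
    // Calling _startRecorderWithDevice() again is a no-op thanks to the idempotency guard.
    await client._startRecorderWithDevice(client.deviceId || undefined);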
@@ -6485,7 +6449,9 @@ class LayercodeClient {
6485
6449
  this.audioInput = state;
6486
6450
  this._emitAudioInput();
6487
6451
  if (state) {
6452
+ this._setStatus('connecting');
6488
6453
  await this.audioInputConnect();
6454
+ this._setStatus('connected');
6489
6455
  }
6490
6456
  else {
6491
6457
  await this.audioInputDisconnect();
@@ -6493,7 +6459,6 @@ class LayercodeClient {
6493
6459
  }
6494
6460
  }
6495
6461
  async setAudioOutput(state) {
6496
- console.log('setAudioOutput called with state:', state, 'current:', this.audioOutput);
6497
6462
  if (this.audioOutput !== state) {
6498
6463
  this.audioOutput = state;
6499
6464
  this._emitAudioOutput();
@@ -6501,15 +6466,15 @@ class LayercodeClient {
6501
6466
  // Initialize audio output if not already connected
6502
6467
  // This happens when audioOutput was initially false and is now being enabled
6503
6468
  if (!this.wavPlayer.context) {
6504
- console.log('setAudioOutput: initializing audio output (no context yet)');
6469
+ this._setStatus('connecting');
6505
6470
  // Store the promise so _waitForAudioOutputReady() can await it
6506
6471
  // This prevents response.audio from running before AudioContext is ready
6507
6472
  const setupPromise = this.setupAudioOutput();
6508
6473
  this.audioOutputReady = setupPromise;
6509
6474
  await setupPromise;
6475
+ this._setStatus('connected');
6510
6476
  }
6511
6477
  else {
6512
- console.log('setAudioOutput: unmuting existing player');
6513
6478
  this.wavPlayer.unmute();
6514
6479
  }
6515
6480
  // Sync agentSpeaking state with actual playback state when enabling audio output
@@ -6592,7 +6557,19 @@ class LayercodeClient {
6592
6557
  await audioOutputReady;
6593
6558
  }
6594
6559
  catch (error) {
6595
- console.error('Error connecting to Layercode agent:', error);
6560
+ const permissionDeniedError = await this._microphonePermissionDeniedError(error);
6561
+ if (permissionDeniedError) {
6562
+ console.error(permissionDeniedError.message);
6563
+ this._setStatus('error');
6564
+ this.options.onError(permissionDeniedError);
6565
+ return;
6566
+ }
6567
+ if (await this._shouldWarnAudioDevicesRequireUserGesture(error)) {
6568
+ console.error('Cannot load audio devices before user has interacted with the page. Please move connect() to be triggered by a button, or load the SDK with "audioInput: false" to connection() on page load');
6569
+ }
6570
+ else {
6571
+ console.error('Error connecting to Layercode agent:', error);
6572
+ }
6596
6573
  this._setStatus('error');
6597
6574
  this.options.onError(error instanceof Error ? error : new Error(String(error)));
6598
6575
  }
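With the handling above, permission failures now surface through the onError callback rather than only as a generic console message. A hypothetical application-level sketch; the constructor option shape, agentId value, and connectButton element are assumptions, while onError and connect() correspond to the callback and method referenced in this diff:

    const client = new LayercodeClient({
      agentId: 'ag_123',                                            // hypothetical
      onError: (err) => console.error('Layercode error:', err.message),
    });
    // Trigger connect() from a user gesture so getUserMedia is allowed and the
    // "interacted with the page" guidance above never needs to fire.
    connectButton.addEventListener('click', () => client.connect());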
@@ -6751,7 +6728,7 @@ class LayercodeClient {
6751
6728
  const newStream = this.wavRecorder.getStream();
6752
6729
  await this._reinitializeVAD(newStream);
6753
6730
  }
6754
- const reportedDeviceId = (_c = (_b = this.lastReportedDeviceId) !== null && _b !== void 0 ? _b : this.activeDeviceId) !== null && _c !== void 0 ? _c : (this.useSystemDefaultDevice ? 'default' : normalizedDeviceId !== null && normalizedDeviceId !== void 0 ? normalizedDeviceId : 'default');
6731
+ const reportedDeviceId = (_c = (_b = this.lastReportedDeviceId) !== null && _b !== void 0 ? _b : this.activeDeviceId) !== null && _c !== void 0 ? _c : (this.useSystemDefaultDevice ? 'default' : (normalizedDeviceId !== null && normalizedDeviceId !== void 0 ? normalizedDeviceId : 'default'));
6755
6732
  console.debug(`Successfully switched to input device: ${reportedDeviceId}`);
6756
6733
  }
6757
6734
  catch (error) {
@@ -6805,7 +6782,7 @@ class LayercodeClient {
6805
6782
  this.recorderStarted = true;
6806
6783
  this._sendReadyIfNeeded();
6807
6784
  }
6808
- const reportedDeviceId = (_a = this.activeDeviceId) !== null && _a !== void 0 ? _a : (this.useSystemDefaultDevice ? 'default' : (_b = this.deviceId) !== null && _b !== void 0 ? _b : 'default');
6785
+ const reportedDeviceId = (_a = this.activeDeviceId) !== null && _a !== void 0 ? _a : (this.useSystemDefaultDevice ? 'default' : ((_b = this.deviceId) !== null && _b !== void 0 ? _b : 'default'));
6809
6786
  if (reportedDeviceId !== previousReportedDeviceId) {
6810
6787
  this.lastReportedDeviceId = reportedDeviceId;
6811
6788
  if (this.options.onDeviceSwitched) {
@@ -6824,33 +6801,6 @@ class LayercodeClient {
6824
6801
  this.recorderRestartChain = run.catch(() => { });
6825
6802
  return run;
6826
6803
  }
6827
- async _initializeRecorderWithDefaultDevice() {
6828
- console.log('_initializeRecorderWithDefaultDevice called, deviceChangeListener:', !!this.deviceChangeListener);
6829
- if (!this.deviceChangeListener) {
6830
- return;
6831
- }
6832
- try {
6833
- const devices = await this.wavRecorder.listDevices();
6834
- console.log('_initializeRecorderWithDefaultDevice: got devices:', devices.length);
6835
- if (devices.length) {
6836
- console.log('_initializeRecorderWithDefaultDevice: calling deviceChangeListener');
6837
- await this.deviceChangeListener(devices);
6838
- return;
6839
- }
6840
- console.warn('No audio input devices available when enabling microphone');
6841
- }
6842
- catch (error) {
6843
- console.warn('Unable to prime audio devices from listDevices()', error);
6844
- }
6845
- try {
6846
- console.log('_initializeRecorderWithDefaultDevice: calling setInputDevice default');
6847
- await this.setInputDevice('default');
6848
- }
6849
- catch (error) {
6850
- console.error('Failed to start recording with the system default device:', error);
6851
- throw error;
6852
- }
6853
- }
6854
6804
  /**
6855
6805
  * Disconnect VAD
6856
6806
  */
@@ -6869,7 +6819,7 @@ class LayercodeClient {
6869
6819
  this.stopVad();
6870
6820
  // Reinitialize with new stream only if we're actually capturing audio
6871
6821
  if (stream && this._shouldCaptureUserAudio()) {
6872
- this._initializeVAD();
6822
+ await this._initializeVAD();
6873
6823
  }
6874
6824
  }
6875
6825
  /**
@@ -6891,8 +6841,8 @@ class LayercodeClient {
6891
6841
  };
6892
6842
  });
6893
6843
  this.deviceChangeListener = async (devices) => {
6894
- var _a;
6895
- console.log('deviceChangeListener called, devices:', devices.length, 'recorderStarted:', this.recorderStarted);
6844
+ var _a, _b;
6845
+ console.log('deviceChangeListener called, devices:', devices.length, 'recorderStarted:', this.recorderStarted, '_skipFirstDeviceCallback:', this._skipFirstDeviceCallback);
6896
6846
  try {
6897
6847
  // Notify user that devices have changed
6898
6848
  this.options.onDevicesChanged(devices);
@@ -6900,6 +6850,15 @@ class LayercodeClient {
6900
6850
  const usingDefaultDevice = this.useSystemDefaultDevice;
6901
6851
  const previousDefaultDeviceKey = this.lastKnownSystemDefaultDeviceKey;
6902
6852
  const currentDefaultDeviceKey = this._getDeviceComparisonKey(defaultDevice);
6853
+ // Skip switching on the first callback after starting the recorder to avoid redundant begin() calls
6854
+ // This is set by audioInputConnect() after _startRecorderWithDevice() completes
6855
+ if (this._skipFirstDeviceCallback) {
6856
+ console.log('deviceChangeListener: skipping first callback after recorder start');
6857
+ this._skipFirstDeviceCallback = false;
6858
+ this.lastKnownSystemDefaultDeviceKey = currentDefaultDeviceKey;
6859
+ (_a = this.resolveDeviceListenerReady) === null || _a === void 0 ? void 0 : _a.call(this);
6860
+ return;
6861
+ }
6903
6862
  let shouldSwitch = !this.recorderStarted;
6904
6863
  console.log('deviceChangeListener: shouldSwitch initial:', shouldSwitch);
6905
6864
  if (!shouldSwitch) {
@@ -6910,8 +6869,7 @@ class LayercodeClient {
6910
6869
  else if (this.activeDeviceId && defaultDevice.deviceId !== 'default' && defaultDevice.deviceId !== this.activeDeviceId) {
6911
6870
  shouldSwitch = true;
6912
6871
  }
6913
- else if ((previousDefaultDeviceKey && previousDefaultDeviceKey !== currentDefaultDeviceKey) ||
6914
- (!previousDefaultDeviceKey && !currentDefaultDeviceKey && this.recorderStarted)) {
6872
+ else if ((previousDefaultDeviceKey && previousDefaultDeviceKey !== currentDefaultDeviceKey) || (!previousDefaultDeviceKey && !currentDefaultDeviceKey && this.recorderStarted)) {
6915
6873
  shouldSwitch = true;
6916
6874
  }
6917
6875
  }
@@ -6951,7 +6909,7 @@ class LayercodeClient {
6951
6909
  this.options.onError(error instanceof Error ? error : new Error(String(error)));
6952
6910
  }
6953
6911
  finally {
6954
- (_a = this.resolveDeviceListenerReady) === null || _a === void 0 ? void 0 : _a.call(this);
6912
+ (_b = this.resolveDeviceListenerReady) === null || _b === void 0 ? void 0 : _b.call(this);
6955
6913
  }
6956
6914
  };
6957
6915
  this.wavRecorder.listenForDeviceChange(this.deviceChangeListener);
@@ -6975,6 +6933,7 @@ class LayercodeClient {
6975
6933
  this.lastKnownSystemDefaultDeviceKey = null;
6976
6934
  this.recorderStarted = false;
6977
6935
  this.readySent = false;
6936
+ this._skipFirstDeviceCallback = false;
6978
6937
  this._stopAmplitudeMonitoring();
6979
6938
  this._teardownDeviceListeners();
6980
6939
  if (this.vad) {
@@ -7010,6 +6969,81 @@ class LayercodeClient {
7010
6969
  }
7011
6970
  return null;
7012
6971
  }
6972
+ _getUserActivationState() {
6973
+ try {
6974
+ const nav = typeof navigator !== 'undefined' ? navigator : null;
6975
+ const act = nav === null || nav === void 0 ? void 0 : nav.userActivation;
6976
+ if (act && typeof act === 'object') {
6977
+ if (typeof act.hasBeenActive === 'boolean')
6978
+ return act.hasBeenActive;
6979
+ if (typeof act.isActive === 'boolean')
6980
+ return act.isActive ? true : null;
6981
+ }
6982
+ const doc = typeof document !== 'undefined' ? document : null;
6983
+ const dact = doc === null || doc === void 0 ? void 0 : doc.userActivation;
6984
+ if (dact && typeof dact === 'object') {
6985
+ if (typeof dact.hasBeenActive === 'boolean')
6986
+ return dact.hasBeenActive;
6987
+ if (typeof dact.isActive === 'boolean')
6988
+ return dact.isActive ? true : null;
6989
+ }
6990
+ }
6991
+ catch (_a) { }
6992
+ return null;
6993
+ }
6994
+ async _isMicrophonePermissionDenied() {
6995
+ try {
6996
+ const nav = typeof navigator !== 'undefined' ? navigator : null;
6997
+ const permissions = nav === null || nav === void 0 ? void 0 : nav.permissions;
6998
+ if (!(permissions === null || permissions === void 0 ? void 0 : permissions.query))
6999
+ return null;
7000
+ const status = await permissions.query({ name: 'microphone' });
7001
+ const state = status === null || status === void 0 ? void 0 : status.state;
7002
+ if (state === 'denied')
7003
+ return true;
7004
+ if (state === 'granted' || state === 'prompt')
7005
+ return false;
7006
+ }
7007
+ catch (_a) { }
7008
+ return null;
7009
+ }
7010
+ async _microphonePermissionDeniedError(error) {
7011
+ const err = error;
7012
+ const message = typeof (err === null || err === void 0 ? void 0 : err.message) === 'string' ? err.message : typeof error === 'string' ? error : '';
7013
+ if (message === 'User has denied audio device permissions') {
7014
+ return err instanceof Error ? err : new Error(message);
7015
+ }
7016
+ const name = typeof (err === null || err === void 0 ? void 0 : err.name) === 'string' ? err.name : '';
7017
+ const isPermissionLike = name === 'NotAllowedError' || name === 'SecurityError' || name === 'PermissionDeniedError';
7018
+ if (!isPermissionLike) {
7019
+ return null;
7020
+ }
7021
+ const micDenied = await this._isMicrophonePermissionDenied();
7022
+ if (micDenied === true || /permission denied/i.test(message)) {
7023
+ return new Error('User has denied audio device permissions');
7024
+ }
7025
+ return null;
7026
+ }
7027
+ async _shouldWarnAudioDevicesRequireUserGesture(error) {
7028
+ const e = error;
7029
+ const name = typeof (e === null || e === void 0 ? void 0 : e.name) === 'string' ? e.name : '';
7030
+ const msg = typeof (e === null || e === void 0 ? void 0 : e.message) === 'string'
7031
+ ? e.message
7032
+ : typeof error === 'string'
7033
+ ? error
7034
+ : '';
7035
+ const isPermissionLike = name === 'NotAllowedError' || name === 'SecurityError' || name === 'PermissionDeniedError';
7036
+ if (!isPermissionLike)
7037
+ return false;
7038
+ // If the browser can tell us mic permission is explicitly denied, don't show the "user gesture" guidance.
7039
+ const micDenied = await this._isMicrophonePermissionDenied();
7040
+ if (micDenied === true)
7041
+ return false;
7042
+ if (/user activation|user gesture|interacte?d? with( the)? (page|document)|before user has interacted/i.test(msg)) {
7043
+ return true;
7044
+ }
7045
+ return this._getUserActivationState() === false;
7046
+ }
7013
7047
  /**
7014
7048
  * Mutes the microphone to stop sending audio to the server
7015
7049
  * The connection and recording remain active for quick unmute
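The helpers above combine two standard browser signals, the Permissions API and user activation. A standalone sketch of those primitives (not SDK code; both APIs vary in availability across browsers):

    async function probeMicState() {
      let permission = 'unknown';
      if (navigator.permissions && navigator.permissions.query) {
        try {
          // 'granted' | 'denied' | 'prompt'; some browsers reject the 'microphone' permission name.
          permission = (await navigator.permissions.query({ name: 'microphone' })).state;
        } catch (_) { /* keep 'unknown' */ }
      }
      // true once the user has ever interacted with the page, where supported.
      const hasBeenActive = navigator.userActivation ? navigator.userActivation.hasBeenActive : null;
      return { permission, hasBeenActive };
    }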
@@ -7026,13 +7060,13 @@ class LayercodeClient {
7026
7060
  /**
7027
7061
  * Unmutes the microphone to resume sending audio to the server
7028
7062
  */
7029
- unmute() {
7063
+ async unmute() {
7030
7064
  if (this.isMuted) {
7031
7065
  this.isMuted = false;
7032
7066
  console.log('Microphone unmuted');
7033
7067
  this.options.onMuteStateChange(false);
7034
7068
  if (this.audioInput && this.recorderStarted) {
7035
- this._initializeVAD();
7069
+ await this._initializeVAD();
7036
7070
  if (this.stopRecorderAmplitude === undefined) {
7037
7071
  this._setupAmplitudeMonitoring(this.wavRecorder, this.options.onUserAmplitudeChange, (amp) => (this.userAudioAmplitude = amp));
7038
7072
  }
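Since unmute() is now async (it re-initializes VAD and, if needed, amplitude monitoring), callers that previously fired it without waiting may want to await it; a one-line sketch, assuming a LayercodeClient instance named client:

    await client.unmute(); // resolves once VAD and monitoring are set back up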