@layercode/js-sdk 2.8.3 → 2.8.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5088,11 +5088,7 @@ registerProcessor('audio_processor', AudioProcessor);
  * @param {{sampleRate?: number, outputToSpeakers?: boolean, debug?: boolean}} [options]
  * @returns {WavRecorder}
  */
- constructor({
- sampleRate = 24000,
- outputToSpeakers = false,
- debug = false,
- } = {}) {
+ constructor({ sampleRate = 24000, outputToSpeakers = false, debug = false } = {}) {
  // Script source
  this.scriptSrc = AudioProcessorSrc;
  // Config
@@ -5110,6 +5106,11 @@ registerProcessor('audio_processor', AudioProcessor);
  this.analyser = null;
  this.recording = false;
  this.contextSampleRate = sampleRate;
+ // Track whether we've already obtained microphone permission
+ // This avoids redundant getUserMedia calls which are expensive on iOS Safari
+ this._hasPermission = false;
+ // Promise used to dedupe concurrent requestPermission() calls
+ this._permissionPromise = null;
  // Event handling with AudioWorklet
  this._lastEventId = 0;
  this.eventReceipts = {};
@@ -5137,17 +5138,13 @@ registerProcessor('audio_processor', AudioProcessor);
  let blob;
  if (audioData instanceof Blob) {
  if (fromSampleRate !== -1) {
- throw new Error(
- `Can not specify "fromSampleRate" when reading from Blob`,
- );
+ throw new Error(`Can not specify "fromSampleRate" when reading from Blob`);
  }
  blob = audioData;
  arrayBuffer = await blob.arrayBuffer();
  } else if (audioData instanceof ArrayBuffer) {
  if (fromSampleRate !== -1) {
- throw new Error(
- `Can not specify "fromSampleRate" when reading from ArrayBuffer`,
- );
+ throw new Error(`Can not specify "fromSampleRate" when reading from ArrayBuffer`);
  }
  arrayBuffer = audioData;
  blob = new Blob([arrayBuffer], { type: 'audio/wav' });
@@ -5165,14 +5162,10 @@ registerProcessor('audio_processor', AudioProcessor);
  } else if (audioData instanceof Array) {
  float32Array = new Float32Array(audioData);
  } else {
- throw new Error(
- `"audioData" must be one of: Blob, Float32Arrray, Int16Array, ArrayBuffer, Array<number>`,
- );
+ throw new Error(`"audioData" must be one of: Blob, Float32Arrray, Int16Array, ArrayBuffer, Array<number>`);
  }
  if (fromSampleRate === -1) {
- throw new Error(
- `Must specify "fromSampleRate" when reading from Float32Array, In16Array or Array`,
- );
+ throw new Error(`Must specify "fromSampleRate" when reading from Float32Array, In16Array or Array`);
  } else if (fromSampleRate < 3000) {
  throw new Error(`Minimum "fromSampleRate" is 3000 (3kHz)`);
  }
@@ -5202,12 +5195,13 @@ registerProcessor('audio_processor', AudioProcessor);

  /**
  * Logs data in debug mode
- * @param {...any} arguments
+ * @param {...any} args
  * @returns {true}
  */
- log() {
+ log(...args) {
  if (this.debug) {
- this.log(...arguments);
+ // eslint-disable-next-line no-console
+ console.log(...args);
  }
  return true;
  }
@@ -5280,10 +5274,7 @@ registerProcessor('audio_processor', AudioProcessor);
  */
  listenForDeviceChange(callback) {
  if (callback === null && this._deviceChangeCallback) {
- navigator.mediaDevices.removeEventListener(
- 'devicechange',
- this._deviceChangeCallback,
- );
+ navigator.mediaDevices.removeEventListener('devicechange', this._deviceChangeCallback);
  this._deviceChangeCallback = null;
  } else if (callback !== null) {
  // Basically a debounce; we only want this called once when devices change
@@ -5315,21 +5306,39 @@ registerProcessor('audio_processor', AudioProcessor);

  /**
  * Manually request permission to use the microphone
+ * Skips if permission has already been granted to avoid expensive redundant getUserMedia calls.
+ * Dedupes concurrent calls to prevent multiple getUserMedia requests.
  * @returns {Promise<true>}
  */
  async requestPermission() {
- console.log('ensureUserMediaAccess');
- try {
- const stream = await navigator.mediaDevices.getUserMedia({
- audio: true,
- });
- // Stop the tracks immediately after getting permission
- stream.getTracks().forEach(track => track.stop());
- } catch (fallbackError) {
- console.error('getUserMedia failed:', fallbackError.name, fallbackError.message);
- throw fallbackError;
+ // Skip if we already have permission - each getUserMedia is expensive on iOS Safari
+ if (this._hasPermission) {
+ return true;
  }
- return true;
+ // Dedupe concurrent calls: if a permission request is already in flight, wait for it
+ if (this._permissionPromise) {
+ return this._permissionPromise;
+ }
+
+ console.log('ensureUserMediaAccess');
+ this._permissionPromise = (async () => {
+ try {
+ const stream = await navigator.mediaDevices.getUserMedia({
+ audio: true,
+ });
+ // Stop the tracks immediately after getting permission
+ stream.getTracks().forEach((track) => track.stop());
+ this._hasPermission = true;
+ return true;
+ } catch (fallbackError) {
+ console.error('getUserMedia failed:', fallbackError.name, fallbackError.message);
+ throw fallbackError;
+ } finally {
+ this._permissionPromise = null;
+ }
+ })();
+
+ return this._permissionPromise;
  }

  /**
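The rewritten requestPermission() above caches a successful grant in this._hasPermission and shares one in-flight promise across callers, so repeated calls trigger at most one getUserMedia prompt. A minimal usage sketch, assuming WavRecorder is imported from this package's wavtools bundle (the import is not shown in this hunk):

    const recorder = new WavRecorder({ sampleRate: 24000 });
    // Concurrent calls are deduped: both resolve from the same underlying getUserMedia request.
    await Promise.all([recorder.requestPermission(), recorder.requestPermission()]);
    // A later call returns true immediately via the cached _hasPermission flag.
    await recorder.requestPermission();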
@@ -5337,25 +5346,18 @@ registerProcessor('audio_processor', AudioProcessor);
  * @returns {Promise<Array<MediaDeviceInfo & {default: boolean}>>}
  */
  async listDevices() {
- if (
- !navigator.mediaDevices ||
- !('enumerateDevices' in navigator.mediaDevices)
- ) {
+ if (!navigator.mediaDevices || !('enumerateDevices' in navigator.mediaDevices)) {
  throw new Error('Could not request user devices');
  }
  await this.requestPermission();

  const devices = await navigator.mediaDevices.enumerateDevices();
  const audioDevices = devices.filter((device) => device.kind === 'audioinput');
- const defaultDeviceIndex = audioDevices.findIndex(
- (device) => device.deviceId === 'default',
- );
+ const defaultDeviceIndex = audioDevices.findIndex((device) => device.deviceId === 'default');
  const deviceList = [];
  if (defaultDeviceIndex !== -1) {
  let defaultDevice = audioDevices.splice(defaultDeviceIndex, 1)[0];
- let existingIndex = audioDevices.findIndex(
- (device) => device.groupId === defaultDevice.groupId,
- );
+ let existingIndex = audioDevices.findIndex((device) => device.groupId === defaultDevice.groupId);
  if (existingIndex !== -1) {
  defaultDevice = audioDevices.splice(existingIndex, 1)[0];
  }
@@ -5377,15 +5379,10 @@ registerProcessor('audio_processor', AudioProcessor);
  */
  async begin(deviceId) {
  if (this.processor) {
- throw new Error(
- `Already connected: please call .end() to start a new session`,
- );
+ throw new Error(`Already connected: please call .end() to start a new session`);
  }

- if (
- !navigator.mediaDevices ||
- !('getUserMedia' in navigator.mediaDevices)
- ) {
+ if (!navigator.mediaDevices || !('getUserMedia' in navigator.mediaDevices)) {
  throw new Error('Could not request user media');
  }
  try {
@@ -5396,14 +5393,16 @@ registerProcessor('audio_processor', AudioProcessor);
  echoCancellation: true,
  autoGainControl: true,
  noiseSuppression: true,
- }
+ },
  };
  if (deviceId) {
  config.audio.deviceId = { exact: deviceId };
  }
  this.stream = await navigator.mediaDevices.getUserMedia(config);
+ // Mark permission as granted so listDevices() won't call requestPermission() again
+ this._hasPermission = true;
  } catch (err) {
- throw new Error('Could not start media stream');
+ throw err;
  }

  const createContext = (rate) => {
@@ -5455,10 +5454,7 @@ registerProcessor('audio_processor', AudioProcessor);
  raw: WavPacker.mergeBuffers(buffer.raw, data.raw),
  mono: WavPacker.mergeBuffers(buffer.mono, data.mono),
  };
- if (
- this._chunkProcessorBuffer.mono.byteLength >=
- this._chunkProcessorSize
- ) {
+ if (this._chunkProcessorBuffer.mono.byteLength >= this._chunkProcessorSize) {
  this._chunkProcessor(this._chunkProcessorBuffer);
  this._chunkProcessorBuffer = {
  raw: new ArrayBuffer(0),
@@ -5486,11 +5482,7 @@ registerProcessor('audio_processor', AudioProcessor);
  node.connect(analyser);
  if (this.outputToSpeakers) {
  // eslint-disable-next-line no-console
- console.warn(
- 'Warning: Output to speakers may affect sound quality,\n' +
- 'especially due to system audio feedback preventative measures.\n' +
- 'use only for debugging',
- );
+ console.warn('Warning: Output to speakers may affect sound quality,\n' + 'especially due to system audio feedback preventative measures.\n' + 'use only for debugging');
  analyser.connect(context.destination);
  }

@@ -5517,26 +5509,14 @@ registerProcessor('audio_processor', AudioProcessor);
  * @param {number} [maxDecibels] default -30
  * @returns {import('./analysis/audio_analysis.js').AudioAnalysisOutputType}
  */
- getFrequencies(
- analysisType = 'frequency',
- minDecibels = -100,
- maxDecibels = -30,
- ) {
+ getFrequencies(analysisType = 'frequency', minDecibels = -100, maxDecibels = -30) {
  if (!this.processor) {
  throw new Error('Session ended: please call .begin() first');
  }
- return AudioAnalysis.getFrequencies(
- this.analyser,
- this.sampleRate,
- null,
- analysisType,
- minDecibels,
- maxDecibels,
- );
+ return AudioAnalysis.getFrequencies(this.analyser, this.sampleRate, null, analysisType, minDecibels, maxDecibels);
  }

-
- /**
+ /**
  * Gets the real-time amplitude of the audio signal
  * @returns {number} Amplitude value between 0 and 1
  */
@@ -5661,9 +5641,7 @@ registerProcessor('audio_processor', AudioProcessor);
  throw new Error('Session ended: please call .begin() first');
  }
  if (!force && this.recording) {
- throw new Error(
- 'Currently recording: please call .pause() first, or call .save(true) to force',
- );
+ throw new Error('Currently recording: please call .pause() first, or call .save(true) to force');
  }
  this.log('Exporting ...');
  const exportData = await this._event('export');
@@ -5770,6 +5748,7 @@ registerProcessor('audio_processor', AudioProcessor);
  return btoa(binary);
  }

+ //// src/index.ts
  /* eslint-env browser */
  // import { env as ortEnv } from 'onnxruntime-web';
  // @ts-ignore - VAD package does not provide TypeScript types
@@ -5777,137 +5756,40 @@ registerProcessor('audio_processor', AudioProcessor);
  const DEFAULT_WS_URL = 'wss://api.layercode.com/v1/agents/web/websocket';
  // SDK version - updated when publishing
  const SDK_VERSION = '2.7.0';
- const MEDIA_DEVICE_CHANGE_EVENT = 'devicechange';
- const MEDIA_DEVICE_KIND_AUDIO = 'audioinput';
+ const DEFAULT_RECORDER_SAMPLE_RATE = 8000;
  const hasMediaDevicesSupport = () => typeof navigator !== 'undefined' && !!navigator.mediaDevices;
- let microphonePermissionPromise = null;
- let microphonePermissionGranted = false;
- const stopStreamTracks = (stream) => {
- if (!stream) {
- return;
- }
- stream.getTracks().forEach((track) => {
- try {
- track.stop();
- }
- catch (_a) {
- /* noop */
- }
- });
- };
- const ensureMicrophonePermissions = async () => {
- if (!hasMediaDevicesSupport()) {
- throw new Error('Media devices are not available in this environment');
- }
- if (microphonePermissionGranted) {
- return;
- }
- if (!microphonePermissionPromise) {
- microphonePermissionPromise = navigator.mediaDevices
- .getUserMedia({ audio: true })
- .then((stream) => {
- microphonePermissionGranted = true;
- stopStreamTracks(stream);
- })
- .finally(() => {
- microphonePermissionPromise = null;
- });
- }
- return microphonePermissionPromise;
- };
- const cloneAudioDevice = (device, isDefault) => {
+ const toLayercodeAudioInputDevice = (device) => {
  const cloned = {
- deviceId: device.deviceId,
- groupId: device.groupId,
- kind: device.kind,
+ ...device,
  label: device.label,
- default: isDefault,
+ default: Boolean(device.default),
  };
  if (typeof device.toJSON === 'function') {
  cloned.toJSON = device.toJSON.bind(device);
  }
  return cloned;
  };
- const normalizeAudioInputDevices = (devices) => {
- const audioDevices = devices.filter((device) => device.kind === MEDIA_DEVICE_KIND_AUDIO);
- if (!audioDevices.length) {
- return [];
- }
- const remaining = [...audioDevices];
- const normalized = [];
- const defaultIndex = remaining.findIndex((device) => device.deviceId === 'default');
- if (defaultIndex !== -1) {
- let defaultDevice = remaining.splice(defaultIndex, 1)[0];
- const groupMatchIndex = remaining.findIndex((device) => device.groupId && defaultDevice.groupId && device.groupId === defaultDevice.groupId);
- if (groupMatchIndex !== -1) {
- defaultDevice = remaining.splice(groupMatchIndex, 1)[0];
- }
- normalized.push(cloneAudioDevice(defaultDevice, true));
- }
- else if (remaining.length) {
- const fallbackDefault = remaining.shift();
- normalized.push(cloneAudioDevice(fallbackDefault, true));
- }
- return normalized.concat(remaining.map((device) => cloneAudioDevice(device, false)));
- };
  const listAudioInputDevices = async () => {
  if (!hasMediaDevicesSupport()) {
  throw new Error('Media devices are not available in this environment');
  }
- await ensureMicrophonePermissions();
- const devices = await navigator.mediaDevices.enumerateDevices();
- return normalizeAudioInputDevices(devices);
+ const recorder = new WavRecorder({ sampleRate: DEFAULT_RECORDER_SAMPLE_RATE });
+ const devices = (await recorder.listDevices());
+ return devices.map(toLayercodeAudioInputDevice);
  };
  const watchAudioInputDevices = (callback) => {
  if (!hasMediaDevicesSupport()) {
  return () => { };
  }
- let disposed = false;
- let lastSignature = null;
- let requestId = 0;
- const emitDevices = async () => {
- requestId += 1;
- const currentRequest = requestId;
- try {
- const devices = await listAudioInputDevices();
- if (disposed || currentRequest !== requestId) {
- return;
- }
- const signature = devices.map((device) => `${device.deviceId}:${device.label}:${device.groupId}:${device.default ? '1' : '0'}`).join('|');
- if (signature !== lastSignature) {
- lastSignature = signature;
- callback(devices);
- }
- }
- catch (error) {
- if (!disposed) {
- console.warn('Failed to refresh audio devices', error);
- }
- }
+ const recorder = new WavRecorder({ sampleRate: DEFAULT_RECORDER_SAMPLE_RATE });
+ const handleDevicesChange = (devices) => {
+ callback(devices.map(toLayercodeAudioInputDevice));
  };
- const handler = () => {
- void emitDevices();
- };
- const mediaDevices = navigator.mediaDevices;
- let teardown = null;
- if (typeof mediaDevices.addEventListener === 'function') {
- mediaDevices.addEventListener(MEDIA_DEVICE_CHANGE_EVENT, handler);
- teardown = () => mediaDevices.removeEventListener(MEDIA_DEVICE_CHANGE_EVENT, handler);
- }
- else if ('ondevicechange' in mediaDevices) {
- const previousHandler = mediaDevices.ondevicechange;
- mediaDevices.ondevicechange = handler;
- teardown = () => {
- if (mediaDevices.ondevicechange === handler) {
- mediaDevices.ondevicechange = previousHandler || null;
- }
- };
- }
- // Always emit once on subscribe
- void emitDevices();
+ // WavRecorder handles initial emit + deduping devicechange events
+ recorder.listenForDeviceChange(handleDevicesChange);
  return () => {
- disposed = true;
- teardown === null || teardown === void 0 ? void 0 : teardown();
+ recorder.listenForDeviceChange(null);
+ recorder.quit().catch(() => { });
  };
  };
  /**
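listAudioInputDevices() and watchAudioInputDevices() now delegate to WavRecorder instead of reimplementing permission priming and default-device normalization: the recorder's listenForDeviceChange() already emits once on subscribe and debounces devicechange events, and the returned cleanup both unregisters the listener and quits the throwaway recorder. A rough consumer sketch, assuming these helpers are exported from src/index.ts (the export statement is not visible in this hunk):

    const unsubscribe = watchAudioInputDevices((devices) => {
      // Each entry is a MediaDeviceInfo copy carrying the normalized `default: boolean` flag
      console.log(devices.map((d) => `${d.label}${d.default ? ' (default)' : ''}`));
    });
    // Later: stop listening and release the recorder created for device watching
    unsubscribe();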
@@ -5954,7 +5836,7 @@ registerProcessor('audio_processor', AudioProcessor);
  this.AMPLITUDE_MONITORING_SAMPLE_RATE = 2;
  this._websocketUrl = DEFAULT_WS_URL;
  this.audioOutputReady = null;
- this.wavRecorder = new WavRecorder({ sampleRate: 8000 }); // TODO should be set my fetched agent config
+ this.wavRecorder = new WavRecorder({ sampleRate: DEFAULT_RECORDER_SAMPLE_RATE }); // TODO should be set by fetched agent config
  this.wavPlayer = new WavStreamPlayer({
  finishedPlayingCallback: this._clientResponseAudioReplayFinished.bind(this),
  sampleRate: 16000, // TODO should be set my fetched agent config
@@ -5986,6 +5868,7 @@ registerProcessor('audio_processor', AudioProcessor);
  this.stopRecorderAmplitude = undefined;
  this.deviceChangeListener = null;
  this.recorderRestartChain = Promise.resolve();
+ this._skipFirstDeviceCallback = false;
  this.deviceListenerReady = null;
  this.resolveDeviceListenerReady = null;
  // this.audioPauseTime = null;
@@ -6005,7 +5888,7 @@ registerProcessor('audio_processor', AudioProcessor);
  set onDevicesChanged(callback) {
  this.options.onDevicesChanged = callback !== null && callback !== void 0 ? callback : NOOP;
  }
- _initializeVAD() {
+ async _initializeVAD() {
  var _a;
  console.log('initializing VAD', { pushToTalkEnabled: this.pushToTalkEnabled, canInterrupt: this.canInterrupt, vadConfig: this.vadConfig });
  // If we're in push to talk mode or mute mode, we don't need to use the VAD model
@@ -6089,13 +5972,13 @@ registerProcessor('audio_processor', AudioProcessor);
  vadOptions.frameSamples = 512; // Required for v5
  }
  console.log('Creating VAD with options:', vadOptions);
- dist.MicVAD.new(vadOptions)
- .then((vad) => {
+ try {
+ const vad = await dist.MicVAD.new(vadOptions);
  this.vad = vad;
  this.vad.start();
  console.log('VAD started successfully');
- })
- .catch((error) => {
+ }
+ catch (error) {
  console.warn('Error initializing VAD:', error);
  // Send a message to server indicating VAD failure
  const vadFailureMessage = {
@@ -6107,7 +5990,7 @@ registerProcessor('audio_processor', AudioProcessor);
  ...vadFailureMessage,
  userSpeaking: this.userIsSpeaking,
  });
- });
+ }
  }
  /**
  * Updates the connection status and triggers the callback
@@ -6458,18 +6341,99 @@ registerProcessor('audio_processor', AudioProcessor);
  }
  async audioInputConnect() {
  // Turn mic ON
- console.log('audioInputConnect: requesting permission');
- await this.wavRecorder.requestPermission();
+ // NOTE: On iOS Safari, each getUserMedia call is expensive (~2-3 seconds).
+ // We optimize by:
+ // 1. Starting the recorder FIRST with begin() (single getUserMedia)
+ // 2. THEN setting up device change listeners (which will skip getUserMedia since permission is cached)
+ console.log('audioInputConnect: recorderStarted =', this.recorderStarted);
+ // If the recorder hasn't spun up yet, start it first with the preferred or default device
+ // This ensures we only make ONE getUserMedia call instead of multiple sequential ones
+ if (!this.recorderStarted) {
+ // Use preferred device if set, otherwise use system default
+ const targetDeviceId = this.useSystemDefaultDevice ? undefined : this.deviceId || undefined;
+ // Mark as using system default if no specific device is set
+ if (!targetDeviceId) {
+ this.useSystemDefaultDevice = true;
+ }
+ console.log('audioInputConnect: starting recorder with device:', targetDeviceId !== null && targetDeviceId !== void 0 ? targetDeviceId : 'system default');
+ await this._startRecorderWithDevice(targetDeviceId);
+ }
+ // Now set up device change listeners - permission is already granted so listDevices() won't call getUserMedia
+ // Skip the first callback since we've already started with the correct device
+ this._skipFirstDeviceCallback = true;
  console.log('audioInputConnect: setting up device change listener');
  await this._setupDeviceChangeListener();
- // If the recorder hasn't spun up yet, proactively select a device.
- if (!this.recorderStarted && this.deviceChangeListener) {
- console.log('audioInputConnect: initializing recorder with default device');
- await this._initializeRecorderWithDefaultDevice();
- }
  console.log('audioInputConnect: done, recorderStarted =', this.recorderStarted);
  }
+ /**
+ * Starts the recorder with a specific device (or default if undefined)
+ * This is the single point where getUserMedia is called during initial setup.
+ * Idempotent: returns early if recorder is already started or has a live stream.
+ */
+ async _startRecorderWithDevice(deviceId) {
+ var _a, _b;
+ // Idempotency guard: don't start again if already running
+ if (this.recorderStarted || this._hasLiveRecorderStream()) {
+ console.debug('_startRecorderWithDevice: already started, skipping');
+ return;
+ }
+ try {
+ this._stopRecorderAmplitudeMonitoring();
+ try {
+ await this.wavRecorder.end();
+ }
+ catch (_c) {
+ // Ignore cleanup errors
+ }
+ await this.wavRecorder.begin(deviceId);
+ await this.wavRecorder.record(this._handleDataAvailable, 1638);
+ // Re-setup amplitude monitoring with the new stream
+ this._setupAmplitudeMonitoring(this.wavRecorder, this.options.onUserAmplitudeChange, (amp) => (this.userAudioAmplitude = amp));
+ if (!this.options.enableAmplitudeMonitoring) {
+ this.userAudioAmplitude = 0;
+ }
+ const stream = this.wavRecorder.getStream();
+ const activeTrack = (stream === null || stream === void 0 ? void 0 : stream.getAudioTracks()[0]) || null;
+ const trackSettings = activeTrack && typeof activeTrack.getSettings === 'function' ? activeTrack.getSettings() : null;
+ const trackDeviceId = trackSettings && typeof trackSettings.deviceId === 'string' ? trackSettings.deviceId : null;
+ this.activeDeviceId = trackDeviceId !== null && trackDeviceId !== void 0 ? trackDeviceId : (this.useSystemDefaultDevice ? null : this.deviceId);
+ if (!this.recorderStarted) {
+ this.recorderStarted = true;
+ this._sendReadyIfNeeded();
+ }
+ const reportedDeviceId = (_a = this.activeDeviceId) !== null && _a !== void 0 ? _a : (this.useSystemDefaultDevice ? 'default' : ((_b = this.deviceId) !== null && _b !== void 0 ? _b : 'default'));
+ if (reportedDeviceId !== this.lastReportedDeviceId) {
+ this.lastReportedDeviceId = reportedDeviceId;
+ if (this.options.onDeviceSwitched) {
+ this.options.onDeviceSwitched(reportedDeviceId);
+ }
+ }
+ console.debug('Recorder started successfully with device:', reportedDeviceId);
+ }
+ catch (error) {
+ const permissionDeniedError = await this._microphonePermissionDeniedError(error);
+ if (permissionDeniedError) {
+ console.error(permissionDeniedError.message);
+ this.options.onError(permissionDeniedError);
+ throw permissionDeniedError;
+ }
+ if (await this._shouldWarnAudioDevicesRequireUserGesture(error)) {
+ console.error('Cannot load audio devices before user has interacted with the page. Please move connect() to be triggered by a button, or load the SDK with "audioInput: false" to connection() on page load');
+ }
+ console.error('Error starting recorder:', error);
+ this.options.onError(error instanceof Error ? error : new Error(String(error)));
+ throw error;
+ }
+ }
  async audioInputDisconnect() {
+ // If we never started the recorder, avoid touching audio APIs at all.
+ if (!this.recorderStarted && !this._hasLiveRecorderStream()) {
+ this._stopRecorderAmplitudeMonitoring();
+ this.stopVad();
+ this._teardownDeviceListeners();
+ this.recorderStarted = false;
+ return;
+ }
  try {
  // stop amplitude monitoring tied to the recorder
  this._stopRecorderAmplitudeMonitoring();
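The reworked audioInputConnect() above exists to keep iOS Safari startup to a single getUserMedia call: the recorder is started first via _startRecorderWithDevice() (one begin()), then the device-change listener is attached and its first emission ignored. A condensed paraphrase of the added code, shown only to make the ordering explicit (not a separate API):

    // 1. One getUserMedia, performed inside wavRecorder.begin()
    if (!this.recorderStarted) {
      await this._startRecorderWithDevice(this.useSystemDefaultDevice ? undefined : this.deviceId || undefined);
    }
    // 2. Listener attaches afterwards; its first callback is skipped so begin() is not repeated
    this._skipFirstDeviceCallback = true;
    await this._setupDeviceChangeListener();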
@@ -6491,7 +6455,9 @@ registerProcessor('audio_processor', AudioProcessor);
  this.audioInput = state;
  this._emitAudioInput();
  if (state) {
+ this._setStatus('connecting');
  await this.audioInputConnect();
+ this._setStatus('connected');
  }
  else {
  await this.audioInputDisconnect();
@@ -6499,7 +6465,6 @@ registerProcessor('audio_processor', AudioProcessor);
  }
  }
  async setAudioOutput(state) {
- console.log('setAudioOutput called with state:', state, 'current:', this.audioOutput);
  if (this.audioOutput !== state) {
  this.audioOutput = state;
  this._emitAudioOutput();
@@ -6507,15 +6472,15 @@ registerProcessor('audio_processor', AudioProcessor);
  // Initialize audio output if not already connected
  // This happens when audioOutput was initially false and is now being enabled
  if (!this.wavPlayer.context) {
- console.log('setAudioOutput: initializing audio output (no context yet)');
+ this._setStatus('connecting');
  // Store the promise so _waitForAudioOutputReady() can await it
  // This prevents response.audio from running before AudioContext is ready
  const setupPromise = this.setupAudioOutput();
  this.audioOutputReady = setupPromise;
  await setupPromise;
+ this._setStatus('connected');
  }
  else {
- console.log('setAudioOutput: unmuting existing player');
  this.wavPlayer.unmute();
  }
  // Sync agentSpeaking state with actual playback state when enabling audio output
@@ -6598,7 +6563,19 @@ registerProcessor('audio_processor', AudioProcessor);
  await audioOutputReady;
  }
  catch (error) {
- console.error('Error connecting to Layercode agent:', error);
+ const permissionDeniedError = await this._microphonePermissionDeniedError(error);
+ if (permissionDeniedError) {
+ console.error(permissionDeniedError.message);
+ this._setStatus('error');
+ this.options.onError(permissionDeniedError);
+ return;
+ }
+ if (await this._shouldWarnAudioDevicesRequireUserGesture(error)) {
+ console.error('Cannot load audio devices before user has interacted with the page. Please move connect() to be triggered by a button, or load the SDK with "audioInput: false" to connection() on page load');
+ }
+ else {
+ console.error('Error connecting to Layercode agent:', error);
+ }
  this._setStatus('error');
  this.options.onError(error instanceof Error ? error : new Error(String(error)));
  }
@@ -6757,7 +6734,7 @@ registerProcessor('audio_processor', AudioProcessor);
  const newStream = this.wavRecorder.getStream();
  await this._reinitializeVAD(newStream);
  }
- const reportedDeviceId = (_c = (_b = this.lastReportedDeviceId) !== null && _b !== void 0 ? _b : this.activeDeviceId) !== null && _c !== void 0 ? _c : (this.useSystemDefaultDevice ? 'default' : normalizedDeviceId !== null && normalizedDeviceId !== void 0 ? normalizedDeviceId : 'default');
+ const reportedDeviceId = (_c = (_b = this.lastReportedDeviceId) !== null && _b !== void 0 ? _b : this.activeDeviceId) !== null && _c !== void 0 ? _c : (this.useSystemDefaultDevice ? 'default' : (normalizedDeviceId !== null && normalizedDeviceId !== void 0 ? normalizedDeviceId : 'default'));
  console.debug(`Successfully switched to input device: ${reportedDeviceId}`);
  }
  catch (error) {
@@ -6811,7 +6788,7 @@ registerProcessor('audio_processor', AudioProcessor);
  this.recorderStarted = true;
  this._sendReadyIfNeeded();
  }
- const reportedDeviceId = (_a = this.activeDeviceId) !== null && _a !== void 0 ? _a : (this.useSystemDefaultDevice ? 'default' : (_b = this.deviceId) !== null && _b !== void 0 ? _b : 'default');
+ const reportedDeviceId = (_a = this.activeDeviceId) !== null && _a !== void 0 ? _a : (this.useSystemDefaultDevice ? 'default' : ((_b = this.deviceId) !== null && _b !== void 0 ? _b : 'default'));
  if (reportedDeviceId !== previousReportedDeviceId) {
  this.lastReportedDeviceId = reportedDeviceId;
  if (this.options.onDeviceSwitched) {
@@ -6830,33 +6807,6 @@ registerProcessor('audio_processor', AudioProcessor);
  this.recorderRestartChain = run.catch(() => { });
  return run;
  }
- async _initializeRecorderWithDefaultDevice() {
- console.log('_initializeRecorderWithDefaultDevice called, deviceChangeListener:', !!this.deviceChangeListener);
- if (!this.deviceChangeListener) {
- return;
- }
- try {
- const devices = await this.wavRecorder.listDevices();
- console.log('_initializeRecorderWithDefaultDevice: got devices:', devices.length);
- if (devices.length) {
- console.log('_initializeRecorderWithDefaultDevice: calling deviceChangeListener');
- await this.deviceChangeListener(devices);
- return;
- }
- console.warn('No audio input devices available when enabling microphone');
- }
- catch (error) {
- console.warn('Unable to prime audio devices from listDevices()', error);
- }
- try {
- console.log('_initializeRecorderWithDefaultDevice: calling setInputDevice default');
- await this.setInputDevice('default');
- }
- catch (error) {
- console.error('Failed to start recording with the system default device:', error);
- throw error;
- }
- }
  /**
  * Disconnect VAD
  */
@@ -6875,7 +6825,7 @@ registerProcessor('audio_processor', AudioProcessor);
  this.stopVad();
  // Reinitialize with new stream only if we're actually capturing audio
  if (stream && this._shouldCaptureUserAudio()) {
- this._initializeVAD();
+ await this._initializeVAD();
  }
  }
  /**
@@ -6897,8 +6847,8 @@ registerProcessor('audio_processor', AudioProcessor);
  };
  });
  this.deviceChangeListener = async (devices) => {
- var _a;
- console.log('deviceChangeListener called, devices:', devices.length, 'recorderStarted:', this.recorderStarted);
+ var _a, _b;
+ console.log('deviceChangeListener called, devices:', devices.length, 'recorderStarted:', this.recorderStarted, '_skipFirstDeviceCallback:', this._skipFirstDeviceCallback);
  try {
  // Notify user that devices have changed
  this.options.onDevicesChanged(devices);
@@ -6906,6 +6856,15 @@ registerProcessor('audio_processor', AudioProcessor);
  const usingDefaultDevice = this.useSystemDefaultDevice;
  const previousDefaultDeviceKey = this.lastKnownSystemDefaultDeviceKey;
  const currentDefaultDeviceKey = this._getDeviceComparisonKey(defaultDevice);
+ // Skip switching on the first callback after starting the recorder to avoid redundant begin() calls
+ // This is set by audioInputConnect() after _startRecorderWithDevice() completes
+ if (this._skipFirstDeviceCallback) {
+ console.log('deviceChangeListener: skipping first callback after recorder start');
+ this._skipFirstDeviceCallback = false;
+ this.lastKnownSystemDefaultDeviceKey = currentDefaultDeviceKey;
+ (_a = this.resolveDeviceListenerReady) === null || _a === void 0 ? void 0 : _a.call(this);
+ return;
+ }
  let shouldSwitch = !this.recorderStarted;
  console.log('deviceChangeListener: shouldSwitch initial:', shouldSwitch);
  if (!shouldSwitch) {
@@ -6916,8 +6875,7 @@ registerProcessor('audio_processor', AudioProcessor);
  else if (this.activeDeviceId && defaultDevice.deviceId !== 'default' && defaultDevice.deviceId !== this.activeDeviceId) {
  shouldSwitch = true;
  }
- else if ((previousDefaultDeviceKey && previousDefaultDeviceKey !== currentDefaultDeviceKey) ||
- (!previousDefaultDeviceKey && !currentDefaultDeviceKey && this.recorderStarted)) {
+ else if ((previousDefaultDeviceKey && previousDefaultDeviceKey !== currentDefaultDeviceKey) || (!previousDefaultDeviceKey && !currentDefaultDeviceKey && this.recorderStarted)) {
  shouldSwitch = true;
  }
  }
@@ -6957,7 +6915,7 @@ registerProcessor('audio_processor', AudioProcessor);
  this.options.onError(error instanceof Error ? error : new Error(String(error)));
  }
  finally {
- (_a = this.resolveDeviceListenerReady) === null || _a === void 0 ? void 0 : _a.call(this);
+ (_b = this.resolveDeviceListenerReady) === null || _b === void 0 ? void 0 : _b.call(this);
  }
  };
  this.wavRecorder.listenForDeviceChange(this.deviceChangeListener);
@@ -6981,6 +6939,7 @@ registerProcessor('audio_processor', AudioProcessor);
  this.lastKnownSystemDefaultDeviceKey = null;
  this.recorderStarted = false;
  this.readySent = false;
+ this._skipFirstDeviceCallback = false;
  this._stopAmplitudeMonitoring();
  this._teardownDeviceListeners();
  if (this.vad) {
@@ -7016,6 +6975,81 @@ registerProcessor('audio_processor', AudioProcessor);
  }
  return null;
  }
+ _getUserActivationState() {
+ try {
+ const nav = typeof navigator !== 'undefined' ? navigator : null;
+ const act = nav === null || nav === void 0 ? void 0 : nav.userActivation;
+ if (act && typeof act === 'object') {
+ if (typeof act.hasBeenActive === 'boolean')
+ return act.hasBeenActive;
+ if (typeof act.isActive === 'boolean')
+ return act.isActive ? true : null;
+ }
+ const doc = typeof document !== 'undefined' ? document : null;
+ const dact = doc === null || doc === void 0 ? void 0 : doc.userActivation;
+ if (dact && typeof dact === 'object') {
+ if (typeof dact.hasBeenActive === 'boolean')
+ return dact.hasBeenActive;
+ if (typeof dact.isActive === 'boolean')
+ return dact.isActive ? true : null;
+ }
+ }
+ catch (_a) { }
+ return null;
+ }
+ async _isMicrophonePermissionDenied() {
+ try {
+ const nav = typeof navigator !== 'undefined' ? navigator : null;
+ const permissions = nav === null || nav === void 0 ? void 0 : nav.permissions;
+ if (!(permissions === null || permissions === void 0 ? void 0 : permissions.query))
+ return null;
+ const status = await permissions.query({ name: 'microphone' });
+ const state = status === null || status === void 0 ? void 0 : status.state;
+ if (state === 'denied')
+ return true;
+ if (state === 'granted' || state === 'prompt')
+ return false;
+ }
+ catch (_a) { }
+ return null;
+ }
+ async _microphonePermissionDeniedError(error) {
+ const err = error;
+ const message = typeof (err === null || err === void 0 ? void 0 : err.message) === 'string' ? err.message : typeof error === 'string' ? error : '';
+ if (message === 'User has denined audio device permissions') {
+ return err instanceof Error ? err : new Error(message);
+ }
+ const name = typeof (err === null || err === void 0 ? void 0 : err.name) === 'string' ? err.name : '';
+ const isPermissionLike = name === 'NotAllowedError' || name === 'SecurityError' || name === 'PermissionDeniedError';
+ if (!isPermissionLike) {
+ return null;
+ }
+ const micDenied = await this._isMicrophonePermissionDenied();
+ if (micDenied === true || /permission denied/i.test(message)) {
+ return new Error('User has denined audio device permissions');
+ }
+ return null;
+ }
+ async _shouldWarnAudioDevicesRequireUserGesture(error) {
+ const e = error;
+ const name = typeof (e === null || e === void 0 ? void 0 : e.name) === 'string' ? e.name : '';
+ const msg = typeof (e === null || e === void 0 ? void 0 : e.message) === 'string'
+ ? e.message
+ : typeof error === 'string'
+ ? error
+ : '';
+ const isPermissionLike = name === 'NotAllowedError' || name === 'SecurityError' || name === 'PermissionDeniedError';
+ if (!isPermissionLike)
+ return false;
+ // If the browser can tell us mic permission is explicitly denied, don't show the "user gesture" guidance.
+ const micDenied = await this._isMicrophonePermissionDenied();
+ if (micDenied === true)
+ return false;
+ if (/user activation|user gesture|interacte?d? with( the)? (page|document)|before user has interacted/i.test(msg)) {
+ return true;
+ }
+ return this._getUserActivationState() === false;
+ }
  /**
  * Mutes the microphone to stop sending audio to the server
  * The connection and recording remain active for quick unmute
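The three helpers added above centralize how permission-style failures are classified: _isMicrophonePermissionDenied() consults the Permissions API, _microphonePermissionDeniedError() maps explicit denials to the SDK's canonical 'User has denined audio device permissions' error (string kept exactly as published), and _shouldWarnAudioDevicesRequireUserGesture() flags NotAllowedError/SecurityError cases that look like a missing user gesture rather than a denial. A hedged sketch of the decision order; `client` stands in for the SDK instance and is not a name taken from this diff:

    try {
      await navigator.mediaDevices.getUserMedia({ audio: true });
    } catch (error) {
      const denied = await client._microphonePermissionDeniedError(error);
      if (denied) {
        // Explicit denial (Permissions API reports 'denied' or the message says so): surfaced via onError
      } else if (await client._shouldWarnAudioDevicesRequireUserGesture(error)) {
        // Permission-like error with no explicit denial and no prior user activation:
        // advise calling connect() from a user gesture instead
      }
    }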
@@ -7032,13 +7066,13 @@ registerProcessor('audio_processor', AudioProcessor);
  /**
  * Unmutes the microphone to resume sending audio to the server
  */
- unmute() {
+ async unmute() {
  if (this.isMuted) {
  this.isMuted = false;
  console.log('Microphone unmuted');
  this.options.onMuteStateChange(false);
  if (this.audioInput && this.recorderStarted) {
- this._initializeVAD();
+ await this._initializeVAD();
  if (this.stopRecorderAmplitude === undefined) {
  this._setupAmplitudeMonitoring(this.wavRecorder, this.options.onUserAmplitudeChange, (amp) => (this.userAudioAmplitude = amp));
  }