@layercode/js-sdk 2.8.3 → 2.8.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5082,11 +5082,7 @@ class WavRecorder {
5082
5082
  * @param {{sampleRate?: number, outputToSpeakers?: boolean, debug?: boolean}} [options]
5083
5083
  * @returns {WavRecorder}
5084
5084
  */
5085
- constructor({
5086
- sampleRate = 24000,
5087
- outputToSpeakers = false,
5088
- debug = false,
5089
- } = {}) {
5085
+ constructor({ sampleRate = 24000, outputToSpeakers = false, debug = false } = {}) {
5090
5086
  // Script source
5091
5087
  this.scriptSrc = AudioProcessorSrc;
5092
5088
  // Config
@@ -5104,6 +5100,11 @@ class WavRecorder {
5104
5100
  this.analyser = null;
5105
5101
  this.recording = false;
5106
5102
  this.contextSampleRate = sampleRate;
5103
+ // Track whether we've already obtained microphone permission
5104
+ // This avoids redundant getUserMedia calls which are expensive on iOS Safari
5105
+ this._hasPermission = false;
5106
+ // Promise used to dedupe concurrent requestPermission() calls
5107
+ this._permissionPromise = null;
5107
5108
  // Event handling with AudioWorklet
5108
5109
  this._lastEventId = 0;
5109
5110
  this.eventReceipts = {};
@@ -5131,17 +5132,13 @@ class WavRecorder {
5131
5132
  let blob;
5132
5133
  if (audioData instanceof Blob) {
5133
5134
  if (fromSampleRate !== -1) {
5134
- throw new Error(
5135
- `Can not specify "fromSampleRate" when reading from Blob`,
5136
- );
5135
+ throw new Error(`Can not specify "fromSampleRate" when reading from Blob`);
5137
5136
  }
5138
5137
  blob = audioData;
5139
5138
  arrayBuffer = await blob.arrayBuffer();
5140
5139
  } else if (audioData instanceof ArrayBuffer) {
5141
5140
  if (fromSampleRate !== -1) {
5142
- throw new Error(
5143
- `Can not specify "fromSampleRate" when reading from ArrayBuffer`,
5144
- );
5141
+ throw new Error(`Can not specify "fromSampleRate" when reading from ArrayBuffer`);
5145
5142
  }
5146
5143
  arrayBuffer = audioData;
5147
5144
  blob = new Blob([arrayBuffer], { type: 'audio/wav' });
@@ -5159,14 +5156,10 @@ class WavRecorder {
5159
5156
  } else if (audioData instanceof Array) {
5160
5157
  float32Array = new Float32Array(audioData);
5161
5158
  } else {
5162
- throw new Error(
5163
- `"audioData" must be one of: Blob, Float32Arrray, Int16Array, ArrayBuffer, Array<number>`,
5164
- );
5159
+ throw new Error(`"audioData" must be one of: Blob, Float32Arrray, Int16Array, ArrayBuffer, Array<number>`);
5165
5160
  }
5166
5161
  if (fromSampleRate === -1) {
5167
- throw new Error(
5168
- `Must specify "fromSampleRate" when reading from Float32Array, In16Array or Array`,
5169
- );
5162
+ throw new Error(`Must specify "fromSampleRate" when reading from Float32Array, In16Array or Array`);
5170
5163
  } else if (fromSampleRate < 3000) {
5171
5164
  throw new Error(`Minimum "fromSampleRate" is 3000 (3kHz)`);
5172
5165
  }
@@ -5196,12 +5189,13 @@ class WavRecorder {
5196
5189
 
5197
5190
  /**
5198
5191
  * Logs data in debug mode
5199
- * @param {...any} arguments
5192
+ * @param {...any} args
5200
5193
  * @returns {true}
5201
5194
  */
5202
- log() {
5195
+ log(...args) {
5203
5196
  if (this.debug) {
5204
- this.log(...arguments);
5197
+ // eslint-disable-next-line no-console
5198
+ console.log(...args);
5205
5199
  }
5206
5200
  return true;
5207
5201
  }
@@ -5274,10 +5268,7 @@ class WavRecorder {
5274
5268
  */
5275
5269
  listenForDeviceChange(callback) {
5276
5270
  if (callback === null && this._deviceChangeCallback) {
5277
- navigator.mediaDevices.removeEventListener(
5278
- 'devicechange',
5279
- this._deviceChangeCallback,
5280
- );
5271
+ navigator.mediaDevices.removeEventListener('devicechange', this._deviceChangeCallback);
5281
5272
  this._deviceChangeCallback = null;
5282
5273
  } else if (callback !== null) {
5283
5274
  // Basically a debounce; we only want this called once when devices change
@@ -5309,21 +5300,39 @@ class WavRecorder {
5309
5300
 
5310
5301
  /**
5311
5302
  * Manually request permission to use the microphone
5303
+ * Skips if permission has already been granted to avoid expensive redundant getUserMedia calls.
5304
+ * Dedupes concurrent calls to prevent multiple getUserMedia requests.
5312
5305
  * @returns {Promise<true>}
5313
5306
  */
5314
5307
  async requestPermission() {
5315
- console.log('ensureUserMediaAccess');
5316
- try {
5317
- const stream = await navigator.mediaDevices.getUserMedia({
5318
- audio: true,
5319
- });
5320
- // Stop the tracks immediately after getting permission
5321
- stream.getTracks().forEach(track => track.stop());
5322
- } catch (fallbackError) {
5323
- console.error('getUserMedia failed:', fallbackError.name, fallbackError.message);
5324
- throw fallbackError;
5308
+ // Skip if we already have permission - each getUserMedia is expensive on iOS Safari
5309
+ if (this._hasPermission) {
5310
+ return true;
5325
5311
  }
5326
- return true;
5312
+ // Dedupe concurrent calls: if a permission request is already in flight, wait for it
5313
+ if (this._permissionPromise) {
5314
+ return this._permissionPromise;
5315
+ }
5316
+
5317
+ console.log('ensureUserMediaAccess');
5318
+ this._permissionPromise = (async () => {
5319
+ try {
5320
+ const stream = await navigator.mediaDevices.getUserMedia({
5321
+ audio: true,
5322
+ });
5323
+ // Stop the tracks immediately after getting permission
5324
+ stream.getTracks().forEach((track) => track.stop());
5325
+ this._hasPermission = true;
5326
+ return true;
5327
+ } catch (fallbackError) {
5328
+ console.error('getUserMedia failed:', fallbackError.name, fallbackError.message);
5329
+ throw fallbackError;
5330
+ } finally {
5331
+ this._permissionPromise = null;
5332
+ }
5333
+ })();
5334
+
5335
+ return this._permissionPromise;
5327
5336
  }
5328
5337
 
5329
5338
  /**
@@ -5331,25 +5340,18 @@ class WavRecorder {
5331
5340
  * @returns {Promise<Array<MediaDeviceInfo & {default: boolean}>>}
5332
5341
  */
5333
5342
  async listDevices() {
5334
- if (
5335
- !navigator.mediaDevices ||
5336
- !('enumerateDevices' in navigator.mediaDevices)
5337
- ) {
5343
+ if (!navigator.mediaDevices || !('enumerateDevices' in navigator.mediaDevices)) {
5338
5344
  throw new Error('Could not request user devices');
5339
5345
  }
5340
5346
  await this.requestPermission();
5341
5347
 
5342
5348
  const devices = await navigator.mediaDevices.enumerateDevices();
5343
5349
  const audioDevices = devices.filter((device) => device.kind === 'audioinput');
5344
- const defaultDeviceIndex = audioDevices.findIndex(
5345
- (device) => device.deviceId === 'default',
5346
- );
5350
+ const defaultDeviceIndex = audioDevices.findIndex((device) => device.deviceId === 'default');
5347
5351
  const deviceList = [];
5348
5352
  if (defaultDeviceIndex !== -1) {
5349
5353
  let defaultDevice = audioDevices.splice(defaultDeviceIndex, 1)[0];
5350
- let existingIndex = audioDevices.findIndex(
5351
- (device) => device.groupId === defaultDevice.groupId,
5352
- );
5354
+ let existingIndex = audioDevices.findIndex((device) => device.groupId === defaultDevice.groupId);
5353
5355
  if (existingIndex !== -1) {
5354
5356
  defaultDevice = audioDevices.splice(existingIndex, 1)[0];
5355
5357
  }
@@ -5371,15 +5373,10 @@ class WavRecorder {
5371
5373
  */
5372
5374
  async begin(deviceId) {
5373
5375
  if (this.processor) {
5374
- throw new Error(
5375
- `Already connected: please call .end() to start a new session`,
5376
- );
5376
+ throw new Error(`Already connected: please call .end() to start a new session`);
5377
5377
  }
5378
5378
 
5379
- if (
5380
- !navigator.mediaDevices ||
5381
- !('getUserMedia' in navigator.mediaDevices)
5382
- ) {
5379
+ if (!navigator.mediaDevices || !('getUserMedia' in navigator.mediaDevices)) {
5383
5380
  throw new Error('Could not request user media');
5384
5381
  }
5385
5382
  try {
@@ -5390,14 +5387,16 @@ class WavRecorder {
5390
5387
  echoCancellation: true,
5391
5388
  autoGainControl: true,
5392
5389
  noiseSuppression: true,
5393
- }
5390
+ },
5394
5391
  };
5395
5392
  if (deviceId) {
5396
5393
  config.audio.deviceId = { exact: deviceId };
5397
5394
  }
5398
5395
  this.stream = await navigator.mediaDevices.getUserMedia(config);
5396
+ // Mark permission as granted so listDevices() won't call requestPermission() again
5397
+ this._hasPermission = true;
5399
5398
  } catch (err) {
5400
- throw new Error('Could not start media stream');
5399
+ throw err;
5401
5400
  }
5402
5401
 
5403
5402
  const createContext = (rate) => {
@@ -5449,10 +5448,7 @@ class WavRecorder {
5449
5448
  raw: WavPacker.mergeBuffers(buffer.raw, data.raw),
5450
5449
  mono: WavPacker.mergeBuffers(buffer.mono, data.mono),
5451
5450
  };
5452
- if (
5453
- this._chunkProcessorBuffer.mono.byteLength >=
5454
- this._chunkProcessorSize
5455
- ) {
5451
+ if (this._chunkProcessorBuffer.mono.byteLength >= this._chunkProcessorSize) {
5456
5452
  this._chunkProcessor(this._chunkProcessorBuffer);
5457
5453
  this._chunkProcessorBuffer = {
5458
5454
  raw: new ArrayBuffer(0),
@@ -5480,11 +5476,7 @@ class WavRecorder {
5480
5476
  node.connect(analyser);
5481
5477
  if (this.outputToSpeakers) {
5482
5478
  // eslint-disable-next-line no-console
5483
- console.warn(
5484
- 'Warning: Output to speakers may affect sound quality,\n' +
5485
- 'especially due to system audio feedback preventative measures.\n' +
5486
- 'use only for debugging',
5487
- );
5479
+ console.warn('Warning: Output to speakers may affect sound quality,\n' + 'especially due to system audio feedback preventative measures.\n' + 'use only for debugging');
5488
5480
  analyser.connect(context.destination);
5489
5481
  }
5490
5482
 
@@ -5511,26 +5503,14 @@ class WavRecorder {
5511
5503
  * @param {number} [maxDecibels] default -30
5512
5504
  * @returns {import('./analysis/audio_analysis.js').AudioAnalysisOutputType}
5513
5505
  */
5514
- getFrequencies(
5515
- analysisType = 'frequency',
5516
- minDecibels = -100,
5517
- maxDecibels = -30,
5518
- ) {
5506
+ getFrequencies(analysisType = 'frequency', minDecibels = -100, maxDecibels = -30) {
5519
5507
  if (!this.processor) {
5520
5508
  throw new Error('Session ended: please call .begin() first');
5521
5509
  }
5522
- return AudioAnalysis.getFrequencies(
5523
- this.analyser,
5524
- this.sampleRate,
5525
- null,
5526
- analysisType,
5527
- minDecibels,
5528
- maxDecibels,
5529
- );
5510
+ return AudioAnalysis.getFrequencies(this.analyser, this.sampleRate, null, analysisType, minDecibels, maxDecibels);
5530
5511
  }
5531
5512
 
5532
-
5533
- /**
5513
+ /**
5534
5514
  * Gets the real-time amplitude of the audio signal
5535
5515
  * @returns {number} Amplitude value between 0 and 1
5536
5516
  */
@@ -5655,9 +5635,7 @@ class WavRecorder {
5655
5635
  throw new Error('Session ended: please call .begin() first');
5656
5636
  }
5657
5637
  if (!force && this.recording) {
5658
- throw new Error(
5659
- 'Currently recording: please call .pause() first, or call .save(true) to force',
5660
- );
5638
+ throw new Error('Currently recording: please call .pause() first, or call .save(true) to force');
5661
5639
  }
5662
5640
  this.log('Exporting ...');
5663
5641
  const exportData = await this._event('export');
@@ -5764,6 +5742,7 @@ function arrayBufferToBase64(arrayBuffer) {
5764
5742
  return btoa(binary);
5765
5743
  }
5766
5744
 
5745
+ //// src/index.ts
5767
5746
  /* eslint-env browser */
5768
5747
  // import { env as ortEnv } from 'onnxruntime-web';
5769
5748
  // @ts-ignore - VAD package does not provide TypeScript types
@@ -5771,137 +5750,40 @@ const NOOP = () => { };
5771
5750
  const DEFAULT_WS_URL = 'wss://api.layercode.com/v1/agents/web/websocket';
5772
5751
  // SDK version - updated when publishing
5773
5752
  const SDK_VERSION = '2.7.0';
5774
- const MEDIA_DEVICE_CHANGE_EVENT = 'devicechange';
5775
- const MEDIA_DEVICE_KIND_AUDIO = 'audioinput';
5753
+ const DEFAULT_RECORDER_SAMPLE_RATE = 8000;
5776
5754
  const hasMediaDevicesSupport = () => typeof navigator !== 'undefined' && !!navigator.mediaDevices;
5777
- let microphonePermissionPromise = null;
5778
- let microphonePermissionGranted = false;
5779
- const stopStreamTracks = (stream) => {
5780
- if (!stream) {
5781
- return;
5782
- }
5783
- stream.getTracks().forEach((track) => {
5784
- try {
5785
- track.stop();
5786
- }
5787
- catch (_a) {
5788
- /* noop */
5789
- }
5790
- });
5791
- };
5792
- const ensureMicrophonePermissions = async () => {
5793
- if (!hasMediaDevicesSupport()) {
5794
- throw new Error('Media devices are not available in this environment');
5795
- }
5796
- if (microphonePermissionGranted) {
5797
- return;
5798
- }
5799
- if (!microphonePermissionPromise) {
5800
- microphonePermissionPromise = navigator.mediaDevices
5801
- .getUserMedia({ audio: true })
5802
- .then((stream) => {
5803
- microphonePermissionGranted = true;
5804
- stopStreamTracks(stream);
5805
- })
5806
- .finally(() => {
5807
- microphonePermissionPromise = null;
5808
- });
5809
- }
5810
- return microphonePermissionPromise;
5811
- };
5812
- const cloneAudioDevice = (device, isDefault) => {
5755
+ const toLayercodeAudioInputDevice = (device) => {
5813
5756
  const cloned = {
5814
- deviceId: device.deviceId,
5815
- groupId: device.groupId,
5816
- kind: device.kind,
5757
+ ...device,
5817
5758
  label: device.label,
5818
- default: isDefault,
5759
+ default: Boolean(device.default),
5819
5760
  };
5820
5761
  if (typeof device.toJSON === 'function') {
5821
5762
  cloned.toJSON = device.toJSON.bind(device);
5822
5763
  }
5823
5764
  return cloned;
5824
5765
  };
5825
- const normalizeAudioInputDevices = (devices) => {
5826
- const audioDevices = devices.filter((device) => device.kind === MEDIA_DEVICE_KIND_AUDIO);
5827
- if (!audioDevices.length) {
5828
- return [];
5829
- }
5830
- const remaining = [...audioDevices];
5831
- const normalized = [];
5832
- const defaultIndex = remaining.findIndex((device) => device.deviceId === 'default');
5833
- if (defaultIndex !== -1) {
5834
- let defaultDevice = remaining.splice(defaultIndex, 1)[0];
5835
- const groupMatchIndex = remaining.findIndex((device) => device.groupId && defaultDevice.groupId && device.groupId === defaultDevice.groupId);
5836
- if (groupMatchIndex !== -1) {
5837
- defaultDevice = remaining.splice(groupMatchIndex, 1)[0];
5838
- }
5839
- normalized.push(cloneAudioDevice(defaultDevice, true));
5840
- }
5841
- else if (remaining.length) {
5842
- const fallbackDefault = remaining.shift();
5843
- normalized.push(cloneAudioDevice(fallbackDefault, true));
5844
- }
5845
- return normalized.concat(remaining.map((device) => cloneAudioDevice(device, false)));
5846
- };
5847
5766
  const listAudioInputDevices = async () => {
5848
5767
  if (!hasMediaDevicesSupport()) {
5849
5768
  throw new Error('Media devices are not available in this environment');
5850
5769
  }
5851
- await ensureMicrophonePermissions();
5852
- const devices = await navigator.mediaDevices.enumerateDevices();
5853
- return normalizeAudioInputDevices(devices);
5770
+ const recorder = new WavRecorder({ sampleRate: DEFAULT_RECORDER_SAMPLE_RATE });
5771
+ const devices = (await recorder.listDevices());
5772
+ return devices.map(toLayercodeAudioInputDevice);
5854
5773
  };
5855
5774
  const watchAudioInputDevices = (callback) => {
5856
5775
  if (!hasMediaDevicesSupport()) {
5857
5776
  return () => { };
5858
5777
  }
5859
- let disposed = false;
5860
- let lastSignature = null;
5861
- let requestId = 0;
5862
- const emitDevices = async () => {
5863
- requestId += 1;
5864
- const currentRequest = requestId;
5865
- try {
5866
- const devices = await listAudioInputDevices();
5867
- if (disposed || currentRequest !== requestId) {
5868
- return;
5869
- }
5870
- const signature = devices.map((device) => `${device.deviceId}:${device.label}:${device.groupId}:${device.default ? '1' : '0'}`).join('|');
5871
- if (signature !== lastSignature) {
5872
- lastSignature = signature;
5873
- callback(devices);
5874
- }
5875
- }
5876
- catch (error) {
5877
- if (!disposed) {
5878
- console.warn('Failed to refresh audio devices', error);
5879
- }
5880
- }
5881
- };
5882
- const handler = () => {
5883
- void emitDevices();
5778
+ const recorder = new WavRecorder({ sampleRate: DEFAULT_RECORDER_SAMPLE_RATE });
5779
+ const handleDevicesChange = (devices) => {
5780
+ callback(devices.map(toLayercodeAudioInputDevice));
5884
5781
  };
5885
- const mediaDevices = navigator.mediaDevices;
5886
- let teardown = null;
5887
- if (typeof mediaDevices.addEventListener === 'function') {
5888
- mediaDevices.addEventListener(MEDIA_DEVICE_CHANGE_EVENT, handler);
5889
- teardown = () => mediaDevices.removeEventListener(MEDIA_DEVICE_CHANGE_EVENT, handler);
5890
- }
5891
- else if ('ondevicechange' in mediaDevices) {
5892
- const previousHandler = mediaDevices.ondevicechange;
5893
- mediaDevices.ondevicechange = handler;
5894
- teardown = () => {
5895
- if (mediaDevices.ondevicechange === handler) {
5896
- mediaDevices.ondevicechange = previousHandler || null;
5897
- }
5898
- };
5899
- }
5900
- // Always emit once on subscribe
5901
- void emitDevices();
5782
+ // WavRecorder handles initial emit + deduping devicechange events
5783
+ recorder.listenForDeviceChange(handleDevicesChange);
5902
5784
  return () => {
5903
- disposed = true;
5904
- teardown === null || teardown === void 0 ? void 0 : teardown();
5785
+ recorder.listenForDeviceChange(null);
5786
+ recorder.quit().catch(() => { });
5905
5787
  };
5906
5788
  };
5907
5789
  /**
@@ -5948,7 +5830,7 @@ class LayercodeClient {
5948
5830
  this.AMPLITUDE_MONITORING_SAMPLE_RATE = 2;
5949
5831
  this._websocketUrl = DEFAULT_WS_URL;
5950
5832
  this.audioOutputReady = null;
5951
- this.wavRecorder = new WavRecorder({ sampleRate: 8000 }); // TODO should be set my fetched agent config
5833
+ this.wavRecorder = new WavRecorder({ sampleRate: DEFAULT_RECORDER_SAMPLE_RATE }); // TODO should be set by fetched agent config
5952
5834
  this.wavPlayer = new WavStreamPlayer({
5953
5835
  finishedPlayingCallback: this._clientResponseAudioReplayFinished.bind(this),
5954
5836
  sampleRate: 16000, // TODO should be set my fetched agent config
@@ -5980,6 +5862,7 @@ class LayercodeClient {
5980
5862
  this.stopRecorderAmplitude = undefined;
5981
5863
  this.deviceChangeListener = null;
5982
5864
  this.recorderRestartChain = Promise.resolve();
5865
+ this._skipFirstDeviceCallback = false;
5983
5866
  this.deviceListenerReady = null;
5984
5867
  this.resolveDeviceListenerReady = null;
5985
5868
  // this.audioPauseTime = null;
@@ -5999,7 +5882,7 @@ class LayercodeClient {
5999
5882
  set onDevicesChanged(callback) {
6000
5883
  this.options.onDevicesChanged = callback !== null && callback !== void 0 ? callback : NOOP;
6001
5884
  }
6002
- _initializeVAD() {
5885
+ async _initializeVAD() {
6003
5886
  var _a;
6004
5887
  console.log('initializing VAD', { pushToTalkEnabled: this.pushToTalkEnabled, canInterrupt: this.canInterrupt, vadConfig: this.vadConfig });
6005
5888
  // If we're in push to talk mode or mute mode, we don't need to use the VAD model
@@ -6083,13 +5966,13 @@ class LayercodeClient {
6083
5966
  vadOptions.frameSamples = 512; // Required for v5
6084
5967
  }
6085
5968
  console.log('Creating VAD with options:', vadOptions);
6086
- dist.MicVAD.new(vadOptions)
6087
- .then((vad) => {
5969
+ try {
5970
+ const vad = await dist.MicVAD.new(vadOptions);
6088
5971
  this.vad = vad;
6089
5972
  this.vad.start();
6090
5973
  console.log('VAD started successfully');
6091
- })
6092
- .catch((error) => {
5974
+ }
5975
+ catch (error) {
6093
5976
  console.warn('Error initializing VAD:', error);
6094
5977
  // Send a message to server indicating VAD failure
6095
5978
  const vadFailureMessage = {
@@ -6101,7 +5984,7 @@ class LayercodeClient {
6101
5984
  ...vadFailureMessage,
6102
5985
  userSpeaking: this.userIsSpeaking,
6103
5986
  });
6104
- });
5987
+ }
6105
5988
  }
6106
5989
  /**
6107
5990
  * Updates the connection status and triggers the callback
@@ -6319,27 +6202,34 @@ class LayercodeClient {
6319
6202
  * @param {ArrayBuffer} data - The audio data buffer
6320
6203
  */
6321
6204
  _handleDataAvailable(data) {
6322
- var _a, _b, _c;
6205
+ var _a, _b, _c, _d;
6323
6206
  try {
6324
- const base64 = arrayBufferToBase64(data.mono);
6325
- // Don't send audio if muted
6207
+ // Don't send or buffer audio if muted. Also clear any stale buffer so we
6208
+ // don't accidentally flush old audio after unmute.
6326
6209
  if (this.isMuted) {
6210
+ this.audioBuffer = [];
6327
6211
  return;
6328
6212
  }
6329
6213
  // Determine if we should gate audio based on VAD configuration
6330
- const shouldGateAudio = ((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.gate_audio) !== false; // Default to true if not specified
6331
- const bufferFrames = (_c = (_b = this.vadConfig) === null || _b === void 0 ? void 0 : _b.buffer_frames) !== null && _c !== void 0 ? _c : 10; // Default to 10 if not specified
6214
+ const shouldGateAudio = ((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.gate_audio) !== false; // default true
6215
+ const bufferFrames = (_c = (_b = this.vadConfig) === null || _b === void 0 ? void 0 : _b.buffer_frames) !== null && _c !== void 0 ? _c : 10; // default 10
6216
+ // If VAD is disabled or failed to init, gating would deadlock (userIsSpeaking never flips true).
6217
+ // Only gate if we actually have a running VAD instance.
6218
+ const vadEnabledByConfig = ((_d = this.vadConfig) === null || _d === void 0 ? void 0 : _d.enabled) !== false; // default true
6219
+ const vadAvailable = vadEnabledByConfig && !!this.vad && !this.pushToTalkEnabled;
6332
6220
  let sendAudio;
6333
6221
  if (this.pushToTalkEnabled) {
6334
6222
  sendAudio = this.pushToTalkActive;
6335
6223
  }
6336
6224
  else if (shouldGateAudio) {
6337
- sendAudio = this.userIsSpeaking;
6225
+ // Key fix: if VAD isn't available, don't gate — send audio.
6226
+ sendAudio = vadAvailable ? this.userIsSpeaking : true;
6338
6227
  }
6339
6228
  else {
6340
6229
  // If gate_audio is false, always send audio
6341
6230
  sendAudio = true;
6342
6231
  }
6232
+ const base64 = arrayBufferToBase64(data.mono);
6343
6233
  if (sendAudio) {
6344
6234
  // If we have buffered audio and we're gating, send it first
6345
6235
  if (shouldGateAudio && this.audioBuffer.length > 0) {
@@ -6452,18 +6342,103 @@ class LayercodeClient {
6452
6342
  }
6453
6343
  async audioInputConnect() {
6454
6344
  // Turn mic ON
6455
- console.log('audioInputConnect: requesting permission');
6456
- await this.wavRecorder.requestPermission();
6345
+ // NOTE: On iOS Safari, each getUserMedia call is expensive (~2-3 seconds).
6346
+ // We optimize by:
6347
+ // 1. Starting the recorder FIRST with begin() (single getUserMedia)
6348
+ // 2. THEN setting up device change listeners (which will skip getUserMedia since permission is cached)
6349
+ console.log('audioInputConnect: recorderStarted =', this.recorderStarted);
6350
+ // If the recorder hasn't spun up yet, start it first with the preferred or default device
6351
+ // This ensures we only make ONE getUserMedia call instead of multiple sequential ones
6352
+ if (!this.recorderStarted) {
6353
+ // Use preferred device if set, otherwise use system default
6354
+ const targetDeviceId = this.useSystemDefaultDevice ? undefined : this.deviceId || undefined;
6355
+ // Mark as using system default if no specific device is set
6356
+ if (!targetDeviceId) {
6357
+ this.useSystemDefaultDevice = true;
6358
+ }
6359
+ console.log('audioInputConnect: starting recorder with device:', targetDeviceId !== null && targetDeviceId !== void 0 ? targetDeviceId : 'system default');
6360
+ await this._startRecorderWithDevice(targetDeviceId);
6361
+ }
6362
+ // Now set up device change listeners - permission is already granted so listDevices() won't call getUserMedia
6363
+ // Skip the first callback since we've already started with the correct device
6364
+ this._skipFirstDeviceCallback = true;
6457
6365
  console.log('audioInputConnect: setting up device change listener');
6458
6366
  await this._setupDeviceChangeListener();
6459
- // If the recorder hasn't spun up yet, proactively select a device.
6460
- if (!this.recorderStarted && this.deviceChangeListener) {
6461
- console.log('audioInputConnect: initializing recorder with default device');
6462
- await this._initializeRecorderWithDefaultDevice();
6463
- }
6464
6367
  console.log('audioInputConnect: done, recorderStarted =', this.recorderStarted);
6465
6368
  }
6369
+ /**
6370
+ * Starts the recorder with a specific device (or default if undefined)
6371
+ * This is the single point where getUserMedia is called during initial setup.
6372
+ * Idempotent: returns early if recorder is already started or has a live stream.
6373
+ */
6374
+ async _startRecorderWithDevice(deviceId) {
6375
+ var _a, _b;
6376
+ // Idempotency guard: don't start again if already running
6377
+ if (this.recorderStarted || this._hasLiveRecorderStream()) {
6378
+ console.debug('_startRecorderWithDevice: already started, skipping');
6379
+ return;
6380
+ }
6381
+ try {
6382
+ this._stopRecorderAmplitudeMonitoring();
6383
+ try {
6384
+ await this.wavRecorder.end();
6385
+ }
6386
+ catch (_c) {
6387
+ // Ignore cleanup errors
6388
+ }
6389
+ await this.wavRecorder.begin(deviceId);
6390
+ await this.wavRecorder.record(this._handleDataAvailable, 1638);
6391
+ // Re-setup amplitude monitoring with the new stream
6392
+ this._setupAmplitudeMonitoring(this.wavRecorder, this.options.onUserAmplitudeChange, (amp) => (this.userAudioAmplitude = amp));
6393
+ if (!this.options.enableAmplitudeMonitoring) {
6394
+ this.userAudioAmplitude = 0;
6395
+ }
6396
+ const stream = this.wavRecorder.getStream();
6397
+ const activeTrack = (stream === null || stream === void 0 ? void 0 : stream.getAudioTracks()[0]) || null;
6398
+ const trackSettings = activeTrack && typeof activeTrack.getSettings === 'function' ? activeTrack.getSettings() : null;
6399
+ const trackDeviceId = trackSettings && typeof trackSettings.deviceId === 'string' ? trackSettings.deviceId : null;
6400
+ this.activeDeviceId = trackDeviceId !== null && trackDeviceId !== void 0 ? trackDeviceId : (this.useSystemDefaultDevice ? null : this.deviceId);
6401
+ if (!this.recorderStarted) {
6402
+ this.recorderStarted = true;
6403
+ this._sendReadyIfNeeded();
6404
+ }
6405
+ const reportedDeviceId = (_a = this.activeDeviceId) !== null && _a !== void 0 ? _a : (this.useSystemDefaultDevice ? 'default' : (_b = this.deviceId) !== null && _b !== void 0 ? _b : 'default');
6406
+ if (reportedDeviceId !== this.lastReportedDeviceId) {
6407
+ this.lastReportedDeviceId = reportedDeviceId;
6408
+ if (this.options.onDeviceSwitched) {
6409
+ this.options.onDeviceSwitched(reportedDeviceId);
6410
+ }
6411
+ }
6412
+ // Ensure automatic mode has a VAD instance once the recorder stream is live
6413
+ if (!this.vad && !this.pushToTalkEnabled) {
6414
+ await this._initializeVAD();
6415
+ }
6416
+ console.debug('Recorder started successfully with device:', reportedDeviceId);
6417
+ }
6418
+ catch (error) {
6419
+ const permissionDeniedError = await this._microphonePermissionDeniedError(error);
6420
+ if (permissionDeniedError) {
6421
+ console.error(permissionDeniedError.message);
6422
+ this.options.onError(permissionDeniedError);
6423
+ throw permissionDeniedError;
6424
+ }
6425
+ if (await this._shouldWarnAudioDevicesRequireUserGesture(error)) {
6426
+ console.error('Cannot load audio devices before user has interacted with the page. Please move connect() to be triggered by a button, or load the SDK with "audioInput: false" to connection() on page load');
6427
+ }
6428
+ console.error('Error starting recorder:', error);
6429
+ this.options.onError(error instanceof Error ? error : new Error(String(error)));
6430
+ throw error;
6431
+ }
6432
+ }
6466
6433
  async audioInputDisconnect() {
6434
+ // If we never started the recorder, avoid touching audio APIs at all.
6435
+ if (!this.recorderStarted && !this._hasLiveRecorderStream()) {
6436
+ this._stopRecorderAmplitudeMonitoring();
6437
+ this.stopVad();
6438
+ this._teardownDeviceListeners();
6439
+ this.recorderStarted = false;
6440
+ return;
6441
+ }
6467
6442
  try {
6468
6443
  // stop amplitude monitoring tied to the recorder
6469
6444
  this._stopRecorderAmplitudeMonitoring();
@@ -6485,7 +6460,9 @@ class LayercodeClient {
6485
6460
  this.audioInput = state;
6486
6461
  this._emitAudioInput();
6487
6462
  if (state) {
6463
+ this._setStatus('connecting');
6488
6464
  await this.audioInputConnect();
6465
+ this._setStatus('connected');
6489
6466
  }
6490
6467
  else {
6491
6468
  await this.audioInputDisconnect();
@@ -6493,7 +6470,6 @@ class LayercodeClient {
6493
6470
  }
6494
6471
  }
6495
6472
  async setAudioOutput(state) {
6496
- console.log('setAudioOutput called with state:', state, 'current:', this.audioOutput);
6497
6473
  if (this.audioOutput !== state) {
6498
6474
  this.audioOutput = state;
6499
6475
  this._emitAudioOutput();
@@ -6501,15 +6477,15 @@ class LayercodeClient {
6501
6477
  // Initialize audio output if not already connected
6502
6478
  // This happens when audioOutput was initially false and is now being enabled
6503
6479
  if (!this.wavPlayer.context) {
6504
- console.log('setAudioOutput: initializing audio output (no context yet)');
6480
+ this._setStatus('connecting');
6505
6481
  // Store the promise so _waitForAudioOutputReady() can await it
6506
6482
  // This prevents response.audio from running before AudioContext is ready
6507
6483
  const setupPromise = this.setupAudioOutput();
6508
6484
  this.audioOutputReady = setupPromise;
6509
6485
  await setupPromise;
6486
+ this._setStatus('connected');
6510
6487
  }
6511
6488
  else {
6512
- console.log('setAudioOutput: unmuting existing player');
6513
6489
  this.wavPlayer.unmute();
6514
6490
  }
6515
6491
  // Sync agentSpeaking state with actual playback state when enabling audio output
@@ -6592,7 +6568,19 @@ class LayercodeClient {
6592
6568
  await audioOutputReady;
6593
6569
  }
6594
6570
  catch (error) {
6595
- console.error('Error connecting to Layercode agent:', error);
6571
+ const permissionDeniedError = await this._microphonePermissionDeniedError(error);
6572
+ if (permissionDeniedError) {
6573
+ console.error(permissionDeniedError.message);
6574
+ this._setStatus('error');
6575
+ this.options.onError(permissionDeniedError);
6576
+ return;
6577
+ }
6578
+ if (await this._shouldWarnAudioDevicesRequireUserGesture(error)) {
6579
+ console.error('Cannot load audio devices before user has interacted with the page. Please move connect() to be triggered by a button, or load the SDK with "audioInput: false" to connection() on page load');
6580
+ }
6581
+ else {
6582
+ console.error('Error connecting to Layercode agent:', error);
6583
+ }
6596
6584
  this._setStatus('error');
6597
6585
  this.options.onError(error instanceof Error ? error : new Error(String(error)));
6598
6586
  }
@@ -6824,33 +6812,6 @@ class LayercodeClient {
6824
6812
  this.recorderRestartChain = run.catch(() => { });
6825
6813
  return run;
6826
6814
  }
6827
- async _initializeRecorderWithDefaultDevice() {
6828
- console.log('_initializeRecorderWithDefaultDevice called, deviceChangeListener:', !!this.deviceChangeListener);
6829
- if (!this.deviceChangeListener) {
6830
- return;
6831
- }
6832
- try {
6833
- const devices = await this.wavRecorder.listDevices();
6834
- console.log('_initializeRecorderWithDefaultDevice: got devices:', devices.length);
6835
- if (devices.length) {
6836
- console.log('_initializeRecorderWithDefaultDevice: calling deviceChangeListener');
6837
- await this.deviceChangeListener(devices);
6838
- return;
6839
- }
6840
- console.warn('No audio input devices available when enabling microphone');
6841
- }
6842
- catch (error) {
6843
- console.warn('Unable to prime audio devices from listDevices()', error);
6844
- }
6845
- try {
6846
- console.log('_initializeRecorderWithDefaultDevice: calling setInputDevice default');
6847
- await this.setInputDevice('default');
6848
- }
6849
- catch (error) {
6850
- console.error('Failed to start recording with the system default device:', error);
6851
- throw error;
6852
- }
6853
- }
6854
6815
  /**
6855
6816
  * Disconnect VAD
6856
6817
  */
@@ -6869,7 +6830,7 @@ class LayercodeClient {
6869
6830
  this.stopVad();
6870
6831
  // Reinitialize with new stream only if we're actually capturing audio
6871
6832
  if (stream && this._shouldCaptureUserAudio()) {
6872
- this._initializeVAD();
6833
+ await this._initializeVAD();
6873
6834
  }
6874
6835
  }
6875
6836
  /**
@@ -6891,8 +6852,8 @@ class LayercodeClient {
6891
6852
  };
6892
6853
  });
6893
6854
  this.deviceChangeListener = async (devices) => {
6894
- var _a;
6895
- console.log('deviceChangeListener called, devices:', devices.length, 'recorderStarted:', this.recorderStarted);
6855
+ var _a, _b;
6856
+ console.log('deviceChangeListener called, devices:', devices.length, 'recorderStarted:', this.recorderStarted, '_skipFirstDeviceCallback:', this._skipFirstDeviceCallback);
6896
6857
  try {
6897
6858
  // Notify user that devices have changed
6898
6859
  this.options.onDevicesChanged(devices);
@@ -6900,6 +6861,15 @@ class LayercodeClient {
6900
6861
  const usingDefaultDevice = this.useSystemDefaultDevice;
6901
6862
  const previousDefaultDeviceKey = this.lastKnownSystemDefaultDeviceKey;
6902
6863
  const currentDefaultDeviceKey = this._getDeviceComparisonKey(defaultDevice);
6864
+ // Skip switching on the first callback after starting the recorder to avoid redundant begin() calls
6865
+ // This is set by audioInputConnect() after _startRecorderWithDevice() completes
6866
+ if (this._skipFirstDeviceCallback) {
6867
+ console.log('deviceChangeListener: skipping first callback after recorder start');
6868
+ this._skipFirstDeviceCallback = false;
6869
+ this.lastKnownSystemDefaultDeviceKey = currentDefaultDeviceKey;
6870
+ (_a = this.resolveDeviceListenerReady) === null || _a === void 0 ? void 0 : _a.call(this);
6871
+ return;
6872
+ }
6903
6873
  let shouldSwitch = !this.recorderStarted;
6904
6874
  console.log('deviceChangeListener: shouldSwitch initial:', shouldSwitch);
6905
6875
  if (!shouldSwitch) {
@@ -6910,8 +6880,7 @@ class LayercodeClient {
6910
6880
  else if (this.activeDeviceId && defaultDevice.deviceId !== 'default' && defaultDevice.deviceId !== this.activeDeviceId) {
6911
6881
  shouldSwitch = true;
6912
6882
  }
6913
- else if ((previousDefaultDeviceKey && previousDefaultDeviceKey !== currentDefaultDeviceKey) ||
6914
- (!previousDefaultDeviceKey && !currentDefaultDeviceKey && this.recorderStarted)) {
6883
+ else if ((previousDefaultDeviceKey && previousDefaultDeviceKey !== currentDefaultDeviceKey) || (!previousDefaultDeviceKey && !currentDefaultDeviceKey && this.recorderStarted)) {
6915
6884
  shouldSwitch = true;
6916
6885
  }
6917
6886
  }
@@ -6951,7 +6920,7 @@ class LayercodeClient {
6951
6920
  this.options.onError(error instanceof Error ? error : new Error(String(error)));
6952
6921
  }
6953
6922
  finally {
6954
- (_a = this.resolveDeviceListenerReady) === null || _a === void 0 ? void 0 : _a.call(this);
6923
+ (_b = this.resolveDeviceListenerReady) === null || _b === void 0 ? void 0 : _b.call(this);
6955
6924
  }
6956
6925
  };
6957
6926
  this.wavRecorder.listenForDeviceChange(this.deviceChangeListener);
@@ -6975,6 +6944,7 @@ class LayercodeClient {
6975
6944
  this.lastKnownSystemDefaultDeviceKey = null;
6976
6945
  this.recorderStarted = false;
6977
6946
  this.readySent = false;
6947
+ this._skipFirstDeviceCallback = false;
6978
6948
  this._stopAmplitudeMonitoring();
6979
6949
  this._teardownDeviceListeners();
6980
6950
  if (this.vad) {
@@ -7010,6 +6980,77 @@ class LayercodeClient {
7010
6980
  }
7011
6981
  return null;
7012
6982
  }
6983
+ _getUserActivationState() {
6984
+ try {
6985
+ const nav = typeof navigator !== 'undefined' ? navigator : null;
6986
+ const act = nav === null || nav === void 0 ? void 0 : nav.userActivation;
6987
+ if (act && typeof act === 'object') {
6988
+ if (typeof act.hasBeenActive === 'boolean')
6989
+ return act.hasBeenActive;
6990
+ if (typeof act.isActive === 'boolean')
6991
+ return act.isActive ? true : null;
6992
+ }
6993
+ const doc = typeof document !== 'undefined' ? document : null;
6994
+ const dact = doc === null || doc === void 0 ? void 0 : doc.userActivation;
6995
+ if (dact && typeof dact === 'object') {
6996
+ if (typeof dact.hasBeenActive === 'boolean')
6997
+ return dact.hasBeenActive;
6998
+ if (typeof dact.isActive === 'boolean')
6999
+ return dact.isActive ? true : null;
7000
+ }
7001
+ }
7002
+ catch (_a) { }
7003
+ return null;
7004
+ }
7005
+ async _isMicrophonePermissionDenied() {
7006
+ try {
7007
+ const nav = typeof navigator !== 'undefined' ? navigator : null;
7008
+ const permissions = nav === null || nav === void 0 ? void 0 : nav.permissions;
7009
+ if (!(permissions === null || permissions === void 0 ? void 0 : permissions.query))
7010
+ return null;
7011
+ const status = await permissions.query({ name: 'microphone' });
7012
+ const state = status === null || status === void 0 ? void 0 : status.state;
7013
+ if (state === 'denied')
7014
+ return true;
7015
+ if (state === 'granted' || state === 'prompt')
7016
+ return false;
7017
+ }
7018
+ catch (_a) { }
7019
+ return null;
7020
+ }
7021
+ async _microphonePermissionDeniedError(error) {
7022
+ const err = error;
7023
+ const message = typeof (err === null || err === void 0 ? void 0 : err.message) === 'string' ? err.message : typeof error === 'string' ? error : '';
7024
+ if (message === 'User has denined audio device permissions') {
7025
+ return err instanceof Error ? err : new Error(message);
7026
+ }
7027
+ const name = typeof (err === null || err === void 0 ? void 0 : err.name) === 'string' ? err.name : '';
7028
+ const isPermissionLike = name === 'NotAllowedError' || name === 'SecurityError' || name === 'PermissionDeniedError';
7029
+ if (!isPermissionLike) {
7030
+ return null;
7031
+ }
7032
+ const micDenied = await this._isMicrophonePermissionDenied();
7033
+ if (micDenied === true || /permission denied/i.test(message)) {
7034
+ return new Error('User has denined audio device permissions');
7035
+ }
7036
+ return null;
7037
+ }
7038
+ async _shouldWarnAudioDevicesRequireUserGesture(error) {
7039
+ const e = error;
7040
+ const name = typeof (e === null || e === void 0 ? void 0 : e.name) === 'string' ? e.name : '';
7041
+ const msg = typeof (e === null || e === void 0 ? void 0 : e.message) === 'string' ? e.message : typeof error === 'string' ? error : '';
7042
+ const isPermissionLike = name === 'NotAllowedError' || name === 'SecurityError' || name === 'PermissionDeniedError';
7043
+ if (!isPermissionLike)
7044
+ return false;
7045
+ // If the browser can tell us mic permission is explicitly denied, don't show the "user gesture" guidance.
7046
+ const micDenied = await this._isMicrophonePermissionDenied();
7047
+ if (micDenied === true)
7048
+ return false;
7049
+ if (/user activation|user gesture|interacte?d? with( the)? (page|document)|before user has interacted/i.test(msg)) {
7050
+ return true;
7051
+ }
7052
+ return this._getUserActivationState() === false;
7053
+ }
7013
7054
  /**
7014
7055
  * Mutes the microphone to stop sending audio to the server
7015
7056
  * The connection and recording remain active for quick unmute
@@ -7026,13 +7067,13 @@ class LayercodeClient {
7026
7067
  /**
7027
7068
  * Unmutes the microphone to resume sending audio to the server
7028
7069
  */
7029
- unmute() {
7070
+ async unmute() {
7030
7071
  if (this.isMuted) {
7031
7072
  this.isMuted = false;
7032
7073
  console.log('Microphone unmuted');
7033
7074
  this.options.onMuteStateChange(false);
7034
7075
  if (this.audioInput && this.recorderStarted) {
7035
- this._initializeVAD();
7076
+ await this._initializeVAD();
7036
7077
  if (this.stopRecorderAmplitude === undefined) {
7037
7078
  this._setupAmplitudeMonitoring(this.wavRecorder, this.options.onUserAmplitudeChange, (amp) => (this.userAudioAmplitude = amp));
7038
7079
  }