@layercode/js-sdk 2.8.4 → 2.8.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5289,7 +5289,7 @@ registerProcessor('audio_processor', AudioProcessor);
5289
5289
  .join(',');
5290
5290
  const cb = async () => {
5291
5291
  let id = ++lastId;
5292
- const devices = await this.listDevices();
5292
+ const devices = await this.listDevices({ requestPermission: false });
5293
5293
  if (id === lastId) {
5294
5294
  if (serializeDevices(lastDevices) !== serializeDevices(devices)) {
5295
5295
  lastDevices = devices;
@@ -5342,14 +5342,22 @@ registerProcessor('audio_processor', AudioProcessor);
5342
5342
  }
5343
5343
 
5344
5344
  /**
5345
- * List all eligible devices for recording, will request permission to use microphone
5345
+ * List all eligible devices for recording.
5346
+ *
5347
+ * By default this will *not* request mic permission; labels may be empty until
5348
+ * the user has granted permission via begin()/getUserMedia.
5349
+ * Pass { requestPermission: true } to explicitly trigger a permission prompt.
5350
+ *
5351
+ * @param {{ requestPermission?: boolean }} [options]
5346
5352
  * @returns {Promise<Array<MediaDeviceInfo & {default: boolean}>>}
5347
5353
  */
5348
- async listDevices() {
5354
+ async listDevices({ requestPermission = false } = {}) {
5349
5355
  if (!navigator.mediaDevices || !('enumerateDevices' in navigator.mediaDevices)) {
5350
5356
  throw new Error('Could not request user devices');
5351
5357
  }
5352
- await this.requestPermission();
5358
+ if (requestPermission) {
5359
+ await this.requestPermission();
5360
+ }
5353
5361
 
5354
5362
  const devices = await navigator.mediaDevices.enumerateDevices();
5355
5363
  const audioDevices = devices.filter((device) => device.kind === 'audioinput');
@@ -5399,7 +5407,7 @@ registerProcessor('audio_processor', AudioProcessor);
5399
5407
  config.audio.deviceId = { exact: deviceId };
5400
5408
  }
5401
5409
  this.stream = await navigator.mediaDevices.getUserMedia(config);
5402
- // Mark permission as granted so listDevices() won't call requestPermission() again
5410
+ // Mark permission as granted so requestPermission() can skip expensive redundant getUserMedia calls
5403
5411
  this._hasPermission = true;
5404
5412
  } catch (err) {
5405
5413
  throw err;
@@ -5752,6 +5760,29 @@ registerProcessor('audio_processor', AudioProcessor);
5752
5760
  /* eslint-env browser */
5753
5761
  // import { env as ortEnv } from 'onnxruntime-web';
5754
5762
  // @ts-ignore - VAD package does not provide TypeScript types
5763
+ /**
5764
+ * Layercode Web SDK notes / gotchas
5765
+ *
5766
+ * Modes:
5767
+ * - audioInput=false => text-only mode. Do NOT touch getUserMedia or request mic permissions.
5768
+ * - audioInput=true => capture/send mic audio (may trigger permission prompt).
5769
+ * - audioOutput=false => do not play audio, but MUST still send
5770
+ * `trigger.response.audio.replay_finished` once per assistant turn so the server can advance.
5771
+ *
5772
+ * Transcription:
5773
+ * - trigger='push_to_talk' => no VAD; only send audio between triggerUserTurnStarted/Finished.
5774
+ * - trigger='automatic' => VAD drives userSpeaking + optional audio gating.
5775
+ *
5776
+ * Performance:
5777
+ * - Media APIs on mobile browsers (e.g. iOS Safari) can be slow; avoid multiple sequential getUserMedia calls.
5778
+ * This file starts the recorder first, then sets up device listeners.
5779
+ *
5780
+ * Compatibility:
5781
+ * - This is consumed by the Layercode React SDK; treat public API/event shape changes as breaking.
5782
+ *
5783
+ * Assets:
5784
+ * - VAD/ORT assets are loaded from assets.layercode.com (Cloudflare).
5785
+ */
5755
5786
  const NOOP = () => { };
5756
5787
  const DEFAULT_WS_URL = 'wss://api.layercode.com/v1/agents/web/websocket';
5757
5788
  // SDK version - updated when publishing
@@ -5769,12 +5800,12 @@ registerProcessor('audio_processor', AudioProcessor);
5769
5800
  }
5770
5801
  return cloned;
5771
5802
  };
5772
- const listAudioInputDevices = async () => {
5803
+ const listAudioInputDevices = async (options = {}) => {
5773
5804
  if (!hasMediaDevicesSupport()) {
5774
5805
  throw new Error('Media devices are not available in this environment');
5775
5806
  }
5776
5807
  const recorder = new WavRecorder({ sampleRate: DEFAULT_RECORDER_SAMPLE_RATE });
5777
- const devices = (await recorder.listDevices());
5808
+ const devices = (await recorder.listDevices({ requestPermission: Boolean(options.requestPermission) }));
5778
5809
  return devices.map(toLayercodeAudioInputDevice);
5779
5810
  };
5780
5811
  const watchAudioInputDevices = (callback) => {
@@ -6208,27 +6239,34 @@ registerProcessor('audio_processor', AudioProcessor);
6208
6239
  * @param {ArrayBuffer} data - The audio data buffer
6209
6240
  */
6210
6241
  _handleDataAvailable(data) {
6211
- var _a, _b, _c;
6242
+ var _a, _b, _c, _d;
6212
6243
  try {
6213
- const base64 = arrayBufferToBase64(data.mono);
6214
- // Don't send audio if muted
6244
+ // Don't send or buffer audio if muted. Also clear any stale buffer so we
6245
+ // don't accidentally flush old audio after unmute.
6215
6246
  if (this.isMuted) {
6247
+ this.audioBuffer = [];
6216
6248
  return;
6217
6249
  }
6218
6250
  // Determine if we should gate audio based on VAD configuration
6219
- const shouldGateAudio = ((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.gate_audio) !== false; // Default to true if not specified
6220
- const bufferFrames = (_c = (_b = this.vadConfig) === null || _b === void 0 ? void 0 : _b.buffer_frames) !== null && _c !== void 0 ? _c : 10; // Default to 10 if not specified
6251
+ const shouldGateAudio = ((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.gate_audio) !== false; // default true
6252
+ const bufferFrames = (_c = (_b = this.vadConfig) === null || _b === void 0 ? void 0 : _b.buffer_frames) !== null && _c !== void 0 ? _c : 10; // default 10
6253
+ // If VAD is disabled or failed to init, gating would deadlock (userIsSpeaking never flips true).
6254
+ // Only gate if we actually have a running VAD instance.
6255
+ const vadEnabledByConfig = ((_d = this.vadConfig) === null || _d === void 0 ? void 0 : _d.enabled) !== false; // default true
6256
+ const vadAvailable = vadEnabledByConfig && !!this.vad && !this.pushToTalkEnabled;
6221
6257
  let sendAudio;
6222
6258
  if (this.pushToTalkEnabled) {
6223
6259
  sendAudio = this.pushToTalkActive;
6224
6260
  }
6225
6261
  else if (shouldGateAudio) {
6226
- sendAudio = this.userIsSpeaking;
6262
+ // Key fix: if VAD isn't available, don't gate — send audio.
6263
+ sendAudio = vadAvailable ? this.userIsSpeaking : true;
6227
6264
  }
6228
6265
  else {
6229
6266
  // If gate_audio is false, always send audio
6230
6267
  sendAudio = true;
6231
6268
  }
6269
+ const base64 = arrayBufferToBase64(data.mono);
6232
6270
  if (sendAudio) {
6233
6271
  // If we have buffered audio and we're gating, send it first
6234
6272
  if (shouldGateAudio && this.audioBuffer.length > 0) {
@@ -6401,13 +6439,17 @@ registerProcessor('audio_processor', AudioProcessor);
6401
6439
  this.recorderStarted = true;
6402
6440
  this._sendReadyIfNeeded();
6403
6441
  }
6404
- const reportedDeviceId = (_a = this.activeDeviceId) !== null && _a !== void 0 ? _a : (this.useSystemDefaultDevice ? 'default' : ((_b = this.deviceId) !== null && _b !== void 0 ? _b : 'default'));
6442
+ const reportedDeviceId = (_a = this.activeDeviceId) !== null && _a !== void 0 ? _a : (this.useSystemDefaultDevice ? 'default' : (_b = this.deviceId) !== null && _b !== void 0 ? _b : 'default');
6405
6443
  if (reportedDeviceId !== this.lastReportedDeviceId) {
6406
6444
  this.lastReportedDeviceId = reportedDeviceId;
6407
6445
  if (this.options.onDeviceSwitched) {
6408
6446
  this.options.onDeviceSwitched(reportedDeviceId);
6409
6447
  }
6410
6448
  }
6449
+ // Ensure automatic mode has a VAD instance once the recorder stream is live
6450
+ if (!this.vad && !this.pushToTalkEnabled) {
6451
+ await this._initializeVAD();
6452
+ }
6411
6453
  console.debug('Recorder started successfully with device:', reportedDeviceId);
6412
6454
  }
6413
6455
  catch (error) {
@@ -6706,8 +6748,8 @@ registerProcessor('audio_processor', AudioProcessor);
6706
6748
  * List all available audio input devices
6707
6749
  * @returns {Promise<Array<MediaDeviceInfo & {default: boolean}>>}
6708
6750
  */
6709
- async listDevices() {
6710
- return this.wavRecorder.listDevices();
6751
+ async listDevices(options) {
6752
+ return this.wavRecorder.listDevices(options);
6711
6753
  }
6712
6754
  /**
6713
6755
  * Switches the input device for the microphone and restarts recording
@@ -6734,7 +6776,7 @@ registerProcessor('audio_processor', AudioProcessor);
6734
6776
  const newStream = this.wavRecorder.getStream();
6735
6777
  await this._reinitializeVAD(newStream);
6736
6778
  }
6737
- const reportedDeviceId = (_c = (_b = this.lastReportedDeviceId) !== null && _b !== void 0 ? _b : this.activeDeviceId) !== null && _c !== void 0 ? _c : (this.useSystemDefaultDevice ? 'default' : (normalizedDeviceId !== null && normalizedDeviceId !== void 0 ? normalizedDeviceId : 'default'));
6779
+ const reportedDeviceId = (_c = (_b = this.lastReportedDeviceId) !== null && _b !== void 0 ? _b : this.activeDeviceId) !== null && _c !== void 0 ? _c : (this.useSystemDefaultDevice ? 'default' : normalizedDeviceId !== null && normalizedDeviceId !== void 0 ? normalizedDeviceId : 'default');
6738
6780
  console.debug(`Successfully switched to input device: ${reportedDeviceId}`);
6739
6781
  }
6740
6782
  catch (error) {
@@ -6788,7 +6830,7 @@ registerProcessor('audio_processor', AudioProcessor);
6788
6830
  this.recorderStarted = true;
6789
6831
  this._sendReadyIfNeeded();
6790
6832
  }
6791
- const reportedDeviceId = (_a = this.activeDeviceId) !== null && _a !== void 0 ? _a : (this.useSystemDefaultDevice ? 'default' : ((_b = this.deviceId) !== null && _b !== void 0 ? _b : 'default'));
6833
+ const reportedDeviceId = (_a = this.activeDeviceId) !== null && _a !== void 0 ? _a : (this.useSystemDefaultDevice ? 'default' : (_b = this.deviceId) !== null && _b !== void 0 ? _b : 'default');
6792
6834
  if (reportedDeviceId !== previousReportedDeviceId) {
6793
6835
  this.lastReportedDeviceId = reportedDeviceId;
6794
6836
  if (this.options.onDeviceSwitched) {
@@ -7033,11 +7075,7 @@ registerProcessor('audio_processor', AudioProcessor);
7033
7075
  async _shouldWarnAudioDevicesRequireUserGesture(error) {
7034
7076
  const e = error;
7035
7077
  const name = typeof (e === null || e === void 0 ? void 0 : e.name) === 'string' ? e.name : '';
7036
- const msg = typeof (e === null || e === void 0 ? void 0 : e.message) === 'string'
7037
- ? e.message
7038
- : typeof error === 'string'
7039
- ? error
7040
- : '';
7078
+ const msg = typeof (e === null || e === void 0 ? void 0 : e.message) === 'string' ? e.message : typeof error === 'string' ? error : '';
7041
7079
  const isPermissionLike = name === 'NotAllowedError' || name === 'SecurityError' || name === 'PermissionDeniedError';
7042
7080
  if (!isPermissionLike)
7043
7081
  return false;