@layercode/js-sdk 2.8.4 → 2.8.6

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -5283,7 +5283,7 @@ class WavRecorder {
5283
5283
  .join(',');
5284
5284
  const cb = async () => {
5285
5285
  let id = ++lastId;
5286
- const devices = await this.listDevices();
5286
+ const devices = await this.listDevices({ requestPermission: false });
5287
5287
  if (id === lastId) {
5288
5288
  if (serializeDevices(lastDevices) !== serializeDevices(devices)) {
5289
5289
  lastDevices = devices;
@@ -5336,14 +5336,22 @@ class WavRecorder {
5336
5336
  }
5337
5337
 
5338
5338
  /**
5339
- * List all eligible devices for recording, will request permission to use microphone
5339
+ * List all eligible devices for recording.
5340
+ *
5341
+ * By default this will *not* request mic permission; labels may be empty until
5342
+ * the user has granted permission via begin()/getUserMedia.
5343
+ * Pass { requestPermission: true } to explicitly trigger a permission prompt.
5344
+ *
5345
+ * @param {{ requestPermission?: boolean }} [options]
5340
5346
  * @returns {Promise<Array<MediaDeviceInfo & {default: boolean}>>}
5341
5347
  */
5342
- async listDevices() {
5348
+ async listDevices({ requestPermission = false } = {}) {
5343
5349
  if (!navigator.mediaDevices || !('enumerateDevices' in navigator.mediaDevices)) {
5344
5350
  throw new Error('Could not request user devices');
5345
5351
  }
5346
- await this.requestPermission();
5352
+ if (requestPermission) {
5353
+ await this.requestPermission();
5354
+ }
5347
5355
 
5348
5356
  const devices = await navigator.mediaDevices.enumerateDevices();
5349
5357
  const audioDevices = devices.filter((device) => device.kind === 'audioinput');
@@ -5393,7 +5401,7 @@ class WavRecorder {
5393
5401
  config.audio.deviceId = { exact: deviceId };
5394
5402
  }
5395
5403
  this.stream = await navigator.mediaDevices.getUserMedia(config);
5396
- // Mark permission as granted so listDevices() won't call requestPermission() again
5404
+ // Mark permission as granted so requestPermission() can skip expensive redundant getUserMedia calls
5397
5405
  this._hasPermission = true;
5398
5406
  } catch (err) {
5399
5407
  throw err;
@@ -5746,6 +5754,29 @@ function arrayBufferToBase64(arrayBuffer) {
5746
5754
  /* eslint-env browser */
5747
5755
  // import { env as ortEnv } from 'onnxruntime-web';
5748
5756
  // @ts-ignore - VAD package does not provide TypeScript types
5757
+ /**
5758
+ * Layercode Web SDK notes / gotchas
5759
+ *
5760
+ * Modes:
5761
+ * - audioInput=false => text-only mode. Do NOT touch getUserMedia or request mic permissions.
5762
+ * - audioInput=true => capture/send mic audio (may trigger permission prompt).
5763
+ * - audioOutput=false => do not play audio, but MUST still send
5764
+ * `trigger.response.audio.replay_finished` once per assistant turn so the server can advance.
5765
+ *
5766
+ * Transcription:
5767
+ * - trigger='push_to_talk' => no VAD; only send audio between triggerUserTurnStarted/Finished.
5768
+ * - trigger='automatic' => VAD drives userSpeaking + optional audio gating.
5769
+ *
5770
+ * Performance:
5771
+ * - Media APIs on mobile browsers (e.g. iOS Safari) can be slow; avoid multiple sequential getUserMedia calls.
5772
+ * This file starts the recorder first, then sets up device listeners.
5773
+ *
5774
+ * Compatibility:
5775
+ * - This is consumed by the Layercode React SDK; treat public API/event shape changes as breaking.
5776
+ *
5777
+ * Assets:
5778
+ * - VAD/ORT assets are loaded from assets.layercode.com (Cloudflare).
5779
+ */
5749
5780
  const NOOP = () => { };
5750
5781
  const DEFAULT_WS_URL = 'wss://api.layercode.com/v1/agents/web/websocket';
5751
5782
  // SDK version - updated when publishing
@@ -5763,12 +5794,12 @@ const toLayercodeAudioInputDevice = (device) => {
5763
5794
  }
5764
5795
  return cloned;
5765
5796
  };
5766
- const listAudioInputDevices = async () => {
5797
+ const listAudioInputDevices = async (options = {}) => {
5767
5798
  if (!hasMediaDevicesSupport()) {
5768
5799
  throw new Error('Media devices are not available in this environment');
5769
5800
  }
5770
5801
  const recorder = new WavRecorder({ sampleRate: DEFAULT_RECORDER_SAMPLE_RATE });
5771
- const devices = (await recorder.listDevices());
5802
+ const devices = (await recorder.listDevices({ requestPermission: Boolean(options.requestPermission) }));
5772
5803
  return devices.map(toLayercodeAudioInputDevice);
5773
5804
  };
5774
5805
  const watchAudioInputDevices = (callback) => {
@@ -6202,27 +6233,34 @@ class LayercodeClient {
6202
6233
  * @param {ArrayBuffer} data - The audio data buffer
6203
6234
  */
6204
6235
  _handleDataAvailable(data) {
6205
- var _a, _b, _c;
6236
+ var _a, _b, _c, _d;
6206
6237
  try {
6207
- const base64 = arrayBufferToBase64(data.mono);
6208
- // Don't send audio if muted
6238
+ // Don't send or buffer audio if muted. Also clear any stale buffer so we
6239
+ // don't accidentally flush old audio after unmute.
6209
6240
  if (this.isMuted) {
6241
+ this.audioBuffer = [];
6210
6242
  return;
6211
6243
  }
6212
6244
  // Determine if we should gate audio based on VAD configuration
6213
- const shouldGateAudio = ((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.gate_audio) !== false; // Default to true if not specified
6214
- const bufferFrames = (_c = (_b = this.vadConfig) === null || _b === void 0 ? void 0 : _b.buffer_frames) !== null && _c !== void 0 ? _c : 10; // Default to 10 if not specified
6245
+ const shouldGateAudio = ((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.gate_audio) !== false; // default true
6246
+ const bufferFrames = (_c = (_b = this.vadConfig) === null || _b === void 0 ? void 0 : _b.buffer_frames) !== null && _c !== void 0 ? _c : 10; // default 10
6247
+ // If VAD is disabled or failed to init, gating would deadlock (userIsSpeaking never flips true).
6248
+ // Only gate if we actually have a running VAD instance.
6249
+ const vadEnabledByConfig = ((_d = this.vadConfig) === null || _d === void 0 ? void 0 : _d.enabled) !== false; // default true
6250
+ const vadAvailable = vadEnabledByConfig && !!this.vad && !this.pushToTalkEnabled;
6215
6251
  let sendAudio;
6216
6252
  if (this.pushToTalkEnabled) {
6217
6253
  sendAudio = this.pushToTalkActive;
6218
6254
  }
6219
6255
  else if (shouldGateAudio) {
6220
- sendAudio = this.userIsSpeaking;
6256
+ // Key fix: if VAD isn't available, don't gate — send audio.
6257
+ sendAudio = vadAvailable ? this.userIsSpeaking : true;
6221
6258
  }
6222
6259
  else {
6223
6260
  // If gate_audio is false, always send audio
6224
6261
  sendAudio = true;
6225
6262
  }
6263
+ const base64 = arrayBufferToBase64(data.mono);
6226
6264
  if (sendAudio) {
6227
6265
  // If we have buffered audio and we're gating, send it first
6228
6266
  if (shouldGateAudio && this.audioBuffer.length > 0) {
@@ -6395,13 +6433,17 @@ class LayercodeClient {
6395
6433
  this.recorderStarted = true;
6396
6434
  this._sendReadyIfNeeded();
6397
6435
  }
6398
- const reportedDeviceId = (_a = this.activeDeviceId) !== null && _a !== void 0 ? _a : (this.useSystemDefaultDevice ? 'default' : ((_b = this.deviceId) !== null && _b !== void 0 ? _b : 'default'));
6436
+ const reportedDeviceId = (_a = this.activeDeviceId) !== null && _a !== void 0 ? _a : (this.useSystemDefaultDevice ? 'default' : (_b = this.deviceId) !== null && _b !== void 0 ? _b : 'default');
6399
6437
  if (reportedDeviceId !== this.lastReportedDeviceId) {
6400
6438
  this.lastReportedDeviceId = reportedDeviceId;
6401
6439
  if (this.options.onDeviceSwitched) {
6402
6440
  this.options.onDeviceSwitched(reportedDeviceId);
6403
6441
  }
6404
6442
  }
6443
+ // Ensure automatic mode has a VAD instance once the recorder stream is live
6444
+ if (!this.vad && !this.pushToTalkEnabled) {
6445
+ await this._initializeVAD();
6446
+ }
6405
6447
  console.debug('Recorder started successfully with device:', reportedDeviceId);
6406
6448
  }
6407
6449
  catch (error) {
@@ -6700,8 +6742,8 @@ class LayercodeClient {
6700
6742
  * List all available audio input devices
6701
6743
  * @returns {Promise<Array<MediaDeviceInfo & {default: boolean}>>}
6702
6744
  */
6703
- async listDevices() {
6704
- return this.wavRecorder.listDevices();
6745
+ async listDevices(options) {
6746
+ return this.wavRecorder.listDevices(options);
6705
6747
  }
6706
6748
  /**
6707
6749
  * Switches the input device for the microphone and restarts recording
@@ -6728,7 +6770,7 @@ class LayercodeClient {
6728
6770
  const newStream = this.wavRecorder.getStream();
6729
6771
  await this._reinitializeVAD(newStream);
6730
6772
  }
6731
- const reportedDeviceId = (_c = (_b = this.lastReportedDeviceId) !== null && _b !== void 0 ? _b : this.activeDeviceId) !== null && _c !== void 0 ? _c : (this.useSystemDefaultDevice ? 'default' : (normalizedDeviceId !== null && normalizedDeviceId !== void 0 ? normalizedDeviceId : 'default'));
6773
+ const reportedDeviceId = (_c = (_b = this.lastReportedDeviceId) !== null && _b !== void 0 ? _b : this.activeDeviceId) !== null && _c !== void 0 ? _c : (this.useSystemDefaultDevice ? 'default' : normalizedDeviceId !== null && normalizedDeviceId !== void 0 ? normalizedDeviceId : 'default');
6732
6774
  console.debug(`Successfully switched to input device: ${reportedDeviceId}`);
6733
6775
  }
6734
6776
  catch (error) {
@@ -6782,7 +6824,7 @@ class LayercodeClient {
6782
6824
  this.recorderStarted = true;
6783
6825
  this._sendReadyIfNeeded();
6784
6826
  }
6785
- const reportedDeviceId = (_a = this.activeDeviceId) !== null && _a !== void 0 ? _a : (this.useSystemDefaultDevice ? 'default' : ((_b = this.deviceId) !== null && _b !== void 0 ? _b : 'default'));
6827
+ const reportedDeviceId = (_a = this.activeDeviceId) !== null && _a !== void 0 ? _a : (this.useSystemDefaultDevice ? 'default' : (_b = this.deviceId) !== null && _b !== void 0 ? _b : 'default');
6786
6828
  if (reportedDeviceId !== previousReportedDeviceId) {
6787
6829
  this.lastReportedDeviceId = reportedDeviceId;
6788
6830
  if (this.options.onDeviceSwitched) {
@@ -7027,11 +7069,7 @@ class LayercodeClient {
7027
7069
  async _shouldWarnAudioDevicesRequireUserGesture(error) {
7028
7070
  const e = error;
7029
7071
  const name = typeof (e === null || e === void 0 ? void 0 : e.name) === 'string' ? e.name : '';
7030
- const msg = typeof (e === null || e === void 0 ? void 0 : e.message) === 'string'
7031
- ? e.message
7032
- : typeof error === 'string'
7033
- ? error
7034
- : '';
7072
+ const msg = typeof (e === null || e === void 0 ? void 0 : e.message) === 'string' ? e.message : typeof error === 'string' ? error : '';
7035
7073
  const isPermissionLike = name === 'NotAllowedError' || name === 'SecurityError' || name === 'PermissionDeniedError';
7036
7074
  if (!isPermissionLike)
7037
7075
  return false;