@layercode/js-sdk 1.0.26 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Layercode JavaScript SDK
2
2
 
3
- A JavaScript SDK for integrating Layercode voice pipelines into web applications.
3
+ A JavaScript SDK for integrating Layercode voice agents into web applications.
4
4
 
5
5
  ## Installation
6
6
 
@@ -3477,9 +3477,11 @@ function arrayBufferToBase64(arrayBuffer) {
3477
3477
  }
3478
3478
 
3479
3479
  /* eslint-env browser */
3480
+ // SDK version - updated when publishing
3481
+ const SDK_VERSION = '2.0.0';
3480
3482
  /**
3481
3483
  * @class LayercodeClient
3482
- * @classdesc Core client for Layercode audio pipeline that manages audio recording, WebSocket communication, and speech processing.
3484
+ * @classdesc Core client for Layercode audio agent that manages audio recording, WebSocket communication, and speech processing.
3483
3485
  */
3484
3486
  class LayercodeClient {
3485
3487
  /**
@@ -3487,15 +3489,17 @@ class LayercodeClient {
3487
3489
  * @param {Object} options - Configuration options
3488
3490
  */
3489
3491
  constructor(options) {
3492
+ this.deviceId = null;
3490
3493
  this.options = {
3491
- pipelineId: options.pipelineId,
3492
- sessionId: options.sessionId || null,
3494
+ agentId: options.agentId,
3495
+ conversationId: options.conversationId || null,
3493
3496
  authorizeSessionEndpoint: options.authorizeSessionEndpoint,
3494
3497
  metadata: options.metadata || {},
3495
3498
  vadResumeDelay: options.vadResumeDelay || 500,
3496
3499
  onConnect: options.onConnect || (() => { }),
3497
3500
  onDisconnect: options.onDisconnect || (() => { }),
3498
3501
  onError: options.onError || (() => { }),
3502
+ onDeviceSwitched: options.onDeviceSwitched || (() => { }),
3499
3503
  onDataMessage: options.onDataMessage || (() => { }),
3500
3504
  onUserAmplitudeChange: options.onUserAmplitudeChange || (() => { }),
3501
3505
  onAgentAmplitudeChange: options.onAgentAmplitudeChange || (() => { }),
@@ -3503,18 +3507,18 @@ class LayercodeClient {
3503
3507
  onUserIsSpeakingChange: options.onUserIsSpeakingChange || (() => { }),
3504
3508
  };
3505
3509
  this.AMPLITUDE_MONITORING_SAMPLE_RATE = 10;
3506
- this._websocketUrl = 'wss://api.layercode.com/v1/pipelines/websocket';
3507
- this.wavRecorder = new WavRecorder({ sampleRate: 8000 }); // TODO should be set my fetched pipeline config
3510
+ this._websocketUrl = 'wss://api.layercode.com/v1/agents/websocket';
3511
+ this.wavRecorder = new WavRecorder({ sampleRate: 8000 }); // TODO should be set by fetched agent config
3508
3512
  this.wavPlayer = new WavStreamPlayer({
3509
3513
  finishedPlayingCallback: this._clientResponseAudioReplayFinished.bind(this),
3510
- sampleRate: 16000, // TODO should be set my fetched pipeline config
3514
+ sampleRate: 16000, // TODO should be set by fetched agent config
3511
3515
  });
3512
3516
  this.vad = null;
3513
3517
  this.ws = null;
3514
3518
  this.status = 'disconnected';
3515
3519
  this.userAudioAmplitude = 0;
3516
3520
  this.agentAudioAmplitude = 0;
3517
- this.sessionId = options.sessionId || null;
3521
+ this.conversationId = options.conversationId || null;
3518
3522
  this.pushToTalkActive = false;
3519
3523
  this.pushToTalkEnabled = false;
3520
3524
  this.canInterrupt = false;
@@ -3523,71 +3527,28 @@ class LayercodeClient {
3523
3527
  this.readySent = false;
3524
3528
  this.currentTurnId = null;
3525
3529
  this.audioBuffer = [];
3530
+ this.vadConfig = null;
3526
3531
  // this.audioPauseTime = null;
3527
3532
  // Bind event handlers
3528
3533
  this._handleWebSocketMessage = this._handleWebSocketMessage.bind(this);
3529
3534
  this._handleDataAvailable = this._handleDataAvailable.bind(this);
3530
- }
3531
- _setupAmplitudeBasedVAD() {
3532
- let isSpeakingByAmplitude = false;
3533
- let silenceFrames = 0;
3534
- const AMPLITUDE_THRESHOLD = 0.01; // Adjust based on testing
3535
- const SILENCE_FRAMES_THRESHOLD = 6.4; // 6.4 * 20ms chunks = 128ms silence. Same as Silero ((frame samples: 512 / sampleRate: 16000) * 1000 * redemptionFrames: 4) = 128 ms silence
3536
- // Monitor amplitude changes
3537
- this.wavRecorder.startAmplitudeMonitoring((amplitude) => {
3538
- const wasSpeaking = isSpeakingByAmplitude;
3539
- if (amplitude > AMPLITUDE_THRESHOLD) {
3540
- silenceFrames = 0;
3541
- if (!wasSpeaking) {
3542
- isSpeakingByAmplitude = true;
3543
- this.userIsSpeaking = true;
3544
- this.options.onUserIsSpeakingChange(true);
3545
- this._wsSend({
3546
- type: 'vad_events',
3547
- event: 'vad_start',
3548
- });
3549
- }
3550
- }
3551
- else {
3552
- silenceFrames++;
3553
- if (wasSpeaking && silenceFrames >= SILENCE_FRAMES_THRESHOLD) {
3554
- isSpeakingByAmplitude = false;
3555
- this.userIsSpeaking = false;
3556
- this.options.onUserIsSpeakingChange(false);
3557
- this._wsSend({
3558
- type: 'vad_events',
3559
- event: 'vad_end',
3560
- });
3561
- }
3562
- }
3563
- });
3535
+ this._setupDeviceChangeListener();
3564
3536
  }
3565
3537
  _initializeVAD() {
3566
- console.log('initializing VAD', { pushToTalkEnabled: this.pushToTalkEnabled, canInterrupt: this.canInterrupt });
3538
+ var _a;
3539
+ console.log('initializing VAD', { pushToTalkEnabled: this.pushToTalkEnabled, canInterrupt: this.canInterrupt, vadConfig: this.vadConfig });
3567
3540
  // If we're in push to talk mode, we don't need to use the VAD model
3568
3541
  if (this.pushToTalkEnabled) {
3569
3542
  return;
3570
3543
  }
3571
- const vadLoadTimeout = setTimeout(() => {
3572
- console.log('silero vad model timeout');
3573
- console.warn('VAD model failed to load - falling back to amplitude-based detection');
3574
- // Send a message to server indicating VAD failure
3575
- this._wsSend({
3576
- type: 'vad_events',
3577
- event: 'vad_model_failed',
3578
- });
3579
- // Set up amplitude-based fallback detection
3580
- this._setupAmplitudeBasedVAD();
3581
- }, 2000);
3582
- dist.MicVAD.new({
3544
+ // Check if VAD is disabled
3545
+ if (((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.enabled) === false) {
3546
+ console.log('VAD is disabled by backend configuration');
3547
+ return;
3548
+ }
3549
+ // Build VAD configuration object, only including keys that are defined
3550
+ const vadOptions = {
3583
3551
  stream: this.wavRecorder.getStream() || undefined,
3584
- model: 'v5',
3585
- positiveSpeechThreshold: 0.15,
3586
- negativeSpeechThreshold: 0.05,
3587
- redemptionFrames: 4,
3588
- minSpeechFrames: 2,
3589
- preSpeechPadFrames: 0,
3590
- frameSamples: 512, // Required for v5 as per https://docs.vad.ricky0123.com/user-guide/algorithm/#configuration
3591
3552
  onSpeechStart: () => {
3592
3553
  console.log('onSpeechStart: sending vad_start');
3593
3554
  this.userIsSpeaking = true;
@@ -3607,18 +3568,49 @@ class LayercodeClient {
3607
3568
  event: 'vad_end',
3608
3569
  });
3609
3570
  },
3610
- // onVADMisfire: () => {
3611
- // // If the speech detected was for less than minSpeechFrames, this is called instead of onSpeechEnd.
3612
- // },
3613
- })
3571
+ };
3572
+ // Apply VAD configuration from backend if available
3573
+ if (this.vadConfig) {
3574
+ // Only add keys that are explicitly defined (not undefined)
3575
+ if (this.vadConfig.model !== undefined)
3576
+ vadOptions.model = this.vadConfig.model;
3577
+ if (this.vadConfig.positive_speech_threshold !== undefined)
3578
+ vadOptions.positiveSpeechThreshold = this.vadConfig.positive_speech_threshold;
3579
+ if (this.vadConfig.negative_speech_threshold !== undefined)
3580
+ vadOptions.negativeSpeechThreshold = this.vadConfig.negative_speech_threshold;
3581
+ if (this.vadConfig.redemption_frames !== undefined)
3582
+ vadOptions.redemptionFrames = this.vadConfig.redemption_frames;
3583
+ if (this.vadConfig.min_speech_frames !== undefined)
3584
+ vadOptions.minSpeechFrames = this.vadConfig.min_speech_frames;
3585
+ if (this.vadConfig.pre_speech_pad_frames !== undefined)
3586
+ vadOptions.preSpeechPadFrames = this.vadConfig.pre_speech_pad_frames;
3587
+ if (this.vadConfig.frame_samples !== undefined)
3588
+ vadOptions.frameSamples = this.vadConfig.frame_samples;
3589
+ }
3590
+ else {
3591
+ // Default values if no config from backend
3592
+ vadOptions.model = 'v5';
3593
+ vadOptions.positiveSpeechThreshold = 0.15;
3594
+ vadOptions.negativeSpeechThreshold = 0.05;
3595
+ vadOptions.redemptionFrames = 4;
3596
+ vadOptions.minSpeechFrames = 2;
3597
+ vadOptions.preSpeechPadFrames = 0;
3598
+ vadOptions.frameSamples = 512; // Required for v5
3599
+ }
3600
+ console.log('Creating VAD with options:', vadOptions);
3601
+ dist.MicVAD.new(vadOptions)
3614
3602
  .then((vad) => {
3615
- clearTimeout(vadLoadTimeout);
3616
3603
  this.vad = vad;
3617
3604
  this.vad.start();
3618
- console.log('VAD started');
3605
+ console.log('VAD started successfully');
3619
3606
  })
3620
3607
  .catch((error) => {
3621
- console.error('Error initializing VAD:', error);
3608
+ console.warn('Error initializing VAD:', error);
3609
+ // Send a message to server indicating VAD failure
3610
+ this._wsSend({
3611
+ type: 'vad_events',
3612
+ event: 'vad_model_failed',
3613
+ });
3622
3614
  });
3623
3615
  }
3624
3616
  /**
@@ -3722,7 +3714,7 @@ class LayercodeClient {
3722
3714
  this.options.onDataMessage(message);
3723
3715
  break;
3724
3716
  default:
3725
- console.error('Unknown message type received:', message);
3717
+ console.warn('Unknown message type received:', message);
3726
3718
  break;
3727
3719
  }
3728
3720
  }
@@ -3736,12 +3728,26 @@ class LayercodeClient {
3736
3728
  * @param {ArrayBuffer} data - The audio data buffer
3737
3729
  */
3738
3730
  _handleDataAvailable(data) {
3731
+ var _a, _b, _c;
3739
3732
  try {
3740
3733
  const base64 = arrayBufferToBase64(data.mono);
3741
- const sendAudio = this.pushToTalkEnabled ? this.pushToTalkActive : this.userIsSpeaking;
3734
+ // Determine if we should gate audio based on VAD configuration
3735
+ const shouldGateAudio = ((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.gate_audio) !== false; // Default to true if not specified
3736
+ const bufferFrames = (_c = (_b = this.vadConfig) === null || _b === void 0 ? void 0 : _b.buffer_frames) !== null && _c !== void 0 ? _c : 10; // Default to 10 if not specified
3737
+ let sendAudio;
3738
+ if (this.pushToTalkEnabled) {
3739
+ sendAudio = this.pushToTalkActive;
3740
+ }
3741
+ else if (shouldGateAudio) {
3742
+ sendAudio = this.userIsSpeaking;
3743
+ }
3744
+ else {
3745
+ // If gate_audio is false, always send audio
3746
+ sendAudio = true;
3747
+ }
3742
3748
  if (sendAudio) {
3743
- // If we have buffered audio, send it first
3744
- if (this.audioBuffer.length > 0) {
3749
+ // If we have buffered audio and we're gating, send it first
3750
+ if (shouldGateAudio && this.audioBuffer.length > 0) {
3745
3751
  console.log(`Sending ${this.audioBuffer.length} buffered audio chunks`);
3746
3752
  for (const bufferedAudio of this.audioBuffer) {
3747
3753
  this._wsSend({
@@ -3760,8 +3766,8 @@ class LayercodeClient {
3760
3766
  else {
3761
3767
  // Buffer audio when not sending (to catch audio just before VAD triggers)
3762
3768
  this.audioBuffer.push(base64);
3763
- // Keep buffer size reasonable (e.g., last 10 chunks ≈ 200ms at 20ms chunks)
3764
- if (this.audioBuffer.length > 10) {
3769
+ // Keep buffer size based on configuration
3770
+ if (this.audioBuffer.length > bufferFrames) {
3765
3771
  this.audioBuffer.shift(); // Remove oldest chunk
3766
3772
  }
3767
3773
  }
@@ -3811,7 +3817,7 @@ class LayercodeClient {
3811
3817
  }
3812
3818
  }
3813
3819
  /**
3814
- * Connects to the Layercode pipeline and starts the audio session
3820
+ * Connects to the Layercode agent and starts the audio conversation
3815
3821
  * @async
3816
3822
  * @returns {Promise<void>}
3817
3823
  */
@@ -3820,14 +3826,15 @@ class LayercodeClient {
3820
3826
  this._setStatus('connecting');
3821
3827
  // Reset turn tracking for clean start
3822
3828
  this._resetTurnTracking();
3823
- // Get session key from server
3829
+ // Get conversation key from server
3824
3830
  let authorizeSessionRequestBody = {
3825
- pipeline_id: this.options.pipelineId,
3831
+ agent_id: this.options.agentId,
3826
3832
  metadata: this.options.metadata,
3833
+ sdk_version: SDK_VERSION,
3827
3834
  };
3828
- // If we're reconnecting to a previous session, we need to include the session_id in the request. Otherwise we don't send session_id, and a new session will be created and the session_id will be returned in the response.
3829
- if (this.options.sessionId) {
3830
- authorizeSessionRequestBody.session_id = this.options.sessionId;
3835
+ // If we're reconnecting to a previous conversation, we need to include the conversation_id in the request. Otherwise we don't send conversation_id, and a new conversation will be created and the conversation_id will be returned in the response.
3836
+ if (this.options.conversationId) {
3837
+ authorizeSessionRequestBody.conversation_id = this.options.conversationId;
3831
3838
  }
3832
3839
  const authorizeSessionResponse = await fetch(this.options.authorizeSessionEndpoint, {
3833
3840
  method: 'POST',
@@ -3837,16 +3844,18 @@ class LayercodeClient {
3837
3844
  body: JSON.stringify(authorizeSessionRequestBody),
3838
3845
  });
3839
3846
  if (!authorizeSessionResponse.ok) {
3840
- throw new Error(`Failed to authorize session: ${authorizeSessionResponse.statusText}`);
3847
+ throw new Error(`Failed to authorize conversation: ${authorizeSessionResponse.statusText}`);
3841
3848
  }
3842
3849
  const authorizeSessionResponseBody = await authorizeSessionResponse.json();
3843
- this.sessionId = authorizeSessionResponseBody.session_id; // Save the session_id for use in future reconnects
3850
+ this.conversationId = authorizeSessionResponseBody.conversation_id; // Save the conversation_id for use in future reconnects
3844
3851
  // Connect WebSocket
3845
3852
  this.ws = new WebSocket(`${this._websocketUrl}?${new URLSearchParams({
3846
3853
  client_session_key: authorizeSessionResponseBody.client_session_key,
3847
3854
  })}`);
3848
3855
  const config = authorizeSessionResponseBody.config;
3849
3856
  console.log('config', config);
3857
+ // Store VAD configuration
3858
+ this.vadConfig = config.vad || null;
3850
3859
  if (config.transcription.trigger === 'push_to_talk') {
3851
3860
  this.pushToTalkEnabled = true;
3852
3861
  }
@@ -3857,13 +3866,12 @@ class LayercodeClient {
3857
3866
  else {
3858
3867
  throw new Error(`Unknown trigger: ${config.transcription.trigger}`);
3859
3868
  }
3860
- this._initializeVAD();
3861
3869
  // Bind the websocket message callbacks
3862
3870
  this.ws.onmessage = this._handleWebSocketMessage;
3863
3871
  this.ws.onopen = () => {
3864
3872
  console.log('WebSocket connection established');
3865
3873
  this._setStatus('connected');
3866
- this.options.onConnect({ sessionId: this.sessionId });
3874
+ this.options.onConnect({ conversationId: this.conversationId });
3867
3875
  // Attempt to send ready message if recorder already started
3868
3876
  this._sendReadyIfNeeded();
3869
3877
  };
@@ -3877,21 +3885,16 @@ class LayercodeClient {
3877
3885
  this._setStatus('error');
3878
3886
  this.options.onError(new Error('WebSocket connection error'));
3879
3887
  };
3880
- // Initialize microphone audio capture
3881
- await this.wavRecorder.begin();
3882
- await this.wavRecorder.record(this._handleDataAvailable, 1638);
3883
- // Set up microphone amplitude monitoring
3884
- this._setupAmplitudeMonitoring(this.wavRecorder, this.options.onUserAmplitudeChange, (amp) => (this.userAudioAmplitude = amp));
3885
3888
  // Initialize audio player
3886
3889
  await this.wavPlayer.connect();
3887
3890
  // Set up audio player amplitude monitoring
3888
3891
  this._setupAmplitudeMonitoring(this.wavPlayer, this.options.onAgentAmplitudeChange, (amp) => (this.agentAudioAmplitude = amp));
3889
- // Mark recorder as started and attempt to notify server
3890
- this.recorderStarted = true;
3891
- this._sendReadyIfNeeded();
3892
+ // wavRecorder will be started from the onDeviceSwitched callback,
3893
+ // which is called when the device is first initialized and also when the device is switched
3894
+ // this is to ensure that the device is initialized before the recorder is started
3892
3895
  }
3893
3896
  catch (error) {
3894
- console.error('Error connecting to Layercode pipeline:', error);
3897
+ console.error('Error connecting to Layercode agent:', error);
3895
3898
  this._setStatus('error');
3896
3899
  this.options.onError(error instanceof Error ? error : new Error(String(error)));
3897
3900
  throw error;
@@ -3908,6 +3911,7 @@ class LayercodeClient {
3908
3911
  this.vad.destroy();
3909
3912
  this.vad = null;
3910
3913
  }
3914
+ this.wavRecorder.listenForDeviceChange(null);
3911
3915
  this.wavRecorder.quit();
3912
3916
  this.wavPlayer.disconnect();
3913
3917
  // Reset turn tracking
@@ -3931,19 +3935,101 @@ class LayercodeClient {
3931
3935
  * @param {string} deviceId - The deviceId of the new microphone
3932
3936
  */
3933
3937
  async setInputDevice(deviceId) {
3934
- if (this.wavRecorder) {
3938
+ var _a;
3939
+ try {
3940
+ this.deviceId = deviceId;
3941
+ // Restart recording with the new device
3942
+ await this._restartAudioRecording();
3943
+ // Reinitialize VAD with the new audio stream if VAD is enabled
3944
+ const shouldUseVAD = !this.pushToTalkEnabled && ((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.enabled) !== false;
3945
+ if (shouldUseVAD) {
3946
+ console.log('Reinitializing VAD with new audio stream');
3947
+ const newStream = this.wavRecorder.getStream();
3948
+ await this._reinitializeVAD(newStream);
3949
+ }
3950
+ console.log(`Successfully switched to input device: ${deviceId}`);
3951
+ }
3952
+ catch (error) {
3953
+ console.error(`Failed to switch to input device ${deviceId}:`, error);
3954
+ throw new Error(`Failed to switch to input device: ${error instanceof Error ? error.message : String(error)}`);
3955
+ }
3956
+ }
3957
+ /**
3958
+ * Restarts audio recording after a device switch to ensure audio is captured from the new device
3959
+ */
3960
+ async _restartAudioRecording() {
3961
+ try {
3962
+ console.log('Restarting audio recording after device switch...');
3935
3963
  try {
3936
3964
  await this.wavRecorder.end();
3937
3965
  }
3938
- catch (e) { }
3939
- try {
3940
- await this.wavRecorder.quit();
3966
+ catch (_a) {
3967
+ // Ignore cleanup errors
3941
3968
  }
3942
- catch (e) { }
3969
+ // Start with new device
3970
+ await this.wavRecorder.begin(this.deviceId || undefined);
3971
+ await this.wavRecorder.record(this._handleDataAvailable, 1638);
3972
+ // Re-setup amplitude monitoring with the new stream
3973
+ this._setupAmplitudeMonitoring(this.wavRecorder, this.options.onUserAmplitudeChange, (amp) => (this.userAudioAmplitude = amp));
3974
+ console.log('Audio recording restart completed successfully');
3975
+ }
3976
+ catch (error) {
3977
+ console.error('Error restarting audio recording after device switch:', error);
3978
+ this.options.onError(error instanceof Error ? error : new Error(String(error)));
3979
+ }
3980
+ }
3981
+ /**
3982
+ * Reinitializes VAD with a new stream (used after device switching)
3983
+ */
3984
+ async _reinitializeVAD(stream) {
3985
+ // Clean up existing VAD
3986
+ if (this.vad) {
3987
+ this.vad.pause();
3988
+ this.vad.destroy();
3989
+ this.vad = null;
3943
3990
  }
3944
- await this.wavRecorder.begin(deviceId);
3945
- await this.wavRecorder.record(this._handleDataAvailable, 1638);
3946
- this._setupAmplitudeMonitoring(this.wavRecorder, this.options.onUserAmplitudeChange, (amp) => (this.userAudioAmplitude = amp));
3991
+ // Reinitialize with new stream
3992
+ if (stream) {
3993
+ this._initializeVAD();
3994
+ }
3995
+ }
3996
+ /**
3997
+ * Sets up the device change event listener
3998
+ */
3999
+ _setupDeviceChangeListener() {
4000
+ this.wavRecorder.listenForDeviceChange(async (devices) => {
4001
+ try {
4002
+ const currentDeviceExists = devices.some((device) => device.deviceId === this.deviceId);
4003
+ if (!currentDeviceExists) {
4004
+ console.log('Current device disconnected, switching to next available device');
4005
+ try {
4006
+ const nextDevice = devices.find((d) => d.default);
4007
+ if (nextDevice) {
4008
+ await this.setInputDevice(nextDevice.deviceId);
4009
+ // Mark recorder as started and attempt to notify server
4010
+ if (!this.recorderStarted) {
4011
+ this.recorderStarted = true;
4012
+ this._sendReadyIfNeeded();
4013
+ }
4014
+ // Notify about device switch
4015
+ if (this.options.onDeviceSwitched) {
4016
+ this.options.onDeviceSwitched(nextDevice.deviceId);
4017
+ }
4018
+ }
4019
+ else {
4020
+ console.warn('No alternative audio device found');
4021
+ }
4022
+ }
4023
+ catch (error) {
4024
+ console.error('Error switching to next device:', error);
4025
+ throw error;
4026
+ }
4027
+ }
4028
+ }
4029
+ catch (error) {
4030
+ this.options.onError(error instanceof Error ? error : new Error(String(error)));
4031
+ }
4032
+ });
3947
4033
  }
3948
4034
  }
3949
4035