@layercode/js-sdk 1.0.26 → 2.0.0

This diff shows the changes between publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects each package version exactly as it appears in its public registry.
@@ -3483,9 +3483,11 @@ registerProcessor('audio_processor', AudioProcessor);
3483
3483
  }
3484
3484
 
3485
3485
  /* eslint-env browser */
3486
+ // SDK version - updated when publishing
3487
+ const SDK_VERSION = '2.0.0';
3486
3488
  /**
3487
3489
  * @class LayercodeClient
3488
- * @classdesc Core client for Layercode audio pipeline that manages audio recording, WebSocket communication, and speech processing.
3490
+ * @classdesc Core client for Layercode audio agent that manages audio recording, WebSocket communication, and speech processing.
3489
3491
  */
3490
3492
  class LayercodeClient {
3491
3493
  /**
@@ -3493,15 +3495,17 @@ registerProcessor('audio_processor', AudioProcessor);
3493
3495
  * @param {Object} options - Configuration options
3494
3496
  */
3495
3497
  constructor(options) {
3498
+ this.deviceId = null;
3496
3499
  this.options = {
3497
- pipelineId: options.pipelineId,
3498
- sessionId: options.sessionId || null,
3500
+ agentId: options.agentId,
3501
+ conversationId: options.conversationId || null,
3499
3502
  authorizeSessionEndpoint: options.authorizeSessionEndpoint,
3500
3503
  metadata: options.metadata || {},
3501
3504
  vadResumeDelay: options.vadResumeDelay || 500,
3502
3505
  onConnect: options.onConnect || (() => { }),
3503
3506
  onDisconnect: options.onDisconnect || (() => { }),
3504
3507
  onError: options.onError || (() => { }),
3508
+ onDeviceSwitched: options.onDeviceSwitched || (() => { }),
3505
3509
  onDataMessage: options.onDataMessage || (() => { }),
3506
3510
  onUserAmplitudeChange: options.onUserAmplitudeChange || (() => { }),
3507
3511
  onAgentAmplitudeChange: options.onAgentAmplitudeChange || (() => { }),
@@ -3509,18 +3513,18 @@ registerProcessor('audio_processor', AudioProcessor);
3509
3513
  onUserIsSpeakingChange: options.onUserIsSpeakingChange || (() => { }),
3510
3514
  };
3511
3515
  this.AMPLITUDE_MONITORING_SAMPLE_RATE = 10;
3512
- this._websocketUrl = 'wss://api.layercode.com/v1/pipelines/websocket';
3513
- this.wavRecorder = new WavRecorder({ sampleRate: 8000 }); // TODO should be set my fetched pipeline config
3516
+ this._websocketUrl = 'wss://api.layercode.com/v1/agents/websocket';
3517
+ this.wavRecorder = new WavRecorder({ sampleRate: 8000 }); // TODO should be set my fetched agent config
3514
3518
  this.wavPlayer = new WavStreamPlayer({
3515
3519
  finishedPlayingCallback: this._clientResponseAudioReplayFinished.bind(this),
3516
- sampleRate: 16000, // TODO should be set my fetched pipeline config
3520
+ sampleRate: 16000, // TODO should be set my fetched agent config
3517
3521
  });
3518
3522
  this.vad = null;
3519
3523
  this.ws = null;
3520
3524
  this.status = 'disconnected';
3521
3525
  this.userAudioAmplitude = 0;
3522
3526
  this.agentAudioAmplitude = 0;
3523
- this.sessionId = options.sessionId || null;
3527
+ this.conversationId = options.conversationId || null;
3524
3528
  this.pushToTalkActive = false;
3525
3529
  this.pushToTalkEnabled = false;
3526
3530
  this.canInterrupt = false;
@@ -3529,71 +3533,28 @@ registerProcessor('audio_processor', AudioProcessor);
3529
3533
  this.readySent = false;
3530
3534
  this.currentTurnId = null;
3531
3535
  this.audioBuffer = [];
3536
+ this.vadConfig = null;
3532
3537
  // this.audioPauseTime = null;
3533
3538
  // Bind event handlers
3534
3539
  this._handleWebSocketMessage = this._handleWebSocketMessage.bind(this);
3535
3540
  this._handleDataAvailable = this._handleDataAvailable.bind(this);
3536
- }
3537
- _setupAmplitudeBasedVAD() {
3538
- let isSpeakingByAmplitude = false;
3539
- let silenceFrames = 0;
3540
- const AMPLITUDE_THRESHOLD = 0.01; // Adjust based on testing
3541
- const SILENCE_FRAMES_THRESHOLD = 6.4; // 6.4 * 20ms chunks = 128ms silence. Same as Silero ((frame samples: 512 / sampleRate: 16000) * 1000 * redemptionFrames: 4) = 128 ms silence
3542
- // Monitor amplitude changes
3543
- this.wavRecorder.startAmplitudeMonitoring((amplitude) => {
3544
- const wasSpeaking = isSpeakingByAmplitude;
3545
- if (amplitude > AMPLITUDE_THRESHOLD) {
3546
- silenceFrames = 0;
3547
- if (!wasSpeaking) {
3548
- isSpeakingByAmplitude = true;
3549
- this.userIsSpeaking = true;
3550
- this.options.onUserIsSpeakingChange(true);
3551
- this._wsSend({
3552
- type: 'vad_events',
3553
- event: 'vad_start',
3554
- });
3555
- }
3556
- }
3557
- else {
3558
- silenceFrames++;
3559
- if (wasSpeaking && silenceFrames >= SILENCE_FRAMES_THRESHOLD) {
3560
- isSpeakingByAmplitude = false;
3561
- this.userIsSpeaking = false;
3562
- this.options.onUserIsSpeakingChange(false);
3563
- this._wsSend({
3564
- type: 'vad_events',
3565
- event: 'vad_end',
3566
- });
3567
- }
3568
- }
3569
- });
3541
+ this._setupDeviceChangeListener();
3570
3542
  }
3571
3543
  _initializeVAD() {
3572
- console.log('initializing VAD', { pushToTalkEnabled: this.pushToTalkEnabled, canInterrupt: this.canInterrupt });
3544
+ var _a;
3545
+ console.log('initializing VAD', { pushToTalkEnabled: this.pushToTalkEnabled, canInterrupt: this.canInterrupt, vadConfig: this.vadConfig });
3573
3546
  // If we're in push to talk mode, we don't need to use the VAD model
3574
3547
  if (this.pushToTalkEnabled) {
3575
3548
  return;
3576
3549
  }
3577
- const vadLoadTimeout = setTimeout(() => {
3578
- console.log('silero vad model timeout');
3579
- console.warn('VAD model failed to load - falling back to amplitude-based detection');
3580
- // Send a message to server indicating VAD failure
3581
- this._wsSend({
3582
- type: 'vad_events',
3583
- event: 'vad_model_failed',
3584
- });
3585
- // Set up amplitude-based fallback detection
3586
- this._setupAmplitudeBasedVAD();
3587
- }, 2000);
3588
- dist.MicVAD.new({
3550
+ // Check if VAD is disabled
3551
+ if (((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.enabled) === false) {
3552
+ console.log('VAD is disabled by backend configuration');
3553
+ return;
3554
+ }
3555
+ // Build VAD configuration object, only including keys that are defined
3556
+ const vadOptions = {
3589
3557
  stream: this.wavRecorder.getStream() || undefined,
3590
- model: 'v5',
3591
- positiveSpeechThreshold: 0.15,
3592
- negativeSpeechThreshold: 0.05,
3593
- redemptionFrames: 4,
3594
- minSpeechFrames: 2,
3595
- preSpeechPadFrames: 0,
3596
- frameSamples: 512, // Required for v5 as per https://docs.vad.ricky0123.com/user-guide/algorithm/#configuration
3597
3558
  onSpeechStart: () => {
3598
3559
  console.log('onSpeechStart: sending vad_start');
3599
3560
  this.userIsSpeaking = true;
@@ -3613,18 +3574,49 @@ registerProcessor('audio_processor', AudioProcessor);
3613
3574
  event: 'vad_end',
3614
3575
  });
3615
3576
  },
3616
- // onVADMisfire: () => {
3617
- // // If the speech detected was for less than minSpeechFrames, this is called instead of onSpeechEnd.
3618
- // },
3619
- })
3577
+ };
3578
+ // Apply VAD configuration from backend if available
3579
+ if (this.vadConfig) {
3580
+ // Only add keys that are explicitly defined (not undefined)
3581
+ if (this.vadConfig.model !== undefined)
3582
+ vadOptions.model = this.vadConfig.model;
3583
+ if (this.vadConfig.positive_speech_threshold !== undefined)
3584
+ vadOptions.positiveSpeechThreshold = this.vadConfig.positive_speech_threshold;
3585
+ if (this.vadConfig.negative_speech_threshold !== undefined)
3586
+ vadOptions.negativeSpeechThreshold = this.vadConfig.negative_speech_threshold;
3587
+ if (this.vadConfig.redemption_frames !== undefined)
3588
+ vadOptions.redemptionFrames = this.vadConfig.redemption_frames;
3589
+ if (this.vadConfig.min_speech_frames !== undefined)
3590
+ vadOptions.minSpeechFrames = this.vadConfig.min_speech_frames;
3591
+ if (this.vadConfig.pre_speech_pad_frames !== undefined)
3592
+ vadOptions.preSpeechPadFrames = this.vadConfig.pre_speech_pad_frames;
3593
+ if (this.vadConfig.frame_samples !== undefined)
3594
+ vadOptions.frameSamples = this.vadConfig.frame_samples;
3595
+ }
3596
+ else {
3597
+ // Default values if no config from backend
3598
+ vadOptions.model = 'v5';
3599
+ vadOptions.positiveSpeechThreshold = 0.15;
3600
+ vadOptions.negativeSpeechThreshold = 0.05;
3601
+ vadOptions.redemptionFrames = 4;
3602
+ vadOptions.minSpeechFrames = 2;
3603
+ vadOptions.preSpeechPadFrames = 0;
3604
+ vadOptions.frameSamples = 512; // Required for v5
3605
+ }
3606
+ console.log('Creating VAD with options:', vadOptions);
3607
+ dist.MicVAD.new(vadOptions)
3620
3608
  .then((vad) => {
3621
- clearTimeout(vadLoadTimeout);
3622
3609
  this.vad = vad;
3623
3610
  this.vad.start();
3624
- console.log('VAD started');
3611
+ console.log('VAD started successfully');
3625
3612
  })
3626
3613
  .catch((error) => {
3627
- console.error('Error initializing VAD:', error);
3614
+ console.warn('Error initializing VAD:', error);
3615
+ // Send a message to server indicating VAD failure
3616
+ this._wsSend({
3617
+ type: 'vad_events',
3618
+ event: 'vad_model_failed',
3619
+ });
3628
3620
  });
3629
3621
  }
3630
3622
  /**
@@ -3728,7 +3720,7 @@ registerProcessor('audio_processor', AudioProcessor);
3728
3720
  this.options.onDataMessage(message);
3729
3721
  break;
3730
3722
  default:
3731
- console.error('Unknown message type received:', message);
3723
+ console.warn('Unknown message type received:', message);
3732
3724
  break;
3733
3725
  }
3734
3726
  }
@@ -3742,12 +3734,26 @@ registerProcessor('audio_processor', AudioProcessor);
3742
3734
  * @param {ArrayBuffer} data - The audio data buffer
3743
3735
  */
3744
3736
  _handleDataAvailable(data) {
3737
+ var _a, _b, _c;
3745
3738
  try {
3746
3739
  const base64 = arrayBufferToBase64(data.mono);
3747
- const sendAudio = this.pushToTalkEnabled ? this.pushToTalkActive : this.userIsSpeaking;
3740
+ // Determine if we should gate audio based on VAD configuration
3741
+ const shouldGateAudio = ((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.gate_audio) !== false; // Default to true if not specified
3742
+ const bufferFrames = (_c = (_b = this.vadConfig) === null || _b === void 0 ? void 0 : _b.buffer_frames) !== null && _c !== void 0 ? _c : 10; // Default to 10 if not specified
3743
+ let sendAudio;
3744
+ if (this.pushToTalkEnabled) {
3745
+ sendAudio = this.pushToTalkActive;
3746
+ }
3747
+ else if (shouldGateAudio) {
3748
+ sendAudio = this.userIsSpeaking;
3749
+ }
3750
+ else {
3751
+ // If gate_audio is false, always send audio
3752
+ sendAudio = true;
3753
+ }
3748
3754
  if (sendAudio) {
3749
- // If we have buffered audio, send it first
3750
- if (this.audioBuffer.length > 0) {
3755
+ // If we have buffered audio and we're gating, send it first
3756
+ if (shouldGateAudio && this.audioBuffer.length > 0) {
3751
3757
  console.log(`Sending ${this.audioBuffer.length} buffered audio chunks`);
3752
3758
  for (const bufferedAudio of this.audioBuffer) {
3753
3759
  this._wsSend({
@@ -3766,8 +3772,8 @@ registerProcessor('audio_processor', AudioProcessor);
3766
3772
  else {
3767
3773
  // Buffer audio when not sending (to catch audio just before VAD triggers)
3768
3774
  this.audioBuffer.push(base64);
3769
- // Keep buffer size reasonable (e.g., last 10 chunks ≈ 200ms at 20ms chunks)
3770
- if (this.audioBuffer.length > 10) {
3775
+ // Keep buffer size based on configuration
3776
+ if (this.audioBuffer.length > bufferFrames) {
3771
3777
  this.audioBuffer.shift(); // Remove oldest chunk
3772
3778
  }
3773
3779
  }
@@ -3817,7 +3823,7 @@ registerProcessor('audio_processor', AudioProcessor);
3817
3823
  }
3818
3824
  }
3819
3825
  /**
3820
- * Connects to the Layercode pipeline and starts the audio session
3826
+ * Connects to the Layercode agent and starts the audio conversation
3821
3827
  * @async
3822
3828
  * @returns {Promise<void>}
3823
3829
  */
@@ -3826,14 +3832,15 @@ registerProcessor('audio_processor', AudioProcessor);
3826
3832
  this._setStatus('connecting');
3827
3833
  // Reset turn tracking for clean start
3828
3834
  this._resetTurnTracking();
3829
- // Get session key from server
3835
+ // Get conversation key from server
3830
3836
  let authorizeSessionRequestBody = {
3831
- pipeline_id: this.options.pipelineId,
3837
+ agent_id: this.options.agentId,
3832
3838
  metadata: this.options.metadata,
3839
+ sdk_version: SDK_VERSION,
3833
3840
  };
3834
- // If we're reconnecting to a previous session, we need to include the session_id in the request. Otherwise we don't send session_id, and a new session will be created and the session_id will be returned in the response.
3835
- if (this.options.sessionId) {
3836
- authorizeSessionRequestBody.session_id = this.options.sessionId;
3841
+ // If we're reconnecting to a previous conversation, we need to include the conversation_id in the request. Otherwise we don't send conversation_id, and a new conversation will be created and the conversation_id will be returned in the response.
3842
+ if (this.options.conversationId) {
3843
+ authorizeSessionRequestBody.conversation_id = this.options.conversationId;
3837
3844
  }
3838
3845
  const authorizeSessionResponse = await fetch(this.options.authorizeSessionEndpoint, {
3839
3846
  method: 'POST',
@@ -3843,16 +3850,18 @@ registerProcessor('audio_processor', AudioProcessor);
3843
3850
  body: JSON.stringify(authorizeSessionRequestBody),
3844
3851
  });
3845
3852
  if (!authorizeSessionResponse.ok) {
3846
- throw new Error(`Failed to authorize session: ${authorizeSessionResponse.statusText}`);
3853
+ throw new Error(`Failed to authorize conversation: ${authorizeSessionResponse.statusText}`);
3847
3854
  }
3848
3855
  const authorizeSessionResponseBody = await authorizeSessionResponse.json();
3849
- this.sessionId = authorizeSessionResponseBody.session_id; // Save the session_id for use in future reconnects
3856
+ this.conversationId = authorizeSessionResponseBody.conversation_id; // Save the conversation_id for use in future reconnects
3850
3857
  // Connect WebSocket
3851
3858
  this.ws = new WebSocket(`${this._websocketUrl}?${new URLSearchParams({
3852
3859
  client_session_key: authorizeSessionResponseBody.client_session_key,
3853
3860
  })}`);
3854
3861
  const config = authorizeSessionResponseBody.config;
3855
3862
  console.log('config', config);
3863
+ // Store VAD configuration
3864
+ this.vadConfig = config.vad || null;
3856
3865
  if (config.transcription.trigger === 'push_to_talk') {
3857
3866
  this.pushToTalkEnabled = true;
3858
3867
  }
@@ -3863,13 +3872,12 @@ registerProcessor('audio_processor', AudioProcessor);
3863
3872
  else {
3864
3873
  throw new Error(`Unknown trigger: ${config.transcription.trigger}`);
3865
3874
  }
3866
- this._initializeVAD();
3867
3875
  // Bind the websocket message callbacks
3868
3876
  this.ws.onmessage = this._handleWebSocketMessage;
3869
3877
  this.ws.onopen = () => {
3870
3878
  console.log('WebSocket connection established');
3871
3879
  this._setStatus('connected');
3872
- this.options.onConnect({ sessionId: this.sessionId });
3880
+ this.options.onConnect({ conversationId: this.conversationId });
3873
3881
  // Attempt to send ready message if recorder already started
3874
3882
  this._sendReadyIfNeeded();
3875
3883
  };
@@ -3883,21 +3891,16 @@ registerProcessor('audio_processor', AudioProcessor);
3883
3891
  this._setStatus('error');
3884
3892
  this.options.onError(new Error('WebSocket connection error'));
3885
3893
  };
3886
- // Initialize microphone audio capture
3887
- await this.wavRecorder.begin();
3888
- await this.wavRecorder.record(this._handleDataAvailable, 1638);
3889
- // Set up microphone amplitude monitoring
3890
- this._setupAmplitudeMonitoring(this.wavRecorder, this.options.onUserAmplitudeChange, (amp) => (this.userAudioAmplitude = amp));
3891
3894
  // Initialize audio player
3892
3895
  await this.wavPlayer.connect();
3893
3896
  // Set up audio player amplitude monitoring
3894
3897
  this._setupAmplitudeMonitoring(this.wavPlayer, this.options.onAgentAmplitudeChange, (amp) => (this.agentAudioAmplitude = amp));
3895
- // Mark recorder as started and attempt to notify server
3896
- this.recorderStarted = true;
3897
- this._sendReadyIfNeeded();
3898
+ // wavRecorder will be started from the onDeviceSwitched callback,
3899
+ // which is called when the device is first initialized and also when the device is switched
3900
+ // this is to ensure that the device is initialized before the recorder is started
3898
3901
  }
3899
3902
  catch (error) {
3900
- console.error('Error connecting to Layercode pipeline:', error);
3903
+ console.error('Error connecting to Layercode agent:', error);
3901
3904
  this._setStatus('error');
3902
3905
  this.options.onError(error instanceof Error ? error : new Error(String(error)));
3903
3906
  throw error;
@@ -3914,6 +3917,7 @@ registerProcessor('audio_processor', AudioProcessor);
3914
3917
  this.vad.destroy();
3915
3918
  this.vad = null;
3916
3919
  }
3920
+ this.wavRecorder.listenForDeviceChange(null);
3917
3921
  this.wavRecorder.quit();
3918
3922
  this.wavPlayer.disconnect();
3919
3923
  // Reset turn tracking
@@ -3937,19 +3941,101 @@ registerProcessor('audio_processor', AudioProcessor);
3937
3941
  * @param {string} deviceId - The deviceId of the new microphone
3938
3942
  */
3939
3943
  async setInputDevice(deviceId) {
3940
- if (this.wavRecorder) {
3944
+ var _a;
3945
+ try {
3946
+ this.deviceId = deviceId;
3947
+ // Restart recording with the new device
3948
+ await this._restartAudioRecording();
3949
+ // Reinitialize VAD with the new audio stream if VAD is enabled
3950
+ const shouldUseVAD = !this.pushToTalkEnabled && ((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.enabled) !== false;
3951
+ if (shouldUseVAD) {
3952
+ console.log('Reinitializing VAD with new audio stream');
3953
+ const newStream = this.wavRecorder.getStream();
3954
+ await this._reinitializeVAD(newStream);
3955
+ }
3956
+ console.log(`Successfully switched to input device: ${deviceId}`);
3957
+ }
3958
+ catch (error) {
3959
+ console.error(`Failed to switch to input device ${deviceId}:`, error);
3960
+ throw new Error(`Failed to switch to input device: ${error instanceof Error ? error.message : String(error)}`);
3961
+ }
3962
+ }
3963
+ /**
3964
+ * Restarts audio recording after a device switch to ensure audio is captured from the new device
3965
+ */
3966
+ async _restartAudioRecording() {
3967
+ try {
3968
+ console.log('Restarting audio recording after device switch...');
3941
3969
  try {
3942
3970
  await this.wavRecorder.end();
3943
3971
  }
3944
- catch (e) { }
3945
- try {
3946
- await this.wavRecorder.quit();
3972
+ catch (_a) {
3973
+ // Ignore cleanup errors
3947
3974
  }
3948
- catch (e) { }
3975
+ // Start with new device
3976
+ await this.wavRecorder.begin(this.deviceId || undefined);
3977
+ await this.wavRecorder.record(this._handleDataAvailable, 1638);
3978
+ // Re-setup amplitude monitoring with the new stream
3979
+ this._setupAmplitudeMonitoring(this.wavRecorder, this.options.onUserAmplitudeChange, (amp) => (this.userAudioAmplitude = amp));
3980
+ console.log('Audio recording restart completed successfully');
3981
+ }
3982
+ catch (error) {
3983
+ console.error('Error restarting audio recording after device switch:', error);
3984
+ this.options.onError(error instanceof Error ? error : new Error(String(error)));
3985
+ }
3986
+ }
3987
+ /**
3988
+ * Reinitializes VAD with a new stream (used after device switching)
3989
+ */
3990
+ async _reinitializeVAD(stream) {
3991
+ // Clean up existing VAD
3992
+ if (this.vad) {
3993
+ this.vad.pause();
3994
+ this.vad.destroy();
3995
+ this.vad = null;
3949
3996
  }
3950
- await this.wavRecorder.begin(deviceId);
3951
- await this.wavRecorder.record(this._handleDataAvailable, 1638);
3952
- this._setupAmplitudeMonitoring(this.wavRecorder, this.options.onUserAmplitudeChange, (amp) => (this.userAudioAmplitude = amp));
3997
+ // Reinitialize with new stream
3998
+ if (stream) {
3999
+ this._initializeVAD();
4000
+ }
4001
+ }
4002
+ /**
4003
+ * Sets up the device change event listener
4004
+ */
4005
+ _setupDeviceChangeListener() {
4006
+ this.wavRecorder.listenForDeviceChange(async (devices) => {
4007
+ try {
4008
+ const currentDeviceExists = devices.some((device) => device.deviceId === this.deviceId);
4009
+ if (!currentDeviceExists) {
4010
+ console.log('Current device disconnected, switching to next available device');
4011
+ try {
4012
+ const nextDevice = devices.find((d) => d.default);
4013
+ if (nextDevice) {
4014
+ await this.setInputDevice(nextDevice.deviceId);
4015
+ // Mark recorder as started and attempt to notify server
4016
+ if (!this.recorderStarted) {
4017
+ this.recorderStarted = true;
4018
+ this._sendReadyIfNeeded();
4019
+ }
4020
+ // Notify about device switch
4021
+ if (this.options.onDeviceSwitched) {
4022
+ this.options.onDeviceSwitched(nextDevice.deviceId);
4023
+ }
4024
+ }
4025
+ else {
4026
+ console.warn('No alternative audio device found');
4027
+ }
4028
+ }
4029
+ catch (error) {
4030
+ console.error('Error switching to next device:', error);
4031
+ throw error;
4032
+ }
4033
+ }
4034
+ }
4035
+ catch (error) {
4036
+ this.options.onError(error instanceof Error ? error : new Error(String(error)));
4037
+ }
4038
+ });
3953
4039
  }
3954
4040
  }
3955
4041