@layercode/js-sdk 2.0.1 → 2.0.3

@@ -3478,7 +3478,7 @@ function arrayBufferToBase64(arrayBuffer) {
 
 /* eslint-env browser */
 // SDK version - updated when publishing
-const SDK_VERSION = '2.0.0';
+const SDK_VERSION = '2.0.2';
 /**
  * @class LayercodeClient
  * @classdesc Core client for Layercode audio agent that manages audio recording, WebSocket communication, and speech processing.
@@ -3501,13 +3501,15 @@ class LayercodeClient {
             onError: options.onError || (() => { }),
             onDeviceSwitched: options.onDeviceSwitched || (() => { }),
             onDataMessage: options.onDataMessage || (() => { }),
+            onMessage: options.onMessage || (() => { }),
             onUserAmplitudeChange: options.onUserAmplitudeChange || (() => { }),
             onAgentAmplitudeChange: options.onAgentAmplitudeChange || (() => { }),
             onStatusChange: options.onStatusChange || (() => { }),
             onUserIsSpeakingChange: options.onUserIsSpeakingChange || (() => { }),
+            onMuteStateChange: options.onMuteStateChange || (() => { }),
         };
-        this.AMPLITUDE_MONITORING_SAMPLE_RATE = 10;
-        this._websocketUrl = 'wss://api.layercode.com/v1/agents/websocket';
+        this.AMPLITUDE_MONITORING_SAMPLE_RATE = 2;
+        this._websocketUrl = 'wss://api.layercode.com/v1/agents/web/websocket';
         this.wavRecorder = new WavRecorder({ sampleRate: 8000 }); // TODO should be set my fetched agent config
         this.wavPlayer = new WavStreamPlayer({
             finishedPlayingCallback: this._clientResponseAudioReplayFinished.bind(this),
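
Note: this release adds two constructor callbacks, onMessage and onMuteStateChange, lowers the amplitude-monitoring sample rate from 10 to 2, and moves the websocket endpoint to the /v1/agents/web/websocket path. A minimal wiring sketch follows; only the two callback names come from this diff, while the other constructor options shown are assumed for illustration.

    // Sketch of wiring up the new callbacks added in this release.
    import LayercodeClient from '@layercode/js-sdk';

    const client = new LayercodeClient({
        agentId: 'your-agent-id',                   // assumed option, not shown in this diff
        authorizeSessionEndpoint: '/api/authorize', // assumed option, not shown in this diff
        onMessage: (msg) => console.debug('server message:', msg),   // new in 2.0.2+
        onMuteStateChange: (muted) => console.debug('muted:', muted), // new in 2.0.2+
    });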
@@ -3528,6 +3530,7 @@ class LayercodeClient {
         this.currentTurnId = null;
         this.audioBuffer = [];
         this.vadConfig = null;
+        this.isMuted = false;
         // this.audioPauseTime = null;
         // Bind event handlers
         this._handleWebSocketMessage = this._handleWebSocketMessage.bind(this);
@@ -3550,16 +3553,20 @@ class LayercodeClient {
         const vadOptions = {
             stream: this.wavRecorder.getStream() || undefined,
             onSpeechStart: () => {
-                console.log('onSpeechStart: sending vad_start');
+                console.debug('onSpeechStart: sending vad_start');
                 this.userIsSpeaking = true;
                 this.options.onUserIsSpeakingChange(true);
                 this._wsSend({
                     type: 'vad_events',
                     event: 'vad_start',
                 });
+                this.options.onMessage({
+                    type: 'vad_events',
+                    event: 'vad_start',
+                });
             },
             onSpeechEnd: () => {
-                console.log('onSpeechEnd: sending vad_end');
+                console.debug('onSpeechEnd: sending vad_end');
                 this.userIsSpeaking = false;
                 this.options.onUserIsSpeakingChange(false);
                 this.audioBuffer = []; // Clear buffer on speech end
@@ -3567,6 +3574,10 @@ class LayercodeClient {
                     type: 'vad_events',
                     event: 'vad_end',
                 });
+                this.options.onMessage({
+                    type: 'vad_events',
+                    event: 'vad_end',
+                });
             },
         };
         // Apply VAD configuration from backend if available
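
Note: client-side VAD transitions are now mirrored to the new onMessage callback with the same payload shape that is sent over the websocket, so a handler can observe speech boundaries without a server round trip:

    // The payload matches the websocket vad_events message exactly.
    const onMessage = (msg) => {
        if (msg.type === 'vad_events') {
            // msg.event is 'vad_start' or 'vad_end'
            console.debug('voice activity:', msg.event);
        }
    };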
@@ -3625,32 +3636,14 @@ class LayercodeClient {
     * Handles when agent audio finishes playing
     */
    _clientResponseAudioReplayFinished() {
-        console.log('clientResponseAudioReplayFinished');
+        console.debug('clientResponseAudioReplayFinished');
        this._wsSend({
            type: 'trigger.response.audio.replay_finished',
            reason: 'completed',
        });
    }
    async _clientInterruptAssistantReplay() {
-        const offsetData = await this.wavPlayer.interrupt();
-        if (offsetData && this.currentTurnId) {
-            let offsetMs = offsetData.currentTime * 1000;
-            // Send interruption event with accurate playback offset in milliseconds
-            this._wsSend({
-                type: 'trigger.response.audio.interrupted',
-                playback_offset: offsetMs,
-                interruption_context: {
-                    turn_id: this.currentTurnId,
-                    playback_offset_ms: offsetMs,
-                },
-            });
-        }
-        else {
-            console.warn('Interruption requested but missing required data:', {
-                hasOffsetData: !!offsetData,
-                hasTurnId: !!this.currentTurnId,
-            });
-        }
+        await this.wavPlayer.interrupt();
    }
    async triggerUserTurnStarted() {
        if (!this.pushToTalkActive) {
@@ -3673,49 +3666,51 @@ class LayercodeClient {
         try {
             const message = JSON.parse(event.data);
             if (message.type !== 'response.audio') {
-                console.log('received ws msg:', message);
+                console.debug('msg:', message);
             }
             switch (message.type) {
                 case 'turn.start':
                     // Sent from the server to this client when a new user turn is detected
-                    console.log('received turn.start from server');
-                    console.log(message);
                     if (message.role === 'assistant') {
                         // Start tracking new assistant turn
-                        console.log('Assistant turn started, will track new turn ID from audio/text');
+                        console.debug('Assistant turn started, will track new turn ID from audio/text');
                     }
                     else if (message.role === 'user' && !this.pushToTalkEnabled) {
                         // Interrupt any playing assistant audio if this is a turn triggered by the server (and not push to talk, which will have already called interrupt)
-                        console.log('interrupting assistant audio, as user turn has started and pushToTalkEnabled is false');
+                        console.debug('interrupting assistant audio, as user turn has started and pushToTalkEnabled is false');
                         await this._clientInterruptAssistantReplay();
                     }
+                    this.options.onMessage(message);
                     break;
                 case 'response.audio':
                     const audioBuffer = base64ToArrayBuffer(message.content);
                     this.wavPlayer.add16BitPCM(audioBuffer, message.turn_id);
+                    // TODO: once we've added turn_id to the turn.start msgs sent from teh server, we should move this currentTurnId switching logic to the turn.start msg case. We can then remove the currentTurnId setting logic from the response.audio and response.text cases.
                     // Set current turn ID from first audio message, or update if different turn
                     if (!this.currentTurnId || this.currentTurnId !== message.turn_id) {
-                        console.log(`Setting current turn ID to: ${message.turn_id} (was: ${this.currentTurnId})`);
+                        console.debug(`Setting current turn ID to: ${message.turn_id} (was: ${this.currentTurnId})`);
                         this.currentTurnId = message.turn_id;
                         // Clean up interrupted tracks, keeping only the current turn
                         this.wavPlayer.clearInterruptedTracks(this.currentTurnId ? [this.currentTurnId] : []);
                     }
                     break;
-                case 'response.text': {
+                case 'response.text':
                     // Set turn ID from first text message if not set
                     if (!this.currentTurnId) {
                         this.currentTurnId = message.turn_id;
-                        console.log(`Setting current turn ID to: ${message.turn_id} from text message`);
+                        console.debug(`Setting current turn ID to: ${message.turn_id} from text message`);
                     }
+                    this.options.onMessage(message);
                     break;
-                }
                 case 'response.data':
-                    console.log('received response.data', message);
                     this.options.onDataMessage(message);
                     break;
+                case 'user.transcript':
+                case 'user.transcript.delta':
+                    this.options.onMessage(message);
+                    break;
                 default:
                     console.warn('Unknown message type received:', message);
-                    break;
             }
         }
         catch (error) {
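
Note: besides vad_events, the handler now also forwards turn.start, response.text, user.transcript, and user.transcript.delta messages to onMessage (response.data still goes to the dedicated onDataMessage callback). A handler sketch branching on these types; the content field on transcript messages is an assumption, since the diff does not show their payload shape.

    const onMessage = (msg) => {
        switch (msg.type) {
            case 'turn.start':
                console.debug('turn started, role:', msg.role);
                break;
            case 'user.transcript.delta': // streaming partial transcript
            case 'user.transcript':       // final transcript for the turn
                console.debug(msg.type, msg.content); // 'content' field is assumed
                break;
        }
    };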
@@ -3731,6 +3726,10 @@ class LayercodeClient {
         var _a, _b, _c;
         try {
             const base64 = arrayBufferToBase64(data.mono);
+            // Don't send audio if muted
+            if (this.isMuted) {
+                return;
+            }
             // Determine if we should gate audio based on VAD configuration
             const shouldGateAudio = ((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.gate_audio) !== false; // Default to true if not specified
             const bufferFrames = (_c = (_b = this.vadConfig) === null || _b === void 0 ? void 0 : _b.buffer_frames) !== null && _c !== void 0 ? _c : 10; // Default to 10 if not specified
@@ -3748,7 +3747,7 @@ class LayercodeClient {
             if (sendAudio) {
                 // If we have buffered audio and we're gating, send it first
                 if (shouldGateAudio && this.audioBuffer.length > 0) {
-                    console.log(`Sending ${this.audioBuffer.length} buffered audio chunks`);
+                    console.debug(`Sending ${this.audioBuffer.length} buffered audio chunks`);
                     for (const bufferedAudio of this.audioBuffer) {
                         this._wsSend({
                             type: 'client.audio',
@@ -3780,7 +3779,7 @@ class LayercodeClient {
     _wsSend(message) {
         var _a;
         if (message.type !== 'client.audio') {
-            console.log('sent ws msg:', message);
+            console.debug('sent_msg:', message);
         }
         const messageString = JSON.stringify(message);
         if (((_a = this.ws) === null || _a === void 0 ? void 0 : _a.readyState) === WebSocket.OPEN) {
@@ -3853,7 +3852,7 @@ class LayercodeClient {
             client_session_key: authorizeSessionResponseBody.client_session_key,
         })}`);
         const config = authorizeSessionResponseBody.config;
-        console.log('config', config);
+        console.log('AgentConfig', config);
         // Store VAD configuration
         this.vadConfig = config.vad || null;
         if (config.transcription.trigger === 'push_to_talk') {
@@ -3902,7 +3901,7 @@ class LayercodeClient {
     }
     _resetTurnTracking() {
         this.currentTurnId = null;
-        console.log('Reset turn tracking state');
+        console.debug('Reset turn tracking state');
     }
     async disconnect() {
         // Clean up VAD if it exists
@@ -3943,11 +3942,11 @@ class LayercodeClient {
             // Reinitialize VAD with the new audio stream if VAD is enabled
             const shouldUseVAD = !this.pushToTalkEnabled && ((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.enabled) !== false;
             if (shouldUseVAD) {
-                console.log('Reinitializing VAD with new audio stream');
+                console.debug('Reinitializing VAD with new audio stream');
                 const newStream = this.wavRecorder.getStream();
                 await this._reinitializeVAD(newStream);
             }
-            console.log(`Successfully switched to input device: ${deviceId}`);
+            console.debug(`Successfully switched to input device: ${deviceId}`);
         }
         catch (error) {
             console.error(`Failed to switch to input device ${deviceId}:`, error);
@@ -3959,7 +3958,7 @@ class LayercodeClient {
     */
    async _restartAudioRecording() {
        try {
-            console.log('Restarting audio recording after device switch...');
+            console.debug('Restarting audio recording after device switch...');
            try {
                await this.wavRecorder.end();
            }
@@ -3971,7 +3970,7 @@ class LayercodeClient {
             await this.wavRecorder.record(this._handleDataAvailable, 1638);
             // Re-setup amplitude monitoring with the new stream
             this._setupAmplitudeMonitoring(this.wavRecorder, this.options.onUserAmplitudeChange, (amp) => (this.userAudioAmplitude = amp));
-            console.log('Audio recording restart completed successfully');
+            console.debug('Audio recording restart completed successfully');
         }
         catch (error) {
             console.error('Error restarting audio recording after device switch:', error);
@@ -4001,7 +4000,7 @@ class LayercodeClient {
         try {
             const currentDeviceExists = devices.some((device) => device.deviceId === this.deviceId);
             if (!currentDeviceExists) {
-                console.log('Current device disconnected, switching to next available device');
+                console.debug('Current device disconnected, switching to next available device');
                 try {
                     const nextDevice = devices.find((d) => d.default);
                     if (nextDevice) {
@@ -4031,6 +4030,27 @@ class LayercodeClient {
             }
         });
     }
+    /**
+     * Mutes the microphone to stop sending audio to the server
+     * The connection and recording remain active for quick unmute
+     */
+    mute() {
+        if (!this.isMuted) {
+            this.isMuted = true;
+            console.log('Microphone muted');
+            this.options.onMuteStateChange(true);
+        }
+    }
+    /**
+     * Unmutes the microphone to resume sending audio to the server
+     */
+    unmute() {
+        if (this.isMuted) {
+            this.isMuted = false;
+            console.log('Microphone unmuted');
+            this.options.onMuteStateChange(false);
+        }
+    }
 }
 
 export { LayercodeClient as default };
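
Note: the new mute() and unmute() methods gate audio purely on the client: _handleDataAvailable returns early while isMuted is set, so the recorder and websocket stay live and unmuting is instant. Both methods are idempotent and fire onMuteStateChange only on an actual transition. A usage sketch, assuming client is a connected LayercodeClient and muteButton is a DOM element in the host page:

    muteButton.addEventListener('click', () => {
        if (client.isMuted) {
            client.unmute(); // resumes sending audio; fires onMuteStateChange(false)
        } else {
            client.mute();   // drops outgoing audio; fires onMuteStateChange(true)
        }
    });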