@layercode/js-sdk 2.0.1 → 2.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3484,7 +3484,7 @@ registerProcessor('audio_processor', AudioProcessor);
 
 /* eslint-env browser */
 // SDK version - updated when publishing
-const SDK_VERSION = '2.0.0';
+const SDK_VERSION = '2.0.2';
 /**
  * @class LayercodeClient
  * @classdesc Core client for Layercode audio agent that manages audio recording, WebSocket communication, and speech processing.
@@ -3507,13 +3507,15 @@ registerProcessor('audio_processor', AudioProcessor);
             onError: options.onError || (() => { }),
             onDeviceSwitched: options.onDeviceSwitched || (() => { }),
             onDataMessage: options.onDataMessage || (() => { }),
+            onMessage: options.onMessage || (() => { }),
             onUserAmplitudeChange: options.onUserAmplitudeChange || (() => { }),
             onAgentAmplitudeChange: options.onAgentAmplitudeChange || (() => { }),
             onStatusChange: options.onStatusChange || (() => { }),
             onUserIsSpeakingChange: options.onUserIsSpeakingChange || (() => { }),
+            onMuteStateChange: options.onMuteStateChange || (() => { }),
         };
-        this.AMPLITUDE_MONITORING_SAMPLE_RATE = 10;
-        this._websocketUrl = 'wss://api.layercode.com/v1/agents/websocket';
+        this.AMPLITUDE_MONITORING_SAMPLE_RATE = 2;
+        this._websocketUrl = 'wss://api.layercode.com/v1/agents/web/websocket';
         this.wavRecorder = new WavRecorder({ sampleRate: 8000 }); // TODO should be set by fetched agent config
         this.wavPlayer = new WavStreamPlayer({
             finishedPlayingCallback: this._clientResponseAudioReplayFinished.bind(this),
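The two new constructor callbacks default to no-ops, so existing integrations are unaffected. A minimal consumer sketch, assuming a default export and the option names shown in the hunk above; `agentId`, `authorizeSessionEndpoint`, and the handler bodies are illustrative assumptions, not confirmed by this diff:

```js
import LayercodeClient from '@layercode/js-sdk';

const client = new LayercodeClient({
  agentId: 'your-agent-id', // hypothetical option, not shown in this diff
  authorizeSessionEndpoint: '/api/authorize', // hypothetical option
  // New in 2.0.2+: catch-all message callback (defaults to a no-op)
  onMessage: (message) => console.debug('onMessage:', message.type),
  // New in 2.0.2+: fires whenever mute()/unmute() changes state
  onMuteStateChange: (isMuted) => console.debug('muted:', isMuted),
});
```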
@@ -3534,6 +3536,7 @@ registerProcessor('audio_processor', AudioProcessor);
         this.currentTurnId = null;
         this.audioBuffer = [];
         this.vadConfig = null;
+        this.isMuted = false;
         // this.audioPauseTime = null;
         // Bind event handlers
         this._handleWebSocketMessage = this._handleWebSocketMessage.bind(this);
@@ -3556,16 +3559,20 @@ registerProcessor('audio_processor', AudioProcessor);
         const vadOptions = {
             stream: this.wavRecorder.getStream() || undefined,
             onSpeechStart: () => {
-                console.log('onSpeechStart: sending vad_start');
+                console.debug('onSpeechStart: sending vad_start');
                 this.userIsSpeaking = true;
                 this.options.onUserIsSpeakingChange(true);
                 this._wsSend({
                     type: 'vad_events',
                     event: 'vad_start',
                 });
+                this.options.onMessage({
+                    type: 'vad_events',
+                    event: 'vad_start',
+                });
             },
             onSpeechEnd: () => {
-                console.log('onSpeechEnd: sending vad_end');
+                console.debug('onSpeechEnd: sending vad_end');
                 this.userIsSpeaking = false;
                 this.options.onUserIsSpeakingChange(false);
                 this.audioBuffer = []; // Clear buffer on speech end
@@ -3573,6 +3580,10 @@ registerProcessor('audio_processor', AudioProcessor);
                     type: 'vad_events',
                     event: 'vad_end',
                 });
+                this.options.onMessage({
+                    type: 'vad_events',
+                    event: 'vad_end',
+                });
             },
         };
         // Apply VAD configuration from backend if available
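Both VAD callbacks now mirror the `vad_events` payload to `onMessage` in addition to sending it over the WebSocket, so consumers can react to speech boundaries without a dedicated callback. A minimal handler sketch using the exact payload shape shown above; the DOM toggle stands in for application UI code:

```js
// Runs in the browser; 'vad_start' fires when the user begins speaking,
// 'vad_end' when they stop.
function handleVadMessage(message) {
  if (message.type !== 'vad_events') return;
  document.body.classList.toggle('user-speaking', message.event === 'vad_start');
}
```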
@@ -3631,32 +3642,14 @@ registerProcessor('audio_processor', AudioProcessor);
      * Handles when agent audio finishes playing
      */
     _clientResponseAudioReplayFinished() {
-        console.log('clientResponseAudioReplayFinished');
+        console.debug('clientResponseAudioReplayFinished');
         this._wsSend({
             type: 'trigger.response.audio.replay_finished',
             reason: 'completed',
         });
     }
     async _clientInterruptAssistantReplay() {
-        const offsetData = await this.wavPlayer.interrupt();
-        if (offsetData && this.currentTurnId) {
-            let offsetMs = offsetData.currentTime * 1000;
-            // Send interruption event with accurate playback offset in milliseconds
-            this._wsSend({
-                type: 'trigger.response.audio.interrupted',
-                playback_offset: offsetMs,
-                interruption_context: {
-                    turn_id: this.currentTurnId,
-                    playback_offset_ms: offsetMs,
-                },
-            });
-        }
-        else {
-            console.warn('Interruption requested but missing required data:', {
-                hasOffsetData: !!offsetData,
-                hasTurnId: !!this.currentTurnId,
-            });
-        }
+        await this.wavPlayer.interrupt();
     }
     async triggerUserTurnStarted() {
         if (!this.pushToTalkActive) {
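Note the behavioral change above: interrupting assistant playback now only stops the local player. The client no longer computes a playback offset or sends `trigger.response.audio.interrupted`; this diff does not show a client-side replacement for that event.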
@@ -3679,49 +3672,51 @@ registerProcessor('audio_processor', AudioProcessor);
         try {
             const message = JSON.parse(event.data);
             if (message.type !== 'response.audio') {
-                console.log('received ws msg:', message);
+                console.debug('msg:', message);
             }
             switch (message.type) {
                 case 'turn.start':
                     // Sent from the server to this client when a new user turn is detected
-                    console.log('received turn.start from server');
-                    console.log(message);
                     if (message.role === 'assistant') {
                         // Start tracking new assistant turn
-                        console.log('Assistant turn started, will track new turn ID from audio/text');
+                        console.debug('Assistant turn started, will track new turn ID from audio/text');
                     }
                     else if (message.role === 'user' && !this.pushToTalkEnabled) {
                         // Interrupt any playing assistant audio if this is a turn triggered by the server (and not push to talk, which will have already called interrupt)
-                        console.log('interrupting assistant audio, as user turn has started and pushToTalkEnabled is false');
+                        console.debug('interrupting assistant audio, as user turn has started and pushToTalkEnabled is false');
                         await this._clientInterruptAssistantReplay();
                     }
+                    this.options.onMessage(message);
                     break;
                 case 'response.audio':
                     const audioBuffer = base64ToArrayBuffer(message.content);
                     this.wavPlayer.add16BitPCM(audioBuffer, message.turn_id);
+                    // TODO: once we've added turn_id to the turn.start msgs sent from the server, we should move this currentTurnId switching logic to the turn.start msg case. We can then remove the currentTurnId setting logic from the response.audio and response.text cases.
                     // Set current turn ID from first audio message, or update if different turn
                     if (!this.currentTurnId || this.currentTurnId !== message.turn_id) {
-                        console.log(`Setting current turn ID to: ${message.turn_id} (was: ${this.currentTurnId})`);
+                        console.debug(`Setting current turn ID to: ${message.turn_id} (was: ${this.currentTurnId})`);
                         this.currentTurnId = message.turn_id;
                         // Clean up interrupted tracks, keeping only the current turn
                         this.wavPlayer.clearInterruptedTracks(this.currentTurnId ? [this.currentTurnId] : []);
                     }
                     break;
-                case 'response.text': {
+                case 'response.text':
                     // Set turn ID from first text message if not set
                     if (!this.currentTurnId) {
                         this.currentTurnId = message.turn_id;
-                        console.log(`Setting current turn ID to: ${message.turn_id} from text message`);
+                        console.debug(`Setting current turn ID to: ${message.turn_id} from text message`);
                     }
+                    this.options.onMessage(message);
                     break;
-                }
                 case 'response.data':
-                    console.log('received response.data', message);
                     this.options.onDataMessage(message);
                     break;
+                case 'user.transcript':
+                case 'user.transcript.delta':
+                    this.options.onMessage(message);
+                    break;
                 default:
                     console.warn('Unknown message type received:', message);
-                    break;
             }
         }
         catch (error) {
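The handler above now forwards `turn.start`, `response.text`, `user.transcript`, and `user.transcript.delta` (plus the client-side `vad_events`) to `onMessage`. A sketch of a consumer that routes on `message.type` only; the remaining payload fields are not shown in this diff, so each message is logged as-is:

```js
function routeMessage(message) {
  switch (message.type) {
    case 'turn.start': // a new turn began; message.role is 'user' or 'assistant'
    case 'response.text': // assistant text for the current turn (message.turn_id)
    case 'user.transcript': // user transcript
    case 'user.transcript.delta': // incremental user transcript
    case 'vad_events': // vad_start / vad_end emitted by the client-side VAD
      console.debug(message.type, message);
      break;
    default:
      console.warn('Unhandled message type:', message.type);
  }
}
```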
@@ -3737,6 +3732,10 @@ registerProcessor('audio_processor', AudioProcessor);
         var _a, _b, _c;
         try {
             const base64 = arrayBufferToBase64(data.mono);
+            // Don't send audio if muted
+            if (this.isMuted) {
+                return;
+            }
             // Determine if we should gate audio based on VAD configuration
             const shouldGateAudio = ((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.gate_audio) !== false; // Default to true if not specified
             const bufferFrames = (_c = (_b = this.vadConfig) === null || _b === void 0 ? void 0 : _b.buffer_frames) !== null && _c !== void 0 ? _c : 10; // Default to 10 if not specified
@@ -3754,7 +3753,7 @@ registerProcessor('audio_processor', AudioProcessor);
             if (sendAudio) {
                 // If we have buffered audio and we're gating, send it first
                 if (shouldGateAudio && this.audioBuffer.length > 0) {
-                    console.log(`Sending ${this.audioBuffer.length} buffered audio chunks`);
+                    console.debug(`Sending ${this.audioBuffer.length} buffered audio chunks`);
                     for (const bufferedAudio of this.audioBuffer) {
                         this._wsSend({
                             type: 'client.audio',
@@ -3786,7 +3785,7 @@ registerProcessor('audio_processor', AudioProcessor);
     _wsSend(message) {
         var _a;
         if (message.type !== 'client.audio') {
-            console.log('sent ws msg:', message);
+            console.debug('sent_msg:', message);
         }
         const messageString = JSON.stringify(message);
         if (((_a = this.ws) === null || _a === void 0 ? void 0 : _a.readyState) === WebSocket.OPEN) {
@@ -3859,7 +3858,7 @@ registerProcessor('audio_processor', AudioProcessor);
             client_session_key: authorizeSessionResponseBody.client_session_key,
         })}`);
         const config = authorizeSessionResponseBody.config;
-        console.log('config', config);
+        console.log('AgentConfig', config);
         // Store VAD configuration
         this.vadConfig = config.vad || null;
         if (config.transcription.trigger === 'push_to_talk') {
@@ -3908,7 +3907,7 @@ registerProcessor('audio_processor', AudioProcessor);
     }
     _resetTurnTracking() {
         this.currentTurnId = null;
-        console.log('Reset turn tracking state');
+        console.debug('Reset turn tracking state');
     }
     async disconnect() {
         // Clean up VAD if it exists
@@ -3949,11 +3948,11 @@ registerProcessor('audio_processor', AudioProcessor);
             // Reinitialize VAD with the new audio stream if VAD is enabled
             const shouldUseVAD = !this.pushToTalkEnabled && ((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.enabled) !== false;
             if (shouldUseVAD) {
-                console.log('Reinitializing VAD with new audio stream');
+                console.debug('Reinitializing VAD with new audio stream');
                 const newStream = this.wavRecorder.getStream();
                 await this._reinitializeVAD(newStream);
             }
-            console.log(`Successfully switched to input device: ${deviceId}`);
+            console.debug(`Successfully switched to input device: ${deviceId}`);
         }
         catch (error) {
             console.error(`Failed to switch to input device ${deviceId}:`, error);
@@ -3965,7 +3964,7 @@ registerProcessor('audio_processor', AudioProcessor);
      */
     async _restartAudioRecording() {
         try {
-            console.log('Restarting audio recording after device switch...');
+            console.debug('Restarting audio recording after device switch...');
             try {
                 await this.wavRecorder.end();
             }
@@ -3977,7 +3976,7 @@ registerProcessor('audio_processor', AudioProcessor);
             await this.wavRecorder.record(this._handleDataAvailable, 1638);
             // Re-setup amplitude monitoring with the new stream
             this._setupAmplitudeMonitoring(this.wavRecorder, this.options.onUserAmplitudeChange, (amp) => (this.userAudioAmplitude = amp));
-            console.log('Audio recording restart completed successfully');
+            console.debug('Audio recording restart completed successfully');
         }
         catch (error) {
             console.error('Error restarting audio recording after device switch:', error);
@@ -4007,7 +4006,7 @@ registerProcessor('audio_processor', AudioProcessor);
         try {
             const currentDeviceExists = devices.some((device) => device.deviceId === this.deviceId);
             if (!currentDeviceExists) {
-                console.log('Current device disconnected, switching to next available device');
+                console.debug('Current device disconnected, switching to next available device');
                 try {
                     const nextDevice = devices.find((d) => d.default);
                     if (nextDevice) {
@@ -4037,6 +4036,27 @@ registerProcessor('audio_processor', AudioProcessor);
             }
         });
     }
+    /**
+     * Mutes the microphone to stop sending audio to the server
+     * The connection and recording remain active for quick unmute
+     */
+    mute() {
+        if (!this.isMuted) {
+            this.isMuted = true;
+            console.log('Microphone muted');
+            this.options.onMuteStateChange(true);
+        }
+    }
+    /**
+     * Unmutes the microphone to resume sending audio to the server
+     */
+    unmute() {
+        if (this.isMuted) {
+            this.isMuted = false;
+            console.log('Microphone unmuted');
+            this.options.onMuteStateChange(false);
+        }
+    }
 }
 
 return LayercodeClient;
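The new `mute()`/`unmute()` pair flips the `isMuted` flag that `_handleDataAvailable` checks before sending `client.audio` frames, so the recorder and WebSocket stay live while muted. A usage sketch; `client` is assumed to be a connected `LayercodeClient`, and the button element is hypothetical:

```js
const muteButton = document.querySelector('#mute-toggle'); // hypothetical element
muteButton.addEventListener('click', () => {
  // isMuted is a public field on the client; onMuteStateChange fires only on
  // actual state changes, so calling mute() twice is a no-op.
  if (client.isMuted) {
    client.unmute();
  } else {
    client.mute();
  }
});
```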