@layercode/js-sdk 1.0.25 → 1.0.27

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -3483,6 +3483,8 @@ registerProcessor('audio_processor', AudioProcessor);
3483
3483
  }
3484
3484
 
3485
3485
  /* eslint-env browser */
3486
+ // SDK version - updated when publishing
3487
+ const SDK_VERSION = '1.0.27';
3486
3488
  /**
3487
3489
  * @class LayercodeClient
3488
3490
  * @classdesc Core client for Layercode audio pipeline that manages audio recording, WebSocket communication, and speech processing.
@@ -3493,6 +3495,7 @@ registerProcessor('audio_processor', AudioProcessor);
3493
3495
  * @param {Object} options - Configuration options
3494
3496
  */
3495
3497
  constructor(options) {
3498
+ this.deviceId = null;
3496
3499
  this.options = {
3497
3500
  pipelineId: options.pipelineId,
3498
3501
  sessionId: options.sessionId || null,
@@ -3502,6 +3505,7 @@ registerProcessor('audio_processor', AudioProcessor);
3502
3505
  onConnect: options.onConnect || (() => { }),
3503
3506
  onDisconnect: options.onDisconnect || (() => { }),
3504
3507
  onError: options.onError || (() => { }),
3508
+ onDeviceSwitched: options.onDeviceSwitched || (() => { }),
3505
3509
  onDataMessage: options.onDataMessage || (() => { }),
3506
3510
  onUserAmplitudeChange: options.onUserAmplitudeChange || (() => { }),
3507
3511
  onAgentAmplitudeChange: options.onAgentAmplitudeChange || (() => { }),
@@ -3525,198 +3529,99 @@ registerProcessor('audio_processor', AudioProcessor);
3525
3529
  this.pushToTalkEnabled = false;
3526
3530
  this.canInterrupt = false;
3527
3531
  this.userIsSpeaking = false;
3528
- this.endUserTurn = false;
3529
3532
  this.recorderStarted = false;
3530
3533
  this.readySent = false;
3531
3534
  this.currentTurnId = null;
3532
3535
  this.audioBuffer = [];
3533
- this.audioPauseTime = null;
3536
+ this.vadConfig = null;
3537
+ // this.audioPauseTime = null;
3534
3538
  // Bind event handlers
3535
3539
  this._handleWebSocketMessage = this._handleWebSocketMessage.bind(this);
3536
3540
  this._handleDataAvailable = this._handleDataAvailable.bind(this);
3537
- }
3538
- _setupAmplitudeBasedVAD() {
3539
- let isSpeakingByAmplitude = false;
3540
- let silenceFrames = 0;
3541
- const AMPLITUDE_THRESHOLD = 0.01; // Adjust based on testing
3542
- const SILENCE_FRAMES_THRESHOLD = 30; // ~600ms at 20ms chunks
3543
- // Monitor amplitude changes
3544
- this.wavRecorder.startAmplitudeMonitoring((amplitude) => {
3545
- const wasSpeaking = isSpeakingByAmplitude;
3546
- if (amplitude > AMPLITUDE_THRESHOLD) {
3547
- silenceFrames = 0;
3548
- if (!wasSpeaking) {
3549
- // Speech started - pause audio if playing and track timing for interruption calculation
3550
- if (this.canInterrupt && this.wavPlayer.isPlaying) {
3551
- this.audioPauseTime = Date.now();
3552
- this.wavPlayer.pause();
3553
- }
3554
- isSpeakingByAmplitude = true;
3555
- this.userIsSpeaking = true;
3556
- this.options.onUserIsSpeakingChange(true);
3557
- this._wsSend({
3558
- type: 'vad_events',
3559
- event: 'vad_start',
3560
- });
3561
- }
3562
- }
3563
- else {
3564
- silenceFrames++;
3565
- if (wasSpeaking && silenceFrames >= SILENCE_FRAMES_THRESHOLD) {
3566
- // Speech ended
3567
- isSpeakingByAmplitude = false;
3568
- this.userIsSpeaking = false;
3569
- this.options.onUserIsSpeakingChange(false);
3570
- this._wsSend({
3571
- type: 'vad_events',
3572
- event: 'vad_end',
3573
- });
3574
- }
3575
- }
3576
- });
3541
+ this._setupDeviceChangeListener();
3577
3542
  }
3578
3543
  _initializeVAD() {
3579
- console.log('initializing VAD', { pushToTalkEnabled: this.pushToTalkEnabled, canInterrupt: this.canInterrupt });
3544
+ var _a;
3545
+ console.log('initializing VAD', { pushToTalkEnabled: this.pushToTalkEnabled, canInterrupt: this.canInterrupt, vadConfig: this.vadConfig });
3580
3546
  // If we're in push to talk mode, we don't need to use the VAD model
3581
3547
  if (this.pushToTalkEnabled) {
3582
3548
  return;
3583
3549
  }
3584
- const timeout = setTimeout(() => {
3585
- console.log('silero vad model timeout');
3586
- console.warn('VAD model failed to load - falling back to amplitude-based detection');
3550
+ // Check if VAD is disabled
3551
+ if (((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.enabled) === false) {
3552
+ console.log('VAD is disabled by backend configuration');
3553
+ return;
3554
+ }
3555
+ // Build VAD configuration object, only including keys that are defined
3556
+ const vadOptions = {
3557
+ stream: this.wavRecorder.getStream() || undefined,
3558
+ onSpeechStart: () => {
3559
+ console.log('onSpeechStart: sending vad_start');
3560
+ this.userIsSpeaking = true;
3561
+ this.options.onUserIsSpeakingChange(true);
3562
+ this._wsSend({
3563
+ type: 'vad_events',
3564
+ event: 'vad_start',
3565
+ });
3566
+ },
3567
+ onSpeechEnd: () => {
3568
+ console.log('onSpeechEnd: sending vad_end');
3569
+ this.userIsSpeaking = false;
3570
+ this.options.onUserIsSpeakingChange(false);
3571
+ this.audioBuffer = []; // Clear buffer on speech end
3572
+ this._wsSend({
3573
+ type: 'vad_events',
3574
+ event: 'vad_end',
3575
+ });
3576
+ },
3577
+ };
3578
+ // Apply VAD configuration from backend if available
3579
+ if (this.vadConfig) {
3580
+ // Only add keys that are explicitly defined (not undefined)
3581
+ if (this.vadConfig.model !== undefined)
3582
+ vadOptions.model = this.vadConfig.model;
3583
+ if (this.vadConfig.positive_speech_threshold !== undefined)
3584
+ vadOptions.positiveSpeechThreshold = this.vadConfig.positive_speech_threshold;
3585
+ if (this.vadConfig.negative_speech_threshold !== undefined)
3586
+ vadOptions.negativeSpeechThreshold = this.vadConfig.negative_speech_threshold;
3587
+ if (this.vadConfig.redemption_frames !== undefined)
3588
+ vadOptions.redemptionFrames = this.vadConfig.redemption_frames;
3589
+ if (this.vadConfig.min_speech_frames !== undefined)
3590
+ vadOptions.minSpeechFrames = this.vadConfig.min_speech_frames;
3591
+ if (this.vadConfig.pre_speech_pad_frames !== undefined)
3592
+ vadOptions.preSpeechPadFrames = this.vadConfig.pre_speech_pad_frames;
3593
+ if (this.vadConfig.frame_samples !== undefined)
3594
+ vadOptions.frameSamples = this.vadConfig.frame_samples;
3595
+ }
3596
+ else {
3597
+ // Default values if no config from backend
3598
+ vadOptions.model = 'v5';
3599
+ vadOptions.positiveSpeechThreshold = 0.15;
3600
+ vadOptions.negativeSpeechThreshold = 0.05;
3601
+ vadOptions.redemptionFrames = 4;
3602
+ vadOptions.minSpeechFrames = 2;
3603
+ vadOptions.preSpeechPadFrames = 0;
3604
+ vadOptions.frameSamples = 512; // Required for v5
3605
+ }
3606
+ console.log('Creating VAD with options:', vadOptions);
3607
+ dist.MicVAD.new(vadOptions)
3608
+ .then((vad) => {
3609
+ this.vad = vad;
3610
+ this.vad.start();
3611
+ console.log('VAD started successfully');
3612
+ })
3613
+ .catch((error) => {
3614
+ console.warn('Error initializing VAD:', error);
3587
3615
  // Send a message to server indicating VAD failure
3588
3616
  this._wsSend({
3589
3617
  type: 'vad_events',
3590
3618
  event: 'vad_model_failed',
3591
3619
  });
3592
- // In automatic mode without VAD, allow the bot to speak initially
3593
- this.userIsSpeaking = false;
3594
- this.options.onUserIsSpeakingChange(false);
3595
- // Set up amplitude-based fallback detection
3596
- this._setupAmplitudeBasedVAD();
3597
- }, 2000);
3598
- if (!this.canInterrupt) {
3599
- dist.MicVAD.new({
3600
- stream: this.wavRecorder.getStream() || undefined,
3601
- model: 'v5',
3602
- positiveSpeechThreshold: 0.7,
3603
- negativeSpeechThreshold: 0.55,
3604
- redemptionFrames: 25, // Number of frames of silence before onVADMisfire or onSpeechEnd is called. Effectively a delay before restarting.
3605
- minSpeechFrames: 0,
3606
- preSpeechPadFrames: 0,
3607
- onSpeechStart: () => {
3608
- this.userIsSpeaking = true;
3609
- this.options.onUserIsSpeakingChange(true);
3610
- console.log('onSpeechStart: sending vad_start');
3611
- this._wsSend({
3612
- type: 'vad_events',
3613
- event: 'vad_start',
3614
- });
3615
- },
3616
- onSpeechEnd: () => {
3617
- console.log('onSpeechEnd: sending vad_end');
3618
- this.endUserTurn = true; // Set flag to indicate that the user turn has ended
3619
- this.audioBuffer = []; // Clear buffer on speech end
3620
- this.userIsSpeaking = false;
3621
- this.options.onUserIsSpeakingChange(false);
3622
- console.log('onSpeechEnd: State after update - endUserTurn:', this.endUserTurn, 'userIsSpeaking:', this.userIsSpeaking);
3623
- // Send vad_end immediately instead of waiting for next audio chunk
3624
- this._wsSend({
3625
- type: 'vad_events',
3626
- event: 'vad_end',
3627
- });
3628
- this.endUserTurn = false; // Reset the flag after sending vad_end
3629
- },
3630
- })
3631
- .then((vad) => {
3632
- clearTimeout(timeout);
3633
- this.vad = vad;
3634
- this.vad.start();
3635
- console.log('VAD started');
3636
- })
3637
- .catch((error) => {
3638
- console.error('Error initializing VAD:', error);
3639
- });
3640
- }
3641
- else {
3642
- dist.MicVAD.new({
3643
- stream: this.wavRecorder.getStream() || undefined,
3644
- model: 'v5',
3645
- // baseAssetPath: '/', // Use if bundling model locally
3646
- // onnxWASMBasePath: '/', // Use if bundling model locally
3647
- positiveSpeechThreshold: 0.7,
3648
- negativeSpeechThreshold: 0.55,
3649
- redemptionFrames: 25, // Number of frames of silence before onVADMisfire or onSpeechEnd is called. Effectively a delay before restarting.
3650
- minSpeechFrames: 25,
3651
- preSpeechPadFrames: 0,
3652
- onSpeechStart: () => {
3653
- // Only pause agent audio if it's currently playing
3654
- if (this.wavPlayer.isPlaying) {
3655
- console.log('onSpeechStart: WavPlayer is playing, pausing it.');
3656
- this.audioPauseTime = Date.now(); // Track when we paused
3657
- this.wavPlayer.pause();
3658
- }
3659
- else {
3660
- console.log('onSpeechStart: WavPlayer is not playing, VAD will not pause.');
3661
- }
3662
- console.log('onSpeechStart: sending vad_start');
3663
- this._wsSend({
3664
- type: 'vad_events',
3665
- event: 'vad_start',
3666
- });
3667
- this.userIsSpeaking = true;
3668
- this.options.onUserIsSpeakingChange(true);
3669
- this.endUserTurn = false; // Reset endUserTurn when speech starts
3670
- console.log('onSpeechStart: State after update - endUserTurn:', this.endUserTurn, 'userIsSpeaking:', this.userIsSpeaking);
3671
- },
3672
- onVADMisfire: () => {
3673
- // If the speech detected was for less than minSpeechFrames, this is called instead of onSpeechEnd, and we should resume the assistant audio as it was a false interruption. We include a configurable delay so the assistant isn't too quick to start speaking again.
3674
- this.userIsSpeaking = false;
3675
- this.audioBuffer = []; // Clear buffer on misfire
3676
- this.options.onUserIsSpeakingChange(false);
3677
- // Add the missing delay before resuming to prevent race conditions
3678
- setTimeout(() => {
3679
- if (!this.wavPlayer.isPlaying) {
3680
- console.log('onVADMisfire: Resuming after delay');
3681
- this.audioPauseTime = null; // Clear pause time since we're resuming
3682
- this.wavPlayer.play();
3683
- }
3684
- else {
3685
- console.log('onVADMisfire: Not resuming - either no pause or user speaking again');
3686
- this.endUserTurn = true;
3687
- }
3688
- }, this.options.vadResumeDelay);
3689
- },
3690
- onSpeechEnd: () => {
3691
- console.log('onSpeechEnd: sending vad_end');
3692
- this.endUserTurn = true; // Set flag to indicate that the user turn has ended
3693
- this.audioBuffer = []; // Clear buffer on speech end
3694
- this.userIsSpeaking = false;
3695
- this.options.onUserIsSpeakingChange(false);
3696
- console.log('onSpeechEnd: State after update - endUserTurn:', this.endUserTurn, 'userIsSpeaking:', this.userIsSpeaking);
3697
- // Send vad_end immediately instead of waiting for next audio chunk
3698
- this._wsSend({
3699
- type: 'vad_events',
3700
- event: 'vad_end',
3701
- });
3702
- this.endUserTurn = false; // Reset the flag after sending vad_end
3703
- },
3704
- })
3705
- .then((vad) => {
3706
- clearTimeout(timeout);
3707
- this.vad = vad;
3708
- this.vad.start();
3709
- console.log('VAD started');
3710
- })
3711
- .catch((error) => {
3712
- console.error('Error initializing VAD:', error);
3713
- });
3714
- }
3620
+ });
3715
3621
  }
3716
3622
  /**
3717
3623
  * Updates the connection status and triggers the callback
3718
3624
  * @param {string} status - New status value
3719
- * @private
3720
3625
  */
3721
3626
  _setStatus(status) {
3722
3627
  this.status = status;
@@ -3724,7 +3629,6 @@ registerProcessor('audio_processor', AudioProcessor);
3724
3629
  }
3725
3630
  /**
3726
3631
  * Handles when agent audio finishes playing
3727
- * @private
3728
3632
  */
3729
3633
  _clientResponseAudioReplayFinished() {
3730
3634
  console.log('clientResponseAudioReplayFinished');
@@ -3737,17 +3641,6 @@ registerProcessor('audio_processor', AudioProcessor);
3737
3641
  const offsetData = await this.wavPlayer.interrupt();
3738
3642
  if (offsetData && this.currentTurnId) {
3739
3643
  let offsetMs = offsetData.currentTime * 1000;
3740
- // Calculate accurate offset by subtracting pause time if audio was paused for VAD
3741
- if (this.audioPauseTime) {
3742
- const pauseDurationMs = Date.now() - this.audioPauseTime;
3743
- const adjustedOffsetMs = Math.max(0, offsetMs - pauseDurationMs);
3744
- console.log(`Interruption detected: Raw offset ${offsetMs}ms, pause duration ${pauseDurationMs}ms, adjusted offset ${adjustedOffsetMs}ms for turn ${this.currentTurnId}`);
3745
- offsetMs = adjustedOffsetMs;
3746
- this.audioPauseTime = null; // Clear the pause time
3747
- }
3748
- else {
3749
- console.log(`Interruption detected: ${offsetMs}ms offset for turn ${this.currentTurnId} (no pause adjustment needed)`);
3750
- }
3751
3644
  // Send interruption event with accurate playback offset in milliseconds
3752
3645
  this._wsSend({
3753
3646
  type: 'trigger.response.audio.interrupted',
@@ -3781,7 +3674,6 @@ registerProcessor('audio_processor', AudioProcessor);
3781
3674
  /**
3782
3675
  * Handles incoming WebSocket messages
3783
3676
  * @param {MessageEvent} event - The WebSocket message event
3784
- * @private
3785
3677
  */
3786
3678
  async _handleWebSocketMessage(event) {
3787
3679
  try {
@@ -3796,12 +3688,10 @@ registerProcessor('audio_processor', AudioProcessor);
3796
3688
  console.log(message);
3797
3689
  if (message.role === 'assistant') {
3798
3690
  // Start tracking new assistant turn
3799
- // Note: Don't reset currentTurnId here - let response.audio set it
3800
- // This prevents race conditions where text arrives before audio
3801
3691
  console.log('Assistant turn started, will track new turn ID from audio/text');
3802
3692
  }
3803
- else if (message.role === 'user' && !this.pushToTalkEnabled && this.canInterrupt) {
3804
- // Interrupt any playing assistant audio if this is a turn trigged by the server (and not push to talk, which will have already called interrupt)
3693
+ else if (message.role === 'user' && !this.pushToTalkEnabled) {
3694
+ // Interrupt any playing assistant audio if this is a turn triggered by the server (and not push to talk, which will have already called interrupt)
3805
3695
  console.log('interrupting assistant audio, as user turn has started and pushToTalkEnabled is false');
3806
3696
  await this._clientInterruptAssistantReplay();
3807
3697
  }
@@ -3823,7 +3713,6 @@ registerProcessor('audio_processor', AudioProcessor);
3823
3713
  this.currentTurnId = message.turn_id;
3824
3714
  console.log(`Setting current turn ID to: ${message.turn_id} from text message`);
3825
3715
  }
3826
- // Note: We no longer track text content in the client - the pipeline handles interruption estimation
3827
3716
  break;
3828
3717
  }
3829
3718
  case 'response.data':
@@ -3831,7 +3720,7 @@ registerProcessor('audio_processor', AudioProcessor);
3831
3720
  this.options.onDataMessage(message);
3832
3721
  break;
3833
3722
  default:
3834
- console.error('Unknown message type received:', message);
3723
+ console.warn('Unknown message type received:', message);
3835
3724
  break;
3836
3725
  }
3837
3726
  }
@@ -3843,15 +3732,28 @@ registerProcessor('audio_processor', AudioProcessor);
3843
3732
  /**
3844
3733
  * Handles available client browser microphone audio data and sends it over the WebSocket
3845
3734
  * @param {ArrayBuffer} data - The audio data buffer
3846
- * @private
3847
3735
  */
3848
3736
  _handleDataAvailable(data) {
3737
+ var _a, _b, _c;
3849
3738
  try {
3850
3739
  const base64 = arrayBufferToBase64(data.mono);
3851
- const sendAudio = this.pushToTalkEnabled ? this.pushToTalkActive : this.userIsSpeaking;
3740
+ // Determine if we should gate audio based on VAD configuration
3741
+ const shouldGateAudio = ((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.gate_audio) !== false; // Default to true if not specified
3742
+ const bufferFrames = (_c = (_b = this.vadConfig) === null || _b === void 0 ? void 0 : _b.buffer_frames) !== null && _c !== void 0 ? _c : 10; // Default to 10 if not specified
3743
+ let sendAudio;
3744
+ if (this.pushToTalkEnabled) {
3745
+ sendAudio = this.pushToTalkActive;
3746
+ }
3747
+ else if (shouldGateAudio) {
3748
+ sendAudio = this.userIsSpeaking;
3749
+ }
3750
+ else {
3751
+ // If gate_audio is false, always send audio
3752
+ sendAudio = true;
3753
+ }
3852
3754
  if (sendAudio) {
3853
- // If we have buffered audio, send it first
3854
- if (this.audioBuffer.length > 0) {
3755
+ // If we have buffered audio and we're gating, send it first
3756
+ if (shouldGateAudio && this.audioBuffer.length > 0) {
3855
3757
  console.log(`Sending ${this.audioBuffer.length} buffered audio chunks`);
3856
3758
  for (const bufferedAudio of this.audioBuffer) {
3857
3759
  this._wsSend({
@@ -3870,8 +3772,8 @@ registerProcessor('audio_processor', AudioProcessor);
3870
3772
  else {
3871
3773
  // Buffer audio when not sending (to catch audio just before VAD triggers)
3872
3774
  this.audioBuffer.push(base64);
3873
- // Keep buffer size reasonable (e.g., last 10 chunks ≈ 200ms at 20ms chunks)
3874
- if (this.audioBuffer.length > 10) {
3775
+ // Keep buffer size based on configuration
3776
+ if (this.audioBuffer.length > bufferFrames) {
3875
3777
  this.audioBuffer.shift(); // Remove oldest chunk
3876
3778
  }
3877
3779
  }
@@ -3903,7 +3805,6 @@ registerProcessor('audio_processor', AudioProcessor);
3903
3805
  * @param {WavRecorder | WavStreamPlayer} source - The audio source (recorder or player).
3904
3806
  * @param {(amplitude: number) => void} callback - The callback function to invoke on amplitude change.
3905
3807
  * @param {(amplitude: number) => void} updateInternalState - Function to update the internal amplitude state.
3906
- * @private
3907
3808
  */
3908
3809
  _setupAmplitudeMonitoring(source, callback, updateInternalState) {
3909
3810
  // Set up amplitude monitoring only if a callback is provided
@@ -3935,6 +3836,7 @@ registerProcessor('audio_processor', AudioProcessor);
3935
3836
  let authorizeSessionRequestBody = {
3936
3837
  pipeline_id: this.options.pipelineId,
3937
3838
  metadata: this.options.metadata,
3839
+ sdk_version: SDK_VERSION,
3938
3840
  };
3939
3841
  // If we're reconnecting to a previous session, we need to include the session_id in the request. Otherwise we don't send session_id, and a new session will be created and the session_id will be returned in the response.
3940
3842
  if (this.options.sessionId) {
@@ -3958,6 +3860,8 @@ registerProcessor('audio_processor', AudioProcessor);
3958
3860
  })}`);
3959
3861
  const config = authorizeSessionResponseBody.config;
3960
3862
  console.log('config', config);
3863
+ // Store VAD configuration
3864
+ this.vadConfig = config.vad || null;
3961
3865
  if (config.transcription.trigger === 'push_to_talk') {
3962
3866
  this.pushToTalkEnabled = true;
3963
3867
  }
@@ -3968,7 +3872,6 @@ registerProcessor('audio_processor', AudioProcessor);
3968
3872
  else {
3969
3873
  throw new Error(`Unknown trigger: ${config.transcription.trigger}`);
3970
3874
  }
3971
- this._initializeVAD();
3972
3875
  // Bind the websocket message callbacks
3973
3876
  this.ws.onmessage = this._handleWebSocketMessage;
3974
3877
  this.ws.onopen = () => {
@@ -3988,18 +3891,13 @@ registerProcessor('audio_processor', AudioProcessor);
3988
3891
  this._setStatus('error');
3989
3892
  this.options.onError(new Error('WebSocket connection error'));
3990
3893
  };
3991
- // Initialize microphone audio capture
3992
- await this.wavRecorder.begin();
3993
- await this.wavRecorder.record(this._handleDataAvailable, 1638);
3994
- // Set up microphone amplitude monitoring
3995
- this._setupAmplitudeMonitoring(this.wavRecorder, this.options.onUserAmplitudeChange, (amp) => (this.userAudioAmplitude = amp));
3996
3894
  // Initialize audio player
3997
3895
  await this.wavPlayer.connect();
3998
3896
  // Set up audio player amplitude monitoring
3999
3897
  this._setupAmplitudeMonitoring(this.wavPlayer, this.options.onAgentAmplitudeChange, (amp) => (this.agentAudioAmplitude = amp));
4000
- // Mark recorder as started and attempt to notify server
4001
- this.recorderStarted = true;
4002
- this._sendReadyIfNeeded();
3898
+ // wavRecorder will be started from the onDeviceSwitched callback,
3899
+ // which is called when the device is first initialized and also when the device is switched
3900
+ // this is to ensure that the device is initialized before the recorder is started
4003
3901
  }
4004
3902
  catch (error) {
4005
3903
  console.error('Error connecting to Layercode pipeline:', error);
@@ -4019,6 +3917,7 @@ registerProcessor('audio_processor', AudioProcessor);
4019
3917
  this.vad.destroy();
4020
3918
  this.vad = null;
4021
3919
  }
3920
+ this.wavRecorder.listenForDeviceChange(null);
4022
3921
  this.wavRecorder.quit();
4023
3922
  this.wavPlayer.disconnect();
4024
3923
  // Reset turn tracking
@@ -4042,19 +3941,101 @@ registerProcessor('audio_processor', AudioProcessor);
4042
3941
  * @param {string} deviceId - The deviceId of the new microphone
4043
3942
  */
4044
3943
  async setInputDevice(deviceId) {
4045
- if (this.wavRecorder) {
3944
+ var _a;
3945
+ try {
3946
+ this.deviceId = deviceId;
3947
+ // Restart recording with the new device
3948
+ await this._restartAudioRecording();
3949
+ // Reinitialize VAD with the new audio stream if VAD is enabled
3950
+ const shouldUseVAD = !this.pushToTalkEnabled && ((_a = this.vadConfig) === null || _a === void 0 ? void 0 : _a.enabled) !== false;
3951
+ if (shouldUseVAD) {
3952
+ console.log('Reinitializing VAD with new audio stream');
3953
+ const newStream = this.wavRecorder.getStream();
3954
+ await this._reinitializeVAD(newStream);
3955
+ }
3956
+ console.log(`Successfully switched to input device: ${deviceId}`);
3957
+ }
3958
+ catch (error) {
3959
+ console.error(`Failed to switch to input device ${deviceId}:`, error);
3960
+ throw new Error(`Failed to switch to input device: ${error instanceof Error ? error.message : String(error)}`);
3961
+ }
3962
+ }
3963
+ /**
3964
+ * Restarts audio recording after a device switch to ensure audio is captured from the new device
3965
+ */
3966
+ async _restartAudioRecording() {
3967
+ try {
3968
+ console.log('Restarting audio recording after device switch...');
4046
3969
  try {
4047
3970
  await this.wavRecorder.end();
4048
3971
  }
4049
- catch (e) { }
4050
- try {
4051
- await this.wavRecorder.quit();
3972
+ catch (_a) {
3973
+ // Ignore cleanup errors
4052
3974
  }
4053
- catch (e) { }
3975
+ // Start with new device
3976
+ await this.wavRecorder.begin(this.deviceId || undefined);
3977
+ await this.wavRecorder.record(this._handleDataAvailable, 1638);
3978
+ // Re-setup amplitude monitoring with the new stream
3979
+ this._setupAmplitudeMonitoring(this.wavRecorder, this.options.onUserAmplitudeChange, (amp) => (this.userAudioAmplitude = amp));
3980
+ console.log('Audio recording restart completed successfully');
3981
+ }
3982
+ catch (error) {
3983
+ console.error('Error restarting audio recording after device switch:', error);
3984
+ this.options.onError(error instanceof Error ? error : new Error(String(error)));
4054
3985
  }
4055
- await this.wavRecorder.begin(deviceId);
4056
- await this.wavRecorder.record(this._handleDataAvailable, 1638);
4057
- this._setupAmplitudeMonitoring(this.wavRecorder, this.options.onUserAmplitudeChange, (amp) => (this.userAudioAmplitude = amp));
3986
+ }
3987
+ /**
3988
+ * Reinitializes VAD with a new stream (used after device switching)
3989
+ */
3990
+ async _reinitializeVAD(stream) {
3991
+ // Clean up existing VAD
3992
+ if (this.vad) {
3993
+ this.vad.pause();
3994
+ this.vad.destroy();
3995
+ this.vad = null;
3996
+ }
3997
+ // Reinitialize with new stream
3998
+ if (stream) {
3999
+ this._initializeVAD();
4000
+ }
4001
+ }
4002
+ /**
4003
+ * Sets up the device change event listener
4004
+ */
4005
+ _setupDeviceChangeListener() {
4006
+ this.wavRecorder.listenForDeviceChange(async (devices) => {
4007
+ try {
4008
+ const currentDeviceExists = devices.some((device) => device.deviceId === this.deviceId);
4009
+ if (!currentDeviceExists) {
4010
+ console.log('Current device disconnected, switching to next available device');
4011
+ try {
4012
+ const nextDevice = devices.find((d) => d.default);
4013
+ if (nextDevice) {
4014
+ await this.setInputDevice(nextDevice.deviceId);
4015
+ // Mark recorder as started and attempt to notify server
4016
+ if (!this.recorderStarted) {
4017
+ this.recorderStarted = true;
4018
+ this._sendReadyIfNeeded();
4019
+ }
4020
+ // Notify about device switch
4021
+ if (this.options.onDeviceSwitched) {
4022
+ this.options.onDeviceSwitched(nextDevice.deviceId);
4023
+ }
4024
+ }
4025
+ else {
4026
+ console.warn('No alternative audio device found');
4027
+ }
4028
+ }
4029
+ catch (error) {
4030
+ console.error('Error switching to next device:', error);
4031
+ throw error;
4032
+ }
4033
+ }
4034
+ }
4035
+ catch (error) {
4036
+ this.options.onError(error instanceof Error ? error : new Error(String(error)));
4037
+ }
4038
+ });
4058
4039
  }
4059
4040
  }
4060
4041