npm - @chat21/chat21-web-widget - Versions diffs - 5.1.33-rc11 → 5.1.33-rc9 - Mend

@chat21/chat21-web-widget 5.1.33-rc11 → 5.1.33-rc9

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (29)

package/src/app/providers/voice/voice-streaming.service.ts CHANGED Viewed

@@ -13,11 +13,10 @@ import {
   VoiceWsControlMessage,
 } from './voice-streaming.types';
-// Flux docs recommend 80ms chunks for optimal latency; 160ms is a practical
-// balance for WebM containerization overhead in the browser while providing
-// good STT accuracy.
+// Flux docs recommend 80ms chunks for optimal latency; 250ms is a practical
+// balance for WebM containerization overhead in the browser.
 // Source: https://developers.deepgram.com/docs/flux/quickstart
-const DEFAULT_TIMESLICE_MS = 160;
+const DEFAULT_TIMESLICE_MS = 250;
 const READY_TIMEOUT_MS = 10_000;
 const SESSION_STARTED_TIMEOUT_MS = 10_000;
@@ -259,12 +258,6 @@ export class VoiceStreamingService {
       this.mediaStream = shared
         ? shared
         : await navigator.mediaDevices.getUserMedia({ audio: true });
-      const tracks = this.mediaStream.getAudioTracks();
-      this.logger.info('[VoiceStreaming] microphone acquired', {
-        shared: !!shared,
-        tracks: tracks.length,
-        label: tracks[0]?.label ?? '(unknown)',
-      });
       const recorderOpts: MediaRecorderOptions = {};
       if (mime) {
         recorderOpts.mimeType = mime;
@@ -578,7 +571,7 @@ export class VoiceStreamingService {
   /**
    * Send `{ event: "tts_playback_complete" }` to the proxy, signalling that TTS
-   * playback has finished and the microphone is now safe to receive user speech.
+   * playback has finished and the microphone is ready to receive user speech.
    */
   sendPlaybackComplete(): void {
     if (this.ws?.readyState === WebSocket.OPEN) {
@@ -587,6 +580,21 @@ export class VoiceStreamingService {
     }
   }
+  /**
+   * Send `{ event: "barge_in" }` to the proxy, requesting an immediate interruption
+   * of the ongoing TTS playback.  Use when the user explicitly wants to speak while
+   * the bot is talking (e.g. via a UI button or a client-side VAD onset).
+   *
+   * The proxy will stop the TTS stream and transition to LISTENING; the widget should
+   * handle the server-sent `barge_in` and `listening` events to update local state.
+   */
+  sendBargeIn(): void {
+    if (this.ws?.readyState === WebSocket.OPEN) {
+      this.ws.send(JSON.stringify({ event: 'barge_in' }));
+      this.logger.info('[VoiceStreaming] barge_in sent');
+    }
+  }
   private cleanup(): void {
     this.logger.info('[VoiceStreaming] cleanup', { state: this._currentState, sessionId: this.currentSessionId });
     this.audioChunkCount = 0;

package/src/app/providers/voice/voice-streaming.types.ts CHANGED Viewed

@@ -86,6 +86,7 @@ export type VoiceWsServerEventName =
   | 'thinking'
   | 'speaking'
   | 'done'
+  | 'barge_in'
   | 'error';
 /** Messaggio di controllo JSON dal proxy (`msg.event`); altri campi sono ignorati se non gestiti. */

package/src/app/providers/voice/voice.service.spec.ts CHANGED Viewed

@@ -44,7 +44,7 @@ describe('VoiceService', () => {
     voiceStreamingMock = jasmine.createSpyObj<VoiceStreamingService>(
       'VoiceStreamingService',
-      ['start', 'stop', 'setAudioMuted', 'sendPlaybackComplete', 'pauseRecording', 'resumeRecording'],
+      ['start', 'stop', 'setAudioMuted', 'sendPlaybackComplete', 'sendBargeIn'],
     );
     voiceStreamingMock.start.and.returnValue(Promise.resolve());
     voiceStreamingMock.stop.and.returnValue(
@@ -65,8 +65,6 @@ describe('VoiceService', () => {
       ],
     });
     service = TestBed.inject(VoiceService);
-    spyOn(service as any, '_startKeyboardSound').and.stub();
-    spyOn(service as any, '_stopKeyboardSound').and.stub();
   });
   // ── Existing session lifecycle tests ──────────────────────────────────────
@@ -158,21 +156,22 @@ describe('VoiceService', () => {
     expect(voiceStreamingMock.setAudioMuted).not.toHaveBeenCalled();
   });
-  it('empty-audio path: sendPlaybackComplete after flush but acquisition stays blocked until "listening"', async () => {
+  it('empty-audio path: sendPlaybackComplete immediately but acquisition stays blocked until "listening"', async () => {
     const blocked = await startWssSession();
     const initialLen = blocked.length;
-    // done with no binary audio arms unblock; flush sends playback complete to proxy
+    // Simulate done arriving with NO binary audio (_activeTtsSources === 0)
     wsControl$.next({ event: 'speaking', text: 'hello' } as VoiceWsControlMessage);
     wsControl$.next({ event: 'done' } as VoiceWsControlMessage);
-    expect(voiceStreamingMock.sendPlaybackComplete).not.toHaveBeenCalled();
-    (service as any)._flushTtsUnblock(false);
+    // Proxy signalled immediately
     expect(voiceStreamingMock.sendPlaybackComplete).toHaveBeenCalledTimes(1);
+    // Acquisition must still be blocked — proxy hasn't confirmed LISTENING yet
     const afterDone = blocked.slice(initialLen);
     expect(afterDone.every((v) => v === true)).toBeTrue();
+    // Unblock only after proxy confirms
     wsControl$.next({ event: 'listening' } as VoiceWsControlMessage);
     expect(blocked[blocked.length - 1]).toBeFalse();
   });
@@ -190,19 +189,22 @@ describe('VoiceService', () => {
   // ── Audio preemption tests (SPEC-002) ────────────────────────────────────
-  it('second "speaking" cancels first audio: sendPlaybackComplete only after flush for the new turn', async () => {
+  it('second "speaking" cancels first audio: sendPlaybackComplete called exactly once for the new turn', async () => {
     await startWssSession();
     voiceStreamingMock.sendPlaybackComplete.calls.reset();
+    // First turn: audio chunk arrives → _activeTtsSources = 1 (sync) → done sets _unblockAfterTts
     wsControl$.next({ event: 'speaking', text: 'first' } as VoiceWsControlMessage);
-    ttsBinaryChunk$.next(new ArrayBuffer(4));
-    wsControl$.next({ event: 'done' } as VoiceWsControlMessage);
+    ttsBinaryChunk$.next(new ArrayBuffer(4));           // _activeTtsSources++ synchronously
+    wsControl$.next({ event: 'done' } as VoiceWsControlMessage); // _unblockAfterTts = true
+    // Second turn preempts while first audio is still "playing"
     wsControl$.next({ event: 'speaking', text: 'second' } as VoiceWsControlMessage);
+    // _cancelAllTtsAudio() resets _activeTtsSources=0, _unblockAfterTts=false
+    // done with no audio → sendPlaybackComplete immediately (new turn, _activeTtsSources = 0)
     wsControl$.next({ event: 'done' } as VoiceWsControlMessage);
-    expect(voiceStreamingMock.sendPlaybackComplete).not.toHaveBeenCalled();
-    (service as any)._flushTtsUnblock(false);
     expect(voiceStreamingMock.sendPlaybackComplete).toHaveBeenCalledTimes(1);
   });
@@ -224,4 +226,35 @@ describe('VoiceService', () => {
     expect(voiceStreamingMock.sendPlaybackComplete).not.toHaveBeenCalled();
   });
+  // ── Barge-in ──────────────────────────────────────────────────────────────
+  it('barge_in event cancels TTS audio and unblocks acquisition without sending tts_playback_complete', async () => {
+    await startWssSession();
+    voiceStreamingMock.sendPlaybackComplete.calls.reset();
+    // Simulate bot speaking with audio in flight
+    wsControl$.next({ event: 'speaking', text: 'hello' } as VoiceWsControlMessage);
+    ttsBinaryChunk$.next(new ArrayBuffer(4));   // _activeTtsSources++ synchronously
+    wsControl$.next({ event: 'done' } as VoiceWsControlMessage); // _unblockAfterTts = true
+    // Proxy detects user speech and sends barge_in
+    wsControl$.next({ event: 'barge_in' } as VoiceWsControlMessage);
+    // tts_playback_complete must NOT be sent — it was an interruption, not a completion
+    expect(voiceStreamingMock.sendPlaybackComplete).not.toHaveBeenCalled();
+    expect(voiceStreamingMock.setAudioMuted).not.toHaveBeenCalled();
+    expect((service as any)._isAcquisitionBlocked$.getValue()).toBe(false);
+  });
+  it('barge_in while no TTS is active does not throw and still unblocks acquisition', async () => {
+    await startWssSession();
+    voiceStreamingMock.sendPlaybackComplete.calls.reset();
+    // No speaking event — mic was never muted
+    expect(() => {
+      wsControl$.next({ event: 'barge_in' } as VoiceWsControlMessage);
+    }).not.toThrow();
+    expect(voiceStreamingMock.sendPlaybackComplete).not.toHaveBeenCalled();
+  });
 });

package/src/app/providers/voice/voice.service.ts CHANGED Viewed

@@ -4,7 +4,6 @@ import { getDefaultRealTimeVADOptions } from '@ricky0123/vad-web';
 import { BehaviorSubject, Observable, Subject, Subscription } from 'rxjs';
 import { LoggerInstance } from 'src/chat21-core/providers/logger/loggerInstance';
 import { LoggerService } from 'src/chat21-core/providers/abstract/logger.service';
-import { Globals } from 'src/app/utils/globals';
 import {
   DEFAULT_VOICE_MEDIA_STREAM_CONSTRAINTS,
@@ -110,8 +109,6 @@ export class VoiceService {
   private analyser?: AnalyserNode;
   /** Buffer dedicato (`ArrayBuffer`) per compatibilità con `getByteFrequencyData`. */
   private dataArray?: Uint8Array;
-  /** RAF ID for volume loop - used to cancel on cleanup */
-  private volumeRafId?: number;
   /** Riproduzione chunk TTS binari dal proxy (Web Audio). */
   private ttsPlayContext?: AudioContext;
@@ -127,29 +124,9 @@ export class VoiceService {
   // (barge_in or a new speaking event).  playWsTtsChunk captures this at entry and
   // checks it after the async decodeAudioData call to discard stale results.
   private _ttsGeneration = 0;
-  // ── Ordered-scheduling state ──────────────────────────────────────────────────────────────────
-  // Chunks arrive over WebSocket and their decodeAudioData calls run concurrently.
-  // Because a smaller/later chunk can decode faster than a larger/earlier one, scheduling
-  // based solely on decode-completion order causes audio to play out of arrival order
-  // (e.g. "manuale" starts before "scrittura" even though it arrived after it).
-  // Fix: assign a monotonic sequence number on arrival, decode in parallel, but only
-  // schedule a buffer once every preceding buffer has already been scheduled.
-  private _ttsChunkSeq = 0;       // Incremented on each chunk arrival (arrival order)
-  private _ttsScheduledSeq = 0;   // Next sequence slot that is allowed to be scheduled
-  // Decoded buffers waiting for their turn to be scheduled (keyed by arrival sequence)
-  private _ttsDecodedPending = new Map<number, AudioBuffer>();
-  // ─────────────────────────────────────────────────────────────────────────────────────────────
   // Set to true by the 'done' event; triggers acquisition unblock once all sources end.
   private _unblockAfterTts = false;
   private _unblockSafetyTimer: ReturnType<typeof setTimeout> | null = null;
-  // Fallback timer started after sendPlaybackComplete. If the proxy does not reply
-  // with 'listening' within the timeout window, the UI is force-unblocked so the
-  // user is not left stuck waiting indefinitely.
-  private _listeningFallbackTimer: ReturnType<typeof setTimeout> | null = null;
-  // Track when the last TTS chunk is expected to finish playing.
-  // Used to calculate a proper safety timer duration for long messages.
-  private _ttsExpectedEndTime = 0;
   // ── WSS TTS Karaoke ──────────────────────────────────────────────────────────────────────────
   private _kText = '';
@@ -164,22 +141,13 @@ export class VoiceService {
   readonly voiceTtsKaraoke$: Observable<VoiceTtsKaraokeFrame> = this._voiceTtsKaraokeSubject.asObservable();
   // ─────────────────────────────────────────────────────────────────────────────────────────────
-  // ── Thinking / typing-indicator sound ─────────────────────────────────────────────────────────
-  // Played on loop while the bot is thinking or the first TTS chunk hasn't arrived yet.
-  // Only active during WSS voice sessions (voice-proxy mode).
-  private _keyboardSoundEl: HTMLAudioElement | null = null;
-  // ─────────────────────────────────────────────────────────────────────────────────────────────
   private readonly logger: LoggerService = LoggerInstance.getInstance();
-  private readonly bufferTime = 200000; // used as max safety timer duration for long TTS messages
   constructor(
     private readonly vadService: VadService,
     private readonly ttsPlayback: TtsAudioPlaybackCoordinator,
     private readonly voiceStreaming: VoiceStreamingService,
     @Optional() @Inject(SpeechToTextProvider) private readonly speechToText: SpeechToTextProvider | null,
-    private readonly globals: Globals,
   ) {}
   get isSessionActive(): boolean {
@@ -204,8 +172,6 @@ export class VoiceService {
    * Richiede il microfono, avvia VAD in ascolto (inizio/fine parlato) e registra in WebM per segmento.
    */
   async startSession(options: VoiceSessionStartOptions = {}): Promise<void> {
-    const mode = options.voiceIngressStream ? 'wss-proxy' : 'legacy-vad';
-    this.logger.info('[VoiceService] startSession', { mode });
     await this.stopSession();
     this.sessionConstraints = options.constraints ?? DEFAULT_VOICE_MEDIA_STREAM_CONSTRAINTS;
@@ -223,13 +189,7 @@ export class VoiceService {
   /** Sessione guidata dal proxy: solo mic + volume + WSS (mic in upload, eventi + TTS in download). */
   private async startWssVoiceSession(): Promise<void> {
-    this.logger.info('[VoiceService] acquiring microphone for WSS session');
     this.stream = await navigator.mediaDevices.getUserMedia(this.sessionConstraints);
-    const tracks = this.stream.getAudioTracks();
-    this.logger.info('[VoiceService] microphone acquired', {
-      tracks: tracks.length,
-      label: tracks[0]?.label ?? '(unknown)',
-    });
     // 🎧 AUDIO ANALYSER INIT
     this.initAudioAnalyser(this.stream);
@@ -242,7 +202,7 @@ export class VoiceService {
       await this.voiceStreaming.start(this.voiceIngressConfig!, { sharedMediaStream: this.stream });
       // Signal that the voice proxy is now live — suppresses tiledesk-server TTS.
       this._isWssVoiceActive$.next(true);
-      this.logger.info('[VoiceService] WSS voice session started (no local VAD)');
+      this.logger.log('[VoiceService] sessione WSS (nessun VAD locale)');
     } catch (e) {
       this.wsControlSub?.unsubscribe();
       this.wsControlSub = undefined;
@@ -320,45 +280,25 @@ export class VoiceService {
         this.logger.log('[VoiceService] session_started', { requestId: msg.requestId ?? '' });
         break;
       case 'listening':
-        // Proxy confirmed it is in LISTENING state — unblock the UI and resume
-        // the MediaRecorder. Recording was paused on 'thinking' and must only
-        // restart here, after TTS playback has fully completed and the proxy
-        // is confirmed ready to receive audio again.
-        if (this._listeningFallbackTimer !== null) {
-          clearTimeout(this._listeningFallbackTimer);
-          this._listeningFallbackTimer = null;
-        }
-        // If TTS never arrived (edge case) the keyboard sound would still be looping — stop it.
-        this._stopKeyboardSound();
+        // Proxy confirmed it is in LISTENING state — unblock the UI.
+        // Audio has been flowing continuously (AEC handles echo suppression),
+        // so there is nothing to unmute here.
         this._isAcquisitionBlocked$.next(false);
-        this.voiceStreaming.resumeRecording();
-        this.logger.log('[VoiceService] listening – acquisition unblocked, recording resumed');
+        this.logger.log('[VoiceService] listening – acquisition unblocked');
         break;
       case 'transcript': {
         const text = typeof msg.text === 'string' ? msg.text : '';
         const isFinal = !!msg.isFinal;
-        // Guard: if the proxy has already moved to PROCESSING (thinking) or SPEAKING,
-        // this transcript is a stale in-flight STT result. Discard it so it cannot
-        // override the blocked acquisition state or reach any downstream subscriber.
-        // 'thinking' is stronger than 'transcript' — state must not regress.
-        if (this._isAcquisitionBlocked$.value) {
-          this.logger.warn('[VoiceService] transcript discarded – arrived after thinking/speaking (stale STT result)', { text, isFinal });
-          break;
-        }
         this.logger.log('[VoiceService] transcript', { text, isFinal });
         this.voiceTranscriptSubject.next({ text, isFinal });
         break;
       }
       case 'thinking':
         // Block acquisition UI while the bot processes the utterance.
-        // Pause the MediaRecorder so no audio chunks are sent to the proxy
-        // during PROCESSING state. Recording resumes only after the proxy
-        // confirms LISTENING (i.e. after TTS playback has fully finished).
+        // Audio continues flowing to the proxy so the server can detect
+        // barge-in via Flux STT even during PROCESSING state.
         this._isAcquisitionBlocked$.next(true);
-        this.voiceStreaming.pauseRecording();
-        // Play keyboard typing sound to mask the silence while the bot generates its response.
-        this._startKeyboardSound();
-        this.logger.log('[VoiceService] thinking – acquisition blocked, recording paused', { activeTtsSources: this._activeTtsSources });
+        this.logger.log('[VoiceService] thinking – acquisition blocked', { activeTtsSources: this._activeTtsSources });
         break;
       case 'speaking': {
         this._isAcquisitionBlocked$.next(true);
@@ -370,13 +310,8 @@ export class VoiceService {
         this._cancelAllTtsAudio();
         // Reset TTS scheduling so new chunks play from now, not a stale future time.
         this.ttsNextPlayTime = this.ttsPlayContext?.currentTime ?? 0;
-        // Reset expected end time for new TTS stream
-        this._ttsExpectedEndTime = 0;
         const preview = typeof msg.text === 'string' ? msg.text.slice(0, 80) : '';
         this.logger.log('[VoiceService] speaking – acquisition blocked, TTS text preview', { preview });
-        // Keep keyboard sound going (or start it as a fallback if 'thinking' was missed)
-        // until the first TTS audio chunk actually starts playing.
-        this._startKeyboardSound();
         // Emit the text being spoken so UI can display it alongside the audio.
         if (typeof msg.text === 'string' && msg.text) {
           this.voiceTtsTextSubject.next(msg.text);
@@ -389,31 +324,31 @@ export class VoiceService {
         // _activeTtsSources tracks pending sources; when the last one ends, acquisition unblocks.
         if (this._activeTtsSources > 0) {
           this._unblockAfterTts = true;
-          // Calculate safety timer based on expected audio end time.
-          // Add 5 seconds buffer for network/decode latency.
-          // Minimum 5 seconds, maximum 300 seconds for very long messages.
-          const remainingMs = Math.max(0, this._ttsExpectedEndTime - Date.now());
-          const safetyMs = Math.min(this.bufferTime, Math.max(5000, remainingMs + 5000));
+          // Safety: force-unblock after 15 s in case onended never fires.
           if (this._unblockSafetyTimer !== null) clearTimeout(this._unblockSafetyTimer);
-          this._unblockSafetyTimer = setTimeout(() => this._flushTtsUnblock(true), safetyMs);
-          this.logger.log('[VoiceService] done – TTS still pending, waiting for all sources to end', {
-            activeTtsSources: this._activeTtsSources,
-            expectedEndInMs: remainingMs,
-            safetyTimerMs: safetyMs
-          });
+          this._unblockSafetyTimer = setTimeout(() => this._flushTtsUnblock(true), 15000);
+          this.logger.log('[VoiceService] done – TTS still pending, waiting for all sources to end', { activeTtsSources: this._activeTtsSources });
         } else {
-          // No audio sources tracked yet, but binary TTS chunks may still be in-flight
-          // (WebSocket binary frames can arrive after the JSON 'done' control message).
-          // Set _unblockAfterTts so that _onTtsSourceEnded() triggers _flushTtsUnblock
-          // naturally when those chunks finish playing, instead of relying solely on the
-          // safety timer (which would delay unblock by 10 s even when audio ends sooner).
-          this._unblockAfterTts = true;
-          this.logger.log('[VoiceService] done – no active sources yet, arming unblock for in-flight chunks');
-          // Safety timer as last resort in case no chunks arrive at all.
-          if (this._unblockSafetyTimer !== null) clearTimeout(this._unblockSafetyTimer);
-          this._unblockSafetyTimer = setTimeout(() => this._flushTtsUnblock(true), 10000);
+          // No audio sources pending — playback was already complete (or audio was empty).
+          // Signal the proxy synchronously; mic stays muted until the proxy confirms
+          // LISTENING via the 'listening' event.
+          this.logger.log('[VoiceService] done – no pending TTS, sending playback complete immediately');
+          this.voiceStreaming.sendPlaybackComplete();
+          // Do NOT unblock acquisition here — proxy will send 'listening' which is
+          // the single source of truth for unblocking both UI and mic.
         }
         break;
+      case 'barge_in':
+        // Proxy's VAD detected user speech while the bot was talking — stop TTS immediately.
+        // Do NOT send tts_playback_complete; this is an interruption, not a normal completion.
+        // The proxy will follow with { event: "listening" } which authoritatively unblocks the UI.
+        // Audio was never muted, so there is nothing to unmute.
+        this._cancelAllTtsAudio();
+        this.ttsNextPlayTime = 0;
+        this._unblockAfterTts = false;
+        this._isAcquisitionBlocked$.next(false);
+        this.logger.log('[VoiceService] barge_in – TTS cancelled, acquisition unblocked');
+        break;
       case 'error': {
         const errorMsg = typeof msg.message === 'string' ? msg.message : 'Voice session error';
         this.logger.error('[VoiceService] WSS error', errorMsg);
@@ -426,19 +361,8 @@ export class VoiceService {
     }
   }
-  /**
-   * Chunk TTS: ogni buffer deve essere decodificabile da `decodeAudioData` (es. segmento WebM/Opus completo).
-   *
-   * Decode-race fix: multiple chunks decode concurrently; a smaller/later chunk can finish
-   * decoding before a larger/earlier one, which would cause the AudioBufferSourceNode to be
-   * scheduled out of arrival order (e.g. "manuale" before "scrittura").  To prevent this, each
-   * chunk is assigned a monotonic sequence number on arrival and stored in _ttsDecodedPending
-   * after decoding.  _drainTtsDecodedBuffers() only advances the schedule when the next
-   * expected sequence slot is present, guaranteeing arrival-order playback regardless of decode speed.
-   */
+  /** Chunk TTS: ogni buffer deve essere decodificabile da `decodeAudioData` (es. segmento WebM/Opus completo). */
   private async playWsTtsChunk(buf: ArrayBuffer): Promise<void> {
-    // Assign arrival-order sequence number SYNCHRONOUSLY before any await.
-    const seq = this._ttsChunkSeq++;
     // Capture the current generation BEFORE the synchronous increment so that
     // if _cancelAllTtsAudio() fires (incrementing _ttsGeneration) while this
     // decode is in-flight, the mismatch is detected and the stale chunk is discarded.
@@ -446,12 +370,11 @@ export class VoiceService {
     // Increment SYNCHRONOUSLY before any await so the 'done' event handler (which arrives
     // on the next WebSocket message — a different event-loop tick) sees a non-zero count.
     this._activeTtsSources++;
-    this.logger.log('[VoiceService] TTS chunk received', { seq, bytes: buf.byteLength, activeTtsSources: this._activeTtsSources });
+    this.logger.log('[VoiceService] TTS chunk received', { bytes: buf.byteLength, activeTtsSources: this._activeTtsSources });
     try {
       if (!this.ttsPlayContext || this.ttsPlayContext.state === 'closed') {
         this.ttsPlayContext = new AudioContext();
         this.ttsNextPlayTime = this.ttsPlayContext.currentTime;
-        this.logger.info('[VoiceService] TTS AudioContext created');
       }
       const ctx = this.ttsPlayContext;
       const audioBuf = await ctx.decodeAudioData(buf.slice(0));
@@ -460,57 +383,21 @@ export class VoiceService {
       // for a turn that was already cancelled, and undo the counter increment.
       if (this._ttsGeneration !== capturedGeneration) {
         this._activeTtsSources = Math.max(0, this._activeTtsSources - 1);
-        this.logger.log('[VoiceService] TTS chunk discarded – stale generation', { seq, capturedGeneration, currentGeneration: this._ttsGeneration });
+        this.logger.log('[VoiceService] TTS chunk discarded – stale generation', { capturedGeneration, currentGeneration: this._ttsGeneration });
         return;
       }
-      // Store the decoded buffer under its arrival sequence number and attempt to
-      // flush any contiguous run of decoded buffers in order.
-      this._ttsDecodedPending.set(seq, audioBuf);
-      this._drainTtsDecodedBuffers();
-    } catch (e) {
-      // Advance the scheduler past this failed slot so subsequent decoded chunks are
-      // not blocked waiting for a slot that will never be filled.
-      if (seq === this._ttsScheduledSeq) {
-        this._ttsScheduledSeq++;
-        this._drainTtsDecodedBuffers();
-      }
-      this._onTtsSourceEnded();
-      this.logger.warn('[VoiceService] TTS chunk decode failed', { seq }, e);
-    }
-  }
-  /**
-   * Schedules decoded TTS buffers in strict arrival order.
-   * Called after every successful decode. Drains the _ttsDecodedPending map
-   * starting at _ttsScheduledSeq, stopping as soon as the next slot is missing
-   * (i.e. that chunk is still decoding or failed).
-   */
-  private _drainTtsDecodedBuffers(): void {
-    const ctx = this.ttsPlayContext;
-    if (!ctx) return;
-    while (this._ttsDecodedPending.has(this._ttsScheduledSeq)) {
-      const audioBuf = this._ttsDecodedPending.get(this._ttsScheduledSeq)!;
-      this._ttsDecodedPending.delete(this._ttsScheduledSeq);
-      this._ttsScheduledSeq++;
       const src = ctx.createBufferSource();
       src.buffer = audioBuf;
       src.connect(ctx.destination);
       const t0 = Math.max(ctx.currentTime, this.ttsNextPlayTime);
       src.start(t0);
       this.ttsNextPlayTime = t0 + audioBuf.duration;
-      // Track the expected end time in wall-clock time (ms) for safety timer calculation.
-      const audioEndDelayMs = (this.ttsNextPlayTime - ctx.currentTime) * 1000;
-      this._ttsExpectedEndTime = Date.now() + audioEndDelayMs;
-      const isFirstChunk = this._activeTtsSourceNodes.length === 0;
       this._activeTtsSourceNodes.push(src);
-      if (isFirstChunk) {
-        // First real audio about to play — stop the keyboard typing sound immediately.
-        this._stopKeyboardSound();
-        this.logger.info('[VoiceService] TTS playback started', { durationS: audioBuf.duration.toFixed(3), startsAtS: t0.toFixed(3) });
-      }
-      this.logger.log('[VoiceService] TTS chunk scheduled', { seq: this._ttsScheduledSeq - 1, durationS: audioBuf.duration.toFixed(3), startsAtS: t0.toFixed(3), activeTtsSources: this._activeTtsSources, expectedEndInMs: audioEndDelayMs.toFixed(0) });
+      this.logger.log('[VoiceService] TTS chunk scheduled', { durationS: audioBuf.duration.toFixed(3), startsAtS: t0.toFixed(3), activeTtsSources: this._activeTtsSources });
       src.onended = () => this._onTtsSourceEnded(src);
+    } catch (e) {
+      this._onTtsSourceEnded();
+      this.logger.warn('[VoiceService] TTS chunk decode failed', e);
     }
   }
@@ -521,10 +408,6 @@ export class VoiceService {
       if (idx !== -1) { this._activeTtsSourceNodes.splice(idx, 1); }
     }
     this.logger.log('[VoiceService] TTS source ended', { activeTtsSources: this._activeTtsSources, unblockPending: this._unblockAfterTts });
-    if (this._activeTtsSources === 0) {
-      this.logger.info('[VoiceService] TTS playback ended – all sources finished');
-      console.log('[VoiceService] TTS audio finished playing');
-    }
     if (this._unblockAfterTts && this._activeTtsSources === 0) {
       this._flushTtsUnblock(false);
     }
@@ -552,11 +435,6 @@ export class VoiceService {
     this._activeTtsSourceNodes = [];
     this._activeTtsSources = 0;
     this._unblockAfterTts = false;
-    this._ttsExpectedEndTime = 0;
-    // Reset ordered-scheduling state so the next speaking turn starts fresh.
-    this._ttsChunkSeq = 0;
-    this._ttsScheduledSeq = 0;
-    this._ttsDecodedPending.clear();
     this._stopTtsKaraoke(true);
     this.logger.log('[VoiceService] TTS cancelled – all audio sources stopped');
   }
@@ -574,21 +452,12 @@ export class VoiceService {
       this.logger.log('[VoiceService] TTS unblock: all sources ended, sending playback complete');
     }
     this._stopTtsKaraoke(true);
-    // Signal the proxy that TTS playback is complete. The proxy will transition
-    // to LISTENING and send a 'listening' event back; the mic resumes and the UI
-    // unblocks only then — so the user sees 'listening' exactly when the stream
-    // is open, not before.
-    // Start a fallback timer: if the proxy does not respond with 'listening' within
-    // 3 seconds (network hiccup, server race, etc.) force-unblock so the user is
-    // never left stuck. The timer is cancelled immediately if 'listening' arrives.
+    // Signal the proxy that TTS playback is complete.  The proxy will transition
+    // to LISTENING and send a 'listening' event back; the mic is unmuted there
+    // (not here) so it is live only when the proxy is confirmed ready.
+    // Do NOT call _isAcquisitionBlocked$.next(false) here — 'listening' is the
+    // single source of truth so that UI and mic unblock atomically.
     this.voiceStreaming.sendPlaybackComplete();
-    if (this._listeningFallbackTimer !== null) clearTimeout(this._listeningFallbackTimer);
-    this._listeningFallbackTimer = setTimeout(() => {
-      this._listeningFallbackTimer = null;
-      this.logger.warn('[VoiceService] listening fallback timer fired – proxy did not respond, force-unblocking');
-      this._isAcquisitionBlocked$.next(false);
-      this.voiceStreaming.resumeRecording();
-    }, 3000);
   }
   // ── WSS TTS Karaoke helpers ───────────────────────────────────────────────
@@ -661,39 +530,8 @@ export class VoiceService {
   // ─────────────────────────────────────────────────────────────────────────
-  // ── Keyboard typing-indicator sound helpers ───────────────────────────────
-  /**
-   * Starts the keyboard sound on loop to mask silence while the bot is
-   * generating its response. No-op if already playing.
-   * Only called during WSS voice sessions (voice-proxy mode).
-   */
-  private _startKeyboardSound(): void {
-    if (this._keyboardSoundEl) return; // already playing
-    const file = this.globals.keyboardSoundFile ?? 'keyboard.mp3';
-    const src = /^https?:\/\//i.test(file)
-      ? file
-      : `${this.globals.baseLocation}/assets/sounds/${file}`;
-    const audio = new Audio(src);
-    audio.loop = true;
-    audio.volume = Math.min(1, Math.max(0, this.globals.keyboardSoundVolume));
-    audio.play().catch((e) => this.logger.warn('[VoiceService] keyboard sound play failed', e));
-    this._keyboardSoundEl = audio;
-    this.logger.log('[VoiceService] keyboard sound started', { src, volume: audio.volume });
-  }
-  /** Stops and discards the keyboard typing sound. No-op if not playing. */
-  private _stopKeyboardSound(): void {
-    if (!this._keyboardSoundEl) return;
-    this._keyboardSoundEl.pause();
-    this._keyboardSoundEl.currentTime = 0;
-    this._keyboardSoundEl = null;
-    this.logger.log('[VoiceService] keyboard sound stopped');
-  }
-  // ─────────────────────────────────────────────────────────────────────────
   async stopSession(options?: { discardInProgressSegment?: boolean}): Promise<{ voiceIngressResultUrl: string | null }> {
     const discard = options?.discardInProgressSegment === true;
-    this.logger.info('[VoiceService] stopSession', { discard, isWssVoiceActive: this._isWssVoiceActive$.getValue() });
     this.wsControlSub?.unsubscribe();
     this.wsControlSub = undefined;
@@ -710,7 +548,6 @@ export class VoiceService {
     this._cancelAllTtsAudio();
     this.ttsPlayContext = undefined;
     this.ttsNextPlayTime = 0;
-    this._stopKeyboardSound();
     let voiceIngressResultUrl: string | null = null;
     if (this.voiceIngressConfig) {
@@ -753,10 +590,6 @@ export class VoiceService {
     }
     // 🎧 cleanup audio context
-    if (this.volumeRafId) {
-      cancelAnimationFrame(this.volumeRafId);
-      this.volumeRafId = undefined;
-    }
     this.audioContext?.close();
     this.audioContext = undefined;
     this.analyser = undefined;
@@ -775,10 +608,6 @@ export class VoiceService {
     clearTimeout(this.responseTimeoutId);
     this.responseTimeoutId = undefined;
     this.isWaitingForResponse = false;
-    if (this._listeningFallbackTimer !== null) {
-      clearTimeout(this._listeningFallbackTimer);
-      this._listeningFallbackTimer = null;
-    }
     this._isAcquisitionBlocked$.next(false);
     return { voiceIngressResultUrl };
@@ -859,7 +688,8 @@ export class VoiceService {
   private startVolumeLoop(): void {
     const tick = () => {
       if (!this.analyser || !this.dataArray) {
-        return; // Stop the loop if analyser is cleaned up
+        requestAnimationFrame(tick);
+        return;
       }
       this.analyser.getByteFrequencyData(
@@ -875,10 +705,10 @@ export class VoiceService {
       this.volumeSubject.next(volume);
-      this.volumeRafId = requestAnimationFrame(tick);
+      requestAnimationFrame(tick);
     };
-    this.volumeRafId = requestAnimationFrame(tick);
+    tick();
   }
   /**