@chat21/chat21-web-widget 5.1.33-rc9 → 5.1.34-rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. package/CHANGELOG.md +14 -0
  2. package/package.json +1 -1
  3. package/src/app/component/conversation-detail/conversation/conversation.component.ts +3 -1
  4. package/src/app/component/conversation-detail/conversation-content/conversation-content.component.spec.ts +0 -7
  5. package/src/app/component/conversation-detail/conversation-content/conversation-content.component.ts +7 -5
  6. package/src/app/component/conversation-detail/conversation-footer/conversation-footer.component.html +4 -3
  7. package/src/app/component/conversation-detail/conversation-footer/conversation-footer.component.scss +18 -18
  8. package/src/app/component/conversation-detail/conversation-footer/conversation-footer.component.ts +6 -0
  9. package/src/app/component/conversation-detail/stream-audio-spectrum/stream-audio-spectrum.component.html +8 -5
  10. package/src/app/component/conversation-detail/stream-audio-spectrum/stream-audio-spectrum.component.scss +5 -1
  11. package/src/app/component/form/inputs/form-text/form-text.component.ts +9 -3
  12. package/src/app/component/message/bubble-message/bubble-message.component.scss +5 -0
  13. package/src/app/component/message/bubble-message/bubble-message.component.ts +14 -0
  14. package/src/app/component/message/json-sources/json-sources.component.scss +12 -8
  15. package/src/app/pipe/marked.pipe.ts +51 -41
  16. package/src/app/providers/global-settings.service.ts +31 -0
  17. package/src/app/providers/json-sources-parser.service.ts +25 -32
  18. package/src/app/providers/voice/voice-streaming.service.ts +11 -19
  19. package/src/app/providers/voice/voice-streaming.types.ts +0 -1
  20. package/src/app/providers/voice/voice.service.spec.ts +12 -45
  21. package/src/app/providers/voice/voice.service.ts +215 -45
  22. package/src/app/utils/globals.ts +10 -0
  23. package/src/assets/i18n/en.json +106 -125
  24. package/src/assets/i18n/es.json +1 -0
  25. package/src/assets/i18n/fr.json +1 -0
  26. package/src/assets/i18n/it.json +1 -0
  27. package/src/assets/sounds/keyboard.mp3 +0 -0
  28. package/src/assets/twp/chatbot-panel.html +3 -1
  29. package/src/chat21-core/utils/utils-message.ts +15 -5
  30. package/src/widget-config-template.json +1 -0
  31. package/src/widget-config.json +30 -28
  32. package/.playwright-mcp/console-2026-05-08T15-31-09-000Z.log +0 -17
  33. package/.playwright-mcp/console-2026-05-08T15-32-19-412Z.log +0 -89
  34. package/.playwright-mcp/console-2026-05-08T16-18-48-424Z.log +0 -133
  35. package/.playwright-mcp/console-2026-05-11T12-54-06-869Z.log +0 -13
  36. package/.playwright-mcp/console-2026-05-11T12-54-56-229Z.log +0 -147
  37. package/.playwright-mcp/console-2026-05-11T12-55-47-174Z.log +0 -183
  38. package/.playwright-mcp/console-2026-05-11T15-34-03-590Z.log +0 -210
  39. package/.playwright-mcp/console-2026-05-12T15-07-31-880Z.log +0 -118
  40. package/.playwright-mcp/page-2026-05-08T15-32-19-900Z.yml +0 -851
  41. package/.playwright-mcp/page-2026-05-08T15-32-47-264Z.yml +0 -857
  42. package/.playwright-mcp/page-2026-05-08T15-33-17-089Z.yml +0 -1110
  43. package/.playwright-mcp/page-2026-05-08T15-33-23-486Z.yml +0 -1069
  44. package/.playwright-mcp/page-2026-05-08T15-33-45-390Z.yml +0 -1076
  45. package/.playwright-mcp/page-2026-05-08T15-33-52-666Z.yml +0 -1072
  46. package/.playwright-mcp/page-2026-05-08T15-34-01-338Z.yml +0 -1085
  47. package/.playwright-mcp/page-2026-05-08T15-34-07-227Z.yml +0 -1072
  48. package/.playwright-mcp/page-2026-05-08T15-34-13-875Z.yml +0 -1072
  49. package/.playwright-mcp/page-2026-05-08T15-34-21-885Z.yml +0 -1109
  50. package/.playwright-mcp/page-2026-05-08T15-34-32-755Z.yml +0 -1109
  51. package/.playwright-mcp/page-2026-05-08T15-35-09-607Z.yml +0 -1119
  52. package/.playwright-mcp/page-2026-05-08T15-35-14-242Z.yml +0 -1109
  53. package/.playwright-mcp/page-2026-05-08T16-18-48-671Z.yml +0 -44
  54. package/.playwright-mcp/page-2026-05-08T16-18-52-753Z.png +0 -0
  55. package/.playwright-mcp/page-2026-05-08T16-19-13-919Z.yml +0 -68
  56. package/.playwright-mcp/page-2026-05-08T16-19-17-977Z.png +0 -0
  57. package/.playwright-mcp/page-2026-05-08T16-19-25-733Z.yml +0 -120
  58. package/.playwright-mcp/page-2026-05-08T16-19-29-252Z.png +0 -0
  59. package/.playwright-mcp/page-2026-05-08T16-19-39-269Z.yml +0 -80
  60. package/.playwright-mcp/page-2026-05-08T16-19-43-915Z.png +0 -0
  61. package/.playwright-mcp/page-2026-05-08T16-20-04-407Z.yml +0 -81
  62. package/.playwright-mcp/page-2026-05-08T16-20-08-984Z.png +0 -0
  63. package/.playwright-mcp/page-2026-05-08T16-20-32-397Z.png +0 -0
  64. package/.playwright-mcp/page-2026-05-08T16-20-58-658Z.png +0 -0
  65. package/.playwright-mcp/page-2026-05-08T16-21-12-320Z.yml +0 -86
  66. package/.playwright-mcp/page-2026-05-08T16-21-39-154Z.yml +0 -91
  67. package/.playwright-mcp/page-2026-05-08T16-21-45-420Z.png +0 -0
  68. package/.playwright-mcp/page-2026-05-08T16-22-21-062Z.yml +0 -0
  69. package/.playwright-mcp/page-2026-05-08T16-22-58-232Z.yml +0 -91
  70. package/.playwright-mcp/page-2026-05-08T16-23-36-520Z.yml +0 -0
  71. package/.playwright-mcp/page-2026-05-08T16-23-46-805Z.yml +0 -100
  72. package/.playwright-mcp/page-2026-05-08T16-23-55-169Z.png +0 -0
  73. package/.playwright-mcp/page-2026-05-08T16-24-26-574Z.yml +0 -91
  74. package/.playwright-mcp/page-2026-05-08T16-25-34-414Z.png +0 -0
  75. package/.playwright-mcp/page-2026-05-08T16-25-59-831Z.png +0 -0
  76. package/.playwright-mcp/page-2026-05-08T16-26-21-809Z.yml +0 -91
  77. package/.playwright-mcp/page-2026-05-08T16-26-47-443Z.yml +0 -105
  78. package/.playwright-mcp/page-2026-05-08T16-26-56-136Z.png +0 -0
  79. package/.playwright-mcp/page-2026-05-08T16-27-59-610Z.yml +0 -48
  80. package/.playwright-mcp/page-2026-05-11T12-54-07-180Z.yml +0 -44
  81. package/.playwright-mcp/page-2026-05-11T12-54-56-946Z.yml +0 -4
  82. package/.playwright-mcp/page-2026-05-11T12-55-47-503Z.yml +0 -24
  83. package/.playwright-mcp/page-2026-05-11T12-56-00-766Z.yml +0 -28
  84. package/.playwright-mcp/page-2026-05-11T12-56-06-438Z.yml +0 -90
  85. package/.playwright-mcp/page-2026-05-11T12-57-56-838Z.yml +0 -106
  86. package/.playwright-mcp/page-2026-05-11T12-58-00-124Z.yml +0 -106
  87. package/.playwright-mcp/page-2026-05-11T12-59-08-836Z.yml +0 -61
  88. package/.playwright-mcp/page-2026-05-11T12-59-12-088Z.yml +0 -61
  89. package/.playwright-mcp/page-2026-05-11T12-59-26-215Z.yml +0 -69
  90. package/.playwright-mcp/page-2026-05-11T12-59-29-519Z.yml +0 -69
  91. package/.playwright-mcp/page-2026-05-11T12-59-37-309Z.yml +0 -0
  92. package/.playwright-mcp/page-2026-05-11T12-59-39-968Z.yml +0 -79
  93. package/.playwright-mcp/page-2026-05-11T12-59-45-983Z.yml +0 -78
  94. package/.playwright-mcp/page-2026-05-11T12-59-49-951Z.yml +0 -78
  95. package/.playwright-mcp/page-2026-05-11T15-34-04-515Z.yml +0 -0
  96. package/.playwright-mcp/page-2026-05-12T15-07-32-171Z.yml +0 -44
  97. package/.playwright-mcp/page-2026-05-12T15-08-09-820Z.yml +0 -119
  98. package/docs/TILEDESK_WIDGET_ACCESSIBILITY_STATEMENT_COMPLETE.md +0 -379
  99. package/playwright-report/index.html +0 -90
  100. package/src/app/component/conversation-detail/conversation-footer/conversation-footer.component copy.html +0 -172
  101. package/test-results/.last-run.json +0 -4
@@ -4,6 +4,7 @@ import { getDefaultRealTimeVADOptions } from '@ricky0123/vad-web';
4
4
  import { BehaviorSubject, Observable, Subject, Subscription } from 'rxjs';
5
5
  import { LoggerInstance } from 'src/chat21-core/providers/logger/loggerInstance';
6
6
  import { LoggerService } from 'src/chat21-core/providers/abstract/logger.service';
7
+ import { Globals } from 'src/app/utils/globals';
7
8
 
8
9
  import {
9
10
  DEFAULT_VOICE_MEDIA_STREAM_CONSTRAINTS,
@@ -109,6 +110,8 @@ export class VoiceService {
109
110
  private analyser?: AnalyserNode;
110
111
  /** Buffer dedicato (`ArrayBuffer`) per compatibilità con `getByteFrequencyData`. */
111
112
  private dataArray?: Uint8Array;
113
+ /** RAF ID for volume loop - used to cancel on cleanup */
114
+ private volumeRafId?: number;
112
115
 
113
116
  /** Riproduzione chunk TTS binari dal proxy (Web Audio). */
114
117
  private ttsPlayContext?: AudioContext;
@@ -124,9 +127,29 @@ export class VoiceService {
124
127
  // (barge_in or a new speaking event). playWsTtsChunk captures this at entry and
125
128
  // checks it after the async decodeAudioData call to discard stale results.
126
129
  private _ttsGeneration = 0;
130
+
131
+ // ── Ordered-scheduling state ──────────────────────────────────────────────────────────────────
132
+ // Chunks arrive over WebSocket and their decodeAudioData calls run concurrently.
133
+ // Because a smaller/later chunk can decode faster than a larger/earlier one, scheduling
134
+ // based solely on decode-completion order causes audio to play out of arrival order
135
+ // (e.g. "manuale" starts before "scrittura" even though it arrived after it).
136
+ // Fix: assign a monotonic sequence number on arrival, decode in parallel, but only
137
+ // schedule a buffer once every preceding buffer has already been scheduled.
138
+ private _ttsChunkSeq = 0; // Incremented on each chunk arrival (arrival order)
139
+ private _ttsScheduledSeq = 0; // Next sequence slot that is allowed to be scheduled
140
+ // Decoded buffers waiting for their turn to be scheduled (keyed by arrival sequence)
141
+ private _ttsDecodedPending = new Map<number, AudioBuffer>();
142
+ // ─────────────────────────────────────────────────────────────────────────────────────────────
127
143
  // Set to true by the 'done' event; triggers acquisition unblock once all sources end.
128
144
  private _unblockAfterTts = false;
129
145
  private _unblockSafetyTimer: ReturnType<typeof setTimeout> | null = null;
146
+ // Fallback timer started after sendPlaybackComplete. If the proxy does not reply
147
+ // with 'listening' within the timeout window, the UI is force-unblocked so the
148
+ // user is not left stuck waiting indefinitely.
149
+ private _listeningFallbackTimer: ReturnType<typeof setTimeout> | null = null;
150
+ // Track when the last TTS chunk is expected to finish playing.
151
+ // Used to calculate a proper safety timer duration for long messages.
152
+ private _ttsExpectedEndTime = 0;
130
153
 
131
154
  // ── WSS TTS Karaoke ──────────────────────────────────────────────────────────────────────────
132
155
  private _kText = '';
@@ -141,13 +164,22 @@ export class VoiceService {
141
164
  readonly voiceTtsKaraoke$: Observable<VoiceTtsKaraokeFrame> = this._voiceTtsKaraokeSubject.asObservable();
142
165
  // ─────────────────────────────────────────────────────────────────────────────────────────────
143
166
 
167
+ // ── Thinking / typing-indicator sound ─────────────────────────────────────────────────────────
168
+ // Played on loop while the bot is thinking or the first TTS chunk hasn't arrived yet.
169
+ // Only active during WSS voice sessions (voice-proxy mode).
170
+ private _keyboardSoundEl: HTMLAudioElement | null = null;
171
+ // ─────────────────────────────────────────────────────────────────────────────────────────────
172
+
144
173
  private readonly logger: LoggerService = LoggerInstance.getInstance();
145
174
 
175
+ private readonly bufferTime = 200000; // used as max safety timer duration for long TTS messages
176
+
146
177
  constructor(
147
178
  private readonly vadService: VadService,
148
179
  private readonly ttsPlayback: TtsAudioPlaybackCoordinator,
149
180
  private readonly voiceStreaming: VoiceStreamingService,
150
181
  @Optional() @Inject(SpeechToTextProvider) private readonly speechToText: SpeechToTextProvider | null,
182
+ private readonly globals: Globals,
151
183
  ) {}
152
184
 
153
185
  get isSessionActive(): boolean {
@@ -172,6 +204,8 @@ export class VoiceService {
172
204
  * Richiede il microfono, avvia VAD in ascolto (inizio/fine parlato) e registra in WebM per segmento.
173
205
  */
174
206
  async startSession(options: VoiceSessionStartOptions = {}): Promise<void> {
207
+ const mode = options.voiceIngressStream ? 'wss-proxy' : 'legacy-vad';
208
+ this.logger.info('[VoiceService] startSession', { mode });
175
209
  await this.stopSession();
176
210
 
177
211
  this.sessionConstraints = options.constraints ?? DEFAULT_VOICE_MEDIA_STREAM_CONSTRAINTS;
@@ -189,7 +223,13 @@ export class VoiceService {
189
223
 
190
224
  /** Sessione guidata dal proxy: solo mic + volume + WSS (mic in upload, eventi + TTS in download). */
191
225
  private async startWssVoiceSession(): Promise<void> {
226
+ this.logger.info('[VoiceService] acquiring microphone for WSS session');
192
227
  this.stream = await navigator.mediaDevices.getUserMedia(this.sessionConstraints);
228
+ const tracks = this.stream.getAudioTracks();
229
+ this.logger.info('[VoiceService] microphone acquired', {
230
+ tracks: tracks.length,
231
+ label: tracks[0]?.label ?? '(unknown)',
232
+ });
193
233
 
194
234
  // 🎧 AUDIO ANALYSER INIT
195
235
  this.initAudioAnalyser(this.stream);
@@ -202,7 +242,7 @@ export class VoiceService {
202
242
  await this.voiceStreaming.start(this.voiceIngressConfig!, { sharedMediaStream: this.stream });
203
243
  // Signal that the voice proxy is now live — suppresses tiledesk-server TTS.
204
244
  this._isWssVoiceActive$.next(true);
205
- this.logger.log('[VoiceService] sessione WSS (nessun VAD locale)');
245
+ this.logger.info('[VoiceService] WSS voice session started (no local VAD)');
206
246
  } catch (e) {
207
247
  this.wsControlSub?.unsubscribe();
208
248
  this.wsControlSub = undefined;
@@ -280,25 +320,45 @@ export class VoiceService {
280
320
  this.logger.log('[VoiceService] session_started', { requestId: msg.requestId ?? '' });
281
321
  break;
282
322
  case 'listening':
283
- // Proxy confirmed it is in LISTENING state — unblock the UI.
284
- // Audio has been flowing continuously (AEC handles echo suppression),
285
- // so there is nothing to unmute here.
323
+ // Proxy confirmed it is in LISTENING state — unblock the UI and resume
324
+ // the MediaRecorder. Recording was paused on 'thinking' and must only
325
+ // restart here, after TTS playback has fully completed and the proxy
326
+ // is confirmed ready to receive audio again.
327
+ if (this._listeningFallbackTimer !== null) {
328
+ clearTimeout(this._listeningFallbackTimer);
329
+ this._listeningFallbackTimer = null;
330
+ }
331
+ // If TTS never arrived (edge case) the keyboard sound would still be looping — stop it.
332
+ this._stopKeyboardSound();
286
333
  this._isAcquisitionBlocked$.next(false);
287
- this.logger.log('[VoiceService] listening – acquisition unblocked');
334
+ this.voiceStreaming.resumeRecording();
335
+ this.logger.log('[VoiceService] listening – acquisition unblocked, recording resumed');
288
336
  break;
289
337
  case 'transcript': {
290
338
  const text = typeof msg.text === 'string' ? msg.text : '';
291
339
  const isFinal = !!msg.isFinal;
340
+ // Guard: if the proxy has already moved to PROCESSING (thinking) or SPEAKING,
341
+ // this transcript is a stale in-flight STT result. Discard it so it cannot
342
+ // override the blocked acquisition state or reach any downstream subscriber.
343
+ // 'thinking' is stronger than 'transcript' — state must not regress.
344
+ if (this._isAcquisitionBlocked$.value) {
345
+ this.logger.warn('[VoiceService] transcript discarded – arrived after thinking/speaking (stale STT result)', { text, isFinal });
346
+ break;
347
+ }
292
348
  this.logger.log('[VoiceService] transcript', { text, isFinal });
293
349
  this.voiceTranscriptSubject.next({ text, isFinal });
294
350
  break;
295
351
  }
296
352
  case 'thinking':
297
353
  // Block acquisition UI while the bot processes the utterance.
298
- // Audio continues flowing to the proxy so the server can detect
299
- // barge-in via Flux STT even during PROCESSING state.
354
+ // Pause the MediaRecorder so no audio chunks are sent to the proxy
355
+ // during PROCESSING state. Recording resumes only after the proxy
356
+ // confirms LISTENING (i.e. after TTS playback has fully finished).
300
357
  this._isAcquisitionBlocked$.next(true);
301
- this.logger.log('[VoiceService] thinking – acquisition blocked', { activeTtsSources: this._activeTtsSources });
358
+ this.voiceStreaming.pauseRecording();
359
+ // Play keyboard typing sound to mask the silence while the bot generates its response.
360
+ this._startKeyboardSound();
361
+ this.logger.log('[VoiceService] thinking – acquisition blocked, recording paused', { activeTtsSources: this._activeTtsSources });
302
362
  break;
303
363
  case 'speaking': {
304
364
  this._isAcquisitionBlocked$.next(true);
@@ -310,8 +370,13 @@ export class VoiceService {
310
370
  this._cancelAllTtsAudio();
311
371
  // Reset TTS scheduling so new chunks play from now, not a stale future time.
312
372
  this.ttsNextPlayTime = this.ttsPlayContext?.currentTime ?? 0;
373
+ // Reset expected end time for new TTS stream
374
+ this._ttsExpectedEndTime = 0;
313
375
  const preview = typeof msg.text === 'string' ? msg.text.slice(0, 80) : '';
314
376
  this.logger.log('[VoiceService] speaking – acquisition blocked, TTS text preview', { preview });
377
+ // Keep keyboard sound going (or start it as a fallback if 'thinking' was missed)
378
+ // until the first TTS audio chunk actually starts playing.
379
+ this._startKeyboardSound();
315
380
  // Emit the text being spoken so UI can display it alongside the audio.
316
381
  if (typeof msg.text === 'string' && msg.text) {
317
382
  this.voiceTtsTextSubject.next(msg.text);
@@ -324,31 +389,31 @@ export class VoiceService {
324
389
  // _activeTtsSources tracks pending sources; when the last one ends, acquisition unblocks.
325
390
  if (this._activeTtsSources > 0) {
326
391
  this._unblockAfterTts = true;
327
- // Safety: force-unblock after 15 s in case onended never fires.
392
+ // Calculate safety timer based on expected audio end time.
393
+ // Add 5 seconds buffer for network/decode latency.
394
+ // Minimum 5 seconds, maximum 200 seconds (`bufferTime`) for very long messages.
395
+ const remainingMs = Math.max(0, this._ttsExpectedEndTime - Date.now());
396
+ const safetyMs = Math.min(this.bufferTime, Math.max(5000, remainingMs + 5000));
328
397
  if (this._unblockSafetyTimer !== null) clearTimeout(this._unblockSafetyTimer);
329
- this._unblockSafetyTimer = setTimeout(() => this._flushTtsUnblock(true), 15000);
330
- this.logger.log('[VoiceService] done – TTS still pending, waiting for all sources to end', { activeTtsSources: this._activeTtsSources });
398
+ this._unblockSafetyTimer = setTimeout(() => this._flushTtsUnblock(true), safetyMs);
399
+ this.logger.log('[VoiceService] done – TTS still pending, waiting for all sources to end', {
400
+ activeTtsSources: this._activeTtsSources,
401
+ expectedEndInMs: remainingMs,
402
+ safetyTimerMs: safetyMs
403
+ });
331
404
  } else {
332
- // No audio sources pending playback was already complete (or audio was empty).
333
- // Signal the proxy synchronously; mic stays muted until the proxy confirms
334
- // LISTENING via the 'listening' event.
335
- this.logger.log('[VoiceService] done no pending TTS, sending playback complete immediately');
336
- this.voiceStreaming.sendPlaybackComplete();
337
- // Do NOT unblock acquisition here — proxy will send 'listening' which is
338
- // the single source of truth for unblocking both UI and mic.
405
+ // No audio sources tracked yet, but binary TTS chunks may still be in-flight
406
+ // (WebSocket binary frames can arrive after the JSON 'done' control message).
407
+ // Set _unblockAfterTts so that _onTtsSourceEnded() triggers _flushTtsUnblock
408
+ // naturally when those chunks finish playing, instead of relying solely on the
409
+ // safety timer (which would delay unblock by 10 s even when audio ends sooner).
410
+ this._unblockAfterTts = true;
411
+ this.logger.log('[VoiceService] done no active sources yet, arming unblock for in-flight chunks');
412
+ // Safety timer as last resort in case no chunks arrive at all.
413
+ if (this._unblockSafetyTimer !== null) clearTimeout(this._unblockSafetyTimer);
414
+ this._unblockSafetyTimer = setTimeout(() => this._flushTtsUnblock(true), 10000);
339
415
  }
340
416
  break;
341
- case 'barge_in':
342
- // Proxy's VAD detected user speech while the bot was talking — stop TTS immediately.
343
- // Do NOT send tts_playback_complete; this is an interruption, not a normal completion.
344
- // The proxy will follow with { event: "listening" } which authoritatively unblocks the UI.
345
- // Audio was never muted, so there is nothing to unmute.
346
- this._cancelAllTtsAudio();
347
- this.ttsNextPlayTime = 0;
348
- this._unblockAfterTts = false;
349
- this._isAcquisitionBlocked$.next(false);
350
- this.logger.log('[VoiceService] barge_in – TTS cancelled, acquisition unblocked');
351
- break;
352
417
  case 'error': {
353
418
  const errorMsg = typeof msg.message === 'string' ? msg.message : 'Voice session error';
354
419
  this.logger.error('[VoiceService] WSS error', errorMsg);
@@ -361,8 +426,19 @@ export class VoiceService {
361
426
  }
362
427
  }
363
428
 
364
- /** Chunk TTS: ogni buffer deve essere decodificabile da `decodeAudioData` (es. segmento WebM/Opus completo). */
429
+ /**
430
+ * Chunk TTS: ogni buffer deve essere decodificabile da `decodeAudioData` (es. segmento WebM/Opus completo).
431
+ *
432
+ * Decode-race fix: multiple chunks decode concurrently; a smaller/later chunk can finish
433
+ * decoding before a larger/earlier one, which would cause the AudioBufferSourceNode to be
434
+ * scheduled out of arrival order (e.g. "manuale" before "scrittura"). To prevent this, each
435
+ * chunk is assigned a monotonic sequence number on arrival and stored in _ttsDecodedPending
436
+ * after decoding. _drainTtsDecodedBuffers() only advances the schedule when the next
437
+ * expected sequence slot is present, guaranteeing arrival-order playback regardless of decode speed.
438
+ */
365
439
  private async playWsTtsChunk(buf: ArrayBuffer): Promise<void> {
440
+ // Assign arrival-order sequence number SYNCHRONOUSLY before any await.
441
+ const seq = this._ttsChunkSeq++;
366
442
  // Capture the current generation BEFORE the synchronous increment so that
367
443
  // if _cancelAllTtsAudio() fires (incrementing _ttsGeneration) while this
368
444
  // decode is in-flight, the mismatch is detected and the stale chunk is discarded.
@@ -370,11 +446,12 @@ export class VoiceService {
370
446
  // Increment SYNCHRONOUSLY before any await so the 'done' event handler (which arrives
371
447
  // on the next WebSocket message — a different event-loop tick) sees a non-zero count.
372
448
  this._activeTtsSources++;
373
- this.logger.log('[VoiceService] TTS chunk received', { bytes: buf.byteLength, activeTtsSources: this._activeTtsSources });
449
+ this.logger.log('[VoiceService] TTS chunk received', { seq, bytes: buf.byteLength, activeTtsSources: this._activeTtsSources });
374
450
  try {
375
451
  if (!this.ttsPlayContext || this.ttsPlayContext.state === 'closed') {
376
452
  this.ttsPlayContext = new AudioContext();
377
453
  this.ttsNextPlayTime = this.ttsPlayContext.currentTime;
454
+ this.logger.info('[VoiceService] TTS AudioContext created');
378
455
  }
379
456
  const ctx = this.ttsPlayContext;
380
457
  const audioBuf = await ctx.decodeAudioData(buf.slice(0));
@@ -383,21 +460,57 @@ export class VoiceService {
383
460
  // for a turn that was already cancelled, and undo the counter increment.
384
461
  if (this._ttsGeneration !== capturedGeneration) {
385
462
  this._activeTtsSources = Math.max(0, this._activeTtsSources - 1);
386
- this.logger.log('[VoiceService] TTS chunk discarded – stale generation', { capturedGeneration, currentGeneration: this._ttsGeneration });
463
+ this.logger.log('[VoiceService] TTS chunk discarded – stale generation', { seq, capturedGeneration, currentGeneration: this._ttsGeneration });
387
464
  return;
388
465
  }
466
+ // Store the decoded buffer under its arrival sequence number and attempt to
467
+ // flush any contiguous run of decoded buffers in order.
468
+ this._ttsDecodedPending.set(seq, audioBuf);
469
+ this._drainTtsDecodedBuffers();
470
+ } catch (e) {
471
+ // Advance the scheduler past this failed slot so subsequent decoded chunks are
472
+ // not blocked waiting for a slot that will never be filled.
473
+ if (seq === this._ttsScheduledSeq) {
474
+ this._ttsScheduledSeq++;
475
+ this._drainTtsDecodedBuffers();
476
+ }
477
+ this._onTtsSourceEnded();
478
+ this.logger.warn('[VoiceService] TTS chunk decode failed', { seq }, e);
479
+ }
480
+ }
481
+
482
/**
 * Hands decoded TTS buffers to the Web Audio graph in strict arrival order.
 *
 * Invoked by `playWsTtsChunk` after a chunk has been decoded and stored, and
 * again after a failed decode has advanced `_ttsScheduledSeq` past its slot.
 * Beginning at `_ttsScheduledSeq`, every buffer present in `_ttsDecodedPending`
 * is removed from the map, queued back-to-back on the TTS AudioContext and
 * tracked in `_activeTtsSourceNodes`. The walk stops at the first sequence
 * number that has no decoded buffer stored. Does nothing when no TTS
 * AudioContext exists.
 */
private _drainTtsDecodedBuffers(): void {
  const audioCtx = this.ttsPlayContext;
  if (!audioCtx) {
    return;
  }

  while (this._ttsDecodedPending.has(this._ttsScheduledSeq)) {
    // Claim the slot first so the scheduler state is already advanced
    // before any audio node is created.
    const slot = this._ttsScheduledSeq;
    const decoded = this._ttsDecodedPending.get(slot)!;
    this._ttsDecodedPending.delete(slot);
    this._ttsScheduledSeq = slot + 1;

    const node = audioCtx.createBufferSource();
    node.buffer = decoded;
    node.connect(audioCtx.destination);

    // Start no earlier than "now" and no earlier than the end of the
    // previously queued chunk, so chunks play gaplessly and never overlap.
    const startAt = Math.max(audioCtx.currentTime, this.ttsNextPlayTime);
    node.start(startAt);
    this.ttsNextPlayTime = startAt + decoded.duration;

    // Convert the AudioContext-relative end time into wall-clock milliseconds;
    // the 'done' handler reads it to size its safety timer.
    const msUntilAudioEnds = (this.ttsNextPlayTime - audioCtx.currentTime) * 1000;
    this._ttsExpectedEndTime = Date.now() + msUntilAudioEnds;

    // Must be evaluated before the node is pushed onto the active list.
    const nothingPlayingYet = this._activeTtsSourceNodes.length === 0;
    this._activeTtsSourceNodes.push(node);

    const durationS = decoded.duration.toFixed(3);
    const startsAtS = startAt.toFixed(3);
    if (nothingPlayingYet) {
      // Real audio is about to play: silence the typing-indicator sound.
      this._stopKeyboardSound();
      this.logger.info('[VoiceService] TTS playback started', { durationS, startsAtS });
    }
    this.logger.log('[VoiceService] TTS chunk scheduled', {
      seq: slot,
      durationS,
      startsAtS,
      activeTtsSources: this._activeTtsSources,
      expectedEndInMs: msUntilAudioEnds.toFixed(0),
    });

    node.onended = () => this._onTtsSourceEnded(node);
  }
}
403
516
 
@@ -408,6 +521,10 @@ export class VoiceService {
408
521
  if (idx !== -1) { this._activeTtsSourceNodes.splice(idx, 1); }
409
522
  }
410
523
  this.logger.log('[VoiceService] TTS source ended', { activeTtsSources: this._activeTtsSources, unblockPending: this._unblockAfterTts });
524
+ if (this._activeTtsSources === 0) {
525
+ this.logger.info('[VoiceService] TTS playback ended – all sources finished');
526
+ console.log('[VoiceService] TTS audio finished playing');
527
+ }
411
528
  if (this._unblockAfterTts && this._activeTtsSources === 0) {
412
529
  this._flushTtsUnblock(false);
413
530
  }
@@ -435,6 +552,11 @@ export class VoiceService {
435
552
  this._activeTtsSourceNodes = [];
436
553
  this._activeTtsSources = 0;
437
554
  this._unblockAfterTts = false;
555
+ this._ttsExpectedEndTime = 0;
556
+ // Reset ordered-scheduling state so the next speaking turn starts fresh.
557
+ this._ttsChunkSeq = 0;
558
+ this._ttsScheduledSeq = 0;
559
+ this._ttsDecodedPending.clear();
438
560
  this._stopTtsKaraoke(true);
439
561
  this.logger.log('[VoiceService] TTS cancelled – all audio sources stopped');
440
562
  }
@@ -452,12 +574,21 @@ export class VoiceService {
452
574
  this.logger.log('[VoiceService] TTS unblock: all sources ended, sending playback complete');
453
575
  }
454
576
  this._stopTtsKaraoke(true);
455
- // Signal the proxy that TTS playback is complete. The proxy will transition
456
- // to LISTENING and send a 'listening' event back; the mic is unmuted there
457
- // (not here) so it is live only when the proxy is confirmed ready.
458
- // Do NOT call _isAcquisitionBlocked$.next(false) here — 'listening' is the
459
- // single source of truth so that UI and mic unblock atomically.
577
+ // Signal the proxy that TTS playback is complete. The proxy will transition
578
+ // to LISTENING and send a 'listening' event back; the mic resumes and the UI
579
+ // unblocks only then — so the user sees 'listening' exactly when the stream
580
+ // is open, not before.
581
+ // Start a fallback timer: if the proxy does not respond with 'listening' within
582
+ // 3 seconds (network hiccup, server race, etc.) force-unblock so the user is
583
+ // never left stuck. The timer is cancelled immediately if 'listening' arrives.
460
584
  this.voiceStreaming.sendPlaybackComplete();
585
+ if (this._listeningFallbackTimer !== null) clearTimeout(this._listeningFallbackTimer);
586
+ this._listeningFallbackTimer = setTimeout(() => {
587
+ this._listeningFallbackTimer = null;
588
+ this.logger.warn('[VoiceService] listening fallback timer fired – proxy did not respond, force-unblocking');
589
+ this._isAcquisitionBlocked$.next(false);
590
+ this.voiceStreaming.resumeRecording();
591
+ }, 3000);
461
592
  }
462
593
 
463
594
  // ── WSS TTS Karaoke helpers ───────────────────────────────────────────────
@@ -530,8 +661,39 @@ export class VoiceService {
530
661
 
531
662
  // ─────────────────────────────────────────────────────────────────────────
532
663
 
664
+ // ── Keyboard typing-indicator sound helpers ───────────────────────────────
665
/**
 * Starts the looping keyboard typing sound. No-op if a sound element is
 * already held in `_keyboardSoundEl`.
 *
 * The sound file comes from `globals.keyboardSoundFile`: an absolute
 * `http(s)://` URL is used as-is, anything else is resolved against
 * `${globals.baseLocation}/assets/sounds/`. A missing or blank value falls
 * back to `'keyboard.mp3'`.
 *
 * The volume comes from `globals.keyboardSoundVolume`, clamped to [0, 1].
 * A missing or non-numeric value falls back to 0.3 instead of reaching
 * `audio.volume`, because assigning a non-finite number to that property
 * throws and would abort the caller's control-event handling.
 *
 * A rejected `play()` (e.g. blocked autoplay) is logged as a warning and
 * otherwise ignored.
 */
private _startKeyboardSound(): void {
  if (this._keyboardSoundEl) return; // already playing

  // `??` alone would let an empty string through and resolve to the bare
  // sounds directory, so treat blank values as "not configured".
  const rawFile = this.globals.keyboardSoundFile;
  const trimmedFile = typeof rawFile === 'string' ? rawFile.trim() : '';
  const file = trimmedFile !== '' ? trimmedFile : 'keyboard.mp3';
  const src = /^https?:\/\//i.test(file)
    ? file
    : `${this.globals.baseLocation}/assets/sounds/${file}`;

  // Math.min/Math.max propagate NaN, so validate before clamping.
  const rawVolume = this.globals.keyboardSoundVolume;
  const numericVolume = rawVolume == null ? NaN : Number(rawVolume);
  const volume = Number.isFinite(numericVolume)
    ? Math.min(1, Math.max(0, numericVolume))
    : 0.3;

  const audio = new Audio(src);
  audio.loop = true;
  audio.volume = volume;
  audio.play().catch((e) => this.logger.warn('[VoiceService] keyboard sound play failed', e));
  this._keyboardSoundEl = audio;
  this.logger.log('[VoiceService] keyboard sound started', { src, volume: audio.volume });
}
683
+
684
/**
 * Pauses the keyboard typing sound, rewinds it to the start and drops the
 * reference held in `_keyboardSoundEl`. Does nothing when no sound element
 * is currently held.
 */
private _stopKeyboardSound(): void {
  const activeSound = this._keyboardSoundEl;
  if (activeSound) {
    activeSound.pause();
    activeSound.currentTime = 0;
    this._keyboardSoundEl = null;
    this.logger.log('[VoiceService] keyboard sound stopped');
  }
}
692
+ // ─────────────────────────────────────────────────────────────────────────
693
+
533
694
  async stopSession(options?: { discardInProgressSegment?: boolean}): Promise<{ voiceIngressResultUrl: string | null }> {
534
695
  const discard = options?.discardInProgressSegment === true;
696
+ this.logger.info('[VoiceService] stopSession', { discard, isWssVoiceActive: this._isWssVoiceActive$.getValue() });
535
697
 
536
698
  this.wsControlSub?.unsubscribe();
537
699
  this.wsControlSub = undefined;
@@ -548,6 +710,7 @@ export class VoiceService {
548
710
  this._cancelAllTtsAudio();
549
711
  this.ttsPlayContext = undefined;
550
712
  this.ttsNextPlayTime = 0;
713
+ this._stopKeyboardSound();
551
714
 
552
715
  let voiceIngressResultUrl: string | null = null;
553
716
  if (this.voiceIngressConfig) {
@@ -590,6 +753,10 @@ export class VoiceService {
590
753
  }
591
754
 
592
755
  // 🎧 cleanup audio context
756
+ if (this.volumeRafId) {
757
+ cancelAnimationFrame(this.volumeRafId);
758
+ this.volumeRafId = undefined;
759
+ }
593
760
  this.audioContext?.close();
594
761
  this.audioContext = undefined;
595
762
  this.analyser = undefined;
@@ -608,6 +775,10 @@ export class VoiceService {
608
775
  clearTimeout(this.responseTimeoutId);
609
776
  this.responseTimeoutId = undefined;
610
777
  this.isWaitingForResponse = false;
778
+ if (this._listeningFallbackTimer !== null) {
779
+ clearTimeout(this._listeningFallbackTimer);
780
+ this._listeningFallbackTimer = null;
781
+ }
611
782
  this._isAcquisitionBlocked$.next(false);
612
783
 
613
784
  return { voiceIngressResultUrl };
@@ -688,8 +859,7 @@ export class VoiceService {
688
859
  private startVolumeLoop(): void {
689
860
  const tick = () => {
690
861
  if (!this.analyser || !this.dataArray) {
691
- requestAnimationFrame(tick);
692
- return;
862
+ return; // Stop the loop if analyser is cleaned up
693
863
  }
694
864
 
695
865
  this.analyser.getByteFrequencyData(
@@ -705,10 +875,10 @@ export class VoiceService {
705
875
 
706
876
  this.volumeSubject.next(volume);
707
877
 
708
- requestAnimationFrame(tick);
878
+ this.volumeRafId = requestAnimationFrame(tick);
709
879
  };
710
880
 
711
- tick();
881
+ this.volumeRafId = requestAnimationFrame(tick);
712
882
  }
713
883
 
714
884
  /**
@@ -36,6 +36,14 @@ export class Globals {
36
36
  isMobile: boolean;
37
37
  isLogged: boolean;
38
38
  soundEnabled: boolean;
39
+ /** Volume for the keyboard typing-indicator sound during voice sessions. Range 0.0–1.0. */
40
+ keyboardSoundVolume: number;
41
+ /**
42
+ * Filename (local) or absolute URL (remote) for the keyboard typing-indicator sound.
43
+ * Local: bare filename resolved against `baseLocation + '/assets/sounds/'` (e.g. `'keyboard.mp3'`).
44
+ * Remote: full URL starting with `http://` or `https://`.
45
+ */
46
+ keyboardSoundFile: string;
39
47
  BUILD_VERSION: String;
40
48
  baseLocation: string;
41
49
  availableAgents: Array<UserAgent> = [];
@@ -472,6 +480,8 @@ export class Globals {
472
480
  this.BUILD_VERSION = 'v.' + environment.version;
473
481
 
474
482
  this.soundEnabled = true;
483
+ this.keyboardSoundVolume = 0.3;
484
+ this.keyboardSoundFile = 'keyboard.mp3';
475
485
 
476
486
  this.conversationsBadge = 0;
477
487