@chat21/chat21-web-widget 5.1.33-rc9 → 5.1.34-rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -0
- package/package.json +1 -1
- package/src/app/component/conversation-detail/conversation/conversation.component.ts +3 -1
- package/src/app/component/conversation-detail/conversation-content/conversation-content.component.spec.ts +0 -7
- package/src/app/component/conversation-detail/conversation-content/conversation-content.component.ts +7 -5
- package/src/app/component/conversation-detail/conversation-footer/conversation-footer.component.html +4 -3
- package/src/app/component/conversation-detail/conversation-footer/conversation-footer.component.scss +18 -18
- package/src/app/component/conversation-detail/conversation-footer/conversation-footer.component.ts +6 -0
- package/src/app/component/conversation-detail/stream-audio-spectrum/stream-audio-spectrum.component.html +8 -5
- package/src/app/component/conversation-detail/stream-audio-spectrum/stream-audio-spectrum.component.scss +5 -1
- package/src/app/component/form/inputs/form-text/form-text.component.ts +9 -3
- package/src/app/component/message/bubble-message/bubble-message.component.scss +5 -0
- package/src/app/component/message/bubble-message/bubble-message.component.ts +14 -0
- package/src/app/component/message/json-sources/json-sources.component.scss +12 -8
- package/src/app/pipe/marked.pipe.ts +51 -41
- package/src/app/providers/global-settings.service.ts +31 -0
- package/src/app/providers/json-sources-parser.service.ts +25 -32
- package/src/app/providers/voice/voice-streaming.service.ts +11 -19
- package/src/app/providers/voice/voice-streaming.types.ts +0 -1
- package/src/app/providers/voice/voice.service.spec.ts +12 -45
- package/src/app/providers/voice/voice.service.ts +215 -45
- package/src/app/utils/globals.ts +10 -0
- package/src/assets/i18n/en.json +106 -125
- package/src/assets/i18n/es.json +1 -0
- package/src/assets/i18n/fr.json +1 -0
- package/src/assets/i18n/it.json +1 -0
- package/src/assets/sounds/keyboard.mp3 +0 -0
- package/src/assets/twp/chatbot-panel.html +3 -1
- package/src/chat21-core/utils/utils-message.ts +15 -5
- package/src/widget-config-template.json +1 -0
- package/src/widget-config.json +30 -28
- package/.playwright-mcp/console-2026-05-08T15-31-09-000Z.log +0 -17
- package/.playwright-mcp/console-2026-05-08T15-32-19-412Z.log +0 -89
- package/.playwright-mcp/console-2026-05-08T16-18-48-424Z.log +0 -133
- package/.playwright-mcp/console-2026-05-11T12-54-06-869Z.log +0 -13
- package/.playwright-mcp/console-2026-05-11T12-54-56-229Z.log +0 -147
- package/.playwright-mcp/console-2026-05-11T12-55-47-174Z.log +0 -183
- package/.playwright-mcp/console-2026-05-11T15-34-03-590Z.log +0 -210
- package/.playwright-mcp/console-2026-05-12T15-07-31-880Z.log +0 -118
- package/.playwright-mcp/page-2026-05-08T15-32-19-900Z.yml +0 -851
- package/.playwright-mcp/page-2026-05-08T15-32-47-264Z.yml +0 -857
- package/.playwright-mcp/page-2026-05-08T15-33-17-089Z.yml +0 -1110
- package/.playwright-mcp/page-2026-05-08T15-33-23-486Z.yml +0 -1069
- package/.playwright-mcp/page-2026-05-08T15-33-45-390Z.yml +0 -1076
- package/.playwright-mcp/page-2026-05-08T15-33-52-666Z.yml +0 -1072
- package/.playwright-mcp/page-2026-05-08T15-34-01-338Z.yml +0 -1085
- package/.playwright-mcp/page-2026-05-08T15-34-07-227Z.yml +0 -1072
- package/.playwright-mcp/page-2026-05-08T15-34-13-875Z.yml +0 -1072
- package/.playwright-mcp/page-2026-05-08T15-34-21-885Z.yml +0 -1109
- package/.playwright-mcp/page-2026-05-08T15-34-32-755Z.yml +0 -1109
- package/.playwright-mcp/page-2026-05-08T15-35-09-607Z.yml +0 -1119
- package/.playwright-mcp/page-2026-05-08T15-35-14-242Z.yml +0 -1109
- package/.playwright-mcp/page-2026-05-08T16-18-48-671Z.yml +0 -44
- package/.playwright-mcp/page-2026-05-08T16-18-52-753Z.png +0 -0
- package/.playwright-mcp/page-2026-05-08T16-19-13-919Z.yml +0 -68
- package/.playwright-mcp/page-2026-05-08T16-19-17-977Z.png +0 -0
- package/.playwright-mcp/page-2026-05-08T16-19-25-733Z.yml +0 -120
- package/.playwright-mcp/page-2026-05-08T16-19-29-252Z.png +0 -0
- package/.playwright-mcp/page-2026-05-08T16-19-39-269Z.yml +0 -80
- package/.playwright-mcp/page-2026-05-08T16-19-43-915Z.png +0 -0
- package/.playwright-mcp/page-2026-05-08T16-20-04-407Z.yml +0 -81
- package/.playwright-mcp/page-2026-05-08T16-20-08-984Z.png +0 -0
- package/.playwright-mcp/page-2026-05-08T16-20-32-397Z.png +0 -0
- package/.playwright-mcp/page-2026-05-08T16-20-58-658Z.png +0 -0
- package/.playwright-mcp/page-2026-05-08T16-21-12-320Z.yml +0 -86
- package/.playwright-mcp/page-2026-05-08T16-21-39-154Z.yml +0 -91
- package/.playwright-mcp/page-2026-05-08T16-21-45-420Z.png +0 -0
- package/.playwright-mcp/page-2026-05-08T16-22-21-062Z.yml +0 -0
- package/.playwright-mcp/page-2026-05-08T16-22-58-232Z.yml +0 -91
- package/.playwright-mcp/page-2026-05-08T16-23-36-520Z.yml +0 -0
- package/.playwright-mcp/page-2026-05-08T16-23-46-805Z.yml +0 -100
- package/.playwright-mcp/page-2026-05-08T16-23-55-169Z.png +0 -0
- package/.playwright-mcp/page-2026-05-08T16-24-26-574Z.yml +0 -91
- package/.playwright-mcp/page-2026-05-08T16-25-34-414Z.png +0 -0
- package/.playwright-mcp/page-2026-05-08T16-25-59-831Z.png +0 -0
- package/.playwright-mcp/page-2026-05-08T16-26-21-809Z.yml +0 -91
- package/.playwright-mcp/page-2026-05-08T16-26-47-443Z.yml +0 -105
- package/.playwright-mcp/page-2026-05-08T16-26-56-136Z.png +0 -0
- package/.playwright-mcp/page-2026-05-08T16-27-59-610Z.yml +0 -48
- package/.playwright-mcp/page-2026-05-11T12-54-07-180Z.yml +0 -44
- package/.playwright-mcp/page-2026-05-11T12-54-56-946Z.yml +0 -4
- package/.playwright-mcp/page-2026-05-11T12-55-47-503Z.yml +0 -24
- package/.playwright-mcp/page-2026-05-11T12-56-00-766Z.yml +0 -28
- package/.playwright-mcp/page-2026-05-11T12-56-06-438Z.yml +0 -90
- package/.playwright-mcp/page-2026-05-11T12-57-56-838Z.yml +0 -106
- package/.playwright-mcp/page-2026-05-11T12-58-00-124Z.yml +0 -106
- package/.playwright-mcp/page-2026-05-11T12-59-08-836Z.yml +0 -61
- package/.playwright-mcp/page-2026-05-11T12-59-12-088Z.yml +0 -61
- package/.playwright-mcp/page-2026-05-11T12-59-26-215Z.yml +0 -69
- package/.playwright-mcp/page-2026-05-11T12-59-29-519Z.yml +0 -69
- package/.playwright-mcp/page-2026-05-11T12-59-37-309Z.yml +0 -0
- package/.playwright-mcp/page-2026-05-11T12-59-39-968Z.yml +0 -79
- package/.playwright-mcp/page-2026-05-11T12-59-45-983Z.yml +0 -78
- package/.playwright-mcp/page-2026-05-11T12-59-49-951Z.yml +0 -78
- package/.playwright-mcp/page-2026-05-11T15-34-04-515Z.yml +0 -0
- package/.playwright-mcp/page-2026-05-12T15-07-32-171Z.yml +0 -44
- package/.playwright-mcp/page-2026-05-12T15-08-09-820Z.yml +0 -119
- package/docs/TILEDESK_WIDGET_ACCESSIBILITY_STATEMENT_COMPLETE.md +0 -379
- package/playwright-report/index.html +0 -90
- package/src/app/component/conversation-detail/conversation-footer/conversation-footer.component copy.html +0 -172
- package/test-results/.last-run.json +0 -4
|
@@ -4,6 +4,7 @@ import { getDefaultRealTimeVADOptions } from '@ricky0123/vad-web';
|
|
|
4
4
|
import { BehaviorSubject, Observable, Subject, Subscription } from 'rxjs';
|
|
5
5
|
import { LoggerInstance } from 'src/chat21-core/providers/logger/loggerInstance';
|
|
6
6
|
import { LoggerService } from 'src/chat21-core/providers/abstract/logger.service';
|
|
7
|
+
import { Globals } from 'src/app/utils/globals';
|
|
7
8
|
|
|
8
9
|
import {
|
|
9
10
|
DEFAULT_VOICE_MEDIA_STREAM_CONSTRAINTS,
|
|
@@ -109,6 +110,8 @@ export class VoiceService {
|
|
|
109
110
|
private analyser?: AnalyserNode;
|
|
110
111
|
/** Buffer dedicato (`ArrayBuffer`) per compatibilità con `getByteFrequencyData`. */
|
|
111
112
|
private dataArray?: Uint8Array;
|
|
113
|
+
/** RAF ID for volume loop - used to cancel on cleanup */
|
|
114
|
+
private volumeRafId?: number;
|
|
112
115
|
|
|
113
116
|
/** Riproduzione chunk TTS binari dal proxy (Web Audio). */
|
|
114
117
|
private ttsPlayContext?: AudioContext;
|
|
@@ -124,9 +127,29 @@ export class VoiceService {
|
|
|
124
127
|
// (barge_in or a new speaking event). playWsTtsChunk captures this at entry and
|
|
125
128
|
// checks it after the async decodeAudioData call to discard stale results.
|
|
126
129
|
private _ttsGeneration = 0;
|
|
130
|
+
|
|
131
|
+
// ── Ordered-scheduling state ──────────────────────────────────────────────────────────────────
|
|
132
|
+
// Chunks arrive over WebSocket and their decodeAudioData calls run concurrently.
|
|
133
|
+
// Because a smaller/later chunk can decode faster than a larger/earlier one, scheduling
|
|
134
|
+
// based solely on decode-completion order causes audio to play out of arrival order
|
|
135
|
+
// (e.g. "manuale" starts before "scrittura" even though it arrived after it).
|
|
136
|
+
// Fix: assign a monotonic sequence number on arrival, decode in parallel, but only
|
|
137
|
+
// schedule a buffer once every preceding buffer has already been scheduled.
|
|
138
|
+
private _ttsChunkSeq = 0; // Incremented on each chunk arrival (arrival order)
|
|
139
|
+
private _ttsScheduledSeq = 0; // Next sequence slot that is allowed to be scheduled
|
|
140
|
+
// Decoded buffers waiting for their turn to be scheduled (keyed by arrival sequence)
|
|
141
|
+
private _ttsDecodedPending = new Map<number, AudioBuffer>();
|
|
142
|
+
// ─────────────────────────────────────────────────────────────────────────────────────────────
|
|
127
143
|
// Set to true by the 'done' event; triggers acquisition unblock once all sources end.
|
|
128
144
|
private _unblockAfterTts = false;
|
|
129
145
|
private _unblockSafetyTimer: ReturnType<typeof setTimeout> | null = null;
|
|
146
|
+
// Fallback timer started after sendPlaybackComplete. If the proxy does not reply
|
|
147
|
+
// with 'listening' within the timeout window, the UI is force-unblocked so the
|
|
148
|
+
// user is not left stuck waiting indefinitely.
|
|
149
|
+
private _listeningFallbackTimer: ReturnType<typeof setTimeout> | null = null;
|
|
150
|
+
// Track when the last TTS chunk is expected to finish playing.
|
|
151
|
+
// Used to calculate a proper safety timer duration for long messages.
|
|
152
|
+
private _ttsExpectedEndTime = 0;
|
|
130
153
|
|
|
131
154
|
// ── WSS TTS Karaoke ──────────────────────────────────────────────────────────────────────────
|
|
132
155
|
private _kText = '';
|
|
@@ -141,13 +164,22 @@ export class VoiceService {
|
|
|
141
164
|
readonly voiceTtsKaraoke$: Observable<VoiceTtsKaraokeFrame> = this._voiceTtsKaraokeSubject.asObservable();
|
|
142
165
|
// ─────────────────────────────────────────────────────────────────────────────────────────────
|
|
143
166
|
|
|
167
|
+
// ── Thinking / typing-indicator sound ─────────────────────────────────────────────────────────
|
|
168
|
+
// Played on loop while the bot is thinking or the first TTS chunk hasn't arrived yet.
|
|
169
|
+
// Only active during WSS voice sessions (voice-proxy mode).
|
|
170
|
+
private _keyboardSoundEl: HTMLAudioElement | null = null;
|
|
171
|
+
// ─────────────────────────────────────────────────────────────────────────────────────────────
|
|
172
|
+
|
|
144
173
|
private readonly logger: LoggerService = LoggerInstance.getInstance();
|
|
145
174
|
|
|
175
|
+
private readonly bufferTime = 200000; // used as max safety timer duration for long TTS messages
|
|
176
|
+
|
|
146
177
|
constructor(
|
|
147
178
|
private readonly vadService: VadService,
|
|
148
179
|
private readonly ttsPlayback: TtsAudioPlaybackCoordinator,
|
|
149
180
|
private readonly voiceStreaming: VoiceStreamingService,
|
|
150
181
|
@Optional() @Inject(SpeechToTextProvider) private readonly speechToText: SpeechToTextProvider | null,
|
|
182
|
+
private readonly globals: Globals,
|
|
151
183
|
) {}
|
|
152
184
|
|
|
153
185
|
get isSessionActive(): boolean {
|
|
@@ -172,6 +204,8 @@ export class VoiceService {
|
|
|
172
204
|
* Richiede il microfono, avvia VAD in ascolto (inizio/fine parlato) e registra in WebM per segmento.
|
|
173
205
|
*/
|
|
174
206
|
async startSession(options: VoiceSessionStartOptions = {}): Promise<void> {
|
|
207
|
+
const mode = options.voiceIngressStream ? 'wss-proxy' : 'legacy-vad';
|
|
208
|
+
this.logger.info('[VoiceService] startSession', { mode });
|
|
175
209
|
await this.stopSession();
|
|
176
210
|
|
|
177
211
|
this.sessionConstraints = options.constraints ?? DEFAULT_VOICE_MEDIA_STREAM_CONSTRAINTS;
|
|
@@ -189,7 +223,13 @@ export class VoiceService {
|
|
|
189
223
|
|
|
190
224
|
/** Sessione guidata dal proxy: solo mic + volume + WSS (mic in upload, eventi + TTS in download). */
|
|
191
225
|
private async startWssVoiceSession(): Promise<void> {
|
|
226
|
+
this.logger.info('[VoiceService] acquiring microphone for WSS session');
|
|
192
227
|
this.stream = await navigator.mediaDevices.getUserMedia(this.sessionConstraints);
|
|
228
|
+
const tracks = this.stream.getAudioTracks();
|
|
229
|
+
this.logger.info('[VoiceService] microphone acquired', {
|
|
230
|
+
tracks: tracks.length,
|
|
231
|
+
label: tracks[0]?.label ?? '(unknown)',
|
|
232
|
+
});
|
|
193
233
|
|
|
194
234
|
// 🎧 AUDIO ANALYSER INIT
|
|
195
235
|
this.initAudioAnalyser(this.stream);
|
|
@@ -202,7 +242,7 @@ export class VoiceService {
|
|
|
202
242
|
await this.voiceStreaming.start(this.voiceIngressConfig!, { sharedMediaStream: this.stream });
|
|
203
243
|
// Signal that the voice proxy is now live — suppresses tiledesk-server TTS.
|
|
204
244
|
this._isWssVoiceActive$.next(true);
|
|
205
|
-
this.logger.
|
|
245
|
+
this.logger.info('[VoiceService] WSS voice session started (no local VAD)');
|
|
206
246
|
} catch (e) {
|
|
207
247
|
this.wsControlSub?.unsubscribe();
|
|
208
248
|
this.wsControlSub = undefined;
|
|
@@ -280,25 +320,45 @@ export class VoiceService {
|
|
|
280
320
|
this.logger.log('[VoiceService] session_started', { requestId: msg.requestId ?? '' });
|
|
281
321
|
break;
|
|
282
322
|
case 'listening':
|
|
283
|
-
// Proxy confirmed it is in LISTENING state — unblock the UI
|
|
284
|
-
//
|
|
285
|
-
//
|
|
323
|
+
// Proxy confirmed it is in LISTENING state — unblock the UI and resume
|
|
324
|
+
// the MediaRecorder. Recording was paused on 'thinking' and must only
|
|
325
|
+
// restart here, after TTS playback has fully completed and the proxy
|
|
326
|
+
// is confirmed ready to receive audio again.
|
|
327
|
+
if (this._listeningFallbackTimer !== null) {
|
|
328
|
+
clearTimeout(this._listeningFallbackTimer);
|
|
329
|
+
this._listeningFallbackTimer = null;
|
|
330
|
+
}
|
|
331
|
+
// If TTS never arrived (edge case) the keyboard sound would still be looping — stop it.
|
|
332
|
+
this._stopKeyboardSound();
|
|
286
333
|
this._isAcquisitionBlocked$.next(false);
|
|
287
|
-
this.
|
|
334
|
+
this.voiceStreaming.resumeRecording();
|
|
335
|
+
this.logger.log('[VoiceService] listening – acquisition unblocked, recording resumed');
|
|
288
336
|
break;
|
|
289
337
|
case 'transcript': {
|
|
290
338
|
const text = typeof msg.text === 'string' ? msg.text : '';
|
|
291
339
|
const isFinal = !!msg.isFinal;
|
|
340
|
+
// Guard: if the proxy has already moved to PROCESSING (thinking) or SPEAKING,
|
|
341
|
+
// this transcript is a stale in-flight STT result. Discard it so it cannot
|
|
342
|
+
// override the blocked acquisition state or reach any downstream subscriber.
|
|
343
|
+
// 'thinking' is stronger than 'transcript' — state must not regress.
|
|
344
|
+
if (this._isAcquisitionBlocked$.value) {
|
|
345
|
+
this.logger.warn('[VoiceService] transcript discarded – arrived after thinking/speaking (stale STT result)', { text, isFinal });
|
|
346
|
+
break;
|
|
347
|
+
}
|
|
292
348
|
this.logger.log('[VoiceService] transcript', { text, isFinal });
|
|
293
349
|
this.voiceTranscriptSubject.next({ text, isFinal });
|
|
294
350
|
break;
|
|
295
351
|
}
|
|
296
352
|
case 'thinking':
|
|
297
353
|
// Block acquisition UI while the bot processes the utterance.
|
|
298
|
-
//
|
|
299
|
-
//
|
|
354
|
+
// Pause the MediaRecorder so no audio chunks are sent to the proxy
|
|
355
|
+
// during PROCESSING state. Recording resumes only after the proxy
|
|
356
|
+
// confirms LISTENING (i.e. after TTS playback has fully finished).
|
|
300
357
|
this._isAcquisitionBlocked$.next(true);
|
|
301
|
-
this.
|
|
358
|
+
this.voiceStreaming.pauseRecording();
|
|
359
|
+
// Play keyboard typing sound to mask the silence while the bot generates its response.
|
|
360
|
+
this._startKeyboardSound();
|
|
361
|
+
this.logger.log('[VoiceService] thinking – acquisition blocked, recording paused', { activeTtsSources: this._activeTtsSources });
|
|
302
362
|
break;
|
|
303
363
|
case 'speaking': {
|
|
304
364
|
this._isAcquisitionBlocked$.next(true);
|
|
@@ -310,8 +370,13 @@ export class VoiceService {
|
|
|
310
370
|
this._cancelAllTtsAudio();
|
|
311
371
|
// Reset TTS scheduling so new chunks play from now, not a stale future time.
|
|
312
372
|
this.ttsNextPlayTime = this.ttsPlayContext?.currentTime ?? 0;
|
|
373
|
+
// Reset expected end time for new TTS stream
|
|
374
|
+
this._ttsExpectedEndTime = 0;
|
|
313
375
|
const preview = typeof msg.text === 'string' ? msg.text.slice(0, 80) : '';
|
|
314
376
|
this.logger.log('[VoiceService] speaking – acquisition blocked, TTS text preview', { preview });
|
|
377
|
+
// Keep keyboard sound going (or start it as a fallback if 'thinking' was missed)
|
|
378
|
+
// until the first TTS audio chunk actually starts playing.
|
|
379
|
+
this._startKeyboardSound();
|
|
315
380
|
// Emit the text being spoken so UI can display it alongside the audio.
|
|
316
381
|
if (typeof msg.text === 'string' && msg.text) {
|
|
317
382
|
this.voiceTtsTextSubject.next(msg.text);
|
|
@@ -324,31 +389,31 @@ export class VoiceService {
|
|
|
324
389
|
// _activeTtsSources tracks pending sources; when the last one ends, acquisition unblocks.
|
|
325
390
|
if (this._activeTtsSources > 0) {
|
|
326
391
|
this._unblockAfterTts = true;
|
|
327
|
-
//
|
|
392
|
+
// Calculate safety timer based on expected audio end time.
|
|
393
|
+
// Add 5 seconds buffer for network/decode latency.
|
|
394
|
+
// Minimum 5 seconds, maximum 200 seconds (`bufferTime`, in ms) for very long messages.
|
|
395
|
+
const remainingMs = Math.max(0, this._ttsExpectedEndTime - Date.now());
|
|
396
|
+
const safetyMs = Math.min(this.bufferTime, Math.max(5000, remainingMs + 5000));
|
|
328
397
|
if (this._unblockSafetyTimer !== null) clearTimeout(this._unblockSafetyTimer);
|
|
329
|
-
this._unblockSafetyTimer = setTimeout(() => this._flushTtsUnblock(true),
|
|
330
|
-
this.logger.log('[VoiceService] done – TTS still pending, waiting for all sources to end', {
|
|
398
|
+
this._unblockSafetyTimer = setTimeout(() => this._flushTtsUnblock(true), safetyMs);
|
|
399
|
+
this.logger.log('[VoiceService] done – TTS still pending, waiting for all sources to end', {
|
|
400
|
+
activeTtsSources: this._activeTtsSources,
|
|
401
|
+
expectedEndInMs: remainingMs,
|
|
402
|
+
safetyTimerMs: safetyMs
|
|
403
|
+
});
|
|
331
404
|
} else {
|
|
332
|
-
// No audio sources
|
|
333
|
-
//
|
|
334
|
-
//
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
405
|
+
// No audio sources tracked yet, but binary TTS chunks may still be in-flight
|
|
406
|
+
// (WebSocket binary frames can arrive after the JSON 'done' control message).
|
|
407
|
+
// Set _unblockAfterTts so that _onTtsSourceEnded() triggers _flushTtsUnblock
|
|
408
|
+
// naturally when those chunks finish playing, instead of relying solely on the
|
|
409
|
+
// safety timer (which would delay unblock by 10 s even when audio ends sooner).
|
|
410
|
+
this._unblockAfterTts = true;
|
|
411
|
+
this.logger.log('[VoiceService] done – no active sources yet, arming unblock for in-flight chunks');
|
|
412
|
+
// Safety timer as last resort in case no chunks arrive at all.
|
|
413
|
+
if (this._unblockSafetyTimer !== null) clearTimeout(this._unblockSafetyTimer);
|
|
414
|
+
this._unblockSafetyTimer = setTimeout(() => this._flushTtsUnblock(true), 10000);
|
|
339
415
|
}
|
|
340
416
|
break;
|
|
341
|
-
case 'barge_in':
|
|
342
|
-
// Proxy's VAD detected user speech while the bot was talking — stop TTS immediately.
|
|
343
|
-
// Do NOT send tts_playback_complete; this is an interruption, not a normal completion.
|
|
344
|
-
// The proxy will follow with { event: "listening" } which authoritatively unblocks the UI.
|
|
345
|
-
// Audio was never muted, so there is nothing to unmute.
|
|
346
|
-
this._cancelAllTtsAudio();
|
|
347
|
-
this.ttsNextPlayTime = 0;
|
|
348
|
-
this._unblockAfterTts = false;
|
|
349
|
-
this._isAcquisitionBlocked$.next(false);
|
|
350
|
-
this.logger.log('[VoiceService] barge_in – TTS cancelled, acquisition unblocked');
|
|
351
|
-
break;
|
|
352
417
|
case 'error': {
|
|
353
418
|
const errorMsg = typeof msg.message === 'string' ? msg.message : 'Voice session error';
|
|
354
419
|
this.logger.error('[VoiceService] WSS error', errorMsg);
|
|
@@ -361,8 +426,19 @@ export class VoiceService {
|
|
|
361
426
|
}
|
|
362
427
|
}
|
|
363
428
|
|
|
364
|
-
/**
|
|
429
|
+
/**
|
|
430
|
+
* Chunk TTS: ogni buffer deve essere decodificabile da `decodeAudioData` (es. segmento WebM/Opus completo).
|
|
431
|
+
*
|
|
432
|
+
* Decode-race fix: multiple chunks decode concurrently; a smaller/later chunk can finish
|
|
433
|
+
* decoding before a larger/earlier one, which would cause the AudioBufferSourceNode to be
|
|
434
|
+
* scheduled out of arrival order (e.g. "manuale" before "scrittura"). To prevent this, each
|
|
435
|
+
* chunk is assigned a monotonic sequence number on arrival and stored in _ttsDecodedPending
|
|
436
|
+
* after decoding. _drainTtsDecodedBuffers() only advances the schedule when the next
|
|
437
|
+
* expected sequence slot is present, guaranteeing arrival-order playback regardless of decode speed.
|
|
438
|
+
*/
|
|
365
439
|
private async playWsTtsChunk(buf: ArrayBuffer): Promise<void> {
|
|
440
|
+
// Assign arrival-order sequence number SYNCHRONOUSLY before any await.
|
|
441
|
+
const seq = this._ttsChunkSeq++;
|
|
366
442
|
// Capture the current generation BEFORE the synchronous increment so that
|
|
367
443
|
// if _cancelAllTtsAudio() fires (incrementing _ttsGeneration) while this
|
|
368
444
|
// decode is in-flight, the mismatch is detected and the stale chunk is discarded.
|
|
@@ -370,11 +446,12 @@ export class VoiceService {
|
|
|
370
446
|
// Increment SYNCHRONOUSLY before any await so the 'done' event handler (which arrives
|
|
371
447
|
// on the next WebSocket message — a different event-loop tick) sees a non-zero count.
|
|
372
448
|
this._activeTtsSources++;
|
|
373
|
-
this.logger.log('[VoiceService] TTS chunk received', { bytes: buf.byteLength, activeTtsSources: this._activeTtsSources });
|
|
449
|
+
this.logger.log('[VoiceService] TTS chunk received', { seq, bytes: buf.byteLength, activeTtsSources: this._activeTtsSources });
|
|
374
450
|
try {
|
|
375
451
|
if (!this.ttsPlayContext || this.ttsPlayContext.state === 'closed') {
|
|
376
452
|
this.ttsPlayContext = new AudioContext();
|
|
377
453
|
this.ttsNextPlayTime = this.ttsPlayContext.currentTime;
|
|
454
|
+
this.logger.info('[VoiceService] TTS AudioContext created');
|
|
378
455
|
}
|
|
379
456
|
const ctx = this.ttsPlayContext;
|
|
380
457
|
const audioBuf = await ctx.decodeAudioData(buf.slice(0));
|
|
@@ -383,21 +460,57 @@ export class VoiceService {
|
|
|
383
460
|
// for a turn that was already cancelled, and undo the counter increment.
|
|
384
461
|
if (this._ttsGeneration !== capturedGeneration) {
|
|
385
462
|
this._activeTtsSources = Math.max(0, this._activeTtsSources - 1);
|
|
386
|
-
this.logger.log('[VoiceService] TTS chunk discarded – stale generation', { capturedGeneration, currentGeneration: this._ttsGeneration });
|
|
463
|
+
this.logger.log('[VoiceService] TTS chunk discarded – stale generation', { seq, capturedGeneration, currentGeneration: this._ttsGeneration });
|
|
387
464
|
return;
|
|
388
465
|
}
|
|
466
|
+
// Store the decoded buffer under its arrival sequence number and attempt to
|
|
467
|
+
// flush any contiguous run of decoded buffers in order.
|
|
468
|
+
this._ttsDecodedPending.set(seq, audioBuf);
|
|
469
|
+
this._drainTtsDecodedBuffers();
|
|
470
|
+
} catch (e) {
|
|
471
|
+
// Advance the scheduler past this failed slot so subsequent decoded chunks are
|
|
472
|
+
// not blocked waiting for a slot that will never be filled.
|
|
473
|
+
if (seq === this._ttsScheduledSeq) {
|
|
474
|
+
this._ttsScheduledSeq++;
|
|
475
|
+
this._drainTtsDecodedBuffers();
|
|
476
|
+
}
|
|
477
|
+
this._onTtsSourceEnded();
|
|
478
|
+
this.logger.warn('[VoiceService] TTS chunk decode failed', { seq }, e);
|
|
479
|
+
}
|
|
480
|
+
}
|
|
481
|
+
|
|
482
|
+
/**
|
|
483
|
+
* Schedules decoded TTS buffers in strict arrival order.
|
|
484
|
+
* Called after every successful decode, and after a failed decode at the head slot is skipped. Drains the _ttsDecodedPending map
|
|
485
|
+
* starting at _ttsScheduledSeq, stopping as soon as the next slot is missing
|
|
486
|
+
* (i.e. that chunk is still decoding or failed).
|
|
487
|
+
*/
|
|
488
|
+
private _drainTtsDecodedBuffers(): void {
|
|
489
|
+
const ctx = this.ttsPlayContext;
|
|
490
|
+
if (!ctx) return;
|
|
491
|
+
while (this._ttsDecodedPending.has(this._ttsScheduledSeq)) {
|
|
492
|
+
const audioBuf = this._ttsDecodedPending.get(this._ttsScheduledSeq)!;
|
|
493
|
+
this._ttsDecodedPending.delete(this._ttsScheduledSeq);
|
|
494
|
+
this._ttsScheduledSeq++;
|
|
495
|
+
|
|
389
496
|
const src = ctx.createBufferSource();
|
|
390
497
|
src.buffer = audioBuf;
|
|
391
498
|
src.connect(ctx.destination);
|
|
392
499
|
const t0 = Math.max(ctx.currentTime, this.ttsNextPlayTime);
|
|
393
500
|
src.start(t0);
|
|
394
501
|
this.ttsNextPlayTime = t0 + audioBuf.duration;
|
|
502
|
+
// Track the expected end time in wall-clock time (ms) for safety timer calculation.
|
|
503
|
+
const audioEndDelayMs = (this.ttsNextPlayTime - ctx.currentTime) * 1000;
|
|
504
|
+
this._ttsExpectedEndTime = Date.now() + audioEndDelayMs;
|
|
505
|
+
const isFirstChunk = this._activeTtsSourceNodes.length === 0;
|
|
395
506
|
this._activeTtsSourceNodes.push(src);
|
|
396
|
-
|
|
507
|
+
if (isFirstChunk) {
|
|
508
|
+
// First real audio about to play — stop the keyboard typing sound immediately.
|
|
509
|
+
this._stopKeyboardSound();
|
|
510
|
+
this.logger.info('[VoiceService] TTS playback started', { durationS: audioBuf.duration.toFixed(3), startsAtS: t0.toFixed(3) });
|
|
511
|
+
}
|
|
512
|
+
this.logger.log('[VoiceService] TTS chunk scheduled', { seq: this._ttsScheduledSeq - 1, durationS: audioBuf.duration.toFixed(3), startsAtS: t0.toFixed(3), activeTtsSources: this._activeTtsSources, expectedEndInMs: audioEndDelayMs.toFixed(0) });
|
|
397
513
|
src.onended = () => this._onTtsSourceEnded(src);
|
|
398
|
-
} catch (e) {
|
|
399
|
-
this._onTtsSourceEnded();
|
|
400
|
-
this.logger.warn('[VoiceService] TTS chunk decode failed', e);
|
|
401
514
|
}
|
|
402
515
|
}
|
|
403
516
|
|
|
@@ -408,6 +521,10 @@ export class VoiceService {
|
|
|
408
521
|
if (idx !== -1) { this._activeTtsSourceNodes.splice(idx, 1); }
|
|
409
522
|
}
|
|
410
523
|
this.logger.log('[VoiceService] TTS source ended', { activeTtsSources: this._activeTtsSources, unblockPending: this._unblockAfterTts });
|
|
524
|
+
if (this._activeTtsSources === 0) {
|
|
525
|
+
this.logger.info('[VoiceService] TTS playback ended – all sources finished');
|
|
526
|
+
console.log('[VoiceService] TTS audio finished playing');
|
|
527
|
+
}
|
|
411
528
|
if (this._unblockAfterTts && this._activeTtsSources === 0) {
|
|
412
529
|
this._flushTtsUnblock(false);
|
|
413
530
|
}
|
|
@@ -435,6 +552,11 @@ export class VoiceService {
|
|
|
435
552
|
this._activeTtsSourceNodes = [];
|
|
436
553
|
this._activeTtsSources = 0;
|
|
437
554
|
this._unblockAfterTts = false;
|
|
555
|
+
this._ttsExpectedEndTime = 0;
|
|
556
|
+
// Reset ordered-scheduling state so the next speaking turn starts fresh.
|
|
557
|
+
this._ttsChunkSeq = 0;
|
|
558
|
+
this._ttsScheduledSeq = 0;
|
|
559
|
+
this._ttsDecodedPending.clear();
|
|
438
560
|
this._stopTtsKaraoke(true);
|
|
439
561
|
this.logger.log('[VoiceService] TTS cancelled – all audio sources stopped');
|
|
440
562
|
}
|
|
@@ -452,12 +574,21 @@ export class VoiceService {
|
|
|
452
574
|
this.logger.log('[VoiceService] TTS unblock: all sources ended, sending playback complete');
|
|
453
575
|
}
|
|
454
576
|
this._stopTtsKaraoke(true);
|
|
455
|
-
// Signal the proxy that TTS playback is complete.
|
|
456
|
-
// to LISTENING and send a 'listening' event back; the mic
|
|
457
|
-
//
|
|
458
|
-
//
|
|
459
|
-
//
|
|
577
|
+
// Signal the proxy that TTS playback is complete. The proxy will transition
|
|
578
|
+
// to LISTENING and send a 'listening' event back; the mic resumes and the UI
|
|
579
|
+
// unblocks only then — so the user sees 'listening' exactly when the stream
|
|
580
|
+
// is open, not before.
|
|
581
|
+
// Start a fallback timer: if the proxy does not respond with 'listening' within
|
|
582
|
+
// 3 seconds (network hiccup, server race, etc.) force-unblock so the user is
|
|
583
|
+
// never left stuck. The timer is cancelled immediately if 'listening' arrives.
|
|
460
584
|
this.voiceStreaming.sendPlaybackComplete();
|
|
585
|
+
if (this._listeningFallbackTimer !== null) clearTimeout(this._listeningFallbackTimer);
|
|
586
|
+
this._listeningFallbackTimer = setTimeout(() => {
|
|
587
|
+
this._listeningFallbackTimer = null;
|
|
588
|
+
this.logger.warn('[VoiceService] listening fallback timer fired – proxy did not respond, force-unblocking');
|
|
589
|
+
this._isAcquisitionBlocked$.next(false);
|
|
590
|
+
this.voiceStreaming.resumeRecording();
|
|
591
|
+
}, 3000);
|
|
461
592
|
}
|
|
462
593
|
|
|
463
594
|
// ── WSS TTS Karaoke helpers ───────────────────────────────────────────────
|
|
@@ -530,8 +661,39 @@ export class VoiceService {
|
|
|
530
661
|
|
|
531
662
|
// ─────────────────────────────────────────────────────────────────────────
|
|
532
663
|
|
|
664
|
+
// ── Keyboard typing-indicator sound helpers ───────────────────────────────
|
|
665
|
+
/**
|
|
666
|
+
* Starts the keyboard sound on loop to mask silence while the bot is
|
|
667
|
+
* generating its response. No-op if already playing.
|
|
668
|
+
* Only called during WSS voice sessions (voice-proxy mode).
|
|
669
|
+
*/
|
|
670
|
+
private _startKeyboardSound(): void {
|
|
671
|
+
if (this._keyboardSoundEl) return; // already playing
|
|
672
|
+
const file = this.globals.keyboardSoundFile ?? 'keyboard.mp3';
|
|
673
|
+
const src = /^https?:\/\//i.test(file)
|
|
674
|
+
? file
|
|
675
|
+
: `${this.globals.baseLocation}/assets/sounds/${file}`;
|
|
676
|
+
const audio = new Audio(src);
|
|
677
|
+
audio.loop = true;
|
|
678
|
+
audio.volume = Math.min(1, Math.max(0, this.globals.keyboardSoundVolume));
|
|
679
|
+
audio.play().catch((e) => this.logger.warn('[VoiceService] keyboard sound play failed', e));
|
|
680
|
+
this._keyboardSoundEl = audio;
|
|
681
|
+
this.logger.log('[VoiceService] keyboard sound started', { src, volume: audio.volume });
|
|
682
|
+
}
|
|
683
|
+
|
|
684
|
+
/** Stops and discards the keyboard typing sound. No-op if not playing. */
|
|
685
|
+
private _stopKeyboardSound(): void {
|
|
686
|
+
if (!this._keyboardSoundEl) return;
|
|
687
|
+
this._keyboardSoundEl.pause();
|
|
688
|
+
this._keyboardSoundEl.currentTime = 0;
|
|
689
|
+
this._keyboardSoundEl = null;
|
|
690
|
+
this.logger.log('[VoiceService] keyboard sound stopped');
|
|
691
|
+
}
|
|
692
|
+
// ─────────────────────────────────────────────────────────────────────────
|
|
693
|
+
|
|
533
694
|
async stopSession(options?: { discardInProgressSegment?: boolean}): Promise<{ voiceIngressResultUrl: string | null }> {
|
|
534
695
|
const discard = options?.discardInProgressSegment === true;
|
|
696
|
+
this.logger.info('[VoiceService] stopSession', { discard, isWssVoiceActive: this._isWssVoiceActive$.getValue() });
|
|
535
697
|
|
|
536
698
|
this.wsControlSub?.unsubscribe();
|
|
537
699
|
this.wsControlSub = undefined;
|
|
@@ -548,6 +710,7 @@ export class VoiceService {
|
|
|
548
710
|
this._cancelAllTtsAudio();
|
|
549
711
|
this.ttsPlayContext = undefined;
|
|
550
712
|
this.ttsNextPlayTime = 0;
|
|
713
|
+
this._stopKeyboardSound();
|
|
551
714
|
|
|
552
715
|
let voiceIngressResultUrl: string | null = null;
|
|
553
716
|
if (this.voiceIngressConfig) {
|
|
@@ -590,6 +753,10 @@ export class VoiceService {
|
|
|
590
753
|
}
|
|
591
754
|
|
|
592
755
|
// 🎧 cleanup audio context
|
|
756
|
+
if (this.volumeRafId) {
|
|
757
|
+
cancelAnimationFrame(this.volumeRafId);
|
|
758
|
+
this.volumeRafId = undefined;
|
|
759
|
+
}
|
|
593
760
|
this.audioContext?.close();
|
|
594
761
|
this.audioContext = undefined;
|
|
595
762
|
this.analyser = undefined;
|
|
@@ -608,6 +775,10 @@ export class VoiceService {
|
|
|
608
775
|
clearTimeout(this.responseTimeoutId);
|
|
609
776
|
this.responseTimeoutId = undefined;
|
|
610
777
|
this.isWaitingForResponse = false;
|
|
778
|
+
if (this._listeningFallbackTimer !== null) {
|
|
779
|
+
clearTimeout(this._listeningFallbackTimer);
|
|
780
|
+
this._listeningFallbackTimer = null;
|
|
781
|
+
}
|
|
611
782
|
this._isAcquisitionBlocked$.next(false);
|
|
612
783
|
|
|
613
784
|
return { voiceIngressResultUrl };
|
|
@@ -688,8 +859,7 @@ export class VoiceService {
|
|
|
688
859
|
private startVolumeLoop(): void {
|
|
689
860
|
const tick = () => {
|
|
690
861
|
if (!this.analyser || !this.dataArray) {
|
|
691
|
-
|
|
692
|
-
return;
|
|
862
|
+
return; // Stop the loop if analyser is cleaned up
|
|
693
863
|
}
|
|
694
864
|
|
|
695
865
|
this.analyser.getByteFrequencyData(
|
|
@@ -705,10 +875,10 @@ export class VoiceService {
|
|
|
705
875
|
|
|
706
876
|
this.volumeSubject.next(volume);
|
|
707
877
|
|
|
708
|
-
requestAnimationFrame(tick);
|
|
878
|
+
this.volumeRafId = requestAnimationFrame(tick);
|
|
709
879
|
};
|
|
710
880
|
|
|
711
|
-
tick
|
|
881
|
+
this.volumeRafId = requestAnimationFrame(tick);
|
|
712
882
|
}
|
|
713
883
|
|
|
714
884
|
/**
|
package/src/app/utils/globals.ts
CHANGED
|
@@ -36,6 +36,14 @@ export class Globals {
|
|
|
36
36
|
isMobile: boolean;
|
|
37
37
|
isLogged: boolean;
|
|
38
38
|
soundEnabled: boolean;
|
|
39
|
+
/** Volume for the keyboard typing-indicator sound during voice sessions. Range 0.0–1.0. */
|
|
40
|
+
keyboardSoundVolume: number;
|
|
41
|
+
/**
|
|
42
|
+
* Filename (local) or absolute URL (remote) for the keyboard typing-indicator sound.
|
|
43
|
+
* Local: bare filename resolved against `baseLocation + '/assets/sounds/'` (e.g. `'keyboard.mp3'`).
|
|
44
|
+
* Remote: full URL starting with `http://` or `https://`.
|
|
45
|
+
*/
|
|
46
|
+
keyboardSoundFile: string;
|
|
39
47
|
BUILD_VERSION: String;
|
|
40
48
|
baseLocation: string;
|
|
41
49
|
availableAgents: Array<UserAgent> = [];
|
|
@@ -472,6 +480,8 @@ export class Globals {
|
|
|
472
480
|
this.BUILD_VERSION = 'v.' + environment.version;
|
|
473
481
|
|
|
474
482
|
this.soundEnabled = true;
|
|
483
|
+
this.keyboardSoundVolume = 0.3;
|
|
484
|
+
this.keyboardSoundFile = 'keyboard.mp3';
|
|
475
485
|
|
|
476
486
|
this.conversationsBadge = 0;
|
|
477
487
|
|