@volley/recognition-client-sdk 0.1.385 → 0.1.418

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,704 @@
1
+ /**
2
+ * Integration tests for SimplifiedVGFRecognitionClient state transitions.
3
+ *
4
+ * These tests verify complete recognition flows involving multiple method calls
5
+ * and state transitions, including:
6
+ * - Normal recognition flow (sendAudio → transcript → stopRecording → final)
7
+ * - Early termination with synthetic finalization
8
+ * - Abnormal stop (cancel/abandon)
9
+ * - Error and terminal status protection (duplicate suppression)
10
+ */
11
+
12
+ import {
13
+ SimplifiedVGFRecognitionClient,
14
+ SimplifiedVGFClientConfig
15
+ } from './simplified-vgf-recognition-client.js';
16
+ import { RealTimeTwoWayWebSocketRecognitionClient } from './recognition-client.js';
17
+ import {
18
+ RecognitionState,
19
+ TranscriptionStatus,
20
+ RecordingStatus
21
+ } from './vgf-recognition-state.js';
22
+ import { AudioEncoding } from '@recog/shared-types';
23
+
24
+ // Mock the underlying client
25
// Replace the real WebSocket client module with a stub so these tests drive
// only the SimplifiedVGFRecognitionClient wrapper. The factory builds a single
// stub object, and the mocked constructor returns that same object for every
// `new RealTimeTwoWayWebSocketRecognitionClient(...)` call.
jest.mock('./recognition-client.js', () => {
  const stubClient = {
    // Lifecycle / streaming methods; the async ones resolve to undefined.
    connect: jest.fn().mockResolvedValue(undefined),
    sendAudio: jest.fn(),
    stopRecording: jest.fn().mockResolvedValue(undefined),
    stopAbnormally: jest.fn(),

    // Accessors with fixed canned answers.
    getAudioUtteranceId: jest.fn().mockReturnValue('mock-uuid'),
    getUrl: jest.fn().mockReturnValue('wss://mock-url'),
    getState: jest.fn().mockReturnValue('IDLE'),

    // Status predicates: connected, and nothing else in flight.
    isConnected: jest.fn().mockReturnValue(true),
    isConnecting: jest.fn().mockReturnValue(false),
    isStopping: jest.fn().mockReturnValue(false),
    isTranscriptionFinished: jest.fn().mockReturnValue(false),
    isBufferOverflowing: jest.fn().mockReturnValue(false)
  };

  // The constructor is itself a jest.fn so tests can read the config object
  // (and the callbacks inside it) from its recorded calls.
  const StubClientConstructor = jest.fn().mockImplementation(() => stubClient);

  return { RealTimeTwoWayWebSocketRecognitionClient: StubClientConstructor };
});
45
+
46
+ describe('SimplifiedVGFRecognitionClient Integration - State Transitions', () => {
47
// Clear recorded calls on every mock before each test. The tests read the
// most recent constructor call of the mocked client to grab the callbacks
// passed to it, and assert exact callback counts.
beforeEach(() => {
  jest.clearAllMocks();
});
50
+
51
+ describe('Normal Recognition Flow', () => {
52
/**
 * Happy path: sendAudio → pending transcripts → stopRecording → server final.
 *
 * After each step the test checks the client's current VGF state and the
 * number of onStateChange emissions. At the end it also checks the sequence
 * of emitted state snapshots, so the transitions themselves are verified and
 * not just the state observed after each step.
 */
it('should follow correct state transitions for complete normal flow', async () => {
  const stateHistory: RecognitionState[] = [];

  // Snapshot each emitted state; jest.fn() itself tracks the call count.
  const trackingCallback = jest.fn((state: RecognitionState) => {
    stateHistory.push({ ...state });
  });

  const client = new SimplifiedVGFRecognitionClient({
    asrRequestConfig: {
      provider: 'deepgram',
      language: 'en',
      sampleRate: 16000,
      encoding: AudioEncoding.LINEAR16
    },
    onStateChange: trackingCallback
  });

  // Capture the onTranscript callback handed to the (mocked) underlying client
  const constructorCalls = (RealTimeTwoWayWebSocketRecognitionClient as jest.MockedClass<typeof RealTimeTwoWayWebSocketRecognitionClient>).mock.calls;
  const latestConfig = constructorCalls[constructorCalls.length - 1]?.[0];
  const onTranscriptCallback = latestConfig?.onTranscript;
  if (!onTranscriptCallback) throw new Error('onTranscript callback not found');

  const clientUuid = client.getVGFState().audioUtteranceId;

  // Step 1: Initial state - verify starting point
  expect(trackingCallback).not.toHaveBeenCalled();
  let currentState = client.getVGFState();
  expect(currentState.audioUtteranceId).toBe(clientUuid);
  expect(currentState.startRecordingStatus).toBe(RecordingStatus.READY);
  expect(currentState.transcriptionStatus).toBe(TranscriptionStatus.NOT_STARTED);
  expect(currentState.pendingTranscript).toBe('');
  expect(currentState.finalTranscript).toBeUndefined();

  // Step 2: Send first audio chunk - triggers RECORDING state
  client.sendAudio(Buffer.from([1, 2, 3, 4]));

  expect(trackingCallback).toHaveBeenCalledTimes(1);
  currentState = client.getVGFState();
  expect(currentState.audioUtteranceId).toBe(clientUuid);
  expect(currentState.startRecordingStatus).toBe(RecordingStatus.RECORDING);
  expect(currentState.startRecordingTimestamp).toBeDefined();
  expect(currentState.transcriptionStatus).toBe(TranscriptionStatus.NOT_STARTED);

  // Step 3: Server sends pending transcript - triggers IN_PROGRESS
  onTranscriptCallback({
    type: 'Transcription',
    audioUtteranceId: clientUuid,
    pendingTranscript: 'hello',
    pendingTranscriptConfidence: 0.7,
    is_finished: false
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
  } as any);

  expect(trackingCallback).toHaveBeenCalledTimes(2);
  currentState = client.getVGFState();
  expect(currentState.audioUtteranceId).toBe(clientUuid);
  expect(currentState.startRecordingStatus).toBe(RecordingStatus.RECORDING);
  expect(currentState.transcriptionStatus).toBe(TranscriptionStatus.IN_PROGRESS);
  expect(currentState.pendingTranscript).toBe('hello');
  expect(currentState.pendingConfidence).toBe(0.7);
  expect(currentState.finalTranscript).toBeUndefined();

  // Step 4: Server sends more pending transcript
  onTranscriptCallback({
    type: 'Transcription',
    audioUtteranceId: clientUuid,
    pendingTranscript: 'hello world',
    pendingTranscriptConfidence: 0.8,
    is_finished: false
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
  } as any);

  expect(trackingCallback).toHaveBeenCalledTimes(3);
  currentState = client.getVGFState();
  expect(currentState.audioUtteranceId).toBe(clientUuid);
  expect(currentState.transcriptionStatus).toBe(TranscriptionStatus.IN_PROGRESS);
  expect(currentState.pendingTranscript).toBe('hello world');
  expect(currentState.pendingConfidence).toBe(0.8);

  // Step 5: User calls stopRecording - triggers FINISHED recording status
  await client.stopRecording();

  expect(trackingCallback).toHaveBeenCalledTimes(4);
  currentState = client.getVGFState();
  expect(currentState.audioUtteranceId).toBe(clientUuid);
  expect(currentState.startRecordingStatus).toBe(RecordingStatus.FINISHED);
  expect(currentState.finalRecordingTimestamp).toBeDefined();
  // Transcription should still be IN_PROGRESS (waiting for server final)
  expect(currentState.transcriptionStatus).toBe(TranscriptionStatus.IN_PROGRESS);

  // Step 6: Server sends final transcript - triggers FINALIZED
  onTranscriptCallback({
    type: 'Transcription',
    audioUtteranceId: clientUuid,
    finalTranscript: 'hello world',
    finalTranscriptConfidence: 0.95,
    is_finished: true
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
  } as any);

  expect(trackingCallback).toHaveBeenCalledTimes(5);
  currentState = client.getVGFState();
  expect(currentState.audioUtteranceId).toBe(clientUuid);
  expect(currentState.startRecordingStatus).toBe(RecordingStatus.FINISHED);
  expect(currentState.transcriptionStatus).toBe(TranscriptionStatus.FINALIZED);
  expect(currentState.finalTranscript).toBe('hello world');
  expect(currentState.finalConfidence).toBe(0.95);
  expect(currentState.pendingTranscript).toBe('');
  expect(currentState.pendingConfidence).toBeUndefined();
  expect(currentState.finalTranscriptionTimestamp).toBeDefined();

  // Verify the emitted snapshots, one per step 2-6, in order.
  expect(stateHistory.map(snapshot => snapshot.startRecordingStatus)).toEqual([
    RecordingStatus.RECORDING,
    RecordingStatus.RECORDING,
    RecordingStatus.RECORDING,
    RecordingStatus.FINISHED,
    RecordingStatus.FINISHED
  ]);
  expect(stateHistory.map(snapshot => snapshot.transcriptionStatus)).toEqual([
    TranscriptionStatus.NOT_STARTED,
    TranscriptionStatus.IN_PROGRESS,
    TranscriptionStatus.IN_PROGRESS,
    TranscriptionStatus.IN_PROGRESS,
    TranscriptionStatus.FINALIZED
  ]);
});
169
+
170
/**
 * Once a pending transcript has moved transcriptionStatus to IN_PROGRESS,
 * stopRecording is expected to emit only the recording-FINISHED change.
 * Finalization then comes from the server's final transcript, not from a
 * synthetic one.
 */
it('should not emit synthetic finalization if transcript was received before stopRecording', async () => {
  // jest.fn() records every invocation, so call counts are asserted on the
  // mock directly; no separate counter or unused `state` parameter is needed.
  const trackingCallback = jest.fn();

  const client = new SimplifiedVGFRecognitionClient({
    asrRequestConfig: {
      provider: 'deepgram',
      language: 'en',
      sampleRate: 16000,
      encoding: AudioEncoding.LINEAR16
    },
    onStateChange: trackingCallback
  });

  // Capture the onTranscript callback handed to the (mocked) underlying client
  const constructorCalls = (RealTimeTwoWayWebSocketRecognitionClient as jest.MockedClass<typeof RealTimeTwoWayWebSocketRecognitionClient>).mock.calls;
  const latestConfig = constructorCalls[constructorCalls.length - 1]?.[0];
  const onTranscriptCallback = latestConfig?.onTranscript;
  if (!onTranscriptCallback) throw new Error('onTranscript callback not found');

  const clientUuid = client.getVGFState().audioUtteranceId;

  // Step 1: Send audio
  client.sendAudio(Buffer.from([1, 2, 3, 4]));
  expect(trackingCallback).toHaveBeenCalledTimes(1);

  // Step 2: Server sends a pending transcript (transcriptionStatus -> IN_PROGRESS)
  onTranscriptCallback({
    type: 'Transcription',
    audioUtteranceId: clientUuid,
    pendingTranscript: 'hello',
    pendingTranscriptConfidence: 0.7,
    is_finished: false
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
  } as any);
  expect(trackingCallback).toHaveBeenCalledTimes(2);

  // Step 3: User calls stopRecording - should NOT emit synthetic finalization
  // because transcriptionStatus is IN_PROGRESS (not NOT_STARTED)
  await client.stopRecording();

  // Only the stopRecording state change, no synthetic finalization
  expect(trackingCallback).toHaveBeenCalledTimes(3);
  let currentState = client.getVGFState();
  expect(currentState.startRecordingStatus).toBe(RecordingStatus.FINISHED);
  // Still waiting for the server's final transcript
  expect(currentState.transcriptionStatus).toBe(TranscriptionStatus.IN_PROGRESS);

  // Step 4: Server sends final transcript after stop
  onTranscriptCallback({
    type: 'Transcription',
    audioUtteranceId: clientUuid,
    finalTranscript: 'hello world',
    finalTranscriptConfidence: 0.95,
    is_finished: true
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
  } as any);

  expect(trackingCallback).toHaveBeenCalledTimes(4);
  currentState = client.getVGFState();
  expect(currentState.transcriptionStatus).toBe(TranscriptionStatus.FINALIZED);
  expect(currentState.finalTranscript).toBe('hello world');
  expect(currentState.finalConfidence).toBe(0.95);
});
237
+ });
238
+
239
+ describe('Early Termination with Synthetic Finalization', () => {
240
/**
 * stopRecording on a client that never received audio: the test expects two
 * emissions from that single call (recording FINISHED, then a synthetic
 * FINALIZED with an empty final transcript).
 */
it('should handle stopRecording without ever sending audio', async () => {
  // jest.fn() records every invocation, so call counts are asserted on the
  // mock directly; no separate counter or unused `state` parameter is needed.
  const trackingCallback = jest.fn();

  const client = new SimplifiedVGFRecognitionClient({
    asrRequestConfig: {
      provider: 'deepgram',
      language: 'en',
      sampleRate: 16000,
      encoding: AudioEncoding.LINEAR16
    },
    onStateChange: trackingCallback
  });

  const clientUuid = client.getVGFState().audioUtteranceId;

  // Step 1: Initial state
  expect(trackingCallback).not.toHaveBeenCalled();
  let currentState = client.getVGFState();
  expect(currentState.audioUtteranceId).toBe(clientUuid);
  expect(currentState.startRecordingStatus).toBe(RecordingStatus.READY);
  expect(currentState.transcriptionStatus).toBe(TranscriptionStatus.NOT_STARTED);

  // Step 2: User calls stopRecording WITHOUT ever sending audio
  // SDK should emit synthetic finalization immediately
  await client.stopRecording();

  // Should have 2 callbacks: stopRecording (FINISHED) + synthetic finalization (FINALIZED)
  expect(trackingCallback).toHaveBeenCalledTimes(2);
  currentState = client.getVGFState();
  expect(currentState.audioUtteranceId).toBe(clientUuid);
  expect(currentState.startRecordingStatus).toBe(RecordingStatus.FINISHED);
  expect(currentState.finalRecordingTimestamp).toBeDefined();
  expect(currentState.transcriptionStatus).toBe(TranscriptionStatus.FINALIZED);
  expect(currentState.finalTranscript).toBe('');
  expect(currentState.finalTranscriptionTimestamp).toBeDefined();
});
283
+
284
/**
 * Audio is sent, then stopRecording is called before any transcript arrives.
 * The test expects three emissions (RECORDING, FINISHED, synthetic FINALIZED
 * with an empty transcript) and expects a later server final transcript to be
 * ignored and logged as suppressed.
 */
it('should emit synthetic finalization and suppress late server transcripts', async () => {
  let emissionCount = 0;
  const emittedStates: RecognitionState[] = [];
  const logEntries: Array<{ level: string; message: string }> = [];

  const onStateChange = jest.fn((state: RecognitionState) => {
    emissionCount += 1;
    emittedStates.push({ ...state });
  });
  const logger = jest.fn((level: string, message: string) => {
    logEntries.push({ level, message });
  });

  const client = new SimplifiedVGFRecognitionClient({
    asrRequestConfig: {
      provider: 'deepgram',
      language: 'en',
      sampleRate: 16000,
      encoding: AudioEncoding.LINEAR16
    },
    onStateChange,
    logger
  });

  // Pull onTranscript out of the config passed to the most recently
  // constructed (mocked) underlying client.
  const MockedClient = RealTimeTwoWayWebSocketRecognitionClient as jest.MockedClass<typeof RealTimeTwoWayWebSocketRecognitionClient>;
  const lastConstructorArgs = MockedClient.mock.calls[MockedClient.mock.calls.length - 1];
  const onTranscript = lastConstructorArgs?.[0]?.onTranscript;
  if (!onTranscript) throw new Error('onTranscript callback not found');

  const sessionId = client.getVGFState().audioUtteranceId;

  // 1) First audio chunk -> one emission.
  client.sendAudio(Buffer.from([1, 2, 3, 4]));
  expect(emissionCount).toBe(1);

  // 2) Stop before any transcript was received: FINISHED plus synthetic
  //    FINALIZED are expected without waiting for the server.
  await client.stopRecording();
  expect(emissionCount).toBe(3);

  const stateAfterStop = client.getVGFState();
  expect(stateAfterStop.audioUtteranceId).toBe(sessionId);
  expect(stateAfterStop.startRecordingStatus).toBe(RecordingStatus.FINISHED);
  expect(stateAfterStop.transcriptionStatus).toBe(TranscriptionStatus.FINALIZED);
  expect(stateAfterStop.finalTranscript).toBe('');
  expect(stateAfterStop.finalTranscriptionTimestamp).toBeDefined();

  // Emitted snapshots in order: RECORDING -> FINISHED -> FINALIZED.
  const [afterAudio, afterStop, afterSynthetic] = emittedStates;
  expect(afterAudio?.startRecordingStatus).toBe(RecordingStatus.RECORDING);
  expect(afterStop?.startRecordingStatus).toBe(RecordingStatus.FINISHED);
  expect(afterStop?.transcriptionStatus).toBe(TranscriptionStatus.NOT_STARTED);
  expect(afterSynthetic?.transcriptionStatus).toBe(TranscriptionStatus.FINALIZED);

  // The state is terminal at this point, with exactly three emissions.
  expect(onStateChange).toHaveBeenCalledTimes(3);

  // 3) A server final arriving after the synthetic finalization.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const lateFinal: any = {
    type: 'Transcription',
    audioUtteranceId: sessionId,
    finalTranscript: 'late server response',
    finalTranscriptConfidence: 0.99,
    is_finished: true
  };
  onTranscript(lateFinal);

  // No further emission.
  expect(emissionCount).toBe(3);
  expect(onStateChange).toHaveBeenCalledTimes(3);

  // The synthetic empty transcript is kept.
  expect(client.getVGFState().finalTranscript).toBe('');

  // The suppression is reported through the logger at info level.
  const sawSuppressionLog = logEntries.some(
    entry => entry.level === 'info' && entry.message.includes('Duplicate terminal status suppressed')
  );
  expect(sawSuppressionLog).toBe(true);
});
367
+ });
368
+
369
+ describe('Abnormal Stop (Cancel/Abandon)', () => {
370
/**
 * Cancel/abandon flow: after audio and one pending transcript, stopAbnormally
 * is expected to emit a single ABORTED state. A late server final transcript
 * and a second stopAbnormally call are both expected to emit nothing further.
 */
it('should emit only ONE ABORTED status and suppress late transcripts', () => {
  let emissionCount = 0;
  const emittedStates: RecognitionState[] = [];
  const logEntries: Array<{ level: string; message: string }> = [];

  const onStateChange = jest.fn((state: RecognitionState) => {
    emissionCount += 1;
    emittedStates.push({ ...state });
  });
  const logger = jest.fn((level: string, message: string) => {
    logEntries.push({ level, message });
  });

  const client = new SimplifiedVGFRecognitionClient({
    asrRequestConfig: {
      provider: 'deepgram',
      language: 'en',
      sampleRate: 16000,
      encoding: AudioEncoding.LINEAR16
    },
    onStateChange,
    logger
  });

  // Pull onTranscript out of the config passed to the most recently
  // constructed (mocked) underlying client.
  const MockedClient = RealTimeTwoWayWebSocketRecognitionClient as jest.MockedClass<typeof RealTimeTwoWayWebSocketRecognitionClient>;
  const lastConstructorArgs = MockedClient.mock.calls[MockedClient.mock.calls.length - 1];
  const onTranscript = lastConstructorArgs?.[0]?.onTranscript;
  if (!onTranscript) throw new Error('onTranscript callback not found');

  const sessionId = client.getVGFState().audioUtteranceId;

  // 1) First audio chunk -> RECORDING.
  client.sendAudio(Buffer.from([1, 2, 3, 4]));
  expect(emissionCount).toBe(1);
  expect(emittedStates[0]?.startRecordingStatus).toBe(RecordingStatus.RECORDING);

  // 2) Pending transcript -> IN_PROGRESS.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const partial: any = {
    type: 'Transcription',
    audioUtteranceId: sessionId,
    pendingTranscript: 'partial text',
    pendingTranscriptConfidence: 0.7,
    is_finished: false
  };
  onTranscript(partial);

  expect(emissionCount).toBe(2);
  expect(emittedStates[1]?.transcriptionStatus).toBe(TranscriptionStatus.IN_PROGRESS);
  expect(emittedStates[1]?.pendingTranscript).toBe('partial text');

  // 3) Cancel the session: one more emission carrying ABORTED + FINISHED.
  client.stopAbnormally();

  expect(emissionCount).toBe(3);
  expect(emittedStates[2]?.transcriptionStatus).toBe(TranscriptionStatus.ABORTED);
  expect(emittedStates[2]?.startRecordingStatus).toBe(RecordingStatus.FINISHED);

  // 4) A server final arriving after the abort.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const lateFinal: any = {
    type: 'Transcription',
    audioUtteranceId: sessionId,
    finalTranscript: 'late server response',
    finalTranscriptConfidence: 0.99,
    is_finished: true
  };
  onTranscript(lateFinal);

  // No further emission.
  expect(emissionCount).toBe(3);
  expect(onStateChange).toHaveBeenCalledTimes(3);

  // Status stays ABORTED rather than flipping to FINALIZED.
  expect(client.getVGFState().transcriptionStatus).toBe(TranscriptionStatus.ABORTED);

  // The suppression is reported through the logger at info level.
  const sawSuppressionLog = logEntries.some(
    entry => entry.level === 'info' && entry.message.includes('Duplicate terminal status suppressed')
  );
  expect(sawSuppressionLog).toBe(true);

  // 5) A repeated stopAbnormally must not emit again.
  client.stopAbnormally();
  expect(emissionCount).toBe(3);
  expect(onStateChange).toHaveBeenCalledTimes(3);
});
459
+ });
460
+
461
+ describe('Error Status Protection', () => {
462
/**
 * Error flow: after audio and one pending transcript, the first onError is
 * expected to emit a single ERROR state. A second error and a late server
 * final transcript are both expected to emit nothing further.
 */
it('should emit only ONE ERROR status and suppress late errors and transcripts', () => {
  let emissionCount = 0;
  const emittedStates: RecognitionState[] = [];
  const logEntries: Array<{ level: string; message: string }> = [];

  const onStateChange = jest.fn((state: RecognitionState) => {
    emissionCount += 1;
    emittedStates.push({ ...state });
  });
  const logger = jest.fn((level: string, message: string) => {
    logEntries.push({ level, message });
  });

  const client = new SimplifiedVGFRecognitionClient({
    asrRequestConfig: {
      provider: 'deepgram',
      language: 'en',
      sampleRate: 16000,
      encoding: AudioEncoding.LINEAR16
    },
    onStateChange,
    logger
  });

  // Pull onTranscript and onError out of the config passed to the most
  // recently constructed (mocked) underlying client.
  const MockedClient = RealTimeTwoWayWebSocketRecognitionClient as jest.MockedClass<typeof RealTimeTwoWayWebSocketRecognitionClient>;
  const underlyingConfig = MockedClient.mock.calls[MockedClient.mock.calls.length - 1]?.[0];
  const onTranscript = underlyingConfig?.onTranscript;
  const onError = underlyingConfig?.onError;
  if (!onTranscript) throw new Error('onTranscript callback not found');
  if (!onError) throw new Error('onError callback not found');

  const sessionId = client.getVGFState().audioUtteranceId;

  // 1) First audio chunk -> RECORDING.
  client.sendAudio(Buffer.from([1, 2, 3, 4]));
  expect(emissionCount).toBe(1);
  expect(emittedStates[0]?.startRecordingStatus).toBe(RecordingStatus.RECORDING);

  // 2) Pending transcript -> IN_PROGRESS.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const partial: any = {
    type: 'Transcription',
    audioUtteranceId: sessionId,
    pendingTranscript: 'partial text',
    pendingTranscriptConfidence: 0.7,
    is_finished: false
  };
  onTranscript(partial);

  expect(emissionCount).toBe(2);
  expect(emittedStates[1]?.transcriptionStatus).toBe(TranscriptionStatus.IN_PROGRESS);

  // 3) First error (e.g. provider error, timeout): one emission with ERROR + FINISHED.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const firstError: any = {
    audioUtteranceId: sessionId,
    message: 'Provider connection failed'
  };
  onError(firstError);

  expect(emissionCount).toBe(3);
  expect(emittedStates[2]?.transcriptionStatus).toBe(TranscriptionStatus.ERROR);
  expect(emittedStates[2]?.startRecordingStatus).toBe(RecordingStatus.FINISHED);

  // 4) A second error must not emit again.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const secondError: any = {
    audioUtteranceId: sessionId,
    message: 'Second error message'
  };
  onError(secondError);

  expect(emissionCount).toBe(3);
  expect(onStateChange).toHaveBeenCalledTimes(3);

  // 5) A late server final must not emit either.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const lateFinal: any = {
    type: 'Transcription',
    audioUtteranceId: sessionId,
    finalTranscript: 'late server response',
    finalTranscriptConfidence: 0.99,
    is_finished: true
  };
  onTranscript(lateFinal);

  expect(emissionCount).toBe(3);
  expect(onStateChange).toHaveBeenCalledTimes(3);

  // Status stays ERROR.
  expect(client.getVGFState().transcriptionStatus).toBe(TranscriptionStatus.ERROR);

  // A suppression entry is expected in the log at info level.
  // NOTE(review): this only checks that at least one such entry exists; it
  // cannot tell whether the second error or the late transcript produced it.
  const sawSuppressionLog = logEntries.some(
    entry => entry.level === 'info' && entry.message.includes('Duplicate terminal status suppressed')
  );
  expect(sawSuppressionLog).toBe(true);
});
562
+ });
563
+
564
+ describe('Terminal Status Protection (Duplicate Suppression)', () => {
565
/**
 * Duplicate finals: after audio and a pending transcript, the first final
 * transcript is expected to emit FINALIZED. A second final with different
 * text is expected to be suppressed, leaving the first transcript in place.
 */
it('should emit only ONE terminal status per session', async () => {
  let emissionCount = 0;
  const emittedStates: RecognitionState[] = [];
  const logEntries: Array<{ level: string; message: string }> = [];

  const onStateChange = jest.fn((state: RecognitionState) => {
    emissionCount += 1;
    emittedStates.push({ ...state });
  });
  const logger = jest.fn((level: string, message: string) => {
    logEntries.push({ level, message });
  });

  const client = new SimplifiedVGFRecognitionClient({
    asrRequestConfig: {
      provider: 'deepgram',
      language: 'en',
      sampleRate: 16000,
      encoding: AudioEncoding.LINEAR16
    },
    onStateChange,
    logger
  });

  // Pull onTranscript out of the config passed to the most recently
  // constructed (mocked) underlying client.
  const MockedClient = RealTimeTwoWayWebSocketRecognitionClient as jest.MockedClass<typeof RealTimeTwoWayWebSocketRecognitionClient>;
  const lastConstructorArgs = MockedClient.mock.calls[MockedClient.mock.calls.length - 1];
  const onTranscript = lastConstructorArgs?.[0]?.onTranscript;
  if (!onTranscript) throw new Error('onTranscript callback not found');

  const sessionId = client.getVGFState().audioUtteranceId;

  // Builds a final-transcript message for this session.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const finalMessage = (text: string, confidence: number): any => ({
    type: 'Transcription',
    audioUtteranceId: sessionId,
    finalTranscript: text,
    finalTranscriptConfidence: confidence,
    is_finished: true
  });

  // 1) First audio chunk.
  client.sendAudio(Buffer.from([1, 2, 3, 4]));
  expect(emissionCount).toBe(1);

  // 2) Pending transcript (transcriptionStatus -> IN_PROGRESS, which prevents
  //    synthetic finalization).
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const partial: any = {
    type: 'Transcription',
    audioUtteranceId: sessionId,
    pendingTranscript: 'hello',
    pendingTranscriptConfidence: 0.7,
    is_finished: false
  };
  onTranscript(partial);
  expect(emissionCount).toBe(2);

  // 3) First final transcript: the one terminal emission.
  onTranscript(finalMessage('hello world', 0.95));

  expect(emissionCount).toBe(3);
  expect(emittedStates[2]?.transcriptionStatus).toBe(TranscriptionStatus.FINALIZED);

  // 4) A second final with different content must be suppressed.
  onTranscript(finalMessage('different transcript', 0.99));

  expect(emissionCount).toBe(3);
  expect(onStateChange).toHaveBeenCalledTimes(3);

  // The suppression is reported through the logger at info level.
  const sawSuppressionLog = logEntries.some(
    entry => entry.level === 'info' && entry.message.includes('Duplicate terminal status suppressed')
  );
  expect(sawSuppressionLog).toBe(true);

  // The original final transcript is still the one in state.
  expect(client.getVGFState().finalTranscript).toBe('hello world');
});
650
+
651
/**
 * A client constructed from an already-terminal initial state is expected to
 * start a fresh session: the utterance id differs from the old one, one state
 * change is emitted for that, and a final transcript for the new id is
 * emitted rather than suppressed.
 */
it('should reset terminal status flag when new UUID is generated', () => {
  const onStateChange = jest.fn();

  // A previous session that already reached FINALIZED.
  const previousSession: RecognitionState = {
    audioUtteranceId: 'old-session-uuid',
    transcriptionStatus: TranscriptionStatus.FINALIZED,
    startRecordingStatus: RecordingStatus.FINISHED,
    pendingTranscript: '',
    finalTranscript: 'previous transcript'
  };

  const client = new SimplifiedVGFRecognitionClient({
    initialState: previousSession,
    asrRequestConfig: {
      provider: 'deepgram',
      language: 'en',
      sampleRate: 16000,
      encoding: AudioEncoding.LINEAR16
    },
    onStateChange
  });

  // The utterance id must have been regenerated...
  const freshId = client.getVGFState().audioUtteranceId;
  expect(freshId).not.toBe('old-session-uuid');

  // ...and that regeneration accounts for exactly one state-change emission.
  expect(onStateChange).toHaveBeenCalledTimes(1);

  // Pull onTranscript out of the config passed to the most recently
  // constructed (mocked) underlying client.
  const MockedClient = RealTimeTwoWayWebSocketRecognitionClient as jest.MockedClass<typeof RealTimeTwoWayWebSocketRecognitionClient>;
  const lastConstructorArgs = MockedClient.mock.calls[MockedClient.mock.calls.length - 1];
  const onTranscript = lastConstructorArgs?.[0]?.onTranscript;
  if (!onTranscript) throw new Error('onTranscript callback not found');

  // Final transcript addressed to the new session id.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const newSessionFinal: any = {
    type: 'Transcription',
    audioUtteranceId: freshId,
    finalTranscript: 'new transcript',
    finalTranscriptConfidence: 0.9,
    is_finished: true
  };
  onTranscript(newSessionFinal);

  // The terminal status for the new session is emitted as a second call.
  expect(onStateChange).toHaveBeenCalledTimes(2);
  const [emittedState] = onStateChange.mock.calls[1];
  expect(emittedState.transcriptionStatus).toBe(TranscriptionStatus.FINALIZED);
  expect(emittedState.finalTranscript).toBe('new transcript');
});
703
+ });
704
+ });