@volley/recognition-client-sdk 0.1.385 → 0.1.418
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser.bundled.d.ts +23 -1
- package/dist/config-builder.d.ts +5 -0
- package/dist/config-builder.d.ts.map +1 -1
- package/dist/index.bundled.d.ts +134 -80
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +136 -40
- package/dist/index.js.map +4 -4
- package/dist/recog-client-sdk.browser.js +62 -23
- package/dist/recog-client-sdk.browser.js.map +4 -4
- package/dist/recognition-client.d.ts.map +1 -1
- package/dist/recognition-client.types.d.ts +6 -0
- package/dist/recognition-client.types.d.ts.map +1 -1
- package/dist/simplified-vgf-recognition-client.d.ts +2 -0
- package/dist/simplified-vgf-recognition-client.d.ts.map +1 -1
- package/dist/utils/url-builder.d.ts +2 -0
- package/dist/utils/url-builder.d.ts.map +1 -1
- package/dist/vgf-recognition-mapper.d.ts +17 -0
- package/dist/vgf-recognition-mapper.d.ts.map +1 -1
- package/package.json +3 -3
- package/src/config-builder.ts +9 -0
- package/src/index.ts +2 -0
- package/src/recognition-client.ts +3 -2
- package/src/recognition-client.types.ts +7 -0
- package/src/simplified-vgf-recognition-client.integration.spec.ts +704 -0
- package/src/simplified-vgf-recognition-client.spec.ts +199 -13
- package/src/simplified-vgf-recognition-client.ts +75 -24
- package/src/utils/audio-ring-buffer.ts +2 -2
- package/src/utils/message-handler.ts +4 -4
- package/src/utils/url-builder.ts +10 -3
- package/src/vgf-recognition-mapper.spec.ts +78 -0
- package/src/vgf-recognition-mapper.ts +29 -0
|
@@ -0,0 +1,704 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Integration tests for SimplifiedVGFRecognitionClient state transitions.
|
|
3
|
+
*
|
|
4
|
+
* These tests verify complete recognition flows involving multiple method calls
|
|
5
|
+
* and state transitions, including:
|
|
6
|
+
* - Normal recognition flow (sendAudio → transcript → stopRecording → final)
|
|
7
|
+
* - Early termination with synthetic finalization
|
|
8
|
+
* - Abnormal stop (cancel/abandon) and error status protection
|
|
9
|
+
* - Terminal status protection (duplicate suppression)
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import {
|
|
13
|
+
SimplifiedVGFRecognitionClient,
|
|
14
|
+
SimplifiedVGFClientConfig
|
|
15
|
+
} from './simplified-vgf-recognition-client.js';
|
|
16
|
+
import { RealTimeTwoWayWebSocketRecognitionClient } from './recognition-client.js';
|
|
17
|
+
import {
|
|
18
|
+
RecognitionState,
|
|
19
|
+
TranscriptionStatus,
|
|
20
|
+
RecordingStatus
|
|
21
|
+
} from './vgf-recognition-state.js';
|
|
22
|
+
import { AudioEncoding } from '@recog/shared-types';
|
|
23
|
+
|
|
24
|
+
// Mock the underlying client
|
|
25
|
+
jest.mock('./recognition-client.js', () => {
  // Small builders so each stubbed method reads as "name: behaviour".
  const returning = (value: unknown) => jest.fn().mockReturnValue(value);
  const resolving = () => jest.fn().mockResolvedValue(undefined);

  // One shared stub object: every constructed underlying client is this same instance.
  const sharedStub = {
    // Lifecycle / transport
    connect: resolving(),
    sendAudio: jest.fn(),
    stopRecording: resolving(),
    stopAbnormally: jest.fn(),
    // Identity / introspection
    getAudioUtteranceId: returning('mock-uuid'),
    getUrl: returning('wss://mock-url'),
    getState: returning('IDLE'),
    // Status flags
    isConnected: returning(true),
    isConnecting: returning(false),
    isStopping: returning(false),
    isTranscriptionFinished: returning(false),
    isBufferOverflowing: returning(false)
  };

  // The constructor mock records the config it receives, which the tests read
  // back through `.mock.calls` to obtain the callbacks passed by the wrapper.
  const RealTimeTwoWayWebSocketRecognitionClient = jest.fn().mockImplementation(() => sharedStub);
  return { RealTimeTwoWayWebSocketRecognitionClient };
});
|
|
45
|
+
|
|
46
|
+
describe('SimplifiedVGFRecognitionClient Integration - State Transitions', () => {
|
|
47
|
+
// Clear recorded calls on every mock so each test inspects only its own constructor call.
beforeEach(function clearRecordedMockCalls(): void {
  jest.clearAllMocks();
});
|
|
50
|
+
|
|
51
|
+
describe('Normal Recognition Flow', () => {
|
|
52
|
+
it('should follow correct state transitions for complete normal flow', async () => {
  // Every onStateChange emission is snapshotted and counted.
  const stateHistory: RecognitionState[] = [];
  let stateChangeCounter = 0;
  const trackingCallback = jest.fn((state: RecognitionState) => {
    stateHistory.push({ ...state });
    stateChangeCounter++;
  });

  const client = new SimplifiedVGFRecognitionClient({
    asrRequestConfig: { provider: 'deepgram', language: 'en', sampleRate: 16000, encoding: AudioEncoding.LINEAR16 },
    onStateChange: trackingCallback
  });

  // Read the transcript handler back from the config given to the mocked underlying client.
  const MockedUnderlying = RealTimeTwoWayWebSocketRecognitionClient as jest.MockedClass<typeof RealTimeTwoWayWebSocketRecognitionClient>;
  const ctorCalls = MockedUnderlying.mock.calls;
  const onTranscriptCallback = ctorCalls[ctorCalls.length - 1]?.[0]?.onTranscript;
  if (!onTranscriptCallback) throw new Error('onTranscript callback not found');

  const clientUuid = client.getVGFState().audioUtteranceId;

  // Simulates a server transcription message addressed to this client's utterance.
  const emitTranscript = (fields: Record<string, unknown>): void => {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    onTranscriptCallback({ type: 'Transcription', audioUtteranceId: clientUuid, ...fields } as any);
  };

  // Step 1: nothing has happened yet
  expect(stateChangeCounter).toBe(0);
  const initial = client.getVGFState();
  expect(initial.audioUtteranceId).toBe(clientUuid);
  expect(initial.startRecordingStatus).toBe(RecordingStatus.READY);
  expect(initial.transcriptionStatus).toBe(TranscriptionStatus.NOT_STARTED);
  expect(initial.pendingTranscript).toBe('');
  expect(initial.finalTranscript).toBeUndefined();

  // Step 2: first audio chunk moves recording to RECORDING
  client.sendAudio(Buffer.from([1, 2, 3, 4]));

  expect(stateChangeCounter).toBe(1);
  const afterAudio = client.getVGFState();
  expect(afterAudio.audioUtteranceId).toBe(clientUuid);
  expect(afterAudio.startRecordingStatus).toBe(RecordingStatus.RECORDING);
  expect(afterAudio.startRecordingTimestamp).toBeDefined();
  expect(afterAudio.transcriptionStatus).toBe(TranscriptionStatus.NOT_STARTED);

  // Step 3: first pending transcript moves transcription to IN_PROGRESS
  emitTranscript({ pendingTranscript: 'hello', pendingTranscriptConfidence: 0.7, is_finished: false });

  expect(stateChangeCounter).toBe(2);
  const afterFirstPending = client.getVGFState();
  expect(afterFirstPending.audioUtteranceId).toBe(clientUuid);
  expect(afterFirstPending.startRecordingStatus).toBe(RecordingStatus.RECORDING);
  expect(afterFirstPending.transcriptionStatus).toBe(TranscriptionStatus.IN_PROGRESS);
  expect(afterFirstPending.pendingTranscript).toBe('hello');
  expect(afterFirstPending.pendingConfidence).toBe(0.7);
  expect(afterFirstPending.finalTranscript).toBeUndefined();

  // Step 4: a further pending transcript replaces the previous one
  emitTranscript({ pendingTranscript: 'hello world', pendingTranscriptConfidence: 0.8, is_finished: false });

  expect(stateChangeCounter).toBe(3);
  const afterSecondPending = client.getVGFState();
  expect(afterSecondPending.audioUtteranceId).toBe(clientUuid);
  expect(afterSecondPending.transcriptionStatus).toBe(TranscriptionStatus.IN_PROGRESS);
  expect(afterSecondPending.pendingTranscript).toBe('hello world');
  expect(afterSecondPending.pendingConfidence).toBe(0.8);

  // Step 5: stopRecording finishes the recording side only
  await client.stopRecording();

  expect(stateChangeCounter).toBe(4);
  const afterStop = client.getVGFState();
  expect(afterStop.audioUtteranceId).toBe(clientUuid);
  expect(afterStop.startRecordingStatus).toBe(RecordingStatus.FINISHED);
  expect(afterStop.finalRecordingTimestamp).toBeDefined();
  // Transcription is still waiting for the server's final result
  expect(afterStop.transcriptionStatus).toBe(TranscriptionStatus.IN_PROGRESS);

  // Step 6: the server's final transcript finalizes the session
  emitTranscript({ finalTranscript: 'hello world', finalTranscriptConfidence: 0.95, is_finished: true });

  expect(stateChangeCounter).toBe(5);
  const afterFinal = client.getVGFState();
  expect(afterFinal.audioUtteranceId).toBe(clientUuid);
  expect(afterFinal.startRecordingStatus).toBe(RecordingStatus.FINISHED);
  expect(afterFinal.transcriptionStatus).toBe(TranscriptionStatus.FINALIZED);
  expect(afterFinal.finalTranscript).toBe('hello world');
  expect(afterFinal.finalConfidence).toBe(0.95);
  expect(afterFinal.pendingTranscript).toBe('');
  expect(afterFinal.pendingConfidence).toBeUndefined();
  expect(afterFinal.finalTranscriptionTimestamp).toBeDefined();

  // One callback per step 2-6
  expect(trackingCallback).toHaveBeenCalledTimes(5);
});
|
|
169
|
+
|
|
170
|
+
it('should not emit synthetic finalization if transcript was received before stopRecording', async () => {
  // Only the number of emissions matters in this test.
  let stateChangeCounter = 0;
  const trackingCallback = jest.fn((_state: RecognitionState) => {
    stateChangeCounter++;
  });

  const client = new SimplifiedVGFRecognitionClient({
    asrRequestConfig: { provider: 'deepgram', language: 'en', sampleRate: 16000, encoding: AudioEncoding.LINEAR16 },
    onStateChange: trackingCallback
  });

  // Read the transcript handler back from the config given to the mocked underlying client.
  const MockedUnderlying = RealTimeTwoWayWebSocketRecognitionClient as jest.MockedClass<typeof RealTimeTwoWayWebSocketRecognitionClient>;
  const ctorCalls = MockedUnderlying.mock.calls;
  const onTranscriptCallback = ctorCalls[ctorCalls.length - 1]?.[0]?.onTranscript;
  if (!onTranscriptCallback) throw new Error('onTranscript callback not found');

  const clientUuid = client.getVGFState().audioUtteranceId;

  // Simulates a server transcription message addressed to this client's utterance.
  const emitTranscript = (fields: Record<string, unknown>): void => {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    onTranscriptCallback({ type: 'Transcription', audioUtteranceId: clientUuid, ...fields } as any);
  };

  // Step 1: audio starts the recording
  client.sendAudio(Buffer.from([1, 2, 3, 4]));
  expect(stateChangeCounter).toBe(1);

  // Step 2: a pending transcript puts transcriptionStatus into IN_PROGRESS
  emitTranscript({ pendingTranscript: 'hello', pendingTranscriptConfidence: 0.7, is_finished: false });
  expect(stateChangeCounter).toBe(2);

  // Step 3: stopRecording must NOT synthesize a final result here, because
  // transcriptionStatus is IN_PROGRESS rather than NOT_STARTED
  await client.stopRecording();

  expect(stateChangeCounter).toBe(3); // Only stopRecording state change, no synthetic finalization
  const afterStop = client.getVGFState();
  expect(afterStop.startRecordingStatus).toBe(RecordingStatus.FINISHED);
  expect(afterStop.transcriptionStatus).toBe(TranscriptionStatus.IN_PROGRESS); // Still waiting for final

  // Step 4: the real final transcript arrives after the stop
  emitTranscript({ finalTranscript: 'hello world', finalTranscriptConfidence: 0.95, is_finished: true });

  expect(stateChangeCounter).toBe(4);
  const afterFinal = client.getVGFState();
  expect(afterFinal.transcriptionStatus).toBe(TranscriptionStatus.FINALIZED);
  expect(afterFinal.finalTranscript).toBe('hello world');
  expect(afterFinal.finalConfidence).toBe(0.95);

  expect(trackingCallback).toHaveBeenCalledTimes(4);
});
|
|
237
|
+
});
|
|
238
|
+
|
|
239
|
+
describe('Early Termination with Synthetic Finalization', () => {
|
|
240
|
+
it('should handle stopRecording without ever sending audio', async () => {
  // Only the number of emissions matters in this test.
  let stateChangeCounter = 0;
  const trackingCallback = jest.fn((_state: RecognitionState) => {
    stateChangeCounter++;
  });

  const client = new SimplifiedVGFRecognitionClient({
    asrRequestConfig: { provider: 'deepgram', language: 'en', sampleRate: 16000, encoding: AudioEncoding.LINEAR16 },
    onStateChange: trackingCallback
  });

  const clientUuid = client.getVGFState().audioUtteranceId;

  // Step 1: untouched client
  expect(stateChangeCounter).toBe(0);
  const initial = client.getVGFState();
  expect(initial.audioUtteranceId).toBe(clientUuid);
  expect(initial.startRecordingStatus).toBe(RecordingStatus.READY);
  expect(initial.transcriptionStatus).toBe(TranscriptionStatus.NOT_STARTED);

  // Step 2: stop immediately, with no audio ever sent.
  // The SDK is expected to synthesize the final result right away.
  await client.stopRecording();

  // Two emissions: stopRecording (FINISHED) and the synthetic finalization (FINALIZED)
  expect(stateChangeCounter).toBe(2);
  const afterStop = client.getVGFState();
  expect(afterStop.audioUtteranceId).toBe(clientUuid);
  expect(afterStop.startRecordingStatus).toBe(RecordingStatus.FINISHED);
  expect(afterStop.finalRecordingTimestamp).toBeDefined();
  expect(afterStop.transcriptionStatus).toBe(TranscriptionStatus.FINALIZED);
  expect(afterStop.finalTranscript).toBe('');
  expect(afterStop.finalTranscriptionTimestamp).toBeDefined();

  // Total emissions seen by the callback
  expect(trackingCallback).toHaveBeenCalledTimes(2);
});
|
|
283
|
+
|
|
284
|
+
it('should emit synthetic finalization and suppress late server transcripts', async () => {
  // Every onStateChange emission is snapshotted and counted; log calls are recorded too.
  const stateHistory: RecognitionState[] = [];
  const loggerCalls: Array<{ level: string; message: string }> = [];
  let stateChangeCounter = 0;

  const trackingCallback = jest.fn((state: RecognitionState) => {
    stateHistory.push({ ...state });
    stateChangeCounter++;
  });
  const logSpy = jest.fn((level: string, message: string) => {
    loggerCalls.push({ level, message });
  });

  const client = new SimplifiedVGFRecognitionClient({
    asrRequestConfig: { provider: 'deepgram', language: 'en', sampleRate: 16000, encoding: AudioEncoding.LINEAR16 },
    onStateChange: trackingCallback,
    logger: logSpy
  });

  // Read the transcript handler back from the config given to the mocked underlying client.
  const MockedUnderlying = RealTimeTwoWayWebSocketRecognitionClient as jest.MockedClass<typeof RealTimeTwoWayWebSocketRecognitionClient>;
  const ctorCalls = MockedUnderlying.mock.calls;
  const onTranscriptCallback = ctorCalls[ctorCalls.length - 1]?.[0]?.onTranscript;
  if (!onTranscriptCallback) throw new Error('onTranscript callback not found');

  const clientUuid = client.getVGFState().audioUtteranceId;

  // Step 1: audio starts the recording
  client.sendAudio(Buffer.from([1, 2, 3, 4]));
  expect(stateChangeCounter).toBe(1);

  // Step 2: stop BEFORE any transcript has arrived.
  // The SDK is expected to synthesize the final result without waiting for the server.
  await client.stopRecording();

  // Three emissions: sendAudio, stopRecording (FINISHED), synthetic finalization (FINALIZED)
  expect(stateChangeCounter).toBe(3);
  const afterStop = client.getVGFState();
  expect(afterStop.audioUtteranceId).toBe(clientUuid);
  expect(afterStop.startRecordingStatus).toBe(RecordingStatus.FINISHED);
  expect(afterStop.transcriptionStatus).toBe(TranscriptionStatus.FINALIZED);
  expect(afterStop.finalTranscript).toBe('');
  expect(afterStop.finalTranscriptionTimestamp).toBeDefined();

  // Emission order: RECORDING -> FINISHED (still NOT_STARTED) -> FINALIZED
  const [onAudio, onStop, onSynthetic] = [stateHistory[0]!, stateHistory[1]!, stateHistory[2]!];
  expect(onAudio.startRecordingStatus).toBe(RecordingStatus.RECORDING);
  expect(onStop.startRecordingStatus).toBe(RecordingStatus.FINISHED);
  expect(onStop.transcriptionStatus).toBe(TranscriptionStatus.NOT_STARTED);
  expect(onSynthetic.transcriptionStatus).toBe(TranscriptionStatus.FINALIZED);

  // The state is terminal (FINALIZED), so the game can proceed
  expect(trackingCallback).toHaveBeenCalledTimes(3);

  // Step 3: a late server final arrives and must be SUPPRESSED
  onTranscriptCallback({
    type: 'Transcription',
    audioUtteranceId: clientUuid,
    finalTranscript: 'late server response',
    finalTranscriptConfidence: 0.99,
    is_finished: true
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
  } as any);

  // No further emission
  expect(stateChangeCounter).toBe(3);
  expect(trackingCallback).toHaveBeenCalledTimes(3);

  // The synthetic empty transcript is kept
  expect(client.getVGFState().finalTranscript).toBe('');

  // The suppression is reported through the logger
  const sawSuppressionLog = loggerCalls.some(
    entry => entry.level === 'info' && entry.message.includes('Duplicate terminal status suppressed')
  );
  expect(sawSuppressionLog).toBe(true);
});
|
|
367
|
+
});
|
|
368
|
+
|
|
369
|
+
describe('Abnormal Stop (Cancel/Abandon)', () => {
|
|
370
|
+
it('should emit only ONE ABORTED status and suppress late transcripts', () => {
  // Every onStateChange emission is snapshotted and counted; log calls are recorded too.
  const stateHistory: RecognitionState[] = [];
  const loggerCalls: Array<{ level: string; message: string }> = [];
  let stateChangeCounter = 0;

  const trackingCallback = jest.fn((state: RecognitionState) => {
    stateHistory.push({ ...state });
    stateChangeCounter++;
  });
  const logSpy = jest.fn((level: string, message: string) => {
    loggerCalls.push({ level, message });
  });

  const client = new SimplifiedVGFRecognitionClient({
    asrRequestConfig: { provider: 'deepgram', language: 'en', sampleRate: 16000, encoding: AudioEncoding.LINEAR16 },
    onStateChange: trackingCallback,
    logger: logSpy
  });

  // Read the transcript handler back from the config given to the mocked underlying client.
  const MockedUnderlying = RealTimeTwoWayWebSocketRecognitionClient as jest.MockedClass<typeof RealTimeTwoWayWebSocketRecognitionClient>;
  const ctorCalls = MockedUnderlying.mock.calls;
  const onTranscriptCallback = ctorCalls[ctorCalls.length - 1]?.[0]?.onTranscript;
  if (!onTranscriptCallback) throw new Error('onTranscript callback not found');

  const clientUuid = client.getVGFState().audioUtteranceId;

  // Simulates a server transcription message addressed to this client's utterance.
  const emitTranscript = (fields: Record<string, unknown>): void => {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    onTranscriptCallback({ type: 'Transcription', audioUtteranceId: clientUuid, ...fields } as any);
  };

  // Step 1: audio starts the recording
  client.sendAudio(Buffer.from([1, 2, 3, 4]));
  expect(stateChangeCounter).toBe(1);
  expect(stateHistory[0]!.startRecordingStatus).toBe(RecordingStatus.RECORDING);

  // Step 2: a pending transcript arrives
  emitTranscript({ pendingTranscript: 'partial text', pendingTranscriptConfidence: 0.7, is_finished: false });

  expect(stateChangeCounter).toBe(2);
  expect(stateHistory[1]!.transcriptionStatus).toBe(TranscriptionStatus.IN_PROGRESS);
  expect(stateHistory[1]!.pendingTranscript).toBe('partial text');

  // Step 3: the user cancels/abandons the session
  client.stopAbnormally();

  // Three emissions: sendAudio, pending transcript, stopAbnormally (ABORTED)
  expect(stateChangeCounter).toBe(3);
  expect(stateHistory[2]!.transcriptionStatus).toBe(TranscriptionStatus.ABORTED);
  expect(stateHistory[2]!.startRecordingStatus).toBe(RecordingStatus.FINISHED);

  // Step 4: a late server final arrives and must be SUPPRESSED
  emitTranscript({ finalTranscript: 'late server response', finalTranscriptConfidence: 0.99, is_finished: true });

  // No further emission
  expect(stateChangeCounter).toBe(3);
  expect(trackingCallback).toHaveBeenCalledTimes(3);

  // Status stays ABORTED instead of flipping to FINALIZED
  expect(client.getVGFState().transcriptionStatus).toBe(TranscriptionStatus.ABORTED);

  // The suppression is reported through the logger
  const sawSuppressionLog = loggerCalls.some(
    entry => entry.level === 'info' && entry.message.includes('Duplicate terminal status suppressed')
  );
  expect(sawSuppressionLog).toBe(true);

  // Step 5: a second stopAbnormally must not emit again
  client.stopAbnormally();
  expect(stateChangeCounter).toBe(3);
  expect(trackingCallback).toHaveBeenCalledTimes(3);
});
|
|
459
|
+
});
|
|
460
|
+
|
|
461
|
+
describe('Error Status Protection', () => {
|
|
462
|
+
it('should emit only ONE ERROR status and suppress late errors and transcripts', () => {
  // Every onStateChange emission is snapshotted and counted; log calls are recorded too.
  const stateHistory: RecognitionState[] = [];
  const loggerCalls: Array<{ level: string; message: string }> = [];
  let stateChangeCounter = 0;

  const trackingCallback = jest.fn((state: RecognitionState) => {
    stateHistory.push({ ...state });
    stateChangeCounter++;
  });
  const logSpy = jest.fn((level: string, message: string) => {
    loggerCalls.push({ level, message });
  });

  const client = new SimplifiedVGFRecognitionClient({
    asrRequestConfig: { provider: 'deepgram', language: 'en', sampleRate: 16000, encoding: AudioEncoding.LINEAR16 },
    onStateChange: trackingCallback,
    logger: logSpy
  });

  // Read both handlers back from the config given to the mocked underlying client.
  const MockedUnderlying = RealTimeTwoWayWebSocketRecognitionClient as jest.MockedClass<typeof RealTimeTwoWayWebSocketRecognitionClient>;
  const ctorCalls = MockedUnderlying.mock.calls;
  const underlyingConfig = ctorCalls[ctorCalls.length - 1]?.[0];
  const onTranscriptCallback = underlyingConfig?.onTranscript;
  const onErrorCallback = underlyingConfig?.onError;
  if (!onTranscriptCallback) throw new Error('onTranscript callback not found');
  if (!onErrorCallback) throw new Error('onError callback not found');

  const clientUuid = client.getVGFState().audioUtteranceId;

  // Simulates a server transcription message addressed to this client's utterance.
  const emitTranscript = (fields: Record<string, unknown>): void => {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    onTranscriptCallback({ type: 'Transcription', audioUtteranceId: clientUuid, ...fields } as any);
  };
  // Simulates an error report addressed to this client's utterance.
  const emitError = (message: string): void => {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    onErrorCallback({ audioUtteranceId: clientUuid, message } as any);
  };

  // Step 1: audio starts the recording
  client.sendAudio(Buffer.from([1, 2, 3, 4]));
  expect(stateChangeCounter).toBe(1);
  expect(stateHistory[0]!.startRecordingStatus).toBe(RecordingStatus.RECORDING);

  // Step 2: a pending transcript arrives
  emitTranscript({ pendingTranscript: 'partial text', pendingTranscriptConfidence: 0.7, is_finished: false });

  expect(stateChangeCounter).toBe(2);
  expect(stateHistory[1]!.transcriptionStatus).toBe(TranscriptionStatus.IN_PROGRESS);

  // Step 3: an error occurs (e.g., provider error, timeout)
  emitError('Provider connection failed');

  // Three emissions: sendAudio, pending transcript, error (ERROR)
  expect(stateChangeCounter).toBe(3);
  expect(stateHistory[2]!.transcriptionStatus).toBe(TranscriptionStatus.ERROR);
  expect(stateHistory[2]!.startRecordingStatus).toBe(RecordingStatus.FINISHED);

  // Step 4: a second error must be SUPPRESSED
  emitError('Second error message');

  // No further emission
  expect(stateChangeCounter).toBe(3);
  expect(trackingCallback).toHaveBeenCalledTimes(3);

  // Step 5: a late final transcript must be SUPPRESSED as well
  emitTranscript({ finalTranscript: 'late server response', finalTranscriptConfidence: 0.99, is_finished: true });

  // Still no further emission
  expect(stateChangeCounter).toBe(3);
  expect(trackingCallback).toHaveBeenCalledTimes(3);

  // Status stays ERROR
  expect(client.getVGFState().transcriptionStatus).toBe(TranscriptionStatus.ERROR);

  // The suppression is reported through the logger.
  // NOTE(review): this searches the whole log, so it cannot tell whether the entry was
  // produced by the late transcript (step 5) or by the duplicate error (step 4) - confirm
  // against the client implementation whether a stricter check is wanted.
  const sawSuppressionLog = loggerCalls.some(
    entry => entry.level === 'info' && entry.message.includes('Duplicate terminal status suppressed')
  );
  expect(sawSuppressionLog).toBe(true);
});
|
|
562
|
+
});
|
|
563
|
+
|
|
564
|
+
describe('Terminal Status Protection (Duplicate Suppression)', () => {
|
|
565
|
+
it('should emit only ONE terminal status per session', async () => {
  // Every onStateChange emission is snapshotted and counted; log calls are recorded too.
  const stateHistory: RecognitionState[] = [];
  const loggerCalls: Array<{ level: string; message: string }> = [];
  let stateChangeCounter = 0;

  const trackingCallback = jest.fn((state: RecognitionState) => {
    stateHistory.push({ ...state });
    stateChangeCounter++;
  });
  const logSpy = jest.fn((level: string, message: string) => {
    loggerCalls.push({ level, message });
  });

  const client = new SimplifiedVGFRecognitionClient({
    asrRequestConfig: { provider: 'deepgram', language: 'en', sampleRate: 16000, encoding: AudioEncoding.LINEAR16 },
    onStateChange: trackingCallback,
    logger: logSpy
  });

  // Read the transcript handler back from the config given to the mocked underlying client.
  const MockedUnderlying = RealTimeTwoWayWebSocketRecognitionClient as jest.MockedClass<typeof RealTimeTwoWayWebSocketRecognitionClient>;
  const ctorCalls = MockedUnderlying.mock.calls;
  const onTranscriptCallback = ctorCalls[ctorCalls.length - 1]?.[0]?.onTranscript;
  if (!onTranscriptCallback) throw new Error('onTranscript callback not found');

  const clientUuid = client.getVGFState().audioUtteranceId;

  // Simulates a server transcription message addressed to this client's utterance.
  const emitTranscript = (fields: Record<string, unknown>): void => {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    onTranscriptCallback({ type: 'Transcription', audioUtteranceId: clientUuid, ...fields } as any);
  };

  // Step 1: audio starts the recording
  client.sendAudio(Buffer.from([1, 2, 3, 4]));
  expect(stateChangeCounter).toBe(1);

  // Step 2: a pending transcript puts transcriptionStatus into IN_PROGRESS (prevents synthetic finalization)
  emitTranscript({ pendingTranscript: 'hello', pendingTranscriptConfidence: 0.7, is_finished: false });
  expect(stateChangeCounter).toBe(2);

  // Step 3: the first final transcript is emitted
  emitTranscript({ finalTranscript: 'hello world', finalTranscriptConfidence: 0.95, is_finished: true });

  expect(stateChangeCounter).toBe(3); // First terminal emitted
  expect(stateHistory[2]!.transcriptionStatus).toBe(TranscriptionStatus.FINALIZED);

  // Step 4: a second final transcript must be suppressed
  emitTranscript({ finalTranscript: 'different transcript', finalTranscriptConfidence: 0.99, is_finished: true });

  // No further emission
  expect(stateChangeCounter).toBe(3);
  expect(trackingCallback).toHaveBeenCalledTimes(3);

  // The suppression is reported through the logger
  const sawSuppressionLog = loggerCalls.some(
    entry => entry.level === 'info' && entry.message.includes('Duplicate terminal status suppressed')
  );
  expect(sawSuppressionLog).toBe(true);

  // The first final transcript is the one that is kept
  expect(client.getVGFState().finalTranscript).toBe('hello world');
});
|
|
650
|
+
|
|
651
|
+
it('should reset terminal status flag when new UUID is generated', () => {
  const stateChangeCallback1 = jest.fn();

  // Seed the client with an already-terminal state; the test expects this to force a fresh UUID.
  const terminalState: RecognitionState = {
    audioUtteranceId: 'old-session-uuid',
    transcriptionStatus: TranscriptionStatus.FINALIZED,
    startRecordingStatus: RecordingStatus.FINISHED,
    pendingTranscript: '',
    finalTranscript: 'previous transcript'
  };

  const client = new SimplifiedVGFRecognitionClient({
    initialState: terminalState,
    asrRequestConfig: { provider: 'deepgram', language: 'en', sampleRate: 16000, encoding: AudioEncoding.LINEAR16 },
    onStateChange: stateChangeCallback1
  });

  // The seeded UUID has been replaced
  const newUuid = client.getVGFState().audioUtteranceId;
  expect(newUuid).not.toBe('old-session-uuid');

  // One emission so far (for the UUID regeneration)
  expect(stateChangeCallback1).toHaveBeenCalledTimes(1);

  // Read the transcript handler back from the config given to the mocked underlying client.
  const MockedUnderlying = RealTimeTwoWayWebSocketRecognitionClient as jest.MockedClass<typeof RealTimeTwoWayWebSocketRecognitionClient>;
  const ctorCalls = MockedUnderlying.mock.calls;
  const onTranscriptCallback = ctorCalls[ctorCalls.length - 1]?.[0]?.onTranscript;
  if (!onTranscriptCallback) throw new Error('onTranscript callback not found');

  // A final transcript for the NEW session's UUID
  const newSessionFinal = {
    type: 'Transcription',
    audioUtteranceId: newUuid,
    finalTranscript: 'new transcript',
    finalTranscriptConfidence: 0.9,
    is_finished: true
  };
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  onTranscriptCallback(newSessionFinal as any);

  // The new session's terminal status is emitted (counter was reset)
  expect(stateChangeCallback1).toHaveBeenCalledTimes(2);
  const emittedState = stateChangeCallback1.mock.calls[1][0];
  expect(emittedState.transcriptionStatus).toBe(TranscriptionStatus.FINALIZED);
  expect(emittedState.finalTranscript).toBe('new transcript');
});
|
|
703
|
+
});
|
|
704
|
+
});
|