@volley/recognition-client-sdk-node22 0.1.424
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +344 -0
- package/dist/browser.bundled.d.ts +1280 -0
- package/dist/browser.d.ts +10 -0
- package/dist/browser.d.ts.map +1 -0
- package/dist/config-builder.d.ts +134 -0
- package/dist/config-builder.d.ts.map +1 -0
- package/dist/errors.d.ts +41 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/factory.d.ts +36 -0
- package/dist/factory.d.ts.map +1 -0
- package/dist/index.bundled.d.ts +2572 -0
- package/dist/index.d.ts +16 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +10199 -0
- package/dist/index.js.map +7 -0
- package/dist/recog-client-sdk.browser.d.ts +10 -0
- package/dist/recog-client-sdk.browser.d.ts.map +1 -0
- package/dist/recog-client-sdk.browser.js +5746 -0
- package/dist/recog-client-sdk.browser.js.map +7 -0
- package/dist/recognition-client.d.ts +128 -0
- package/dist/recognition-client.d.ts.map +1 -0
- package/dist/recognition-client.types.d.ts +271 -0
- package/dist/recognition-client.types.d.ts.map +1 -0
- package/dist/simplified-vgf-recognition-client.d.ts +178 -0
- package/dist/simplified-vgf-recognition-client.d.ts.map +1 -0
- package/dist/utils/audio-ring-buffer.d.ts +69 -0
- package/dist/utils/audio-ring-buffer.d.ts.map +1 -0
- package/dist/utils/message-handler.d.ts +45 -0
- package/dist/utils/message-handler.d.ts.map +1 -0
- package/dist/utils/url-builder.d.ts +28 -0
- package/dist/utils/url-builder.d.ts.map +1 -0
- package/dist/vgf-recognition-mapper.d.ts +66 -0
- package/dist/vgf-recognition-mapper.d.ts.map +1 -0
- package/dist/vgf-recognition-state.d.ts +91 -0
- package/dist/vgf-recognition-state.d.ts.map +1 -0
- package/package.json +74 -0
- package/src/browser.ts +24 -0
- package/src/config-builder.spec.ts +265 -0
- package/src/config-builder.ts +240 -0
- package/src/errors.ts +84 -0
- package/src/factory.spec.ts +215 -0
- package/src/factory.ts +47 -0
- package/src/index.ts +127 -0
- package/src/recognition-client.spec.ts +889 -0
- package/src/recognition-client.ts +844 -0
- package/src/recognition-client.types.ts +338 -0
- package/src/simplified-vgf-recognition-client.integration.spec.ts +718 -0
- package/src/simplified-vgf-recognition-client.spec.ts +1525 -0
- package/src/simplified-vgf-recognition-client.ts +524 -0
- package/src/utils/audio-ring-buffer.spec.ts +335 -0
- package/src/utils/audio-ring-buffer.ts +170 -0
- package/src/utils/message-handler.spec.ts +311 -0
- package/src/utils/message-handler.ts +131 -0
- package/src/utils/url-builder.spec.ts +252 -0
- package/src/utils/url-builder.ts +92 -0
- package/src/vgf-recognition-mapper.spec.ts +78 -0
- package/src/vgf-recognition-mapper.ts +232 -0
- package/src/vgf-recognition-state.ts +102 -0
|
@@ -0,0 +1,718 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Integration tests for SimplifiedVGFRecognitionClient state transitions.
|
|
3
|
+
*
|
|
4
|
+
* These tests verify complete recognition flows involving multiple method calls
|
|
5
|
+
* and state transitions, including:
|
|
6
|
+
* - Normal recognition flow (sendAudio → transcript → stopRecording → final)
|
|
7
|
+
* - Early termination with synthetic finalization
|
|
8
|
+
* - Abnormal stop (cancel/abandon)
|
|
9
|
+
* - Terminal status protection (duplicate suppression)
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import {
|
|
13
|
+
SimplifiedVGFRecognitionClient,
|
|
14
|
+
SimplifiedVGFClientConfig
|
|
15
|
+
} from './simplified-vgf-recognition-client.js';
|
|
16
|
+
import { RealTimeTwoWayWebSocketRecognitionClient } from './recognition-client.js';
|
|
17
|
+
import {
|
|
18
|
+
RecognitionState,
|
|
19
|
+
TranscriptionStatus,
|
|
20
|
+
RecordingStatus
|
|
21
|
+
} from './vgf-recognition-state.js';
|
|
22
|
+
import { ClientState } from './recognition-client.types.js';
|
|
23
|
+
import { AudioEncoding } from '@recog/shared-types';
|
|
24
|
+
|
|
25
|
+
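// State returned by the mocked client's getState(); tests reassign it per scenario. Keep the `mock` prefix: the hoisted jest.mock factory below reads this variable.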
|
|
26
|
+
let mockClientState: ClientState = ClientState.READY;
|
|
27
|
+
|
|
28
|
+
// Replace the real WebSocket client module with a test double.
// Every `new RealTimeTwoWayWebSocketRecognitionClient(...)` returns the SAME stub object,
// and the config passed to the constructor is recorded on the constructor mock's `mock.calls`.
jest.mock('./recognition-client.js', () => {
  // Methods that drive the session.
  const lifecycleStubs = {
    connect: jest.fn().mockResolvedValue(undefined),
    sendAudio: jest.fn(),
    stopRecording: jest.fn().mockResolvedValue(undefined),
    stopAbnormally: jest.fn()
  };

  // Read-only accessors with fixed answers; getState() is the exception and is
  // evaluated lazily so tests can change `mockClientState` after construction.
  const statusStubs = {
    getAudioUtteranceId: jest.fn().mockReturnValue('mock-uuid'),
    getUrl: jest.fn().mockReturnValue('wss://mock-url'),
    getState: jest.fn().mockImplementation(() => mockClientState),
    isConnected: jest.fn().mockReturnValue(true),
    isConnecting: jest.fn().mockReturnValue(false),
    isStopping: jest.fn().mockReturnValue(false),
    isTranscriptionFinished: jest.fn().mockReturnValue(false),
    isBufferOverflowing: jest.fn().mockReturnValue(false)
  };

  const sharedClientStub = { ...lifecycleStubs, ...statusStubs };

  return {
    RealTimeTwoWayWebSocketRecognitionClient: jest.fn().mockImplementation(() => sharedClientStub)
  };
});
|
|
49
|
+
|
|
50
|
+
describe('SimplifiedVGFRecognitionClient Integration - State Transitions', () => {
|
|
51
|
+
beforeEach(() => {
  // Every test starts with the mocked client reporting READY ...
  mockClientState = ClientState.READY;
  // ... and with recorded calls wiped, so "latest constructor call" lookups
  // only ever see the client created by the current test.
  jest.clearAllMocks();
});
|
|
56
|
+
|
|
57
|
+
describe('Normal Recognition Flow', () => {
|
|
58
|
+
it('should follow correct state transitions for complete normal flow', async () => {
  // Walks the happy path (sendAudio -> pending transcripts -> stopRecording -> final transcript)
  // and asserts that each step produces exactly one onStateChange emission.
  let stateChangeCounter = 0;

  const trackingCallback = jest.fn(() => {
    stateChangeCounter++;
  });

  const client = new SimplifiedVGFRecognitionClient({
    asrRequestConfig: {
      provider: 'deepgram',
      language: 'en',
      sampleRate: 16000,
      encoding: AudioEncoding.LINEAR16
    },
    onStateChange: trackingCallback
  });

  // Capture the onTranscript callback that was handed to the (mocked) underlying client
  const constructorCalls = (RealTimeTwoWayWebSocketRecognitionClient as jest.MockedClass<typeof RealTimeTwoWayWebSocketRecognitionClient>).mock.calls;
  const latestConfig = constructorCalls[constructorCalls.length - 1]?.[0];
  const onTranscriptCallback = latestConfig?.onTranscript;
  if (!onTranscriptCallback) throw new Error('onTranscript callback not found');

  const clientUuid = client.getVGFState().audioUtteranceId;

  // Step 1: Initial state - verify starting point
  expect(stateChangeCounter).toBe(0);
  let currentState = client.getVGFState();
  expect(currentState.audioUtteranceId).toBe(clientUuid);
  expect(currentState.startRecordingStatus).toBe(RecordingStatus.READY);
  expect(currentState.transcriptionStatus).toBe(TranscriptionStatus.NOT_STARTED);
  expect(currentState.pendingTranscript).toBe('');
  expect(currentState.finalTranscript).toBeUndefined();

  // Step 2: Send first audio chunk - triggers RECORDING state
  client.sendAudio(Buffer.from([1, 2, 3, 4]));

  expect(stateChangeCounter).toBe(1);
  currentState = client.getVGFState();
  expect(currentState.audioUtteranceId).toBe(clientUuid);
  expect(currentState.startRecordingStatus).toBe(RecordingStatus.RECORDING);
  expect(currentState.startRecordingTimestamp).toBeDefined();
  expect(currentState.transcriptionStatus).toBe(TranscriptionStatus.NOT_STARTED);

  // Step 3: Server sends pending transcript - triggers IN_PROGRESS
  onTranscriptCallback({
    type: 'Transcription',
    audioUtteranceId: clientUuid,
    pendingTranscript: 'hello',
    pendingTranscriptConfidence: 0.7,
    is_finished: false
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  } as any);

  expect(stateChangeCounter).toBe(2);
  currentState = client.getVGFState();
  expect(currentState.audioUtteranceId).toBe(clientUuid);
  expect(currentState.startRecordingStatus).toBe(RecordingStatus.RECORDING);
  expect(currentState.transcriptionStatus).toBe(TranscriptionStatus.IN_PROGRESS);
  expect(currentState.pendingTranscript).toBe('hello');
  expect(currentState.pendingConfidence).toBe(0.7);
  expect(currentState.finalTranscript).toBeUndefined();

  // Step 4: Server sends more pending transcript
  onTranscriptCallback({
    type: 'Transcription',
    audioUtteranceId: clientUuid,
    pendingTranscript: 'hello world',
    pendingTranscriptConfidence: 0.8,
    is_finished: false
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  } as any);

  expect(stateChangeCounter).toBe(3);
  currentState = client.getVGFState();
  expect(currentState.audioUtteranceId).toBe(clientUuid);
  expect(currentState.transcriptionStatus).toBe(TranscriptionStatus.IN_PROGRESS);
  expect(currentState.pendingTranscript).toBe('hello world');
  expect(currentState.pendingConfidence).toBe(0.8);

  // Step 5: User calls stopRecording - triggers FINISHED recording status
  await client.stopRecording();

  expect(stateChangeCounter).toBe(4);
  currentState = client.getVGFState();
  expect(currentState.audioUtteranceId).toBe(clientUuid);
  expect(currentState.startRecordingStatus).toBe(RecordingStatus.FINISHED);
  expect(currentState.finalRecordingTimestamp).toBeDefined();
  // Transcription should still be IN_PROGRESS (waiting for server final)
  expect(currentState.transcriptionStatus).toBe(TranscriptionStatus.IN_PROGRESS);

  // Step 6: Server sends final transcript - triggers FINALIZED
  onTranscriptCallback({
    type: 'Transcription',
    audioUtteranceId: clientUuid,
    finalTranscript: 'hello world',
    finalTranscriptConfidence: 0.95,
    is_finished: true
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  } as any);

  expect(stateChangeCounter).toBe(5);
  currentState = client.getVGFState();
  expect(currentState.audioUtteranceId).toBe(clientUuid);
  expect(currentState.startRecordingStatus).toBe(RecordingStatus.FINISHED);
  expect(currentState.transcriptionStatus).toBe(TranscriptionStatus.FINALIZED);
  expect(currentState.finalTranscript).toBe('hello world');
  expect(currentState.finalConfidence).toBe(0.95);
  expect(currentState.pendingTranscript).toBe('');
  expect(currentState.pendingConfidence).toBeUndefined();
  expect(currentState.finalTranscriptionTimestamp).toBeDefined();

  // Verify total callback count
  expect(trackingCallback).toHaveBeenCalledTimes(5);
});
|
|
175
|
+
|
|
176
|
+
it('should not emit synthetic finalization if transcript was received before stopRecording', async () => {
  // Once a pending transcript has moved transcriptionStatus to IN_PROGRESS,
  // stopRecording() must emit only its own state change and leave finalization to the server.
  let stateChangeCounter = 0;

  const trackingCallback = jest.fn(() => {
    stateChangeCounter++;
  });

  const client = new SimplifiedVGFRecognitionClient({
    asrRequestConfig: {
      provider: 'deepgram',
      language: 'en',
      sampleRate: 16000,
      encoding: AudioEncoding.LINEAR16
    },
    onStateChange: trackingCallback
  });

  // Capture the onTranscript callback that was handed to the (mocked) underlying client
  const constructorCalls = (RealTimeTwoWayWebSocketRecognitionClient as jest.MockedClass<typeof RealTimeTwoWayWebSocketRecognitionClient>).mock.calls;
  const latestConfig = constructorCalls[constructorCalls.length - 1]?.[0];
  const onTranscriptCallback = latestConfig?.onTranscript;
  if (!onTranscriptCallback) throw new Error('onTranscript callback not found');

  const clientUuid = client.getVGFState().audioUtteranceId;

  // Step 1: Send audio
  client.sendAudio(Buffer.from([1, 2, 3, 4]));
  expect(stateChangeCounter).toBe(1);

  // Step 2: Server sends a pending transcript (transcriptionStatus -> IN_PROGRESS)
  onTranscriptCallback({
    type: 'Transcription',
    audioUtteranceId: clientUuid,
    pendingTranscript: 'hello',
    pendingTranscriptConfidence: 0.7,
    is_finished: false
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  } as any);
  expect(stateChangeCounter).toBe(2);

  // Step 3: User calls stopRecording - should NOT emit synthetic finalization
  // because transcriptionStatus is IN_PROGRESS (not NOT_STARTED)
  await client.stopRecording();

  expect(stateChangeCounter).toBe(3); // Only stopRecording state change, no synthetic finalization
  let currentState = client.getVGFState();
  expect(currentState.startRecordingStatus).toBe(RecordingStatus.FINISHED);
  expect(currentState.transcriptionStatus).toBe(TranscriptionStatus.IN_PROGRESS); // Still waiting for final

  // Step 4: Server sends final transcript after stop
  onTranscriptCallback({
    type: 'Transcription',
    audioUtteranceId: clientUuid,
    finalTranscript: 'hello world',
    finalTranscriptConfidence: 0.95,
    is_finished: true
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  } as any);

  expect(stateChangeCounter).toBe(4);
  currentState = client.getVGFState();
  expect(currentState.transcriptionStatus).toBe(TranscriptionStatus.FINALIZED);
  expect(currentState.finalTranscript).toBe('hello world');
  expect(currentState.finalConfidence).toBe(0.95);

  expect(trackingCallback).toHaveBeenCalledTimes(4);
});
|
|
243
|
+
});
|
|
244
|
+
|
|
245
|
+
describe('Early Termination with Synthetic Finalization', () => {
|
|
246
|
+
it('should handle stopRecording without ever sending audio', async () => {
  // Mock client in CONNECTED state (not yet READY) - synthetic finalization should trigger
  mockClientState = ClientState.CONNECTED;

  let stateChangeCounter = 0;

  const trackingCallback = jest.fn(() => {
    stateChangeCounter++;
  });

  const client = new SimplifiedVGFRecognitionClient({
    asrRequestConfig: {
      provider: 'deepgram',
      language: 'en',
      sampleRate: 16000,
      encoding: AudioEncoding.LINEAR16
    },
    onStateChange: trackingCallback
  });

  const clientUuid = client.getVGFState().audioUtteranceId;

  // Step 1: Initial state
  expect(stateChangeCounter).toBe(0);
  let currentState = client.getVGFState();
  expect(currentState.audioUtteranceId).toBe(clientUuid);
  expect(currentState.startRecordingStatus).toBe(RecordingStatus.READY);
  expect(currentState.transcriptionStatus).toBe(TranscriptionStatus.NOT_STARTED);

  // Step 2: User calls stopRecording WITHOUT ever sending audio
  // Since client is in CONNECTED state (not READY), SDK should emit synthetic finalization
  await client.stopRecording();

  // Should have 2 callbacks: stopRecording (FINISHED) + synthetic finalization (FINALIZED)
  expect(stateChangeCounter).toBe(2);
  currentState = client.getVGFState();
  expect(currentState.audioUtteranceId).toBe(clientUuid);
  expect(currentState.startRecordingStatus).toBe(RecordingStatus.FINISHED);
  expect(currentState.finalRecordingTimestamp).toBeDefined();
  expect(currentState.transcriptionStatus).toBe(TranscriptionStatus.FINALIZED);
  // The synthetic final transcript is empty because no server transcript was ever received
  expect(currentState.finalTranscript).toBe('');
  expect(currentState.finalTranscriptionTimestamp).toBeDefined();

  // Verify total callback count
  expect(trackingCallback).toHaveBeenCalledTimes(2);
});
|
|
292
|
+
|
|
293
|
+
it('should emit synthetic finalization and suppress late server transcripts', async () => {
  // The mocked client reports CONNECTED (not yet READY), the condition under which
  // stopRecording() is expected to finalize synthetically.
  mockClientState = ClientState.CONNECTED;

  const snapshots: RecognitionState[] = [];
  const logEntries: Array<{ level: string; message: string }> = [];
  let emissionCount = 0;

  const logSpy = jest.fn((level: string, message: string) => {
    logEntries.push({ level, message });
  });

  const stateSpy = jest.fn((state: RecognitionState) => {
    emissionCount++;
    // Shallow copy so later state changes cannot alter this snapshot's top-level fields.
    snapshots.push({ ...state });
  });

  const client = new SimplifiedVGFRecognitionClient({
    asrRequestConfig: {
      provider: 'deepgram',
      language: 'en',
      sampleRate: 16000,
      encoding: AudioEncoding.LINEAR16
    },
    onStateChange: stateSpy,
    logger: logSpy
  });

  // Pull the onTranscript handler out of the config given to the mocked underlying client.
  const MockedClientCtor = RealTimeTwoWayWebSocketRecognitionClient as jest.MockedClass<typeof RealTimeTwoWayWebSocketRecognitionClient>;
  const ctorCalls = MockedClientCtor.mock.calls;
  const underlyingConfig = ctorCalls[ctorCalls.length - 1]?.[0];
  const deliverTranscript = underlyingConfig?.onTranscript;
  if (!deliverTranscript) {
    throw new Error('onTranscript callback not found');
  }

  const utteranceId = client.getVGFState().audioUtteranceId;

  // 1) First audio chunk -> one emission.
  client.sendAudio(Buffer.from([1, 2, 3, 4]));
  expect(emissionCount).toBe(1);

  // 2) Stop before any transcript has arrived.
  await client.stopRecording();

  // Three emissions so far: sendAudio, stopRecording (FINISHED), synthetic finalization (FINALIZED).
  expect(emissionCount).toBe(3);
  const stateAfterStop = client.getVGFState();
  expect(stateAfterStop.audioUtteranceId).toBe(utteranceId);
  expect(stateAfterStop.startRecordingStatus).toBe(RecordingStatus.FINISHED);
  expect(stateAfterStop.transcriptionStatus).toBe(TranscriptionStatus.FINALIZED);
  expect(stateAfterStop.finalTranscript).toBe('');
  expect(stateAfterStop.finalTranscriptionTimestamp).toBeDefined();

  // Emitted sequence: RECORDING -> FINISHED (still NOT_STARTED) -> FINALIZED.
  expect(snapshots[0]!.startRecordingStatus).toBe(RecordingStatus.RECORDING);
  expect(snapshots[1]!.startRecordingStatus).toBe(RecordingStatus.FINISHED);
  expect(snapshots[1]!.transcriptionStatus).toBe(TranscriptionStatus.NOT_STARTED);
  expect(snapshots[2]!.transcriptionStatus).toBe(TranscriptionStatus.FINALIZED);

  // The state is terminal at this point, so the consumer can move on.
  expect(stateSpy).toHaveBeenCalledTimes(3);

  // 3) A final transcript shows up late; no further emission is expected.
  deliverTranscript({
    type: 'Transcription',
    audioUtteranceId: utteranceId,
    finalTranscript: 'late server response',
    finalTranscriptConfidence: 0.99,
    is_finished: true
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  } as any);

  expect(emissionCount).toBe(3);
  expect(stateSpy).toHaveBeenCalledTimes(3);

  // The internal state still absorbs the late transcript even though nothing was emitted.
  const stateAfterLateTranscript = client.getVGFState();
  expect(stateAfterLateTranscript.finalTranscript).toBe('late server response');

  // The suppression must be visible in the log output.
  const suppressionEntry = logEntries.find(
    ({ level, message }) => level === 'info' && message.includes('Duplicate terminal status suppressed')
  );
  expect(suppressionEntry).toBeDefined();
});
|
|
379
|
+
});
|
|
380
|
+
|
|
381
|
+
describe('Abnormal Stop (Cancel/Abandon)', () => {
|
|
382
|
+
it('should emit only ONE ABORTED status and suppress late transcripts', () => {
  const snapshots: RecognitionState[] = [];
  const logEntries: Array<{ level: string; message: string }> = [];
  let emissionCount = 0;

  const logSpy = jest.fn((level: string, message: string) => {
    logEntries.push({ level, message });
  });

  const stateSpy = jest.fn((state: RecognitionState) => {
    emissionCount++;
    // Shallow copy so later state changes cannot alter this snapshot's top-level fields.
    snapshots.push({ ...state });
  });

  const client = new SimplifiedVGFRecognitionClient({
    asrRequestConfig: {
      provider: 'deepgram',
      language: 'en',
      sampleRate: 16000,
      encoding: AudioEncoding.LINEAR16
    },
    onStateChange: stateSpy,
    logger: logSpy
  });

  // Pull the onTranscript handler out of the config given to the mocked underlying client.
  const MockedClientCtor = RealTimeTwoWayWebSocketRecognitionClient as jest.MockedClass<typeof RealTimeTwoWayWebSocketRecognitionClient>;
  const ctorCalls = MockedClientCtor.mock.calls;
  const underlyingConfig = ctorCalls[ctorCalls.length - 1]?.[0];
  const deliverTranscript = underlyingConfig?.onTranscript;
  if (!deliverTranscript) {
    throw new Error('onTranscript callback not found');
  }

  const utteranceId = client.getVGFState().audioUtteranceId;

  // 1) First audio chunk -> RECORDING.
  client.sendAudio(Buffer.from([1, 2, 3, 4]));
  expect(emissionCount).toBe(1);
  expect(snapshots[0]!.startRecordingStatus).toBe(RecordingStatus.RECORDING);

  // 2) A pending transcript arrives -> IN_PROGRESS.
  deliverTranscript({
    type: 'Transcription',
    audioUtteranceId: utteranceId,
    pendingTranscript: 'partial text',
    pendingTranscriptConfidence: 0.7,
    is_finished: false
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  } as any);

  expect(emissionCount).toBe(2);
  expect(snapshots[1]!.transcriptionStatus).toBe(TranscriptionStatus.IN_PROGRESS);
  expect(snapshots[1]!.pendingTranscript).toBe('partial text');

  // 3) The session is cancelled/abandoned.
  client.stopAbnormally();

  // Three emissions so far: sendAudio, pending transcript, stopAbnormally (ABORTED).
  expect(emissionCount).toBe(3);
  expect(snapshots[2]!.transcriptionStatus).toBe(TranscriptionStatus.ABORTED);
  expect(snapshots[2]!.startRecordingStatus).toBe(RecordingStatus.FINISHED);

  // 4) A final transcript shows up late; no further emission is expected.
  deliverTranscript({
    type: 'Transcription',
    audioUtteranceId: utteranceId,
    finalTranscript: 'late server response',
    finalTranscriptConfidence: 0.99,
    is_finished: true
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  } as any);

  expect(emissionCount).toBe(3);
  expect(stateSpy).toHaveBeenCalledTimes(3);

  // The internal state is overwritten by the late transcript (ABORTED -> FINALIZED)
  // even though nothing was emitted to the consumer.
  const stateAfterLateTranscript = client.getVGFState();
  expect(stateAfterLateTranscript.transcriptionStatus).toBe(TranscriptionStatus.FINALIZED);
  expect(stateAfterLateTranscript.finalTranscript).toBe('late server response');

  // The suppression must be visible in the log output.
  const suppressionEntry = logEntries.find(
    ({ level, message }) => level === 'info' && message.includes('Duplicate terminal status suppressed')
  );
  expect(suppressionEntry).toBeDefined();

  // 5) A second stopAbnormally() must stay silent as well.
  client.stopAbnormally();
  expect(emissionCount).toBe(3);
  expect(stateSpy).toHaveBeenCalledTimes(3);
});
|
|
472
|
+
});
|
|
473
|
+
|
|
474
|
+
describe('Error Status Protection', () => {
|
|
475
|
+
it('should emit only ONE ERROR status and suppress late errors and transcripts', () => {
  // After the first error puts the session into a terminal ERROR status, neither a second
  // error nor a late final transcript may trigger another onStateChange emission.
  let stateChangeCounter = 0;
  const stateHistory: RecognitionState[] = [];
  const loggerCalls: Array<{ level: string; message: string }> = [];

  const trackingCallback = jest.fn((state: RecognitionState) => {
    stateChangeCounter++;
    stateHistory.push({ ...state });
  });

  const logger = jest.fn((level: string, message: string) => {
    loggerCalls.push({ level, message });
  });

  const client = new SimplifiedVGFRecognitionClient({
    asrRequestConfig: {
      provider: 'deepgram',
      language: 'en',
      sampleRate: 16000,
      encoding: AudioEncoding.LINEAR16
    },
    onStateChange: trackingCallback,
    logger
  });

  // Capture the callbacks that were handed to the (mocked) underlying client
  const constructorCalls = (RealTimeTwoWayWebSocketRecognitionClient as jest.MockedClass<typeof RealTimeTwoWayWebSocketRecognitionClient>).mock.calls;
  const latestConfig = constructorCalls[constructorCalls.length - 1]?.[0];
  const onTranscriptCallback = latestConfig?.onTranscript;
  const onErrorCallback = latestConfig?.onError;
  if (!onTranscriptCallback) throw new Error('onTranscript callback not found');
  if (!onErrorCallback) throw new Error('onError callback not found');

  const clientUuid = client.getVGFState().audioUtteranceId;

  // Step 1: Send audio
  client.sendAudio(Buffer.from([1, 2, 3, 4]));
  expect(stateChangeCounter).toBe(1);
  expect(stateHistory[0]!.startRecordingStatus).toBe(RecordingStatus.RECORDING);

  // Step 2: Receive a pending transcript
  onTranscriptCallback({
    type: 'Transcription',
    audioUtteranceId: clientUuid,
    pendingTranscript: 'partial text',
    pendingTranscriptConfidence: 0.7,
    is_finished: false
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  } as any);

  expect(stateChangeCounter).toBe(2);
  expect(stateHistory[1]!.transcriptionStatus).toBe(TranscriptionStatus.IN_PROGRESS);

  // Step 3: Error occurs (e.g., provider error, timeout)
  onErrorCallback({
    audioUtteranceId: clientUuid,
    message: 'Provider connection failed'
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  } as any);

  // Should have 3 callbacks: sendAudio, pending transcript, error (ERROR)
  expect(stateChangeCounter).toBe(3);
  expect(stateHistory[2]!.transcriptionStatus).toBe(TranscriptionStatus.ERROR);
  expect(stateHistory[2]!.startRecordingStatus).toBe(RecordingStatus.FINISHED);

  // Step 4: Another error arrives - should be SUPPRESSED
  onErrorCallback({
    audioUtteranceId: clientUuid,
    message: 'Second error message'
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  } as any);

  // Callback should NOT be called again
  expect(stateChangeCounter).toBe(3);
  expect(trackingCallback).toHaveBeenCalledTimes(3);

  // Remember how many log entries exist BEFORE the late transcript, so the suppression
  // assertion below cannot be satisfied by a log written for the duplicate error in Step 4.
  const logCountBeforeLateTranscript = loggerCalls.length;

  // Step 5: Late transcript arrives - should also be SUPPRESSED
  onTranscriptCallback({
    type: 'Transcription',
    audioUtteranceId: clientUuid,
    finalTranscript: 'late server response',
    finalTranscriptConfidence: 0.99,
    is_finished: true
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  } as any);

  // Callback should still NOT be called again
  expect(stateChangeCounter).toBe(3);
  expect(trackingCallback).toHaveBeenCalledTimes(3);

  // Local state IS updated (late transcript overwrites), but callback was not called
  const finalState = client.getVGFState();
  expect(finalState.transcriptionStatus).toBe(TranscriptionStatus.FINALIZED);
  expect(finalState.finalTranscript).toBe('late server response');

  // Log should indicate suppression for the late transcript (entries from Step 5 onward only)
  const suppressionLog = loggerCalls
    .slice(logCountBeforeLateTranscript)
    .find(log => log.level === 'info' && log.message.includes('Duplicate terminal status suppressed'));
  expect(suppressionLog).toBeDefined();
});
|
|
576
|
+
});
|
|
577
|
+
|
|
578
|
+
describe('Terminal Status Protection (Duplicate Suppression)', () => {
  /**
   * Returns the `onTranscript` handler found in the first constructor argument
   * of the most recent call recorded on RealTimeTwoWayWebSocketRecognitionClient
   * (accessed through its jest mock bookkeeping).
   *
   * @throws Error when no handler was recorded.
   */
  const latestOnTranscript = () => {
    const mockedCtor = RealTimeTwoWayWebSocketRecognitionClient as jest.MockedClass<typeof RealTimeTwoWayWebSocketRecognitionClient>;
    const recordedCalls = mockedCtor.mock.calls;
    const handler = recordedCalls[recordedCalls.length - 1]?.[0]?.onTranscript;
    if (!handler) throw new Error('onTranscript callback not found');
    return handler;
  };

  /**
   * Builds a finished 'Transcription' message that carries a final transcript.
   * The result is cast to `any`, exactly as the inline literals were, because
   * the tests only supply the subset of fields they care about.
   */
  const finishedTranscription = (
    audioUtteranceId: RecognitionState['audioUtteranceId'],
    finalTranscript: string,
    finalTranscriptConfidence: number
  ) =>
    ({
      type: 'Transcription',
      audioUtteranceId,
      finalTranscript,
      finalTranscriptConfidence,
      is_finished: true
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
    } as any);

  it('should emit only ONE terminal status per session', async () => {
    // Everything the client reports is recorded so it can be asserted on later.
    const emittedStates: RecognitionState[] = [];
    const logEntries: Array<{ level: string; message: string }> = [];
    let emitCount = 0;

    const onStateChange = jest.fn((state: RecognitionState) => {
      emitCount += 1;
      emittedStates.push({ ...state });
    });

    const logger = jest.fn((level: string, message: string) => {
      logEntries.push({ level, message });
    });

    const client = new SimplifiedVGFRecognitionClient({
      asrRequestConfig: {
        provider: 'deepgram',
        language: 'en',
        sampleRate: 16000,
        encoding: AudioEncoding.LINEAR16
      },
      onStateChange,
      logger
    });

    // Handler the client handed to the underlying (mocked) websocket client
    const deliverTranscript = latestOnTranscript();
    const sessionUuid = client.getVGFState().audioUtteranceId;

    // Step 1: sending audio produces the first state change
    client.sendAudio(Buffer.from([1, 2, 3, 4]));
    expect(emitCount).toBe(1);

    // Step 2: a pending transcript (transcriptionStatus -> IN_PROGRESS, prevents synthetic finalization)
    deliverTranscript({
      type: 'Transcription',
      audioUtteranceId: sessionUuid,
      pendingTranscript: 'hello',
      pendingTranscriptConfidence: 0.7,
      is_finished: false
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
    } as any);
    expect(emitCount).toBe(2);

    // Step 3: the first final transcript must be emitted
    deliverTranscript(finishedTranscription(sessionUuid, 'hello world', 0.95));

    expect(emitCount).toBe(3); // First terminal emitted
    expect(emittedStates[2]!.transcriptionStatus).toBe(TranscriptionStatus.FINALIZED);

    // Step 4: a second final transcript for the same session must be suppressed
    deliverTranscript(finishedTranscription(sessionUuid, 'different transcript', 0.99));

    // No further callback invocation
    expect(emitCount).toBe(3);
    expect(onStateChange).toHaveBeenCalledTimes(3);

    // The suppression must have been logged at info level
    const suppressionEntry = logEntries.find(
      ({ level, message }) =>
        level === 'info' && message.includes('Duplicate terminal status suppressed')
    );
    expect(suppressionEntry).toBeDefined();

    // Local state IS updated with the new transcript even though no callback fired
    const stateAfterDuplicate = client.getVGFState();
    expect(stateAfterDuplicate.finalTranscript).toBe('different transcript');
  });

  it('should reset terminal status flag when new UUID is generated', () => {
    const stateChangeCallback1 = jest.fn();

    // Seeding the client with an already-terminal state forces a new UUID
    const previousSession: RecognitionState = {
      audioUtteranceId: 'old-session-uuid',
      transcriptionStatus: TranscriptionStatus.FINALIZED,
      startRecordingStatus: RecordingStatus.FINISHED,
      pendingTranscript: '',
      finalTranscript: 'previous transcript'
    };

    const client = new SimplifiedVGFRecognitionClient({
      initialState: previousSession,
      asrRequestConfig: {
        provider: 'deepgram',
        language: 'en',
        sampleRate: 16000,
        encoding: AudioEncoding.LINEAR16
      },
      onStateChange: stateChangeCallback1
    });

    // The stale UUID was replaced
    const freshUuid = client.getVGFState().audioUtteranceId;
    expect(freshUuid).not.toBe('old-session-uuid');

    // Exactly one state change so far (for UUID regeneration)
    expect(stateChangeCallback1).toHaveBeenCalledTimes(1);

    // Handler the client handed to the underlying (mocked) websocket client
    const deliverTranscript = latestOnTranscript();

    // A final transcript arrives for the new session
    deliverTranscript(finishedTranscription(freshUuid, 'new transcript', 0.9));

    // Terminal status is emitted for the new session (counter was reset)
    expect(stateChangeCallback1).toHaveBeenCalledTimes(2);
    const emittedTerminalState = stateChangeCallback1.mock.calls[1][0];
    expect(emittedTerminalState.transcriptionStatus).toBe(TranscriptionStatus.FINALIZED);
    expect(emittedTerminalState.finalTranscript).toBe('new transcript');
  });
});
|
|
718
|
+
});
|