@volley/recognition-client-sdk 0.1.200
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +168 -0
- package/dist/browser-CDQ_TzeH.d.ts +1039 -0
- package/dist/index.d.ts +461 -0
- package/dist/index.js +2332 -0
- package/dist/index.js.map +1 -0
- package/dist/recog-client-sdk.browser.d.ts +2 -0
- package/dist/recog-client-sdk.browser.js +1843 -0
- package/dist/recog-client-sdk.browser.js.map +1 -0
- package/package.json +73 -0
- package/src/browser.ts +24 -0
- package/src/config-builder.ts +213 -0
- package/src/factory.ts +43 -0
- package/src/index.ts +86 -0
- package/src/recognition-client.spec.ts +551 -0
- package/src/recognition-client.ts +595 -0
- package/src/recognition-client.types.ts +260 -0
- package/src/simplified-vgf-recognition-client.spec.ts +671 -0
- package/src/simplified-vgf-recognition-client.ts +339 -0
- package/src/utils/audio-ring-buffer.ts +170 -0
- package/src/utils/message-handler.ts +131 -0
- package/src/utils/url-builder.ts +70 -0
- package/src/vgf-recognition-mapper.ts +225 -0
- package/src/vgf-recognition-state.ts +89 -0
|
@@ -0,0 +1,595 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* RealTimeTwoWayWebSocketRecognitionClient - Clean, compact SDK for real-time speech recognition
|
|
3
|
+
*
|
|
4
|
+
* Features:
|
|
5
|
+
* - Ring buffer-based audio storage with fixed memory footprint
|
|
6
|
+
* - Automatic buffering when disconnected, immediate send when connected
|
|
7
|
+
* - Buffer persists after flush (for future retry/reconnection scenarios)
|
|
8
|
+
* - Built on WebSocketAudioClient for robust protocol handling
|
|
9
|
+
* - Simple API: connect() → sendAudio() → stopRecording()
|
|
10
|
+
* - Type-safe message handling with callbacks
|
|
11
|
+
* - Automatic backpressure management
|
|
12
|
+
* - Overflow detection with buffer state tracking
|
|
13
|
+
*
|
|
14
|
+
* Example:
|
|
15
|
+
* ```typescript
|
|
16
|
+
* const client = new RealTimeTwoWayWebSocketRecognitionClient({
|
|
17
|
+
* url: 'ws://localhost:3101/ws/v1/recognize',
|
|
18
|
+
* onTranscript: (result) => console.log(result.finalTranscript),
|
|
19
|
+
* onError: (error) => console.error(error),
|
|
20
|
+
* maxBufferDurationSec: 60 // Ring buffer for 60 seconds
|
|
21
|
+
* });
|
|
22
|
+
*
|
|
23
|
+
* await client.connect();
|
|
24
|
+
*
|
|
25
|
+
* // Send audio chunks - always stored in ring buffer, sent if connected
|
|
26
|
+
* micStream.on('data', (chunk) => client.sendAudio(chunk));
|
|
27
|
+
*
|
|
28
|
+
* // Signal end of audio and wait for final results
|
|
29
|
+
* await client.stopRecording();
|
|
30
|
+
*
|
|
31
|
+
* // Server will close connection after sending finals
|
|
32
|
+
* // No manual cleanup needed - browser handles it
|
|
33
|
+
* ```
|
|
34
|
+
*/
|
|
35
|
+
|
|
36
|
+
import { WebSocketAudioClient } from '@recog/websocket';
|
|
37
|
+
import {
|
|
38
|
+
AudioEncoding,
|
|
39
|
+
RecognitionResultTypeV1,
|
|
40
|
+
ClientControlActionV1,
|
|
41
|
+
RecognitionContextTypeV1,
|
|
42
|
+
ControlSignalTypeV1,
|
|
43
|
+
type TranscriptionResultV1,
|
|
44
|
+
type FunctionCallResultV1,
|
|
45
|
+
type MetadataResultV1,
|
|
46
|
+
type ErrorResultV1,
|
|
47
|
+
type ClientControlMessageV1,
|
|
48
|
+
type ASRRequestConfig,
|
|
49
|
+
type ASRRequestV1,
|
|
50
|
+
type GameContextV1,
|
|
51
|
+
SampleRate
|
|
52
|
+
} from '@recog/shared-types';
|
|
53
|
+
import { v4 as uuidv4 } from 'uuid';
|
|
54
|
+
import { ClientState } from './recognition-client.types.js';
|
|
55
|
+
import type {
|
|
56
|
+
IRecognitionClient,
|
|
57
|
+
IRecognitionClientStats,
|
|
58
|
+
RealTimeTwoWayWebSocketRecognitionClientConfig,
|
|
59
|
+
RecognitionCallbackUrl
|
|
60
|
+
} from './recognition-client.types.js';
|
|
61
|
+
import { buildWebSocketUrl } from './utils/url-builder.js';
|
|
62
|
+
import { AudioRingBuffer } from './utils/audio-ring-buffer.js';
|
|
63
|
+
import { MessageHandler } from './utils/message-handler.js';
|
|
64
|
+
|
|
65
|
+
// ============================================================================
|
|
66
|
+
// UTILITIES
|
|
67
|
+
// ============================================================================
|
|
68
|
+
|
|
69
|
+
/**
|
|
70
|
+
* Check if a WebSocket close code indicates normal closure
|
|
71
|
+
* @param code - WebSocket close code
|
|
72
|
+
* @returns true if the disconnection was normal/expected, false if it was an error
|
|
73
|
+
*/
|
|
74
|
+
export function isNormalDisconnection(code: number): boolean {
|
|
75
|
+
return code === 1000; // 1000 is the only "normal" close code
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
// ============================================================================
|
|
79
|
+
// TYPE DEFINITIONS
|
|
80
|
+
// ============================================================================
|
|
81
|
+
|
|
82
|
+
/**
 * Re-export TranscriptionResultV1 as TranscriptionResult for backward compatibility
 */
export type TranscriptionResult = TranscriptionResultV1;

// Re-export config interface from types file for backward compatibility
export type { RealTimeTwoWayWebSocketRecognitionClientConfig } from './recognition-client.types.js';

/**
 * Internal config with processed values and defaults.
 *
 * Unlike the public config, every callback here is non-optional (missing
 * callbacks are replaced with no-ops in the constructor) and all numeric
 * tuning knobs carry concrete default values.
 */
interface InternalConfig {
  url: string; // Fully-built WebSocket URL (query parameters already appended by buildWebSocketUrl)
  readonly audioUtteranceId: string; // Immutable - ensures one audio session per client instance
  asrRequestConfig?: ASRRequestConfig; // Optional ASR settings; when present, an ASRRequest is sent on connect
  gameContext?: GameContextV1; // Optional game context message, sent right after the ASR request
  callbackUrls?: RecognitionCallbackUrl[]; // Server-side callback targets (encoded into the URL at construction)
  onTranscript: (result: TranscriptionResultV1) => void; // Fired for each transcription result
  onFunctionCall: (result: FunctionCallResultV1) => void; // Fired for function-call results
  onMetadata: (metadata: MetadataResultV1) => void; // Fired for metadata results
  onError: (error: ErrorResultV1) => void; // Fired on server-reported or socket-level errors
  onConnected: () => void; // Fired once the WebSocket connection is established
  onDisconnected: (code: number, reason: string) => void; // Fired when the socket closes (normally or not)
  highWaterMark: number; // Outbound-buffer byte threshold passed to the base client (backpressure on)
  lowWaterMark: number; // Outbound-buffer byte threshold passed to the base client (backpressure off)
  maxBufferDurationSec: number; // Ring-buffer capacity in seconds of audio (see AudioRingBuffer)
  chunksPerSecond: number; // Expected audio chunk rate; presumably used to size the ring buffer — see audio-ring-buffer.ts
  logger?: (level: 'debug' | 'info' | 'warn' | 'error', message: string, data?: any) => void; // Optional log sink; no logger means no logs
}
|
|
111
|
+
|
|
112
|
+
// ============================================================================
|
|
113
|
+
// RECOGNITION CLIENT
|
|
114
|
+
// ============================================================================
|
|
115
|
+
|
|
116
|
+
/**
|
|
117
|
+
* RealTimeTwoWayWebSocketRecognitionClient - SDK-level client for real-time speech recognition
|
|
118
|
+
*
|
|
119
|
+
* Implements IRecognitionClient interface for dependency injection and testing.
|
|
120
|
+
* Extends WebSocketAudioClient with local audio buffering and simple callback-based API.
|
|
121
|
+
*/
|
|
122
|
+
export class RealTimeTwoWayWebSocketRecognitionClient
|
|
123
|
+
extends WebSocketAudioClient<number, any, any>
|
|
124
|
+
implements IRecognitionClient
|
|
125
|
+
{
|
|
126
|
+
private static readonly PROTOCOL_VERSION = 1;
|
|
127
|
+
|
|
128
|
+
private config: InternalConfig;
|
|
129
|
+
private audioBuffer: AudioRingBuffer;
|
|
130
|
+
private messageHandler: MessageHandler;
|
|
131
|
+
private state: ClientState = ClientState.INITIAL;
|
|
132
|
+
private connectionPromise: Promise<void> | undefined;
|
|
133
|
+
|
|
134
|
+
// Debug control (internal state, controlled by debugCommand in ASRRequest)
|
|
135
|
+
private isDebugLogEnabled = false;
|
|
136
|
+
|
|
137
|
+
// Stats
|
|
138
|
+
private audioBytesSent = 0;
|
|
139
|
+
private audioChunksSent = 0;
|
|
140
|
+
private audioStatsLogInterval = 100;
|
|
141
|
+
private lastAudioStatsLog = 0;
|
|
142
|
+
|
|
143
|
+
constructor(config: RealTimeTwoWayWebSocketRecognitionClientConfig) {
|
|
144
|
+
// Generate UUID v4 for audioUtteranceId if not provided
|
|
145
|
+
const audioUtteranceId = config.audioUtteranceId || uuidv4();
|
|
146
|
+
|
|
147
|
+
// Build WebSocket URL with query parameters
|
|
148
|
+
const url = buildWebSocketUrl({
|
|
149
|
+
audioUtteranceId,
|
|
150
|
+
...(config.url && { url: config.url }),
|
|
151
|
+
...(config.callbackUrls && { callbackUrls: config.callbackUrls }),
|
|
152
|
+
...(config.userId && { userId: config.userId }),
|
|
153
|
+
...(config.gameSessionId && { gameSessionId: config.gameSessionId }),
|
|
154
|
+
...(config.deviceId && { deviceId: config.deviceId }),
|
|
155
|
+
...(config.accountId && { accountId: config.accountId }),
|
|
156
|
+
...(config.questionAnswerId && { questionAnswerId: config.questionAnswerId }),
|
|
157
|
+
...(config.platform && { platform: config.platform }),
|
|
158
|
+
...(config.gameContext && { gameContext: config.gameContext })
|
|
159
|
+
});
|
|
160
|
+
|
|
161
|
+
// Initialize base WebSocketAudioClient
|
|
162
|
+
super({
|
|
163
|
+
url: url,
|
|
164
|
+
highWM: config.highWaterMark ?? 512_000,
|
|
165
|
+
lowWM: config.lowWaterMark ?? 128_000
|
|
166
|
+
});
|
|
167
|
+
|
|
168
|
+
// Process config with defaults
|
|
169
|
+
this.config = {
|
|
170
|
+
url,
|
|
171
|
+
audioUtteranceId,
|
|
172
|
+
...(config.asrRequestConfig && { asrRequestConfig: config.asrRequestConfig }),
|
|
173
|
+
...(config.gameContext && { gameContext: config.gameContext }),
|
|
174
|
+
...(config.callbackUrls && { callbackUrls: config.callbackUrls }),
|
|
175
|
+
onTranscript: config.onTranscript || (() => {}),
|
|
176
|
+
onFunctionCall: config.onFunctionCall || (() => {}),
|
|
177
|
+
onMetadata: config.onMetadata || (() => {}),
|
|
178
|
+
onError: config.onError || (() => {}),
|
|
179
|
+
onConnected: config.onConnected || (() => {}),
|
|
180
|
+
onDisconnected: config.onDisconnected || (() => {}),
|
|
181
|
+
highWaterMark: config.highWaterMark ?? 512_000,
|
|
182
|
+
lowWaterMark: config.lowWaterMark ?? 128_000,
|
|
183
|
+
maxBufferDurationSec: config.maxBufferDurationSec ?? 60,
|
|
184
|
+
chunksPerSecond: config.chunksPerSecond ?? 100,
|
|
185
|
+
...(config.logger && { logger: config.logger })
|
|
186
|
+
};
|
|
187
|
+
|
|
188
|
+
// Initialize audio buffer
|
|
189
|
+
this.audioBuffer = new AudioRingBuffer({
|
|
190
|
+
maxBufferDurationSec: this.config.maxBufferDurationSec,
|
|
191
|
+
chunksPerSecond: this.config.chunksPerSecond,
|
|
192
|
+
...(this.config.logger && { logger: this.config.logger })
|
|
193
|
+
});
|
|
194
|
+
|
|
195
|
+
// Initialize message handler
|
|
196
|
+
this.messageHandler = new MessageHandler({
|
|
197
|
+
onTranscript: this.config.onTranscript,
|
|
198
|
+
onFunctionCall: this.config.onFunctionCall,
|
|
199
|
+
onMetadata: this.config.onMetadata,
|
|
200
|
+
onError: this.config.onError,
|
|
201
|
+
onControlMessage: this.handleControlMessage.bind(this),
|
|
202
|
+
...(this.config.logger && { logger: this.config.logger })
|
|
203
|
+
});
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
// ==========================================================================
|
|
207
|
+
// PRIVATE HELPERS
|
|
208
|
+
// ==========================================================================
|
|
209
|
+
|
|
210
|
+
/**
|
|
211
|
+
* Internal logging helper - only logs if a logger was provided in config
|
|
212
|
+
* Debug logs are additionally gated by isDebugLogEnabled flag
|
|
213
|
+
* @param level - Log level: debug, info, warn, or error
|
|
214
|
+
* @param message - Message to log
|
|
215
|
+
* @param data - Optional additional data to log
|
|
216
|
+
*/
|
|
217
|
+
private log(level: 'debug' | 'info' | 'warn' | 'error', message: string, data?: any): void {
|
|
218
|
+
// Skip debug logs if debug logging is not enabled
|
|
219
|
+
if (level === 'debug' && !this.isDebugLogEnabled) {
|
|
220
|
+
return;
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
if (this.config.logger) {
|
|
224
|
+
this.config.logger(level, `[SDK] ${message}`, data);
|
|
225
|
+
}
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
/**
|
|
229
|
+
* Clean up internal resources to free memory
|
|
230
|
+
* Called when connection closes (normally or abnormally)
|
|
231
|
+
*/
|
|
232
|
+
private cleanup(): void {
|
|
233
|
+
this.log('debug', 'Cleaning up resources');
|
|
234
|
+
|
|
235
|
+
// Clear audio buffer to free memory
|
|
236
|
+
this.audioBuffer.clear();
|
|
237
|
+
|
|
238
|
+
// Reset stats
|
|
239
|
+
this.audioBytesSent = 0;
|
|
240
|
+
this.audioChunksSent = 0;
|
|
241
|
+
this.lastAudioStatsLog = 0;
|
|
242
|
+
|
|
243
|
+
// Clear connection promise so new connections can be made
|
|
244
|
+
this.connectionPromise = undefined;
|
|
245
|
+
}
|
|
246
|
+
|
|
247
|
+
// ==========================================================================
|
|
248
|
+
// PUBLIC API
|
|
249
|
+
// ==========================================================================
|
|
250
|
+
|
|
251
|
+
override async connect(): Promise<void> {
|
|
252
|
+
// FIRST: Check if we already have a connection promise (handles simultaneous calls)
|
|
253
|
+
if (this.connectionPromise) {
|
|
254
|
+
this.log('debug', 'Returning existing connection promise', {
|
|
255
|
+
state: this.state,
|
|
256
|
+
hasPromise: true
|
|
257
|
+
});
|
|
258
|
+
return this.connectionPromise;
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
// SECOND: Check state machine - prevent connections in wrong states
|
|
262
|
+
if (
|
|
263
|
+
this.state !== ClientState.INITIAL &&
|
|
264
|
+
this.state !== ClientState.FAILED &&
|
|
265
|
+
this.state !== ClientState.STOPPED
|
|
266
|
+
) {
|
|
267
|
+
this.log('debug', 'Already connected or in wrong state', {
|
|
268
|
+
state: this.state
|
|
269
|
+
});
|
|
270
|
+
// If we're already connected/ready, return resolved promise
|
|
271
|
+
return Promise.resolve();
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
// RETRY HINT: Wrap this method with exponential backoff (e.g., 1s, 2s, 4s) on FAILED state
|
|
275
|
+
// Ensure audioBuffer persists between retries - same audioUtteranceId = same audio session
|
|
276
|
+
|
|
277
|
+
this.log('debug', 'Creating new connection to WebSocket', { url: this.config.url });
|
|
278
|
+
this.state = ClientState.CONNECTING;
|
|
279
|
+
|
|
280
|
+
const connectionStartTime = Date.now();
|
|
281
|
+
|
|
282
|
+
// Store the promise IMMEDIATELY so simultaneous calls will get the same promise
|
|
283
|
+
this.connectionPromise = new Promise((resolve, reject) => {
|
|
284
|
+
const timeout = setTimeout(() => {
|
|
285
|
+
this.log('warn', 'Connection timeout', { timeout: 10000 });
|
|
286
|
+
this.state = ClientState.FAILED;
|
|
287
|
+
reject(new Error('Timeout'));
|
|
288
|
+
}, 10000);
|
|
289
|
+
|
|
290
|
+
const originalOnConnected = this.onConnected.bind(this);
|
|
291
|
+
this.onConnected = (): void => {
|
|
292
|
+
clearTimeout(timeout);
|
|
293
|
+
const connectionTime = Date.now() - connectionStartTime;
|
|
294
|
+
this.log('debug', 'Connection established successfully', {
|
|
295
|
+
connectionTimeMs: connectionTime,
|
|
296
|
+
url: this.config.url
|
|
297
|
+
});
|
|
298
|
+
this.state = ClientState.CONNECTED;
|
|
299
|
+
originalOnConnected();
|
|
300
|
+
resolve();
|
|
301
|
+
};
|
|
302
|
+
|
|
303
|
+
const originalOnError = this.onError.bind(this);
|
|
304
|
+
this.onError = (error): void => {
|
|
305
|
+
clearTimeout(timeout);
|
|
306
|
+
this.log('warn', 'Connection error', error);
|
|
307
|
+
this.state = ClientState.FAILED;
|
|
308
|
+
originalOnError(error);
|
|
309
|
+
reject(error);
|
|
310
|
+
};
|
|
311
|
+
|
|
312
|
+
super.connect();
|
|
313
|
+
});
|
|
314
|
+
|
|
315
|
+
return this.connectionPromise;
|
|
316
|
+
}
|
|
317
|
+
|
|
318
|
+
override sendAudio(audioData: ArrayBuffer | ArrayBufferView): void {
|
|
319
|
+
const bytes = ArrayBuffer.isView(audioData) ? audioData.byteLength : audioData.byteLength;
|
|
320
|
+
if (bytes === 0) return;
|
|
321
|
+
|
|
322
|
+
// BACKPRESSURE HINT: Return false or throw if audioBuffer.write() returns false (overflow)
|
|
323
|
+
// Caller should pause audio capture until buffer has space (check isBufferOverflowing())
|
|
324
|
+
|
|
325
|
+
// Always write to ring buffer
|
|
326
|
+
this.audioBuffer.write(audioData);
|
|
327
|
+
|
|
328
|
+
// Send immediately if ready and not backpressured
|
|
329
|
+
if (this.state === ClientState.READY && !super.isLocalBackpressured()) {
|
|
330
|
+
this.log('debug', 'Sending audio immediately', { bytes });
|
|
331
|
+
this.sendAudioNow(audioData);
|
|
332
|
+
this.audioBuffer.read(); // Remove from buffer since we sent it
|
|
333
|
+
} else {
|
|
334
|
+
this.log('debug', 'Buffering audio', {
|
|
335
|
+
bytes,
|
|
336
|
+
state: this.state,
|
|
337
|
+
backpressured: super.isLocalBackpressured()
|
|
338
|
+
});
|
|
339
|
+
}
|
|
340
|
+
|
|
341
|
+
// Log audio stats periodically (only if debug logging is enabled)
|
|
342
|
+
if (this.isDebugLogEnabled) {
|
|
343
|
+
const totalChunks = this.audioChunksSent + this.audioBuffer.getStats().chunksBuffered;
|
|
344
|
+
if (totalChunks - this.lastAudioStatsLog >= this.audioStatsLogInterval) {
|
|
345
|
+
const stats = this.audioBuffer.getStats();
|
|
346
|
+
this.log('debug', 'Audio statistics', {
|
|
347
|
+
totalBytesSent: this.audioBytesSent,
|
|
348
|
+
totalChunksSent: this.audioChunksSent,
|
|
349
|
+
...stats
|
|
350
|
+
});
|
|
351
|
+
this.lastAudioStatsLog = totalChunks;
|
|
352
|
+
}
|
|
353
|
+
}
|
|
354
|
+
}
|
|
355
|
+
|
|
356
|
+
async stopRecording(): Promise<void> {
|
|
357
|
+
if (this.state !== ClientState.READY) {
|
|
358
|
+
this.log('warn', 'Cannot stop recording - not in READY state', { state: this.state });
|
|
359
|
+
return;
|
|
360
|
+
}
|
|
361
|
+
|
|
362
|
+
this.log('debug', 'Stopping recording');
|
|
363
|
+
this.state = ClientState.STOPPING;
|
|
364
|
+
|
|
365
|
+
super.sendMessage(RealTimeTwoWayWebSocketRecognitionClient.PROTOCOL_VERSION, 'message', {
|
|
366
|
+
type: RecognitionContextTypeV1.CONTROL_SIGNAL,
|
|
367
|
+
signal: ControlSignalTypeV1.STOP_RECORDING
|
|
368
|
+
});
|
|
369
|
+
|
|
370
|
+
return new Promise((resolve) => {
|
|
371
|
+
const timeout = setTimeout(() => {
|
|
372
|
+
this.state = ClientState.STOPPED;
|
|
373
|
+
resolve();
|
|
374
|
+
}, 5000);
|
|
375
|
+
|
|
376
|
+
const original = this.config.onTranscript;
|
|
377
|
+
this.config.onTranscript = (result): void => {
|
|
378
|
+
original(result);
|
|
379
|
+
if (result.is_finished) {
|
|
380
|
+
clearTimeout(timeout);
|
|
381
|
+
this.state = ClientState.STOPPED;
|
|
382
|
+
resolve();
|
|
383
|
+
}
|
|
384
|
+
};
|
|
385
|
+
|
|
386
|
+
// CRITICAL: Update MessageHandler's callback to use the wrapped version
|
|
387
|
+
// Otherwise it will keep calling the original and never detect is_finished
|
|
388
|
+
(this.messageHandler as any).callbacks.onTranscript = this.config.onTranscript;
|
|
389
|
+
});
|
|
390
|
+
}
|
|
391
|
+
|
|
392
|
+
|
|
393
|
+
getAudioUtteranceId(): string {
|
|
394
|
+
return this.config.audioUtteranceId;
|
|
395
|
+
}
|
|
396
|
+
|
|
397
|
+
getState(): ClientState {
|
|
398
|
+
return this.state;
|
|
399
|
+
}
|
|
400
|
+
|
|
401
|
+
isConnected(): boolean {
|
|
402
|
+
return this.state === ClientState.READY;
|
|
403
|
+
}
|
|
404
|
+
|
|
405
|
+
isConnecting(): boolean {
|
|
406
|
+
return this.state === ClientState.CONNECTING;
|
|
407
|
+
}
|
|
408
|
+
|
|
409
|
+
isStopping(): boolean {
|
|
410
|
+
return this.state === ClientState.STOPPING;
|
|
411
|
+
}
|
|
412
|
+
|
|
413
|
+
isTranscriptionFinished(): boolean {
|
|
414
|
+
return this.state === ClientState.STOPPED;
|
|
415
|
+
}
|
|
416
|
+
|
|
417
|
+
isBufferOverflowing(): boolean {
|
|
418
|
+
return this.audioBuffer.isOverflowing();
|
|
419
|
+
}
|
|
420
|
+
|
|
421
|
+
getStats(): IRecognitionClientStats {
|
|
422
|
+
const bufferStats = this.audioBuffer.getStats();
|
|
423
|
+
return {
|
|
424
|
+
audioBytesSent: this.audioBytesSent,
|
|
425
|
+
audioChunksSent: this.audioChunksSent,
|
|
426
|
+
audioChunksBuffered: bufferStats.chunksBuffered,
|
|
427
|
+
bufferOverflowCount: bufferStats.overflowCount,
|
|
428
|
+
currentBufferedChunks: bufferStats.currentBufferedChunks,
|
|
429
|
+
hasWrapped: bufferStats.hasWrapped
|
|
430
|
+
};
|
|
431
|
+
}
|
|
432
|
+
|
|
433
|
+
// ==========================================================================
|
|
434
|
+
// WEBSOCKET HOOKS (from WebSocketAudioClient)
|
|
435
|
+
// ==========================================================================
|
|
436
|
+
|
|
437
|
+
protected onConnected(): void {
|
|
438
|
+
this.log('debug', 'WebSocket onConnected callback');
|
|
439
|
+
|
|
440
|
+
// Send ASRRequest with configuration (if provided)
|
|
441
|
+
if (this.config.asrRequestConfig) {
|
|
442
|
+
// Extract debugCommand if present (with type safety for new field)
|
|
443
|
+
const debugCommand = (this.config.asrRequestConfig as any).debugCommand;
|
|
444
|
+
if (debugCommand?.enableDebugLog) {
|
|
445
|
+
this.isDebugLogEnabled = true;
|
|
446
|
+
this.log('debug', 'Debug logging enabled via debugCommand');
|
|
447
|
+
}
|
|
448
|
+
|
|
449
|
+
// Only generate debug log data if debug logging is enabled
|
|
450
|
+
if (this.isDebugLogEnabled) {
|
|
451
|
+
this.log('debug', 'Sending ASR request', this.config.asrRequestConfig);
|
|
452
|
+
}
|
|
453
|
+
|
|
454
|
+
const asrRequest: ASRRequestV1 = {
|
|
455
|
+
type: RecognitionContextTypeV1.ASR_REQUEST,
|
|
456
|
+
audioUtteranceId: this.config.audioUtteranceId,
|
|
457
|
+
provider: this.config.asrRequestConfig.provider.toString(),
|
|
458
|
+
model: this.config.asrRequestConfig.model,
|
|
459
|
+
language: this.config.asrRequestConfig.language?.toString() || 'en',
|
|
460
|
+
sampleRate:
|
|
461
|
+
typeof this.config.asrRequestConfig.sampleRate === 'number'
|
|
462
|
+
? this.config.asrRequestConfig.sampleRate
|
|
463
|
+
: SampleRate.RATE_16000,
|
|
464
|
+
encoding:
|
|
465
|
+
typeof this.config.asrRequestConfig.encoding === 'number'
|
|
466
|
+
? this.config.asrRequestConfig.encoding
|
|
467
|
+
: AudioEncoding.LINEAR16,
|
|
468
|
+
interimResults: this.config.asrRequestConfig.interimResults ?? false,
|
|
469
|
+
// Auto-enable useContext if gameContext is provided, or use explicit value if set
|
|
470
|
+
useContext: this.config.asrRequestConfig.useContext ?? !!this.config.gameContext,
|
|
471
|
+
...(debugCommand && { debugCommand })
|
|
472
|
+
};
|
|
473
|
+
|
|
474
|
+
super.sendMessage(
|
|
475
|
+
RealTimeTwoWayWebSocketRecognitionClient.PROTOCOL_VERSION,
|
|
476
|
+
'message',
|
|
477
|
+
asrRequest
|
|
478
|
+
);
|
|
479
|
+
}
|
|
480
|
+
|
|
481
|
+
// Send GameContext if provided
|
|
482
|
+
if (this.config.gameContext) {
|
|
483
|
+
// Only pass gameContext object to log if debug logging is enabled
|
|
484
|
+
if (this.isDebugLogEnabled) {
|
|
485
|
+
this.log('debug', 'Sending game context', this.config.gameContext);
|
|
486
|
+
}
|
|
487
|
+
super.sendMessage(
|
|
488
|
+
RealTimeTwoWayWebSocketRecognitionClient.PROTOCOL_VERSION,
|
|
489
|
+
'message',
|
|
490
|
+
this.config.gameContext
|
|
491
|
+
);
|
|
492
|
+
}
|
|
493
|
+
|
|
494
|
+
this.log('debug', 'Waiting for server ready signal');
|
|
495
|
+
this.config.onConnected();
|
|
496
|
+
}
|
|
497
|
+
|
|
498
|
+
protected onDisconnected(code: number, reason: string): void {
|
|
499
|
+
this.log('debug', 'WebSocket disconnected', { code, reason, previousState: this.state });
|
|
500
|
+
|
|
501
|
+
// Update state based on disconnection type
|
|
502
|
+
if (this.state === ClientState.STOPPING) {
|
|
503
|
+
this.state = ClientState.STOPPED;
|
|
504
|
+
} else if (
|
|
505
|
+
this.state === ClientState.CONNECTED ||
|
|
506
|
+
this.state === ClientState.READY ||
|
|
507
|
+
this.state === ClientState.CONNECTING
|
|
508
|
+
) {
|
|
509
|
+
this.log('error', 'Unexpected disconnection', { code, reason });
|
|
510
|
+
this.state = ClientState.FAILED;
|
|
511
|
+
}
|
|
512
|
+
|
|
513
|
+
// Clean up memory proactively when connection closes
|
|
514
|
+
this.cleanup();
|
|
515
|
+
|
|
516
|
+
this.config.onDisconnected(code, reason);
|
|
517
|
+
}
|
|
518
|
+
|
|
519
|
+
protected onError(error: Event): void {
|
|
520
|
+
this.state = ClientState.FAILED;
|
|
521
|
+
|
|
522
|
+
const errorResult: ErrorResultV1 = {
|
|
523
|
+
type: RecognitionResultTypeV1.ERROR,
|
|
524
|
+
audioUtteranceId: '',
|
|
525
|
+
message: 'WebSocket error',
|
|
526
|
+
description: error.type || 'Connection error'
|
|
527
|
+
};
|
|
528
|
+
this.config.onError(errorResult);
|
|
529
|
+
}
|
|
530
|
+
|
|
531
|
+
protected override onMessage(msg: { v: number; type: string; data: any }): void {
|
|
532
|
+
this.messageHandler.handleMessage(msg);
|
|
533
|
+
}
|
|
534
|
+
|
|
535
|
+
// ==========================================================================
|
|
536
|
+
// INTERNAL HELPERS
|
|
537
|
+
// ==========================================================================
|
|
538
|
+
|
|
539
|
+
/**
|
|
540
|
+
* Handle control messages from server
|
|
541
|
+
* @param msg - Control message containing server actions
|
|
542
|
+
*/
|
|
543
|
+
private handleControlMessage(msg: ClientControlMessageV1): void {
|
|
544
|
+
switch (msg.action) {
|
|
545
|
+
case ClientControlActionV1.READY_FOR_UPLOADING_RECORDING: {
|
|
546
|
+
this.log('debug', 'Server ready for audio upload');
|
|
547
|
+
this.state = ClientState.READY;
|
|
548
|
+
this.messageHandler.setSessionStartTime(Date.now());
|
|
549
|
+
|
|
550
|
+
// Flush buffered audio now that server is ready
|
|
551
|
+
const bufferedChunks = this.audioBuffer.flush();
|
|
552
|
+
if (bufferedChunks.length > 0) {
|
|
553
|
+
this.log('debug', 'Flushing buffered audio', { chunks: bufferedChunks.length });
|
|
554
|
+
bufferedChunks.forEach((chunk) => this.sendAudioNow(chunk.data));
|
|
555
|
+
}
|
|
556
|
+
break;
|
|
557
|
+
}
|
|
558
|
+
|
|
559
|
+
case ClientControlActionV1.STOP_RECORDING:
|
|
560
|
+
this.log('debug', 'Received stop recording signal from server');
|
|
561
|
+
break;
|
|
562
|
+
|
|
563
|
+
default:
|
|
564
|
+
this.log('warn', 'Unknown control action', { action: msg.action });
|
|
565
|
+
}
|
|
566
|
+
}
|
|
567
|
+
|
|
568
|
+
/**
|
|
569
|
+
* Send audio immediately to the server (without buffering)
|
|
570
|
+
* @param audioData - Audio data to send
|
|
571
|
+
*/
|
|
572
|
+
private sendAudioNow(audioData: ArrayBuffer | ArrayBufferView): void {
|
|
573
|
+
const byteLength = ArrayBuffer.isView(audioData)
|
|
574
|
+
? audioData.byteLength
|
|
575
|
+
: audioData.byteLength;
|
|
576
|
+
|
|
577
|
+
const encodingId = (this.config.asrRequestConfig?.encoding ||
|
|
578
|
+
AudioEncoding.LINEAR16) as AudioEncoding;
|
|
579
|
+
|
|
580
|
+
const sampleRate =
|
|
581
|
+
typeof this.config.asrRequestConfig?.sampleRate === 'number'
|
|
582
|
+
? this.config.asrRequestConfig.sampleRate
|
|
583
|
+
: SampleRate.RATE_16000;
|
|
584
|
+
|
|
585
|
+
super.sendAudio(
|
|
586
|
+
audioData,
|
|
587
|
+
RealTimeTwoWayWebSocketRecognitionClient.PROTOCOL_VERSION,
|
|
588
|
+
encodingId,
|
|
589
|
+
sampleRate
|
|
590
|
+
);
|
|
591
|
+
|
|
592
|
+
this.audioBytesSent += byteLength;
|
|
593
|
+
this.audioChunksSent++;
|
|
594
|
+
}
|
|
595
|
+
}
|