@volley/recognition-client-sdk-node22 0.1.424

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/README.md +344 -0
  2. package/dist/browser.bundled.d.ts +1280 -0
  3. package/dist/browser.d.ts +10 -0
  4. package/dist/browser.d.ts.map +1 -0
  5. package/dist/config-builder.d.ts +134 -0
  6. package/dist/config-builder.d.ts.map +1 -0
  7. package/dist/errors.d.ts +41 -0
  8. package/dist/errors.d.ts.map +1 -0
  9. package/dist/factory.d.ts +36 -0
  10. package/dist/factory.d.ts.map +1 -0
  11. package/dist/index.bundled.d.ts +2572 -0
  12. package/dist/index.d.ts +16 -0
  13. package/dist/index.d.ts.map +1 -0
  14. package/dist/index.js +10199 -0
  15. package/dist/index.js.map +7 -0
  16. package/dist/recog-client-sdk.browser.d.ts +10 -0
  17. package/dist/recog-client-sdk.browser.d.ts.map +1 -0
  18. package/dist/recog-client-sdk.browser.js +5746 -0
  19. package/dist/recog-client-sdk.browser.js.map +7 -0
  20. package/dist/recognition-client.d.ts +128 -0
  21. package/dist/recognition-client.d.ts.map +1 -0
  22. package/dist/recognition-client.types.d.ts +271 -0
  23. package/dist/recognition-client.types.d.ts.map +1 -0
  24. package/dist/simplified-vgf-recognition-client.d.ts +178 -0
  25. package/dist/simplified-vgf-recognition-client.d.ts.map +1 -0
  26. package/dist/utils/audio-ring-buffer.d.ts +69 -0
  27. package/dist/utils/audio-ring-buffer.d.ts.map +1 -0
  28. package/dist/utils/message-handler.d.ts +45 -0
  29. package/dist/utils/message-handler.d.ts.map +1 -0
  30. package/dist/utils/url-builder.d.ts +28 -0
  31. package/dist/utils/url-builder.d.ts.map +1 -0
  32. package/dist/vgf-recognition-mapper.d.ts +66 -0
  33. package/dist/vgf-recognition-mapper.d.ts.map +1 -0
  34. package/dist/vgf-recognition-state.d.ts +91 -0
  35. package/dist/vgf-recognition-state.d.ts.map +1 -0
  36. package/package.json +74 -0
  37. package/src/browser.ts +24 -0
  38. package/src/config-builder.spec.ts +265 -0
  39. package/src/config-builder.ts +240 -0
  40. package/src/errors.ts +84 -0
  41. package/src/factory.spec.ts +215 -0
  42. package/src/factory.ts +47 -0
  43. package/src/index.ts +127 -0
  44. package/src/recognition-client.spec.ts +889 -0
  45. package/src/recognition-client.ts +844 -0
  46. package/src/recognition-client.types.ts +338 -0
  47. package/src/simplified-vgf-recognition-client.integration.spec.ts +718 -0
  48. package/src/simplified-vgf-recognition-client.spec.ts +1525 -0
  49. package/src/simplified-vgf-recognition-client.ts +524 -0
  50. package/src/utils/audio-ring-buffer.spec.ts +335 -0
  51. package/src/utils/audio-ring-buffer.ts +170 -0
  52. package/src/utils/message-handler.spec.ts +311 -0
  53. package/src/utils/message-handler.ts +131 -0
  54. package/src/utils/url-builder.spec.ts +252 -0
  55. package/src/utils/url-builder.ts +92 -0
  56. package/src/vgf-recognition-mapper.spec.ts +78 -0
  57. package/src/vgf-recognition-mapper.ts +232 -0
  58. package/src/vgf-recognition-state.ts +102 -0
@@ -0,0 +1,844 @@
1
+ /**
2
+ * RealTimeTwoWayWebSocketRecognitionClient - Clean, compact SDK for real-time speech recognition
3
+ *
4
+ * Features:
5
+ * - Ring buffer-based audio storage with fixed memory footprint
6
+ * - Automatic buffering when disconnected, immediate send when connected
7
+ * - Buffer persists after flush (for future retry/reconnection scenarios)
8
+ * - Built on WebSocketAudioClient for robust protocol handling
9
+ * - Simple API: connect() → sendAudio() → stopRecording()
10
+ * - Type-safe message handling with callbacks
11
+ * - Automatic backpressure management
12
+ * - Overflow detection with buffer state tracking
13
+ *
14
+ * Example:
15
+ * ```typescript
16
+ * const client = new RealTimeTwoWayWebSocketRecognitionClient({
17
+ * url: 'ws://localhost:3101/ws/v1/recognize',
18
+ * onTranscript: (result) => console.log(result.finalTranscript),
19
+ * onError: (error) => console.error(error),
20
+ * maxBufferDurationSec: 60 // Ring buffer for 60 seconds
21
+ * });
22
+ *
23
+ * await client.connect();
24
+ *
25
+ * // Send audio chunks - always stored in ring buffer, sent if connected
26
+ * micStream.on('data', (chunk) => client.sendAudio(chunk));
27
+ *
28
+ * // Signal end of audio and wait for final results
29
+ * await client.stopRecording();
30
+ *
31
+ * // Server will close connection after sending finals
32
+ * // No manual cleanup needed - browser handles it
33
+ * ```
34
+ */
35
+
36
+ import { WebSocketAudioClient } from '@recog/websocket';
37
+ import {
38
+ AudioEncoding,
39
+ RecognitionResultTypeV1,
40
+ ClientControlActionV1,
41
+ RecognitionContextTypeV1,
42
+ ControlSignalTypeV1,
43
+ type TranscriptionResultV1,
44
+ type FunctionCallResultV1,
45
+ type MetadataResultV1,
46
+ type ErrorResultV1,
47
+ type ClientControlMessageV1,
48
+ type ASRRequestConfig,
49
+ type ASRRequestV1,
50
+ type GameContextV1,
51
+ SampleRate
52
+ } from '@recog/shared-types';
53
+ import { v4 as uuidv4 } from 'uuid';
54
+ import { ClientState } from './recognition-client.types.js';
55
+ import type {
56
+ IRecognitionClient,
57
+ IRecognitionClientStats,
58
+ RealTimeTwoWayWebSocketRecognitionClientConfig,
59
+ RecognitionCallbackUrl
60
+ } from './recognition-client.types.js';
61
+ import { buildWebSocketUrl } from './utils/url-builder.js';
62
+ import { AudioRingBuffer } from './utils/audio-ring-buffer.js';
63
+ import { MessageHandler } from './utils/message-handler.js';
64
+ import { ConnectionError } from './errors.js';
65
+
66
+ // ============================================================================
67
+ // UTILITIES
68
+ // ============================================================================
69
+
70
/**
 * Tell whether a WebSocket close code represents a normal, expected closure.
 *
 * @param code - Close code reported with the WebSocket close event
 * @returns `true` only for code 1000; every other code is treated as abnormal
 */
export function isNormalDisconnection(code: number): boolean {
  // 1000 ("Normal Closure") is the single code this SDK accepts as a clean shutdown
  const NORMAL_CLOSURE = 1000;
  return code === NORMAL_CLOSURE;
}
78
+
79
/**
 * Read the entire contents of a Blob into an ArrayBuffer.
 *
 * Strategy:
 * - When the runtime exposes `blob.arrayBuffer()` it is used directly.
 * - Otherwise the Blob is read through `FileReader`, which older Smart TV
 *   web engines still provide.
 *
 * @see https://developer.samsung.com/smarttv/develop/specifications/web-engine-specifications.html
 * @param blob - Blob to convert
 * @returns Promise resolving to the Blob's bytes; rejects with the reader's error
 *          when the FileReader fallback fails
 */
async function blobToArrayBuffer(blob: Blob): Promise<ArrayBuffer> {
  const hasNativeArrayBuffer = typeof blob.arrayBuffer === 'function';

  if (!hasNativeArrayBuffer) {
    // Legacy path: wrap the callback-based FileReader API in a promise
    return new Promise<ArrayBuffer>((resolve, reject) => {
      const fileReader = new FileReader();
      fileReader.onload = (): void => {
        resolve(fileReader.result as ArrayBuffer);
      };
      fileReader.onerror = (): void => {
        reject(fileReader.error);
      };
      fileReader.readAsArrayBuffer(blob);
    });
  }

  return await blob.arrayBuffer();
}
104
+
105
+ // ============================================================================
106
+ // TYPE DEFINITIONS
107
+ // ============================================================================
108
+
109
+ /**
110
+ * Re-export TranscriptionResultV1 as TranscriptionResult for backward compatibility
111
+ */
112
+ export type TranscriptionResult = TranscriptionResultV1;
113
+
114
+ // Re-export config interface from types file for backward compatibility
115
+ export type { RealTimeTwoWayWebSocketRecognitionClientConfig } from './recognition-client.types.js';
116
+
117
/**
 * Fully-resolved client configuration used internally.
 *
 * Unlike the public config, every callback and tuning value here is required;
 * only the request payloads and the logger remain optional.
 */
interface InternalConfig {
  // --- Session identity -----------------------------------------------------
  /** WebSocket URL the client connects to */
  url: string;
  /** Immutable - ensures one audio session per client instance */
  readonly audioUtteranceId: string;

  // --- Optional request payloads --------------------------------------------
  asrRequestConfig?: ASRRequestConfig;
  gameContext?: GameContextV1;
  callbackUrls?: RecognitionCallbackUrl[];

  // --- Callbacks ------------------------------------------------------------
  onTranscript: (result: TranscriptionResultV1) => void;
  onFunctionCall: (result: FunctionCallResultV1) => void;
  onMetadata: (metadata: MetadataResultV1) => void;
  onError: (error: ErrorResultV1) => void;
  onConnected: () => void;
  onDisconnected: (code: number, reason: string) => void;

  // --- Backpressure and buffering -------------------------------------------
  highWaterMark: number;
  lowWaterMark: number;
  maxBufferDurationSec: number;
  chunksPerSecond: number;

  // --- Connection retry -----------------------------------------------------
  connectionRetry: {
    /** Total number of connection attempts */
    maxAttempts: number;
    /** Pause between two attempts, in milliseconds */
    delayMs: number;
  };

  // --- Diagnostics ----------------------------------------------------------
  logger?: (level: 'debug' | 'info' | 'warn' | 'error', message: string, data?: any) => void;
}
142
+
143
+ // ============================================================================
144
+ // RECOGNITION CLIENT
145
+ // ============================================================================
146
+
147
+ /**
148
+ * RealTimeTwoWayWebSocketRecognitionClient - SDK-level client for real-time speech recognition
149
+ *
150
+ * Implements IRecognitionClient interface for dependency injection and testing.
151
+ * Extends WebSocketAudioClient with local audio buffering and simple callback-based API.
152
+ */
153
+ export class RealTimeTwoWayWebSocketRecognitionClient
154
+ extends WebSocketAudioClient<number, any, any>
155
+ implements IRecognitionClient
156
+ {
157
+ private static readonly PROTOCOL_VERSION = 1;
158
+
159
+ private config: InternalConfig;
160
+ private audioBuffer: AudioRingBuffer;
161
+ private messageHandler: MessageHandler;
162
+ private state: ClientState = ClientState.INITIAL;
163
+ private connectionPromise: Promise<void> | undefined;
164
+
165
+ // Debug control (internal state, controlled by debugCommand in ASRRequest)
166
+ private isDebugLogEnabled = false;
167
+
168
+ // Stats
169
+ private audioBytesSent = 0;
170
+ private audioChunksSent = 0;
171
+ private audioStatsLogInterval = 100;
172
+ private lastAudioStatsLog = 0;
173
+
174
/**
 * Build a recognition client from the public configuration.
 *
 * Resolves the WebSocket URL, initializes the base WebSocketAudioClient,
 * fills in defaults for every optional setting, and creates the audio ring
 * buffer and message handler.
 *
 * @param config - Public client configuration; omitted callbacks default to no-ops
 */
constructor(config: RealTimeTwoWayWebSocketRecognitionClientConfig) {
  // Generate UUID v4 for audioUtteranceId if not provided
  const audioUtteranceId = config.audioUtteranceId || uuidv4();

  // Build WebSocket URL with query parameters
  // Precedence: url > stage > default production
  const url = buildWebSocketUrl({
    audioUtteranceId,
    ...(config.url && { url: config.url }),
    ...(config.stage && { stage: config.stage }),
    ...(config.callbackUrls && { callbackUrls: config.callbackUrls }),
    ...(config.userId && { userId: config.userId }),
    ...(config.gameSessionId && { gameSessionId: config.gameSessionId }),
    ...(config.deviceId && { deviceId: config.deviceId }),
    ...(config.accountId && { accountId: config.accountId }),
    ...(config.questionAnswerId && { questionAnswerId: config.questionAnswerId }),
    ...(config.platform && { platform: config.platform }),
    ...(config.gameContext && { gameContext: config.gameContext }),
    ...(config.gameId && { gameId: config.gameId })
  });

  // Resolve watermark defaults once so the base client and the stored config agree
  const highWaterMark = config.highWaterMark ?? 512_000;
  const lowWaterMark = config.lowWaterMark ?? 128_000;

  // Initialize base WebSocketAudioClient
  super({
    url,
    highWM: highWaterMark,
    lowWM: lowWaterMark
  });

  // Process retry config with defaults and validation
  const retryConfig = config.connectionRetry || {};

  // Default: 4 attempts (3 retries), clamped to 1-5. NaN would slip through
  // Math.max/Math.min and produce a retry loop that never runs, so fall back
  // to the default in that case.
  const requestedAttempts = retryConfig.maxAttempts ?? 4;
  const maxAttempts = Number.isNaN(requestedAttempts)
    ? 4
    : Math.max(1, Math.min(5, Math.floor(requestedAttempts)));

  // Fast retry for short audio sessions; non-finite delays fall back to the
  // default and negative delays are treated as "no delay"
  const requestedDelayMs = retryConfig.delayMs ?? 200;
  const delayMs = Number.isFinite(requestedDelayMs) ? Math.max(0, requestedDelayMs) : 200;

  // Process config with defaults
  this.config = {
    url,
    audioUtteranceId,
    ...(config.asrRequestConfig && { asrRequestConfig: config.asrRequestConfig }),
    ...(config.gameContext && { gameContext: config.gameContext }),
    ...(config.callbackUrls && { callbackUrls: config.callbackUrls }),
    onTranscript: config.onTranscript || (() => {}),
    onFunctionCall: config.onFunctionCall || (() => {}),
    onMetadata: config.onMetadata || (() => {}),
    onError: config.onError || (() => {}),
    onConnected: config.onConnected || (() => {}),
    onDisconnected: config.onDisconnected || (() => {}),
    highWaterMark,
    lowWaterMark,
    maxBufferDurationSec: config.maxBufferDurationSec ?? 60,
    chunksPerSecond: config.chunksPerSecond ?? 100,
    connectionRetry: {
      maxAttempts,
      delayMs
    },
    ...(config.logger && { logger: config.logger })
  };

  // Initialize audio buffer
  this.audioBuffer = new AudioRingBuffer({
    maxBufferDurationSec: this.config.maxBufferDurationSec,
    chunksPerSecond: this.config.chunksPerSecond,
    ...(this.config.logger && { logger: this.config.logger })
  });

  // Initialize message handler
  this.messageHandler = new MessageHandler({
    onTranscript: this.config.onTranscript,
    onFunctionCall: this.config.onFunctionCall,
    onMetadata: this.config.onMetadata,
    onError: this.config.onError,
    onControlMessage: this.handleControlMessage.bind(this),
    ...(this.config.logger && { logger: this.config.logger })
  });
}
248
+
249
+ // ==========================================================================
250
+ // PRIVATE HELPERS
251
+ // ==========================================================================
252
+
253
/**
 * Forward a message to the configured logger, if any.
 *
 * Messages are prefixed with `[RecogSDK]`. Debug-level messages are dropped
 * unless debug logging has been switched on (see `isDebugLogEnabled`).
 *
 * @param level - Log level: debug, info, warn, or error
 * @param message - Message to log
 * @param data - Optional additional data to log
 */
private log(level: 'debug' | 'info' | 'warn' | 'error', message: string, data?: any): void {
  const isSuppressedDebug = level === 'debug' && !this.isDebugLogEnabled;
  if (isSuppressedDebug) {
    return;
  }

  // Optional call: a no-op when no logger was supplied in the config
  this.config.logger?.(level, `[RecogSDK] ${message}`, data);
}
270
+
271
/**
 * Release per-session state: empties the audio buffer, zeroes the send
 * counters and forgets any stored connection promise.
 *
 * Invoked from onDisconnected() and stopAbnormally().
 */
private cleanup(): void {
  this.log('debug', 'Cleaning up resources');

  // Drop buffered audio so the memory can be reclaimed
  this.audioBuffer.clear();

  // Counters restart from zero for the next session
  this.audioBytesSent = this.audioChunksSent = this.lastAudioStatsLog = 0;

  // Without a stored promise, connect() is free to start a new attempt
  this.connectionPromise = undefined;
}
289
+
290
+ // ==========================================================================
291
+ // PUBLIC API
292
+ // ==========================================================================
293
+
294
/**
 * Open the WebSocket connection, retrying according to `connectionRetry`.
 *
 * - While an attempt sequence is in flight, every caller receives that same promise.
 * - A new sequence is started only from the INITIAL, FAILED or STOPPED states;
 *   in any other state the call resolves without doing anything.
 *
 * @returns Promise that settles with the outcome of the connection sequence
 */
override async connect(): Promise<void> {
  // Guard 1: share the in-flight attempt instead of starting a second one
  const inFlight = this.connectionPromise;
  if (inFlight) {
    this.log('debug', 'Returning existing connection promise (already connecting)', {
      state: this.state
    });
    return inFlight;
  }

  // Guard 2: only these states may start a connection
  const connectableStates: ClientState[] = [
    ClientState.INITIAL,
    ClientState.FAILED,
    ClientState.STOPPED
  ];
  if (!connectableStates.includes(this.state)) {
    this.log('debug', 'Already connected or in wrong state', {
      state: this.state
    });
    return Promise.resolve();
  }

  // Record the promise right away so concurrent callers hit guard 1
  const attemptSequence = this.connectWithRetry();
  this.connectionPromise = attemptSequence;
  return attemptSequence;
}
322
+
323
/**
 * Attempt to connect with retry logic.
 * Only retries on initial connection establishment, not mid-stream interruptions.
 *
 * Each attempt temporarily replaces the onConnected/onError hooks with one-shot
 * handlers and is bounded by a 10 second timeout. The original hooks and the
 * concurrency guard (`connectionPromise`) are restored/cleared in `finally`,
 * whatever the outcome.
 *
 * @returns Promise that resolves once an attempt succeeds
 * @throws ConnectionError when all `maxAttempts` attempts fail; the last
 *         failure (always an Error instance) is passed along as the cause
 */
private async connectWithRetry(): Promise<void> {
  const { maxAttempts, delayMs } = this.config.connectionRetry;
  const connectionTimeout = 10000; // 10 second timeout per attempt

  // TODO: Consider implementing error-code-based retry strategy
  // - Retry on 503 (Service Unavailable) with longer delays
  // - Don't retry on 401 (Unauthorized) or 400 (Bad Request)
  // - Requires extracting HTTP status from WebSocket connection error
  // For now: Simple retry for all connection failures

  let lastError: Error | undefined;

  // Store original handlers once (not per-attempt to avoid nested wrappers)
  // IMPORTANT: Save the bound protected methods (they contain the real logic)!
  // The protected onConnected() method sends ASRRequest and then calls config callback
  // The protected onError() method converts Event to ErrorResultV1 and calls config callback
  const originalOnConnected = this.onConnected.bind(this);
  const originalOnError = this.onError.bind(this);

  try {
    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
      // Use debug for first attempt (usually succeeds), info for retries
      const attemptLogLevel = attempt === 1 ? 'debug' : 'info';
      this.log(attemptLogLevel, `Connection attempt ${attempt}/${maxAttempts}`, {
        url: this.config.url,
        delayMs: attempt > 1 ? delayMs : 0
      });

      this.state = ClientState.CONNECTING;
      const connectionStartTime = Date.now();

      try {
        // Create promise for this single attempt with timeout
        await new Promise<void>((resolve, reject) => {
          let settled = false; // Guard against late callbacks for this attempt

          const timeout = setTimeout(() => {
            if (settled) return;
            settled = true;
            this.log('warn', 'Connection timeout', { timeout: connectionTimeout, attempt });
            this.state = ClientState.FAILED;
            reject(new Error(`Connection timeout after ${connectionTimeout}ms`));
          }, connectionTimeout);

          // One-shot handlers for this attempt
          this.onConnected = (): void => {
            if (settled) return; // Ignore late callback
            settled = true;
            clearTimeout(timeout);

            const connectionTime = Date.now() - connectionStartTime;
            this.log('debug', 'Connection established successfully', {
              connectionTimeMs: connectionTime,
              url: this.config.url,
              attempt
            });
            this.state = ClientState.CONNECTED;

            // Call original handler
            originalOnConnected();
            resolve();
          };

          this.onError = (error): void => {
            if (settled) return; // Ignore late callback
            settled = true;
            clearTimeout(timeout);

            this.log('warn', 'Connection error', { error, attempt });
            this.state = ClientState.FAILED;

            // Don't call originalOnError - it expects ErrorResultV1, not WebSocket Event
            // Connection errors are handled by throwing ConnectionError after retry exhaustion
            reject(error);
          };

          // Start the connection attempt
          super.connect();
        });

        // Success! Connection established
        const successLogLevel = attempt === 1 ? 'debug' : 'info';
        this.log(successLogLevel, `Connection successful on attempt ${attempt}`, {
          totalAttempts: attempt
        });
        return; // Success - exit retry loop

      } catch (error) {
        // The one-shot onError handler rejects with the raw WebSocket Event,
        // which has no `message`. Normalize to a real Error so the log lines
        // below and the ConnectionError cause carry a usable description.
        if (error instanceof Error) {
          lastError = error;
        } else {
          const eventType = (error as { type?: unknown } | null | undefined)?.type;
          lastError = new Error(
            typeof eventType === 'string'
              ? `WebSocket connection failed (event: ${eventType})`
              : 'WebSocket connection failed'
          );
        }

        if (attempt < maxAttempts) {
          // Not the last attempt - wait before retry
          // Use info for first 2 retries (attempts 2-3), warn for 3rd retry (attempt 4)
          const logLevel = attempt < 3 ? 'info' : 'warn';
          this.log(logLevel, `Connection attempt ${attempt} failed, retrying after ${delayMs}ms`, {
            error: lastError.message,
            nextAttempt: attempt + 1
          });

          // Reset state to allow retry (but DON'T clear connectionPromise - maintains concurrency guard)
          this.state = ClientState.INITIAL;

          // Wait before next attempt
          await new Promise(resolve => setTimeout(resolve, delayMs));
        } else {
          // Last attempt failed - all retries exhausted
          this.log('warn', `All ${maxAttempts} connection attempts failed`, {
            error: lastError.message
          });
        }
      }
    }

    // All retries exhausted - throw typed ConnectionError
    throw new ConnectionError(
      `Failed to establish connection after ${maxAttempts} attempts`,
      maxAttempts,
      this.config.url,
      lastError
    );
  } finally {
    // Restore original protected method handlers
    this.onConnected = originalOnConnected;
    this.onError = originalOnError;

    // Clear connectionPromise only after entire retry sequence completes (success or failure)
    this.connectionPromise = undefined;
  }
}
456
+
457
/**
 * Accept one chunk of audio for delivery to the server.
 *
 * ArrayBuffer and ArrayBufferView chunks are handled synchronously. A Blob is
 * first converted to an ArrayBuffer asynchronously; a failed conversion is
 * logged at error level and the chunk is dropped.
 *
 * @param audioData - Audio chunk as ArrayBuffer, typed-array/DataView, or Blob
 */
override sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void {
  // Fast path: binary data that needs no conversion
  if (!(audioData instanceof Blob)) {
    this.sendAudioInternal(audioData);
    return;
  }

  // Blob path: convert first, then hand over to the common pipeline
  blobToArrayBuffer(audioData)
    .then((converted) => {
      this.sendAudioInternal(converted);
    })
    .catch((conversionError) => {
      this.log('error', 'Failed to convert Blob to ArrayBuffer', conversionError);
    });
}
473
+
474
/**
 * Buffer one audio chunk and, when possible, send it straight away.
 *
 * Empty chunks are ignored. Every non-empty chunk is written to the ring
 * buffer; it is additionally sent immediately when the client is READY and
 * the socket is not locally backpressured.
 *
 * @param audioData - Non-Blob audio chunk
 */
private sendAudioInternal(audioData: ArrayBuffer | ArrayBufferView): void {
  // ArrayBuffer and ArrayBufferView both expose byteLength directly
  const bytes = audioData.byteLength;
  if (bytes === 0) return;

  // BACKPRESSURE HINT: Return false or throw if audioBuffer.write() returns false (overflow)
  // Caller should pause audio capture until buffer has space (check isBufferOverflowing())

  // Always write to ring buffer
  this.audioBuffer.write(audioData);

  // Send immediately if ready and not backpressured
  const backpressured = super.isLocalBackpressured();
  if (this.state === ClientState.READY && !backpressured) {
    this.log('debug', 'Sending audio immediately', { bytes });
    this.sendAudioNow(audioData);
    // NOTE(review): read() is used here to drop the chunk that was just sent.
    // Within this file, chunks buffered while READY-but-backpressured are not
    // re-sent later, and read() presumably removes the oldest entry rather
    // than this one - confirm against AudioRingBuffer.
    this.audioBuffer.read(); // Remove from buffer since we sent it
  } else {
    this.log('debug', 'Buffering audio', {
      bytes,
      state: this.state,
      backpressured
    });
  }

  // Log audio stats periodically (only if debug logging is enabled)
  if (this.isDebugLogEnabled) {
    const stats = this.audioBuffer.getStats();
    const totalChunks = this.audioChunksSent + stats.chunksBuffered;
    if (totalChunks - this.lastAudioStatsLog >= this.audioStatsLogInterval) {
      this.log('debug', 'Audio statistics', {
        totalBytesSent: this.audioBytesSent,
        totalChunksSent: this.audioChunksSent,
        ...stats
      });
      this.lastAudioStatsLog = totalChunks;
    }
  }
}
511
+
512
/**
 * Signal end of audio and wait for the final transcript.
 *
 * Only active when the client is in READY state; otherwise it logs a warning
 * and returns immediately. After sending the STOP_RECORDING control signal the
 * returned promise resolves when a transcript with `is_finished` arrives or
 * after 5 seconds, whichever happens first.
 *
 * The temporary onTranscript wrapper installed for the wait is removed again
 * once the promise settles, and the state is moved to STOPPED only if the
 * client is still STOPPING (so a state set by a disconnect, error or
 * reconnect in the meantime is not overwritten).
 *
 * @returns Promise that resolves when the recording is stopped
 */
async stopRecording(): Promise<void> {
  if (this.state !== ClientState.READY) {
    this.log('warn', 'stopRecording called but not in READY state', { state: this.state });
    return;
  }

  this.log('debug', 'Stopping recording');
  this.state = ClientState.STOPPING;

  super.sendMessage(RealTimeTwoWayWebSocketRecognitionClient.PROTOCOL_VERSION, 'message', {
    type: RecognitionContextTypeV1.CONTROL_SIGNAL,
    signal: ControlSignalTypeV1.STOP_RECORDING
  });

  return new Promise<void>((resolve) => {
    const original = this.config.onTranscript;
    let settled = false;

    // Single exit path shared by the timeout and the final transcript
    const finish = (): void => {
      if (settled) return;
      settled = true;
      clearTimeout(timeout);

      // Put the caller's callback back so wrappers do not pile up across sessions
      if (this.config.onTranscript === wrapped) {
        this.config.onTranscript = original;
        (this.messageHandler as any).callbacks.onTranscript = original;
      }

      // Do not clobber a state that changed while we were waiting
      if (this.state === ClientState.STOPPING) {
        this.state = ClientState.STOPPED;
      }
      resolve();
    };

    const wrapped = (result: TranscriptionResultV1): void => {
      original(result);
      if (result.is_finished) {
        finish();
      }
    };

    const timeout = setTimeout(finish, 5000);

    this.config.onTranscript = wrapped;

    // CRITICAL: Update MessageHandler's callback to use the wrapped version
    // Otherwise it will keep calling the original and never detect is_finished
    (this.messageHandler as any).callbacks.onTranscript = wrapped;
  });
}
552
+
553
/**
 * Abort the session immediately without waiting for final results.
 *
 * Does nothing when the client is already STOPPED or FAILED. Otherwise the
 * state jumps directly to STOPPED, internal resources are released and the
 * connection is closed with code 1000.
 */
stopAbnormally(): void {
  const alreadyTerminal =
    this.state === ClientState.STOPPED || this.state === ClientState.FAILED;
  if (alreadyTerminal) {
    this.log('debug', 'stopAbnormally called but already in terminal state', { state: this.state });
    return;
  }

  this.log('warn', 'Abnormal stop requested - closing connection immediately', { state: this.state });

  // Go straight to STOPPED; the STOPPING phase is skipped on purpose
  this.state = ClientState.STOPPED;

  // Release buffered audio and counters before closing the socket
  this.cleanup();

  // 1000 = Normal Closure in WebSocket terms, even though this stop is abnormal for us.
  // closeConnection is reached through a type assertion because it is a newly
  // added protected method on the base client.
  (this as any).closeConnection(1000, 'Client abnormal stop');

  // The resulting close event is expected to reach onDisconnected(), which runs
  // cleanup() once more and invokes the configured onDisconnected callback.
}
576
+
577
/**
 * @returns The audio utterance id fixed for this client instance
 */
getAudioUtteranceId(): string {
  const { audioUtteranceId } = this.config;
  return audioUtteranceId;
}
580
+
581
/**
 * @returns The WebSocket URL built in the constructor
 */
getUrl(): string {
  const { url } = this.config;
  return url;
}
584
+
585
/**
 * @returns The client's current lifecycle state
 */
getState(): ClientState {
  const current = this.state;
  return current;
}
588
+
589
/**
 * @returns `true` when the state is READY (the server has signalled it is
 *          ready for audio); CONNECTED alone does not count
 */
isConnected(): boolean {
  const ready = this.state === ClientState.READY;
  return ready;
}
592
+
593
/**
 * @returns `true` while the state is CONNECTING
 */
isConnecting(): boolean {
  const connecting = this.state === ClientState.CONNECTING;
  return connecting;
}
596
+
597
/**
 * @returns `true` while the state is STOPPING
 */
isStopping(): boolean {
  const stopping = this.state === ClientState.STOPPING;
  return stopping;
}
600
+
601
/**
 * @returns `true` once the state is STOPPED
 */
isTranscriptionFinished(): boolean {
  const stopped = this.state === ClientState.STOPPED;
  return stopped;
}
604
+
605
/**
 * @returns Whatever the audio ring buffer reports from isOverflowing()
 */
isBufferOverflowing(): boolean {
  const overflowing = this.audioBuffer.isOverflowing();
  return overflowing;
}
608
+
609
/**
 * Snapshot of send counters combined with the ring buffer's own statistics.
 *
 * @returns Bytes/chunks sent by this client plus buffered-chunk, overflow and
 *          wrap information taken from the audio buffer
 */
getStats(): IRecognitionClientStats {
  const {
    chunksBuffered,
    overflowCount,
    currentBufferedChunks,
    hasWrapped
  } = this.audioBuffer.getStats();

  return {
    audioBytesSent: this.audioBytesSent,
    audioChunksSent: this.audioChunksSent,
    audioChunksBuffered: chunksBuffered,
    bufferOverflowCount: overflowCount,
    currentBufferedChunks,
    hasWrapped
  };
}
620
+
621
+ // ==========================================================================
622
+ // WEBSOCKET HOOKS (from WebSocketAudioClient)
623
+ // ==========================================================================
624
+
625
/**
 * Hook invoked when the WebSocket connection opens.
 *
 * Sends the ASR request (when `asrRequestConfig` is set) followed by the game
 * context (when `gameContext` is set), then invokes the configured
 * onConnected callback. Debug logging is switched on here when the ASR
 * config carries `debugCommand.enableDebugLog`.
 */
protected onConnected(): void {
  this.log('debug', 'WebSocket onConnected callback');

  const asrConfig = this.config.asrRequestConfig;
  const gameContext = this.config.gameContext;
  const protocolVersion = RealTimeTwoWayWebSocketRecognitionClient.PROTOCOL_VERSION;

  if (asrConfig) {
    // debugCommand is read through `any` because it is a newer field
    const debugCommand = (asrConfig as any).debugCommand;
    if (debugCommand?.enableDebugLog) {
      this.isDebugLogEnabled = true;
      this.log('debug', 'Debug logging enabled via debugCommand');
    }

    // Skip the call entirely when debug logging is off
    if (this.isDebugLogEnabled) {
      this.log('debug', 'Sending ASR request', asrConfig);
    }

    // Optional fallback models (for circuit breaker fallback)
    const fallbackModels = (asrConfig as any).fallbackModels;

    // Non-numeric sample rate / encoding values fall back to the defaults
    const sampleRate =
      typeof asrConfig.sampleRate === 'number' ? asrConfig.sampleRate : SampleRate.RATE_16000;
    const encoding =
      typeof asrConfig.encoding === 'number' ? asrConfig.encoding : AudioEncoding.LINEAR16;

    const asrRequest: ASRRequestV1 = {
      type: RecognitionContextTypeV1.ASR_REQUEST,
      audioUtteranceId: this.config.audioUtteranceId,
      provider: asrConfig.provider.toString(),
      model: asrConfig.model,
      language: asrConfig.language?.toString() || 'en',
      sampleRate,
      encoding,
      interimResults: asrConfig.interimResults ?? false,
      // An explicit useContext wins; otherwise context is used whenever gameContext exists
      useContext: asrConfig.useContext ?? !!gameContext,
      ...(asrConfig.finalTranscriptStability && {
        finalTranscriptStability: asrConfig.finalTranscriptStability
      }),
      ...(fallbackModels && { fallbackModels }),
      ...(debugCommand && { debugCommand })
    };

    super.sendMessage(protocolVersion, 'message', asrRequest);
  }

  if (gameContext) {
    // Skip the call entirely when debug logging is off
    if (this.isDebugLogEnabled) {
      this.log('debug', 'Sending game context', gameContext);
    }
    super.sendMessage(protocolVersion, 'message', gameContext);
  }

  this.log('debug', 'Waiting for server ready signal');
  this.config.onConnected();
}
694
+
695
/**
 * Hook invoked when the WebSocket connection closes.
 *
 * State transition:
 * - STOPPING becomes STOPPED
 * - CONNECTED, READY or CONNECTING becomes FAILED (logged as unexpected)
 * - any other state is left as it is
 *
 * Afterwards internal resources are released and the configured
 * onDisconnected callback is invoked with the same code and reason.
 *
 * @param code - WebSocket close code
 * @param reason - Close reason text (may be empty)
 */
protected onDisconnected(code: number, reason: string): void {
  const codeDescription = this.getCloseCodeDescription(code);
  const isAbnormal1006 = code === 1006;
  const reasonText = reason || '(empty)';

  this.log('debug', '[DIAGNOSTIC] WebSocket disconnected', {
    code,
    codeDescription,
    reason: reasonText,
    previousState: this.state,
    is1006Abnormal: isAbnormal1006,
    audioChunksSent: this.audioChunksSent,
    audioBytesSent: this.audioBytesSent,
    bufferStats: this.audioBuffer.getStats()
  });

  switch (this.state) {
    case ClientState.STOPPING:
      // The close was expected: a stop had been requested
      this.state = ClientState.STOPPED;
      break;

    case ClientState.CONNECTED:
    case ClientState.READY:
    case ClientState.CONNECTING:
      this.log('error', '[DIAGNOSTIC] Unexpected disconnection', {
        code,
        codeDescription,
        reason: reasonText,
        is1006: isAbnormal1006,
        possibleCauses: isAbnormal1006
          ? [
              'Network connection lost',
              'Server process crashed',
              'Provider (Deepgram/AssemblyAI) WebSocket closed abnormally',
              'Firewall/proxy terminated connection',
              'Browser/tab suspended (mobile)'
            ]
          : []
      });
      this.state = ClientState.FAILED;
      break;

    default:
      // Other states keep their value
      break;
  }

  // Free memory as soon as the connection is gone
  this.cleanup();

  this.config.onDisconnected(code, reason);
}
740
+
741
/**
 * Translate a WebSocket close code into a short human-readable label.
 *
 * @param code - WebSocket close code
 * @returns The known label, or `Unknown (<code>)` for codes not in the table
 */
private getCloseCodeDescription(code: number): string {
  const knownCodes = new Map<number, string>([
    [1000, 'Normal Closure'],
    [1001, 'Going Away'],
    [1002, 'Protocol Error'],
    [1003, 'Unsupported Data'],
    [1005, 'No Status Received'],
    [1006, 'Abnormal Closure (no close frame received)'],
    [1007, 'Invalid Frame Payload'],
    [1008, 'Policy Violation'],
    [1009, 'Message Too Big'],
    [1010, 'Mandatory Extension'],
    [1011, 'Internal Server Error'],
    [1012, 'Service Restart'],
    [1013, 'Try Again Later'],
    // 4000-4004: application-level codes
    [4000, 'Auth Required'],
    [4001, 'Auth Failed'],
    [4002, 'Rate Limit Exceeded'],
    [4003, 'Invalid Session'],
    [4004, 'Session Expired']
  ]);

  const label = knownCodes.get(code);
  if (label) {
    return label;
  }
  return `Unknown (${code})`;
}
767
+
768
/**
 * Hook invoked on a WebSocket error event.
 *
 * Marks the client as FAILED and reports the problem to the configured
 * onError callback as an ErrorResultV1. The result carries this client's
 * audioUtteranceId so the error can be correlated with its session.
 *
 * @param error - WebSocket error event; its `type` becomes the description
 */
protected onError(error: Event): void {
  this.state = ClientState.FAILED;

  const errorResult: ErrorResultV1 = {
    type: RecognitionResultTypeV1.ERROR,
    audioUtteranceId: this.config.audioUtteranceId,
    message: 'WebSocket error',
    // Guarded read: fall back to a generic description if no event type is available
    description: error?.type || 'Connection error'
  };
  this.config.onError(errorResult);
}
779
+
780
/**
 * Hook invoked for every incoming protocol message; delegates the message
 * unchanged to the MessageHandler.
 *
 * @param msg - Incoming message envelope (version, type, payload)
 */
protected override onMessage(msg: { v: number; type: string; data: any }): void {
  const handler = this.messageHandler;
  handler.handleMessage(msg);
}
783
+
784
+ // ==========================================================================
785
+ // INTERNAL HELPERS
786
+ // ==========================================================================
787
+
788
/**
 * React to a control message sent by the server.
 *
 * - READY_FOR_UPLOADING_RECORDING: switch to READY, record the session start
 *   time and send every chunk returned by the buffer's flush().
 * - STOP_RECORDING: logged only.
 * - Anything else: logged as an unknown action.
 *
 * @param msg - Control message containing server actions
 */
private handleControlMessage(msg: ClientControlMessageV1): void {
  switch (msg.action) {
    case ClientControlActionV1.READY_FOR_UPLOADING_RECORDING: {
      this.log('debug', 'Server ready for audio upload');
      this.state = ClientState.READY;
      this.messageHandler.setSessionStartTime(Date.now());

      // The server can take audio now: push out what was buffered meanwhile
      const pending = this.audioBuffer.flush();
      if (pending.length > 0) {
        this.log('debug', 'Flushing buffered audio', { chunks: pending.length });
        for (const chunk of pending) {
          this.sendAudioNow(chunk.data);
        }
      }
      return;
    }

    case ClientControlActionV1.STOP_RECORDING: {
      this.log('debug', 'Received stop recording signal from server');
      return;
    }

    default: {
      this.log('warn', 'Unknown control action', { action: msg.action });
    }
  }
}
816
+
817
/**
 * Send audio immediately to the server (without buffering) and update the
 * sent-bytes / sent-chunks counters.
 *
 * Encoding and sample rate are taken from `asrRequestConfig` when they are
 * numeric, otherwise LINEAR16 and 16 kHz are used. This is the same rule
 * onConnected() applies when building the ASR request, so the audio frames
 * and the request always agree.
 *
 * @param audioData - Audio data to send
 */
private sendAudioNow(audioData: ArrayBuffer | ArrayBufferView): void {
  // ArrayBuffer and ArrayBufferView both expose byteLength directly
  const byteLength = audioData.byteLength;

  const asrConfig = this.config.asrRequestConfig;

  // typeof check instead of `||` so a zero-valued enum member is not replaced
  const encodingId = (
    typeof asrConfig?.encoding === 'number' ? asrConfig.encoding : AudioEncoding.LINEAR16
  ) as AudioEncoding;

  const sampleRate =
    typeof asrConfig?.sampleRate === 'number' ? asrConfig.sampleRate : SampleRate.RATE_16000;

  super.sendAudio(
    audioData,
    RealTimeTwoWayWebSocketRecognitionClient.PROTOCOL_VERSION,
    encodingId,
    sampleRate
  );

  this.audioBytesSent += byteLength;
  this.audioChunksSent++;
}
844
+ }