@volley/recognition-client-sdk-node22 0.1.424

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. package/README.md +344 -0
  2. package/dist/browser.bundled.d.ts +1280 -0
  3. package/dist/browser.d.ts +10 -0
  4. package/dist/browser.d.ts.map +1 -0
  5. package/dist/config-builder.d.ts +134 -0
  6. package/dist/config-builder.d.ts.map +1 -0
  7. package/dist/errors.d.ts +41 -0
  8. package/dist/errors.d.ts.map +1 -0
  9. package/dist/factory.d.ts +36 -0
  10. package/dist/factory.d.ts.map +1 -0
  11. package/dist/index.bundled.d.ts +2572 -0
  12. package/dist/index.d.ts +16 -0
  13. package/dist/index.d.ts.map +1 -0
  14. package/dist/index.js +10199 -0
  15. package/dist/index.js.map +7 -0
  16. package/dist/recog-client-sdk.browser.d.ts +10 -0
  17. package/dist/recog-client-sdk.browser.d.ts.map +1 -0
  18. package/dist/recog-client-sdk.browser.js +5746 -0
  19. package/dist/recog-client-sdk.browser.js.map +7 -0
  20. package/dist/recognition-client.d.ts +128 -0
  21. package/dist/recognition-client.d.ts.map +1 -0
  22. package/dist/recognition-client.types.d.ts +271 -0
  23. package/dist/recognition-client.types.d.ts.map +1 -0
  24. package/dist/simplified-vgf-recognition-client.d.ts +178 -0
  25. package/dist/simplified-vgf-recognition-client.d.ts.map +1 -0
  26. package/dist/utils/audio-ring-buffer.d.ts +69 -0
  27. package/dist/utils/audio-ring-buffer.d.ts.map +1 -0
  28. package/dist/utils/message-handler.d.ts +45 -0
  29. package/dist/utils/message-handler.d.ts.map +1 -0
  30. package/dist/utils/url-builder.d.ts +28 -0
  31. package/dist/utils/url-builder.d.ts.map +1 -0
  32. package/dist/vgf-recognition-mapper.d.ts +66 -0
  33. package/dist/vgf-recognition-mapper.d.ts.map +1 -0
  34. package/dist/vgf-recognition-state.d.ts +91 -0
  35. package/dist/vgf-recognition-state.d.ts.map +1 -0
  36. package/package.json +74 -0
  37. package/src/browser.ts +24 -0
  38. package/src/config-builder.spec.ts +265 -0
  39. package/src/config-builder.ts +240 -0
  40. package/src/errors.ts +84 -0
  41. package/src/factory.spec.ts +215 -0
  42. package/src/factory.ts +47 -0
  43. package/src/index.ts +127 -0
  44. package/src/recognition-client.spec.ts +889 -0
  45. package/src/recognition-client.ts +844 -0
  46. package/src/recognition-client.types.ts +338 -0
  47. package/src/simplified-vgf-recognition-client.integration.spec.ts +718 -0
  48. package/src/simplified-vgf-recognition-client.spec.ts +1525 -0
  49. package/src/simplified-vgf-recognition-client.ts +524 -0
  50. package/src/utils/audio-ring-buffer.spec.ts +335 -0
  51. package/src/utils/audio-ring-buffer.ts +170 -0
  52. package/src/utils/message-handler.spec.ts +311 -0
  53. package/src/utils/message-handler.ts +131 -0
  54. package/src/utils/url-builder.spec.ts +252 -0
  55. package/src/utils/url-builder.ts +92 -0
  56. package/src/vgf-recognition-mapper.spec.ts +78 -0
  57. package/src/vgf-recognition-mapper.ts +232 -0
  58. package/src/vgf-recognition-state.ts +102 -0
@@ -0,0 +1,2572 @@
1
+ import { z } from 'zod';
2
+
3
+ /**
4
+ * Provider types and enums for recognition services
5
+ * NOTE_TO_AI: DO NOT CHANGE THIS UNLESS EXPLICITLY ASKED. Always ask before making any changes.
6
+ */
7
+ /**
8
+ * Supported speech recognition providers
9
+ */
10
+ declare enum RecognitionProvider {
11
+ ASSEMBLYAI = "assemblyai",
12
+ DEEPGRAM = "deepgram",
13
+ ELEVENLABS = "elevenlabs",
14
+ FIREWORKS = "fireworks",
15
+ GOOGLE = "google",
16
+ GEMINI_BATCH = "gemini-batch",
17
+ OPENAI_BATCH = "openai-batch",
18
+ OPENAI_REALTIME = "openai-realtime",
19
+ TEST_ASR_PROVIDER_QUOTA = "test-asr-provider-quota",
20
+ TEST_ASR_STREAMING = "test-asr-streaming"
21
+ }
22
+ /**
23
+ * ASR API type - distinguishes between streaming and file-based transcription APIs
24
+ * - STREAMING: Real-time streaming APIs (Deepgram, AssemblyAI, Google)
25
+ * - FILE_BASED: File upload/batch APIs (OpenAI Batch, Gemini Batch)
26
+ */
27
+ declare enum ASRApiType {
28
+ STREAMING = "streaming",
29
+ FILE_BASED = "file-based"
30
+ }
31
+ /**
32
+ * Deepgram model names
33
+ */
34
+ declare enum DeepgramModel {
35
+ NOVA_2 = "nova-2",
36
+ NOVA_3 = "nova-3",
37
+ FLUX_GENERAL_EN = "flux-general-en"
38
+ }
39
+ /**
40
+ * Google Cloud Speech models
41
+ * @see https://cloud.google.com/speech-to-text/docs/transcription-model
42
+ * @see https://cloud.google.com/speech-to-text/v2/docs/chirp_3-model
43
+ */
44
+ declare enum GoogleModel {
45
+ CHIRP_3 = "chirp_3",
46
+ CHIRP_2 = "chirp_2",
47
+ CHIRP = "chirp",
48
+ LATEST_LONG = "latest_long",
49
+ LATEST_SHORT = "latest_short",
50
+ TELEPHONY = "telephony",
51
+ TELEPHONY_SHORT = "telephony_short",
52
+ DEFAULT = "default",
53
+ COMMAND_AND_SEARCH = "command_and_search",
54
+ PHONE_CALL = "phone_call",
55
+ VIDEO = "video"
56
+ }
57
+ /**
58
+ * Fireworks AI models for ASR
59
+ * @see https://docs.fireworks.ai/guides/querying-asr-models
60
+ * @see https://fireworks.ai/models/fireworks/fireworks-asr-large
61
+ */
62
+ declare enum FireworksModel {
63
+ ASR_V1 = "fireworks-asr-large",
64
+ ASR_V2 = "fireworks-asr-v2",
65
+ WHISPER_V3 = "whisper-v3",
66
+ WHISPER_V3_TURBO = "whisper-v3-turbo"
67
+ }
68
+ /**
69
+ * ElevenLabs Scribe models for speech-to-text
70
+ * @see https://elevenlabs.io/blog/introducing-scribe-v2-realtime
71
+ * @see https://elevenlabs.io/docs/cookbooks/speech-to-text/streaming
72
+ * @see https://elevenlabs.io/docs/api-reference/speech-to-text/convert
73
+ */
74
+ declare enum ElevenLabsModel {
75
+ SCRIBE_V2_REALTIME = "scribe_v2_realtime",
76
+ SCRIBE_V1 = "scribe_v1"
77
+ }
78
+ /**
79
+ * OpenAI Realtime API transcription models
80
+ * These are the verified `input_audio_transcription.model` values.
81
+ * @see https://platform.openai.com/docs/guides/realtime
82
+ */
83
+ declare enum OpenAIRealtimeModel {
84
+ GPT_4O_MINI_TRANSCRIBE = "gpt-4o-mini-transcribe"
85
+ }
86
+ /**
87
+ * Type alias for any model from any provider
88
+ */
89
+ type RecognitionModel = DeepgramModel | GoogleModel | FireworksModel | ElevenLabsModel | OpenAIRealtimeModel | string;
90
+
91
+ /**
92
+ * Audio encoding types
93
+ */
94
+ declare enum AudioEncoding {
95
+ ENCODING_UNSPECIFIED = 0,
96
+ LINEAR16 = 1,
97
+ OGG_OPUS = 2,
98
+ FLAC = 3,
99
+ MULAW = 4,
100
+ ALAW = 5
101
+ }
102
+ declare namespace AudioEncoding {
103
+ /**
104
+ * Convert numeric ID to AudioEncoding enum
105
+ * @param id - Numeric encoding identifier (0-5)
106
+ * @returns AudioEncoding enum value or undefined if invalid
107
+ */
108
+ function fromId(id: number): AudioEncoding | undefined;
109
+ /**
110
+ * Convert string name to AudioEncoding enum
111
+ * @param nameStr - String name like "linear16", "LINEAR16", "ogg_opus", "OGG_OPUS", etc. (case insensitive)
112
+ * @returns AudioEncoding enum value or undefined if invalid
113
+ */
114
+ function fromName(nameStr: string): AudioEncoding | undefined;
115
+ /**
116
+ * Convert AudioEncoding enum to numeric ID
117
+ * @param encoding - AudioEncoding enum value
118
+ * @returns Numeric ID (0-5)
119
+ */
120
+ function toId(encoding: AudioEncoding): number;
121
+ /**
122
+ * Convert AudioEncoding enum to string name
123
+ * @param encoding - AudioEncoding enum value
124
+ * @returns String name like "LINEAR16", "MULAW", etc.
125
+ */
126
+ function toName(encoding: AudioEncoding): string;
127
+ /**
128
+ * Check if a numeric ID is a valid encoding
129
+ * @param id - Numeric identifier to validate
130
+ * @returns true if valid encoding ID
131
+ */
132
+ function isIdValid(id: number): boolean;
133
+ /**
134
+ * Check if a string name is a valid encoding
135
+ * @param nameStr - String name to validate
136
+ * @returns true if valid encoding name
137
+ */
138
+ function isNameValid(nameStr: string): boolean;
139
+ }
140
+ /**
141
+ * Common sample rates (in Hz)
142
+ */
143
+ declare enum SampleRate {
144
+ RATE_8000 = 8000,
145
+ RATE_16000 = 16000,
146
+ RATE_22050 = 22050,
147
+ RATE_24000 = 24000,
148
+ RATE_32000 = 32000,
149
+ RATE_44100 = 44100,
150
+ RATE_48000 = 48000
151
+ }
152
+ declare namespace SampleRate {
153
+ /**
154
+ * Convert Hz value to SampleRate enum
155
+ * @param hz - Sample rate in Hz (8000, 16000, etc.)
156
+ * @returns SampleRate enum value or undefined if invalid
157
+ */
158
+ function fromHz(hz: number): SampleRate | undefined;
159
+ /**
160
+ * Convert string name to SampleRate enum
161
+ * @param nameStr - String name like "rate_8000", "RATE_16000", etc. (case insensitive)
162
+ * @returns SampleRate enum value or undefined if invalid
163
+ */
164
+ function fromName(nameStr: string): SampleRate | undefined;
165
+ /**
166
+ * Convert SampleRate enum to Hz value
167
+ * @param rate - SampleRate enum value
168
+ * @returns Hz value (8000, 16000, etc.)
169
+ */
170
+ function toHz(rate: SampleRate): number;
171
+ /**
172
+ * Convert SampleRate enum to string name
173
+ * @param rate - SampleRate enum value
174
+ * @returns String name like "RATE_8000", "RATE_16000", etc.
175
+ */
176
+ function toName(rate: SampleRate): string;
177
+ /**
178
+ * Check if a numeric Hz value is a valid sample rate
179
+ * @param hz - Hz value to validate
180
+ * @returns true if valid sample rate
181
+ */
182
+ function isHzValid(hz: number): boolean;
183
+ /**
184
+ * Check if a string name is a valid sample rate
185
+ * @param nameStr - String name to validate
186
+ * @returns true if valid sample rate name
187
+ */
188
+ function isNameValid(nameStr: string): boolean;
189
+ }
190
+ /**
191
+ * Supported languages for recognition
192
+ * Using BCP-47 language tags
193
+ */
194
+ declare enum Language {
195
+ ENGLISH_US = "en-US",
196
+ ENGLISH_GB = "en-GB",
197
+ SPANISH_ES = "es-ES",
198
+ SPANISH_MX = "es-MX",
199
+ FRENCH_FR = "fr-FR",
200
+ GERMAN_DE = "de-DE",
201
+ ITALIAN_IT = "it-IT",
202
+ PORTUGUESE_BR = "pt-BR",
203
+ JAPANESE_JP = "ja-JP",
204
+ KOREAN_KR = "ko-KR",
205
+ CHINESE_CN = "zh-CN",
206
+ CHINESE_TW = "zh-TW"
207
+ }
208
+
209
+ /**
210
+ * Recognition Result Types V1
211
+ * NOTE_TO_AI: DO NOT CHANGE THIS UNLESS EXPLICITLY ASKED. Always ask before making any changes.
212
+ * Types and schemas for recognition results sent to SDK clients
213
+ */
214
+
215
+ /**
216
+ * Message type discriminator for recognition results V1
217
+ */
218
+ declare enum RecognitionResultTypeV1 {
219
+ TRANSCRIPTION = "Transcription",
220
+ FUNCTION_CALL = "FunctionCall",
221
+ METADATA = "Metadata",
222
+ ERROR = "Error",
223
+ CLIENT_CONTROL_MESSAGE = "ClientControlMessage",
224
+ AUDIO_METRICS = "AudioMetrics"
225
+ }
226
+ /**
227
+ * Transcription result V1 - contains transcript message
228
+ * In the long run game side should not need to know it. In the short run it is send back to client.
229
+ * NOTE_TO_AI: DO NOT CHANGE THIS UNLESS EXPLICITLY ASKED. Always ask before making any changes.
230
+ */
231
+ declare const TranscriptionResultSchemaV1: z.ZodObject<{
232
+ type: z.ZodLiteral<RecognitionResultTypeV1.TRANSCRIPTION>;
233
+ audioUtteranceId: z.ZodString;
234
+ finalTranscript: z.ZodString;
235
+ finalTranscriptConfidence: z.ZodOptional<z.ZodNumber>;
236
+ pendingTranscript: z.ZodOptional<z.ZodString>;
237
+ pendingTranscriptConfidence: z.ZodOptional<z.ZodNumber>;
238
+ is_finished: z.ZodBoolean;
239
+ voiceStart: z.ZodOptional<z.ZodNumber>;
240
+ voiceDuration: z.ZodOptional<z.ZodNumber>;
241
+ voiceEnd: z.ZodOptional<z.ZodNumber>;
242
+ startTimestamp: z.ZodOptional<z.ZodNumber>;
243
+ endTimestamp: z.ZodOptional<z.ZodNumber>;
244
+ receivedAtMs: z.ZodOptional<z.ZodNumber>;
245
+ accumulatedAudioTimeMs: z.ZodOptional<z.ZodNumber>;
246
+ }, "strip", z.ZodTypeAny, {
247
+ type: RecognitionResultTypeV1.TRANSCRIPTION;
248
+ audioUtteranceId: string;
249
+ finalTranscript: string;
250
+ is_finished: boolean;
251
+ finalTranscriptConfidence?: number | undefined;
252
+ pendingTranscript?: string | undefined;
253
+ pendingTranscriptConfidence?: number | undefined;
254
+ voiceStart?: number | undefined;
255
+ voiceDuration?: number | undefined;
256
+ voiceEnd?: number | undefined;
257
+ startTimestamp?: number | undefined;
258
+ endTimestamp?: number | undefined;
259
+ receivedAtMs?: number | undefined;
260
+ accumulatedAudioTimeMs?: number | undefined;
261
+ }, {
262
+ type: RecognitionResultTypeV1.TRANSCRIPTION;
263
+ audioUtteranceId: string;
264
+ finalTranscript: string;
265
+ is_finished: boolean;
266
+ finalTranscriptConfidence?: number | undefined;
267
+ pendingTranscript?: string | undefined;
268
+ pendingTranscriptConfidence?: number | undefined;
269
+ voiceStart?: number | undefined;
270
+ voiceDuration?: number | undefined;
271
+ voiceEnd?: number | undefined;
272
+ startTimestamp?: number | undefined;
273
+ endTimestamp?: number | undefined;
274
+ receivedAtMs?: number | undefined;
275
+ accumulatedAudioTimeMs?: number | undefined;
276
+ }>;
277
+ type TranscriptionResultV1 = z.infer<typeof TranscriptionResultSchemaV1>;
278
+ /**
279
+ * Function call result V1 - similar to LLM function call
280
+ * In the long run game server should know it, rather than TV or client.
281
+ */
282
+ declare const FunctionCallResultSchemaV1: z.ZodObject<{
283
+ type: z.ZodLiteral<RecognitionResultTypeV1.FUNCTION_CALL>;
284
+ audioUtteranceId: z.ZodString;
285
+ functionName: z.ZodString;
286
+ functionArgJson: z.ZodString;
287
+ }, "strip", z.ZodTypeAny, {
288
+ type: RecognitionResultTypeV1.FUNCTION_CALL;
289
+ audioUtteranceId: string;
290
+ functionName: string;
291
+ functionArgJson: string;
292
+ }, {
293
+ type: RecognitionResultTypeV1.FUNCTION_CALL;
294
+ audioUtteranceId: string;
295
+ functionName: string;
296
+ functionArgJson: string;
297
+ }>;
298
+ type FunctionCallResultV1 = z.infer<typeof FunctionCallResultSchemaV1>;
299
+ /**
300
+ * Transcript outcome type - categorizes final transcript state
301
+ * Used in Metadata schema. Maps 1:1 with Datadog metrics:
302
+ * - WITH_CONTENT → recog.client.websocket.transcript.final_with_content
303
+ * - EMPTY → recog.client.websocket.transcript.final_empty
304
+ * - NEVER_SENT → derived from sessions.streamed - final_with_content - final_empty
305
+ * - ERROR_* → 1:1 mapping to ErrorTypeV1 for error-caused outcomes
306
+ */
307
+ declare enum TranscriptOutcomeType {
308
+ WITH_CONTENT = "with_content",
309
+ EMPTY = "empty",
310
+ NEVER_SENT = "never_sent",
311
+ ERROR_AUTHENTICATION = "error_authentication",
312
+ ERROR_VALIDATION = "error_validation",
313
+ ERROR_PROVIDER = "error_provider",
314
+ ERROR_TIMEOUT = "error_timeout",
315
+ ERROR_QUOTA = "error_quota",
316
+ ERROR_INTERNAL_QUOTA = "error_internal_quota",
317
+ ERROR_CONNECTION = "error_connection",
318
+ ERROR_NO_AUDIO = "error_no_audio",
319
+ ERROR_CIRCUIT_BREAKER = "error_circuit_breaker",
320
+ ERROR_UNKNOWN = "error_unknown"
321
+ }
322
+ /**
323
+ * Metadata result V1 - contains metadata, timing information, and ASR config
324
+ * Sent when the provider connection closes to provide final timing metrics and config
325
+ * In the long run game server should know it, rather than TV or client.
326
+ */
327
+ declare const MetadataResultSchemaV1: z.ZodObject<{
328
+ type: z.ZodLiteral<RecognitionResultTypeV1.METADATA>;
329
+ audioUtteranceId: z.ZodString;
330
+ recordingStartMs: z.ZodOptional<z.ZodNumber>;
331
+ recordingEndMs: z.ZodOptional<z.ZodNumber>;
332
+ transcriptEndMs: z.ZodOptional<z.ZodNumber>;
333
+ socketCloseAtMs: z.ZodOptional<z.ZodNumber>;
334
+ duration: z.ZodOptional<z.ZodNumber>;
335
+ volume: z.ZodOptional<z.ZodNumber>;
336
+ accumulatedAudioTimeMs: z.ZodOptional<z.ZodNumber>;
337
+ costInUSD: z.ZodOptional<z.ZodDefault<z.ZodNumber>>;
338
+ apiType: z.ZodOptional<z.ZodNativeEnum<typeof ASRApiType>>;
339
+ asrConfig: z.ZodOptional<z.ZodString>;
340
+ rawAsrMetadata: z.ZodOptional<z.ZodString>;
341
+ transcriptOutcome: z.ZodOptional<z.ZodNativeEnum<typeof TranscriptOutcomeType>>;
342
+ audioMetrics: z.ZodOptional<z.ZodObject<{
343
+ valid: z.ZodBoolean;
344
+ audioBeginMs: z.ZodNumber;
345
+ audioEndMs: z.ZodNumber;
346
+ maxVolume: z.ZodNumber;
347
+ minVolume: z.ZodNumber;
348
+ avgVolume: z.ZodNumber;
349
+ silenceRatio: z.ZodNumber;
350
+ clippingRatio: z.ZodNumber;
351
+ snrEstimate: z.ZodNullable<z.ZodNumber>;
352
+ lastNonSilenceMs: z.ZodNumber;
353
+ timestamp: z.ZodString;
354
+ }, "strip", z.ZodTypeAny, {
355
+ valid: boolean;
356
+ audioBeginMs: number;
357
+ audioEndMs: number;
358
+ maxVolume: number;
359
+ minVolume: number;
360
+ avgVolume: number;
361
+ silenceRatio: number;
362
+ clippingRatio: number;
363
+ snrEstimate: number | null;
364
+ lastNonSilenceMs: number;
365
+ timestamp: string;
366
+ }, {
367
+ valid: boolean;
368
+ audioBeginMs: number;
369
+ audioEndMs: number;
370
+ maxVolume: number;
371
+ minVolume: number;
372
+ avgVolume: number;
373
+ silenceRatio: number;
374
+ clippingRatio: number;
375
+ snrEstimate: number | null;
376
+ lastNonSilenceMs: number;
377
+ timestamp: string;
378
+ }>>;
379
+ }, "strip", z.ZodTypeAny, {
380
+ type: RecognitionResultTypeV1.METADATA;
381
+ audioUtteranceId: string;
382
+ recordingStartMs?: number | undefined;
383
+ recordingEndMs?: number | undefined;
384
+ transcriptEndMs?: number | undefined;
385
+ socketCloseAtMs?: number | undefined;
386
+ duration?: number | undefined;
387
+ volume?: number | undefined;
388
+ accumulatedAudioTimeMs?: number | undefined;
389
+ costInUSD?: number | undefined;
390
+ apiType?: ASRApiType | undefined;
391
+ asrConfig?: string | undefined;
392
+ rawAsrMetadata?: string | undefined;
393
+ transcriptOutcome?: TranscriptOutcomeType | undefined;
394
+ audioMetrics?: {
395
+ valid: boolean;
396
+ audioBeginMs: number;
397
+ audioEndMs: number;
398
+ maxVolume: number;
399
+ minVolume: number;
400
+ avgVolume: number;
401
+ silenceRatio: number;
402
+ clippingRatio: number;
403
+ snrEstimate: number | null;
404
+ lastNonSilenceMs: number;
405
+ timestamp: string;
406
+ } | undefined;
407
+ }, {
408
+ type: RecognitionResultTypeV1.METADATA;
409
+ audioUtteranceId: string;
410
+ recordingStartMs?: number | undefined;
411
+ recordingEndMs?: number | undefined;
412
+ transcriptEndMs?: number | undefined;
413
+ socketCloseAtMs?: number | undefined;
414
+ duration?: number | undefined;
415
+ volume?: number | undefined;
416
+ accumulatedAudioTimeMs?: number | undefined;
417
+ costInUSD?: number | undefined;
418
+ apiType?: ASRApiType | undefined;
419
+ asrConfig?: string | undefined;
420
+ rawAsrMetadata?: string | undefined;
421
+ transcriptOutcome?: TranscriptOutcomeType | undefined;
422
+ audioMetrics?: {
423
+ valid: boolean;
424
+ audioBeginMs: number;
425
+ audioEndMs: number;
426
+ maxVolume: number;
427
+ minVolume: number;
428
+ avgVolume: number;
429
+ silenceRatio: number;
430
+ clippingRatio: number;
431
+ snrEstimate: number | null;
432
+ lastNonSilenceMs: number;
433
+ timestamp: string;
434
+ } | undefined;
435
+ }>;
436
+ type MetadataResultV1 = z.infer<typeof MetadataResultSchemaV1>;
437
+ /**
438
+ * Error type enum V1 - categorizes different types of errors
439
+ */
440
+ declare enum ErrorTypeV1 {
441
+ AUTHENTICATION_ERROR = "authentication_error",
442
+ VALIDATION_ERROR = "validation_error",
443
+ PROVIDER_ERROR = "provider_error",
444
+ TIMEOUT_ERROR = "timeout_error",
445
+ QUOTA_EXCEEDED = "quota_exceeded",
446
+ INTERNAL_QUOTA_EXHAUSTED = "internal_quota_exhausted",
447
+ CONNECTION_ERROR = "connection_error",
448
+ NO_AUDIO_ERROR = "no_audio_error",
449
+ CIRCUIT_BREAKER_OPEN = "circuit_breaker_open",
450
+ UNKNOWN_ERROR = "unknown_error"
451
+ }
452
+ /**
453
+ * Error result V1 - contains error message
454
+ * In the long run game server should know it, rather than TV or client.
455
+ */
456
+ declare const ErrorResultSchemaV1: z.ZodObject<{
457
+ type: z.ZodLiteral<RecognitionResultTypeV1.ERROR>;
458
+ audioUtteranceId: z.ZodString;
459
+ errorType: z.ZodOptional<z.ZodNativeEnum<typeof ErrorTypeV1>>;
460
+ message: z.ZodOptional<z.ZodString>;
461
+ code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
462
+ description: z.ZodOptional<z.ZodString>;
463
+ }, "strip", z.ZodTypeAny, {
464
+ type: RecognitionResultTypeV1.ERROR;
465
+ audioUtteranceId: string;
466
+ errorType?: ErrorTypeV1 | undefined;
467
+ message?: string | undefined;
468
+ code?: string | number | undefined;
469
+ description?: string | undefined;
470
+ }, {
471
+ type: RecognitionResultTypeV1.ERROR;
472
+ audioUtteranceId: string;
473
+ errorType?: ErrorTypeV1 | undefined;
474
+ message?: string | undefined;
475
+ code?: string | number | undefined;
476
+ description?: string | undefined;
477
+ }>;
478
+ type ErrorResultV1 = z.infer<typeof ErrorResultSchemaV1>;
479
+ /**
480
+ * Client control actions enum V1
481
+ * Actions that can be sent from server to client to control the recognition stream
482
+ * In the long run audio client(mic) should know it, rather than servers.
483
+ */
484
+ declare enum ClientControlActionV1 {
485
+ READY_FOR_UPLOADING_RECORDING = "ready_for_uploading_recording",
486
+ STOP_RECORDING = "stop_recording"
487
+ }
488
+
489
+ /**
490
+ * Error Exception Types
491
+ *
492
+ * Defines structured exception types for each ErrorTypeV1 category.
493
+ * Each exception type has metadata about whether it's immediately available
494
+ * (can be shown to user right away vs needs investigation/retry).
495
+ */
496
+
497
+ /**
498
+ * Authentication/Authorization Error
499
+ * isImmediatelyAvailable: false
500
+ * These are system configuration issues, not user-facing
501
+ */
502
+ declare const AuthenticationExceptionSchema: z.ZodObject<{
503
+ provider: z.ZodOptional<z.ZodNativeEnum<typeof RecognitionProvider>>;
504
+ code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
505
+ message: z.ZodString;
506
+ audioUtteranceId: z.ZodOptional<z.ZodString>;
507
+ timestamp: z.ZodOptional<z.ZodNumber>;
508
+ description: z.ZodOptional<z.ZodString>;
509
+ errorType: z.ZodLiteral<ErrorTypeV1.AUTHENTICATION_ERROR>;
510
+ isImmediatelyAvailable: z.ZodLiteral<false>;
511
+ service: z.ZodOptional<z.ZodString>;
512
+ authMethod: z.ZodOptional<z.ZodString>;
513
+ }, "strip", z.ZodTypeAny, {
514
+ message: string;
515
+ errorType: ErrorTypeV1.AUTHENTICATION_ERROR;
516
+ isImmediatelyAvailable: false;
517
+ provider?: RecognitionProvider | undefined;
518
+ code?: string | number | undefined;
519
+ audioUtteranceId?: string | undefined;
520
+ timestamp?: number | undefined;
521
+ description?: string | undefined;
522
+ service?: string | undefined;
523
+ authMethod?: string | undefined;
524
+ }, {
525
+ message: string;
526
+ errorType: ErrorTypeV1.AUTHENTICATION_ERROR;
527
+ isImmediatelyAvailable: false;
528
+ provider?: RecognitionProvider | undefined;
529
+ code?: string | number | undefined;
530
+ audioUtteranceId?: string | undefined;
531
+ timestamp?: number | undefined;
532
+ description?: string | undefined;
533
+ service?: string | undefined;
534
+ authMethod?: string | undefined;
535
+ }>;
536
+ type AuthenticationException = z.infer<typeof AuthenticationExceptionSchema>;
537
+ /**
538
+ * Validation Error
539
+ * isImmediatelyAvailable: true
540
+ * User provided invalid input - can show them what's wrong
541
+ */
542
+ declare const ValidationExceptionSchema: z.ZodObject<{
543
+ provider: z.ZodOptional<z.ZodNativeEnum<typeof RecognitionProvider>>;
544
+ code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
545
+ message: z.ZodString;
546
+ audioUtteranceId: z.ZodOptional<z.ZodString>;
547
+ timestamp: z.ZodOptional<z.ZodNumber>;
548
+ description: z.ZodOptional<z.ZodString>;
549
+ errorType: z.ZodLiteral<ErrorTypeV1.VALIDATION_ERROR>;
550
+ isImmediatelyAvailable: z.ZodLiteral<true>;
551
+ field: z.ZodOptional<z.ZodString>;
552
+ expected: z.ZodOptional<z.ZodString>;
553
+ received: z.ZodOptional<z.ZodString>;
554
+ }, "strip", z.ZodTypeAny, {
555
+ message: string;
556
+ errorType: ErrorTypeV1.VALIDATION_ERROR;
557
+ isImmediatelyAvailable: true;
558
+ provider?: RecognitionProvider | undefined;
559
+ code?: string | number | undefined;
560
+ audioUtteranceId?: string | undefined;
561
+ timestamp?: number | undefined;
562
+ description?: string | undefined;
563
+ field?: string | undefined;
564
+ expected?: string | undefined;
565
+ received?: string | undefined;
566
+ }, {
567
+ message: string;
568
+ errorType: ErrorTypeV1.VALIDATION_ERROR;
569
+ isImmediatelyAvailable: true;
570
+ provider?: RecognitionProvider | undefined;
571
+ code?: string | number | undefined;
572
+ audioUtteranceId?: string | undefined;
573
+ timestamp?: number | undefined;
574
+ description?: string | undefined;
575
+ field?: string | undefined;
576
+ expected?: string | undefined;
577
+ received?: string | undefined;
578
+ }>;
579
+ type ValidationException = z.infer<typeof ValidationExceptionSchema>;
580
+ /**
581
+ * Provider Error
582
+ * isImmediatelyAvailable: false
583
+ * Error from ASR provider - usually transient or needs investigation
584
+ */
585
+ declare const ProviderExceptionSchema: z.ZodObject<{
586
+ code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
587
+ message: z.ZodString;
588
+ audioUtteranceId: z.ZodOptional<z.ZodString>;
589
+ timestamp: z.ZodOptional<z.ZodNumber>;
590
+ description: z.ZodOptional<z.ZodString>;
591
+ errorType: z.ZodLiteral<ErrorTypeV1.PROVIDER_ERROR>;
592
+ isImmediatelyAvailable: z.ZodLiteral<false>;
593
+ provider: z.ZodOptional<z.ZodString>;
594
+ providerErrorCode: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
595
+ isTransient: z.ZodOptional<z.ZodBoolean>;
596
+ }, "strip", z.ZodTypeAny, {
597
+ message: string;
598
+ errorType: ErrorTypeV1.PROVIDER_ERROR;
599
+ isImmediatelyAvailable: false;
600
+ code?: string | number | undefined;
601
+ audioUtteranceId?: string | undefined;
602
+ timestamp?: number | undefined;
603
+ description?: string | undefined;
604
+ provider?: string | undefined;
605
+ providerErrorCode?: string | number | undefined;
606
+ isTransient?: boolean | undefined;
607
+ }, {
608
+ message: string;
609
+ errorType: ErrorTypeV1.PROVIDER_ERROR;
610
+ isImmediatelyAvailable: false;
611
+ code?: string | number | undefined;
612
+ audioUtteranceId?: string | undefined;
613
+ timestamp?: number | undefined;
614
+ description?: string | undefined;
615
+ provider?: string | undefined;
616
+ providerErrorCode?: string | number | undefined;
617
+ isTransient?: boolean | undefined;
618
+ }>;
619
+ type ProviderException = z.infer<typeof ProviderExceptionSchema>;
620
+ /**
621
+ * Timeout Error
622
+ * isImmediatelyAvailable: true
623
+ * Request took too long - user should try again
624
+ */
625
+ declare const TimeoutExceptionSchema: z.ZodObject<{
626
+ provider: z.ZodOptional<z.ZodNativeEnum<typeof RecognitionProvider>>;
627
+ code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
628
+ message: z.ZodString;
629
+ audioUtteranceId: z.ZodOptional<z.ZodString>;
630
+ timestamp: z.ZodOptional<z.ZodNumber>;
631
+ description: z.ZodOptional<z.ZodString>;
632
+ errorType: z.ZodLiteral<ErrorTypeV1.TIMEOUT_ERROR>;
633
+ isImmediatelyAvailable: z.ZodLiteral<true>;
634
+ timeoutMs: z.ZodOptional<z.ZodNumber>;
635
+ operation: z.ZodOptional<z.ZodString>;
636
+ }, "strip", z.ZodTypeAny, {
637
+ message: string;
638
+ errorType: ErrorTypeV1.TIMEOUT_ERROR;
639
+ isImmediatelyAvailable: true;
640
+ provider?: RecognitionProvider | undefined;
641
+ code?: string | number | undefined;
642
+ audioUtteranceId?: string | undefined;
643
+ timestamp?: number | undefined;
644
+ description?: string | undefined;
645
+ timeoutMs?: number | undefined;
646
+ operation?: string | undefined;
647
+ }, {
648
+ message: string;
649
+ errorType: ErrorTypeV1.TIMEOUT_ERROR;
650
+ isImmediatelyAvailable: true;
651
+ provider?: RecognitionProvider | undefined;
652
+ code?: string | number | undefined;
653
+ audioUtteranceId?: string | undefined;
654
+ timestamp?: number | undefined;
655
+ description?: string | undefined;
656
+ timeoutMs?: number | undefined;
657
+ operation?: string | undefined;
658
+ }>;
659
+ type TimeoutException = z.infer<typeof TimeoutExceptionSchema>;
660
+ /**
661
+ * Quota Exceeded Error
662
+ * isImmediatelyAvailable: true
663
+ * Rate limit or quota exceeded - user should wait
664
+ */
665
+ declare const QuotaExceededExceptionSchema: z.ZodObject<{
666
+ provider: z.ZodOptional<z.ZodNativeEnum<typeof RecognitionProvider>>;
667
+ code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
668
+ message: z.ZodString;
669
+ audioUtteranceId: z.ZodOptional<z.ZodString>;
670
+ timestamp: z.ZodOptional<z.ZodNumber>;
671
+ description: z.ZodOptional<z.ZodString>;
672
+ errorType: z.ZodLiteral<ErrorTypeV1.QUOTA_EXCEEDED>;
673
+ isImmediatelyAvailable: z.ZodLiteral<true>;
674
+ quotaType: z.ZodOptional<z.ZodString>;
675
+ resetAt: z.ZodOptional<z.ZodNumber>;
676
+ retryAfterSeconds: z.ZodOptional<z.ZodNumber>;
677
+ }, "strip", z.ZodTypeAny, {
678
+ message: string;
679
+ errorType: ErrorTypeV1.QUOTA_EXCEEDED;
680
+ isImmediatelyAvailable: true;
681
+ provider?: RecognitionProvider | undefined;
682
+ code?: string | number | undefined;
683
+ audioUtteranceId?: string | undefined;
684
+ timestamp?: number | undefined;
685
+ description?: string | undefined;
686
+ quotaType?: string | undefined;
687
+ resetAt?: number | undefined;
688
+ retryAfterSeconds?: number | undefined;
689
+ }, {
690
+ message: string;
691
+ errorType: ErrorTypeV1.QUOTA_EXCEEDED;
692
+ isImmediatelyAvailable: true;
693
+ provider?: RecognitionProvider | undefined;
694
+ code?: string | number | undefined;
695
+ audioUtteranceId?: string | undefined;
696
+ timestamp?: number | undefined;
697
+ description?: string | undefined;
698
+ quotaType?: string | undefined;
699
+ resetAt?: number | undefined;
700
+ retryAfterSeconds?: number | undefined;
701
+ }>;
702
+ type QuotaExceededException = z.infer<typeof QuotaExceededExceptionSchema>;
703
+ /**
704
+ * Connection Error
705
+ * isImmediatelyAvailable: true
706
+ * Connection establishment or network failure - user should check network or retry
707
+ */
708
+ declare const ConnectionExceptionSchema: z.ZodObject<{
709
+ provider: z.ZodOptional<z.ZodNativeEnum<typeof RecognitionProvider>>;
710
+ code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
711
+ message: z.ZodString;
712
+ audioUtteranceId: z.ZodOptional<z.ZodString>;
713
+ timestamp: z.ZodOptional<z.ZodNumber>;
714
+ description: z.ZodOptional<z.ZodString>;
715
+ errorType: z.ZodLiteral<ErrorTypeV1.CONNECTION_ERROR>;
716
+ isImmediatelyAvailable: z.ZodLiteral<true>;
717
+ attempts: z.ZodOptional<z.ZodNumber>;
718
+ url: z.ZodOptional<z.ZodString>;
719
+ underlyingError: z.ZodOptional<z.ZodString>;
720
+ }, "strip", z.ZodTypeAny, {
721
+ message: string;
722
+ errorType: ErrorTypeV1.CONNECTION_ERROR;
723
+ isImmediatelyAvailable: true;
724
+ provider?: RecognitionProvider | undefined;
725
+ code?: string | number | undefined;
726
+ audioUtteranceId?: string | undefined;
727
+ timestamp?: number | undefined;
728
+ description?: string | undefined;
729
+ attempts?: number | undefined;
730
+ url?: string | undefined;
731
+ underlyingError?: string | undefined;
732
+ }, {
733
+ message: string;
734
+ errorType: ErrorTypeV1.CONNECTION_ERROR;
735
+ isImmediatelyAvailable: true;
736
+ provider?: RecognitionProvider | undefined;
737
+ code?: string | number | undefined;
738
+ audioUtteranceId?: string | undefined;
739
+ timestamp?: number | undefined;
740
+ description?: string | undefined;
741
+ attempts?: number | undefined;
742
+ url?: string | undefined;
743
+ underlyingError?: string | undefined;
744
+ }>;
745
+ type ConnectionException = z.infer<typeof ConnectionExceptionSchema>;
746
+ /**
747
+ * Unknown Error
748
+ * isImmediatelyAvailable: false
749
+ * Unexpected error - needs investigation
750
+ */
751
+ declare const UnknownExceptionSchema: z.ZodObject<{
752
+ provider: z.ZodOptional<z.ZodNativeEnum<typeof RecognitionProvider>>;
753
+ code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
754
+ message: z.ZodString;
755
+ audioUtteranceId: z.ZodOptional<z.ZodString>;
756
+ timestamp: z.ZodOptional<z.ZodNumber>;
757
+ description: z.ZodOptional<z.ZodString>;
758
+ errorType: z.ZodLiteral<ErrorTypeV1.UNKNOWN_ERROR>;
759
+ isImmediatelyAvailable: z.ZodLiteral<false>;
760
+ stack: z.ZodOptional<z.ZodString>;
761
+ context: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
762
+ }, "strip", z.ZodTypeAny, {
763
+ message: string;
764
+ errorType: ErrorTypeV1.UNKNOWN_ERROR;
765
+ isImmediatelyAvailable: false;
766
+ provider?: RecognitionProvider | undefined;
767
+ code?: string | number | undefined;
768
+ audioUtteranceId?: string | undefined;
769
+ timestamp?: number | undefined;
770
+ description?: string | undefined;
771
+ stack?: string | undefined;
772
+ context?: Record<string, unknown> | undefined;
773
+ }, {
774
+ message: string;
775
+ errorType: ErrorTypeV1.UNKNOWN_ERROR;
776
+ isImmediatelyAvailable: false;
777
+ provider?: RecognitionProvider | undefined;
778
+ code?: string | number | undefined;
779
+ audioUtteranceId?: string | undefined;
780
+ timestamp?: number | undefined;
781
+ description?: string | undefined;
782
+ stack?: string | undefined;
783
+ context?: Record<string, unknown> | undefined;
784
+ }>;
785
+ type UnknownException = z.infer<typeof UnknownExceptionSchema>;
786
+ /**
787
+ * Discriminated union of all exception types
788
+ * Use this for type-safe error handling
789
+ */
790
+ declare const RecognitionExceptionSchema: z.ZodDiscriminatedUnion<"errorType", [z.ZodObject<{
791
+ provider: z.ZodOptional<z.ZodNativeEnum<typeof RecognitionProvider>>;
792
+ code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
793
+ message: z.ZodString;
794
+ audioUtteranceId: z.ZodOptional<z.ZodString>;
795
+ timestamp: z.ZodOptional<z.ZodNumber>;
796
+ description: z.ZodOptional<z.ZodString>;
797
+ errorType: z.ZodLiteral<ErrorTypeV1.AUTHENTICATION_ERROR>;
798
+ isImmediatelyAvailable: z.ZodLiteral<false>;
799
+ service: z.ZodOptional<z.ZodString>;
800
+ authMethod: z.ZodOptional<z.ZodString>;
801
+ }, "strip", z.ZodTypeAny, {
802
+ message: string;
803
+ errorType: ErrorTypeV1.AUTHENTICATION_ERROR;
804
+ isImmediatelyAvailable: false;
805
+ provider?: RecognitionProvider | undefined;
806
+ code?: string | number | undefined;
807
+ audioUtteranceId?: string | undefined;
808
+ timestamp?: number | undefined;
809
+ description?: string | undefined;
810
+ service?: string | undefined;
811
+ authMethod?: string | undefined;
812
+ }, {
813
+ message: string;
814
+ errorType: ErrorTypeV1.AUTHENTICATION_ERROR;
815
+ isImmediatelyAvailable: false;
816
+ provider?: RecognitionProvider | undefined;
817
+ code?: string | number | undefined;
818
+ audioUtteranceId?: string | undefined;
819
+ timestamp?: number | undefined;
820
+ description?: string | undefined;
821
+ service?: string | undefined;
822
+ authMethod?: string | undefined;
823
+ }>, z.ZodObject<{
824
+ provider: z.ZodOptional<z.ZodNativeEnum<typeof RecognitionProvider>>;
825
+ code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
826
+ message: z.ZodString;
827
+ audioUtteranceId: z.ZodOptional<z.ZodString>;
828
+ timestamp: z.ZodOptional<z.ZodNumber>;
829
+ description: z.ZodOptional<z.ZodString>;
830
+ errorType: z.ZodLiteral<ErrorTypeV1.VALIDATION_ERROR>;
831
+ isImmediatelyAvailable: z.ZodLiteral<true>;
832
+ field: z.ZodOptional<z.ZodString>;
833
+ expected: z.ZodOptional<z.ZodString>;
834
+ received: z.ZodOptional<z.ZodString>;
835
+ }, "strip", z.ZodTypeAny, {
836
+ message: string;
837
+ errorType: ErrorTypeV1.VALIDATION_ERROR;
838
+ isImmediatelyAvailable: true;
839
+ provider?: RecognitionProvider | undefined;
840
+ code?: string | number | undefined;
841
+ audioUtteranceId?: string | undefined;
842
+ timestamp?: number | undefined;
843
+ description?: string | undefined;
844
+ field?: string | undefined;
845
+ expected?: string | undefined;
846
+ received?: string | undefined;
847
+ }, {
848
+ message: string;
849
+ errorType: ErrorTypeV1.VALIDATION_ERROR;
850
+ isImmediatelyAvailable: true;
851
+ provider?: RecognitionProvider | undefined;
852
+ code?: string | number | undefined;
853
+ audioUtteranceId?: string | undefined;
854
+ timestamp?: number | undefined;
855
+ description?: string | undefined;
856
+ field?: string | undefined;
857
+ expected?: string | undefined;
858
+ received?: string | undefined;
859
+ }>, z.ZodObject<{
860
+ code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
861
+ message: z.ZodString;
862
+ audioUtteranceId: z.ZodOptional<z.ZodString>;
863
+ timestamp: z.ZodOptional<z.ZodNumber>;
864
+ description: z.ZodOptional<z.ZodString>;
865
+ errorType: z.ZodLiteral<ErrorTypeV1.PROVIDER_ERROR>;
866
+ isImmediatelyAvailable: z.ZodLiteral<false>;
867
+ provider: z.ZodOptional<z.ZodString>;
868
+ providerErrorCode: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
869
+ isTransient: z.ZodOptional<z.ZodBoolean>;
870
+ }, "strip", z.ZodTypeAny, {
871
+ message: string;
872
+ errorType: ErrorTypeV1.PROVIDER_ERROR;
873
+ isImmediatelyAvailable: false;
874
+ code?: string | number | undefined;
875
+ audioUtteranceId?: string | undefined;
876
+ timestamp?: number | undefined;
877
+ description?: string | undefined;
878
+ provider?: string | undefined;
879
+ providerErrorCode?: string | number | undefined;
880
+ isTransient?: boolean | undefined;
881
+ }, {
882
+ message: string;
883
+ errorType: ErrorTypeV1.PROVIDER_ERROR;
884
+ isImmediatelyAvailable: false;
885
+ code?: string | number | undefined;
886
+ audioUtteranceId?: string | undefined;
887
+ timestamp?: number | undefined;
888
+ description?: string | undefined;
889
+ provider?: string | undefined;
890
+ providerErrorCode?: string | number | undefined;
891
+ isTransient?: boolean | undefined;
892
+ }>, z.ZodObject<{
893
+ provider: z.ZodOptional<z.ZodNativeEnum<typeof RecognitionProvider>>;
894
+ code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
895
+ message: z.ZodString;
896
+ audioUtteranceId: z.ZodOptional<z.ZodString>;
897
+ timestamp: z.ZodOptional<z.ZodNumber>;
898
+ description: z.ZodOptional<z.ZodString>;
899
+ errorType: z.ZodLiteral<ErrorTypeV1.TIMEOUT_ERROR>;
900
+ isImmediatelyAvailable: z.ZodLiteral<true>;
901
+ timeoutMs: z.ZodOptional<z.ZodNumber>;
902
+ operation: z.ZodOptional<z.ZodString>;
903
+ }, "strip", z.ZodTypeAny, {
904
+ message: string;
905
+ errorType: ErrorTypeV1.TIMEOUT_ERROR;
906
+ isImmediatelyAvailable: true;
907
+ provider?: RecognitionProvider | undefined;
908
+ code?: string | number | undefined;
909
+ audioUtteranceId?: string | undefined;
910
+ timestamp?: number | undefined;
911
+ description?: string | undefined;
912
+ timeoutMs?: number | undefined;
913
+ operation?: string | undefined;
914
+ }, {
915
+ message: string;
916
+ errorType: ErrorTypeV1.TIMEOUT_ERROR;
917
+ isImmediatelyAvailable: true;
918
+ provider?: RecognitionProvider | undefined;
919
+ code?: string | number | undefined;
920
+ audioUtteranceId?: string | undefined;
921
+ timestamp?: number | undefined;
922
+ description?: string | undefined;
923
+ timeoutMs?: number | undefined;
924
+ operation?: string | undefined;
925
+ }>, z.ZodObject<{
926
+ provider: z.ZodOptional<z.ZodNativeEnum<typeof RecognitionProvider>>;
927
+ code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
928
+ message: z.ZodString;
929
+ audioUtteranceId: z.ZodOptional<z.ZodString>;
930
+ timestamp: z.ZodOptional<z.ZodNumber>;
931
+ description: z.ZodOptional<z.ZodString>;
932
+ errorType: z.ZodLiteral<ErrorTypeV1.QUOTA_EXCEEDED>;
933
+ isImmediatelyAvailable: z.ZodLiteral<true>;
934
+ quotaType: z.ZodOptional<z.ZodString>;
935
+ resetAt: z.ZodOptional<z.ZodNumber>;
936
+ retryAfterSeconds: z.ZodOptional<z.ZodNumber>;
937
+ }, "strip", z.ZodTypeAny, {
938
+ message: string;
939
+ errorType: ErrorTypeV1.QUOTA_EXCEEDED;
940
+ isImmediatelyAvailable: true;
941
+ provider?: RecognitionProvider | undefined;
942
+ code?: string | number | undefined;
943
+ audioUtteranceId?: string | undefined;
944
+ timestamp?: number | undefined;
945
+ description?: string | undefined;
946
+ quotaType?: string | undefined;
947
+ resetAt?: number | undefined;
948
+ retryAfterSeconds?: number | undefined;
949
+ }, {
950
+ message: string;
951
+ errorType: ErrorTypeV1.QUOTA_EXCEEDED;
952
+ isImmediatelyAvailable: true;
953
+ provider?: RecognitionProvider | undefined;
954
+ code?: string | number | undefined;
955
+ audioUtteranceId?: string | undefined;
956
+ timestamp?: number | undefined;
957
+ description?: string | undefined;
958
+ quotaType?: string | undefined;
959
+ resetAt?: number | undefined;
960
+ retryAfterSeconds?: number | undefined;
961
+ }>, z.ZodObject<{
962
+ provider: z.ZodOptional<z.ZodNativeEnum<typeof RecognitionProvider>>;
963
+ code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
964
+ message: z.ZodString;
965
+ audioUtteranceId: z.ZodOptional<z.ZodString>;
966
+ timestamp: z.ZodOptional<z.ZodNumber>;
967
+ description: z.ZodOptional<z.ZodString>;
968
+ errorType: z.ZodLiteral<ErrorTypeV1.CONNECTION_ERROR>;
969
+ isImmediatelyAvailable: z.ZodLiteral<true>;
970
+ attempts: z.ZodOptional<z.ZodNumber>;
971
+ url: z.ZodOptional<z.ZodString>;
972
+ underlyingError: z.ZodOptional<z.ZodString>;
973
+ }, "strip", z.ZodTypeAny, {
974
+ message: string;
975
+ errorType: ErrorTypeV1.CONNECTION_ERROR;
976
+ isImmediatelyAvailable: true;
977
+ provider?: RecognitionProvider | undefined;
978
+ code?: string | number | undefined;
979
+ audioUtteranceId?: string | undefined;
980
+ timestamp?: number | undefined;
981
+ description?: string | undefined;
982
+ attempts?: number | undefined;
983
+ url?: string | undefined;
984
+ underlyingError?: string | undefined;
985
+ }, {
986
+ message: string;
987
+ errorType: ErrorTypeV1.CONNECTION_ERROR;
988
+ isImmediatelyAvailable: true;
989
+ provider?: RecognitionProvider | undefined;
990
+ code?: string | number | undefined;
991
+ audioUtteranceId?: string | undefined;
992
+ timestamp?: number | undefined;
993
+ description?: string | undefined;
994
+ attempts?: number | undefined;
995
+ url?: string | undefined;
996
+ underlyingError?: string | undefined;
997
+ }>, z.ZodObject<{
998
+ code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
999
+ message: z.ZodString;
1000
+ audioUtteranceId: z.ZodOptional<z.ZodString>;
1001
+ timestamp: z.ZodOptional<z.ZodNumber>;
1002
+ description: z.ZodOptional<z.ZodString>;
1003
+ errorType: z.ZodLiteral<ErrorTypeV1.CIRCUIT_BREAKER_OPEN>;
1004
+ isImmediatelyAvailable: z.ZodLiteral<true>;
1005
+ provider: z.ZodOptional<z.ZodNativeEnum<typeof RecognitionProvider>>;
1006
+ model: z.ZodOptional<z.ZodString>;
1007
+ }, "strip", z.ZodTypeAny, {
1008
+ message: string;
1009
+ errorType: ErrorTypeV1.CIRCUIT_BREAKER_OPEN;
1010
+ isImmediatelyAvailable: true;
1011
+ code?: string | number | undefined;
1012
+ audioUtteranceId?: string | undefined;
1013
+ timestamp?: number | undefined;
1014
+ description?: string | undefined;
1015
+ provider?: RecognitionProvider | undefined;
1016
+ model?: string | undefined;
1017
+ }, {
1018
+ message: string;
1019
+ errorType: ErrorTypeV1.CIRCUIT_BREAKER_OPEN;
1020
+ isImmediatelyAvailable: true;
1021
+ code?: string | number | undefined;
1022
+ audioUtteranceId?: string | undefined;
1023
+ timestamp?: number | undefined;
1024
+ description?: string | undefined;
1025
+ provider?: RecognitionProvider | undefined;
1026
+ model?: string | undefined;
1027
+ }>, z.ZodObject<{
1028
+ provider: z.ZodOptional<z.ZodNativeEnum<typeof RecognitionProvider>>;
1029
+ code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
1030
+ message: z.ZodString;
1031
+ audioUtteranceId: z.ZodOptional<z.ZodString>;
1032
+ timestamp: z.ZodOptional<z.ZodNumber>;
1033
+ description: z.ZodOptional<z.ZodString>;
1034
+ errorType: z.ZodLiteral<ErrorTypeV1.UNKNOWN_ERROR>;
1035
+ isImmediatelyAvailable: z.ZodLiteral<false>;
1036
+ stack: z.ZodOptional<z.ZodString>;
1037
+ context: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
1038
+ }, "strip", z.ZodTypeAny, {
1039
+ message: string;
1040
+ errorType: ErrorTypeV1.UNKNOWN_ERROR;
1041
+ isImmediatelyAvailable: false;
1042
+ provider?: RecognitionProvider | undefined;
1043
+ code?: string | number | undefined;
1044
+ audioUtteranceId?: string | undefined;
1045
+ timestamp?: number | undefined;
1046
+ description?: string | undefined;
1047
+ stack?: string | undefined;
1048
+ context?: Record<string, unknown> | undefined;
1049
+ }, {
1050
+ message: string;
1051
+ errorType: ErrorTypeV1.UNKNOWN_ERROR;
1052
+ isImmediatelyAvailable: false;
1053
+ provider?: RecognitionProvider | undefined;
1054
+ code?: string | number | undefined;
1055
+ audioUtteranceId?: string | undefined;
1056
+ timestamp?: number | undefined;
1057
+ description?: string | undefined;
1058
+ stack?: string | undefined;
1059
+ context?: Record<string, unknown> | undefined;
1060
+ }>]>;
1061
+ type RecognitionException = z.infer<typeof RecognitionExceptionSchema>;
1062
+ /**
1063
+ * Check if an exception should be shown to the user immediately
1064
+ */
1065
+ declare function isExceptionImmediatelyAvailable(exception: RecognitionException): boolean;
1066
+ /**
1067
+ * Get user-friendly error message for exceptions
1068
+ */
1069
+ declare function getUserFriendlyMessage(exception: RecognitionException): string;
1070
+
1071
+ /**
1072
+ * Recognition Context Types V1
1073
+ * NOTE_TO_AI: DO NOT CHANGE THIS UNLESS EXPLICITLY ASKED. Always ask before making any changes.
1074
+ * Types and schemas for recognition context data
1075
+ */
1076
+
1077
+ /**
1078
+ * Message type discriminator for recognition context V1
1079
+ */
1080
+ declare enum RecognitionContextTypeV1 {
1081
+ GAME_CONTEXT = "GameContext",
1082
+ CONTROL_SIGNAL = "ControlSignal",
1083
+ ASR_REQUEST = "ASRRequest"
1084
+ }
1085
+ /**
1086
+ * Control signal types for recognition V1
1087
+ */
1088
+ declare enum ControlSignalTypeV1 {
1089
+ START_RECORDING = "start_recording",
1090
+ STOP_RECORDING = "stop_recording"
1091
+ }
1092
+ /**
1093
+ * SlotMap - A strongly typed map from slot names to lists of values
1094
+ * Used for entity extraction and slot filling in voice interactions
1095
+ */
1096
+ declare const SlotMapSchema: z.ZodRecord<z.ZodString, z.ZodArray<z.ZodString, "many">>;
1097
+ type SlotMap = z.infer<typeof SlotMapSchema>;
1098
+ /**
1099
+ * Game context V1 - contains game state information
1100
+ */
1101
+ declare const GameContextSchemaV1: z.ZodObject<{
1102
+ type: z.ZodLiteral<RecognitionContextTypeV1.GAME_CONTEXT>;
1103
+ gameId: z.ZodString;
1104
+ gamePhase: z.ZodString;
1105
+ promptSTT: z.ZodOptional<z.ZodString>;
1106
+ promptSTF: z.ZodOptional<z.ZodString>;
1107
+ promptTTF: z.ZodOptional<z.ZodString>;
1108
+ slotMap: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodArray<z.ZodString, "many">>>;
1109
+ }, "strip", z.ZodTypeAny, {
1110
+ type: RecognitionContextTypeV1.GAME_CONTEXT;
1111
+ gameId: string;
1112
+ gamePhase: string;
1113
+ promptSTT?: string | undefined;
1114
+ promptSTF?: string | undefined;
1115
+ promptTTF?: string | undefined;
1116
+ slotMap?: Record<string, string[]> | undefined;
1117
+ }, {
1118
+ type: RecognitionContextTypeV1.GAME_CONTEXT;
1119
+ gameId: string;
1120
+ gamePhase: string;
1121
+ promptSTT?: string | undefined;
1122
+ promptSTF?: string | undefined;
1123
+ promptTTF?: string | undefined;
1124
+ slotMap?: Record<string, string[]> | undefined;
1125
+ }>;
1126
+ type GameContextV1 = z.infer<typeof GameContextSchemaV1>;
1127
+ /**
1128
+ * ASR Request V1 - contains complete ASR setup information
1129
+ * Sent once at connection start to configure the session
1130
+ */
1131
+ declare const ASRRequestSchemaV1: z.ZodObject<{
1132
+ type: z.ZodLiteral<RecognitionContextTypeV1.ASR_REQUEST>;
1133
+ audioUtteranceId: z.ZodOptional<z.ZodString>;
1134
+ provider: z.ZodString;
1135
+ model: z.ZodOptional<z.ZodString>;
1136
+ language: z.ZodString;
1137
+ sampleRate: z.ZodNumber;
1138
+ encoding: z.ZodNumber;
1139
+ interimResults: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
1140
+ useContext: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
1141
+ finalTranscriptStability: z.ZodOptional<z.ZodString>;
1142
+ priority: z.ZodDefault<z.ZodOptional<z.ZodEnum<["low", "high"]>>>;
1143
+ fallbackModels: z.ZodOptional<z.ZodArray<z.ZodObject<{
1144
+ provider: z.ZodString;
1145
+ model: z.ZodOptional<z.ZodString>;
1146
+ language: z.ZodOptional<z.ZodString>;
1147
+ sampleRate: z.ZodOptional<z.ZodNumber>;
1148
+ encoding: z.ZodOptional<z.ZodNumber>;
1149
+ interimResults: z.ZodOptional<z.ZodBoolean>;
1150
+ useContext: z.ZodOptional<z.ZodBoolean>;
1151
+ finalTranscriptStability: z.ZodOptional<z.ZodString>;
1152
+ }, "strip", z.ZodTypeAny, {
1153
+ provider: string;
1154
+ model?: string | undefined;
1155
+ language?: string | undefined;
1156
+ sampleRate?: number | undefined;
1157
+ encoding?: number | undefined;
1158
+ interimResults?: boolean | undefined;
1159
+ useContext?: boolean | undefined;
1160
+ finalTranscriptStability?: string | undefined;
1161
+ }, {
1162
+ provider: string;
1163
+ model?: string | undefined;
1164
+ language?: string | undefined;
1165
+ sampleRate?: number | undefined;
1166
+ encoding?: number | undefined;
1167
+ interimResults?: boolean | undefined;
1168
+ useContext?: boolean | undefined;
1169
+ finalTranscriptStability?: string | undefined;
1170
+ }>, "many">>;
1171
+ debugCommand: z.ZodOptional<z.ZodObject<{
1172
+ enableDebugLog: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
1173
+ enableAudioStorage: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
1174
+ enableSongQuizSessionIdCheck: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
1175
+ enablePilotModels: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
1176
+ }, "strip", z.ZodTypeAny, {
1177
+ enableDebugLog: boolean;
1178
+ enableAudioStorage: boolean;
1179
+ enableSongQuizSessionIdCheck: boolean;
1180
+ enablePilotModels: boolean;
1181
+ }, {
1182
+ enableDebugLog?: boolean | undefined;
1183
+ enableAudioStorage?: boolean | undefined;
1184
+ enableSongQuizSessionIdCheck?: boolean | undefined;
1185
+ enablePilotModels?: boolean | undefined;
1186
+ }>>;
1187
+ }, "strip", z.ZodTypeAny, {
1188
+ provider: string;
1189
+ language: string;
1190
+ sampleRate: number;
1191
+ encoding: number;
1192
+ interimResults: boolean;
1193
+ useContext: boolean;
1194
+ priority: "low" | "high";
1195
+ type: RecognitionContextTypeV1.ASR_REQUEST;
1196
+ audioUtteranceId?: string | undefined;
1197
+ model?: string | undefined;
1198
+ finalTranscriptStability?: string | undefined;
1199
+ fallbackModels?: {
1200
+ provider: string;
1201
+ model?: string | undefined;
1202
+ language?: string | undefined;
1203
+ sampleRate?: number | undefined;
1204
+ encoding?: number | undefined;
1205
+ interimResults?: boolean | undefined;
1206
+ useContext?: boolean | undefined;
1207
+ finalTranscriptStability?: string | undefined;
1208
+ }[] | undefined;
1209
+ debugCommand?: {
1210
+ enableDebugLog: boolean;
1211
+ enableAudioStorage: boolean;
1212
+ enableSongQuizSessionIdCheck: boolean;
1213
+ enablePilotModels: boolean;
1214
+ } | undefined;
1215
+ }, {
1216
+ provider: string;
1217
+ language: string;
1218
+ sampleRate: number;
1219
+ encoding: number;
1220
+ type: RecognitionContextTypeV1.ASR_REQUEST;
1221
+ audioUtteranceId?: string | undefined;
1222
+ model?: string | undefined;
1223
+ interimResults?: boolean | undefined;
1224
+ useContext?: boolean | undefined;
1225
+ finalTranscriptStability?: string | undefined;
1226
+ priority?: "low" | "high" | undefined;
1227
+ fallbackModels?: {
1228
+ provider: string;
1229
+ model?: string | undefined;
1230
+ language?: string | undefined;
1231
+ sampleRate?: number | undefined;
1232
+ encoding?: number | undefined;
1233
+ interimResults?: boolean | undefined;
1234
+ useContext?: boolean | undefined;
1235
+ finalTranscriptStability?: string | undefined;
1236
+ }[] | undefined;
1237
+ debugCommand?: {
1238
+ enableDebugLog?: boolean | undefined;
1239
+ enableAudioStorage?: boolean | undefined;
1240
+ enableSongQuizSessionIdCheck?: boolean | undefined;
1241
+ enablePilotModels?: boolean | undefined;
1242
+ } | undefined;
1243
+ }>;
1244
+ type ASRRequestV1 = z.infer<typeof ASRRequestSchemaV1>;
1245
+
1246
+ /**
1247
+ * Unified ASR Request Configuration
1248
+ *
1249
+ * Provider-agnostic configuration for ASR (Automatic Speech Recognition) requests.
1250
+ * This interface provides a consistent API for clients regardless of the underlying provider.
1251
+ *
1252
+ * All fields use library-defined enums for type safety and consistency.
1253
+ * Provider-specific mappers will convert these to provider-native formats.
1254
+ */
1255
+
1256
+ /**
1257
+ * Final transcript stability modes
1258
+ *
1259
+ * Controls timeout duration for fallback final transcript after stopRecording().
1260
+ * Similar to AssemblyAI's turn detection confidence modes but applied to our
1261
+ * internal timeout mechanism when vendors don't respond with is_final=true.
1262
+ *
1263
+ * @see https://www.assemblyai.com/docs/speech-to-text/universal-streaming/turn-detection
1264
+ */
1265
+ declare enum FinalTranscriptStability {
1266
+ /**
1267
+ * Aggressive mode: 100ms timeout
1268
+ * Fast response, optimized for short utterances and quick back-and-forth
1269
+ * Use cases: IVR, quick commands, retail confirmations
1270
+ */
1271
+ AGGRESSIVE = "aggressive",
1272
+ /**
1273
+ * Balanced mode: 200ms timeout (default)
1274
+ * Natural middle ground for most conversational scenarios
1275
+ * Use cases: General customer support, tech support, typical voice interactions
1276
+ */
1277
+ BALANCED = "balanced",
1278
+ /**
1279
+ * Conservative mode: 400ms timeout
1280
+ * Wait longer for providers, optimized for complex/reflective speech
1281
+ * Use cases: Healthcare, complex queries, careful thought processes
1282
+ */
1283
+ CONSERVATIVE = "conservative",
1284
+ /**
1285
+ * Experimental mode: 10000ms (10 seconds) timeout
1286
+ * Very long wait for batch/async providers that need significant processing time
1287
+ * Use cases: Batch processing (Gemini, OpenAI Whisper), complex audio analysis
1288
+ * Note: Should be cancelled immediately when transcript is received
1289
+ */
1290
+ EXPERIMENTAL = "experimental"
1291
+ }
1292
+ /**
1293
+ * Unified ASR request configuration
1294
+ *
1295
+ * This configuration is used by:
1296
+ * - Client SDKs to specify recognition parameters
1297
+ * - Demo applications for user input
1298
+ * - Service layer to configure provider sessions
1299
+ *
1300
+ * Core fields only - all provider-specific options go in providerOptions
1301
+ *
1302
+ * @example
1303
+ * ```typescript
1304
+ * const config: ASRRequestConfig = {
1305
+ * provider: RecognitionProvider.GOOGLE,
1306
+ * model: GoogleModel.LATEST_LONG,
1307
+ * language: Language.ENGLISH_US,
1308
+ * sampleRate: SampleRate.RATE_16000, // or just 16000
1309
+ * encoding: AudioEncoding.LINEAR16,
1310
+ * providerOptions: {
1311
+ * google: {
1312
+ * enableAutomaticPunctuation: true,
1313
+ * interimResults: true,
1314
+ * singleUtterance: false
1315
+ * }
1316
+ * }
1317
+ * };
1318
+ * ```
1319
+ */
1320
+ interface ASRRequestConfig {
1321
+ /**
1322
+ * The ASR provider to use
1323
+ * Must be one of the supported providers in RecognitionProvider enum
1324
+ */
1325
+ provider: RecognitionProvider | string;
1326
+ /**
1327
+ * Optional model specification for the provider
1328
+ * Can be provider-specific model enum or string
1329
+ * If not specified, provider's default model will be used
1330
+ */
1331
+ model?: RecognitionModel;
1332
+ /**
1333
+ * Language/locale for recognition
1334
+ * Use Language enum for common languages
1335
+ * Can also accept BCP-47 language tags as strings
1336
+ */
1337
+ language: Language | string;
1338
+ /**
1339
+ * Audio sample rate in Hz
1340
+ * Prefer using SampleRate enum values for standard rates
1341
+ * Can also accept numeric Hz values (e.g., 16000)
1342
+ */
1343
+ sampleRate: SampleRate | number;
1344
+ /**
1345
+ * Audio encoding format
1346
+ * Must match the actual audio data being sent
1347
+ * Use AudioEncoding enum for standard formats
1348
+ */
1349
+ encoding: AudioEncoding | string;
1350
+ /**
1351
+ * Enable interim (partial) results during recognition
1352
+ * When true, receive real-time updates before finalization
1353
+ * When false, only receive final results
1354
+ * Default: false
1355
+ */
1356
+ interimResults?: boolean;
1357
+ /**
1358
+ * Require GameContext before starting recognition such as song titles
1359
+ * When true, server waits for GameContext message before processing audio
1360
+ * When false, recognition starts immediately
1361
+ * Default: false
1362
+ */
1363
+ useContext?: boolean;
1364
+ /**
1365
+ * Final transcript stability mode
1366
+ *
1367
+ * Controls timeout duration for fallback final transcript when provider
1368
+ * doesn't respond with is_final=true after stopRecording().
1369
+ *
1370
+ * - aggressive: 100ms - fast response, may cut off slow providers
1371
+ * - balanced: 200ms - current default, good for most cases
1372
+ * - conservative: 400ms - wait longer for complex utterances
1373
+ *
1374
+ * @default 'balanced'
1375
+ * @see FinalTranscriptStability enum for detailed descriptions
1376
+ */
1377
+ finalTranscriptStability?: FinalTranscriptStability | string;
1378
+ /**
1379
+ * Traffic control priority for quota slot allocation
1380
+ *
1381
+ * Controls which quota slots this request can use when traffic control is enabled.
1382
+ * The quota system reserves a portion of slots for high-priority requests.
1383
+ *
1384
+ * - 'high': Can use all quota slots (reserved for critical games like song-quiz)
1385
+ * - 'low': Limited to non-reserved slots (default for most requests)
1386
+ *
1387
+ * @default 'low'
1388
+ */
1389
+ priority?: 'low' | 'high';
1390
+ /**
1391
+ * Additional provider-specific options
1392
+ *
1393
+ * Common options per provider:
1394
+ * - Deepgram: punctuate, smart_format, diarize, utterances
1395
+ * - Google: enableAutomaticPunctuation, singleUtterance, enableWordTimeOffsets
1396
+ * - AssemblyAI: formatTurns, filter_profanity, word_boost
1397
+ *
1398
+ * Note: interimResults is now a top-level field, but can still be overridden per provider
1399
+ *
1400
+ * @example
1401
+ * ```typescript
1402
+ * providerOptions: {
1403
+ * google: {
1404
+ * enableAutomaticPunctuation: true,
1405
+ * singleUtterance: false,
1406
+ * enableWordTimeOffsets: false
1407
+ * }
1408
+ * }
1409
+ * ```
1410
+ */
1411
+ providerOptions?: Record<string, any>;
1412
+ /**
1413
+ * Optional fallback ASR configurations
1414
+ *
1415
+ * List of alternative ASR configurations to use if the primary fails.
1416
+ * Each fallback config is a complete ASRRequestConfig that will be tried
1417
+ * in order until one succeeds.
1418
+ *
1419
+ * @example
1420
+ * ```typescript
1421
+ * fallbackModels: [
1422
+ * {
1423
+ * provider: RecognitionProvider.DEEPGRAM,
1424
+ * model: DeepgramModel.NOVA_2,
1425
+ * language: Language.ENGLISH_US,
1426
+ * sampleRate: 16000,
1427
+ * encoding: AudioEncoding.LINEAR16
1428
+ * },
1429
+ * {
1430
+ * provider: RecognitionProvider.GOOGLE,
1431
+ * model: GoogleModel.LATEST_SHORT,
1432
+ * language: Language.ENGLISH_US,
1433
+ * sampleRate: 16000,
1434
+ * encoding: AudioEncoding.LINEAR16
1435
+ * }
1436
+ * ]
1437
+ * ```
1438
+ */
1439
+ fallbackModels?: ASRRequestConfig[];
1440
+ }
1441
+ /**
1442
+ * Partial ASR config for updates
1443
+ * All fields are optional for partial updates
1444
+ */
1445
+ type PartialASRRequestConfig = Partial<ASRRequestConfig>;
1446
+ /**
1447
+ * Helper function to create a default ASR config
1448
+ */
1449
+ declare function createDefaultASRConfig(overrides?: PartialASRRequestConfig): ASRRequestConfig;
1450
+
1451
+ /**
1452
+ * Gemini Model Types
1453
+ * Based on available models as of January 2025
1454
+ *
1455
+ * API Version Notes:
1456
+ * - Gemini 2.5+ models: Use v1beta API (early access features)
1457
+ * - Gemini 2.0 models: Use v1beta API (early access features)
1458
+ * - Gemini 1.5 models: Use v1 API (stable, production-ready)
1459
+ *
1460
+ * @see https://ai.google.dev/gemini-api/docs/models
1461
+ * @see https://ai.google.dev/gemini-api/docs/api-versions
1462
+ */
1463
+ declare enum GeminiModel {
1464
+ GEMINI_2_5_PRO = "gemini-2.5-pro",
1465
+ GEMINI_2_5_FLASH = "gemini-2.5-flash",
1466
+ GEMINI_2_5_FLASH_LITE = "gemini-2.5-flash-lite",
1467
+ GEMINI_2_0_FLASH_LATEST = "gemini-2.0-flash-latest",
1468
+ GEMINI_2_0_FLASH_EXP = "gemini-2.0-flash-exp"
1469
+ }
1470
+
1471
+ /**
1472
+ * OpenAI Model Types
1473
+ */
1474
+ declare enum OpenAIModel {
1475
+ WHISPER_1 = "whisper-1"
1476
+ }
1477
+
1478
+ /**
1479
+ * Standard stage/environment constants used across all services
1480
+ */
1481
+ declare const STAGES: {
1482
+ readonly LOCAL: "local";
1483
+ readonly DEV: "dev";
1484
+ readonly STAGING: "staging";
1485
+ readonly PRODUCTION: "production";
1486
+ };
1487
+ type Stage = typeof STAGES[keyof typeof STAGES];
1488
+
1489
+ /**
1490
+ * Generic WebSocket protocol types and utilities
1491
+ * Supports flexible versioning and message types
1492
+ * Used by both client and server implementations
1493
+ */
1494
+
1495
+ /**
1496
+ * Base message structure - completely flexible
1497
+ * @template V - Version type (number, string, etc.)
1498
+ */
1499
+ interface Message<V = number> {
1500
+ v: V;
1501
+ type: string;
1502
+ data?: unknown;
1503
+ }
1504
+ /**
1505
+ * Version serializer interface
1506
+ * Converts between version type V and byte representation
1507
+ */
1508
+ interface VersionSerializer<V> {
1509
+ serialize: (v: V) => number;
1510
+ deserialize: (byte: number) => V;
1511
+ }
1512
+
1513
+ /**
1514
+ * WebSocketAudioClient - Abstract base class for WebSocket clients
1515
+ * Sends audio and control messages, receives responses from server
1516
+ *
1517
+ * Features:
1518
+ * - Generic version type support (number, string, etc.)
1519
+ * - Type-safe upward/downward message data
1520
+ * - Client-side backpressure monitoring
1521
+ * - Abstract hooks for application-specific logic
1522
+ * - Format-agnostic audio protocol (supports any encoding)
1523
+ */
1524
+
1525
+ type ClientConfig = {
1526
+ url: string;
1527
+ highWM?: number;
1528
+ lowWM?: number;
1529
+ };
1530
/**
 * WebSocketAudioClient - Abstract base class for WebSocket clients
 * that send audio frames and JSON messages
 *
 * @template V - Version type (number, string, object, etc.)
 * @template TUpward - Type of upward message data (Client -> Server)
 * @template TDownward - Type of downward message data (Server -> Client)
 *
 * @example
 * ```typescript
 * class MyClient extends WebSocketAudioClient<number, MyUpMsg, MyDownMsg> {
 *   protected onConnected() {
 *     console.log('Connected!');
 *   }
 *
 *   protected onMessage(msg) {
 *     console.log('Received:', msg.type, msg.data);
 *   }
 *
 *   protected onDisconnected(code, reason) {
 *     console.log('Disconnected:', code, reason);
 *   }
 *
 *   protected onError(error) {
 *     console.error('Error:', error);
 *   }
 * }
 *
 * const client = new MyClient({ url: 'ws://localhost:8080' });
 * client.connect();
 * client.sendMessage(1, 'configure', { language: 'en' });
 * client.sendAudio(audioData);
 * ```
 */
declare abstract class WebSocketAudioClient<V = number, // Version type (default: number)
TUpward = unknown, // Upward message data type
TDownward = unknown> {
    /** Client configuration supplied at construction (url, water marks) */
    private cfg;
    /** Serializer for the protocol's version field (supports non-numeric version types) */
    protected versionSerializer: VersionSerializer<V>;
    /** Underlying WebSocket instance */
    private ws;
    /** Outgoing message sequence counter — presumably incremented per frame; TODO confirm against implementation */
    private seq;
    /** High water mark (bytes) for local backpressure monitoring */
    private HWM;
    /** Low water mark (bytes) for local backpressure monitoring */
    private LWM;
    constructor(cfg: ClientConfig, versionSerializer?: VersionSerializer<V>);
    /**
     * Hook: Called when WebSocket connection is established
     */
    protected abstract onConnected(): void;
    /**
     * Hook: Called when WebSocket connection closes
     * @param code - Close code (see WebSocketCloseCode enum)
     * @param reason - Human-readable close reason
     */
    protected abstract onDisconnected(code: number, reason: string): void;
    /**
     * Hook: Called when WebSocket error occurs
     */
    protected abstract onError(error: Event): void;
    /**
     * Hook: Called when downward message arrives from server
     * Override this to handle messages (optional - default does nothing)
     */
    protected onMessage(_msg: Message<V> & {
        data: TDownward;
    }): void;
    connect(): void;
    /**
     * Send JSON message to server
     * @param version - Message version
     * @param type - Message type (developer defined)
     * @param data - Message payload (typed)
     */
    sendMessage(version: V, type: string, data: TUpward): void;
    /**
     * Send audio frame with specified encoding and sample rate
     * @param audioData - Audio data (any format: Int16Array, Uint8Array, ArrayBuffer, etc.)
     * @param version - Audio frame version
     * @param encodingId - Audio encoding ID (0-5, e.g., AudioEncoding.LINEAR16)
     * @param sampleRate - Sample rate in Hz (e.g., 16000)
     */
    sendAudio(audioData: ArrayBuffer | ArrayBufferView, version: V, encodingId: number, sampleRate: number): void;
    /**
     * Get current WebSocket buffer size
     */
    getBufferedAmount(): number;
    /**
     * Check if local buffer is backpressured
     */
    isLocalBackpressured(): boolean;
    /**
     * Check if ready to send audio
     * Verifies: connection open, no local buffer pressure
     */
    canSend(): boolean;
    /**
     * Check if connection is open
     */
    isOpen(): boolean;
    /**
     * Get current connection state
     */
    getReadyState(): number;
    /**
     * Close the WebSocket connection
     * Protected method for subclasses to implement disconnect logic
     * @param code - WebSocket close code (default: 1000 = normal closure)
     * @param reason - Human-readable close reason
     */
    protected closeConnection(code?: number, reason?: string): void;
}
1640
+
1641
/**
 * Recognition Client Types
 *
 * Type definitions and interfaces for the recognition client SDK.
 * These interfaces enable dependency injection, testing, and alternative implementations.
 */

/**
 * Client connection state enum
 * Represents the various states a recognition client can be in during its lifecycle
 */
declare enum ClientState {
    /** Initial state, no connection established */
    INITIAL = "initial",
    /** Actively establishing WebSocket connection */
    CONNECTING = "connecting",
    /** WebSocket connected but waiting for server ready signal */
    CONNECTED = "connected",
    /** Server ready, can send audio */
    READY = "ready",
    /** Sent stop signal, waiting for final transcript */
    STOPPING = "stopping",
    /** Connection closed normally after stop */
    STOPPED = "stopped",
    /** Connection failed or lost unexpectedly */
    FAILED = "failed"
}
1668
/**
 * Callback URL configuration with message type filtering
 */
interface RecognitionCallbackUrl {
    /** The callback URL endpoint */
    url: string;
    /** Array of message types to send to this URL. If empty/undefined, all types are sent */
    messageTypes?: Array<string | number>;
}
1677
interface IRecognitionClientConfig {
    /**
     * WebSocket endpoint URL (optional)
     * Either `url` or `stage` must be provided.
     * If both are provided, `url` takes precedence.
     *
     * Example with explicit URL:
     * ```typescript
     * { url: 'wss://custom-endpoint.example.com/ws/v1/recognize' }
     * ```
     */
    url?: string;
    /**
     * Stage for recognition service (recommended)
     * Either `url` or `stage` must be provided.
     * If both are provided, `url` takes precedence.
     * Defaults to production if neither is provided.
     *
     * Example with STAGES enum (recommended):
     * ```typescript
     * import { STAGES } from '@recog/shared-types';
     * { stage: STAGES.STAGING }
     * ```
     *
     * String values also accepted:
     * ```typescript
     * { stage: 'staging' } // STAGES.LOCAL | STAGES.DEV | STAGES.STAGING | STAGES.PRODUCTION
     * ```
     */
    stage?: Stage | string;
    /** ASR configuration (provider, model, language, etc.) - optional */
    asrRequestConfig?: ASRRequestConfig;
    /** Game context for improved recognition accuracy */
    gameContext?: GameContextV1;
    /**
     * Game ID for tracking and routing purposes (optional)
     * If provided, this is added to the WebSocket URL as a query parameter.
     * If gameContext is also provided, this takes precedence over gameContext.gameId.
     */
    gameId?: string;
    /** Audio utterance ID (optional) - if not provided, a UUID v4 will be generated */
    audioUtteranceId?: string;
    /**
     * Callback URLs for server-side notifications with optional message type filtering (optional).
     * The game side only needs to use this if another service needs to be notified about the
     * transcription results.
     */
    callbackUrls?: RecognitionCallbackUrl[];
    /** User identification (optional) */
    userId?: string;
    /** Game session identification (optional). Called 'sessionId' in Platform and most games. */
    gameSessionId?: string;
    /** Device identification (optional) */
    deviceId?: string;
    /** Account identification (optional) */
    accountId?: string;
    /** Question answer identifier for tracking Q&A sessions (optional and tracking purpose only) */
    questionAnswerId?: string;
    /** Platform for audio recording device (optional, e.g., 'ios', 'android', 'web', 'unity') */
    platform?: string;
    /** Callback when transcript is received */
    onTranscript?: (result: TranscriptionResultV1) => void;
    /**
     * Callback when function call is received
     * Note: Not supported in 2025. P2 feature for future speech-to-function-call capability.
     */
    onFunctionCall?: (result: FunctionCallResultV1) => void;
    /** Callback when metadata is received. Fired only once, after transcription is complete. */
    onMetadata?: (metadata: MetadataResultV1) => void;
    /** Callback when error occurs */
    onError?: (error: ErrorResultV1) => void;
    /** Callback when connected to WebSocket */
    onConnected?: () => void;
    /**
     * Callback when WebSocket disconnects
     * @param code - WebSocket close code (1000 = normal, 1006 = abnormal, etc.)
     * @param reason - Close reason string
     */
    onDisconnected?: (code: number, reason: string) => void;
    /** High water mark for backpressure control (bytes) */
    highWaterMark?: number;
    /** Low water mark for backpressure control (bytes) */
    lowWaterMark?: number;
    /** Maximum buffer duration in seconds (default: 60s) */
    maxBufferDurationSec?: number;
    /** Expected chunks per second for ring buffer sizing (default: 100) */
    chunksPerSecond?: number;
    /**
     * Connection retry configuration (optional)
     * Only applies to initial connection establishment, not mid-stream interruptions.
     *
     * Default: { maxAttempts: 4, delayMs: 200 } (try once, retry 3 times = 4 total attempts)
     *
     * Timing: Attempt 1 → FAIL → wait 200ms → Attempt 2 → FAIL → wait 200ms → Attempt 3 → FAIL → wait 200ms → Attempt 4
     *
     * Example:
     * ```typescript
     * {
     *   connectionRetry: {
     *     maxAttempts: 2, // Try connecting up to 2 times (1 retry)
     *     delayMs: 500    // Wait 500ms between attempts
     *   }
     * }
     * ```
     */
    connectionRetry?: {
        /** Maximum number of connection attempts (default: 4, min: 1, max: 5) */
        maxAttempts?: number;
        /** Delay in milliseconds between retry attempts (default: 200ms) */
        delayMs?: number;
    };
    /**
     * Optional logger function for debugging
     * If not provided, no logging will occur
     * @param level - Log level: 'debug', 'info', 'warn', 'error'
     * @param message - Log message
     * @param data - Optional additional data
     */
    logger?: (level: 'debug' | 'info' | 'warn' | 'error', message: string, data?: any) => void;
}
1795
/**
 * Recognition Client Interface
 *
 * Main interface for real-time speech recognition clients.
 * Provides methods for connection management, audio streaming, and session control.
 */
interface IRecognitionClient {
    /**
     * Connect to the WebSocket endpoint
     * @returns Promise that resolves when connected
     * @throws Error if connection fails or times out
     */
    connect(): Promise<void>;
    /**
     * Send audio data to the recognition service
     * Audio is buffered locally and sent when connection is ready.
     * @param audioData - PCM audio data as ArrayBuffer, typed array view, or Blob
     */
    sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
    /**
     * Stop recording and wait for final transcript
     * The server will close the connection after sending the final transcript.
     * @returns Promise that resolves when final transcript is received
     */
    stopRecording(): Promise<void>;
    /**
     * Force stop and immediately close connection without waiting for server
     *
     * WARNING: This is an abnormal shutdown that bypasses the graceful stop flow:
     * - Does NOT wait for server to process remaining audio
     * - Does NOT receive final transcript from server
     * - Immediately closes WebSocket connection
     * - Cleans up resources (buffers, listeners)
     *
     * Use Cases:
     * - User explicitly cancels/abandons session
     * - Timeout scenarios where waiting is not acceptable
     * - Need immediate cleanup and can't wait for server
     *
     * RECOMMENDED: Use stopRecording() for normal shutdown.
     * Only use this when immediate disconnection is required.
     */
    stopAbnormally(): void;
    /**
     * Get the audio utterance ID for this session
     * Available immediately after client construction.
     * @returns UUID v4 string identifying this recognition session
     */
    getAudioUtteranceId(): string;
    /**
     * Get the current state of the client
     * @returns Current ClientState value
     */
    getState(): ClientState;
    /**
     * Check if WebSocket connection is open
     * @returns true if connected and ready to communicate
     */
    isConnected(): boolean;
    /**
     * Check if client is currently connecting
     * @returns true if connection is in progress
     */
    isConnecting(): boolean;
    /**
     * Check if client is currently stopping
     * @returns true if stopRecording() is in progress
     */
    isStopping(): boolean;
    /**
     * Check if transcription has finished
     * @returns true if the transcription is complete
     */
    isTranscriptionFinished(): boolean;
    /**
     * Check if the audio buffer has overflowed
     * @returns true if the ring buffer has wrapped around
     */
    isBufferOverflowing(): boolean;
    /**
     * Get client statistics
     * @returns Statistics about audio transmission and buffering
     */
    getStats(): IRecognitionClientStats;
    /**
     * Get the WebSocket URL being used by this client
     * Available immediately after client construction.
     * @returns WebSocket URL string
     */
    getUrl(): string;
}
1886
/**
 * Client statistics interface
 */
interface IRecognitionClientStats {
    /** Total audio bytes sent to server */
    audioBytesSent: number;
    /** Total number of audio chunks sent */
    audioChunksSent: number;
    /** Total number of audio chunks buffered */
    audioChunksBuffered: number;
    /** Number of times the ring buffer overflowed */
    bufferOverflowCount: number;
    /** Current number of chunks in buffer */
    currentBufferedChunks: number;
    /** Whether the ring buffer has wrapped (overwritten old data) */
    hasWrapped: boolean;
}
1903
/**
 * Configuration for RealTimeTwoWayWebSocketRecognitionClient
 * This extends IRecognitionClientConfig and is the main configuration interface
 * for creating a new RealTimeTwoWayWebSocketRecognitionClient instance.
 */
interface RealTimeTwoWayWebSocketRecognitionClientConfig extends IRecognitionClientConfig {
}
1910
+
1911
+ /**
1912
+ * RealTimeTwoWayWebSocketRecognitionClient - Clean, compact SDK for real-time speech recognition
1913
+ *
1914
+ * Features:
1915
+ * - Ring buffer-based audio storage with fixed memory footprint
1916
+ * - Automatic buffering when disconnected, immediate send when connected
1917
+ * - Buffer persists after flush (for future retry/reconnection scenarios)
1918
+ * - Built on WebSocketAudioClient for robust protocol handling
1919
+ * - Simple API: connect() → sendAudio() → stopRecording()
1920
+ * - Type-safe message handling with callbacks
1921
+ * - Automatic backpressure management
1922
+ * - Overflow detection with buffer state tracking
1923
+ *
1924
+ * Example:
1925
+ * ```typescript
1926
+ * const client = new RealTimeTwoWayWebSocketRecognitionClient({
1927
+ * url: 'ws://localhost:3101/ws/v1/recognize',
1928
+ * onTranscript: (result) => console.log(result.finalTranscript),
1929
+ * onError: (error) => console.error(error),
1930
+ * maxBufferDurationSec: 60 // Ring buffer for 60 seconds
1931
+ * });
1932
+ *
1933
+ * await client.connect();
1934
+ *
1935
+ * // Send audio chunks - always stored in ring buffer, sent if connected
1936
+ * micStream.on('data', (chunk) => client.sendAudio(chunk));
1937
+ *
1938
+ * // Signal end of audio and wait for final results
1939
+ * await client.stopRecording();
1940
+ *
1941
+ * // Server will close connection after sending finals
1942
+ * // No manual cleanup needed - browser handles it
1943
+ * ```
1944
+ */
1945
+
1946
/**
 * Check if a WebSocket close code indicates normal closure
 * @param code - WebSocket close code
 * @returns true if the disconnection was normal/expected, false if it was an error
 */
declare function isNormalDisconnection(code: number): boolean;
1952
/**
 * Re-export TranscriptionResultV1 as TranscriptionResult for backward compatibility
 */
type TranscriptionResult = TranscriptionResultV1;
1956
+
1957
/**
 * RealTimeTwoWayWebSocketRecognitionClient - SDK-level client for real-time speech recognition
 *
 * Implements IRecognitionClient interface for dependency injection and testing.
 * Extends WebSocketAudioClient with local audio buffering and simple callback-based API.
 */
declare class RealTimeTwoWayWebSocketRecognitionClient extends WebSocketAudioClient<number, any, any> implements IRecognitionClient {
    /** Protocol version used for outgoing messages — TODO confirm value against implementation */
    private static readonly PROTOCOL_VERSION;
    /** Configuration supplied at construction */
    private config;
    /** Ring buffer of audio chunks (sized by maxBufferDurationSec / chunksPerSecond) */
    private audioBuffer;
    /** Dispatches incoming server messages to the configured callbacks — presumably; TODO confirm */
    private messageHandler;
    /** Current lifecycle state (see ClientState) */
    private state;
    /** Tracks an in-flight connect() so concurrent calls share one promise — presumably; TODO confirm */
    private connectionPromise;
    /** Gates 'debug'-level output of the internal log helper */
    private isDebugLogEnabled;
    /** Total audio bytes sent to the server (reported via getStats) */
    private audioBytesSent;
    /** Total audio chunks sent to the server (reported via getStats) */
    private audioChunksSent;
    /** Interval between periodic audio-stats log lines — TODO confirm units (ms?) */
    private audioStatsLogInterval;
    /** Timestamp of the most recent audio-stats log emission */
    private lastAudioStatsLog;
    constructor(config: RealTimeTwoWayWebSocketRecognitionClientConfig);
    /**
     * Internal logging helper - only logs if a logger was provided in config
     * Debug logs are additionally gated by isDebugLogEnabled flag
     * @param level - Log level: debug, info, warn, or error
     * @param message - Message to log
     * @param data - Optional additional data to log
     */
    private log;
    /**
     * Clean up internal resources to free memory
     * Called when connection closes (normally or abnormally)
     */
    private cleanup;
    connect(): Promise<void>;
    /**
     * Attempt to connect with retry logic
     * Only retries on initial connection establishment, not mid-stream interruptions
     */
    private connectWithRetry;
    sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
    private sendAudioInternal;
    /**
     * Only active when the client is in READY state; otherwise it returns immediately.
     * @returns Promise that resolves when the recording is stopped
     */
    stopRecording(): Promise<void>;
    stopAbnormally(): void;
    getAudioUtteranceId(): string;
    getUrl(): string;
    getState(): ClientState;
    isConnected(): boolean;
    isConnecting(): boolean;
    isStopping(): boolean;
    isTranscriptionFinished(): boolean;
    isBufferOverflowing(): boolean;
    getStats(): IRecognitionClientStats;
    protected onConnected(): void;
    protected onDisconnected(code: number, reason: string): void;
    /**
     * Get human-readable description for WebSocket close code
     */
    private getCloseCodeDescription;
    protected onError(error: Event): void;
    protected onMessage(msg: {
        v: number;
        type: string;
        data: any;
    }): void;
    /**
     * Handle control messages from server
     * @param msg - Control message containing server actions
     */
    private handleControlMessage;
    /**
     * Send audio immediately to the server (without buffering)
     * @param audioData - Audio data to send
     */
    private sendAudioNow;
}
2035
+
2036
+ /**
2037
+ * Configuration Builder for Recognition Client
2038
+ *
2039
+ * Simple builder pattern for RealTimeTwoWayWebSocketRecognitionClientConfig
2040
+ */
2041
+
2042
/**
 * Builder for RealTimeTwoWayWebSocketRecognitionClientConfig
 *
 * Provides a fluent API for building client configurations.
 *
 * Example:
 * ```typescript
 * import { STAGES } from '@recog/shared-types';
 *
 * const config = new ConfigBuilder()
 *   .stage(STAGES.STAGING)  // Recommended: automatic environment selection
 *   .asrRequestConfig({
 *     provider: RecognitionProvider.DEEPGRAM,
 *     model: 'nova-2-general'
 *   })
 *   .onTranscript((result) => console.log(result))
 *   .build();
 * ```
 */
declare class ConfigBuilder {
    /** Partial configuration accumulated by the fluent setters, returned by build() */
    private config;
    /**
     * Set the WebSocket URL (advanced usage)
     * For standard environments, use stage() instead
     */
    url(url: string): this;
    /**
     * Set the stage for automatic environment selection (recommended)
     * @param stage - STAGES.LOCAL | STAGES.DEV | STAGES.STAGING | STAGES.PRODUCTION
     * @example
     * ```typescript
     * import { STAGES } from '@recog/shared-types';
     * builder.stage(STAGES.STAGING)
     * ```
     */
    stage(stage: Stage | string): this;
    /**
     * Set ASR request configuration
     */
    asrRequestConfig(config: ASRRequestConfig): this;
    /**
     * Set game context
     */
    gameContext(context: GameContextV1): this;
    /**
     * Set game ID directly (takes precedence over gameContext.gameId)
     * Use this when you only need to identify the game without full context.
     */
    gameId(id: string): this;
    /**
     * Set audio utterance ID
     */
    audioUtteranceId(id: string): this;
    /**
     * Set callback URLs
     */
    callbackUrls(urls: RecognitionCallbackUrl[]): this;
    /**
     * Set user ID
     */
    userId(id: string): this;
    /**
     * Set game session ID
     */
    gameSessionId(id: string): this;
    /**
     * Set device ID
     */
    deviceId(id: string): this;
    /**
     * Set account ID
     */
    accountId(id: string): this;
    /**
     * Set question answer ID
     */
    questionAnswerId(id: string): this;
    /**
     * Set platform
     */
    platform(platform: string): this;
    /**
     * Set transcript callback
     */
    onTranscript(callback: (result: TranscriptionResultV1) => void): this;
    /**
     * Set metadata callback
     */
    onMetadata(callback: (metadata: MetadataResultV1) => void): this;
    /**
     * Set error callback
     */
    onError(callback: (error: ErrorResultV1) => void): this;
    /**
     * Set connected callback
     */
    onConnected(callback: () => void): this;
    /**
     * Set disconnected callback
     */
    onDisconnected(callback: (code: number, reason: string) => void): this;
    /**
     * Set high water mark
     */
    highWaterMark(bytes: number): this;
    /**
     * Set low water mark
     */
    lowWaterMark(bytes: number): this;
    /**
     * Set max buffer duration in seconds
     */
    maxBufferDurationSec(seconds: number): this;
    /**
     * Set chunks per second
     */
    chunksPerSecond(chunks: number): this;
    /**
     * Set logger function
     */
    logger(logger: (level: 'debug' | 'info' | 'warn' | 'error', message: string, data?: any) => void): this;
    /**
     * Build the configuration
     */
    build(): RealTimeTwoWayWebSocketRecognitionClientConfig;
}
2168
+
2169
+ /**
2170
+ * Factory function for creating Recognition Client instances
2171
+ */
2172
+
2173
/**
 * Create a recognition client from a configuration object
 *
 * Example:
 * ```typescript
 * const client = createClient({
 *   url: 'ws://localhost:3101/ws/v1/recognize',
 *   audioUtteranceId: 'unique-id',
 *   onTranscript: (result) => console.log(result)
 * });
 * ```
 *
 * @param config - Client configuration
 * @returns Configured recognition client instance
 */
declare function createClient(config: RealTimeTwoWayWebSocketRecognitionClientConfig): IRecognitionClient;
2189
/**
 * Create a recognition client using the builder pattern
 *
 * Example:
 * ```typescript
 * const client = createClientWithBuilder((builder) =>
 *   builder
 *     .url('ws://localhost:3101/ws/v1/recognize')
 *     .onTranscript((result) => console.log(result))
 *     .onError((error) => console.error(error))
 * );
 * ```
 *
 * @param configure - Callback that receives a fresh ConfigBuilder and returns it configured
 * @returns Configured recognition client instance
 */
declare function createClientWithBuilder(configure: (builder: ConfigBuilder) => ConfigBuilder): IRecognitionClient;
2203
+
2204
+ /**
2205
+ * SDK Error Classes
2206
+ *
2207
+ * Typed error classes that extend native Error with recognition-specific metadata
2208
+ */
2209
+
2210
/**
 * Base class for all recognition SDK errors
 */
declare class RecognitionError extends Error {
    /** SDK error category (see ErrorTypeV1) */
    readonly errorType: ErrorTypeV1;
    /** Time the error was created — presumably epoch milliseconds; TODO confirm */
    readonly timestamp: number;
    constructor(errorType: ErrorTypeV1, message: string);
}
2218
/**
 * Connection error - thrown when WebSocket connection fails after all retry attempts
 */
declare class ConnectionError extends RecognitionError {
    /** Number of connection attempts made before giving up */
    readonly attempts: number;
    /** WebSocket URL that could not be reached */
    readonly url: string;
    /** Original error that caused the failure, if available */
    readonly underlyingError?: Error;
    constructor(message: string, attempts: number, url: string, underlyingError?: Error);
}
2227
/**
 * Timeout error - thrown when operations exceed timeout limits
 */
declare class TimeoutError extends RecognitionError {
    /** Timeout limit that was exceeded, in milliseconds */
    readonly timeoutMs: number;
    /** Name of the operation that timed out */
    readonly operation: string;
    constructor(message: string, timeoutMs: number, operation: string);
}
2235
/**
 * Validation error - thrown when invalid configuration or input is provided
 */
declare class ValidationError extends RecognitionError {
    /** Name of the offending configuration field, if known */
    readonly field?: string;
    /** Description of the expected value */
    readonly expected?: string;
    /** Description of the value actually received */
    readonly received?: string;
    constructor(message: string, field?: string, expected?: string, received?: string);
}
2244
+
2245
/**
 * VGF-style state schema for game-side recognition state/results management.
 *
 * This schema provides a standardized way for game developers to manage
 * voice recognition state and results in their applications. It supports:
 *
 * STEP 1: Basic transcription flow
 * STEP 2: Mic auto-stop upon correct answer (using partial transcripts)
 * STEP 3: Semantic/function-call outcomes for game actions
 *
 * Ideally this should be part of a more centralized shared type library to free
 * game developers and provide helper functions (VGF? Platform SDK?).
 */
declare const RecognitionVGFStateSchema: z.ZodObject<{
    audioUtteranceId: z.ZodString;
    startRecordingStatus: z.ZodOptional<z.ZodString>;
    transcriptionStatus: z.ZodOptional<z.ZodString>;
    finalTranscript: z.ZodOptional<z.ZodString>;
    finalConfidence: z.ZodOptional<z.ZodNumber>;
    asrConfig: z.ZodOptional<z.ZodString>;
    startRecordingTimestamp: z.ZodOptional<z.ZodString>;
    finalRecordingTimestamp: z.ZodOptional<z.ZodString>;
    finalTranscriptionTimestamp: z.ZodOptional<z.ZodString>;
    pendingTranscript: z.ZodDefault<z.ZodOptional<z.ZodString>>;
    pendingConfidence: z.ZodOptional<z.ZodNumber>;
    functionCallMetadata: z.ZodOptional<z.ZodString>;
    functionCallConfidence: z.ZodOptional<z.ZodNumber>;
    finalFunctionCallTimestamp: z.ZodOptional<z.ZodString>;
    promptSlotMap: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodArray<z.ZodString, "many">>>;
    recognitionActionProcessingState: z.ZodOptional<z.ZodString>;
}, "strip", z.ZodTypeAny, {
    audioUtteranceId: string;
    pendingTranscript: string;
    startRecordingStatus?: string | undefined;
    transcriptionStatus?: string | undefined;
    finalTranscript?: string | undefined;
    finalConfidence?: number | undefined;
    asrConfig?: string | undefined;
    startRecordingTimestamp?: string | undefined;
    finalRecordingTimestamp?: string | undefined;
    finalTranscriptionTimestamp?: string | undefined;
    pendingConfidence?: number | undefined;
    functionCallMetadata?: string | undefined;
    functionCallConfidence?: number | undefined;
    finalFunctionCallTimestamp?: string | undefined;
    promptSlotMap?: Record<string, string[]> | undefined;
    recognitionActionProcessingState?: string | undefined;
}, {
    audioUtteranceId: string;
    startRecordingStatus?: string | undefined;
    transcriptionStatus?: string | undefined;
    finalTranscript?: string | undefined;
    finalConfidence?: number | undefined;
    asrConfig?: string | undefined;
    startRecordingTimestamp?: string | undefined;
    finalRecordingTimestamp?: string | undefined;
    finalTranscriptionTimestamp?: string | undefined;
    pendingTranscript?: string | undefined;
    pendingConfidence?: number | undefined;
    functionCallMetadata?: string | undefined;
    functionCallConfidence?: number | undefined;
    finalFunctionCallTimestamp?: string | undefined;
    promptSlotMap?: Record<string, string[]> | undefined;
    recognitionActionProcessingState?: string | undefined;
}>;
2310
/** VGF recognition state — the parsed output type of RecognitionVGFStateSchema */
type RecognitionState = z.infer<typeof RecognitionVGFStateSchema>;
/** Recording status values used for RecognitionState.startRecordingStatus */
declare const RecordingStatus: {
    readonly NOT_READY: "NOT_READY";
    readonly READY: "READY";
    readonly RECORDING: "RECORDING";
    readonly FINISHED: "FINISHED";
};
/** Union of all RecordingStatus values */
type RecordingStatusType = typeof RecordingStatus[keyof typeof RecordingStatus];
/** Transcription status values used for RecognitionState.transcriptionStatus */
declare const TranscriptionStatus: {
    readonly NOT_STARTED: "NOT_STARTED";
    readonly IN_PROGRESS: "IN_PROGRESS";
    readonly FINALIZED: "FINALIZED";
    readonly ABORTED: "ABORTED";
    readonly ERROR: "ERROR";
};
/** Union of all TranscriptionStatus values */
type TranscriptionStatusType = typeof TranscriptionStatus[keyof typeof TranscriptionStatus];
/** Create a fresh RecognitionState seeded with the given audio utterance ID */
declare function createInitialRecognitionState(audioUtteranceId: string): RecognitionState;
/**
 * Check whether a recording-status transition is allowed.
 * The permitted transition graph is defined in the implementation — TODO confirm allowed edges.
 * @param from - Current status (undefined when no status has been set yet)
 * @param to - Proposed next status
 * @returns true if the transition is valid
 */
declare function isValidRecordingStatusTransition(from: string | undefined, to: string): boolean;
2328
+
2329
+ /**
2330
+ * Simplified VGF Recognition Client
2331
+ *
2332
+ * A thin wrapper around RealTimeTwoWayWebSocketRecognitionClient that maintains
2333
+ * a VGF RecognitionState as a pure sink/output of recognition events.
2334
+ *
2335
+ * The VGF state is updated based on events but never influences client behavior.
2336
+ * All functionality is delegated to the underlying client.
2337
+ */
2338
+
2339
/**
 * Configuration for SimplifiedVGFRecognitionClient
 */
interface SimplifiedVGFClientConfig extends IRecognitionClientConfig {
    /**
     * Callback invoked whenever the VGF state changes
     * Use this to update your UI or React state
     */
    onStateChange?: (state: RecognitionState) => void;
    /**
     * Optional initial state to restore from a previous session
     * If provided, audioUtteranceId will be extracted and used
     */
    initialState?: RecognitionState;
}
2354
/**
 * Interface for SimplifiedVGFRecognitionClient
 *
 * A simplified client that maintains VGF state for game developers.
 * All methods from the underlying client are available, plus VGF state management.
 */
interface ISimplifiedVGFRecognitionClient {
    /**
     * Connect to the recognition service WebSocket
     * @returns Promise that resolves when connected and ready
     */
    connect(): Promise<void>;
    /**
     * Send audio data for transcription
     * @param audioData - PCM audio data as ArrayBuffer, typed array, or Blob
     */
    sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
    /**
     * Stop recording and wait for final transcription
     * @returns Promise that resolves when transcription is complete
     */
    stopRecording(): Promise<void>;
    /**
     * Force stop and immediately close connection without waiting for server
     *
     * WARNING: This is an abnormal shutdown that bypasses the graceful stop flow:
     * - Does NOT wait for server to process remaining audio
     * - Does NOT receive final transcript from server (VGF state set to empty)
     * - Immediately closes WebSocket connection
     * - Cleans up resources (buffers, listeners)
     *
     * Use Cases:
     * - User explicitly cancels/abandons the session
     * - Timeout scenarios where waiting is not acceptable
     * - Need immediate cleanup and can't wait for server
     *
     * RECOMMENDED: Use stopRecording() for normal shutdown.
     * Only use this when immediate disconnection is required.
     */
    stopAbnormally(): void;
    /**
     * Get the current VGF recognition state
     * @returns Current RecognitionState with all transcription data
     */
    getVGFState(): RecognitionState;
    /**
     * Check if connected to the WebSocket
     */
    isConnected(): boolean;
    /**
     * Check if currently connecting
     */
    isConnecting(): boolean;
    /**
     * Check if currently stopping
     */
    isStopping(): boolean;
    /**
     * Check if transcription has finished
     */
    isTranscriptionFinished(): boolean;
    /**
     * Check if the audio buffer has overflowed
     */
    isBufferOverflowing(): boolean;
    /**
     * Get the audio utterance ID for this session
     */
    getAudioUtteranceId(): string;
    /**
     * Get the WebSocket URL being used
     */
    getUrl(): string;
    /**
     * Get the underlying client state (for advanced usage)
     */
    getState(): ClientState;
}
2432
+ /**
2433
+ * This wrapper ONLY maintains VGF state as a sink.
2434
+ * All actual functionality is delegated to the underlying client.
2435
+ */
2436
+ declare class SimplifiedVGFRecognitionClient implements ISimplifiedVGFRecognitionClient {
2437
+ private client;
2438
+ private state;
2439
+ private isRecordingAudio;
2440
+ private stateChangeCallback;
2441
+ private expectedUuid;
2442
+ private logger;
2443
+ private lastSentTerminalUuid;
2444
+ constructor(config: SimplifiedVGFClientConfig);
2445
+ connect(): Promise<void>;
2446
+ sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
2447
+ stopRecording(): Promise<void>;
2448
+ stopAbnormally(): void;
2449
+ getAudioUtteranceId(): string;
2450
+ getUrl(): string;
2451
+ getState(): ClientState;
2452
+ isConnected(): boolean;
2453
+ isConnecting(): boolean;
2454
+ isStopping(): boolean;
2455
+ isTranscriptionFinished(): boolean;
2456
+ isBufferOverflowing(): boolean;
2457
+ getVGFState(): RecognitionState;
2458
+ private isTerminalStatus;
2459
+ private notifyStateChange;
2460
+ }
2461
+ /**
2462
+ * Factory function for creating simplified client
2463
+ * Usage examples:
2464
+ *
2465
+ * // Basic usage
2466
+ * const client = createSimplifiedVGFClient({
2467
+ * asrRequestConfig: { provider: 'deepgram', language: 'en' },
2468
+ * onStateChange: (state) => {
2469
+ * console.log('VGF State updated:', state);
2470
+ * // Update React state, game UI, etc.
2471
+ * }
2472
+ * });
2473
+ *
2474
+ * // With initial state (e.g., restoring from previous session)
2475
+ * const client = createSimplifiedVGFClient({
2476
+ * asrRequestConfig: { provider: 'deepgram', language: 'en' },
2477
+ * initialState: previousState, // Will use audioUtteranceId from state
2478
+ * onStateChange: (state) => setVGFState(state)
2479
+ * });
2480
+ *
2481
+ * // With initial state containing promptSlotMap for enhanced recognition
2482
+ * const stateWithSlots: RecognitionState = {
2483
+ * audioUtteranceId: 'session-123',
2484
+ * promptSlotMap: {
2485
+ * 'song_title': ['one time', 'baby'],
2486
+ * 'artists': ['justin bieber']
2487
+ * }
2488
+ * };
2489
+ * const client = createSimplifiedVGFClient({
2490
+ * asrRequestConfig: { provider: 'deepgram', language: 'en' },
2491
+ * gameContext: {
2492
+ * type: RecognitionContextTypeV1.GAME_CONTEXT,
2493
+ * gameId: 'music-quiz', // Your game's ID
2494
+ * gamePhase: 'song-guessing' // Current game phase
2495
+ * },
2496
+ * initialState: stateWithSlots, // promptSlotMap will be added to gameContext
2497
+ * onStateChange: (state) => setVGFState(state)
2498
+ * });
2499
+ *
2500
+ * await client.connect();
2501
+ * client.sendAudio(audioData);
2502
+ * // VGF state automatically updates based on transcription results
2503
+ */
2504
+ declare function createSimplifiedVGFClient(config: SimplifiedVGFClientConfig): ISimplifiedVGFRecognitionClient;
2505
+
2506
+ /**
2507
+ * VGF Recognition Mapper
2508
+ *
2509
+ * Maps between the existing recognition client types and the simplified VGF state.
2510
+ * This provides a clean abstraction layer for game developers.
2511
+ */
2512
+
2513
+ /**
2514
+ * Resets session state with a new UUID.
2515
+ *
2516
+ * This creates a fresh session state while preserving non-session fields
2517
+ * (like promptSlotMap, asrConfig, etc.)
2518
+ *
2519
+ * Resets:
2520
+ * - audioUtteranceId → new UUID
2521
+ * - transcriptionStatus → NOT_STARTED
2522
+ * - startRecordingStatus → READY
2523
+ * - recognitionActionProcessingState → NOT_STARTED
2524
+ * - finalTranscript → undefined
2525
+ *
2526
+ * @param currentState - The current recognition state
2527
+ * @returns A new state with reset session fields and a new UUID
2528
+ */
2529
+ declare function resetRecognitionVGFState(currentState: RecognitionState): RecognitionState;
2530
+
2531
+ /**
2532
+ * Base URL schema shared across service endpoint helpers.
2533
+ */
2534
+ type ServiceBaseUrls = {
2535
+ httpBase: string;
2536
+ wsBase: string;
2537
+ };
2538
+ /**
2539
+ * Base URL mappings keyed by stage.
2540
+ */
2541
+ declare const RECOGNITION_SERVICE_BASES: Record<Stage, ServiceBaseUrls>;
2542
+ declare const RECOGNITION_CONDUCTOR_BASES: Record<Stage, ServiceBaseUrls>;
2543
+ /**
2544
+ * Normalize arbitrary stage input into a known `Stage`, defaulting to `local`.
2545
+ */
2546
+ declare function normalizeStage(input?: Stage | string | null | undefined): Stage;
2547
+ /**
2548
+ * Resolve the recognition-service base URLs for a given stage.
2549
+ */
2550
+ declare function getRecognitionServiceBase(stage?: Stage | string | null | undefined): ServiceBaseUrls;
2551
+ /**
2552
+ * Convenience helper for retrieving the HTTP base URL.
2553
+ */
2554
+ declare function getRecognitionServiceHttpBase(stage?: Stage | string | null | undefined): string;
2555
+ /**
2556
+ * Convenience helper for retrieving the WebSocket base URL.
2557
+ */
2558
+ declare function getRecognitionServiceWsBase(stage?: Stage | string | null | undefined): string;
2559
+ /**
2560
+ * Expose hostname lookup separately for callers that need raw host strings.
2561
+ */
2562
+ declare function getRecognitionServiceHost(stage?: Stage | string | null | undefined): string;
2563
+ /**
2564
+ * Resolve the recognition-conductor base URLs for a given stage.
2565
+ */
2566
+ declare function getRecognitionConductorBase(stage?: Stage | string | null | undefined): ServiceBaseUrls;
2567
+ declare function getRecognitionConductorHttpBase(stage?: Stage | string | null | undefined): string;
2568
+ declare function getRecognitionConductorWsBase(stage?: Stage | string | null | undefined): string;
2569
+ declare function getRecognitionConductorHost(stage?: Stage | string | null | undefined): string;
2570
+
2571
+ export { AudioEncoding, ClientControlActionV1, ClientState, ConfigBuilder, ConnectionError, ControlSignalTypeV1 as ControlSignal, ControlSignalTypeV1, DeepgramModel, ElevenLabsModel, ErrorTypeV1, FinalTranscriptStability, FireworksModel, GeminiModel, GoogleModel, Language, OpenAIModel, RECOGNITION_CONDUCTOR_BASES, RECOGNITION_SERVICE_BASES, RealTimeTwoWayWebSocketRecognitionClient, RecognitionContextTypeV1, RecognitionError, RecognitionProvider, RecognitionResultTypeV1, RecognitionVGFStateSchema, RecordingStatus, STAGES, SampleRate, SimplifiedVGFRecognitionClient, TimeoutError, TranscriptionStatus, ValidationError, createClient, createClientWithBuilder, createDefaultASRConfig, createInitialRecognitionState, createSimplifiedVGFClient, getRecognitionConductorBase, getRecognitionConductorHost, getRecognitionConductorHttpBase, getRecognitionConductorWsBase, getRecognitionServiceBase, getRecognitionServiceHost, getRecognitionServiceHttpBase, getRecognitionServiceWsBase, getUserFriendlyMessage, isExceptionImmediatelyAvailable, isNormalDisconnection, isValidRecordingStatusTransition, normalizeStage, resetRecognitionVGFState };
2572
+ export type { ASRRequestConfig, ASRRequestV1, AuthenticationException, ConnectionException, ErrorResultV1, FunctionCallResultV1, GameContextV1, IRecognitionClient, IRecognitionClientConfig, IRecognitionClientStats, ISimplifiedVGFRecognitionClient, MetadataResultV1, ProviderException, QuotaExceededException, RealTimeTwoWayWebSocketRecognitionClientConfig, RecognitionCallbackUrl, RecognitionException, RecognitionState, RecordingStatusType, SimplifiedVGFClientConfig, SlotMap, Stage, TimeoutException, TranscriptionResult, TranscriptionResultV1, TranscriptionStatusType, UnknownException, ValidationException };